From 9a564f649f4ef1ee07df5a1b6485f8158160e63c Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:21 +0000 Subject: [PATCH 01/16] xsched: Add base vstream support hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB -------------------------------- Add sys_vstream_manage() syscall. Add the basic function framework. Add basic header files. Add new Kconfig.xsched with XCU_SCHEDULER and XCU_VSTREAM configurations. Create new dir kernel/xsched with vstream.c file with base xsched syscalls stubs. Add Makefile in kernel/xsched. Update main kernel Makefile to include kernel/xsched in build. Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- arch/arm/include/uapi/asm/unistd.h | 1 + arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/include/uapi/asm/unistd.h | 1 + arch/x86/configs/openeuler_defconfig | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/x86/include/uapi/asm/unistd.h | 1 + include/linux/syscalls.h | 2 + include/linux/vstream.h | 9 ++++ include/uapi/asm-generic/unistd.h | 5 +- include/uapi/linux/xcu_vstream.h | 54 +++++++++++++++++++ init/Kconfig | 1 + kernel/Makefile | 1 + kernel/xsched/Kconfig | 27 ++++++++++ kernel/xsched/Makefile | 2 + kernel/xsched/vstream.c | 73 ++++++++++++++++++++++++++ 15 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 include/linux/vstream.h create mode 100644 include/uapi/linux/xcu_vstream.h create mode 100644 kernel/xsched/Kconfig create mode 100644 kernel/xsched/Makefile create mode 100644 kernel/xsched/vstream.c diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index a1149911464c..725c03445c4a 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -14,6 +14,7 @@ #ifndef _UAPI__ASM_ARM_UNISTD_H #define _UAPI__ASM_ARM_UNISTD_H +#define __IGNORE_kabi_reserved456 #define __NR_OABI_SYSCALL_BASE 0x900000 #define __NR_SYSCALL_MASK 0x0fffff diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8f97574813ca..a1b73ac9bd56 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -96,6 +96,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_DYNAMIC is not set +CONFIG_XCU_SCHEDULER=n # # CPU/Task time and stats accounting diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 5f84e3dc98d0..52148408c41b 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -9,6 +9,7 @@ */ #ifndef _UAPI_ASM_POWERPC_UNISTD_H_ #define _UAPI_ASM_POWERPC_UNISTD_H_ +#define __IGNORE_kabi_reserved456 #ifndef __powerpc64__ #include diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index d5d3307a9290..633036782e59 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -116,6 +116,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_DYNAMIC is not set +CONFIG_XCU_SCHEDULER=n # # CPU/Task time and stats accounting diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f88268a37ec2..504d1a1701d4 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -377,7 +377,7 @@ 453 64 map_shadow_stack sys_map_shadow_stack 454 
common kabi_reserved454 sys_ni_syscall 455 common kabi_reserved455 sys_ni_syscall -456 common kabi_reserved456 sys_ni_syscall +456 common vstream_manage sys_vstream_manage 457 common kabi_reserved457 sys_ni_syscall 458 common kabi_reserved458 sys_ni_syscall 459 common kabi_reserved459 sys_ni_syscall diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index be5e2e747f50..c4e01e910ecd 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -11,6 +11,7 @@ * thing regardless. */ #define __X32_SYSCALL_BIT 0x40000000 +#define __IGNORE_kabi_reserved456 #ifndef __KERNEL__ # ifdef __i386__ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 36c592e43d65..119aabc72a2d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -74,6 +74,7 @@ struct landlock_ruleset_attr; enum landlock_rule_type; struct cachestat_range; struct cachestat; +struct vstream_args; #include #include @@ -948,6 +949,7 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); +asmlinkage long sys_vstream_manage(struct vstream_args __user *arg, int cmd); /* * Architecture-specific system calls */ diff --git a/include/linux/vstream.h b/include/linux/vstream.h new file mode 100644 index 000000000000..627f754f83c5 --- /dev/null +++ b/include/linux/vstream.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VSTREAM_H +#define _LINUX_VSTREAM_H + +#include + +typedef int vstream_manage_t(struct vstream_args *arg); + +#endif /* _LINUX_VSTREAM_H */ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index bf2b30463784..9c7ef260593a 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -830,8 +830,9 @@ __SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) __SYSCALL(__NR_kabi_reserved454, sys_ni_syscall) #define __NR_kabi_reserved455 455 __SYSCALL(__NR_kabi_reserved455, sys_ni_syscall) -#define __NR_kabi_reserved456 456 -__SYSCALL(__NR_kabi_reserved456, sys_ni_syscall) +#define __IGNORE_kabi_reserved456 +#define __NR_vstream_manage 456 +__SYSCALL(__NR_vstream_manage, sys_vstream_manage) #define __NR_kabi_reserved457 457 __SYSCALL(__NR_kabi_reserved457, sys_ni_syscall) #define __NR_kabi_reserved458 458 diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h new file mode 100644 index 000000000000..32c71dce5ad1 --- /dev/null +++ b/include/uapi/linux/xcu_vstream.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_XCU_VSTREAM_H +#define _UAPI_XCU_VSTREAM_H + +#include + +#define PAYLOAD_SIZE_MAX 512 +#define XCU_SQE_SIZE_MAX 64 + +/* + * VSTREAM_ALLOC: alloc a vstream, buffer for tasks + * VSTREAM_FREE: free a vstream + * VSTREAM_KICK: there are tasks to be executed in the vstream + */ +typedef enum VSTREAM_COMMAND { + VSTREAM_ALLOC = 0, + VSTREAM_FREE, + VSTREAM_KICK, + MAX_COMMAND +} vstream_command_t; + +typedef struct vstream_alloc_args { + __s32 type; + __u32 user_stream_id; +} vstream_alloc_args_t; + +typedef struct vstream_free_args { } vstream_free_args_t; + +typedef struct vstream_kick_args { + __u32 sqe_num; + __s32 timeout; + __s8 sqe[XCU_SQE_SIZE_MAX]; +} vstream_kick_args_t; + +typedef struct vstream_args { + __u32 channel_id; + __u32 fd; + __u32 dev_id; + __u32 task_type; + __u32 sq_id; + __u32 cq_id; + + /* Device related 
structures. */
+	union {
+		vstream_alloc_args_t va_args;
+		vstream_free_args_t vf_args;
+		vstream_kick_args_t vk_args;
+	};
+
+	__u32 payload_size;
+	__s8 payload[PAYLOAD_SIZE_MAX];
+} vstream_args_t;
+
+#endif /* _UAPI_XCU_VSTREAM_H */
diff --git a/init/Kconfig b/init/Kconfig
index 5af21834fbff..52290ec7c8db 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -478,6 +478,7 @@ source "kernel/irq/Kconfig"
 source "kernel/time/Kconfig"
 source "kernel/bpf/Kconfig"
 source "kernel/Kconfig.preempt"
+source "kernel/xsched/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 1fe46db40806..0baddecc349f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,6 +50,7 @@ obj-y += rcu/
 obj-y += livepatch/
 obj-y += dma/
 obj-y += entry/
+obj-y += xsched/
 obj-$(CONFIG_MODULES) += module/
 
 obj-$(CONFIG_KCMP) += kcmp.o
diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig
new file mode 100644
index 000000000000..47a5361f2ad3
--- /dev/null
+++ b/kernel/xsched/Kconfig
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config XCU_SCHEDULER
+	bool "Enable XSched functionality"
+	select CGROUP_XCU
+	default n
+	help
+	  This option enables the XSched scheduler, a custom scheduling mechanism
+	  designed for heterogeneous compute units (e.g., XPUs). It provides:
+	  - Priority-based task scheduling with latency-sensitive optimizations.
+	  - Integration with cgroups (via CGROUP_XCU) for resource isolation.
+
+	  Enable this only if your system requires advanced scheduling for XPU workloads.
+	  If unsure, say N.
+
+config XCU_VSTREAM
+	bool "Enable SQ/CQ vstream buffer management for XPU"
+	default n
+	depends on XCU_SCHEDULER
+	help
+	  This option enables virtual stream (vstream) support for XPUs, managing
+	  submission queues (SQ) and completion queues (CQ) in kernel space. Key features:
+	  - Zero-copy buffer management between user and kernel space.
+	  - Batch processing of XPU commands to reduce MMIO overhead.
+
+	  Requires XCU_SCHEDULER to be enabled. May increase kernel memory usage.
+	  Recommended for high-throughput XPU workloads. If unsure, say N.
diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile
new file mode 100644
index 000000000000..e972cd93b607
--- /dev/null
+++ b/kernel/xsched/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += vstream.o
diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c
new file mode 100644
index 000000000000..5723c359e0f2
--- /dev/null
+++ b/kernel/xsched/vstream.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Vstream management for XPU devices
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * Author: Konstantin Meskhidze
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include
+#include
+
+#ifdef CONFIG_XCU_VSTREAM
+
+int vstream_alloc(struct vstream_args *arg)
+{
+	return 0;
+}
+
+int vstream_free(struct vstream_args *arg)
+{
+	return 0;
+}
+
+int vstream_kick(struct vstream_args *arg)
+{
+	return 0;
+}
+
+/*
+ * vstream_manage_cmd table
+ */
+static vstream_manage_t(*vstream_command_table[MAX_COMMAND + 1]) = {
+	vstream_alloc,	// VSTREAM_ALLOC
+	vstream_free,	// VSTREAM_FREE
+	vstream_kick,	// VSTREAM_KICK
+	NULL		// MAX_COMMAND
+};
+
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+	int res = 0;
+	struct vstream_args vstream_arg;
+
+	/* Reject out-of-range commands before indexing the table. */
+	if (cmd < 0 || cmd >= MAX_COMMAND)
+		return -EINVAL;
+
+	if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) {
+		pr_err("copy_from_user failed\n");
+		return -EFAULT;
+	}
+
+	res = vstream_command_table[cmd](&vstream_arg);
+	if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) {
+		pr_err("copy_to_user failed\n");
+		return -EFAULT;
+	}
+
+	pr_debug("vstream_manage: cmd %d\n", cmd);
+	return res;
+}
+#else
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+	return -ENOSYS;
+}
+#endif
-- 
Gitee

From b4c74a4066ecfae462a4466aa33b5f93b59eb7d0 Mon Sep 17 00:00:00 2001
From: Konstantin Meskhidze
Date: Tue, 30 Sep 2025 07:06:22 +0000
Subject: [PATCH 02/16] xcu: Add base NPU driver support

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Add base xcu_group structure, xcu_type enum and xcu_operation struct.
Add build support in Makefiles.

Signed-off-by: Konstantin Meskhidze
Signed-off-by: Hui Tang
Signed-off-by: Liu Kai
Signed-off-by: Xia Fukun
---
 drivers/Makefile          |  1 +
 drivers/xcu/Makefile      |  2 +
 drivers/xcu/xcu_group.c   | 79 +++++++++++++++++++++++++++++++++++++++
 include/linux/xcu_group.h | 56 +++++++++++++++++++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 drivers/xcu/Makefile
 create mode 100644 drivers/xcu/xcu_group.c
 create mode 100644 include/linux/xcu_group.h

diff --git a/drivers/Makefile b/drivers/Makefile
index 3955e605df14..b06192df4c3c 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -195,6 +195,7 @@ obj-$(CONFIG_GNSS)		+= gnss/
 obj-$(CONFIG_INTERCONNECT)	+= interconnect/
 obj-$(CONFIG_COUNTER)		+= counter/
 obj-$(CONFIG_MOST)		+= most/
+obj-$(CONFIG_XCU_SCHEDULER)	+= xcu/
 obj-$(CONFIG_PECI)		+= peci/
 obj-$(CONFIG_HTE)		+= hte/
 obj-$(CONFIG_DRM_ACCEL)		+= accel/
diff --git a/drivers/xcu/Makefile b/drivers/xcu/Makefile
new file mode 100644
index 000000000000..575115b148ec
--- /dev/null
+++ b/drivers/xcu/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_XCU_SCHEDULER) += xcu_group.o
diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c
new file mode 100644
index 000000000000..11bf0e54aaaa
--- /dev/null
+++ b/drivers/xcu/xcu_group.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Code for NPU driver support
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * Author: Konstantin Meskhidze
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * + */ +#include + +/* This function runs "run" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object + */ +int xcu_run(struct xcu_op_handler_params *params) +{ + return 0; +} + +/* This function runs "wait" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object + */ +int xcu_wait(struct xcu_op_handler_params *params) +{ + return 0; +} + +/* This function runs "complete" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + */ +int xcu_complete(struct xcu_op_handler_params *params) +{ + return 0; +} + +/* This function runs "finish" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement deallocation + * and freeing memory for SQ and CQ buffers. + */ +int xcu_finish(struct xcu_op_handler_params *params) +{ + return 0; +} + +/* This function runs a "alloc" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement allocation + * and registering memory for SQ and CQ buffers. + */ +int xcu_alloc(struct xcu_op_handler_params *params) +{ + return 0; +} + +static struct xcu_group __xcu_group_root = { + .id = 0, + .type = XCU_TYPE_ROOT, + .next_layer = IDR_INIT(next_layer), +}; + +struct xcu_group *xcu_group_root = &__xcu_group_root; +EXPORT_SYMBOL(xcu_group_root); diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h new file mode 100644 index 000000000000..d1a6af4c8c47 --- /dev/null +++ b/include/linux/xcu_group.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __XSCHED_XCU_GROUP_H__ +#define __XSCHED_XCU_GROUP_H__ + +#include +#include + +extern struct xcu_group *xcu_group_root; + +enum xcu_type { + XCU_TYPE_ROOT, + XCU_TYPE_XPU, +}; + +struct xcu_op_handler_params { +}; + +typedef int (*xcu_op_handler_fn_t)(struct xcu_op_handler_params *params); + +struct xcu_operation { + xcu_op_handler_fn_t run; + xcu_op_handler_fn_t finish; + xcu_op_handler_fn_t wait; + xcu_op_handler_fn_t complete; + xcu_op_handler_fn_t alloc; +}; + +struct xcu_group { + /* sq id. */ + uint32_t id; + + /* Type of XCU group. */ + enum xcu_type type; + + /* IDR for the next layer of XCU group tree. 
*/ + struct idr next_layer; +}; + +#ifdef CONFIG_XCU_SCHEDULER +int xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group); +void xcu_group_detach(struct xcu_group *group); +struct xcu_group *xcu_group_find(struct xcu_group *group, int id); +struct xcu_group *xcu_group_init(int id); +void xcu_group_free(struct xcu_group *group); + +extern int xcu_run(struct xcu_op_handler_params *params); +extern int xcu_wait(struct xcu_op_handler_params *params); +extern int xcu_complete(struct xcu_op_handler_params *params); +extern int xcu_finish(struct xcu_op_handler_params *params); +extern int xcu_alloc(struct xcu_op_handler_params *params); +extern int xcu_logic_alloc(struct xcu_op_handler_params *params); +extern int xcu_logic_free(struct xcu_op_handler_params *params); +#endif /* !CONFIG_XCU_SCHEDULER */ + +#endif /* __XSCHED_XCU_GROUP_H__ */ -- Gitee From e86ad9a238e46178861f39773a914c869a88ee6c Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:23 +0000 Subject: [PATCH 03/16] xsched: Add debug prints in XSched mechanism hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add xsched.h with xsched related prints. Add XSCHED_DEBUG_PRINTS configuration to switch_on/off xsched prints. Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/xsched.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/xsched.h diff --git a/include/linux/xsched.h b/include/linux/xsched.h new file mode 100644 index 000000000000..628a2201b3d8 --- /dev/null +++ b/include/linux/xsched.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_XSCHED_H__ +#define __LINUX_XSCHED_H__ + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define XSCHED_ERR_PREFIX "XSched [ERROR]: " +#define XSCHED_ERR(fmt, ...) \ + pr_err(pr_fmt(XSCHED_ERR_PREFIX fmt), ##__VA_ARGS__) + +#define XSCHED_WARN_PREFIX "XSched [WARNING]: " +#define XSCHED_WARN(fmt, ...) \ + pr_warn(pr_fmt(XSCHED_WARN_PREFIX fmt), ##__VA_ARGS__) + +/* + * Debug specific prints for XSched + */ + +#define XSCHED_DEBUG_PREFIX "XSched [DEBUG]: " +#define XSCHED_DEBUG(fmt, ...) \ + pr_debug(pr_fmt(XSCHED_DEBUG_PREFIX fmt), ##__VA_ARGS__) + +#define XSCHED_CALL_STUB() \ + XSCHED_DEBUG(" -----* %s @ %s called *-----\n", __func__, __FILE__) + +#define XSCHED_EXIT_STUB() \ + XSCHED_DEBUG(" -----* %s @ %s exited *-----\n", __func__, __FILE__) + +#endif /* !__LINUX_XSCHED_H__ */ -- Gitee From f33c254fc2ce27e984345a28c34209ae6f406369 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:24 +0000 Subject: [PATCH 04/16] xsched: Add XCU initialization support hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add xcu group alloc/find/attach/detach funcs implementation. Add xsched_cu data structures, all related enumerators. Add xsched_xcu_register() for driver to register xcu. Add XSCHED_NR_CUS config parameters. 
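
A minimal registration sketch from a hypothetical driver, for illustration
only: the probe function, the ids and the flat type -> device -> channel
layout are made up, and error unwinding and attach return-code checks are
omitted for brevity:

	static int example_npu_probe(void)
	{
		/* The id of the first level doubles as the type key. */
		struct xcu_group *type_grp = xcu_group_init(XCU_TYPE_XPU);
		struct xcu_group *dev_grp = xcu_group_init(0);	/* device 0 */
		struct xcu_group *chan_grp = xcu_group_init(0);	/* channel 0 */

		if (!type_grp || !dev_grp || !chan_grp)
			return -ENOMEM;

		/* Build the chain under the static root group. */
		xcu_group_attach(type_grp, xcu_group_root);
		xcu_group_attach(dev_grp, type_grp);
		xcu_group_attach(chan_grp, dev_grp);

		/* Allocates the struct xsched_cu and spawns its worker. */
		return xsched_xcu_register(chan_grp);
	}
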
Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- drivers/xcu/xcu_group.c | 99 ++++++++++++++++++++++++++++++++++ include/linux/xcu_group.h | 21 ++++++++ include/linux/xsched.h | 40 ++++++++++++++ kernel/xsched/Kconfig | 9 ++++ kernel/xsched/Makefile | 1 + kernel/xsched/core.c | 110 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 280 insertions(+) create mode 100644 kernel/xsched/core.c diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c index 11bf0e54aaaa..fef05b9787f3 100644 --- a/drivers/xcu/xcu_group.c +++ b/drivers/xcu/xcu_group.c @@ -16,7 +16,106 @@ * more details. * */ +#include +#include #include +#include + +static DECLARE_RWSEM(xcu_group_rwsem); + +struct xcu_group *xcu_group_init(int id) +{ + struct xcu_group *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->id = id; + node->type = XCU_TYPE_XPU; + idr_init(&node->next_layer); + + return node; +} +EXPORT_SYMBOL(xcu_group_init); + +int __xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group) +{ + int id = new_group->id; + + if (id == -1) + id = idr_alloc(&previous_group->next_layer, new_group, 0, + INT_MAX, GFP_KERNEL); + else + id = idr_alloc(&previous_group->next_layer, new_group, id, + id + 1, GFP_KERNEL); + + if (id < 0) { + XSCHED_ERR("Fail to attach xcu_group: id conflict @ %s\n", + __func__); + return -EEXIST; + } + new_group->id = id; + new_group->previous_layer = previous_group; + + return 0; +} + +int xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group) +{ + int ret; + + down_write(&xcu_group_rwsem); + ret = __xcu_group_attach(new_group, previous_group); + up_write(&xcu_group_rwsem); + + return ret; +} +EXPORT_SYMBOL(xcu_group_attach); + +static inline void __xcu_group_detach(struct xcu_group *group) +{ + if (!group || !group->previous_layer) + return; + + idr_remove(&group->previous_layer->next_layer, group->id); + group->previous_layer = NULL; +} + +void xcu_group_detach(struct xcu_group *group) +{ + down_write(&xcu_group_rwsem); + __xcu_group_detach(group); + up_write(&xcu_group_rwsem); +} +EXPORT_SYMBOL(xcu_group_detach); + +void xcu_group_free(struct xcu_group *group) +{ + idr_destroy(&group->next_layer); + if (group != xcu_group_root) + kfree(group); +} +EXPORT_SYMBOL(xcu_group_free); + +static struct xcu_group *__xcu_group_find_nolock(struct xcu_group *group, + int id) +{ + return idr_find(&group->next_layer, id); +} + +struct xcu_group *xcu_group_find(struct xcu_group *group, int id) +{ + struct xcu_group *result; + + down_read(&xcu_group_rwsem); + result = __xcu_group_find_nolock(group, id); + up_read(&xcu_group_rwsem); + + return result; +} +EXPORT_SYMBOL(xcu_group_find); /* This function runs "run" callback for a given xcu_group * and a given vstream that are passed within diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h index d1a6af4c8c47..ef11886c18a1 100644 --- a/include/linux/xcu_group.h +++ b/include/linux/xcu_group.h @@ -5,6 +5,11 @@ #include #include +#ifndef CONFIG_XSCHED_NR_CUS +#define CONFIG_XSCHED_NR_CUS 1 +#endif /* !CONFIG_XSCHED_NR_CUS */ +#define XSCHED_NR_CUS CONFIG_XSCHED_NR_CUS + extern struct xcu_group *xcu_group_root; enum xcu_type { @@ -34,6 +39,22 @@ struct xcu_group { /* IDR for the next layer of XCU group tree. */ struct idr next_layer; + + /* Pointer to the previous XCU group in the XCU group tree. 
*/ + struct xcu_group *previous_layer; + + /* Pointer to operation fn pointers object describing + * this XCU group's callbacks. + */ + struct xcu_operation *opt; + + /* Pointer to the XCU related to this XCU group. */ + struct xsched_cu *xcu; + + /* Mask of XCU ids associated with this XCU group + * and this group's children's XCUs. + */ + DECLARE_BITMAP(xcu_mask, XSCHED_NR_CUS); }; #ifdef CONFIG_XCU_SCHEDULER diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 628a2201b3d8..011f0e9bc227 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -2,6 +2,7 @@ #ifndef __LINUX_XSCHED_H__ #define __LINUX_XSCHED_H__ +#include #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif @@ -28,4 +29,43 @@ #define XSCHED_EXIT_STUB() \ XSCHED_DEBUG(" -----* %s @ %s exited *-----\n", __func__, __FILE__) +enum xcu_state { + XCU_INACTIVE, + XCU_IDLE, + XCU_BUSY, + XCU_SUBMIT, +}; + +enum xsched_cu_status { + /* Worker not initialized. */ + XSCHED_XCU_NONE, + + /* Worker is sleeping in idle state. */ + XSCHED_XCU_WAIT_IDLE, + + /* Worker is sleeping in running state. */ + XSCHED_XCU_WAIT_RUNNING, + + /* Worker is active but not processing anything. */ + XSCHED_XCU_ACTIVE, + + NR_XSCHED_XCU_STATUS, +}; + +/* This is the abstraction object of the xcu computing unit. */ +struct xsched_cu { + uint32_t id; + uint32_t state; + + struct task_struct *worker; + + struct xcu_group *group; + + struct mutex xcu_lock; + + wait_queue_head_t wq_xcu_idle; + wait_queue_head_t wq_xcu_running; +}; + +int xsched_xcu_register(struct xcu_group *group); #endif /* !__LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig index 47a5361f2ad3..fc5d7767d8e0 100644 --- a/kernel/xsched/Kconfig +++ b/kernel/xsched/Kconfig @@ -25,3 +25,12 @@ config XCU_VSTREAM Requires XCU_SCHEDULER to be enabled. May increase kernel memory usage. Recommended for high-throughput XPU workloads. If unsure, say N. + +config XSCHED_NR_CUS + int "Number of CUs (a.k.a. XCUs) available to XSched mechanism" + default 8 + depends on XCU_SCHEDULER + help + This option defines the maximum number of Compute Units (CUs) that can be + managed by the XSched scheduler, consider changing this value proportionally + to the number of available XCU cores. diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index e972cd93b607..62e58e4151b0 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c new file mode 100644 index 000000000000..bbd125044c88 --- /dev/null +++ b/kernel/xsched/core.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Core kernel scheduler code for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include +#include +#include +#include +#include + +int num_active_xcu; +spinlock_t xcu_mgr_lock; + +/* Xsched XCU array and bitmask that represents which XCUs + * are present and online. 
+ */ +DECLARE_BITMAP(xcu_online_mask, XSCHED_NR_CUS); +struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; + +static int xsched_schedule(void *input_xcu) +{ + return 0; +} + +/* Initializes all xsched XCU objects. + * Should only be called from xsched_xcu_register function. + */ +static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, + int xcu_id) +{ + bitmap_clear(xcu_group_root->xcu_mask, 0, XSCHED_NR_CUS); + + xcu->id = xcu_id; + xcu->state = XSCHED_XCU_NONE; + xcu->group = group; + + mutex_init(&xcu->xcu_lock); + + /* Mark current XCU in a mask inside XCU root group. */ + set_bit(xcu->id, xcu_group_root->xcu_mask); + + /* This worker should set XCU to XSCHED_XCU_WAIT_IDLE. + * If after initialization XCU still has XSCHED_XCU_NONE + * status then we can assume that there was a problem + * with XCU kthread job. + */ + xcu->worker = kthread_run(xsched_schedule, xcu, "xcu_%u", xcu->id); +} + +/* Allocates xcu id in xcu_manager array. */ +static int alloc_xcu_id(void) +{ + int xcu_id = -1; + + spin_lock(&xcu_mgr_lock); + if (num_active_xcu >= XSCHED_NR_CUS) + goto out_unlock; + + xcu_id = num_active_xcu; + num_active_xcu++; + XSCHED_DEBUG("Number of active xcu: %d.\n", num_active_xcu); + +out_unlock: + spin_unlock(&xcu_mgr_lock); + return xcu_id; +} + +/* + * Initialize and register xcu in xcu_manager array. + */ +int xsched_xcu_register(struct xcu_group *group) +{ + int xcu_id; + struct xsched_cu *xcu; + + xcu_id = alloc_xcu_id(); + if (xcu_id < 0) { + XSCHED_ERR("Fail to alloc xcu id.\n"); + return -ENOSPC; + }; + + xcu = kzalloc(sizeof(struct xsched_cu), GFP_KERNEL); + if (!xcu) { + XSCHED_ERR("Fail to alloc xcu.\n"); + return -ENOMEM; + }; + + group->xcu = xcu; + xsched_cu_mgr[xcu_id] = xcu; + + /* Init xcu's internals. */ + xsched_xcu_init(xcu, group, xcu_id); + return 0; +} +EXPORT_SYMBOL(xsched_xcu_register); -- Gitee From 0d273bb1400bc9ac0a7a5a1d6f0d85d057f60d11 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:25 +0000 Subject: [PATCH 05/16] xsched: Introduce vstream management hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add vstream related data structures: - vstream_info. Add vstream related context and entity data structures: - xsched_entity - xsched_context Add xsched_init() implementation. Add vstream_alloc/free implementation. 
Signed-off-by: Konstantin Meskhidze Signed-off-by: Artem Kuzin Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- drivers/xcu/xcu_group.c | 48 ++++- include/linux/vstream.h | 46 +++++ include/linux/xcu_group.h | 16 ++ include/linux/xsched.h | 74 ++++++++ kernel/xsched/core.c | 93 ++++++++++ kernel/xsched/vstream.c | 374 +++++++++++++++++++++++++++++++++++++- 6 files changed, 647 insertions(+), 4 deletions(-) diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c index fef05b9787f3..86b935af00b6 100644 --- a/drivers/xcu/xcu_group.c +++ b/drivers/xcu/xcu_group.c @@ -153,7 +153,12 @@ int xcu_complete(struct xcu_op_handler_params *params) */ int xcu_finish(struct xcu_op_handler_params *params) { - return 0; + if (!params->group->opt || !params->group->opt->finish) { + XSCHED_ERR("No function [finish] called.\n"); + return -EINVAL; + } + + return params->group->opt->finish(params); } /* This function runs a "alloc" callback for a given xcu_group @@ -165,7 +170,46 @@ int xcu_finish(struct xcu_op_handler_params *params) */ int xcu_alloc(struct xcu_op_handler_params *params) { - return 0; + if (!params->group->opt || !params->group->opt->alloc) { + XSCHED_ERR("No function [alloc] called.\n"); + return -EINVAL; + } + + return params->group->opt->alloc(params); +} + +/* This function runs a "logic_alloc" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement allocation + * and registering memory of logic CQ buffer. + */ +int xcu_logic_alloc(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->logic_alloc) { + XSCHED_ERR("No function [logic_alloc] called.\n"); + return -EINVAL; + } + + return params->group->opt->logic_alloc(params); +} + +/* This function runs a "logic_free" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement deallocation + * and unregistering memory of a logic CQ buffer. + */ +int xcu_logic_free(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->logic_free) { + XSCHED_ERR("No function [logic_free] called.\n"); + return -EINVAL; + } + + return params->group->opt->logic_free(params); } static struct xcu_group __xcu_group_root = { diff --git a/include/linux/vstream.h b/include/linux/vstream.h index 627f754f83c5..ca956ac2cf4f 100644 --- a/include/linux/vstream.h +++ b/include/linux/vstream.h @@ -4,6 +4,52 @@ #include +typedef struct vstream_info { + uint32_t user_stream_id; + uint32_t id; + uint32_t vcq_id; + uint32_t logic_vcq_id; + uint32_t dev_id; + uint32_t channel_id; + uint32_t fd; + uint32_t task_type; + int tgid; + int sqcq_type; + + void *drv_ctx; + + int inode_fd; + + /* Pointer to corresponding context. */ + struct xsched_context *ctx; + + /* List node in context's vstream list. */ + struct list_head ctx_node; + + /* Pointer to an CU object on which this + * vstream is currently being processed. + * NULL if vstream is not being processed. + */ + struct xsched_cu *xcu; + + /* List node in an CU list of vstreams that + * are currently being processed by this specific CU. + */ + struct list_head xcu_node; + + /* Private vstream data. */ + void *data; + + spinlock_t stream_lock; + + uint32_t kicks_count; + + /* List of metadata a.k.a. all recorded unprocesed + * kicks for this exact vstream. 
+ */ + struct list_head metadata_list; +} vstream_info_t; + typedef int vstream_manage_t(struct vstream_args *arg); #endif /* _LINUX_VSTREAM_H */ diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h index ef11886c18a1..c0168969c67a 100644 --- a/include/linux/xcu_group.h +++ b/include/linux/xcu_group.h @@ -18,6 +18,20 @@ enum xcu_type { }; struct xcu_op_handler_params { + int fd; + struct xcu_group *group; + void *payload; + union { + struct { + void *param_1; + void *param_2; + void *param_3; + void *param_4; + void *param_5; + void *param_6; + void *param_7; + }; + }; }; typedef int (*xcu_op_handler_fn_t)(struct xcu_op_handler_params *params); @@ -28,6 +42,8 @@ struct xcu_operation { xcu_op_handler_fn_t wait; xcu_op_handler_fn_t complete; xcu_op_handler_fn_t alloc; + xcu_op_handler_fn_t logic_alloc; + xcu_op_handler_fn_t logic_free; }; struct xcu_group { diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 011f0e9bc227..702483b85621 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -3,6 +3,8 @@ #define __LINUX_XSCHED_H__ #include +#include +#include #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif @@ -67,5 +69,77 @@ struct xsched_cu { wait_queue_head_t wq_xcu_running; }; +struct xsched_entity { + uint32_t task_type; + + bool on_rq; + + pid_t owner_pid; + pid_t tgid; + + /* File descriptor coming from an associated context + * used for identifying a given xsched entity in + * info and error prints. + */ + uint32_t fd; + + /* Xsched class for this xse. */ + const struct xsched_class *class; + + /* Pointer to context object. */ + struct xsched_context *ctx; + + /* Pointer to an XCU object that represents an XCU + * on which this xse is to be processed or is being + * processed currently. + */ + struct xsched_cu *xcu; + + /* General purpose xse lock. */ + spinlock_t xse_lock; +}; + +struct xsched_context { + uint32_t fd; + uint32_t dev_id; + pid_t tgid; + + struct list_head vstream_list; + struct list_head ctx_node; + + struct xsched_entity xse; + + spinlock_t ctx_lock; + struct mutex ctx_mutex; + struct kref kref; +}; + +extern struct list_head xsched_ctx_list; +extern struct mutex xsched_ctx_list_mutex; + +/* Returns a pointer to xsched_context object corresponding to a given + * device file descriptor provided by fd argument. + */ +static inline struct xsched_context *ctx_find_by_tgid(pid_t tgid) +{ + struct xsched_context *ctx; + struct xsched_context *ret = NULL; + + list_for_each_entry(ctx, &xsched_ctx_list, ctx_node) { + if (ctx->tgid == tgid) { + ret = ctx; + break; + } + } + + return ret; +} + int xsched_xcu_register(struct xcu_group *group); +void xsched_task_free(struct kref *kref); +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs); +int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx); +int vstream_bind_to_xcu(vstream_info_t *vstream_info); +struct xsched_cu *xcu_find(uint32_t *type, + uint32_t dev_id, uint32_t channel_id); #endif /* !__LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index bbd125044c88..018c73101a70 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -32,6 +32,89 @@ spinlock_t xcu_mgr_lock; DECLARE_BITMAP(xcu_online_mask, XSCHED_NR_CUS); struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; +/* Storage list for contexts. 
*/ +struct list_head xsched_ctx_list; +DEFINE_MUTEX(xsched_ctx_list_mutex); + +/* Frees a given vstream and also frees and dequeues it's context + * if a given vstream is the last and only vstream attached to it's + * corresponding context object. + */ +void xsched_task_free(struct kref *kref) +{ + struct xsched_context *ctx; + vstream_info_t *vs, *tmp; + + ctx = container_of(kref, struct xsched_context, kref); + + mutex_lock(&xsched_ctx_list_mutex); + list_for_each_entry_safe(vs, tmp, &ctx->vstream_list, ctx_node) { + list_del(&vs->ctx_node); + kfree(vs->data); + kfree(vs); + } + + list_del(&ctx->ctx_node); + mutex_unlock(&xsched_ctx_list_mutex); + + kfree(ctx); +} + +int vstream_bind_to_xcu(vstream_info_t *vstream_info) +{ + struct xsched_cu *xcu_found = NULL; + uint32_t type = XCU_TYPE_XPU; + + xcu_found = xcu_find(&type, vstream_info->dev_id, vstream_info->channel_id); + if (!xcu_found) + return -EINVAL; + + /* Bind vstream to a xcu. */ + vstream_info->xcu = xcu_found; + XSCHED_DEBUG("XCU bound to a vstream: type=%u, dev_id=%u, chan_id=%u.\n", + type, vstream_info->dev_id, vstream_info->channel_id); + + return 0; +} + +struct xsched_cu *xcu_find(uint32_t *type, + uint32_t dev_id, uint32_t channel_id) +{ + struct xcu_group *group = NULL; + uint32_t local_type = *type; + + /* Find xcu by type. */ + group = xcu_group_find(xcu_group_root, local_type); + if (group == NULL) { + XSCHED_ERR("Fail to find type group.\n"); + return NULL; + } + + /* Find device id group. */ + group = xcu_group_find(group, dev_id); + if (group == NULL) { + XSCHED_ERR("Fail to find device group.\n"); + return NULL; + } + /* Find channel id group. */ + group = xcu_group_find(group, channel_id); + if (group == NULL) { + XSCHED_ERR("Fail to find channel group.\n"); + return NULL; + } + + *type = local_type; + XSCHED_DEBUG("XCU found: type=%u, dev_id=%u, chan_id=%u.\n", + local_type, dev_id, channel_id); + + return group->xcu; +} + +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) +{ + return 0; +} + static int xsched_schedule(void *input_xcu) { return 0; @@ -108,3 +191,13 @@ int xsched_xcu_register(struct xcu_group *group) return 0; } EXPORT_SYMBOL(xsched_xcu_register); + +int __init xsched_init(void) +{ + /* Initializing global Xsched context list. 
*/ + INIT_LIST_HEAD(&xsched_ctx_list); + + return 0; +} + +late_initcall(xsched_init); diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 5723c359e0f2..127ff96ce48c 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -17,20 +17,390 @@ * */ #include +#include #include +#include +#include #ifdef CONFIG_XCU_VSTREAM +#define MAX_VSTREAM_NUM 512 -int vstream_alloc(struct vstream_args *arg) +static DEFINE_MUTEX(vs_mutex); +static vstream_info_t *vstream_array[MAX_VSTREAM_NUM]; + +static int vstream_del(uint32_t vstream_id); +static int vstream_file_release(struct inode *inode, struct file *file); +static const struct file_operations vstreamfd_fops = { + .release = vstream_file_release, +}; + +static inline struct file *vstream_file_get(int vs_fd) +{ + return fget(vs_fd); +} + +static inline void vstream_file_put(struct file *vstream_file) +{ + fput(vstream_file); +} + +static int vstream_file_create(struct vstream_info *vs) +{ + int err = anon_inode_getfd("[vstreamfd]", + &vstreamfd_fops, vs, O_RDWR | O_CLOEXEC | O_NONBLOCK); + if (err < 0) + XSCHED_ERR("Fail to alloc anon inode vs %u @ %s\n", + vs->id, __func__); + + return err; +} + +static int vstream_destroy(vstream_info_t *vstream) { + int err; + struct xsched_context *ctx = NULL; + struct xsched_entity *xse = NULL; + + err = vstream_del(vstream->id); + if (err) + return err; + + xse = &vstream->ctx->xse; + ctx = vstream->ctx; + kref_put(&ctx->kref, xsched_task_free); + return 0; } -int vstream_free(struct vstream_args *arg) +static int vstream_file_release(struct inode *inode, struct file *file) +{ + vstream_info_t *vstream; + (void) inode; + + if (!file->private_data) + return 0; + + vstream = file->private_data; + return vstream_destroy(vstream); +} + +static void init_xsched_ctx(struct xsched_context *ctx, + const struct vstream_info *vs) { + ctx->tgid = vs->tgid; + ctx->fd = vs->fd; + ctx->dev_id = vs->dev_id; + kref_init(&ctx->kref); + + INIT_LIST_HEAD(&ctx->vstream_list); + INIT_LIST_HEAD(&ctx->ctx_node); + + spin_lock_init(&ctx->ctx_lock); + mutex_init(&ctx->ctx_mutex); +} + +/* Allocates a new xsched_context if a new vstream_info is bound + * to a device that no other vstream that is currently present + * is bound to. + */ +static int alloc_ctx_from_vstream(struct vstream_info *vstream_info, + struct xsched_context **ctx) +{ + *ctx = ctx_find_by_tgid(vstream_info->tgid); + if (*ctx) + return 0; + + *ctx = kzalloc(sizeof(struct xsched_context), GFP_KERNEL); + if (!*ctx) { + XSCHED_ERR("Fail to alloc xsched context (tgid=%d) @ %s\n", + vstream_info->tgid, __func__); + return -ENOMEM; + } + + init_xsched_ctx(*ctx, vstream_info); + + if (xsched_ctx_init_xse(*ctx, vstream_info) != 0) { + XSCHED_ERR("Fail to initialize XSE for context @ %s\n", + __func__); + kfree(*ctx); + return -EINVAL; + } + + list_add(&(*ctx)->ctx_node, &xsched_ctx_list); + + return 0; +} + +/* Bounds a new vstream_info object to a corresponding xsched context. 
*/ +static int vstream_bind_to_ctx(struct vstream_info *vs) +{ + struct xsched_context *ctx = NULL; + int alloc_err = 0; + + mutex_lock(&xsched_ctx_list_mutex); + ctx = ctx_find_by_tgid(vs->tgid); + if (ctx) { + XSCHED_DEBUG("Ctx %d found @ %s\n", vs->tgid, __func__); + kref_get(&ctx->kref); + } else { + alloc_err = alloc_ctx_from_vstream(vs, &ctx); + if (alloc_err) + goto out_err; + } + + vs->ctx = ctx; + vs->xcu = ctx->xse.xcu; + ctx->dev_id = vs->dev_id; + list_add(&vs->ctx_node, &vs->ctx->vstream_list); + +out_err: + mutex_unlock(&xsched_ctx_list_mutex); + return alloc_err; +} + +static vstream_info_t *vstream_create(struct vstream_args *arg) +{ + struct vstream_info *vstream = NULL; + + vstream = kzalloc(sizeof(vstream_info_t), GFP_KERNEL); + if (!vstream) { + XSCHED_ERR("Failed to allocate vstream.\n"); + return NULL; + } + + vstream->inode_fd = vstream_file_create(vstream); + vstream->dev_id = arg->dev_id; + vstream->channel_id = arg->channel_id; + vstream->kicks_count = 0; + vstream->xcu = NULL; + + INIT_LIST_HEAD(&vstream->ctx_node); + INIT_LIST_HEAD(&vstream->xcu_node); + INIT_LIST_HEAD(&vstream->metadata_list); + + spin_lock_init(&vstream->stream_lock); + + return vstream; +} + +static int vstream_add(vstream_info_t *vstream, uint32_t id) +{ + if (id >= MAX_VSTREAM_NUM) { + XSCHED_ERR("vstream id out of range.\n"); + return -EINVAL; + } + + mutex_lock(&vs_mutex); + if (vstream_array[id] != NULL) { + mutex_unlock(&vs_mutex); + XSCHED_ERR("Vstream id=%u cell is busy.\n", id); + return -EINVAL; + } + vstream_array[id] = vstream; + mutex_unlock(&vs_mutex); + return 0; } +static int vstream_del(uint32_t vstream_id) +{ + if (vstream_id >= MAX_VSTREAM_NUM) { + XSCHED_ERR("Vstream id=%u out of range.\n", vstream_id); + return -EINVAL; + } + + mutex_lock(&vs_mutex); + vstream_array[vstream_id] = NULL; + mutex_unlock(&vs_mutex); + return 0; +} + +static vstream_info_t *vstream_get(uint32_t vstream_id) +{ + vstream_info_t *vstream = NULL; + + if (vstream_id >= MAX_VSTREAM_NUM) { + XSCHED_ERR("Vstream id=%u out of range.\n", vstream_id); + return NULL; + } + + mutex_lock(&vs_mutex); + vstream = vstream_array[vstream_id]; + mutex_unlock(&vs_mutex); + + return vstream; +} + +static vstream_info_t * +vstream_get_by_user_stream_id(uint32_t user_stream_id) +{ + int id; + + for (id = 0; id < MAX_VSTREAM_NUM; id++) { + if (vstream_array[id] != NULL && + vstream_array[id]->user_stream_id == user_stream_id) + return vstream_array[id]; + } + return NULL; +} + +static int sqcq_alloc(struct vstream_args *arg) +{ + vstream_alloc_args_t *va_args = &arg->va_args; + struct xsched_context *ctx = NULL; + struct xcu_op_handler_params params; + uint32_t logic_cq_id = 0; + vstream_info_t *vstream; + int ret = 0; + uint32_t tgid = 0; + uint32_t cq_id = 0; + uint32_t sq_id = 0; + + vstream = vstream_create(arg); + if (!vstream) + return -ENOSPC; + + vstream->fd = arg->fd; + vstream->task_type = arg->task_type; + + ret = vstream_bind_to_xcu(vstream); + if (ret < 0) { + ret = -EINVAL; + goto out_err_vstream_free; + } + + /* Allocates vstream's SQ and CQ memory on a XCU for processing. 
*/ + params.group = vstream->xcu->group; + params.fd = arg->fd; + params.payload = arg->payload; + params.param_1 = &tgid; + params.param_2 = &sq_id; + params.param_3 = &cq_id; + params.param_4 = &logic_cq_id; + ret = xcu_alloc(¶ms); + if (ret) { + XSCHED_ERR("Fail to allocate SQ/CQ memory to a vstream.\n"); + goto out_err_vstream_free; + } + + vstream->drv_ctx = params.param_5; + vstream->id = sq_id; + vstream->vcq_id = cq_id; + vstream->logic_vcq_id = logic_cq_id; + vstream->user_stream_id = va_args->user_stream_id; + vstream->tgid = tgid; + vstream->sqcq_type = va_args->type; + + ret = vstream_bind_to_ctx(vstream); + if (ret < 0) + goto out_err_vstream_free; + + ctx = vstream->ctx; + + /* Add new vstream to array after allocating inode */ + ret = vstream_add(vstream, vstream->id); + if (ret < 0) + goto out_err_vstream_free; + + return 0; + +out_err_vstream_free: + kfree(vstream); + XSCHED_ERR("Exit %s with error, current_pid=%d, err=%d.\n", + __func__, current->pid, ret); + + return ret; +} + +static int logic_cq_alloc(struct vstream_args *arg) +{ + int err = 0; + struct xcu_op_handler_params params; + vstream_info_t *vstream = NULL; + vstream_alloc_args_t *logic_cq_alloc_para = &arg->va_args; + struct xsched_cu *xcu_found = NULL; + uint32_t logic_cq_id = 0, type = XCU_TYPE_XPU; + + vstream = vstream_get_by_user_stream_id( + logic_cq_alloc_para->user_stream_id); + if (!vstream) { + xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id); + if (!xcu_found) { + err = -EINVAL; + goto out_err; + } + } else { + xcu_found = vstream->xcu; + } + + params.group = xcu_found->group; + params.fd = arg->fd; + params.payload = arg->payload; + params.param_1 = &logic_cq_id; + err = xcu_logic_alloc(¶ms); + if (err) { + XSCHED_ERR("Fail to alloc logic CQ memory to a vstream.\n"); + goto out_err; + } + + vstream->logic_vcq_id = logic_cq_id; + XSCHED_DEBUG( + "Vstream logic CQ: dev_id=%u, stream_id=%u, logic_cqid=%u @ %s\n", + vstream->dev_id, vstream->user_stream_id, + vstream->logic_vcq_id, __func__); + return 0; + +out_err: + XSCHED_ERR( + "Exit %s with error, current_pid=%d, err=%d.\n", + __func__, current->pid, err); + return err; +} + +int vstream_alloc(struct vstream_args *arg) +{ + vstream_alloc_args_t *va_args = &arg->va_args; + int ret; + + if (!va_args->type) + ret = sqcq_alloc(arg); + else + ret = logic_cq_alloc(arg); + + return ret; +} + +int vstream_free(struct vstream_args *arg) +{ + struct file *vs_file; + struct xcu_op_handler_params params; + uint32_t vstream_id = arg->sq_id; + vstream_info_t *vstream = NULL; + int err = 0; + + vstream = vstream_get(vstream_id); + if (!vstream) { + XSCHED_ERR("Fail to free NULL vstream, vstream id=%u\n", vstream_id); + return -EINVAL; + } + + params.group = vstream->xcu->group; + params.fd = arg->fd; + params.payload = arg->payload; + + vs_file = vstream_file_get(vstream->inode_fd); + vstream_destroy(vstream); + vs_file->private_data = NULL; + vstream_file_put(vs_file); + + err = xcu_finish(¶ms); + if (err) + XSCHED_ERR("Fail to free vstream sqId=%u, cqId=%u.\n", + arg->sq_id, arg->cq_id); + + return err; +} + int vstream_kick(struct vstream_args *arg) { return 0; -- Gitee From 140cdac22a0e2aea379904eac5e7a72f16a3e29c Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:26 +0000 Subject: [PATCH 06/16] xsched: Add vstream_kick() implementation hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add vstream_metadata data structures. 
Add vstream_kick() and xcu_run() implementation. Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/vstream.h | 28 ++++++++++++++++++++++++ include/linux/xsched.h | 15 +++++++++++++ kernel/xsched/core.c | 25 +++++++++++++++++++++ kernel/xsched/vstream.c | 48 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 115 insertions(+), 1 deletion(-) diff --git a/include/linux/vstream.h b/include/linux/vstream.h index ca956ac2cf4f..58ee4c235a07 100644 --- a/include/linux/vstream.h +++ b/include/linux/vstream.h @@ -3,6 +3,34 @@ #define _LINUX_VSTREAM_H #include +#include + +#define MAX_VSTREAM_SIZE 2048 + +/* Vstream metadata describes each incoming kick + * that gets stored into a list of pending kicks + * inside a vstream to keep track of what is left + * to be processed by a driver. + */ +typedef struct vstream_metadata { + uint32_t exec_time; + /* A value of SQ tail that has been passed with the + * kick that is described by this exact metadata object. + */ + uint32_t sq_tail; + uint32_t sqe_num; + uint32_t sq_id; + int32_t timeout; + int8_t sqe[XCU_SQE_SIZE_MAX]; + + /* A node for metadata list */ + struct list_head node; + + struct vstream_info *parent; + + /* Time of list insertion */ + ktime_t add_time; +} vstream_metadata_t; typedef struct vstream_info { uint32_t user_stream_id; diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 702483b85621..7a25d2a7455b 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -135,6 +135,18 @@ static inline struct xsched_context *ctx_find_by_tgid(pid_t tgid) return ret; } + +static inline void xsched_init_vsm(struct vstream_metadata *vsm, + struct vstream_info *vs, vstream_args_t *arg) +{ + vsm->sq_id = arg->sq_id; + vsm->sqe_num = arg->vk_args.sqe_num; + vsm->timeout = arg->vk_args.timeout; + memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); + vsm->parent = vs; + INIT_LIST_HEAD(&vsm->node); +} + int xsched_xcu_register(struct xcu_group *group); void xsched_task_free(struct kref *kref); int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs); @@ -142,4 +154,7 @@ int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx); int vstream_bind_to_xcu(vstream_info_t *vstream_info); struct xsched_cu *xcu_find(uint32_t *type, uint32_t dev_id, uint32_t channel_id); + +/* Vstream metadata proccesing functions.*/ +int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); #endif /* !__LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 018c73101a70..5064ecbbd179 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -163,6 +163,31 @@ static int alloc_xcu_id(void) return xcu_id; } +/* Adds vstream_metadata object to a specified vstream. */ +int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg) +{ + struct vstream_metadata *new_vsm; + + new_vsm = kmalloc(sizeof(struct vstream_metadata), GFP_KERNEL); + if (!new_vsm) { + XSCHED_ERR("Failed to alloc kick metadata for vs %u @ %s\n", + vs->id, __func__); + return -ENOMEM; + } + + if (vs->kicks_count > MAX_VSTREAM_SIZE) { + kfree(new_vsm); + return -EBUSY; + } + + xsched_init_vsm(new_vsm, vs, arg); + list_add_tail(&new_vsm->node, &vs->metadata_list); + new_vsm->add_time = ktime_get(); + vs->kicks_count += 1; + + return 0; +} + /* * Initialize and register xcu in xcu_manager array. 
*/ diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 127ff96ce48c..d4f916415682 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -403,7 +403,53 @@ int vstream_free(struct vstream_args *arg) int vstream_kick(struct vstream_args *arg) { - return 0; + vstream_info_t *vstream; + int vstream_id = arg->sq_id; + struct xsched_entity *xse; + int err = 0; + + struct xsched_cu *xcu = NULL; + + XSCHED_CALL_STUB(); + + /* Get vstream. */ + vstream = vstream_get(vstream_id); + if (!vstream || !vstream->ctx) { + XSCHED_ERR("Vstream NULL or doesn't have a context.\n"); + return -EINVAL; + } + + xse = &vstream->ctx->xse; + xcu = vstream->xcu; + XSCHED_DEBUG("New kick on xse %d @ %s\n", xse->tgid, __func__); + + do { + mutex_lock(&xcu->xcu_lock); + spin_lock(&vstream->stream_lock); + + /* Adding kick metadata. */ + err = xsched_vsm_add_tail(vstream, arg); + if (err == -EBUSY) { + spin_unlock(&vstream->stream_lock); + mutex_unlock(&xcu->xcu_lock); + + /* Retry after a while */ + usleep_range(100, 200); + continue; + } + + /* Don't forget to unlock */ + if (err) { + XSCHED_ERR("Fail to add kick metadata to vs %u @ %s\n", + vstream->id, __func__); + break; + } + } while (err == -EBUSY); + + spin_unlock(&vstream->stream_lock); + mutex_unlock(&xcu->xcu_lock); + + return err; } /* -- Gitee From 148fd5207ec34a721e15a2ebfd7ef008ea829850 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:27 +0000 Subject: [PATCH 07/16] xsched: Add xsched_ctx_init_xse() implementation hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add xsched_ctx_init_xse() implementation: - Add 2 atomics kicks_pending_ctx_cnt and kicks_submited members to - xsched_entity structure. - Add GET_VS_TASK_TYPE macro helper. - Add xsched_xse_set_class() and bind_ctx_to_xcu() stubs. Add bind_ctx_to_xcu() implementation: - Add ctx_revmap hash table to save XCUs' history. - Add XCU_HASH_ORDER to set hashtable order. - Add additional data structure ctx_devid_revmap_data to xcu_group that is used to save XCU history in hashtable by devid. Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/xcu_group.h | 11 ++++++ include/linux/xsched.h | 15 +++++++- kernel/xsched/core.c | 79 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 2 deletions(-) diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h index c0168969c67a..93f732f84694 100644 --- a/include/linux/xcu_group.h +++ b/include/linux/xcu_group.h @@ -17,6 +17,17 @@ enum xcu_type { XCU_TYPE_XPU, }; +/** + * @group: value for this entry. + * @hash_node: hash node list. + * @dev_id: device id to bind with ctx. 
+ */ +struct ctx_devid_revmap_data { + unsigned int dev_id; + struct xcu_group *group; + struct hlist_node hash_node; +}; + struct xcu_op_handler_params { int fd; struct xcu_group *group; diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 7a25d2a7455b..47f0a43a72dc 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -2,6 +2,8 @@ #ifndef __LINUX_XSCHED_H__ #define __LINUX_XSCHED_H__ +#include +#include #include #include #include @@ -31,6 +33,12 @@ #define XSCHED_EXIT_STUB() \ XSCHED_DEBUG(" -----* %s @ %s exited *-----\n", __func__, __FILE__) +#define XCU_HASH_ORDER 6 + +#define __GET_VS_TASK_TYPE(t) ((t)&0xFF) + +#define GET_VS_TASK_TYPE(vs_ptr) __GET_VS_TASK_TYPE((vs_ptr)->task_type) + enum xcu_state { XCU_INACTIVE, XCU_IDLE, @@ -77,6 +85,12 @@ struct xsched_entity { pid_t owner_pid; pid_t tgid; + /* Amount of pending kicks currently sitting on this context. */ + atomic_t kicks_pending_ctx_cnt; + + /* Amount of submitted kicks context, used for resched decision. */ + atomic_t submitted_one_kick; + /* File descriptor coming from an associated context * used for identifying a given xsched entity in * info and error prints. @@ -135,7 +149,6 @@ static inline struct xsched_context *ctx_find_by_tgid(pid_t tgid) return ret; } - static inline void xsched_init_vsm(struct vstream_metadata *vsm, struct vstream_info *vs, vstream_args_t *arg) { diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 5064ecbbd179..3c20a493629e 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -36,6 +36,9 @@ struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; struct list_head xsched_ctx_list; DEFINE_MUTEX(xsched_ctx_list_mutex); +static DEFINE_MUTEX(revmap_mutex); +static DEFINE_HASHTABLE(ctx_revmap, XCU_HASH_ORDER); + /* Frees a given vstream and also frees and dequeues it's context * if a given vstream is the last and only vstream attached to it's * corresponding context object. @@ -60,6 +63,45 @@ void xsched_task_free(struct kref *kref) kfree(ctx); } +int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx) +{ + struct ctx_devid_revmap_data *revmap_data; + struct xsched_cu *xcu_found = NULL; + uint32_t type = XCU_TYPE_XPU; + + /* Find XCU history. */ + hash_for_each_possible(ctx_revmap, revmap_data, hash_node, + (unsigned long)ctx->dev_id) { + if (revmap_data && revmap_data->group) { + /* Bind ctx to group xcu.*/ + ctx->xse.xcu = revmap_data->group->xcu; + return 0; + } + } + + revmap_data = kzalloc(sizeof(struct ctx_devid_revmap_data), GFP_KERNEL); + if (revmap_data == NULL) { + XSCHED_ERR("Revmap_data is NULL @ %s\n", __func__); + return -ENOMEM; + } + + xcu_found = xcu_find(&type, ctx->dev_id, vstream_info->channel_id); + if (!xcu_found) + return -EINVAL; + + /* Bind ctx to an XCU from channel group. 
*/ + revmap_data->group = xcu_found->group; + ctx->xse.xcu = xcu_found; + vstream_info->xcu = xcu_found; + revmap_data->dev_id = vstream_info->dev_id; + XSCHED_DEBUG("Ctx bind to xcu %u @ %s\n", xcu_found->id, __func__); + + hash_add(ctx_revmap, &revmap_data->hash_node, + (unsigned long)ctx->dev_id); + + return 0; +} + int vstream_bind_to_xcu(vstream_info_t *vstream_info) { struct xsched_cu *xcu_found = NULL; @@ -110,11 +152,46 @@ struct xsched_cu *xcu_find(uint32_t *type, return group->xcu; } -int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) +int xsched_xse_set_class(struct xsched_entity *xse) { return 0; } +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) +{ + int err = 0; + struct xsched_entity *xse = &ctx->xse; + + atomic_set(&xse->kicks_pending_ctx_cnt, 0); + atomic_set(&xse->submitted_one_kick, 0); + + xse->fd = ctx->fd; + xse->tgid = ctx->tgid; + + err = ctx_bind_to_xcu(vs, ctx); + if (err) { + XSCHED_ERR( + "Couldn't find valid xcu for vstream %u dev_id %u @ %s\n", + vs->id, vs->dev_id, __func__); + return -EINVAL; + } + + xse->ctx = ctx; + if (likely(vs->xcu != NULL)) + xse->xcu = vs->xcu; + + err = xsched_xse_set_class(xse); + if (err) { + XSCHED_ERR("Failed to set xse class @ %s\n", __func__); + return err; + } + + WRITE_ONCE(xse->on_rq, false); + + spin_lock_init(&xse->xse_lock); + return err; +} + static int xsched_schedule(void *input_xcu) { return 0; -- Gitee From e0f52b726ef60a685607d887c52e77ee521e85a2 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:28 +0000 Subject: [PATCH 08/16] xsched: Add XCU xsched_schedule() implementation hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add XCU xsched_schedule() implementation. Add xsched_rq data structures and related process. Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/xsched.h | 79 +++++++++++++++++++- kernel/xsched/core.c | 161 +++++++++++++++++++++++++++++++++++++++- kernel/xsched/vstream.c | 8 ++ 3 files changed, 246 insertions(+), 2 deletions(-) diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 47f0a43a72dc..efe5d92a5acd 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -39,6 +39,28 @@ #define GET_VS_TASK_TYPE(vs_ptr) __GET_VS_TASK_TYPE((vs_ptr)->task_type) +enum xsched_rq_state { + XRQ_STATE_INACTIVE = 0x00, + XRQ_STATE_IDLE = 0x01, + XRQ_STATE_BUSY = 0x02, + XRQ_STATE_SUBMIT = 0x04, + XRQ_STATE_WAIT_RUNNING = 0x08, +}; + +#define for_each_vstream_in_ctx(vs, ctx) \ + list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node) + + +/* Base XSched runqueue object structure that contains both mutual and + * individual parameters for different scheduling classes. + */ +struct xsched_rq { + struct xsched_entity *curr_xse; + + int state; + int nr_running; +}; + enum xcu_state { XCU_INACTIVE, XCU_IDLE, @@ -69,10 +91,15 @@ struct xsched_cu { struct task_struct *worker; + struct xsched_rq xrq; + struct list_head vsm_list; + struct xcu_group *group; struct mutex xcu_lock; + atomic_t has_active; + wait_queue_head_t wq_xcu_idle; wait_queue_head_t wq_xcu_running; }; @@ -113,6 +140,53 @@ struct xsched_entity { spinlock_t xse_lock; }; +/* Increments pending kicks counter for an XCU that the given + * xsched entity is attached to and for xsched entity's xsched + * class. 
+ */ +static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) +{ + /* Icrement pending kicks for current XSE. */ + atomic_inc(&xse->kicks_pending_ctx_cnt); + + return 0; +} + +/* Decrements pending kicks counter for an XCU that the given + * xsched entity is attached to and for XSched entity's sched + * class. + */ +static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) +{ + /* Decrementing pending kicks for current XSE. */ + atomic_dec(&xse->kicks_pending_ctx_cnt); + + return 0; +} + +/* Checks if there are pending kicks left on a given XCU for all + * xsched classes. + */ +static inline bool xsched_check_pending_kicks_xcu(struct xsched_cu *xcu) +{ + return 0; +} + +static inline int xse_integrity_check(const struct xsched_entity *xse) +{ + if (!xse) { + XSCHED_ERR("xse is null @ %s\n", __func__); + return -EINVAL; + } + + if (!xse->class) { + XSCHED_ERR("xse->class is null @ %s\n", __func__); + return -EINVAL; + } + + return 0; +} + struct xsched_context { uint32_t fd; uint32_t dev_id; @@ -170,4 +244,7 @@ struct xsched_cu *xcu_find(uint32_t *type, /* Vstream metadata proccesing functions.*/ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); -#endif /* !__LINUX_XSCHED_H__ */ +struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); +void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); +void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); +#endif /* __LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 3c20a493629e..d21e90bc9826 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -16,6 +16,7 @@ * more details. * */ +#include #include #include #include @@ -39,6 +40,66 @@ DEFINE_MUTEX(xsched_ctx_list_mutex); static DEFINE_MUTEX(revmap_mutex); static DEFINE_HASHTABLE(ctx_revmap, XCU_HASH_ORDER); +static void put_prev_ctx(struct xsched_entity *xse) +{ +} + +static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) +{ + return NULL; +} + +void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +{ +} + +void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +{ +} + +static int delete_ctx(struct xsched_context *ctx) +{ + struct xsched_cu *xcu = ctx->xse.xcu; + struct xsched_entity *curr_xse = xcu->xrq.curr_xse; + struct xsched_entity *xse = &ctx->xse; + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return -EINVAL; + } + + if (!xse->xcu) { + XSCHED_ERR("Try to delete ctx that is not attached to xcu @ %s\n", + __func__); + return -EINVAL; + } + + /* Wait till context has been submitted. */ + while (atomic_read(&xse->kicks_pending_ctx_cnt)) { + XSCHED_DEBUG("Deleting ctx %d, xse->kicks_pending_ctx_cnt=%d @ %s\n", + xse->tgid, atomic_read(&xse->kicks_pending_ctx_cnt), + __func__); + usleep_range(100, 200); + } + + if (atomic_read(&xse->kicks_pending_ctx_cnt)) { + XSCHED_ERR("Deleting ctx %d that has pending kicks left @ %s\n", + xse->tgid, __func__); + return -EINVAL; + } + + mutex_lock(&xcu->xcu_lock); + if (curr_xse == xse) + xcu->xrq.curr_xse = NULL; + + dequeue_ctx(xse, xcu); + mutex_unlock(&xcu->xcu_lock); + XSCHED_DEBUG("Deleting ctx %d, pending kicks left=%d @ %s\n", xse->tgid, + atomic_read(&xse->kicks_pending_ctx_cnt), __func__); + + return 0; +} + /* Frees a given vstream and also frees and dequeues it's context * if a given vstream is the last and only vstream attached to it's * corresponding context object. 
@@ -50,6 +111,10 @@ void xsched_task_free(struct kref *kref) ctx = container_of(kref, struct xsched_context, kref); + /* Wait till xse dequeues */ + while (READ_ONCE(ctx->xse.on_rq)) + usleep_range(100, 200); + mutex_lock(&xsched_ctx_list_mutex); list_for_each_entry_safe(vs, tmp, &ctx->vstream_list, ctx_node) { list_del(&vs->ctx_node); @@ -57,6 +122,7 @@ void xsched_task_free(struct kref *kref) kfree(vs); } + delete_ctx(ctx); list_del(&ctx->ctx_node); mutex_unlock(&xsched_ctx_list_mutex); @@ -192,11 +258,67 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) return err; } -static int xsched_schedule(void *input_xcu) +static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) { return 0; } +static int xsched_schedule(void *input_xcu) +{ + struct xsched_cu *xcu = input_xcu; + int err = 0; + struct xsched_entity *curr_xse = NULL; + struct xsched_entity *next_xse = NULL; + + while (!kthread_should_stop()) { + mutex_unlock(&xcu->xcu_lock); + wait_event_interruptible(xcu->wq_xcu_idle, + atomic_read(&xcu->has_active) || xcu->xrq.nr_running); + + XSCHED_DEBUG("%s: rt_nr_running = %d, has_active = %d\n", + __func__, xcu->xrq.nr_running, atomic_read(&xcu->has_active)); + + mutex_lock(&xcu->xcu_lock); + if (!xsched_check_pending_kicks_xcu(xcu)) { + XSCHED_WARN("%s: No pending kicks on xcu %u\n", __func__, xcu->id); + continue; + } + + next_xse = __raw_pick_next_ctx(xcu); + if (!next_xse) { + XSCHED_WARN("%s: Couldn't find next xse on xcu %u\n", __func__, xcu->id); + continue; + } + + xcu->xrq.curr_xse = next_xse; + + if (__xsched_submit(xcu, next_xse) == 0) + continue; + + curr_xse = xcu->xrq.curr_xse; + if (curr_xse) { /* if not deleted yet */ + put_prev_ctx(curr_xse); + if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt)) { + dequeue_ctx(curr_xse, xcu); + XSCHED_DEBUG( + "%s: Dequeue xse %d due to zero kicks on xcu %u\n", + __func__, curr_xse->tgid, xcu->id); + curr_xse = xcu->xrq.curr_xse = NULL; + } + } + } + + return err; +} + +/* Initialize xsched classes' runqueues. */ +static inline void xsched_rq_init(struct xsched_cu *xcu) +{ + xcu->xrq.nr_running = 0; + xcu->xrq.curr_xse = NULL; + xcu->xrq.state = XRQ_STATE_IDLE; +} + /* Initializes all xsched XCU objects. * Should only be called from xsched_xcu_register function. */ @@ -209,6 +331,12 @@ static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, xcu->state = XSCHED_XCU_NONE; xcu->group = group; + atomic_set(&xcu->has_active, 0); + + INIT_LIST_HEAD(&xcu->vsm_list); + + init_waitqueue_head(&xcu->wq_xcu_idle); + mutex_init(&xcu->xcu_lock); /* Mark current XCU in a mask inside XCU root group. */ @@ -265,6 +393,37 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg) return 0; } +/* Fetch the first vstream metadata from vstream metadata list + * and removes it from that list. Returned vstream metadata pointer + * to be freed after. 
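+ * The caller is expected to hold vs->stream_lock and must kfree() the
+ * returned metadata once it has been consumed.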
+ */
+struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs)
+{
+	struct vstream_metadata *vsm;
+
+	if (list_empty(&vs->metadata_list)) {
+		XSCHED_DEBUG("No metadata to fetch from vs %u @ %s\n",
+			     vs->id, __func__);
+		return NULL;
+	}
+
+	vsm = list_first_entry_or_null(&vs->metadata_list, struct vstream_metadata, node);
+	if (!vsm) {
+		XSCHED_ERR("Corrupted metadata list in vs %u @ %s\n",
+			   vs->id, __func__);
+		return NULL;
+	}
+
+	list_del(&vsm->node);
+	if (vs->kicks_count == 0)
+		XSCHED_WARN("kicks_count underflow in vs %u @ %s\n",
+			    vs->id, __func__);
+	else
+		vs->kicks_count -= 1;
+
+	return vsm;
+}
+
 /*
  * Initialize and register xcu in xcu_manager array.
  */
diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c
index d4f916415682..78ead98d1a66 100644
--- a/kernel/xsched/vstream.c
+++ b/kernel/xsched/vstream.c
@@ -444,10 +444,18 @@ int vstream_kick(struct vstream_args *arg)
 					    vstream->id, __func__);
 				break;
 			}
+
+			enqueue_ctx(xse, xcu);
+			/* Increase the total number of kicks on the XCU to
+			 * which this context is attached, based on its
+			 * sched class.
+			 */
+			xsched_inc_pending_kicks_xse(&vstream->ctx->xse);
 		} while (err == -EBUSY);
 	spin_unlock(&vstream->stream_lock);
 	mutex_unlock(&xcu->xcu_lock);
+	if (!err)
+		wake_up_interruptible(&xcu->wq_xcu_idle);
 	return err;
 }
 
-- 
Gitee

From 932bf8474eda8542251b8ed234dc89570a96a14c Mon Sep 17 00:00:00 2001
From: Konstantin Meskhidze 
Date: Tue, 30 Sep 2025 07:06:29 +0000
Subject: [PATCH 09/16] xsched: Add xsched RT class

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Add rt class callbacks implementation:
- dequeue_ctx
- enqueue_ctx
- pick_next_ctx
- put_prev_ctx
- submit_prepare_ctx
- select_work
- check_preempt

Add rt.c to the kernel/xsched Makefile.
Add RT class callbacks support in core.c.

Signed-off-by: Konstantin Meskhidze 
Signed-off-by: Hui Tang 
Signed-off-by: Liu Kai 
Signed-off-by: Xia Fukun 
---
 include/linux/xsched.h | 228 ++++++++++++++++++++++++++++++++++++++++-
 kernel/xsched/Makefile |   2 +-
 kernel/xsched/core.c   | 162 ++++++++++++++++++++++++++++++++++-
 kernel/xsched/rt.c     | 227 ++++++++++++++++++++++++++++++++++++
 4 files changed, 613 insertions(+), 6 deletions(-)
 create mode 100644 kernel/xsched/rt.c

diff --git a/include/linux/xsched.h b/include/linux/xsched.h
index 47f0a43a72dc..ba2c2e903f59 100644
--- a/include/linux/xsched.h
+++ b/include/linux/xsched.h
@@ -37,8 +37,33 @@
 
 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF)
 
+#define __GET_VS_TASK_PRIO_RT(t) (((t) >> 8) & 0xFF)
+
 #define GET_VS_TASK_TYPE(vs_ptr) __GET_VS_TASK_TYPE((vs_ptr)->task_type)
 
+#define GET_VS_TASK_PRIO_RT(vs_ptr) __GET_VS_TASK_PRIO_RT((vs_ptr)->task_type)
+
+/*
+ * A default kick slice for RT class XSEs.
+ */
+#define XSCHED_RT_KICK_SLICE 20
+/*
+ * A default kick slice for CFS class XSEs.
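+ * A kick slice is a submission budget: once an entity has submitted
+ * this many kicks in one scheduling round it becomes eligible for
+ * requeueing or preemption.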
+ */ +#define XSCHED_CFS_KICK_SLICE 10 + +enum xcu_sched_type { + XSCHED_TYPE_RT, + XSCHED_TYPE_DFLT = XSCHED_TYPE_RT, + XSCHED_TYPE_NUM, +}; + +enum xse_prio { + XSE_PRIO_LOW, + XSE_PRIO_HIGH, + NR_XSE_PRIO, +}; + enum xsched_rq_state { XRQ_STATE_INACTIVE = 0x00, XRQ_STATE_IDLE = 0x01, @@ -47,18 +72,61 @@ enum xsched_rq_state { XRQ_STATE_WAIT_RUNNING = 0x08, }; +enum xse_state { + XSE_PREPARE, + XSE_READY, + XSE_RUNNING, + XSE_BLOCK, + XSE_DEAD, +}; + +enum xse_flag { + XSE_TIF_NONE, + XSE_TIF_PREEMPT, + XSE_TIF_BALANCE, /* Unused so far */ +}; + + +extern const struct xsched_class rt_xsched_class; + +#define xsched_first_class (&rt_xsched_class) + +#define for_each_xsched_class(class) \ + for (class = xsched_first_class; class; class = class->next) + +#define for_each_xse_prio(prio) \ + for (prio = XSE_PRIO_LOW; prio < NR_XSE_PRIO; prio++) + #define for_each_vstream_in_ctx(vs, ctx) \ list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node) +/* Manages xsched RT-like class linked list based runqueue. + * + * Now RT-like class runqueue structs is identical + * but will most likely grow different in the + * future as the Xsched evolves. + */ +struct xsched_rq_rt { + struct list_head rq[NR_XSE_PRIO]; + unsigned int nr_running; + int prio_nr_running[NR_XSE_PRIO]; + atomic_t prio_nr_kicks[NR_XSE_PRIO]; + DECLARE_BITMAP(curr_prios, NR_XSE_PRIO); +}; + /* Base XSched runqueue object structure that contains both mutual and * individual parameters for different scheduling classes. */ struct xsched_rq { struct xsched_entity *curr_xse; + const struct xsched_class *class; int state; int nr_running; + + /* RT class run queue.*/ + struct xsched_rq_rt rt; }; enum xcu_state { @@ -89,6 +157,9 @@ struct xsched_cu { uint32_t id; uint32_t state; + /* RT class kick counter. */ + atomic_t pending_kicks_rt; + struct task_struct *worker; struct xsched_rq xrq; @@ -104,6 +175,16 @@ struct xsched_cu { wait_queue_head_t wq_xcu_running; }; +struct xsched_entity_rt { + struct list_head list_node; + enum xse_state state; + enum xse_flag flag; + enum xse_prio prio; + + ktime_t timeslice; + s64 kick_slice; +}; + struct xsched_entity { uint32_t task_type; @@ -118,6 +199,9 @@ struct xsched_entity { /* Amount of submitted kicks context, used for resched decision. */ atomic_t submitted_one_kick; + size_t total_scheduled; + size_t total_submitted; + /* File descriptor coming from an associated context * used for identifying a given xsched entity in * info and error prints. @@ -127,9 +211,15 @@ struct xsched_entity { /* Xsched class for this xse. */ const struct xsched_class *class; + /* RT class entity. */ + struct xsched_entity_rt rt; + /* Pointer to context object. */ struct xsched_context *ctx; + /* Xsched entity execution statistics */ + u64 last_exec_runtime; + /* Pointer to an XCU object that represents an XCU * on which this xse is to be processed or is being * processed currently. @@ -140,15 +230,85 @@ struct xsched_entity { spinlock_t xse_lock; }; +static inline bool xse_is_rt(const struct xsched_entity *xse) +{ + return xse && xse->class == &rt_xsched_class; +} + +/* Returns a pointer to an atomic_t variable representing a counter + * of currently pending vstream kicks on a given XCU and for a + * given xsched class. + */ +static inline atomic_t * +xsched_get_pending_kicks_class(const struct xsched_class *class, + struct xsched_cu *xcu) +{ + /* Right now for testing purposes we have only XCU running streams. 
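+	 * Only the RT counter exists when this helper is introduced; the
+	 * CFS counter is added together with the fair class.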
*/ + if (!xcu) { + XSCHED_ERR("Try to get pending kicks with xcu=NULL.\n"); + return NULL; + } + + if (!class) { + XSCHED_ERR("Try to get pending kicks with class=NULL.\n"); + return NULL; + } + + if (class == &rt_xsched_class) + return &xcu->pending_kicks_rt; + + XSCHED_ERR("Xsched entity has an invalid class @ %s\n", __func__); + return NULL; +} + +/* Returns a pointer to an atomic_t variable representing a counter of + * currently pending vstream kicks for an XCU on which a given xsched + * entity is enqueued on and for a xsched class that assigned to a + * given xsched entity. + */ +static inline atomic_t * +xsched_get_pending_kicks_xse(const struct xsched_entity *xse) +{ + if (!xse) { + XSCHED_ERR("Try to get pending kicks with xse=NULL\n"); + return NULL; + } + + if (!xse->xcu) { + XSCHED_ERR("Try to get pending kicks with xse->xcu=NULL\n"); + return NULL; + } + + return xsched_get_pending_kicks_class(xse->class, xse->xcu); +} + /* Increments pending kicks counter for an XCU that the given * xsched entity is attached to and for xsched entity's xsched * class. */ static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) { + atomic_t *kicks_class = NULL; + + kicks_class = xsched_get_pending_kicks_xse(xse); + if (!kicks_class) + return -EINVAL; + + /* Incrementing pending kicks for XSE's sched class */ + atomic_inc(kicks_class); + /* Icrement pending kicks for current XSE. */ atomic_inc(&xse->kicks_pending_ctx_cnt); + /* Incrementing prio based pending kicks counter for RT class */ + if (xse_is_rt(xse)) { + atomic_inc(&xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]); + XSCHED_DEBUG("xcu increased pending kicks @ %s\n", __func__); + } else { + XSCHED_DEBUG("xse %u isn't rt class @ %s\n", xse->tgid, + __func__); + } + return 0; } @@ -158,9 +318,41 @@ static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) */ static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) { + atomic_t *kicks_class = NULL; + atomic_t *kicks_prio_rt = NULL; + + kicks_class = xsched_get_pending_kicks_xse(xse); + if (!kicks_class) + return -EINVAL; + + if (!atomic_read(kicks_class)) { + XSCHED_ERR("Try to decrement pending kicks beyond 0!\n"); + return -EINVAL; + } + + /* Decrementing pending kicks for XSE's sched class. */ + atomic_dec(kicks_class); + /* Decrementing pending kicks for current XSE. */ atomic_dec(&xse->kicks_pending_ctx_cnt); + /* Decrementing prio based pending kicks counter for RT class. */ + if (xse_is_rt(xse)) { + kicks_prio_rt = &xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]; + + if (!atomic_read(kicks_prio_rt)) { + XSCHED_ERR( + "Tried to decrement prio pending kicks beyond 0!\n"); + return -EINVAL; + } + + atomic_dec(kicks_prio_rt); + XSCHED_DEBUG("xcu decreased pending kicks @ %s\n", __func__); + } else { + XSCHED_DEBUG("xse %u isn't rt class @ %s\n", xse->tgid, + __func__); + } + return 0; } @@ -169,7 +361,14 @@ static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) */ static inline bool xsched_check_pending_kicks_xcu(struct xsched_cu *xcu) { - return 0; + atomic_t *kicks_rt; + + kicks_rt = xsched_get_pending_kicks_class(&rt_xsched_class, xcu); + + if (!kicks_rt) + return false; + + return !!atomic_read(kicks_rt); } static inline int xse_integrity_check(const struct xsched_entity *xse) @@ -223,6 +422,33 @@ static inline struct xsched_context *ctx_find_by_tgid(pid_t tgid) return ret; } +/* Xsched class. */ +struct xsched_class { + const struct xsched_class *next; + + /* Removes a given XSE from it's runqueue. 
*/ + void (*dequeue_ctx)(struct xsched_entity *xse); + + /* Places a given XSE on a runqueue on a given XCU. */ + void (*enqueue_ctx)(struct xsched_entity *xse, struct xsched_cu *xcu); + + /* Returns a next XSE to be submitted on a given XCU. */ + struct xsched_entity *(*pick_next_ctx)(struct xsched_cu *xcu); + + /* Put a XSE back into rq during preemption. */ + void (*put_prev_ctx)(struct xsched_entity *xse); + + /* Prepares a given XSE for submission on a given XCU. */ + int (*submit_prepare_ctx)(struct xsched_entity *xse, + struct xsched_cu *xcu); + + /* Check context preemption. */ + bool (*check_preempt)(struct xsched_entity *xse); + + /* Select jobs from XSE to submit on XCU */ + size_t (*select_work)(struct xsched_cu *xcu, struct xsched_entity *xse); +}; + static inline void xsched_init_vsm(struct vstream_metadata *vsm, struct vstream_info *vs, vstream_args_t *arg) { diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index 62e58e4151b0..f882518d54ab 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o -obj-$(CONFIG_XCU_SCHEDULER) += core.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index d21e90bc9826..97b073d804bc 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -42,19 +42,116 @@ static DEFINE_HASHTABLE(ctx_revmap, XCU_HASH_ORDER); static void put_prev_ctx(struct xsched_entity *xse) { + struct xsched_cu *xcu = xse->xcu; + + lockdep_assert_held(&xcu->xcu_lock); + + xse->class->put_prev_ctx(xse); + xse->last_exec_runtime = 0; + atomic_set(&xse->submitted_one_kick, 0); +} + +static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + int kick_count; + struct vstream_info *vs; + unsigned int sum_exec_time = 0; + size_t kicks_submitted = 0; + struct vstream_metadata *vsm; + int not_empty; + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("Before decrement XSE kick_count=%u @ %s\n", + kick_count, __func__); + + if (kick_count == 0) { + XSCHED_WARN("Try to select xse that has 0 kicks @ %s\n", + __func__); + return 0; + } + + do { + not_empty = 0; + for_each_vstream_in_ctx(vs, xse->ctx) { + spin_lock(&vs->stream_lock); + vsm = xsched_vsm_fetch_first(vs); + spin_unlock(&vs->stream_lock); + if (vsm) { + list_add_tail(&vsm->node, &xcu->vsm_list); + + sum_exec_time += vsm->exec_time; + kicks_submitted++; + xsched_dec_pending_kicks_xse(xse); + XSCHED_DEBUG( + "vs id = %d Kick submit exec_time %u sq_tail %u sqe_num %u sq_id %u @ %s\n", + vs->id, vsm->exec_time, vsm->sq_tail, + vsm->sqe_num, vsm->sq_id, __func__); + not_empty++; + } + } + } while (not_empty); + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + xse->total_scheduled += kicks_submitted; + + return kicks_submitted; } static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) { - return NULL; + const struct xsched_class *class; + struct xsched_entity *next = NULL; + size_t scheduled; + + lockdep_assert_held(&xcu->xcu_lock); + for_each_xsched_class(class) { + next = class->pick_next_ctx(xcu); + if (next) { + scheduled = class->select_work ? 
+ class->select_work(xcu, next) : select_work_def(xcu, next); + + XSCHED_DEBUG("xse %d scheduled=%zu total=%zu @ %s\n", + next->tgid, scheduled, next->total_scheduled, __func__); + break; + } + } + + return next; } void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) { + lockdep_assert_held(&xcu->xcu_lock); + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return; + } + + if (!xse->on_rq) { + xse->on_rq = true; + xse->class->enqueue_ctx(xse, xcu); + XSCHED_DEBUG("Enqueue xse %d @ %s\n", xse->tgid, __func__); + } } void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) { + lockdep_assert_held(&xcu->xcu_lock); + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return; + } + + if (xse->on_rq) { + xse->class->dequeue_ctx(xse); + xse->on_rq = false; + XSCHED_DEBUG("Dequeue xse %d @ %s\n", xse->tgid, __func__); + } } static int delete_ctx(struct xsched_context *ctx) @@ -220,6 +317,15 @@ struct xsched_cu *xcu_find(uint32_t *type, int xsched_xse_set_class(struct xsched_entity *xse) { + switch (xse->task_type) { + case XSCHED_TYPE_RT: + xse->class = &rt_xsched_class; + XSCHED_DEBUG("Context is in RT class %s\n", __func__); + break; + default: + XSCHED_ERR("Xse has incorrect class @ %s\n", __func__); + return -EINVAL; + } return 0; } @@ -231,6 +337,10 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) atomic_set(&xse->kicks_pending_ctx_cnt, 0); atomic_set(&xse->submitted_one_kick, 0); + xse->total_scheduled = 0; + xse->total_submitted = 0; + xse->last_exec_runtime = 0; + xse->task_type = XSCHED_TYPE_RT; xse->fd = ctx->fd; xse->tgid = ctx->tgid; @@ -252,6 +362,25 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) return err; } + if (xse_is_rt(xse)) { + xse->rt.state = XSE_PREPARE; + xse->rt.flag = XSE_TIF_NONE; + xse->rt.prio = GET_VS_TASK_PRIO_RT(vs); + xse->rt.kick_slice = XSCHED_RT_KICK_SLICE; + + /* XSE priority is being decreased by 1 here because + * in libucc priority counter starts from 1 while in the + * kernel counter starts with 0. + * + * This inconsistency has to be solve in libucc in the + * future rather that having this confusing decrement to + * priority inside the kernel. + */ + if (xse->rt.prio > 0) + xse->rt.prio -= 1; + + INIT_LIST_HEAD(&xse->rt.list_node); + } WRITE_ONCE(xse->on_rq, false); spin_lock_init(&xse->xse_lock); @@ -263,6 +392,11 @@ static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) return 0; } +static inline bool should_preempt(struct xsched_entity *xse) +{ + return xse->class->check_preempt(xse); +} + static int xsched_schedule(void *input_xcu) { struct xsched_cu *xcu = input_xcu; @@ -311,19 +445,37 @@ static int xsched_schedule(void *input_xcu) return err; } +/* Initialize xsched rt runqueue during kernel init. + * Should only be called from xsched_init function. + */ +static inline void xsched_rt_rq_init(struct xsched_cu *xcu) +{ + int prio = 0; + + xcu->xrq.rt.nr_running = 0; + + for_each_xse_prio(prio) { + INIT_LIST_HEAD(&xcu->xrq.rt.rq[prio]); + xcu->xrq.rt.prio_nr_running[prio] = 0; + atomic_set(&xcu->xrq.rt.prio_nr_kicks[prio], 0); + } +} + /* Initialize xsched classes' runqueues. */ static inline void xsched_rq_init(struct xsched_cu *xcu) { xcu->xrq.nr_running = 0; xcu->xrq.curr_xse = NULL; + xcu->xrq.class = &rt_xsched_class; xcu->xrq.state = XRQ_STATE_IDLE; + xsched_rt_rq_init(xcu); } /* Initializes all xsched XCU objects. 
 * Should only be called from xsched_xcu_register function.
 */
 static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group,
-			   int xcu_id)
+			    int xcu_id)
 {
 	bitmap_clear(xcu_group_root->xcu_mask, 0, XSCHED_NR_CUS);
@@ -331,17 +483,19 @@ static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group,
 	xcu->state = XSCHED_XCU_NONE;
 	xcu->group = group;
 
+	atomic_set(&xcu->pending_kicks_rt, 0);
 	atomic_set(&xcu->has_active, 0);
 
 	INIT_LIST_HEAD(&xcu->vsm_list);
-
 	init_waitqueue_head(&xcu->wq_xcu_idle);
-
 	mutex_init(&xcu->xcu_lock);
 
 	/* Mark current XCU in a mask inside XCU root group. */
 	set_bit(xcu->id, xcu_group_root->xcu_mask);
 
+	/* Initialize current XCU's runqueue. */
+	xsched_rq_init(xcu);
+
 	/* This worker should set XCU to XSCHED_XCU_WAIT_IDLE.
 	 * If after initialization XCU still has XSCHED_XCU_NONE
 	 * status then we can assume that there was a problem
diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c
new file mode 100644
index 000000000000..e018c3f8f96c
--- /dev/null
+++ b/kernel/xsched/rt.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Real-Time Scheduling Class for XPU device
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * Author: Konstantin Meskhidze 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Add an xsched entity to a run list based on its priority and set
+ * the corresponding curr_prios bit if necessary.
+ */
+static inline void
+xse_rt_add(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	list_add_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]);
+	__set_bit(xse->rt.prio, xcu->xrq.rt.curr_prios);
+}
+
+/* Delete an xsched entity from a run list and clear the corresponding
+ * curr_prios bit if the list for that priority becomes empty.
+ */
+static inline void xse_rt_del(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+
+	list_del_init(&xse->rt.list_node);
+	if (list_empty(&xcu->xrq.rt.rq[xse->rt.prio]))
+		__clear_bit(xse->rt.prio, xcu->xrq.rt.curr_prios);
+}
+
+static inline void xse_rt_move_tail(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+
+	list_move_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]);
+}
+
+/* Increase RT runqueue total and per prio nr_running stat. */
+static inline void xrq_inc_nr_running(struct xsched_entity *xse,
+				      struct xsched_cu *xcu)
+{
+	xcu->xrq.rt.nr_running++;
+	xcu->xrq.rt.prio_nr_running[xse->rt.prio]++;
+	set_bit(xse->rt.prio, xcu->xrq.rt.curr_prios);
+}
+
+/* Decrease RT runqueue total and per prio nr_running stat
+ * and clear the priority bit once no entity at that priority remains.
+ */ +static inline void xrq_dec_nr_running(struct xsched_entity *xse) +{ + struct xsched_cu *xcu = xse->xcu; + + xcu->xrq.rt.nr_running--; + xcu->xrq.rt.prio_nr_running[xse->rt.prio]--; + + if (!xcu->xrq.rt.prio_nr_running[xse->rt.prio]) + clear_bit(xse->rt.prio, xcu->xrq.rt.curr_prios); +} + +static void dequeue_ctx_rt(struct xsched_entity *xse) +{ + xse_rt_del(xse); + xrq_dec_nr_running(xse); +} + +static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + xse_rt_add(xse, xcu); + xrq_inc_nr_running(xse, xcu); +} + +static inline struct xsched_entity *xrq_next_xse(struct xsched_cu *xcu, + int prio) +{ + return list_first_entry(&xcu->xrq.rt.rq[prio], struct xsched_entity, + rt.list_node); +} + +/* Return the next priority for pick_next_ctx taking into + * account if there are pending kicks on certain priority. + */ +static inline uint32_t get_next_prio_rt(struct xsched_rq *xrq) +{ + int32_t curr_prio; + bool bit_val; + unsigned long *prios = xrq->rt.curr_prios; + atomic_t *prio_nr_kicks = xrq->rt.prio_nr_kicks; + + /* Using generic for loop instead of for_each_set_bit + * because it will be faster than for_each_set_bit. + */ + for (curr_prio = NR_XSE_PRIO - 1; curr_prio >= 0; curr_prio--) { + bit_val = test_bit(curr_prio, prios); + if (!bit_val && atomic_read(&prio_nr_kicks[curr_prio])) { + XSCHED_ERR( + "kicks > 0 on RT priority with the priority bit unset\n"); + BUG(); + return NR_XSE_PRIO; + } + + if (bit_val && atomic_read(&prio_nr_kicks[curr_prio])) + return curr_prio; + } + return NR_XSE_PRIO; +} + +static struct xsched_entity *pick_next_ctx_rt(struct xsched_cu *xcu) +{ + struct xsched_entity *result; + int next_prio; + + next_prio = get_next_prio_rt(&xcu->xrq); + if (next_prio >= NR_XSE_PRIO) { + XSCHED_DEBUG("No pending kicks in RT class @ %s\n", __func__); + return NULL; + } + + if (!xcu->xrq.rt.prio_nr_running[next_prio]) { + XSCHED_ERR( + "The nr_running of RT is 0 while there are pending kicks for %u prio\n", + next_prio); + return NULL; + } + + result = xrq_next_xse(xcu, next_prio); + if (!result) + XSCHED_ERR("Next XSE not found @ %s\n", __func__); + + return result; +} + +static void put_prev_ctx_rt(struct xsched_entity *xse) +{ + xse->rt.kick_slice -= atomic_read(&xse->submitted_one_kick); + XSCHED_DEBUG( + "Update XSE=%d kick_slice=%lld, XSE submitted=%d in RT class @ %s\n", + xse->tgid, xse->rt.kick_slice, + atomic_read(&xse->submitted_one_kick), __func__); + + if (xse->rt.kick_slice <= 0) { + xse->rt.kick_slice = XSCHED_RT_KICK_SLICE; + XSCHED_DEBUG("Refill XSE=%d kick_slice=%lld in RT class @ %s\n", + xse->tgid, xse->rt.kick_slice, __func__); + xse_rt_move_tail(xse); + } +} + +static int submit_prepare_ctx_rt(struct xsched_entity *xse, + struct xsched_cu *xcu) +{ + if (!atomic_read(&xse->kicks_pending_ctx_cnt)) { + xse->rt.state = XSE_READY; + xse->rt.kick_slice = 0; + return -EAGAIN; + } + xse->rt.state = XSE_RUNNING; + + return 0; +} + +static bool check_preempt_ctx_rt(struct xsched_entity *xse) +{ + return true; +} + +static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + int kick_count, scheduled = 0; + struct vstream_info *vs; + struct vstream_metadata *vsm; + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + if (kick_count == 0) { + XSCHED_WARN("Try to select xse that has 0 kicks @ %s\n", + __func__); + return 0; + } + + for_each_vstream_in_ctx(vs, xse->ctx) { + spin_lock(&vs->stream_lock); + while ((vsm = 
xsched_vsm_fetch_first(vs))) { + list_add_tail(&vsm->node, &xcu->vsm_list); + scheduled++; + xsched_dec_pending_kicks_xse(xse); + } + spin_unlock(&vs->stream_lock); + } + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + xse->total_scheduled += scheduled; + return scheduled; +} + +const struct xsched_class rt_xsched_class = { + .next = NULL, + .dequeue_ctx = dequeue_ctx_rt, + .enqueue_ctx = enqueue_ctx_rt, + .pick_next_ctx = pick_next_ctx_rt, + .put_prev_ctx = put_prev_ctx_rt, + .submit_prepare_ctx = submit_prepare_ctx_rt, + .select_work = select_work_rt, + .check_preempt = check_preempt_ctx_rt +}; -- Gitee From 9bf8a43d5e2ec3dd81b39c407f9a4e275a7d84ca Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:30 +0000 Subject: [PATCH 10/16] xsched: Add xsched_submit() implementation hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add __xsched_submit() implementation: - Add xsched_proc() implementation. - Add submit_kick() implementation. Add xcu_run() implementation. Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- drivers/xcu/xcu_group.c | 7 +++- include/linux/xsched.h | 2 + kernel/xsched/core.c | 92 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c index 86b935af00b6..605c023a2cfe 100644 --- a/drivers/xcu/xcu_group.c +++ b/drivers/xcu/xcu_group.c @@ -123,7 +123,12 @@ EXPORT_SYMBOL(xcu_group_find); */ int xcu_run(struct xcu_op_handler_params *params) { - return 0; + if (!params->group->opt || !params->group->opt->run) { + XSCHED_ERR("No function [run] called.\n"); + return -EINVAL; + } + + return params->group->opt->run(params); } /* This function runs "wait" callback for a given xcu_group diff --git a/include/linux/xsched.h b/include/linux/xsched.h index ba2c2e903f59..18e62f265199 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -471,6 +471,8 @@ struct xsched_cu *xcu_find(uint32_t *type, /* Vstream metadata proccesing functions.*/ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); +void submit_kick(struct vstream_info *vs, struct xcu_op_handler_params *params, + struct vstream_metadata *vsm); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* __LINUX_XSCHED_H__ */ diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 97b073d804bc..543967335b17 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -387,11 +387,72 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) return err; } -static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) +/* + * A function for submitting stream's commands (sending commands to a XCU). + */ +static int xsched_proc(struct xsched_cu *xcu, struct vstream_info *vs, + struct vstream_metadata *vsm) { + struct xcu_op_handler_params params; + struct xsched_entity *xse; + + XSCHED_CALL_STUB(); + + xse = &vs->ctx->xse; + + /* Init input parameters for xcu_run and xcu_wait callbacks. */ + params.group = xcu->group; + + /* Increase process time by abstract kick handling time. 
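+	 * exec_time here is the per-kick cost estimate carried in the
+	 * vstream metadata, not a measured device runtime.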
*/ + xse->last_exec_runtime += vsm->exec_time; + + XSCHED_DEBUG("Process vsm sq_tail %d exec_time %u sqe_num %d sq_id %d@ %s\n", + vsm->sq_tail, vsm->exec_time, vsm->sqe_num, vsm->sq_id, __func__); + submit_kick(vs, ¶ms, vsm); + + xse->total_submitted++; + + XSCHED_DEBUG("xse %d total_submitted = %lu @ %s\n", + xse->tgid, xse->total_submitted, __func__); + + XSCHED_EXIT_STUB(); return 0; } +static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + struct vstream_metadata *vsm, *tmp; + unsigned int submit_exec_time = 0; + size_t kicks_submitted = 0; + unsigned long wait_us; + + XSCHED_DEBUG("%s called for xse %d on xcu %u\n", + __func__, xse->tgid, xcu->id); + + list_for_each_entry_safe(vsm, tmp, &xcu->vsm_list, node) { + xsched_proc(xcu, vsm->parent, vsm); + submit_exec_time += vsm->exec_time; + kicks_submitted++; + } + + INIT_LIST_HEAD(&xcu->vsm_list); + + mutex_unlock(&xcu->xcu_lock); + + wait_us = div_u64(submit_exec_time, NSEC_PER_USEC); + XSCHED_DEBUG("XCU kicks_submitted=%lu wait_us=%lu @ %s\n", + kicks_submitted, wait_us, __func__); + + if (wait_us > 0) { + /* Sleep shift not larger than 12.5% */ + usleep_range(wait_us, wait_us + (wait_us >> 3)); + } + + mutex_lock(&xcu->xcu_lock); + + return kicks_submitted; +} + static inline bool should_preempt(struct xsched_entity *xse) { return xse->class->check_preempt(xse); @@ -445,6 +506,35 @@ static int xsched_schedule(void *input_xcu) return err; } +void submit_kick(struct vstream_info *vs, + struct xcu_op_handler_params *params, + struct vstream_metadata *vsm) +{ + int ret; + + params->fd = vs->fd; + params->param_1 = &vs->id; + params->param_2 = &vs->channel_id; + params->param_3 = vsm->sqe; + params->param_4 = &vsm->sqe_num; + params->param_5 = &vsm->timeout; + params->param_6 = &vs->sqcq_type; + params->param_7 = vs->drv_ctx; + /* Send vstream on a device for processing. */ + ret = xcu_run(params); + if (ret) { + XSCHED_ERR( + "Failed to send vstream tasks vstreamId=%d to a device for processing.\n", + vs->id); + } + + XSCHED_DEBUG("Vstream_id %d submit vsm: sq_tail %d\n", vs->id, vsm->sq_tail); + + kfree(vsm); + + return; +} + /* Initialize xsched rt runqueue during kernel init. * Should only be called from xsched_init function. */ -- Gitee From 4ff0f8c4f8c9a7c1bf32d7cf0bc22cbcf591bcaf Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 30 Sep 2025 07:06:31 +0000 Subject: [PATCH 11/16] xsched: Add xsched CFS class hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add xsched cfs class callbacks implementation: - dequeue_ctx_fair. - enqueue_ctx_fair. - pick_next_ctx_fair. - check_preempt_fair. - put_prev_ctx_fair. - submit_prepare_ctx_fair. Add xsched_cfs.c in /kernel/xsched Makefile. Add cfs class related data structure. 
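As a reference for reviewers, the fair-class accounting added in this
patch reduces to a weighted virtual-runtime charge. A minimal standalone
sketch follows (the toy_* names are illustrative only, not code from
this series):

    struct toy_xse {
    	unsigned long long xruntime;	/* rbtree sort key */
    	unsigned int weight;		/* charge factor */
    };

    /* Charge delta units of execution time; the smallest xruntime sits
     * leftmost in the cached rbtree and is picked next. Note that,
     * unlike CPU CFS, the charge is delta * weight, so a heavier weight
     * is charged more per unit of execution time here.
     */
    static void toy_charge(struct toy_xse *xse, unsigned long long delta)
    {
    	xse->xruntime += delta * xse->weight;
    }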
Signed-off-by: Konstantin Meskhidze Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/xsched.h | 50 ++++++++++- kernel/xsched/Makefile | 2 +- kernel/xsched/cfs.c | 185 +++++++++++++++++++++++++++++++++++++++++ kernel/xsched/core.c | 32 +++++-- kernel/xsched/rt.c | 2 +- 5 files changed, 257 insertions(+), 14 deletions(-) create mode 100644 kernel/xsched/cfs.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 18e62f265199..b4b6274b4c06 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -35,6 +35,11 @@ #define XCU_HASH_ORDER 6 +#define RUNTIME_INF ((u64)~0ULL) +#define XSCHED_TIME_INF RUNTIME_INF +#define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 +#define XSCHED_CFS_MIN_TIMESLICE (10*NSEC_PER_MSEC) + #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) #define __GET_VS_TASK_PRIO_RT(t) (((t) >> 8) & 0xFF) @@ -55,6 +60,7 @@ enum xcu_sched_type { XSCHED_TYPE_RT, XSCHED_TYPE_DFLT = XSCHED_TYPE_RT, + XSCHED_TYPE_CFS, XSCHED_TYPE_NUM, }; @@ -88,6 +94,7 @@ enum xse_flag { extern const struct xsched_class rt_xsched_class; +extern const struct xsched_class fair_xsched_class; #define xsched_first_class (&rt_xsched_class) @@ -100,6 +107,13 @@ extern const struct xsched_class rt_xsched_class; #define for_each_vstream_in_ctx(vs, ctx) \ list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node) +/* Manages xsched CFS-like class rbtree based runqueue. */ +struct xsched_rq_cfs { + unsigned int nr_running; + unsigned int load; + u64 min_xruntime; + struct rb_root_cached ctx_timeline; +}; /* Manages xsched RT-like class linked list based runqueue. * @@ -123,10 +137,11 @@ struct xsched_rq { const struct xsched_class *class; int state; - int nr_running; /* RT class run queue.*/ struct xsched_rq_rt rt; + /* CFS class run queue.*/ + struct xsched_rq_cfs cfs; }; enum xcu_state { @@ -159,6 +174,8 @@ struct xsched_cu { /* RT class kick counter. */ atomic_t pending_kicks_rt; + /* CFS class kick counter. */ + atomic_t pending_kicks_cfs; struct task_struct *worker; @@ -185,6 +202,21 @@ struct xsched_entity_rt { s64 kick_slice; }; +struct xsched_entity_cfs { + struct rb_node run_node; + + /* Rq on which this entity is (to be) queued. */ + struct xsched_rq_cfs *cfs_rq; + + /* Value of "virtual" runtime to sort entities in rbtree */ + u64 xruntime; + u32 weight; + + /* Execution time of scheduling entity */ + u64 exec_start; + u64 sum_exec_runtime; +}; + struct xsched_entity { uint32_t task_type; @@ -213,6 +245,8 @@ struct xsched_entity { /* RT class entity. */ struct xsched_entity_rt rt; + /* CFS class entity. */ + struct xsched_entity_cfs cfs; /* Pointer to context object. */ struct xsched_context *ctx; @@ -235,6 +269,11 @@ static inline bool xse_is_rt(const struct xsched_entity *xse) return xse && xse->class == &rt_xsched_class; } +static inline bool xse_is_cfs(const struct xsched_entity *xse) +{ + return xse && xse->class == &fair_xsched_class; +} + /* Returns a pointer to an atomic_t variable representing a counter * of currently pending vstream kicks on a given XCU and for a * given xsched class. 
@@ -256,6 +295,8 @@ xsched_get_pending_kicks_class(const struct xsched_class *class, if (class == &rt_xsched_class) return &xcu->pending_kicks_rt; + if (class == &fair_xsched_class) + return &xcu->pending_kicks_cfs; XSCHED_ERR("Xsched entity has an invalid class @ %s\n", __func__); return NULL; @@ -362,13 +403,14 @@ static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) static inline bool xsched_check_pending_kicks_xcu(struct xsched_cu *xcu) { atomic_t *kicks_rt; + atomic_t *kicks_cfs; kicks_rt = xsched_get_pending_kicks_class(&rt_xsched_class, xcu); - - if (!kicks_rt) + kicks_cfs = xsched_get_pending_kicks_class(&fair_xsched_class, xcu); + if (!kicks_rt || !kicks_cfs) return false; - return !!atomic_read(kicks_rt); + return (!!atomic_read(kicks_rt) || !!atomic_read(kicks_cfs)); } static inline int xse_integrity_check(const struct xsched_entity *xse) diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index f882518d54ab..fe212f228cf6 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o -obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c new file mode 100644 index 000000000000..dadd16a5a90b --- /dev/null +++ b/kernel/xsched/cfs.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Completely Fair Scheduling (CFS) Class for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include + +#define CFS_INNER_RQ_EMPTY(cfs_xse) \ + ((cfs_xse)->xruntime == XSCHED_TIME_INF) + +void xs_rq_add(struct xsched_entity_cfs *xse) +{ + struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; + struct rb_node **link = &cfs_rq->ctx_timeline.rb_root.rb_node; + struct rb_node *parent = NULL; + struct xsched_entity_cfs *entry; + bool leftmost = true; + + while (*link) { + parent = *link; + entry = rb_entry(parent, struct xsched_entity_cfs, run_node); + if (xse->xruntime <= entry->xruntime) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(&xse->run_node, parent, link); + rb_insert_color_cached(&xse->run_node, &cfs_rq->ctx_timeline, leftmost); +} + +void xs_rq_remove(struct xsched_entity_cfs *xse) +{ + struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; + + rb_erase_cached(&xse->run_node, &cfs_rq->ctx_timeline); +} + +/** + * xs_cfs_rq_update() - Update entity's runqueue position with new xruntime + */ +static void xs_cfs_rq_update(struct xsched_entity_cfs *xse_cfs, u64 new_xrt) +{ + xs_rq_remove(xse_cfs); + xse_cfs->xruntime = new_xrt; + xs_rq_add(xse_cfs); +} + +static inline struct xsched_entity_cfs * +xs_pick_first(struct xsched_rq_cfs *cfs_rq) +{ + struct xsched_entity_cfs *xse_cfs; + struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline); + + if (!left) + return NULL; + + xse_cfs = rb_entry(left, struct xsched_entity_cfs, run_node); + return xse_cfs; +} + +/** + * xs_update() - Account xruntime and runtime metrics. 
+ * @xse_cfs: Pointer to the CFS scheduling entity.
+ * @delta: Execution time during the last period
+ */
+static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta)
+{
+	u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight;
+
+	xs_cfs_rq_update(xse_cfs, new_xrt);
+	xse_cfs->sum_exec_runtime += delta;
+}
+
+/*
+ * Xsched fair class methods.
+ * For rq manipulation we rely on the root runqueue lock taken in core.
+ * xsched_group_xcu_priv needs no locking: one worker thread per XCU.
+ */
+static void dequeue_ctx_fair(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+	struct xsched_entity_cfs *first;
+	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
+
+	xs_rq_remove(xse_cfs);
+
+	first = xs_pick_first(&xcu->xrq.cfs);
+	xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF;
+
+	if (xcu->xrq.cfs.min_xruntime == XSCHED_TIME_INF) {
+		atomic_set(&xcu->has_active, 0);
+		XSCHED_DEBUG("%s: set has_active to 0\n", __func__);
+	}
+}
+
+/**
+ * enqueue_ctx_fair() - Add a context to the runqueue
+ * @xse: xsched entity of the context
+ * @xcu: executor
+ *
+ * In contrast to enqueue_task(), this is called once at context init.
+ * Although groups reside in the tree, their nodes are not counted in nr_running.
+ * A group's xruntime is represented by the minimal xruntime inside it.
+ */
+static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	struct xsched_entity_cfs *first;
+	struct xsched_rq_cfs *rq;
+	struct xsched_entity_cfs *xse_cfs = &xse->cfs;
+
+	xse_cfs->weight = XSCHED_CFS_ENTITY_WEIGHT_DFLT;
+	rq = xse_cfs->cfs_rq = &xcu->xrq.cfs;
+
+	/* If no XSE of only empty groups */
+	if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF)
+		rq->min_xruntime = xse_cfs->xruntime;
+	else
+		xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime);
+
+	xs_rq_add(xse_cfs);
+
+	first = xs_pick_first(&xcu->xrq.cfs);
+	xcu->xrq.cfs.min_xruntime = (first) ?
first->xruntime : XSCHED_TIME_INF; + + if (xcu->xrq.cfs.min_xruntime != XSCHED_TIME_INF) { + atomic_set(&xcu->has_active, 1); + XSCHED_DEBUG("%s: set has_active to 1\n", __func__); + } +} + +static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) +{ + struct xsched_entity_cfs *xse; + struct xsched_rq_cfs *rq = &xcu->xrq.cfs; + + xse = xs_pick_first(rq); + if (!xse) + return NULL; + + return container_of(xse, struct xsched_entity, cfs); +} + +static inline bool +xs_should_preempt_fair(struct xsched_entity *xse) +{ + return (atomic_read(&xse->submitted_one_kick) >= XSCHED_CFS_KICK_SLICE); +} + +static void put_prev_ctx_fair(struct xsched_entity *xse) +{ + struct xsched_entity_cfs *prev = &xse->cfs; + + xs_update(prev, xse->last_exec_runtime); +} + +int submit_prepare_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + return 0; +} + +const struct xsched_class fair_xsched_class = { + .next = NULL, + .dequeue_ctx = dequeue_ctx_fair, + .enqueue_ctx = enqueue_ctx_fair, + .pick_next_ctx = pick_next_ctx_fair, + .put_prev_ctx = put_prev_ctx_fair, + .submit_prepare_ctx = submit_prepare_ctx_fair, + .check_preempt = xs_should_preempt_fair, +}; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 543967335b17..378dd4e66476 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -89,7 +89,7 @@ static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) not_empty++; } } - } while (not_empty); + } while ((sum_exec_time < XSCHED_CFS_MIN_TIMESLICE) && (not_empty)); kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", @@ -322,6 +322,10 @@ int xsched_xse_set_class(struct xsched_entity *xse) xse->class = &rt_xsched_class; XSCHED_DEBUG("Context is in RT class %s\n", __func__); break; + case XSCHED_TYPE_CFS: + xse->class = &fair_xsched_class; + XSCHED_DEBUG("Context is in CFS class %s\n", __func__); + break; default: XSCHED_ERR("Xse has incorrect class @ %s\n", __func__); return -EINVAL; @@ -362,6 +366,10 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) return err; } + if (xse_is_cfs(xse)) { + xse->cfs.sum_exec_runtime = 0; + } + if (xse_is_rt(xse)) { xse->rt.state = XSE_PREPARE; xse->rt.flag = XSE_TIF_NONE; @@ -468,10 +476,9 @@ static int xsched_schedule(void *input_xcu) while (!kthread_should_stop()) { mutex_unlock(&xcu->xcu_lock); wait_event_interruptible(xcu->wq_xcu_idle, - atomic_read(&xcu->has_active) || xcu->xrq.nr_running); - - XSCHED_DEBUG("%s: rt_nr_running = %d, has_active = %d\n", - __func__, xcu->xrq.nr_running, atomic_read(&xcu->has_active)); + xcu->xrq.cfs.nr_running || xcu->xrq.rt.nr_running); + XSCHED_DEBUG("%s: rt nr_running = %u, cfs nr_running = %u\n", + __func__, xcu->xrq.rt.nr_running, xcu->xrq.cfs.nr_running); mutex_lock(&xcu->xcu_lock); if (!xsched_check_pending_kicks_xcu(xcu)) { @@ -536,7 +543,7 @@ void submit_kick(struct vstream_info *vs, } /* Initialize xsched rt runqueue during kernel init. - * Should only be called from xsched_init function. + * Should only be called from xsched_rq_init function. */ static inline void xsched_rt_rq_init(struct xsched_cu *xcu) { @@ -551,14 +558,23 @@ static inline void xsched_rt_rq_init(struct xsched_cu *xcu) } } +/* Initialize xsched cfs runqueue during kernel init. + * Should only be called from xsched_rq_init function. 
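+ * min_xruntime is not reset here; enqueue_ctx_fair() re-derives it
+ * from the first entity queued on this runqueue.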
+ */
+static inline void xsched_cfs_rq_init(struct xsched_cu *xcu)
+{
+	xcu->xrq.cfs.nr_running = 0;
+	xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED;
+}
+
 /* Initialize xsched classes' runqueues. */
 static inline void xsched_rq_init(struct xsched_cu *xcu)
 {
-	xcu->xrq.nr_running = 0;
 	xcu->xrq.curr_xse = NULL;
 	xcu->xrq.class = &rt_xsched_class;
 	xcu->xrq.state = XRQ_STATE_IDLE;
 	xsched_rt_rq_init(xcu);
+	xsched_cfs_rq_init(xcu);
 }
 
 /* Initializes all xsched XCU objects.
@@ -574,7 +590,7 @@ static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group,
 	xcu->group = group;
 
 	atomic_set(&xcu->pending_kicks_rt, 0);
-	atomic_set(&xcu->has_active, 0);
+	atomic_set(&xcu->pending_kicks_cfs, 0);
 
 	INIT_LIST_HEAD(&xcu->vsm_list);
 
 	init_waitqueue_head(&xcu->wq_xcu_idle);
diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c
index e018c3f8f96c..3629fba3a48d 100644
--- a/kernel/xsched/rt.c
+++ b/kernel/xsched/rt.c
@@ -216,7 +216,7 @@ static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse)
 }
 
 const struct xsched_class rt_xsched_class = {
-	.next = NULL,
+	.next = &fair_xsched_class,
 	.dequeue_ctx = dequeue_ctx_rt,
 	.enqueue_ctx = enqueue_ctx_rt,
 	.pick_next_ctx = pick_next_ctx_rt,
-- 
Gitee

From c39bba222115b3ab24a2e911eba7670f9621ee6c Mon Sep 17 00:00:00 2001
From: Alekseev Dmitry 
Date: Tue, 30 Sep 2025 07:06:32 +0000
Subject: [PATCH 12/16] xsched: Add XCU control group implementation and its
 backend in xsched CFS

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Add cgroup initialization including the root cgroup.
Add xcu cgroup callbacks: alloc, free, attach, detach, etc.
Add xsched_group cgroup management files and methods for:
- sched type
- shares

Add the xcu cgroup subsys and the CONFIG_CGROUP_XCU option.
Add cgroup.c to the kernel/xsched Makefile.
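As a usage illustration, a child group would be configured from
userspace roughly as follows. This is a sketch only: the control file
name ("xcu.shares") and the mount point are assumptions based on the
interface described above, not paths this patch guarantees.

    #include <stdio.h>

    int main(void)
    {
    	/* Assumed path: a cgroup v2 mount exposing an "xcu.shares"
    	 * file; 2048 doubles the default share value of 1024
    	 * (XSCHED_CFG_SHARE_DFLT).
    	 */
    	FILE *f = fopen("/sys/fs/cgroup/mygrp/xcu.shares", "w");

    	if (!f)
    		return 1;
    	fprintf(f, "%d\n", 2048);
    	return fclose(f) ? 1 : 0;
    }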
Signed-off-by: Alekseev Dmitry Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/cgroup_subsys.h | 4 + include/linux/xsched.h | 112 ++++++- kernel/cgroup/cgroup.c | 2 +- kernel/xsched/Kconfig | 15 + kernel/xsched/Makefile | 1 + kernel/xsched/cfs.c | 73 +++- kernel/xsched/cgroup.c | 616 ++++++++++++++++++++++++++++++++++ kernel/xsched/core.c | 16 +- 8 files changed, 812 insertions(+), 27 deletions(-) create mode 100644 kernel/xsched/cgroup.c diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 85fa78049bd0..e65ae90946c2 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -61,6 +61,10 @@ SUBSYS(pids) SUBSYS(rdma) #endif +#if IS_ENABLED(CONFIG_CGROUP_XCU) +SUBSYS(xcu) +#endif + #if IS_ENABLED(CONFIG_CGROUP_MISC) SUBSYS(misc) #endif diff --git a/include/linux/xsched.h b/include/linux/xsched.h index b4b6274b4c06..e59e4fe5e4b4 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #ifndef pr_fmt #define pr_fmt(fmt) fmt @@ -39,13 +39,11 @@ #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 #define XSCHED_CFS_MIN_TIMESLICE (10*NSEC_PER_MSEC) +#define XSCHED_CFG_SHARE_DFLT 1024 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) - #define __GET_VS_TASK_PRIO_RT(t) (((t) >> 8) & 0xFF) - #define GET_VS_TASK_TYPE(vs_ptr) __GET_VS_TASK_TYPE((vs_ptr)->task_type) - #define GET_VS_TASK_PRIO_RT(vs_ptr) __GET_VS_TASK_PRIO_RT((vs_ptr)->task_type) /* @@ -57,6 +55,8 @@ */ #define XSCHED_CFS_KICK_SLICE 10 +extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; + enum xcu_sched_type { XSCHED_TYPE_RT, XSCHED_TYPE_DFLT = XSCHED_TYPE_RT, @@ -186,12 +186,15 @@ struct xsched_cu { struct mutex xcu_lock; - atomic_t has_active; - wait_queue_head_t wq_xcu_idle; wait_queue_head_t wq_xcu_running; }; +extern int num_active_xcu; +#define for_each_active_xcu(xcu, id) \ + for ((id) = 0, xcu = xsched_cu_mgr[(id)]; \ + (id) < num_active_xcu && (xcu = xsched_cu_mgr[(id)]); (id)++) + struct xsched_entity_rt { struct list_head list_node; enum xse_state state; @@ -260,6 +263,11 @@ struct xsched_entity { */ struct xsched_cu *xcu; + /* Link to list of xsched_group items */ + struct list_head group_node; + struct xsched_group *parent_grp; + bool is_group; + /* General purpose xse lock. 
*/ spinlock_t xse_lock; }; @@ -274,6 +282,93 @@ static inline bool xse_is_cfs(const struct xsched_entity *xse) return xse && xse->class == &fair_xsched_class; } +/* xsched_group's xcu related stuff */ +struct xsched_group_xcu_priv { + /* Owner of this group */ + struct xsched_group *self; + + /* xcu id */ + int xcu_id; + + /* Link to scheduler */ + struct xsched_entity xse; /* xse of this group on runqueue */ + struct xsched_rq_cfs *cfs_rq; /* cfs runqueue "owned" by this group */ + struct xsched_rq_rt *rt_rq; /* rt runqueue "owned" by this group */ + + /* Statistics */ + int nr_throttled; + u64 throttled_time; + u64 overrun_time; +}; + +/* Xsched scheduling control group */ +struct xsched_group { + /* Cgroups controller structure */ + struct cgroup_subsys_state css; + + /* Control group settings: */ + int sched_type; + int prio; + + /* Bandwidth setting: shares value set by user */ + u64 shares_cfg; + u64 shares_cfg_red; + u32 weight; + u64 children_shares_sum; + + /* Bandwidth setting: maximal quota in period */ + s64 quota; + /* record the runtime of operators during the period */ + s64 runtime; + s64 period; + struct hrtimer quota_timeout; + struct work_struct refill_work; + u64 qoslevel; + + struct xsched_group_xcu_priv perxcu_priv[XSCHED_NR_CUS]; + + /* Groups hierarchcy */ + struct xsched_group *parent; + struct list_head children_groups; + struct list_head group_node; + + spinlock_t lock; + + /* for XSE to move in perxcu */ + struct list_head members; +}; + +#define XSCHED_RQ_OF(xse) \ + (container_of(((xse)->cfs.cfs_rq), struct xsched_rq, cfs)) + +#define XSCHED_RQ_OF_CFS_XSE(cfs_xse) \ + (container_of(((cfs_xse)->cfs_rq), struct xsched_rq, cfs)) + +#define XSCHED_SE_OF(cfs_xse) \ + (container_of((cfs_xse), struct xsched_entity, cfs)) + +#define xcg_parent_grp_xcu(xcg) \ + ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) + +#define xse_parent_grp_xcu(xse_cfs) \ + (&((XSCHED_SE_OF(xse_cfs) \ + ->parent_grp->perxcu_priv[(XSCHED_SE_OF(xse_cfs))->xcu->id]))) + +static inline struct xsched_group_xcu_priv * +xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) +{ + struct xsched_entity *xse; + + xse = xse_cfs ? container_of(xse_cfs, struct xsched_entity, cfs) : NULL; + return xse ? container_of(xse, struct xsched_group_xcu_priv, xse) : NULL; +} + +static inline struct xsched_group * +xse_this_grp(struct xsched_entity_cfs *xse_cfs) +{ + return xse_cfs ? xse_this_grp_xcu(xse_cfs)->self : NULL; +} + /* Returns a pointer to an atomic_t variable representing a counter * of currently pending vstream kicks on a given XCU and for a * given xsched class. 
@@ -515,6 +610,11 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); void submit_kick(struct vstream_info *vs, struct xcu_op_handler_params *params, struct vstream_metadata *vsm); +/* Xsched group manage functions */ +int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); +void xcu_cg_init_common(struct xsched_group *xcg); +void xcu_grp_shares_update(struct xsched_group *xg); +void xsched_group_xse_detach(struct xsched_entity *xse); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* __LINUX_XSCHED_H__ */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c26a9b3a3576..b632590eae0f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6190,7 +6190,7 @@ int __init cgroup_init(void) struct cgroup_subsys *ss; int ssid; - BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 17); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig index fc5d7767d8e0..776c3dfbdaa4 100644 --- a/kernel/xsched/Kconfig +++ b/kernel/xsched/Kconfig @@ -34,3 +34,18 @@ config XSCHED_NR_CUS This option defines the maximum number of Compute Units (CUs) that can be managed by the XSched scheduler, consider changing this value proportionally to the number of available XCU cores. + +config CGROUP_XCU + bool "XCU bandwidth control and group scheduling for xsched_cfs" + default n + depends on XCU_SCHEDULER + help + This option enables the extended Compute Unit (XCU) resource controller for + CFS task groups, providing hierarchical scheduling and fine-grained bandwidth + allocation capabilities. Key features include: + - Proportional XCU time distribution across cgroups based on shares/quotas + - Nested group scheduling with latency isolation + - Integration with xsched_cfs for fair CPU resource management + + Required for systems requiring fine-grained resource control in cgroups. + If unsure, say N. 
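The proportional distribution described in the help text reduces to
normalizing each child's shares_cfg against the sum across its
siblings. A minimal sketch of that arithmetic (the toy_* name and the
rounding policy are assumptions; the real propagation lives in
xcu_grp_shares_update(), declared above and implemented in cgroup.c):

    /* Fraction of the parent's weight a child receives for its shares. */
    static inline unsigned long long
    toy_share_weight(unsigned long long parent_weight,
    		 unsigned long long shares_cfg,
    		 unsigned long long children_shares_sum)
    {
    	if (!children_shares_sum)
    		return parent_weight;
    	return parent_weight * shares_cfg / children_shares_sum;
    }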
diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index fe212f228cf6..c4c06b6038ff 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o +obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index dadd16a5a90b..1313c7e73a11 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -21,6 +21,8 @@ #define CFS_INNER_RQ_EMPTY(cfs_xse) \ ((cfs_xse)->xruntime == XSCHED_TIME_INF) +extern struct xsched_group *root_xcg; + void xs_rq_add(struct xsched_entity_cfs *xse) { struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; @@ -81,10 +83,46 @@ xs_pick_first(struct xsched_rq_cfs *cfs_rq) */ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) { - u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight; + struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs); + + for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) { + u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight; + + xs_cfs_rq_update(xse_cfs, new_xrt); + xse_cfs->sum_exec_runtime += delta; - xs_cfs_rq_update(xse_cfs, new_xrt); - xse_cfs->sum_exec_runtime += delta; + if (xg->self->parent == NULL) + break; + } +} + +/** + * xg_update() - Update container group's xruntime + * @gxcu: Descendant xsched group's private xcu control structure + * + * No locks required to access xsched_group_xcu_priv members, + * because only one worker thread works for one XCU. + */ +static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) +{ + u64 new_xrt; + struct xsched_entity_cfs *entry; + + for (; xg; xg = &xcg_parent_grp_xcu(xg)) { + xg->cfs_rq->nr_running += task_delta; + entry = xs_pick_first(xg->cfs_rq); + new_xrt = entry ? entry->xruntime * xg->xse.cfs.weight : XSCHED_TIME_INF; + + xg->cfs_rq->min_xruntime = new_xrt; + xg->xse.cfs.xruntime = new_xrt; + + if (!xg->xse.on_rq) + break; + if (!xg->self->parent) + break; + + xs_cfs_rq_update(&xg->xse.cfs, new_xrt); + } } /* @@ -94,19 +132,19 @@ static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) */ static void dequeue_ctx_fair(struct xsched_entity *xse) { + int task_delta; struct xsched_cu *xcu = xse->xcu; struct xsched_entity_cfs *first; struct xsched_entity_cfs *xse_cfs = &xse->cfs; + task_delta = + (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; + xs_rq_remove(xse_cfs); + xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; - - if (xcu->xrq.cfs.min_xruntime == XSCHED_TIME_INF) { - atomic_set(&xcu->has_active, 0); - XSCHED_DEBUG("%s: set has_active to 0\n", __func__); - } } /** @@ -120,28 +158,27 @@ static void dequeue_ctx_fair(struct xsched_entity *xse) */ static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) { + int task_delta; struct xsched_entity_cfs *first; struct xsched_rq_cfs *rq; struct xsched_entity_cfs *xse_cfs = &xse->cfs; xse_cfs->weight = XSCHED_CFS_ENTITY_WEIGHT_DFLT; - rq = xse_cfs->cfs_rq = &xcu->xrq.cfs; + rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->cfs_rq; + task_delta = + (xse->is_group) ? 
xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running : 1; - /* If no XSE of only empty groups */ + /* If no XSE or only empty groups */ if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) rq->min_xruntime = xse_cfs->xruntime; else xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); xs_rq_add(xse_cfs); + xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); first = xs_pick_first(&xcu->xrq.cfs); xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; - - if (xcu->xrq.cfs.min_xruntime != XSCHED_TIME_INF) { - atomic_set(&xcu->has_active, 1); - XSCHED_DEBUG("%s: set has_active to 1\n", __func__); - } } static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) @@ -153,6 +190,12 @@ static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) if (!xse) return NULL; + for (; XSCHED_SE_OF(xse)->is_group; xse = xs_pick_first(rq)) { + if (!xse || CFS_INNER_RQ_EMPTY(xse)) + return NULL; + rq = xse_this_grp_xcu(xse)->cfs_rq; + } + return container_of(xse, struct xsched_entity, cfs); } diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c new file mode 100644 index 000000000000..8ae17069e031 --- /dev/null +++ b/kernel/xsched/cgroup.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Support cgroup for xpu device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include +#include +#include +#include + +enum xcu_file_type { + XCU_FILE_PERIOD_MS, + XCU_FILE_QUOTA_MS, + XCU_FILE_SHARES, +}; + +static struct xsched_group root_xsched_group; +struct xsched_group *root_xcg = &root_xsched_group; +static bool root_cg_inited; + +static struct xsched_group *old_xcg; +static DECLARE_WAIT_QUEUE_HEAD(xcg_attach_wq); +static bool attach_in_progress; +static DEFINE_MUTEX(xcg_mutex); + +static const char xcu_sched_name[XSCHED_TYPE_NUM][4] = { + [XSCHED_TYPE_RT] = "rt", + [XSCHED_TYPE_CFS] = "cfs" +}; + +void xcu_cg_init_common(struct xsched_group *xcg) +{ + spin_lock_init(&xcg->lock); + INIT_LIST_HEAD(&xcg->members); + INIT_LIST_HEAD(&xcg->children_groups); +} + +static void xcu_cfs_root_cg_init(void) +{ + int id; + struct xsched_cu *xcu; + + for_each_active_xcu(xcu, id) { + root_xcg->perxcu_priv[id].xcu_id = id; + root_xcg->perxcu_priv[id].self = root_xcg; + root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; + root_xcg->perxcu_priv[id].xse.cfs.weight = 1; + } + + root_xcg->sched_type = XSCHED_TYPE_DFLT; +} + +/** + * xcu_cfs_cg_init() - Initialize xsched_group cfs runqueues and bw control. + * @xcg: new xsched_cgroup + * @parent_xg: parent's group + * + * One xsched_group can host many processes with contexts on different devices. + * Function creates xsched_entity for every XCU, and places it in runqueue + * of parent group. Create new cfs rq for xse inside group. 
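/*
 * The xs_update()/xg_update() walk above charges one execution delta at
 * every level of the hierarchy, scaled by each level's weight, so a
 * heavier weight makes virtual time advance faster and the entity run
 * less often. A self-contained numeric model of the propagation (the
 * weights are illustrative values, not defaults from this series):
 */
#include <stdio.h>

int main(void)
{
	unsigned long long delta = 1000000;		/* 1 ms of device time */
	unsigned long long xruntime[3] = { 0, 0, 0 };	/* leaf xse, group xse, root */
	unsigned int weight[3] = { 2, 3, 1 };

	for (int lvl = 0; lvl < 3; lvl++) {
		xruntime[lvl] += delta * weight[lvl];	/* new_xrt in xs_update() */
		printf("level %d: xruntime=%llu\n", lvl, xruntime[lvl]);
	}
	/* level 0: 2000000, level 1: 3000000, level 2: 1000000 */
	return 0;
}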
+ */ +static int xcu_cfs_cg_init(struct xsched_group *xcg, + struct xsched_group *parent_xg) +{ + int id = 0, err, i; + struct xsched_cu *xcu; + struct xsched_rq_cfs *sub_cfs_rq; + + if (unlikely(!root_cg_inited)) { + xcu_cfs_root_cg_init(); + root_cg_inited = true; + } + + for_each_active_xcu(xcu, id) { + xcg->perxcu_priv[id].xcu_id = id; + xcg->perxcu_priv[id].self = xcg; + + sub_cfs_rq = kzalloc(sizeof(struct xsched_rq_cfs), GFP_KERNEL); + if (!sub_cfs_rq) { + XSCHED_ERR("Fail to alloc cfs runqueue on xcu %d\n", id); + err = -ENOMEM; + goto alloc_error; + } + xcg->perxcu_priv[id].cfs_rq = sub_cfs_rq; + xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; + + xcg->perxcu_priv[id].xse.is_group = true; + xcg->perxcu_priv[id].xse.xcu = xcu; + xcg->perxcu_priv[id].xse.class = &fair_xsched_class; + + /* Put new empty groups to the right in parent's rbtree: */ + xcg->perxcu_priv[id].xse.cfs.xruntime = XSCHED_TIME_INF; + xcg->perxcu_priv[id].xse.cfs.weight = + XSCHED_CFS_ENTITY_WEIGHT_DFLT; + xcg->perxcu_priv[id].xse.parent_grp = parent_xg; + + mutex_lock(&xcu->xcu_lock); + enqueue_ctx(&xcg->perxcu_priv[id].xse, xcu); + mutex_unlock(&xcu->xcu_lock); + } + + xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; + xcu_grp_shares_update(parent_xg); + + return 0; + +alloc_error: + for (i = 0; i < id; i++) + kfree(xcg->perxcu_priv[i].cfs_rq); + return err; +} + +static void xcu_cfs_cg_deinit(struct xsched_group *xcg) +{ + uint32_t id; + struct xsched_cu *xcu; + + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + dequeue_ctx(&xcg->perxcu_priv[id].xse, xcu); + mutex_unlock(&xcu->xcu_lock); + kfree(xcg->perxcu_priv[id].cfs_rq); + } + xcu_grp_shares_update(xcg->parent); +} + +/** + * xcu_cg_init() - Initialize non-root xsched_group structure. + * @xcg: new xsched_cgroup + * @parent_xg: parent's group + */ +static int xcu_cg_init(struct xsched_group *xcg, + struct xsched_group *parent_xg) +{ + xcu_cg_init_common(xcg); + xcg->parent = parent_xg; + list_add_tail(&xcg->group_node, &parent_xg->children_groups); + xcg->sched_type = parent_xg->sched_type; + + switch (xcg->sched_type) { + case XSCHED_TYPE_CFS: + return xcu_cfs_cg_init(xcg, parent_xg); + default: + pr_info("xcu_cgroup: init RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } + + return 0; +} + +inline struct xsched_group *xcu_cg_from_css(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct xsched_group, css) : NULL; +} + +/** + * xcu_css_alloc() - Allocate and init xcu cgroup. + * @parent_css: css of parent xcu cgroup + * + * Called from kernel/cgroup.c with cgroup_lock() held. + * First called in subsys initialization to create root xcu cgroup, when + * XCUs haven't been initialized yet. Func used on every new cgroup creation, + * on second call to set root xsched_group runqueue. + * + * Return: pointer of new xcu cgroup css on success, -ENOMEM otherwise. 
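/*
 * xcu_cfs_cg_init() above uses the usual partial-allocation rollback: on
 * the first failed allocation it frees only the per-XCU runqueues that
 * were already set up (indices 0..id-1) and returns -ENOMEM. The same
 * idiom in a self-contained form:
 */
#include <stdlib.h>

#define DEMO_NR_CUS 4

static int demo_init_all(int *slots[DEMO_NR_CUS])
{
	int i, id;

	for (id = 0; id < DEMO_NR_CUS; id++) {
		slots[id] = malloc(sizeof(int));
		if (!slots[id])
			goto alloc_error;	/* stop at the first failure */
	}
	return 0;

alloc_error:
	for (i = 0; i < id; i++) {		/* undo only what succeeded */
		free(slots[i]);
		slots[i] = NULL;
	}
	return -1;				/* -ENOMEM in the kernel code */
}

int main(void)
{
	int *slots[DEMO_NR_CUS] = { 0 };
	int err = demo_init_all(slots);

	for (int i = 0; !err && i < DEMO_NR_CUS; i++)
		free(slots[i]);
	return err ? 1 : 0;
}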
+ */ +static struct cgroup_subsys_state * +xcu_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct xsched_group *parent_xg; + struct xsched_group *xg; + int err; + + if (!parent_css) + return &root_xsched_group.css; + + xg = kzalloc(sizeof(*xg), GFP_KERNEL); + if (!xg) + return ERR_PTR(-ENOMEM); + + mutex_lock(&xcg_mutex); + parent_xg = xcu_cg_from_css(parent_css); + err = xcu_cg_init(xg, parent_xg); + mutex_unlock(&xcg_mutex); + if (err) { + kfree(xg); + XSCHED_ERR("Fail to alloc new xcu group %s\n", __func__); + return ERR_PTR(err); + } + + return &xg->css; +} + +static void xcu_css_free(struct cgroup_subsys_state *css) +{ + struct xsched_group *xcg; + + mutex_lock(&xcg_mutex); + xcg = xcu_cg_from_css(css); + if (xcg->parent != NULL) { + switch (xcg->sched_type) { + case XSCHED_TYPE_CFS: + xcu_cfs_cg_deinit(xcg); + break; + default: + pr_info("xcu_cgroup: deinit RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } + } + list_del(&xcg->group_node); + mutex_unlock(&xcg_mutex); + + kfree(xcg); +} + +int xcu_css_online(struct cgroup_subsys_state *css) +{ + return 0; +} + +static void xcu_css_offline(struct cgroup_subsys_state *css) +{ + ; +} + +static void xsched_group_xse_attach(struct xsched_group *xg, + struct xsched_entity *xse) +{ + spin_lock(&xg->lock); + list_add_tail(&xse->group_node, &xg->members); + spin_unlock(&xg->lock); + xse->parent_grp = xg; +} + +void xsched_group_xse_detach(struct xsched_entity *xse) +{ + struct xsched_group *xcg = xse->parent_grp; + + spin_lock(&xcg->lock); + list_del(&xse->group_node); + spin_unlock(&xcg->lock); +} + +static int xcu_task_can_attach(struct task_struct *task, + struct xsched_group *old, struct xsched_group *dst) +{ + struct xsched_entity *xse; + bool has_xse = false; + + spin_lock(&old->lock); + list_for_each_entry(xse, &old->members, group_node) { + if (xse->owner_pid == task_pid_nr(task)) { + has_xse = true; + break; + } + } + spin_unlock(&old->lock); + + return has_xse ? 
-EINVAL : 0;
+}
+
+static int xcu_can_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css, *old_css;
+	struct xsched_group *dst_xcg;
+	int ret = 0;
+
+	mutex_lock(&xcg_mutex);
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		old_css = task_css(task, xcu_cgrp_id);
+		dst_xcg = xcu_cg_from_css(dst_css);
+		old_xcg = xcu_cg_from_css(old_css);
+		ret = xcu_task_can_attach(task, old_xcg, dst_xcg);
+		if (ret)
+			break;
+	}
+	if (!ret)
+		attach_in_progress = true;
+	mutex_unlock(&xcg_mutex);
+	return ret;
+}
+
+static void xcu_cancel_attach(struct cgroup_taskset *tset)
+{
+	mutex_lock(&xcg_mutex);
+	attach_in_progress = false;
+	wake_up(&xcg_attach_wq);
+	mutex_unlock(&xcg_mutex);
+}
+
+void xcu_move_task(struct task_struct *task, struct xsched_group *old_xcg,
+		   struct xsched_group *new_xcg)
+{
+	struct xsched_entity *xse, *tmp;
+	struct xsched_cu *xcu;
+
+	spin_lock(&old_xcg->lock);
+	list_for_each_entry_safe(xse, tmp, &old_xcg->members, group_node) {
+		if (xse->owner_pid != task_pid_nr(task))
+			continue;
+
+		xcu = xse->xcu;
+		BUG_ON(old_xcg != xse->parent_grp);
+
+		/* delete from the old_xcg */
+		list_del(&xse->group_node);
+
+		mutex_lock(&xcu->xcu_lock);
+		/* dequeue from the current runqueue */
+		dequeue_ctx(xse, xcu);
+		/* attach to the new_xcg */
+		xsched_group_xse_attach(new_xcg, xse);
+		/* enqueue to the runqueue in new_xcg */
+		enqueue_ctx(xse, xcu);
+		mutex_unlock(&xcu->xcu_lock);
+	}
+	spin_unlock(&old_xcg->lock);
+}
+
+static void xcu_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *css;
+
+	mutex_lock(&xcg_mutex);
+	cgroup_taskset_for_each(task, css, tset) {
+		xcu_move_task(task, old_xcg, xcu_cg_from_css(css));
+	}
+	attach_in_progress = false;
+	wake_up(&xcg_attach_wq);
+	mutex_unlock(&xcg_mutex);
+}
+
+/**
+ * xsched_group_inherit() - Attach new entity to task's xsched_group.
+ * @task: task_struct
+ * @xse: xsched entity
+ *
+ * Called in xsched context initialization to attach xse to task's group
+ * and inherit its xse scheduling class and bandwidth control policy.
+ *
+ * Return: Zero on success.
+ */
+int xsched_group_inherit(struct task_struct *task, struct xsched_entity *xse)
+{
+	struct cgroup_subsys_state *css;
+	struct xsched_group *xg;
+
+retry:
+	wait_event(xcg_attach_wq, !attach_in_progress);
+
+	mutex_lock(&xcg_mutex);
+	if (attach_in_progress) {
+		mutex_unlock(&xcg_mutex);
+		goto retry;
+	}
+	xse->owner_pid = task_pid_nr(task);
+	css = task_get_css(task, xcu_cgrp_id);
+	xg = xcu_cg_from_css(css);
+	xsched_group_xse_attach(xg, xse);
+	css_put(css);
+	mutex_unlock(&xcg_mutex);
+
+	return 0;
+}
+
+static int xcu_sched_show(struct seq_file *sf, void *v)
+{
+	struct cgroup_subsys_state *css = seq_css(sf);
+	struct xsched_group *xg = xcu_cg_from_css(css);
+
+	seq_printf(sf, "%s\n", xcu_sched_name[xg->sched_type]);
+	return 0;
+}
+
+/**
+ * xcu_cg_set_sched() - Set scheduling type for group.
+ * @xg: xsched group
+ * @type: scheduler type
+ *
+ * The scheduler type can be changed only if the group is a child of the
+ * root group and has no scheduling entities attached.
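/*
 * The gcd-based reduction in xcu_grp_shares_update() further below turns
 * the user-visible shares of all sibling CFS groups into small integer
 * weights: divide each share by the siblings' common gcd, build a common
 * multiple of the reduced values, then give every group that multiple
 * divided by its own reduced share. A larger share therefore yields a
 * smaller weight, i.e. slower virtual-time growth and more device time.
 * A standalone run of that arithmetic on made-up shares:
 */
#include <stdio.h>

static unsigned long long demo_gcd(unsigned long long a, unsigned long long b)
{
	while (a != 0 && b != 0) {
		if (a > b)
			a %= b;
		else
			b %= a;
	}
	return a ? a : b;
}

int main(void)
{
	unsigned long long shares[3] = { 1024, 2048, 3072 };
	unsigned long long red[3], g = 0, prod = 1, wg = 0, w;
	int i;

	for (i = 0; i < 3; i++)
		g = demo_gcd(g, shares[i]);		/* -> 1024 */
	for (i = 0; i < 3; i++) {
		red[i] = shares[i] / g;			/* -> 1, 2, 3 */
		if (prod % red[i])
			prod *= red[i];			/* -> 6 */
	}
	for (i = 0; i < 3; i++)
		wg = demo_gcd(wg, prod / red[i]);	/* gcd(6,3,2) = 1 */
	for (i = 0; i < 3; i++) {
		w = (prod / red[i]) / wg;
		printf("shares=%llu -> weight=%llu\n", shares[i], w);
	}
	/* shares=1024 -> weight=6, 2048 -> 3, 3072 -> 2 */
	return 0;
}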
+ * + * Return: Zero on success or -EINVAL + */ +int xcu_cg_set_sched(struct xsched_group *xg, int type) +{ + if (type == xg->sched_type) + return 0; + + if (xg->parent != root_xcg) + return -EINVAL; + + if (!list_empty(&xg->members)) + return -EBUSY; + + if (xg->sched_type == XSCHED_TYPE_CFS) + xcu_cfs_cg_deinit(xg); + + xg->sched_type = type; + if (type != XSCHED_TYPE_CFS) + return 0; + + /* type is XSCHED_TYPE_CFS */ + return xcu_cfs_cg_init(xg, xg->parent); +} + +static ssize_t xcu_sched_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct cgroup_subsys_state *css = of_css(of); + struct xsched_group *xg = xcu_cg_from_css(css); + char type_name[4]; + int type = -1; + + ssize_t ret = sscanf(buf, "%3s", type_name); + + if (ret < 1) + return -EINVAL; + + for (type = 0; type < XSCHED_TYPE_NUM; type++) { + if (!strcmp(type_name, xcu_sched_name[type])) + break; + } + + if (type == XSCHED_TYPE_NUM) + return -EINVAL; + + if (!list_empty(&css->children)) + return -EBUSY; + + mutex_lock(&xcg_mutex); + ret = xcu_cg_set_sched(xg, type); + mutex_unlock(&xcg_mutex); + + return (ret) ? ret : nbytes; +} + +static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) +{ + s64 ret = 0; + struct xsched_group *xcucg = xcu_cg_from_css(css); + + spin_lock(&xcucg->lock); + switch (cft->private) { + case XCU_FILE_SHARES: + ret = xcucg->shares_cfg; + break; + default: + break; + } + spin_unlock(&xcucg->lock); + return ret; +} + +static inline u64 gcd(u64 a, u64 b) +{ + while (a != 0 && b != 0) { + if (a > b) + a %= b; + else + b %= a; + } + return (a) ? a : b; +} + +void xcu_grp_shares_update(struct xsched_group *xg) +{ + int id; + struct xsched_cu *xcu; + struct xsched_group *xgi, *parent = xg; + u64 sh_sum = 0, sh_gcd = 0, w_gcd = 0, sh_prod_red = 1; + + spin_lock(&parent->lock); + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) + sh_gcd = gcd(sh_gcd, xgi->shares_cfg); + } + + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) { + sh_sum += xgi->shares_cfg; + xgi->shares_cfg_red = div_u64(xgi->shares_cfg, sh_gcd); + + if ((sh_prod_red % xgi->shares_cfg_red) != 0) + sh_prod_red *= xgi->shares_cfg_red; + } + } + + parent->children_shares_sum = sh_sum; + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) { + xgi->weight = div_u64(sh_prod_red, xgi->shares_cfg_red); + w_gcd = gcd(w_gcd, xgi->weight); + } + } + + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) { + xgi->weight = div_u64(xgi->weight, w_gcd); + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + xgi->perxcu_priv[id].xse.cfs.weight = xgi->weight; + mutex_unlock(&xcu->xcu_lock); + } + } + } + spin_unlock(&parent->lock); +} + +static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val) +{ + int ret = 0; + struct xsched_group *xcucg = xcu_cg_from_css(css); + + spin_lock(&xcucg->lock); + switch (cft->private) { + case XCU_FILE_SHARES: + if (val <= 0) { + ret = -EINVAL; + break; + } + xcucg->shares_cfg = val; + xcu_grp_shares_update(xcucg->parent); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock(&xcucg->lock); + + return ret; +} + +static int xcu_stat(struct seq_file *sf, void *v) +{ + struct cgroup_subsys_state *css = seq_css(sf); + struct xsched_group *xcucg = xcu_cg_from_css(css); + + u64 nr_throttled 
= 0; + u64 throttled_time = 0; + u64 exec_runtime = 0; + + int xcu_id; + struct xsched_cu *xcu; + + if (xcucg->sched_type == XSCHED_TYPE_RT) { + seq_printf(sf, "RT group stat is not supported\n"); + return 0; + } + + for_each_active_xcu(xcu, xcu_id) { + nr_throttled += xcucg->perxcu_priv[xcu_id].nr_throttled; + throttled_time += xcucg->perxcu_priv[xcu_id].throttled_time; + exec_runtime += + xcucg->perxcu_priv[xcu_id].xse.cfs.sum_exec_runtime; + } + + seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); + seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, + xcucg->parent->children_shares_sum, xcucg->weight); + + return 0; +} + +static struct cftype xcu_cg_files[] = { + { + .name = "shares", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_SHARES, + }, + { + .name = "stat", + .seq_show = xcu_stat, + }, + { + .name = "sched", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = xcu_sched_show, + .write = xcu_sched_write, + }, + {} /* terminate */ +}; + +struct cgroup_subsys xcu_cgrp_subsys = { + .css_alloc = xcu_css_alloc, + .css_online = xcu_css_online, + .css_offline = xcu_css_offline, + .css_free = xcu_css_free, + .can_attach = xcu_can_attach, + .cancel_attach = xcu_cancel_attach, + .attach = xcu_attach, + .dfl_cftypes = xcu_cg_files, + .legacy_cftypes = xcu_cg_files, + .early_init = false, +}; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 378dd4e66476..64f2cbafb8cd 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -26,6 +26,7 @@ int num_active_xcu; spinlock_t xcu_mgr_lock; +extern struct xsched_group *root_xcg; /* Xsched XCU array and bitmask that represents which XCUs * are present and online. @@ -49,6 +50,7 @@ static void put_prev_ctx(struct xsched_entity *xse) xse->class->put_prev_ctx(xse); xse->last_exec_runtime = 0; atomic_set(&xse->submitted_one_kick, 0); + XSCHED_DEBUG("Put current xse %d @ %s\n", xse->tgid, __func__); } static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) @@ -194,6 +196,8 @@ static int delete_ctx(struct xsched_context *ctx) XSCHED_DEBUG("Deleting ctx %d, pending kicks left=%d @ %s\n", xse->tgid, atomic_read(&xse->kicks_pending_ctx_cnt), __func__); + xsched_group_xse_detach(xse); + return 0; } @@ -317,7 +321,10 @@ struct xsched_cu *xcu_find(uint32_t *type, int xsched_xse_set_class(struct xsched_entity *xse) { - switch (xse->task_type) { +#ifdef CONFIG_CGROUP_XCU + xsched_group_inherit(current, xse); +#endif + switch (xse->parent_grp->sched_type) { case XSCHED_TYPE_RT: xse->class = &rt_xsched_class; XSCHED_DEBUG("Context is in RT class %s\n", __func__); @@ -344,7 +351,7 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) xse->total_scheduled = 0; xse->total_submitted = 0; xse->last_exec_runtime = 0; - xse->task_type = XSCHED_TYPE_RT; + xse->task_type = GET_VS_TASK_TYPE(vs); xse->fd = ctx->fd; xse->tgid = ctx->tgid; @@ -715,10 +722,9 @@ EXPORT_SYMBOL(xsched_xcu_register); int __init xsched_init(void) { - /* Initializing global Xsched context list. */ + /* Initializing global XSched context list. 
*/ INIT_LIST_HEAD(&xsched_ctx_list); - + xcu_cg_init_common(root_xcg); return 0; } - late_initcall(xsched_init); -- Gitee From b01b7acc3f5c8d5910cca24312ca64240c2f3ad6 Mon Sep 17 00:00:00 2001 From: Alekseev Dmitry Date: Tue, 30 Sep 2025 07:06:33 +0000 Subject: [PATCH 13/16] xsched: Add support for CFS quota for cgroups hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Add support for CFS quota for cgroups. Signed-off-by: Alekseev Dmitry Signed-off-by: Hui Tang Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- include/linux/xsched.h | 11 +++- include/uapi/linux/xcu_vstream.h | 1 + kernel/xsched/Makefile | 2 +- kernel/xsched/cfs.c | 1 + kernel/xsched/cfs_quota.c | 95 ++++++++++++++++++++++++++++++++ kernel/xsched/cgroup.c | 62 ++++++++++++++++++++- kernel/xsched/core.c | 23 ++++---- 7 files changed, 181 insertions(+), 14 deletions(-) create mode 100644 kernel/xsched/cfs_quota.c diff --git a/include/linux/xsched.h b/include/linux/xsched.h index e59e4fe5e4b4..5ffaffc5afdb 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -38,7 +38,8 @@ #define RUNTIME_INF ((u64)~0ULL) #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 -#define XSCHED_CFS_MIN_TIMESLICE (10*NSEC_PER_MSEC) +#define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC) +#define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 #define __GET_VS_TASK_TYPE(t) ((t)&0xFF) @@ -590,6 +591,7 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, struct vstream_info *vs, vstream_args_t *arg) { vsm->sq_id = arg->sq_id; + vsm->exec_time = arg->vk_args.exec_time; vsm->sqe_num = arg->vk_args.sqe_num; vsm->timeout = arg->vk_args.timeout; memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); @@ -615,6 +617,13 @@ int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_init_common(struct xsched_group *xcg); void xcu_grp_shares_update(struct xsched_group *xg); void xsched_group_xse_detach(struct xsched_entity *xse); + +void xsched_quota_init(void); +void xsched_quota_timeout_init(struct xsched_group *xg); +void xsched_quota_timeout_update(struct xsched_group *xg); +void xsched_quota_account(struct xsched_group *xg, s64 exec_time); +bool xsched_quota_exceed(struct xsched_group *xg); +void xsched_quota_refill(struct work_struct *work); void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); #endif /* __LINUX_XSCHED_H__ */ diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index 32c71dce5ad1..46d5a32db68e 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -28,6 +28,7 @@ typedef struct vstream_free_args { } vstream_free_args_t; typedef struct vstream_kick_args { __u32 sqe_num; + __u32 exec_time; __s32 timeout; __s8 sqe[XCU_SQE_SIZE_MAX]; } vstream_kick_args_t; diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile index c4c06b6038ff..8ab32b086b3d 100644 --- a/kernel/xsched/Makefile +++ b/kernel/xsched/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += vstream.o -obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o cfs_quota.o obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c index 1313c7e73a11..94189d8088ac 100644 --- a/kernel/xsched/cfs.c +++ b/kernel/xsched/cfs.c @@ -209,6 +209,7 @@ static void 
put_prev_ctx_fair(struct xsched_entity *xse) { struct xsched_entity_cfs *prev = &xse->cfs; + xsched_quota_account(xse->parent_grp, (s64)xse->last_exec_runtime); xs_update(prev, xse->last_exec_runtime); } diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c new file mode 100644 index 000000000000..a62f07ad3cdc --- /dev/null +++ b/kernel/xsched/cfs_quota.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Bandwidth provisioning for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include + +static struct workqueue_struct *quota_workqueue; + +void xsched_quota_refill(struct work_struct *work) +{ + uint32_t id; + struct xsched_cu *xcu; + struct xsched_group *xg; + + xg = container_of(work, struct xsched_group, refill_work); + + spin_lock(&xg->lock); + xg->runtime = max((xg->runtime - xg->quota), (s64)0); + hrtimer_start(&xg->quota_timeout, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); + spin_unlock(&xg->lock); + + for_each_active_xcu(xcu, id) { + xcu = xsched_cu_mgr[id]; + mutex_lock(&xcu->xcu_lock); + if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { + enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); + wake_up_interruptible(&xcu->wq_xcu_idle); + } + mutex_unlock(&xcu->xcu_lock); + } +} + +static enum hrtimer_restart quota_timer_cb(struct hrtimer *hrtimer) +{ + struct xsched_group *xg; + + xg = container_of(hrtimer, struct xsched_group, quota_timeout); + queue_work(quota_workqueue, &xg->refill_work); + + return HRTIMER_NORESTART; +} + +void xsched_quota_account(struct xsched_group *xg, s64 exec_time) +{ + spin_lock(&xg->lock); + xg->runtime += exec_time; + spin_unlock(&xg->lock); +} + +bool xsched_quota_exceed(struct xsched_group *xg) +{ + bool ret; + + spin_lock(&xg->lock); + ret = (xg->quota > 0) ? 
(xg->runtime >= xg->quota) : false; + spin_unlock(&xg->lock); + + return ret; +} + +void xsched_quota_init(void) +{ + quota_workqueue = create_singlethread_workqueue("xsched_quota_workqueue"); +} + +void xsched_quota_timeout_init(struct xsched_group *xg) +{ + hrtimer_init(&xg->quota_timeout, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + xg->quota_timeout.function = quota_timer_cb; +} + +void xsched_quota_timeout_update(struct xsched_group *xg) +{ + struct hrtimer *t = &xg->quota_timeout; + + hrtimer_cancel(t); + + if (xg->quota > 0 && xg->period > 0) + hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); +} diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c index 8ae17069e031..aa675a013927 100644 --- a/kernel/xsched/cgroup.c +++ b/kernel/xsched/cgroup.c @@ -47,6 +47,8 @@ void xcu_cg_init_common(struct xsched_group *xcg) spin_lock_init(&xcg->lock); INIT_LIST_HEAD(&xcg->members); INIT_LIST_HEAD(&xcg->children_groups); + xsched_quota_timeout_init(xcg); + INIT_WORK(&xcg->refill_work, xsched_quota_refill); } static void xcu_cfs_root_cg_init(void) @@ -62,6 +64,10 @@ static void xcu_cfs_root_cg_init(void) } root_xcg->sched_type = XSCHED_TYPE_DFLT; + root_xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + root_xcg->quota = XSCHED_TIME_INF; + root_xcg->runtime = 0; + xsched_quota_init(); } /** @@ -115,6 +121,9 @@ static int xcu_cfs_cg_init(struct xsched_group *xcg, xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; xcu_grp_shares_update(parent_xg); + xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + xcg->quota = XSCHED_TIME_INF; + xcg->runtime = 0; return 0; @@ -223,6 +232,8 @@ static void xcu_css_free(struct cgroup_subsys_state *css) break; } } + hrtimer_cancel(&xcg->quota_timeout); + cancel_work_sync(&xcg->refill_work); list_del(&xcg->group_node); mutex_unlock(&xcg_mutex); @@ -460,6 +471,12 @@ static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + ret = xcucg->period / NSEC_PER_MSEC; + break; + case XCU_FILE_QUOTA_MS: + ret = (xcucg->quota > 0) ? xcucg->quota / NSEC_PER_MSEC : xcucg->quota; + break; case XCU_FILE_SHARES: ret = xcucg->shares_cfg; break; @@ -530,11 +547,37 @@ static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, { int ret = 0; struct xsched_group *xcucg = xcu_cg_from_css(css); + s64 quota_ns; + + if (xcucg->sched_type != XSCHED_TYPE_CFS) + return -EINVAL; spin_lock(&xcucg->lock); switch (cft->private) { + case XCU_FILE_PERIOD_MS: + if (val < 1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->period = val * NSEC_PER_MSEC; + xsched_quota_timeout_update(xcucg); + break; + case XCU_FILE_QUOTA_MS: + if (val < -1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + /* Runtime should be updated when modifying quota_ms configuration */ + quota_ns = (val > 0) ? 
val * NSEC_PER_MSEC : val;
+		if (xcucg->quota > 0 && quota_ns > 0)
+			xcucg->runtime = max((xcucg->runtime - quota_ns), (s64)0);
+		else
+			xcucg->runtime = 0;
+		xcucg->quota = quota_ns;
+		xsched_quota_timeout_update(xcucg);
+		break;
 	case XCU_FILE_SHARES:
-		if (val <= 0) {
+		if (val <= 0 || val > U64_MAX) {
 			ret = -EINVAL;
 			break;
 		}
@@ -577,11 +620,28 @@ static int xcu_stat(struct seq_file *sf, void *v)
 	seq_printf(sf, "exec_runtime: %llu\n", exec_runtime);
 	seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg,
 		   xcucg->parent->children_shares_sum, xcucg->weight);
+	seq_printf(sf, "quota: %lld\n", xcucg->quota);
+	seq_printf(sf, "used: %lld\n", xcucg->runtime);
+	seq_printf(sf, "period: %lld\n", xcucg->period);
 
 	return 0;
 }
 
 static struct cftype xcu_cg_files[] = {
+	{
+		.name = "period_ms",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = xcu_read_s64,
+		.write_s64 = xcu_write_s64,
+		.private = XCU_FILE_PERIOD_MS,
+	},
+	{
+		.name = "quota_ms",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = xcu_read_s64,
+		.write_s64 = xcu_write_s64,
+		.private = XCU_FILE_QUOTA_MS,
+	},
 	{
 		.name = "shares",
 		.flags = CFTYPE_NOT_ON_ROOT,
diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c
index 64f2cbafb8cd..78808f6ae561 100644
--- a/kernel/xsched/core.c
+++ b/kernel/xsched/core.c
@@ -46,7 +46,6 @@ static void put_prev_ctx(struct xsched_entity *xse)
 	struct xsched_cu *xcu = xse->xcu;
 
 	lockdep_assert_held(&xcu->xcu_lock);
-
 	xse->class->put_prev_ctx(xse);
 	xse->last_exec_runtime = 0;
 	atomic_set(&xse->submitted_one_kick, 0);
@@ -505,16 +504,18 @@ static int xsched_schedule(void *input_xcu)
 			continue;
 
 		curr_xse = xcu->xrq.curr_xse;
-		if (curr_xse) { /* if not deleted yet */
-			put_prev_ctx(curr_xse);
-			if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt)) {
-				dequeue_ctx(curr_xse, xcu);
-				XSCHED_DEBUG(
-					"%s: Dequeue xse %d due to zero kicks on xcu %u\n",
-					__func__, curr_xse->tgid, xcu->id);
-				curr_xse = xcu->xrq.curr_xse = NULL;
-			}
-		}
+		if (!curr_xse)
+			continue;
+
+		/* if not deleted yet */
+		put_prev_ctx(curr_xse);
+		if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt))
+			dequeue_ctx(curr_xse, xcu);
+
+		if (xsched_quota_exceed(curr_xse->parent_grp))
+			dequeue_ctx(&curr_xse->parent_grp->perxcu_priv[xcu->id].xse, xcu);
+
+		xcu->xrq.curr_xse = NULL;
 	}
 
 	return err;
-- 
Gitee

From 3b4199d3759f870e3d70d4a5e521bb5d0468f589 Mon Sep 17 00:00:00 2001
From: Alekseev Dmitry
Date: Tue, 30 Sep 2025 07:06:34 +0000
Subject: [PATCH 14/16] xsched/core: Add xcu_wait() implementation

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

The XPU device should be exclusive within a scheduling slice, so add an
xcu_wait() implementation that returns only after the submitted XPU
kernels have completed execution. It is called after submit_kick().
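A rough sketch of the per-slice flow this enables, and of how the measured
execution time feeds the CFS quota accounting added earlier in the series
(all numbers below are illustrative):

	#include <stdio.h>

	int main(void)
	{
		long long quota = 30000000, period = 100000000;	/* ns */
		long long runtime = 0;
		long long slice_exec[3] = { 12000000, 15000000, 9000000 };

		for (int i = 0; i < 3; i++) {
			/* submit_kick(); then xcu_wait() blocks until the
			 * kernels finish, so slice_exec[i] is real device
			 * time rather than a caller-supplied estimate. */
			runtime += slice_exec[i];	/* xsched_quota_account() */
			printf("slice %d: runtime=%lld exceeded=%d\n",
			       i, runtime, quota > 0 && runtime >= quota);
		}
		/* On the period timer: runtime = max(runtime - quota, 0). */
		runtime = runtime - quota > 0 ? runtime - quota : 0;
		printf("after refill: runtime=%lld (period=%lld ns)\n",
		       runtime, period);
		return 0;
	}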
Signed-off-by: Alekseev Dmitry Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- drivers/xcu/xcu_group.c | 23 +++- include/linux/vstream.h | 7 +- include/linux/xcu_group.h | 8 ++ include/linux/xsched.h | 19 +-- include/uapi/linux/xcu_vstream.h | 4 +- kernel/xsched/core.c | 200 ++++++++++++++++--------------- kernel/xsched/rt.c | 17 +++ 7 files changed, 161 insertions(+), 117 deletions(-) diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c index 605c023a2cfe..891ce70bb36f 100644 --- a/drivers/xcu/xcu_group.c +++ b/drivers/xcu/xcu_group.c @@ -137,7 +137,12 @@ int xcu_run(struct xcu_op_handler_params *params) */ int xcu_wait(struct xcu_op_handler_params *params) { - return 0; + if (!params->group->opt || !params->group->opt->wait) { + XSCHED_ERR("No function [wait] called.\n"); + return -EINVAL; + } + + return params->group->opt->wait(params); } /* This function runs "complete" callback for a given xcu_group @@ -217,6 +222,22 @@ int xcu_logic_free(struct xcu_op_handler_params *params) return params->group->opt->logic_free(params); } +/* This function runs a "sqe_op" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to set or get sqe info. + */ +int xcu_sqe_op(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->sqe_op) { + XSCHED_ERR("No function [sqe_op] called.\n"); + return -EINVAL; + } + + return params->group->opt->sqe_op(params); +} + static struct xcu_group __xcu_group_root = { .id = 0, .type = XCU_TYPE_ROOT, diff --git a/include/linux/vstream.h b/include/linux/vstream.h index 58ee4c235a07..f0c290dc184c 100644 --- a/include/linux/vstream.h +++ b/include/linux/vstream.h @@ -13,15 +13,18 @@ * to be processed by a driver. */ typedef struct vstream_metadata { - uint32_t exec_time; /* A value of SQ tail that has been passed with the * kick that is described by this exact metadata object. */ uint32_t sq_tail; uint32_t sqe_num; uint32_t sq_id; + uint8_t sqe[XCU_SQE_SIZE_MAX]; + + /* Report buffer for fake read. */ + int8_t cqe[XCU_CQE_BUF_SIZE]; + uint32_t cqe_num; int32_t timeout; - int8_t sqe[XCU_SQE_SIZE_MAX]; /* A node for metadata list */ struct list_head node; diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h index 93f732f84694..e73c64f6c520 100644 --- a/include/linux/xcu_group.h +++ b/include/linux/xcu_group.h @@ -17,6 +17,11 @@ enum xcu_type { XCU_TYPE_XPU, }; +enum xcu_sqe_op_type { + SQE_SET_NOTIFY, + SQE_IS_NOTIFY, +}; + /** * @group: value for this entry. * @hash_node: hash node list. 
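/*
 * xcu_wait() and xcu_sqe_op() above follow the same guarded-dispatch
 * pattern as the rest of this file: the driver publishes an optional
 * callback table, and each wrapper fails with -EINVAL when its slot is
 * missing. A compact self-contained model of the pattern:
 */
#include <stdio.h>

struct demo_ops { int (*wait)(void *params); };
struct demo_group { const struct demo_ops *opt; };

static int demo_xcu_wait(struct demo_group *g, void *params)
{
	if (!g->opt || !g->opt->wait)
		return -22;		/* -EINVAL: no [wait] op registered */
	return g->opt->wait(params);
}

static int demo_wait_impl(void *params) { (void)params; return 0; }

int main(void)
{
	const struct demo_ops ops = { .wait = demo_wait_impl };
	struct demo_group with_op = { &ops }, without_op = { 0 };

	printf("%d %d\n", demo_xcu_wait(&with_op, NULL),
	       demo_xcu_wait(&without_op, NULL));	/* prints: 0 -22 */
	return 0;
}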
@@ -41,6 +46,7 @@ struct xcu_op_handler_params { void *param_5; void *param_6; void *param_7; + void *param_8; }; }; }; @@ -55,6 +61,7 @@ struct xcu_operation { xcu_op_handler_fn_t alloc; xcu_op_handler_fn_t logic_alloc; xcu_op_handler_fn_t logic_free; + xcu_op_handler_fn_t sqe_op; }; struct xcu_group { @@ -99,6 +106,7 @@ extern int xcu_finish(struct xcu_op_handler_params *params); extern int xcu_alloc(struct xcu_op_handler_params *params); extern int xcu_logic_alloc(struct xcu_op_handler_params *params); extern int xcu_logic_free(struct xcu_op_handler_params *params); +extern int xcu_sqe_op(struct xcu_op_handler_params *params); #endif /* !CONFIG_XCU_SCHEDULER */ #endif /* __XSCHED_XCU_GROUP_H__ */ diff --git a/include/linux/xsched.h b/include/linux/xsched.h index 5ffaffc5afdb..825ff2dc0c8e 100644 --- a/include/linux/xsched.h +++ b/include/linux/xsched.h @@ -38,7 +38,6 @@ #define RUNTIME_INF ((u64)~0ULL) #define XSCHED_TIME_INF RUNTIME_INF #define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 -#define XSCHED_CFS_MIN_TIMESLICE (10 * NSEC_PER_MSEC) #define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) #define XSCHED_CFG_SHARE_DFLT 1024 @@ -438,13 +437,8 @@ static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) atomic_inc(&xse->kicks_pending_ctx_cnt); /* Incrementing prio based pending kicks counter for RT class */ - if (xse_is_rt(xse)) { + if (xse_is_rt(xse)) atomic_inc(&xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]); - XSCHED_DEBUG("xcu increased pending kicks @ %s\n", __func__); - } else { - XSCHED_DEBUG("xse %u isn't rt class @ %s\n", xse->tgid, - __func__); - } return 0; } @@ -476,18 +470,12 @@ static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) /* Decrementing prio based pending kicks counter for RT class. */ if (xse_is_rt(xse)) { kicks_prio_rt = &xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]; - if (!atomic_read(kicks_prio_rt)) { XSCHED_ERR( - "Tried to decrement prio pending kicks beyond 0!\n"); + "Try to decrement prio pending kicks beyond 0!\n"); return -EINVAL; } - atomic_dec(kicks_prio_rt); - XSCHED_DEBUG("xcu decreased pending kicks @ %s\n", __func__); - } else { - XSCHED_DEBUG("xse %u isn't rt class @ %s\n", xse->tgid, - __func__); } return 0; @@ -591,7 +579,6 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, struct vstream_info *vs, vstream_args_t *arg) { vsm->sq_id = arg->sq_id; - vsm->exec_time = arg->vk_args.exec_time; vsm->sqe_num = arg->vk_args.sqe_num; vsm->timeout = arg->vk_args.timeout; memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); @@ -610,8 +597,6 @@ struct xsched_cu *xcu_find(uint32_t *type, /* Vstream metadata proccesing functions.*/ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); -void submit_kick(struct vstream_info *vs, struct xcu_op_handler_params *params, - struct vstream_metadata *vsm); /* Xsched group manage functions */ int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); void xcu_cg_init_common(struct xsched_group *xcg); diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h index 46d5a32db68e..f72d4720e993 100644 --- a/include/uapi/linux/xcu_vstream.h +++ b/include/uapi/linux/xcu_vstream.h @@ -6,6 +6,9 @@ #define PAYLOAD_SIZE_MAX 512 #define XCU_SQE_SIZE_MAX 64 +#define XCU_CQE_SIZE_MAX 32 +#define XCU_CQE_REPORT_NUM 4 +#define XCU_CQE_BUF_SIZE (XCU_CQE_REPORT_NUM * XCU_CQE_SIZE_MAX) /* * VSTREAM_ALLOC: alloc a vstream, buffer for tasks @@ -28,7 +31,6 @@ 
typedef struct vstream_free_args { } vstream_free_args_t; typedef struct vstream_kick_args { __u32 sqe_num; - __u32 exec_time; __s32 timeout; __s8 sqe[XCU_SQE_SIZE_MAX]; } vstream_kick_args_t; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 78808f6ae561..744db05e36ec 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -54,15 +54,13 @@ static void put_prev_ctx(struct xsched_entity *xse) static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) { - int kick_count; + int kick_count, scheduled = 0, not_empty; struct vstream_info *vs; - unsigned int sum_exec_time = 0; - size_t kicks_submitted = 0; + struct xcu_op_handler_params params; struct vstream_metadata *vsm; - int not_empty; kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); - XSCHED_DEBUG("Before decrement XSE kick_count=%u @ %s\n", + XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n", kick_count, __func__); if (kick_count == 0) { @@ -74,31 +72,44 @@ static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) do { not_empty = 0; for_each_vstream_in_ctx(vs, xse->ctx) { + if (scheduled >= XSCHED_CFS_KICK_SLICE) + break; + spin_lock(&vs->stream_lock); vsm = xsched_vsm_fetch_first(vs); spin_unlock(&vs->stream_lock); - if (vsm) { - list_add_tail(&vsm->node, &xcu->vsm_list); - - sum_exec_time += vsm->exec_time; - kicks_submitted++; - xsched_dec_pending_kicks_xse(xse); - XSCHED_DEBUG( - "vs id = %d Kick submit exec_time %u sq_tail %u sqe_num %u sq_id %u @ %s\n", - vs->id, vsm->exec_time, vsm->sq_tail, - vsm->sqe_num, vsm->sq_id, __func__); - not_empty++; + + if (!vsm) + continue; + list_add_tail(&vsm->node, &xcu->vsm_list); + scheduled++; + xsched_dec_pending_kicks_xse(xse); + not_empty++; + } + } while ((scheduled < XSCHED_CFS_KICK_SLICE) && (not_empty)); + + /* + * Iterate over all vstreams in context: + * Set wr_cqe bit in last computing task in vsm_list + */ + for_each_vstream_in_ctx(vs, xse->ctx) { + list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) { + if (vsm->parent == vs) { + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_SET_NOTIFY}; + params.param_2 = &vsm->sqe; + xcu_sqe_op(¶ms); + break; } } - } while ((sum_exec_time < XSCHED_CFS_MIN_TIMESLICE) && (not_empty)); + } kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", kick_count, __func__); - xse->total_scheduled += kicks_submitted; - - return kicks_submitted; + xse->total_scheduled += scheduled; + return scheduled; } static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) @@ -196,7 +207,6 @@ static int delete_ctx(struct xsched_context *ctx) atomic_read(&xse->kicks_pending_ctx_cnt), __func__); xsched_group_xse_detach(xse); - return 0; } @@ -368,7 +378,7 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) err = xsched_xse_set_class(xse); if (err) { - XSCHED_ERR("Failed to set xse class @ %s\n", __func__); + XSCHED_ERR("Fail to set xse class @ %s\n", __func__); return err; } @@ -401,70 +411,97 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) return err; } -/* - * A function for submitting stream's commands (sending commands to a XCU). 
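/*
 * The reworked select_work_def() above batches up to a slice's worth of
 * kicks and then tags only the last queued entry of each vstream with
 * SQE_SET_NOTIFY, so one completion wait later covers the whole batch.
 * A list-free model of that "mark the last entry per owner" pass (the
 * owner ids and batch size are illustrative):
 */
#include <stdio.h>

#define DEMO_BATCH 6

int main(void)
{
	int owner[DEMO_BATCH] = { 1, 2, 1, 2, 2, 1 };	/* vstream per kick */
	int notify[DEMO_BATCH] = { 0 };

	for (int vs = 1; vs <= 2; vs++)			/* for_each_vstream_in_ctx */
		for (int i = DEMO_BATCH - 1; i >= 0; i--)	/* reverse walk */
			if (owner[i] == vs) {
				notify[i] = 1;		/* SQE_SET_NOTIFY */
				break;
			}

	for (int i = 0; i < DEMO_BATCH; i++)
		printf("kick %d (vs %d): notify=%d\n", i, owner[i], notify[i]);
	return 0;	/* only kicks 4 and 5 end up with notify=1 */
}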
- */ -static int xsched_proc(struct xsched_cu *xcu, struct vstream_info *vs, - struct vstream_metadata *vsm) +static void submit_kick(struct vstream_metadata *vsm) { + struct vstream_info *vs = vsm->parent; struct xcu_op_handler_params params; - struct xsched_entity *xse; + params.group = vs->xcu->group; + params.fd = vs->fd; + params.param_1 = &vs->id; + params.param_2 = &vs->channel_id; + params.param_3 = vsm->sqe; + params.param_4 = &vsm->sqe_num; + params.param_5 = &vsm->timeout; + params.param_6 = &vs->sqcq_type; + params.param_7 = vs->drv_ctx; + params.param_8 = &vs->logic_vcq_id; - XSCHED_CALL_STUB(); - - xse = &vs->ctx->xse; - - /* Init input parameters for xcu_run and xcu_wait callbacks. */ - params.group = xcu->group; - - /* Increase process time by abstract kick handling time. */ - xse->last_exec_runtime += vsm->exec_time; - - XSCHED_DEBUG("Process vsm sq_tail %d exec_time %u sqe_num %d sq_id %d@ %s\n", - vsm->sq_tail, vsm->exec_time, vsm->sqe_num, vsm->sq_id, __func__); - submit_kick(vs, ¶ms, vsm); + /* Send vstream on a device for processing. */ + if (xcu_run(¶ms)) { + XSCHED_ERR( + "Fail to send Vstream id %u tasks to a device for processing.\n", + vs->id); + } - xse->total_submitted++; + XSCHED_DEBUG("Vstream id %u submit vsm: sq_tail %u\n", vs->id, vsm->sq_tail); +} - XSCHED_DEBUG("xse %d total_submitted = %lu @ %s\n", - xse->tgid, xse->total_submitted, __func__); +static void submit_wait(struct vstream_metadata *vsm) +{ + struct vstream_info *vs = vsm->parent; + struct xcu_op_handler_params params; + /* Wait timeout in ms. */ + int32_t timeout = 500; + + params.group = vs->xcu->group; + params.param_1 = &vs->channel_id; + params.param_2 = &vs->logic_vcq_id; + params.param_3 = &vs->user_stream_id; + params.param_4 = &vsm->sqe; + params.param_5 = vsm->cqe; + params.param_6 = vs->drv_ctx; + params.param_7 = &timeout; + + /* Wait for a device to complete processing. 
*/ + if (xcu_wait(¶ms)) { + XSCHED_ERR("Fail to wait Vstream id %u tasks, logic_cq_id %u.\n", + vs->id, vs->logic_vcq_id); + } - XSCHED_EXIT_STUB(); - return 0; + XSCHED_DEBUG("Vstream id %u wait finish, logic_cq_id %u\n", + vs->id, vs->logic_vcq_id); } static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) { struct vstream_metadata *vsm, *tmp; - unsigned int submit_exec_time = 0; - size_t kicks_submitted = 0; - unsigned long wait_us; + int submitted = 0; + long submit_exec_time = 0; + ktime_t t_start = 0; + struct xcu_op_handler_params params; XSCHED_DEBUG("%s called for xse %d on xcu %u\n", __func__, xse->tgid, xcu->id); list_for_each_entry_safe(vsm, tmp, &xcu->vsm_list, node) { - xsched_proc(xcu, vsm->parent, vsm); - submit_exec_time += vsm->exec_time; - kicks_submitted++; + submit_kick(vsm); + XSCHED_DEBUG("Xse %d vsm %u sched_delay: %lld ns\n", + xse->tgid, vsm->sq_id, ktime_to_ns(ktime_sub(ktime_get(), vsm->add_time))); + + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_IS_NOTIFY}; + params.param_2 = &vsm->sqe; + if (xcu_sqe_op(¶ms)) { + mutex_unlock(&xcu->xcu_lock); + t_start = ktime_get(); + submit_wait(vsm); + submit_exec_time += ktime_to_ns(ktime_sub(ktime_get(), t_start)); + mutex_lock(&xcu->xcu_lock); + } + submitted++; + list_del(&vsm->node); + kfree(vsm); } + xse->last_exec_runtime += submit_exec_time; + xse->total_submitted += submitted; + atomic_add(submitted, &xse->submitted_one_kick); INIT_LIST_HEAD(&xcu->vsm_list); + XSCHED_DEBUG("Xse %d submitted=%d total=%zu, exec_time=%ld @ %s\n", + xse->tgid, submitted, xse->total_submitted, + submit_exec_time, __func__); - mutex_unlock(&xcu->xcu_lock); - - wait_us = div_u64(submit_exec_time, NSEC_PER_USEC); - XSCHED_DEBUG("XCU kicks_submitted=%lu wait_us=%lu @ %s\n", - kicks_submitted, wait_us, __func__); - - if (wait_us > 0) { - /* Sleep shift not larger than 12.5% */ - usleep_range(wait_us, wait_us + (wait_us >> 3)); - } - - mutex_lock(&xcu->xcu_lock); - - return kicks_submitted; + return submitted; } static inline bool should_preempt(struct xsched_entity *xse) @@ -521,35 +558,6 @@ static int xsched_schedule(void *input_xcu) return err; } -void submit_kick(struct vstream_info *vs, - struct xcu_op_handler_params *params, - struct vstream_metadata *vsm) -{ - int ret; - - params->fd = vs->fd; - params->param_1 = &vs->id; - params->param_2 = &vs->channel_id; - params->param_3 = vsm->sqe; - params->param_4 = &vsm->sqe_num; - params->param_5 = &vsm->timeout; - params->param_6 = &vs->sqcq_type; - params->param_7 = vs->drv_ctx; - /* Send vstream on a device for processing. */ - ret = xcu_run(params); - if (ret) { - XSCHED_ERR( - "Failed to send vstream tasks vstreamId=%d to a device for processing.\n", - vs->id); - } - - XSCHED_DEBUG("Vstream_id %d submit vsm: sq_tail %d\n", vs->id, vsm->sq_tail); - - kfree(vsm); - - return; -} - /* Initialize xsched rt runqueue during kernel init. * Should only be called from xsched_rq_init function. 
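/*
 * __xsched_submit() above now derives last_exec_runtime from wall time
 * stamped around the blocking submit_wait() call (with the xcu mutex
 * dropped so the worker does not stall the scheduler while it sleeps).
 * A userspace analogue of just the measurement step:
 */
#include <stdio.h>
#include <time.h>

static long long demo_now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	long long t_start, exec_time = 0;

	t_start = demo_now_ns();
	/* ... blocking wait for device completion would go here ... */
	exec_time += demo_now_ns() - t_start;	/* charged to the XSE */

	printf("exec_time=%lld ns\n", exec_time);
	return 0;
}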
*/
@@ -643,7 +651,7 @@ int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg)
 
 	new_vsm = kmalloc(sizeof(struct vstream_metadata), GFP_KERNEL);
 	if (!new_vsm) {
-		XSCHED_ERR("Failed to alloc kick metadata for vs %u @ %s\n",
+		XSCHED_ERR("Fail to alloc kick metadata for vs %u @ %s\n",
 			   vs->id, __func__);
 		return -ENOMEM;
 	}
diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c
index 3629fba3a48d..1a0a7f542060 100644
--- a/kernel/xsched/rt.c
+++ b/kernel/xsched/rt.c
@@ -186,6 +186,7 @@ static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse)
 	int kick_count, scheduled = 0;
 	struct vstream_info *vs;
 	struct vstream_metadata *vsm;
+	struct xcu_op_handler_params params;
 
 	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
 	XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n",
@@ -207,6 +208,22 @@ static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse)
 		spin_unlock(&vs->stream_lock);
 	}
 
+	/*
+	 * Iterate over all vstreams in context:
+	 * Set wr_cqe bit in last computing task in vsm_list
+	 */
+	for_each_vstream_in_ctx(vs, xse->ctx) {
+		list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) {
+			if (vsm->parent == vs) {
+				params.group = vsm->parent->xcu->group;
+				params.param_1 = &(int){SQE_SET_NOTIFY};
+				params.param_2 = &vsm->sqe;
+				xcu_sqe_op(&params);
+				break;
+			}
+		}
+	}
+
 	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
 	XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n",
 		     kick_count, __func__);
-- 
Gitee

From 412cb896571681c3e8241d4a7b0cec516afedf69 Mon Sep 17 00:00:00 2001
From: Liu Kai
Date: Tue, 30 Sep 2025 07:06:35 +0000
Subject: [PATCH 15/16] xsched/core: Add multi-card support for xsched

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

-----------------------------------------

Implement multi-device support, allowing xsched to be enabled
simultaneously on multiple hardware devices.

Signed-off-by: Dashchynski Aliaksandr
Signed-off-by: Liu Kai
Signed-off-by: Xia Fukun
---
 include/linux/xsched.h    |  29 ++++--
 kernel/xsched/cfs_quota.c |   1 -
 kernel/xsched/core.c      |  61 +++++++-----
 kernel/xsched/vstream.c   | 193 ++++++++++++++++++++------------------
 4 files changed, 158 insertions(+), 126 deletions(-)

diff --git a/include/linux/xsched.h b/include/linux/xsched.h
index 825ff2dc0c8e..3a6007f33387 100644
--- a/include/linux/xsched.h
+++ b/include/linux/xsched.h
@@ -7,10 +7,15 @@
 #include
 #include
 #include
+
 #ifndef pr_fmt
 #define pr_fmt(fmt) fmt
 #endif
 
+#ifdef CONFIG_XCU_VSTREAM
+#define MAX_VSTREAM_NUM (512)
+#endif
+
 #define XSCHED_ERR_PREFIX "XSched [ERROR]: "
 #define XSCHED_ERR(fmt, ...)
\ pr_err(pr_fmt(XSCHED_ERR_PREFIX fmt), ##__VA_ARGS__) @@ -92,18 +97,14 @@ enum xse_flag { XSE_TIF_BALANCE, /* Unused so far */ }; - extern const struct xsched_class rt_xsched_class; extern const struct xsched_class fair_xsched_class; #define xsched_first_class (&rt_xsched_class) - #define for_each_xsched_class(class) \ for (class = xsched_first_class; class; class = class->next) - #define for_each_xse_prio(prio) \ for (prio = XSE_PRIO_LOW; prio < NR_XSE_PRIO; prio++) - #define for_each_vstream_in_ctx(vs, ctx) \ list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node) @@ -179,6 +180,16 @@ struct xsched_cu { struct task_struct *worker; + /* Storage list for contexts associated with this xcu */ + uint32_t nr_ctx; + struct list_head ctx_list; + struct mutex ctx_list_lock; + +#ifdef CONFIG_XCU_VSTREAM + vstream_info_t *vs_array[MAX_VSTREAM_NUM]; + struct mutex vs_array_lock; +#endif + struct xsched_rq xrq; struct list_head vsm_list; @@ -531,20 +542,20 @@ extern struct list_head xsched_ctx_list; extern struct mutex xsched_ctx_list_mutex; /* Returns a pointer to xsched_context object corresponding to a given - * device file descriptor provided by fd argument. + * tgid and xcu. */ -static inline struct xsched_context *ctx_find_by_tgid(pid_t tgid) +static inline struct xsched_context * +ctx_find_by_tgid_and_xcu(pid_t tgid, struct xsched_cu *xcu) { struct xsched_context *ctx; struct xsched_context *ret = NULL; - list_for_each_entry(ctx, &xsched_ctx_list, ctx_node) { + list_for_each_entry(ctx, &xcu->ctx_list, ctx_node) { if (ctx->tgid == tgid) { ret = ctx; break; } } - return ret; } @@ -586,7 +597,7 @@ static inline void xsched_init_vsm(struct vstream_metadata *vsm, INIT_LIST_HEAD(&vsm->node); } -int xsched_xcu_register(struct xcu_group *group); +int xsched_xcu_register(struct xcu_group *group, int phys_id); void xsched_task_free(struct kref *kref); int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs); int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx); diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c index a62f07ad3cdc..5bded83b3561 100644 --- a/kernel/xsched/cfs_quota.c +++ b/kernel/xsched/cfs_quota.c @@ -35,7 +35,6 @@ void xsched_quota_refill(struct work_struct *work) spin_unlock(&xg->lock); for_each_active_xcu(xcu, id) { - xcu = xsched_cu_mgr[id]; mutex_lock(&xcu->xcu_lock); if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c index 744db05e36ec..6e5cf060a612 100644 --- a/kernel/xsched/core.c +++ b/kernel/xsched/core.c @@ -34,10 +34,6 @@ extern struct xsched_group *root_xcg; DECLARE_BITMAP(xcu_online_mask, XSCHED_NR_CUS); struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; -/* Storage list for contexts. 
*/ -struct list_head xsched_ctx_list; -DEFINE_MUTEX(xsched_ctx_list_mutex); - static DEFINE_MUTEX(revmap_mutex); static DEFINE_HASHTABLE(ctx_revmap, XCU_HASH_ORDER); @@ -218,14 +214,16 @@ void xsched_task_free(struct kref *kref) { struct xsched_context *ctx; vstream_info_t *vs, *tmp; + struct xsched_cu *xcu; ctx = container_of(kref, struct xsched_context, kref); + xcu = ctx->xse.xcu; /* Wait till xse dequeues */ while (READ_ONCE(ctx->xse.on_rq)) usleep_range(100, 200); - mutex_lock(&xsched_ctx_list_mutex); + mutex_lock(&xcu->ctx_list_lock); list_for_each_entry_safe(vs, tmp, &ctx->vstream_list, ctx_node) { list_del(&vs->ctx_node); kfree(vs->data); @@ -234,7 +232,8 @@ void xsched_task_free(struct kref *kref) delete_ctx(ctx); list_del(&ctx->ctx_node); - mutex_unlock(&xsched_ctx_list_mutex); + --xcu->nr_ctx; + mutex_unlock(&xcu->ctx_list_lock); kfree(ctx); } @@ -289,6 +288,7 @@ int vstream_bind_to_xcu(vstream_info_t *vstream_info) /* Bind vstream to a xcu. */ vstream_info->xcu = xcu_found; + vstream_info->dev_id = xcu_found->id; XSCHED_DEBUG("XCU bound to a vstream: type=%u, dev_id=%u, chan_id=%u.\n", type, vstream_info->dev_id, vstream_info->channel_id); @@ -373,8 +373,8 @@ int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) } xse->ctx = ctx; - if (likely(vs->xcu != NULL)) - xse->xcu = vs->xcu; + BUG_ON(vs->xcu == NULL); + xse->xcu = vs->xcu; err = xsched_xse_set_class(xse); if (err) { @@ -604,13 +604,20 @@ static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, xcu->id = xcu_id; xcu->state = XSCHED_XCU_NONE; xcu->group = group; + xcu->nr_ctx = 0; atomic_set(&xcu->pending_kicks_rt, 0); atomic_set(&xcu->pending_kicks_cfs, 0); INIT_LIST_HEAD(&xcu->vsm_list); + INIT_LIST_HEAD(&xcu->ctx_list); init_waitqueue_head(&xcu->wq_xcu_idle); mutex_init(&xcu->xcu_lock); + mutex_init(&xcu->ctx_list_lock); + +#ifdef CONFIG_XCU_VSTREAM + mutex_init(&xcu->vs_array_lock); +#endif /* Mark current XCU in a mask inside XCU root group. */ set_bit(xcu->id, xcu_group_root->xcu_mask); @@ -626,22 +633,22 @@ static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, xcu->worker = kthread_run(xsched_schedule, xcu, "xcu_%u", xcu->id); } -/* Allocates xcu id in xcu_manager array. */ -static int alloc_xcu_id(void) +/* Increment xcu id */ +static int nr_active_cu_inc(void) { - int xcu_id = -1; + int cur_num = -1; spin_lock(&xcu_mgr_lock); if (num_active_xcu >= XSCHED_NR_CUS) goto out_unlock; - xcu_id = num_active_xcu; + cur_num = num_active_xcu; num_active_xcu++; - XSCHED_DEBUG("Number of active xcu: %d.\n", num_active_xcu); + XSCHED_DEBUG("Number of active xcus: %d.\n", num_active_xcu); out_unlock: spin_unlock(&xcu_mgr_lock); - return xcu_id; + return cur_num; } /* Adds vstream_metadata object to a specified vstream. */ @@ -703,14 +710,24 @@ struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs) /* * Initialize and register xcu in xcu_manager array. */ -int xsched_xcu_register(struct xcu_group *group) +int xsched_xcu_register(struct xcu_group *group, int phys_id) { - int xcu_id; + int xcu_cur_num; struct xsched_cu *xcu; - xcu_id = alloc_xcu_id(); - if (xcu_id < 0) { - XSCHED_ERR("Fail to alloc xcu id.\n"); + /* Can be refactored in future because it's possible that + * device contains more than 1 hardware task scheduler. 
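/*
 * With multi-card support, registration keys the xsched_cu_mgr[] slot by
 * the device's physical id rather than by an allocation counter, so a
 * given device always lands in the same slot across probes. A
 * bounds-checked model of that step (the capacity is illustrative):
 */
#include <stdio.h>

#define DEMO_NR_CUS 4

static void *demo_cu_mgr[DEMO_NR_CUS];

static int demo_register(void *cu, int phys_id)
{
	if (phys_id < 0 || phys_id >= DEMO_NR_CUS)
		return -22;		/* -EINVAL: physical id out of range */
	demo_cu_mgr[phys_id] = cu;	/* slot chosen by physical id */
	return 0;
}

int main(void)
{
	int dev_a, dev_b;

	printf("%d %d %d\n",
	       demo_register(&dev_a, 0),
	       demo_register(&dev_b, 2),
	       demo_register(&dev_a, 9));	/* prints: 0 0 -22 */
	return 0;
}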
+ */ + if (phys_id >= XSCHED_NR_CUS) { + XSCHED_ERR("phys_id (%d) >= XSCHED_NR_CUS (%d).\n", + phys_id, XSCHED_NR_CUS); + return -EINVAL; + } + + xcu_cur_num = nr_active_cu_inc(); + if (xcu_cur_num < 0) { + XSCHED_ERR("Number of present XCU's exceeds %d: %d.\n", + XSCHED_NR_CUS, num_active_xcu); return -ENOSPC; }; @@ -721,18 +738,16 @@ int xsched_xcu_register(struct xcu_group *group) }; group->xcu = xcu; - xsched_cu_mgr[xcu_id] = xcu; + xsched_cu_mgr[phys_id] = xcu; /* Init xcu's internals. */ - xsched_xcu_init(xcu, group, xcu_id); + xsched_xcu_init(xcu, group, phys_id); return 0; } EXPORT_SYMBOL(xsched_xcu_register); int __init xsched_init(void) { - /* Initializing global XSched context list. */ - INIT_LIST_HEAD(&xsched_ctx_list); xcu_cg_init_common(root_xcg); return 0; } diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c index 78ead98d1a66..e47a117497ef 100644 --- a/kernel/xsched/vstream.c +++ b/kernel/xsched/vstream.c @@ -23,12 +23,7 @@ #include #ifdef CONFIG_XCU_VSTREAM -#define MAX_VSTREAM_NUM 512 - -static DEFINE_MUTEX(vs_mutex); -static vstream_info_t *vstream_array[MAX_VSTREAM_NUM]; - -static int vstream_del(uint32_t vstream_id); +static int vstream_del(vstream_info_t *vstream, uint32_t vstream_id); static int vstream_file_release(struct inode *inode, struct file *file); static const struct file_operations vstreamfd_fops = { .release = vstream_file_release, @@ -46,13 +41,8 @@ static inline void vstream_file_put(struct file *vstream_file) static int vstream_file_create(struct vstream_info *vs) { - int err = anon_inode_getfd("[vstreamfd]", - &vstreamfd_fops, vs, O_RDWR | O_CLOEXEC | O_NONBLOCK); - if (err < 0) - XSCHED_ERR("Fail to alloc anon inode vs %u @ %s\n", - vs->id, __func__); - - return err; + return anon_inode_getfd("[vstreamfd]", &vstreamfd_fops, vs, + O_RDWR | O_CLOEXEC | O_NONBLOCK); } static int vstream_destroy(vstream_info_t *vstream) @@ -61,7 +51,7 @@ static int vstream_destroy(vstream_info_t *vstream) struct xsched_context *ctx = NULL; struct xsched_entity *xse = NULL; - err = vstream_del(vstream->id); + err = vstream_del(vstream, vstream->id); if (err) return err; @@ -106,7 +96,9 @@ static void init_xsched_ctx(struct xsched_context *ctx, static int alloc_ctx_from_vstream(struct vstream_info *vstream_info, struct xsched_context **ctx) { - *ctx = ctx_find_by_tgid(vstream_info->tgid); + struct xsched_cu *xcu = vstream_info->xcu; + + *ctx = ctx_find_by_tgid_and_xcu(vstream_info->tgid, xcu); if (*ctx) return 0; @@ -125,8 +117,8 @@ static int alloc_ctx_from_vstream(struct vstream_info *vstream_info, kfree(*ctx); return -EINVAL; } - - list_add(&(*ctx)->ctx_node, &xsched_ctx_list); + list_add(&(*ctx)->ctx_node, &xcu->ctx_list); + ++xcu->nr_ctx; return 0; } @@ -135,27 +127,26 @@ static int alloc_ctx_from_vstream(struct vstream_info *vstream_info, static int vstream_bind_to_ctx(struct vstream_info *vs) { struct xsched_context *ctx = NULL; - int alloc_err = 0; + struct xsched_cu *xcu = vs->xcu; + int err = 0; - mutex_lock(&xsched_ctx_list_mutex); - ctx = ctx_find_by_tgid(vs->tgid); + mutex_lock(&xcu->ctx_list_lock); + ctx = ctx_find_by_tgid_and_xcu(vs->tgid, xcu); if (ctx) { XSCHED_DEBUG("Ctx %d found @ %s\n", vs->tgid, __func__); kref_get(&ctx->kref); } else { - alloc_err = alloc_ctx_from_vstream(vs, &ctx); - if (alloc_err) + err = alloc_ctx_from_vstream(vs, &ctx); + if (err) goto out_err; } vs->ctx = ctx; - vs->xcu = ctx->xse.xcu; - ctx->dev_id = vs->dev_id; list_add(&vs->ctx_node, &vs->ctx->vstream_list); out_err: - mutex_unlock(&xsched_ctx_list_mutex); - return 
 
 static vstream_info_t *vstream_create(struct vstream_args *arg)
@@ -168,7 +159,6 @@ static vstream_info_t *vstream_create(struct vstream_args *arg)
 		return NULL;
 	}
 
-	vstream->inode_fd = vstream_file_create(vstream);
 	vstream->dev_id = arg->dev_id;
 	vstream->channel_id = arg->channel_id;
 	vstream->kicks_count = 0;
@@ -185,63 +175,77 @@ static vstream_info_t *vstream_create(struct vstream_args *arg)
 
 static int vstream_add(vstream_info_t *vstream, uint32_t id)
 {
+	int err = 0;
+	struct xsched_cu *xcu = vstream->xcu;
+
 	if (id >= MAX_VSTREAM_NUM) {
-		XSCHED_ERR("vstream id out of range.\n");
+		XSCHED_ERR("Vstream id=%u out of range @ %s.\n",
+			   id, __func__);
 		return -EINVAL;
 	}
 
-	mutex_lock(&vs_mutex);
-	if (vstream_array[id] != NULL) {
-		mutex_unlock(&vs_mutex);
+	mutex_lock(&xcu->vs_array_lock);
+	if (xcu->vs_array[id] != NULL) {
 		XSCHED_ERR("Vstream id=%u cell is busy.\n", id);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out_err;
 	}
-	vstream_array[id] = vstream;
-	mutex_unlock(&vs_mutex);
+	xcu->vs_array[id] = vstream;
 
-	return 0;
+out_err:
+	mutex_unlock(&xcu->vs_array_lock);
+	return err;
 }
 
-static int vstream_del(uint32_t vstream_id)
+static int vstream_del(vstream_info_t *vstream, uint32_t vstream_id)
 {
+	struct xsched_cu *xcu = vstream->xcu;
+
 	if (vstream_id >= MAX_VSTREAM_NUM) {
-		XSCHED_ERR("Vstream id=%u out of range.\n", vstream_id);
+		XSCHED_ERR("Vstream id=%u out of range @ %s.\n",
+			   vstream_id, __func__);
 		return -EINVAL;
 	}
 
-	mutex_lock(&vs_mutex);
-	vstream_array[vstream_id] = NULL;
-	mutex_unlock(&vs_mutex);
+	mutex_lock(&xcu->vs_array_lock);
+	xcu->vs_array[vstream_id] = NULL;
+	mutex_unlock(&xcu->vs_array_lock);
 
 	return 0;
 }
 
-static vstream_info_t *vstream_get(uint32_t vstream_id)
+static vstream_info_t *vstream_get(struct xsched_cu *xcu, uint32_t vstream_id)
 {
 	vstream_info_t *vstream = NULL;
 
 	if (vstream_id >= MAX_VSTREAM_NUM) {
-		XSCHED_ERR("Vstream id=%u out of range.\n", vstream_id);
+		XSCHED_ERR("Vstream id=%u out of range @ %s.\n",
+			   vstream_id, __func__);
 		return NULL;
 	}
 
-	mutex_lock(&vs_mutex);
-	vstream = vstream_array[vstream_id];
-	mutex_unlock(&vs_mutex);
+	mutex_lock(&xcu->vs_array_lock);
+	vstream = xcu->vs_array[vstream_id];
+	mutex_unlock(&xcu->vs_array_lock);
 
 	return vstream;
 }
 
 static vstream_info_t *
-vstream_get_by_user_stream_id(uint32_t user_stream_id)
+vstream_get_by_user_stream_id(struct xsched_cu *xcu, uint32_t user_stream_id)
 {
 	int id;
+	vstream_info_t *ret = NULL;
 
+	mutex_lock(&xcu->vs_array_lock);
 	for (id = 0; id < MAX_VSTREAM_NUM; id++) {
-		if (vstream_array[id] != NULL &&
-		    vstream_array[id]->user_stream_id == user_stream_id)
-			return vstream_array[id];
+		if (xcu->vs_array[id] != NULL &&
+		    xcu->vs_array[id]->user_stream_id == user_stream_id) {
+			ret = xcu->vs_array[id];
+			break;
+		}
 	}
-	return NULL;
+	mutex_unlock(&xcu->vs_array_lock);
+	return ret;
 }
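The hunks above assume that struct xsched_cu gained per-XCU state roughly like the sketch below. The field names are taken directly from the accesses in this patch; the exact declaration lives in the xsched headers, which are not part of this diff:

	struct xsched_cu {
		/* ... existing fields ... */

		/* Per-XCU context bookkeeping (replaces the global ctx list). */
		struct list_head	ctx_list;
		struct mutex		ctx_list_lock;
		uint32_t		nr_ctx;

		/* Per-XCU vstream slots (replace the global vstream_array). */
		vstream_info_t		*vs_array[MAX_VSTREAM_NUM];
		struct mutex		vs_array_lock;
	};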
 
 static int sqcq_alloc(struct vstream_args *arg)
@@ -264,7 +268,7 @@ static int sqcq_alloc(struct vstream_args *arg)
 	vstream->task_type = arg->task_type;
 
 	ret = vstream_bind_to_xcu(vstream);
-	if (ret < 0) {
+	if (ret) {
 		ret = -EINVAL;
 		goto out_err_vstream_free;
 	}
@@ -292,23 +296,27 @@ static int sqcq_alloc(struct vstream_args *arg)
 	vstream->sqcq_type = va_args->type;
 
 	ret = vstream_bind_to_ctx(vstream);
-	if (ret < 0)
+	if (ret)
 		goto out_err_vstream_free;
 
 	ctx = vstream->ctx;
 
+	ret = vstream_file_create(vstream);
+	if (ret < 0) {
+		XSCHED_ERR("Fail to alloc anon inode for vstream %u @ %s\n",
+			   vstream->id, __func__);
+		goto out_err_vstream_free;
+	}
+
+	vstream->inode_fd = ret;
 	/* Add new vstream to array after allocating inode */
 	ret = vstream_add(vstream, vstream->id);
-	if (ret < 0)
+	if (ret)
 		goto out_err_vstream_free;
 
 	return 0;
 
 out_err_vstream_free:
 	kfree(vstream);
-	XSCHED_ERR("Exit %s with error, current_pid=%d, err=%d.\n",
-		   __func__, current->pid, ret);
-
 	return ret;
 }
@@ -319,20 +327,17 @@ static int logic_cq_alloc(struct vstream_args *arg)
 	vstream_info_t *vstream = NULL;
 	vstream_alloc_args_t *logic_cq_alloc_para = &arg->va_args;
 	struct xsched_cu *xcu_found = NULL;
-	uint32_t logic_cq_id = 0, type = XCU_TYPE_XPU;
+	uint32_t logic_cq_id = 0;
+	uint32_t type = XCU_TYPE_XPU;
+
+	xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu_found)
+		return -EINVAL;
 
-	vstream = vstream_get_by_user_stream_id(
+	vstream = vstream_get_by_user_stream_id(xcu_found,
 			logic_cq_alloc_para->user_stream_id);
-	if (!vstream) {
-		xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
-		if (!xcu_found) {
-			err = -EINVAL;
-			goto out_err;
-		}
-	} else {
+	if (vstream)
 		xcu_found = vstream->xcu;
-	}
-
 	params.group = xcu_found->group;
 	params.fd = arg->fd;
 	params.payload = arg->payload;
@@ -340,21 +345,12 @@ static int logic_cq_alloc(struct vstream_args *arg)
 	err = xcu_logic_alloc(&params);
 	if (err) {
 		XSCHED_ERR("Fail to alloc logic CQ memory to a vstream.\n");
-		goto out_err;
+		return err;
 	}
+	if (vstream)
+		vstream->logic_vcq_id = logic_cq_id;
 
-	vstream->logic_vcq_id = logic_cq_id;
-	XSCHED_DEBUG(
-		"Vstream logic CQ: dev_id=%u, stream_id=%u, logic_cqid=%u @ %s\n",
-		vstream->dev_id, vstream->user_stream_id,
-		vstream->logic_vcq_id, __func__);
 	return 0;
-
-out_err:
-	XSCHED_ERR(
-		"Exit %s with error, current_pid=%d, err=%d.\n",
-		__func__, current->pid, err);
-	return err;
 }
 
 int vstream_alloc(struct vstream_args *arg)
@@ -374,11 +370,17 @@ int vstream_free(struct vstream_args *arg)
 {
 	struct file *vs_file;
 	struct xcu_op_handler_params params;
+	struct xsched_cu *xcu_found;
 	uint32_t vstream_id = arg->sq_id;
+	uint32_t type = XCU_TYPE_XPU;
 	vstream_info_t *vstream = NULL;
 	int err = 0;
 
-	vstream = vstream_get(vstream_id);
+	xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu_found)
+		return -EINVAL;
+
+	vstream = vstream_get(xcu_found, vstream_id);
 	if (!vstream) {
 		XSCHED_ERR("Fail to free NULL vstream, vstream id=%u\n", vstream_id);
 		return -EINVAL;
@@ -389,10 +391,13 @@ int vstream_free(struct vstream_args *arg)
 	params.payload = arg->payload;
 
 	vs_file = vstream_file_get(vstream->inode_fd);
-	vstream_destroy(vstream);
-	vs_file->private_data = NULL;
-	vstream_file_put(vs_file);
+	if (vs_file) {
+		vs_file->private_data = NULL;
+		vstream_file_put(vs_file);
+	}
 
+	/* After a successful vstream_get(), destroying the vstream cannot fail. */
+	vstream_destroy(vstream);
 	err = xcu_finish(&params);
 	if (err)
 		XSCHED_ERR("Fail to free vstream sqId=%u, cqId=%u.\n",
@@ -404,23 +409,25 @@ int vstream_kick(struct vstream_args *arg)
 {
 	vstream_info_t *vstream;
-	int vstream_id = arg->sq_id;
+	struct xsched_cu *xcu = NULL;
 	struct xsched_entity *xse;
 	int err = 0;
+	uint32_t vstream_id = arg->sq_id;
+	uint32_t type = XCU_TYPE_XPU;
 
-	struct xsched_cu *xcu = NULL;
-
-	XSCHED_CALL_STUB();
+	xcu = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu)
+		return -EINVAL;
 
 	/* Get vstream. */
-	vstream = vstream_get(vstream_id);
+	vstream = vstream_get(xcu, vstream_id);
 	if (!vstream || !vstream->ctx) {
-		XSCHED_ERR("Vstream NULL or doesn't have a context. 
" + "vstream_id=%u, dev_id=%u\n", vstream_id, arg->dev_id); return -EINVAL; } xse = &vstream->ctx->xse; - xcu = vstream->xcu; XSCHED_DEBUG("New kick on xse %d @ %s\n", xse->tgid, __func__); do { @@ -476,17 +483,17 @@ SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd) struct vstream_args vstream_arg; if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) { - pr_err("copy_from_user failed\n"); + XSCHED_ERR("copy_from_user failed\n"); return -EFAULT; } res = vstream_command_table[cmd](&vstream_arg); if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) { - pr_err("copy_to_user failed\n"); + XSCHED_ERR("copy_to_user failed\n"); return -EFAULT; } - pr_debug("vstream_manage: cmd %d\n", cmd); + XSCHED_DEBUG("vstream_manage: cmd %d\n", cmd); return res; } #else -- Gitee From 49fd04425a505eb9de44e91584b9ead4a9f153b5 Mon Sep 17 00:00:00 2001 From: Xia Fukun Date: Tue, 30 Sep 2025 07:06:36 +0000 Subject: [PATCH 16/16] XSched driver patch hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB ----------------------------------------- Adapt 910b npu driver for xsched Signed-off-by: Hui Tang Signed-off-by: Konstantin Meskhidze Signed-off-by: Liu Kai Signed-off-by: Xia Fukun --- .../0001-Adapt-910b-npu-driver-for-xsched.txt | 918 ++++++++++++++++++ 1 file changed, 918 insertions(+) create mode 100644 drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt diff --git a/drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt b/drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt new file mode 100644 index 000000000000..83fada81dbb9 --- /dev/null +++ b/drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt @@ -0,0 +1,918 @@ +From fe53ea5d5abcc587972079bcae5a706e54f52749 Mon Sep 17 00:00:00 2001 +From: Hui Tang +Date: Tue, 25 Feb 2025 10:18:24 +0000 +Subject: [PATCH openEuler-25.03] Adapt 910b npu driver for xsched + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB + +----------------------------------------- + +Adapt 910b npu driver for xsched + +Signed-off-by: Hui Tang +Signed-off-by: Konstantin Meskhidze +Signed-off-by: Liu Kai +Signed-off-by: Xia Fukun +--- + .../depends/inc_driver/ascend_hal_define.h | 2 +- + rms/trs_drv/chan/chan_rxtx.c | 2 +- + .../lba/near/comm/adapt/trs_near_adapt_init.h | 2 + + rms/trs_drv/lba/near/sia/adapt/Makefile | 7 +- + .../lba/near/sia/adapt/trs_host_init.c | 3 + + .../lba/near/sia/adapt/xsched_xpu_interface.c | 263 ++++++++++++++++++ + rms/trs_drv/trs_core/Makefile | 1 + + rms/trs_drv/trs_core/trs_fops.c | 143 +++++++--- + rms/trs_drv/trs_core/trs_hw_sqcq.c | 3 +- + rms/trs_drv/trs_core/trs_hw_sqcq.h | 3 +- + rms/trs_drv/trs_core/trs_logic_cq.c | 100 ++++--- + rms/trs_drv/trs_core/trs_logic_cq.h | 3 +- + rms/trs_drv/trs_core/trs_sqcq_map.c | 4 + + ts_agent/src/ts_agent_update_sqe.c | 6 + + 14 files changed, 466 insertions(+), 76 deletions(-) + create mode 100755 rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c + +diff --git a/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h b/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h +index a76efda..cc51c4d 100644 +--- a/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h ++++ b/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h +@@ -893,7 +893,7 @@ typedef enum tagDrvSqCqType { + } drvSqCqType_t; + + struct halSqCqInputInfo { +- drvSqCqType_t type; // normal : 0, callback : 1 ++ drvSqCqType_t type; // normal : 0, callback : 1, logic : 2 + uint32_t tsId; + /* The size and depth 
of each cqsq can be configured in normal mode, but this function is not yet supported */ + uint32_t sqeSize; // normal : 64Byte +diff --git a/rms/trs_drv/chan/chan_rxtx.c b/rms/trs_drv/chan/chan_rxtx.c +index 1fc72da..1e4ef38 100755 +--- a/rms/trs_drv/chan/chan_rxtx.c ++++ b/rms/trs_drv/chan/chan_rxtx.c +@@ -156,7 +156,7 @@ static int trs_chan_fill_sqe(struct trs_chan *chan, u8 *sqe, int timeout, int ad + /* if using bar to r/w sqe, it should use stack value to store sqe to avoid waster time */ + sqe_addr = trs_chan_mem_is_local_mem(&sq->mem_attr) ? dst_addr : sqe_tmp; + +- if (addr_domain == CHAN_ADDR_DOMAIN_KERNEL) { ++ if (addr_domain == CHAN_ADDR_DOMAIN_KERNEL || !access_ok(sqe, sq->para.sqe_size)) { + memcpy_s(sqe_addr, sq->para.sqe_size, sqe, sq->para.sqe_size); + } else { + ret_cpy = copy_from_user(sqe_addr, sqe, sq->para.sqe_size); +diff --git a/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h b/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h +index 3a60d1d..6b4598f 100755 +--- a/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h ++++ b/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h +@@ -21,4 +21,6 @@ + void trs_ts_adapt_init(struct trs_id_inst *inst); + void trs_ts_adapt_uninit(struct trs_id_inst *inst); + ++int xsched_xcu_group_init(u32 dev_id, u32 ts_num, u32 version); ++ + #endif /* TRS_NEAR_ADAPT_INIT_H */ +diff --git a/rms/trs_drv/lba/near/sia/adapt/Makefile b/rms/trs_drv/lba/near/sia/adapt/Makefile +index 16a3f05..2cbdd43 100755 +--- a/rms/trs_drv/lba/near/sia/adapt/Makefile ++++ b/rms/trs_drv/lba/near/sia/adapt/Makefile +@@ -59,8 +59,13 @@ EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/lba/near/sia/adapt/comm/tsc + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dms/include/ + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dms/config/ + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dbl/dev_urd/ ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/trs_core ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dev/inc/ ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/drv_davinci_intf_host ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/tsch/ ++ + obj-m += ascend_trs_pm_adapt.o +-ascend_trs_pm_adapt-objs := trs_host_init.o near_comm/trs_host_comm.o near_comm/trs_host_db.o near_comm/trs_host_id.o near_comm/trs_host_mbox.o near_comm/trs_host_msg.o near_comm/trs_near_adapt_init.o ++ascend_trs_pm_adapt-objs := trs_host_init.o near_comm/trs_host_comm.o near_comm/trs_host_db.o near_comm/trs_host_id.o near_comm/trs_host_mbox.o near_comm/trs_host_msg.o near_comm/trs_near_adapt_init.o xsched_xpu_interface.o + + ascend_trs_pm_adapt-objs += near_comm/soc_adapt/soc_adapt.o trs_host_init/trs_host.o trs_host_chan/trs_host_chan.o trs_host_chan/trs_sqe_update.o trs_host_core/trs_host_core.o + ascend_trs_pm_adapt-objs += near_comm/trs_host_chan/stars_v1/trs_chan_stars_v1_ops.o near_comm/trs_host_chan/stars_v1/trs_chan_stars_v1_ops_stars.o +diff --git a/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c b/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c +index abdabc6..9de8549 100755 +--- a/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c ++++ b/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "soc_res.h" + #include "trs_pub_def.h" +@@ -162,6 +163,8 @@ int trs_host_init(u32 phy_devid) + } + } + ++ xsched_xcu_group_init(phy_devid, ts_num, XCU_HW_V2); ++ + return 0; + } + EXPORT_SYMBOL(trs_host_init); +diff --git a/rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c b/rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c +new file mode 
100755
+index 0000000..a7c01ba
+--- /dev/null
++++ b/rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c
+@@ -0,0 +1,263 @@
++/*
++ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 and
++ * only version 2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * Description:
++ * Author: Huawei
++ * Create: 2024-06-17
++ */
++
++#ifndef TSDRV_KERNEL_UT
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include "securec.h"
++#include "devdrv_manager_comm.h"
++#include "ascend_hal_define.h"
++#include "trs_pub_def.h"
++#include "trs_res_id_def.h"
++#include "trs_proc.h"
++#include "trs_cmd.h"
++#include "davinci_api.h"
++#include "davinci_interface.h"
++#include "davinci_intf_init.h"
++#include "task_struct.h"
++
++int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg);
++int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg);
++int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg);
++int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg);
++
++extern struct xcu_group *xcu_group_root;
++extern int xsched_xcu_register(struct xcu_group *group, uint32_t phys_id);
++
++/* Gets the device driver TS context from the file descriptor of an opened device. */
++static void *get_tsdrv_ctx(int fd)
++{
++	struct davinci_intf_private_stru *file_private_data;
++	void *ctx = NULL;
++	struct fd f;
++
++	f = fdget(fd);
++	if (!f.file)
++		goto out;
++
++	file_private_data = f.file->private_data;
++	if (!file_private_data)
++		goto out;
++
++	ctx = file_private_data->priv_filep.private_data;
++
++out:
++	fdput(f);
++	return ctx;
++}
++
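++/*
++ * Illustrative caller-side packing for the .run handler below. This is
++ * a sketch only: the actual packing is performed by the xsched core, and
++ * the names here simply mirror how trs_xsched_ctx_run() unpacks its
++ * parameters.
++ *
++ *	struct xcu_op_handler_params params = {0};
++ *
++ *	params.param_1 = &sq_id;	(uint32_t, SQ id)
++ *	params.param_2 = &ts_id;	(uint32_t, TS id)
++ *	params.param_3 = sqe_buf;	(uint8_t *, SQE payload)
++ *	params.param_4 = &sqe_num;	(uint32_t, number of SQEs)
++ *	params.param_5 = &timeout;	(int32_t, milliseconds, -1 = wait forever)
++ *	params.param_6 = &type;		(int32_t, SQ/CQ type)
++ *	params.param_7 = proc_ctx;	(struct trs_proc_ctx *)
++ *	params.param_8 = &logic_cq_id;	(uint32_t, logic CQ id)
++ *	err = trs_xsched_ctx_run(&params);
++ */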
++int trs_xsched_ctx_run(struct xcu_op_handler_params *params)
++{
++	uint32_t sq_id = *(uint32_t *)params->param_1;
++	uint32_t tsId = *(uint32_t *)params->param_2;
++	uint8_t *sqe_addr = params->param_3;
++	uint32_t sqe_num = *(uint32_t *)params->param_4;
++	int32_t timeout = *(int32_t *)params->param_5;
++	int32_t type = *(int32_t *)params->param_6;
++	struct halTaskSendInfo input = {0};
++	struct trs_proc_ctx *ctx = params->param_7;
++	uint32_t logic_cqId = *(uint32_t *)params->param_8;
++
++	input.tsId = tsId;
++	input.sqId = sq_id;
++	input.timeout = timeout;
++	input.sqe_addr = sqe_addr;
++	input.sqe_num = sqe_num;
++	input.type = type;
++
++	trs_debug("%s %d: tsId %u sqId %u timeout %d num %u\n",
++		  __FUNCTION__, __LINE__, tsId, sq_id, timeout, sqe_num);
++
++	/* Send SQ tail to a doorbell. */
++	return ioctl_trs_sqcq_send(ctx, logic_cqId, (unsigned long)&input);
++}
++
++int trs_xsched_ctx_free(struct xcu_op_handler_params *params)
++{
++	struct trs_proc_ctx *ctx;
++
++	ctx = get_tsdrv_ctx(params->fd);
++	if (!ctx)
++		return -ENOENT;
++
++	return ioctl_trs_sqcq_free(ctx, 0, (unsigned long)params->payload);
++}
++
++int trs_xsched_ctx_wait(struct xcu_op_handler_params *params)
++{
++	uint32_t tsId = *(uint32_t *)params->param_1;
++	uint32_t cqId = *(uint32_t *)params->param_2;
++	uint32_t streamId = *(uint32_t *)params->param_3;
++	struct ts_stars_sqe_header *sqe = params->param_4;
++	uint8_t *cqe_addr = params->param_5;
++	struct trs_proc_ctx *ctx = params->param_6;
++	int32_t timeout = *(int32_t *)params->param_7;
++	int32_t cqe_num = 1;
++	struct halReportRecvInfo input = {0};
++	uint32_t task_id = sqe->task_id;
++
++	input.type = DRV_LOGIC_TYPE;
++	input.tsId = tsId;
++	input.cqId = cqId;
++	input.timeout = timeout;
++	input.cqe_num = cqe_num;
++	input.cqe_addr = cqe_addr;
++	input.stream_id = streamId;
++	input.task_id = task_id;
++	input.res[0] = 1; /* version 1 for new runtime. */
++
++	trs_debug("%s %d: tsId %u logic_cqId %u streamid %u task_id %d timeout %d\n",
++		  __FUNCTION__, __LINE__, tsId, cqId, streamId, task_id, timeout);
++
++	/* Wait for cq irq and read result. */
++	return ioctl_trs_sqcq_recv(ctx, 0, (unsigned long)&input);
++}
++
++int trs_xsched_ctx_complete(struct xcu_op_handler_params *params)
++{
++	return 0;
++}
++
++int trs_xsched_ctx_alloc(struct xcu_op_handler_params *params)
++{
++	struct halSqCqInputInfo *input_info = params->payload;
++	uint32_t *tgid = (uint32_t *)params->param_1;
++	uint32_t *sq_id = (uint32_t *)params->param_2;
++	uint32_t *cq_id = (uint32_t *)params->param_3;
++	uint32_t *user_stream_id = (uint32_t *)params->param_4;
++	struct trs_proc_ctx *ctx;
++	int ret = 0;
++
++	trs_debug("%s %d, input_info %lx, type: %d\n",
++		  __FUNCTION__, __LINE__, (unsigned long)input_info, input_info->type);
++
++	ctx = get_tsdrv_ctx(params->fd);
++	if (!ctx)
++		return -ENOENT;
++
++	trs_debug("%s %d, pid %d, task_id %d, size %ld\n",
++		  __FUNCTION__, __LINE__, ctx->pid, ctx->task_id, sizeof(*ctx));
++	ret = ioctl_trs_sqcq_alloc(ctx, 0, (unsigned long)input_info);
++	if (ret != 0)
++		return ret;
++
++	*tgid = ctx->pid;
++	*sq_id = input_info->sqId;
++	*cq_id = input_info->cqId;
++	*user_stream_id = input_info->info[0];
++	params->param_5 = ctx;
++	return 0;
++}
++
++int trs_xsched_ctx_logic_alloc(struct xcu_op_handler_params *params)
++{
++	struct halSqCqInputInfo *input_info = params->payload;
++	uint32_t *logic_cq_id = (uint32_t *)params->param_1;
++	struct trs_proc_ctx *ctx;
++	int ret = 0;
++
++	trs_debug("%s %d, type: %d\n", __FUNCTION__, __LINE__, input_info->type);
++
++	ctx = get_tsdrv_ctx(params->fd);
++	if (!ctx)
++		return -ENOENT;
++
++	trs_debug("%s %d, pid %d, task_id %d, size %ld\n",
++		  __FUNCTION__, __LINE__, ctx->pid, ctx->task_id, sizeof(*ctx));
++
++	ret = ioctl_trs_sqcq_alloc(ctx, 0, (unsigned long)input_info);
++	if (ret != 0)
++		return ret;
++
++	*logic_cq_id = input_info->cqId;
++	trs_debug("%s %d, type: %d, cq_id: %u\n",
++		  __FUNCTION__, __LINE__, input_info->type, *logic_cq_id);
++	return 0;
++}
++
++int trs_xsched_ctx_sqe_op(struct xcu_op_handler_params *params)
++{
++	struct ts_stars_sqe_header *sqe = params->param_2;
++	int op_type = *(int *)(params->param_1);
++
++	switch (op_type) {
++	case SQE_IS_NOTIFY:
++		return (sqe->type == 0) && (sqe->wr_cqe == 1);
++	case SQE_SET_NOTIFY:
++		if (sqe->type == 0)
++			sqe->wr_cqe = 
1; ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct xcu_operation trs_xsched_ctx_xcu_ops = { ++ .run = trs_xsched_ctx_run, ++ .finish = trs_xsched_ctx_free, ++ .wait = trs_xsched_ctx_wait, ++ .complete = trs_xsched_ctx_complete, ++ .alloc = trs_xsched_ctx_alloc, ++ .logic_alloc = trs_xsched_ctx_logic_alloc, ++ .sqe_op = trs_xsched_ctx_sqe_op, ++}; ++ ++int xsched_xcu_group_init(u32 dev_id, u32 ts_num, u32 version) ++{ ++ struct xcu_group *type_group; ++ struct xcu_group *dev_group; ++ struct xcu_group *ts_group; ++ int tsid; ++ ++ trs_debug("dev_id %u ts_num %u\n", dev_id, ts_num); ++ type_group = xcu_group_find(xcu_group_root, XCU_TYPE_XPU); ++ ++ if (!type_group) { ++ type_group = xcu_group_init(XCU_TYPE_XPU); ++ xcu_group_attach(type_group, xcu_group_root); ++ } ++ ++ dev_group = xcu_group_init(dev_id); ++ ++ trs_debug("%s %d deviceid is %d\n", __FUNCTION__, __LINE__, dev_id); ++ dev_group->id = dev_id; ++ xcu_group_attach(dev_group, type_group); ++ ++ for (tsid = 0; tsid < ts_num; tsid++) { ++ ts_group = xcu_group_init(tsid); ++ ts_group->ver = version; ++ ts_group->opt = &trs_xsched_ctx_xcu_ops; ++ ++ xcu_group_attach(ts_group, dev_group); ++ xsched_xcu_register(ts_group, dev_id); ++ ++ cond_resched(); ++ } ++ ++ return 0; ++} ++#endif +diff --git a/rms/trs_drv/trs_core/Makefile b/rms/trs_drv/trs_core/Makefile +index e0a6a55..8d27ad9 100755 +--- a/rms/trs_drv/trs_core/Makefile ++++ b/rms/trs_drv/trs_core/Makefile +@@ -41,6 +41,7 @@ endif + + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/inc + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/trs_core ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/tsch/ + + obj-m += ascend_trs_core.o + ascend_trs_core-objs := trs_fops.o trs_ts_inst.o trs_proc.o trs_res_mng.o trs_sqcq_map.o trs_hw_sqcq.o trs_sw_sqcq.o trs_logic_cq.o trs_cb_sqcq.o trs_shm_sqcq.o trs_proc_fs.o +diff --git a/rms/trs_drv/trs_core/trs_fops.c b/rms/trs_drv/trs_core/trs_fops.c +index e5702d2..1a9b3c7 100755 +--- a/rms/trs_drv/trs_core/trs_fops.c ++++ b/rms/trs_drv/trs_core/trs_fops.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include "ascend_hal_define.h" + +@@ -33,6 +34,8 @@ + #include "trs_ts_inst.h" + #include "trs_cmd.h" + #include "trs_fops.h" ++#include "trs_logic_cq.h" ++#include "task_struct.h" + + static int (*const trs_res_id_handles[TRS_MAX_CMD])(struct trs_proc_ctx *proc_ctx, + struct trs_core_ts_inst *ts_inst, struct trs_res_id_para *para) = { +@@ -195,8 +198,17 @@ static int (*const trs_sqcq_alloc_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CTRL_TYPE] = trs_sw_sqcq_alloc + }; + +-static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++static bool is_xsched_used(void __user *ptr, int size) + { ++ if (access_ok(ptr, size)) ++ return false; ++ ++ return true; ++} ++ ++int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++{ ++ bool xsched_used = is_xsched_used((void __user *)arg, sizeof(struct halSqCqInputInfo)); + struct trs_core_ts_inst *ts_inst = NULL; + struct halSqCqInputInfo para; + struct trs_alloc_para *alloc_para = NULL; +@@ -204,10 +216,14 @@ static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + struct trs_uio_info uio_info; + int ret; + +- ret = copy_from_user(¶, (struct halSqCqInputInfo __user *)arg, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. 
(ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halSqCqInputInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, (struct halSqCqInputInfo __user *)arg, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + alloc_para = get_alloc_para_addr(¶); +@@ -238,15 +254,24 @@ static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + trs_core_inst_put(ts_inst); + + if (ret == 0) { +- ret = copy_to_user((struct halSqCqInputInfo __user *)arg, ¶, sizeof(para)); +- ret |= copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); +- if (ret != 0) { +- trs_err("Copy to user failed. (ret=%d)\n", ret); ++ if (xsched_used) { ++ memcpy((struct halSqCqInputInfo *)arg, ¶, sizeof(para)); ++ ret = copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); ++ if (ret != 0) { ++ trs_err("Copy to user failed. (ret=%d)\n", ret); ++ } ++ } else { ++ ret = copy_to_user((struct halSqCqInputInfo __user *)arg, ¶, sizeof(para)); ++ ret |= copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); ++ if (ret != 0) { ++ trs_err("Copy to user failed. (ret=%d)\n", ret); ++ } + } + } + + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_alloc); + + static int (*const trs_sqcq_free_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx *proc_ctx, + struct trs_core_ts_inst *ts_inst, struct halSqCqFreeInfo *para) = { +@@ -257,16 +282,20 @@ static int (*const trs_sqcq_free_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CTRL_TYPE] = trs_sw_sqcq_free + }; + +-static int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; + struct halSqCqFreeInfo para; + int ret; + +- ret = copy_from_user(¶, (struct halSqCqFreeInfo __user *)arg, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (is_xsched_used((void *)arg, sizeof(struct halSqCqFreeInfo))) { ++ memcpy(¶, (struct halSqCqFreeInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, (struct halSqCqFreeInfo __user *)arg, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. 
(ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE)) { +@@ -287,6 +316,7 @@ static int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + trs_core_inst_put(ts_inst); + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_free); + + static int ioctl_trs_sqcq_config(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { +@@ -362,17 +392,26 @@ static int (*const trs_sqcq_send_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CALLBACK_TYPE] = trs_cb_sqcq_send, + }; + +-static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; ++ struct halTaskSendInfo *kern_para = (struct halTaskSendInfo *)arg; + struct halTaskSendInfo __user *usr_para = (struct halTaskSendInfo __user *)arg; + struct halTaskSendInfo para; ++ struct trs_logic_cq *logic_cq = NULL; ++ struct ts_stars_sqe_header *sqe_header = NULL; ++ uint32_t logic_cqId = cmd; ++ bool xsched_used = is_xsched_used((void *)arg, sizeof(struct halTaskSendInfo)); + int ret; + +- ret = copy_from_user(¶, usr_para, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halTaskSendInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, usr_para, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE) || (trs_sqcq_send_handles[para.type] == NULL) || +@@ -387,37 +426,69 @@ static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + return -EINVAL; + } + ++ if (xsched_used) { ++ logic_cq = &ts_inst->logic_cq_ctx.cq[logic_cqId]; ++ if (logic_cq == NULL) { ++ trs_err("Invalid para. (logic_cqId=%u)\n", logic_cqId); ++ return -EINVAL; ++ } ++ ++ sqe_header = (struct ts_stars_sqe_header *)para.sqe_addr; ++ trs_debug("sqe_header->type=%u logic_cqId=%u stream_id=%u task_id=%u\n", ++ sqe_header->type, logic_cqId, sqe_header->rt_stream_id, sqe_header->task_id); ++ ++ if ((sqe_header->type == 0) && (sqe_header->wr_cqe == 1)) { ++ trs_debug("logic_cq->wakeup_num=%u\n", atomic_read(&logic_cq->wakeup_num)); ++ ++ if (atomic_read(&logic_cq->wakeup_num) > 0) { ++ atomic_dec(&logic_cq->wakeup_num); ++ trs_debug("logic_cq->wakeup_num=%u\n", atomic_read(&logic_cq->wakeup_num)); ++ } ++ } ++ } ++ + ret = trs_sqcq_send_handles[para.type](proc_ctx, ts_inst, ¶); + + trs_core_inst_put(ts_inst); + + if ((ret == 0) && (para.type == DRV_NORMAL_TYPE)) { +- ret = put_user(para.pos, &usr_para->pos); +- if (ret != 0) { +- trs_err("Put to user fail. (devid=%u; tsid=%u; sqId=%u)\n", proc_ctx->devid, para.tsId, para.sqId); ++ if (xsched_used) { ++ kern_para->pos = para.pos; ++ } else { ++ ret = put_user(para.pos, &usr_para->pos); ++ if (ret != 0) { ++ trs_err("Put to user fail. 
(devid=%u; tsid=%u; sqId=%u)\n", proc_ctx->devid, para.tsId, para.sqId); ++ } + } + } + + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_send); + + static int (*const trs_sqcq_recv_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx *proc_ctx, +- struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) = { ++ struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, bool is_xsched) = { + [DRV_NORMAL_TYPE] = trs_hw_sqcq_recv, + [DRV_LOGIC_TYPE] = trs_logic_cq_recv, + }; + +-static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; +- struct halReportRecvInfo *usr_para = (struct halReportRecvInfo __user *)arg; ++ struct halReportRecvInfo *kern_para = (struct halReportRecvInfo *)arg; ++ struct halReportRecvInfo __user *usr_para = (struct halReportRecvInfo __user *)arg; + struct halReportRecvInfo para; + int ret; ++ bool xsched_used = is_xsched_used((void *)arg, sizeof(struct halReportRecvInfo)); + +- ret = copy_from_user(¶, usr_para, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halReportRecvInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, usr_para, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE) || (trs_sqcq_recv_handles[para.type] == NULL) || +@@ -432,11 +503,16 @@ static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + return -EINVAL; + } + +- ret = trs_sqcq_recv_handles[para.type](proc_ctx, ts_inst, ¶); ++ ret = trs_sqcq_recv_handles[para.type](proc_ctx, ts_inst, ¶, xsched_used); ++ + if (ret == 0) { +- ret = put_user(para.report_cqe_num, &usr_para->report_cqe_num); +- if (ret != 0) { +- trs_err("Put to user fail. (devid=%u; tsid=%u; cqId=%u)\n", proc_ctx->devid, para.tsId, para.cqId); ++ if (xsched_used) { ++ kern_para->report_cqe_num = para.report_cqe_num; ++ } else { ++ ret = put_user(para.report_cqe_num, &usr_para->report_cqe_num); ++ if (ret != 0) { ++ trs_err("Put to user fail. 
(devid=%u; tsid=%u; cqId=%u)\n", proc_ctx->devid, para.tsId, para.cqId); ++ } + } + } else { + u32 ts_status; +@@ -449,6 +525,7 @@ static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_recv); + + int ioctl_trs_stl_bind(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { +diff --git a/rms/trs_drv/trs_core/trs_hw_sqcq.c b/rms/trs_drv/trs_core/trs_hw_sqcq.c +index 825d603..10f3903 100755 +--- a/rms/trs_drv/trs_core/trs_hw_sqcq.c ++++ b/rms/trs_drv/trs_core/trs_hw_sqcq.c +@@ -1160,7 +1160,8 @@ int trs_hw_sqcq_send(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_ + return ret; + } + +-int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) ++int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched) + { + struct trs_id_inst *inst = &ts_inst->inst; + struct trs_chan_recv_para recv_para; +diff --git a/rms/trs_drv/trs_core/trs_hw_sqcq.h b/rms/trs_drv/trs_core/trs_hw_sqcq.h +index b32cd64..b6affdf 100755 +--- a/rms/trs_drv/trs_core/trs_hw_sqcq.h ++++ b/rms/trs_drv/trs_core/trs_hw_sqcq.h +@@ -32,7 +32,8 @@ int trs_sqcq_config(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_i + int trs_sqcq_query(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqQueryInfo *para); + + int trs_hw_sqcq_send(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halTaskSendInfo *para); +-int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para); ++int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched); + void trs_proc_diable_sq_status(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, + int res_type, u32 res_id); + void trs_hw_sqcq_recycle(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, int res_type, u32 res_id); +diff --git a/rms/trs_drv/trs_core/trs_logic_cq.c b/rms/trs_drv/trs_core/trs_logic_cq.c +index d35b8d5..72cf64a 100755 +--- a/rms/trs_drv/trs_core/trs_logic_cq.c ++++ b/rms/trs_drv/trs_core/trs_logic_cq.c +@@ -265,13 +265,15 @@ static bool trs_logic_is_cqe_match(struct trs_logic_cq *logic_cq, void *cqe, u32 + } + + static int trs_logic_cq_recv_para_check(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + struct trs_id_inst *inst = &ts_inst->inst; + +- if (!trs_proc_has_res(proc_ctx, ts_inst, TRS_LOGIC_CQ, para->cqId)) { +- trs_err("Not proc owner cq. (devid=%u; tsid=%u; logic_cqid=%u)\n", inst->devid, inst->tsid, para->cqId); +- return -EINVAL; ++ if (!is_xsched) { ++ if (!trs_proc_has_res(proc_ctx, ts_inst, TRS_LOGIC_CQ, para->cqId)) { ++ trs_err("Not proc owner cq. 
(devid=%u; tsid=%u; logic_cqid=%u)\n", inst->devid, inst->tsid, para->cqId); ++ return -EINVAL; ++ } + } + + if (((para->timeout < 0) && (para->timeout != -1)) || (para->cqe_num == 0) || (para->cqe_addr == NULL)) { +@@ -441,7 +443,7 @@ static void trs_logic_cq_eliminate_holes(struct trs_logic_cq *logic_cq, u32 star + } + + static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_logic_cq *logic_cq, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + u32 start, report_cnt, tail; + u32 rollback = 0; +@@ -463,11 +465,17 @@ static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_ + } + + trs_logic_cq_copy_trace("Logic Cq Recv Match", ts_inst, logic_cq, start, report_cnt); +- ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), +- (unsigned long)report_cnt * logic_cq->cqe_size); +- if (ret != 0) { +- trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); +- return ret; ++ ++ if (is_xsched) { ++ memcpy((void *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ } else { ++ ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ if (ret != 0) { ++ trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); ++ return ret; ++ } + } + + para->report_cqe_num = report_cnt; +@@ -480,7 +488,7 @@ static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_ + } + + static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_logic_cq *logic_cq, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + u32 start, report_cnt, tail; + int ret; +@@ -490,11 +498,17 @@ static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct + report_cnt = (tail > start) ? 
tail - start : logic_cq->cq_depth - start; + + trs_logic_cq_copy_trace("Logic Cq Recv NoMatch", ts_inst, logic_cq, start, report_cnt); +- ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), +- (unsigned long)report_cnt * logic_cq->cqe_size); +- if (ret != 0) { +- trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); +- return ret; ++ ++ if (is_xsched) { ++ memcpy((void *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ } else { ++ ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ if (ret != 0) { ++ trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); ++ return ret; ++ } + } + + para->report_cqe_num = report_cnt; +@@ -503,7 +517,7 @@ static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct + } + + static int trs_logic_cq_copy_report(struct trs_core_ts_inst *ts_inst, +- struct trs_logic_cq *logic_cq, struct halReportRecvInfo *para) ++ struct trs_logic_cq *logic_cq, struct halReportRecvInfo *para, bool is_xsched) + { + u32 version = para->res[0]; + int full_flag = 0; +@@ -522,9 +536,9 @@ static int trs_logic_cq_copy_report(struct trs_core_ts_inst *ts_inst, + } + + if (version == 1) { +- ret = trs_logic_cq_match_copy(ts_inst, logic_cq, para); // runtime new version ++ ret = trs_logic_cq_match_copy(ts_inst, logic_cq, para, is_xsched); // runtime new version + } else { +- ret = trs_logic_cq_non_match_copy(ts_inst, logic_cq, para); ++ ret = trs_logic_cq_non_match_copy(ts_inst, logic_cq, para, is_xsched); + } + if (ret != 0) { + return ret; +@@ -553,8 +567,8 @@ static int trs_logic_cq_wait_event(struct trs_logic_cq *logic_cq, int timeout) + long ret, tm; + + atomic_inc(&logic_cq->wait_thread_num); +- trs_debug("Wake wait start. (logic_cqid=%u; timeout=%d; wait_thread_num=%d)\n", +- logic_cq->cqid, timeout, atomic_read(&logic_cq->wait_thread_num)); ++ trs_debug("Wake wait start. (logic_cqid=%u; timeout=%d; wait_thread_num=%d, wakeup_num=%d)\n", ++ logic_cq->cqid, timeout, atomic_read(&logic_cq->wait_thread_num), atomic_read(&logic_cq->wakeup_num)); + + tm = (timeout == -1) ? 
MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies((u32)timeout);
+ 	(void)prepare_to_wait_exclusive(&logic_cq->wait_queue, &wq_entry, TASK_INTERRUPTIBLE);
+@@ -592,12 +606,13 @@ static int trs_logic_cq_wait_event(struct trs_logic_cq *logic_cq, int timeout)
+ 	return ret;
+ }
+ 
+-int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para)
++int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para,
++		      bool is_xsched)
+ {
+ 	struct trs_logic_cq *logic_cq = NULL;
+ 	int ret;
+ 
+-	ret = trs_logic_cq_recv_para_check(proc_ctx, ts_inst, para);
++	ret = trs_logic_cq_recv_para_check(proc_ctx, ts_inst, para, is_xsched);
+ 	if (ret != 0) {
+ 		return ret;
+ 	}
+@@ -609,24 +624,35 @@ int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts
+ 		return trs_thread_bind_irq_wait(logic_cq, para->timeout);
+ 	}
+ 	trs_logic_cq_recv_trace("Recv start", ts_inst, para);
+-	do {
+-		mutex_lock(&logic_cq->mutex);
+-		ret = trs_logic_cq_copy_report(ts_inst, logic_cq, para);
+-		mutex_unlock(&logic_cq->mutex);
+-		if (ret == 0) {
+-			logic_cq->stat.recv++;
+-			trs_logic_cq_recv_trace("Recv finish", ts_inst, para);
+-			return ret;
+-		}
+ 
+-		if (ret == -EAGAIN) {
++	if (is_xsched) {
+ 		if (para->timeout == 0) {
+-			para->report_cqe_num = 0;
+-			return 0;
++			para->report_cqe_num = 0;
++			return 0;
+ 		}
+ 		ret = trs_logic_cq_wait_event(logic_cq, para->timeout);
+-	}
+-	} while (ret >= 0);
++		trs_debug("Skip reading report for xsched, waiting for cq irq: logic_cqid=%u, timeout=%d, ret=%d\n",
++			  para->cqId, para->timeout, ret);
++	} else {
++		do {
++			mutex_lock(&logic_cq->mutex);
++			ret = trs_logic_cq_copy_report(ts_inst, logic_cq, para, is_xsched);
++			mutex_unlock(&logic_cq->mutex);
++			if (ret == 0) {
++				logic_cq->stat.recv++;
++				trs_logic_cq_recv_trace("Recv finish", ts_inst, para);
++				return ret;
++			}
++
++			if (ret == -EAGAIN) {
++				if (para->timeout == 0) {
++					para->report_cqe_num = 0;
++					return 0;
++				}
++				ret = trs_logic_cq_wait_event(logic_cq, para->timeout);
++			}
++		} while (ret >= 0);
++	}
+ 
+ 	return ret;
+ }
+diff --git a/rms/trs_drv/trs_core/trs_logic_cq.h b/rms/trs_drv/trs_core/trs_logic_cq.h
+index a45b110..b776b7f 100755
+--- a/rms/trs_drv/trs_core/trs_logic_cq.h
++++ b/rms/trs_drv/trs_core/trs_logic_cq.h
+@@ -90,7 +90,8 @@ struct trs_core_ts_inst;
+ 
+ int trs_logic_cq_alloc(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqInputInfo *para);
+ int trs_logic_cq_free(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqFreeInfo *para);
+-int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para);
++int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para,
++		      bool is_xsched);
+ 
+ void trs_logic_set_cqe_version(struct trs_core_ts_inst *ts_inst, u32 logic_cqid, u32 cqe_verion);
+ int trs_logic_cq_enque(struct trs_core_ts_inst *ts_inst, u32 logic_cq_id, u32 stream_id, u32 task_id, void *cqe);
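The is_xsched_used() convention running through the hunks above can be condensed to the following sketch: xsched calls these ioctl helpers with kernel pointers, which fail access_ok(), so the helpers fall back to a plain memcpy() instead of copy_from_user()/copy_to_user():

	/* Sketch of the dispatch used by the alloc/free/send/recv helpers. */
	bool xsched_used = is_xsched_used((void __user *)arg, sizeof(para));

	if (xsched_used) {
		memcpy(&para, (void *)arg, sizeof(para));	/* kernel caller (xsched) */
	} else {
		ret = copy_from_user(&para, (void __user *)arg, sizeof(para));
		if (ret != 0)
			return ret;				/* normal user-space ioctl */
	}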
+diff --git a/rms/trs_drv/trs_core/trs_sqcq_map.c b/rms/trs_drv/trs_core/trs_sqcq_map.c
+index 8103d65..998ecfb 100755
+--- a/rms/trs_drv/trs_core/trs_sqcq_map.c
++++ b/rms/trs_drv/trs_core/trs_sqcq_map.c
+@@ -305,6 +305,10 @@ int trs_sq_remap(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst
+ 	int sq_reg_type = TRS_MAP_TYPE_REG;
+ 	int ret;
+ 
++	ret = 0;
++	goto out;
++
++
+ 	if ((sq_info->sq_phy_addr == 0) || (sq_info->db_addr == 0) || (uio_info->sq_que_addr == 0)) {
+ 		ret = 0;
+ 		goto out;
+diff --git a/ts_agent/src/ts_agent_update_sqe.c b/ts_agent/src/ts_agent_update_sqe.c
+index bb4e3b2..01fe60c 100755
+--- a/ts_agent/src/ts_agent_update_sqe.c
++++ b/ts_agent/src/ts_agent_update_sqe.c
+@@ -1146,6 +1146,12 @@ static void cqe_set_drop_flag(ts_stars_cqe_t *cqe)
+ 	if (cqe->warn || (cqe->sqe_type == TS_STARS_SQE_TYPE_PCIE_DMA)) {
+ 		/* cqe has been processed in ts_agent, no need to send to runtime */
+ 		cqe->drop_flag = 1U;
++		ts_agent_debug("cqe has been processed in ts_agent, no need to send to runtime, drop_flag=%u\n", cqe->drop_flag);
++
++		/* Do not drop: xsched needs to process the cqe. */
++		cqe->drop_flag = 0U;
++		ts_agent_debug("send cqe to runtime/xsched anyway, drop_flag=%u\n", cqe->drop_flag);
++
+ 		return;
+ 	}
+ 	cqe->drop_flag = 0U;
+-- 
+2.34.1
-- 
Gitee
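As a closing illustration, allocating a vstream from user space through the syscall added at the start of this series might look roughly as follows. This is a sketch only: it assumes the uapi header from the series is installed, that dev_fd comes from opening the NPU device node, and it elides error handling.

	#include <stdint.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/xcu_vstream.h>

	static long vstream_alloc_once(int dev_fd)
	{
		struct vstream_args args = {0};

		args.fd = dev_fd;		/* opened NPU device */
		args.dev_id = 0;
		args.channel_id = 0;
		args.va_args.type = 0;		/* normal SQ/CQ */
		args.va_args.user_stream_id = 0;

		/* cmd = VSTREAM_ALLOC; the kernel copies args back out. */
		return syscall(__NR_vstream_manage, &args, VSTREAM_ALLOC);
	}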