diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index a1149911464c3b5005714e19b2d743961005c5fb..725c03445c4aac0656e2e1461568c42783d226c1 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -14,6 +14,7 @@ #ifndef _UAPI__ASM_ARM_UNISTD_H #define _UAPI__ASM_ARM_UNISTD_H +#define __IGNORE_kabi_reserved456 #define __NR_OABI_SYSCALL_BASE 0x900000 #define __NR_SYSCALL_MASK 0x0fffff diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8f97574813caaa9d1ae8f45813b53c6195805b87..a1b73ac9bd56b9ee6011182e196c81ee91a484b3 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -96,6 +96,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_DYNAMIC is not set +CONFIG_XCU_SCHEDULER=n # # CPU/Task time and stats accounting diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 5f84e3dc98d0d98be0794f6c710c3548ebf75839..52148408c41b440ca24bca27787075d2e909e339 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -9,6 +9,7 @@ */ #ifndef _UAPI_ASM_POWERPC_UNISTD_H_ #define _UAPI_ASM_POWERPC_UNISTD_H_ +#define __IGNORE_kabi_reserved456 #ifndef __powerpc64__ #include diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index d5d3307a9290846f9691ae2412e821d0bf523a6f..633036782e59166fb9c9e2514b24705724af060a 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -116,6 +116,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_DYNAMIC is not set +CONFIG_XCU_SCHEDULER=n # # CPU/Task time and stats accounting diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f88268a37ec25d53404d225e414eefe633f5d087..504d1a1701d4449117d502586289e0ef7a149a28 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -377,7 +377,7 @@ 453 64 map_shadow_stack sys_map_shadow_stack 454 common kabi_reserved454 sys_ni_syscall 455 common kabi_reserved455 sys_ni_syscall -456 common kabi_reserved456 sys_ni_syscall +456 common vstream_manage sys_vstream_manage 457 common kabi_reserved457 sys_ni_syscall 458 common kabi_reserved458 sys_ni_syscall 459 common kabi_reserved459 sys_ni_syscall diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index be5e2e747f507657efc74f5ed2b68ed262103fda..c4e01e910ecd27c0d5551b1a81094a8b19d520a1 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -11,6 +11,7 @@ * thing regardless. 
*/ #define __X32_SYSCALL_BIT 0x40000000 +#define __IGNORE_kabi_reserved456 #ifndef __KERNEL__ # ifdef __i386__ diff --git a/drivers/Makefile b/drivers/Makefile index 3955e605df14ffb72cce5203c1d1b60df19d75ee..b06192df4c3ce9d8e97cee14a5fe20ef14449347 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -195,6 +195,7 @@ obj-$(CONFIG_GNSS) += gnss/ obj-$(CONFIG_INTERCONNECT) += interconnect/ obj-$(CONFIG_COUNTER) += counter/ obj-$(CONFIG_MOST) += most/ +obj-$(CONFIG_XCU_SCHEDULER) += xcu/ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ diff --git a/drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt b/drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt new file mode 100644 index 0000000000000000000000000000000000000000..83fada81dbb9cee40a23483bc710283e4ed176ce --- /dev/null +++ b/drivers/xcu/0001-Adapt-910b-npu-driver-for-xsched.txt @@ -0,0 +1,918 @@ +From fe53ea5d5abcc587972079bcae5a706e54f52749 Mon Sep 17 00:00:00 2001 +From: Hui Tang +Date: Tue, 25 Feb 2025 10:18:24 +0000 +Subject: [PATCH openEuler-25.03] Adapt 910b npu driver for xsched + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB + +----------------------------------------- + +Adapt 910b npu driver for xsched + +Signed-off-by: Hui Tang +Signed-off-by: Konstantin Meskhidze +Signed-off-by: Liu Kai +Signed-off-by: Xia Fukun +--- + .../depends/inc_driver/ascend_hal_define.h | 2 +- + rms/trs_drv/chan/chan_rxtx.c | 2 +- + .../lba/near/comm/adapt/trs_near_adapt_init.h | 2 + + rms/trs_drv/lba/near/sia/adapt/Makefile | 7 +- + .../lba/near/sia/adapt/trs_host_init.c | 3 + + .../lba/near/sia/adapt/xsched_xpu_interface.c | 263 ++++++++++++++++++ + rms/trs_drv/trs_core/Makefile | 1 + + rms/trs_drv/trs_core/trs_fops.c | 143 +++++++--- + rms/trs_drv/trs_core/trs_hw_sqcq.c | 3 +- + rms/trs_drv/trs_core/trs_hw_sqcq.h | 3 +- + rms/trs_drv/trs_core/trs_logic_cq.c | 100 ++++--- + rms/trs_drv/trs_core/trs_logic_cq.h | 3 +- + rms/trs_drv/trs_core/trs_sqcq_map.c | 4 + + ts_agent/src/ts_agent_update_sqe.c | 6 + + 14 files changed, 466 insertions(+), 76 deletions(-) + create mode 100755 rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c + +diff --git a/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h b/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h +index a76efda..cc51c4d 100644 +--- a/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h ++++ b/dev_inc_open/inc/depends/inc_driver/ascend_hal_define.h +@@ -893,7 +893,7 @@ typedef enum tagDrvSqCqType { + } drvSqCqType_t; + + struct halSqCqInputInfo { +- drvSqCqType_t type; // normal : 0, callback : 1 ++ drvSqCqType_t type; // normal : 0, callback : 1, logic : 2 + uint32_t tsId; + /* The size and depth of each cqsq can be configured in normal mode, but this function is not yet supported */ + uint32_t sqeSize; // normal : 64Byte +diff --git a/rms/trs_drv/chan/chan_rxtx.c b/rms/trs_drv/chan/chan_rxtx.c +index 1fc72da..1e4ef38 100755 +--- a/rms/trs_drv/chan/chan_rxtx.c ++++ b/rms/trs_drv/chan/chan_rxtx.c +@@ -156,7 +156,7 @@ static int trs_chan_fill_sqe(struct trs_chan *chan, u8 *sqe, int timeout, int ad + /* if using bar to r/w sqe, it should use stack value to store sqe to avoid waster time */ + sqe_addr = trs_chan_mem_is_local_mem(&sq->mem_attr) ? 
dst_addr : sqe_tmp; + +- if (addr_domain == CHAN_ADDR_DOMAIN_KERNEL) { ++ if (addr_domain == CHAN_ADDR_DOMAIN_KERNEL || !access_ok(sqe, sq->para.sqe_size)) { + memcpy_s(sqe_addr, sq->para.sqe_size, sqe, sq->para.sqe_size); + } else { + ret_cpy = copy_from_user(sqe_addr, sqe, sq->para.sqe_size); +diff --git a/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h b/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h +index 3a60d1d..6b4598f 100755 +--- a/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h ++++ b/rms/trs_drv/lba/near/comm/adapt/trs_near_adapt_init.h +@@ -21,4 +21,6 @@ + void trs_ts_adapt_init(struct trs_id_inst *inst); + void trs_ts_adapt_uninit(struct trs_id_inst *inst); + ++int xsched_xcu_group_init(u32 dev_id, u32 ts_num, u32 version); ++ + #endif /* TRS_NEAR_ADAPT_INIT_H */ +diff --git a/rms/trs_drv/lba/near/sia/adapt/Makefile b/rms/trs_drv/lba/near/sia/adapt/Makefile +index 16a3f05..2cbdd43 100755 +--- a/rms/trs_drv/lba/near/sia/adapt/Makefile ++++ b/rms/trs_drv/lba/near/sia/adapt/Makefile +@@ -59,8 +59,13 @@ EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/lba/near/sia/adapt/comm/tsc + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dms/include/ + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dms/config/ + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dbl/dev_urd/ ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/trs_core ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/dev/inc/ ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/drv_davinci_intf_host ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/tsch/ ++ + obj-m += ascend_trs_pm_adapt.o +-ascend_trs_pm_adapt-objs := trs_host_init.o near_comm/trs_host_comm.o near_comm/trs_host_db.o near_comm/trs_host_id.o near_comm/trs_host_mbox.o near_comm/trs_host_msg.o near_comm/trs_near_adapt_init.o ++ascend_trs_pm_adapt-objs := trs_host_init.o near_comm/trs_host_comm.o near_comm/trs_host_db.o near_comm/trs_host_id.o near_comm/trs_host_mbox.o near_comm/trs_host_msg.o near_comm/trs_near_adapt_init.o xsched_xpu_interface.o + + ascend_trs_pm_adapt-objs += near_comm/soc_adapt/soc_adapt.o trs_host_init/trs_host.o trs_host_chan/trs_host_chan.o trs_host_chan/trs_sqe_update.o trs_host_core/trs_host_core.o + ascend_trs_pm_adapt-objs += near_comm/trs_host_chan/stars_v1/trs_chan_stars_v1_ops.o near_comm/trs_host_chan/stars_v1/trs_chan_stars_v1_ops_stars.o +diff --git a/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c b/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c +index abdabc6..9de8549 100755 +--- a/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c ++++ b/rms/trs_drv/lba/near/sia/adapt/trs_host_init.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "soc_res.h" + #include "trs_pub_def.h" +@@ -162,6 +163,8 @@ int trs_host_init(u32 phy_devid) + } + } + ++ xsched_xcu_group_init(phy_devid, ts_num, XCU_HW_V2); ++ + return 0; + } + EXPORT_SYMBOL(trs_host_init); +diff --git a/rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c b/rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c +new file mode 100755 +index 0000000..a7c01ba +--- /dev/null ++++ b/rms/trs_drv/lba/near/sia/adapt/xsched_xpu_interface.c +@@ -0,0 +1,263 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * Description: ++ * Author: Huawei ++ * Create: 2024-06-17 ++ */ ++ ++#ifndef TSDRV_KERNEL_UT ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "securec.h" ++#include "devdrv_manager_comm.h" ++#include "ascend_hal_define.h" ++#include "trs_pub_def.h" ++#include "trs_res_id_def.h" ++#include "trs_proc.h" ++#include "trs_cmd.h" ++#include "davinci_api.h" ++#include "davinci_interface.h" ++#include "davinci_intf_init.h" ++#include "task_struct.h" ++ ++int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg); ++int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg); ++int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg); ++int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg); ++ ++extern struct xcu_group *xcu_group_root; ++extern int xsched_xcu_register(struct xcu_group *group, uint32_t phys_id); ++ ++/* Gets device driver TS context from a file descriptor of opened device. */ ++static void *get_tsdrv_ctx(int fd) ++{ ++ struct davinci_intf_private_stru *file_private_data; ++ void *ctx = NULL; ++ struct fd f; ++ ++ f = fdget(fd); ++ if (!f.file) ++ goto out; ++ ++ file_private_data = f.file->private_data; ++ if (!file_private_data) ++ goto out; ++ ++ ctx = file_private_data->priv_filep.private_data; ++ ++out: ++ fdput(f); ++ return ctx; ++} ++ ++int trs_xsched_ctx_run(struct xcu_op_handler_params *params) ++{ ++ uint32_t sq_id = *(uint32_t *)params->param_1; ++ uint32_t tsId = *(uint32_t *)params->param_2; ++ uint8_t *sqe_addr = params->param_3; ++ uint32_t sqe_num = *(uint32_t *)params->param_4; ++ int32_t timeout = *(int32_t *)params->param_5; ++ int32_t type = *(int32_t *)params->param_6; ++ struct halTaskSendInfo input = {0}; ++ struct trs_proc_ctx *ctx = params->param_7; ++ uint32_t logic_cqId = *(uint32_t *)params->param_8; ++ ++ input.tsId = tsId; ++ input.sqId = sq_id; ++ input.timeout = timeout; ++ input.sqe_addr = sqe_addr; ++ input.sqe_num = sqe_num; ++ input.type = type; ++ ++ trs_debug("%s %d: tsId %u sqId %u timeout %d num %u\n", ++ __FUNCTION__, __LINE__, tsId, sq_id, timeout, sqe_num); ++ ++ /* Send SQ tail to a doorbel. 
*/ ++ return ioctl_trs_sqcq_send(ctx, logic_cqId, (unsigned long)&input);; ++} ++ ++int trs_xsched_ctx_free(struct xcu_op_handler_params *params) ++{ ++ struct trs_proc_ctx *ctx; ++ ++ ctx = get_tsdrv_ctx(params->fd); ++ if (!ctx) ++ return -ENOENT; ++ ++ return ioctl_trs_sqcq_free(ctx, 0, (unsigned long)params->payload); ++} ++ ++int trs_xsched_ctx_wait(struct xcu_op_handler_params *params) ++{ ++ uint32_t tsId = *(uint32_t *)params->param_1; ++ uint32_t cqId = *(uint32_t *)params->param_2; ++ uint32_t streamId = *(uint32_t *)params->param_3; ++ struct ts_stars_sqe_header *sqe = params->param_4; ++ uint8_t *cqe_addr = params->param_5; ++ struct trs_proc_ctx *ctx = params->param_6; ++ int32_t timeout = *(uint32_t *)params->param_7; ++ int32_t cqe_num = 1; ++ struct halReportRecvInfo input = {0}; ++ uint32_t task_id = sqe->task_id; ++ ++ input.type = DRV_LOGIC_TYPE; ++ input.tsId = tsId; ++ input.cqId = cqId; ++ input.timeout = timeout; ++ input.cqe_num = cqe_num; ++ input.cqe_addr = cqe_addr; ++ input.stream_id = streamId; ++ input.task_id = task_id; ++ input.res[0] = 1; /* version 1 for new runtime. */ ++ ++ trs_debug("%s %d: tdId %u logic_cqId %u streamid %u task_id %d timeout %d \n", ++ __FUNCTION__, __LINE__, tsId, cqId, streamId, task_id, timeout); ++ ++ /* Wait for cq irq and read result. */ ++ return ioctl_trs_sqcq_recv(ctx, 0, (unsigned long)&input); ++} ++ ++int trs_xsched_ctx_complete(struct xcu_op_handler_params *params) ++{ ++ return 0; ++} ++ ++int trs_xsched_ctx_alloc(struct xcu_op_handler_params *params) ++{ ++ struct halSqCqInputInfo *input_info = params->payload; ++ uint32_t *tgid = (uint32_t *)params->param_1; ++ uint32_t *sq_id = (uint32_t *)params->param_2; ++ uint32_t *cq_id = (uint32_t *)params->param_3; ++ uint32_t *user_stream_id = (uint32_t *)params->param_4; ++ struct trs_proc_ctx *ctx; ++ int ret = 0; ++ ++ trs_debug("%s %d, input_info %lx, type: %d\n", ++ __FUNCTION__, __LINE__, (unsigned long)input_info, input_info->type); ++ ++ ctx = get_tsdrv_ctx(params->fd); ++ if (!ctx) ++ return -ENOENT; ++ ++ trs_debug("%s %d, pid %d, task_id %d, size %ld\n", ++ __FUNCTION__, __LINE__, ctx->pid, ctx->task_id, sizeof(*ctx)); ++ ret = ioctl_trs_sqcq_alloc(ctx, 0, (unsigned long)input_info); ++ if (ret != 0) ++ return ret; ++ ++ *tgid = ctx->pid; ++ *sq_id = input_info->sqId; ++ *cq_id = input_info->cqId; ++ *user_stream_id = input_info->info[0]; ++ params->param_5 = ctx; ++ return 0; ++} ++ ++int trs_xsched_ctx_logic_alloc(struct xcu_op_handler_params *params) ++{ ++ struct halSqCqInputInfo *input_info = params->payload; ++ uint32_t *logic_cq_id = (uint32_t *)params->param_1; ++ struct trs_proc_ctx *ctx; ++ int ret = 0; ++ ++ trs_debug("%s %d, type: %d\n", __FUNCTION__, __LINE__, input_info->type); ++ ++ ctx = get_tsdrv_ctx(params->fd); ++ if (!ctx) ++ return -ENOENT; ++ ++ trs_debug("%s %d, pid %d, task_id %d, size %ld\n", ++ __FUNCTION__, __LINE__, ctx->pid, ctx->task_id, sizeof(*ctx)); ++ ++ ret = ioctl_trs_sqcq_alloc(ctx, 0, (unsigned long)input_info); ++ if (ret != 0) ++ return ret; ++ ++ *logic_cq_id = input_info->cqId; ++ trs_debug("%s %d, type: %d, cq_id: %u\n", ++ __FUNCTION__, __LINE__, input_info->type, *logic_cq_id); ++ return 0; ++} ++ ++int trs_xsched_ctx_sqe_op(struct xcu_op_handler_params *params) ++{ ++ struct ts_stars_sqe_header *sqe = params->param_2; ++ int op_type = *(int *)(params->param_1); ++ ++ switch (op_type) { ++ case SQE_IS_NOTIFY: ++ return (sqe->type == 0) && (sqe->wr_cqe == 1); ++ case SQE_SET_NOTIFY: ++ if (sqe->type == 0) ++ sqe->wr_cqe = 
1; ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++static struct xcu_operation trs_xsched_ctx_xcu_ops = { ++ .run = trs_xsched_ctx_run, ++ .finish = trs_xsched_ctx_free, ++ .wait = trs_xsched_ctx_wait, ++ .complete = trs_xsched_ctx_complete, ++ .alloc = trs_xsched_ctx_alloc, ++ .logic_alloc = trs_xsched_ctx_logic_alloc, ++ .sqe_op = trs_xsched_ctx_sqe_op, ++}; ++ ++int xsched_xcu_group_init(u32 dev_id, u32 ts_num, u32 version) ++{ ++ struct xcu_group *type_group; ++ struct xcu_group *dev_group; ++ struct xcu_group *ts_group; ++ int tsid; ++ ++ trs_debug("dev_id %u ts_num %u\n", dev_id, ts_num); ++ type_group = xcu_group_find(xcu_group_root, XCU_TYPE_XPU); ++ ++ if (!type_group) { ++ type_group = xcu_group_init(XCU_TYPE_XPU); ++ xcu_group_attach(type_group, xcu_group_root); ++ } ++ ++ dev_group = xcu_group_init(dev_id); ++ ++ trs_debug("%s %d deviceid is %d\n", __FUNCTION__, __LINE__, dev_id); ++ dev_group->id = dev_id; ++ xcu_group_attach(dev_group, type_group); ++ ++ for (tsid = 0; tsid < ts_num; tsid++) { ++ ts_group = xcu_group_init(tsid); ++ ts_group->ver = version; ++ ts_group->opt = &trs_xsched_ctx_xcu_ops; ++ ++ xcu_group_attach(ts_group, dev_group); ++ xsched_xcu_register(ts_group, dev_id); ++ ++ cond_resched(); ++ } ++ ++ return 0; ++} ++#endif +diff --git a/rms/trs_drv/trs_core/Makefile b/rms/trs_drv/trs_core/Makefile +index e0a6a55..8d27ad9 100755 +--- a/rms/trs_drv/trs_core/Makefile ++++ b/rms/trs_drv/trs_core/Makefile +@@ -41,6 +41,7 @@ endif + + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/inc + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/trs_core ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/tsch/ + + obj-m += ascend_trs_core.o + ascend_trs_core-objs := trs_fops.o trs_ts_inst.o trs_proc.o trs_res_mng.o trs_sqcq_map.o trs_hw_sqcq.o trs_sw_sqcq.o trs_logic_cq.o trs_cb_sqcq.o trs_shm_sqcq.o trs_proc_fs.o +diff --git a/rms/trs_drv/trs_core/trs_fops.c b/rms/trs_drv/trs_core/trs_fops.c +index e5702d2..1a9b3c7 100755 +--- a/rms/trs_drv/trs_core/trs_fops.c ++++ b/rms/trs_drv/trs_core/trs_fops.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include "ascend_hal_define.h" + +@@ -33,6 +34,8 @@ + #include "trs_ts_inst.h" + #include "trs_cmd.h" + #include "trs_fops.h" ++#include "trs_logic_cq.h" ++#include "task_struct.h" + + static int (*const trs_res_id_handles[TRS_MAX_CMD])(struct trs_proc_ctx *proc_ctx, + struct trs_core_ts_inst *ts_inst, struct trs_res_id_para *para) = { +@@ -195,8 +198,17 @@ static int (*const trs_sqcq_alloc_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CTRL_TYPE] = trs_sw_sqcq_alloc + }; + +-static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++static bool is_xsched_used(void __user *ptr, int size) + { ++ if (access_ok(ptr, size)) ++ return false; ++ ++ return true; ++} ++ ++int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++{ ++ bool xsched_used = is_xsched_used((void __user *)arg, sizeof(struct halSqCqInputInfo)); + struct trs_core_ts_inst *ts_inst = NULL; + struct halSqCqInputInfo para; + struct trs_alloc_para *alloc_para = NULL; +@@ -204,10 +216,14 @@ static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + struct trs_uio_info uio_info; + int ret; + +- ret = copy_from_user(¶, (struct halSqCqInputInfo __user *)arg, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. 
(ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halSqCqInputInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, (struct halSqCqInputInfo __user *)arg, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + alloc_para = get_alloc_para_addr(¶); +@@ -238,15 +254,24 @@ static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + trs_core_inst_put(ts_inst); + + if (ret == 0) { +- ret = copy_to_user((struct halSqCqInputInfo __user *)arg, ¶, sizeof(para)); +- ret |= copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); +- if (ret != 0) { +- trs_err("Copy to user failed. (ret=%d)\n", ret); ++ if (xsched_used) { ++ memcpy((struct halSqCqInputInfo *)arg, ¶, sizeof(para)); ++ ret = copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); ++ if (ret != 0) { ++ trs_err("Copy to user failed. (ret=%d)\n", ret); ++ } ++ } else { ++ ret = copy_to_user((struct halSqCqInputInfo __user *)arg, ¶, sizeof(para)); ++ ret |= copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); ++ if (ret != 0) { ++ trs_err("Copy to user failed. (ret=%d)\n", ret); ++ } + } + } + + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_alloc); + + static int (*const trs_sqcq_free_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx *proc_ctx, + struct trs_core_ts_inst *ts_inst, struct halSqCqFreeInfo *para) = { +@@ -257,16 +282,20 @@ static int (*const trs_sqcq_free_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CTRL_TYPE] = trs_sw_sqcq_free + }; + +-static int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; + struct halSqCqFreeInfo para; + int ret; + +- ret = copy_from_user(¶, (struct halSqCqFreeInfo __user *)arg, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (is_xsched_used((void *)arg, sizeof(struct halSqCqFreeInfo))) { ++ memcpy(¶, (struct halSqCqFreeInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, (struct halSqCqFreeInfo __user *)arg, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. 
(ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE)) { +@@ -287,6 +316,7 @@ static int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + trs_core_inst_put(ts_inst); + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_free); + + static int ioctl_trs_sqcq_config(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { +@@ -362,17 +392,26 @@ static int (*const trs_sqcq_send_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CALLBACK_TYPE] = trs_cb_sqcq_send, + }; + +-static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; ++ struct halTaskSendInfo *kern_para = (struct halTaskSendInfo *)arg; + struct halTaskSendInfo __user *usr_para = (struct halTaskSendInfo __user *)arg; + struct halTaskSendInfo para; ++ struct trs_logic_cq *logic_cq = NULL; ++ struct ts_stars_sqe_header *sqe_header = NULL; ++ uint32_t logic_cqId = cmd; ++ bool xsched_used = is_xsched_used((void *)arg, sizeof(struct halTaskSendInfo)); + int ret; + +- ret = copy_from_user(¶, usr_para, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halTaskSendInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, usr_para, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE) || (trs_sqcq_send_handles[para.type] == NULL) || +@@ -387,37 +426,69 @@ static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + return -EINVAL; + } + ++ if (xsched_used) { ++ logic_cq = &ts_inst->logic_cq_ctx.cq[logic_cqId]; ++ if (logic_cq == NULL) { ++ trs_err("Invalid para. (logic_cqId=%u)\n", logic_cqId); ++ return -EINVAL; ++ } ++ ++ sqe_header = (struct ts_stars_sqe_header *)para.sqe_addr; ++ trs_debug("sqe_header->type=%u logic_cqId=%u stream_id=%u task_id=%u\n", ++ sqe_header->type, logic_cqId, sqe_header->rt_stream_id, sqe_header->task_id); ++ ++ if ((sqe_header->type == 0) && (sqe_header->wr_cqe == 1)) { ++ trs_debug("logic_cq->wakeup_num=%u\n", atomic_read(&logic_cq->wakeup_num)); ++ ++ if (atomic_read(&logic_cq->wakeup_num) > 0) { ++ atomic_dec(&logic_cq->wakeup_num); ++ trs_debug("logic_cq->wakeup_num=%u\n", atomic_read(&logic_cq->wakeup_num)); ++ } ++ } ++ } ++ + ret = trs_sqcq_send_handles[para.type](proc_ctx, ts_inst, ¶); + + trs_core_inst_put(ts_inst); + + if ((ret == 0) && (para.type == DRV_NORMAL_TYPE)) { +- ret = put_user(para.pos, &usr_para->pos); +- if (ret != 0) { +- trs_err("Put to user fail. (devid=%u; tsid=%u; sqId=%u)\n", proc_ctx->devid, para.tsId, para.sqId); ++ if (xsched_used) { ++ kern_para->pos = para.pos; ++ } else { ++ ret = put_user(para.pos, &usr_para->pos); ++ if (ret != 0) { ++ trs_err("Put to user fail. 
(devid=%u; tsid=%u; sqId=%u)\n", proc_ctx->devid, para.tsId, para.sqId); ++ } + } + } + + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_send); + + static int (*const trs_sqcq_recv_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx *proc_ctx, +- struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) = { ++ struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, bool is_xsched) = { + [DRV_NORMAL_TYPE] = trs_hw_sqcq_recv, + [DRV_LOGIC_TYPE] = trs_logic_cq_recv, + }; + +-static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; +- struct halReportRecvInfo *usr_para = (struct halReportRecvInfo __user *)arg; ++ struct halReportRecvInfo *kern_para = (struct halReportRecvInfo *)arg; ++ struct halReportRecvInfo __user *usr_para = (struct halReportRecvInfo __user *)arg; + struct halReportRecvInfo para; + int ret; ++ bool xsched_used = is_xsched_used((void *)arg, sizeof(struct halReportRecvInfo)); + +- ret = copy_from_user(¶, usr_para, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halReportRecvInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, usr_para, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE) || (trs_sqcq_recv_handles[para.type] == NULL) || +@@ -432,11 +503,16 @@ static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + return -EINVAL; + } + +- ret = trs_sqcq_recv_handles[para.type](proc_ctx, ts_inst, ¶); ++ ret = trs_sqcq_recv_handles[para.type](proc_ctx, ts_inst, ¶, xsched_used); ++ + if (ret == 0) { +- ret = put_user(para.report_cqe_num, &usr_para->report_cqe_num); +- if (ret != 0) { +- trs_err("Put to user fail. (devid=%u; tsid=%u; cqId=%u)\n", proc_ctx->devid, para.tsId, para.cqId); ++ if (xsched_used) { ++ kern_para->report_cqe_num = para.report_cqe_num; ++ } else { ++ ret = put_user(para.report_cqe_num, &usr_para->report_cqe_num); ++ if (ret != 0) { ++ trs_err("Put to user fail. 
(devid=%u; tsid=%u; cqId=%u)\n", proc_ctx->devid, para.tsId, para.cqId); ++ } + } + } else { + u32 ts_status; +@@ -449,6 +525,7 @@ static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + + return ret; + } ++EXPORT_SYMBOL(ioctl_trs_sqcq_recv); + + int ioctl_trs_stl_bind(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { +diff --git a/rms/trs_drv/trs_core/trs_hw_sqcq.c b/rms/trs_drv/trs_core/trs_hw_sqcq.c +index 825d603..10f3903 100755 +--- a/rms/trs_drv/trs_core/trs_hw_sqcq.c ++++ b/rms/trs_drv/trs_core/trs_hw_sqcq.c +@@ -1160,7 +1160,8 @@ int trs_hw_sqcq_send(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_ + return ret; + } + +-int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) ++int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched) + { + struct trs_id_inst *inst = &ts_inst->inst; + struct trs_chan_recv_para recv_para; +diff --git a/rms/trs_drv/trs_core/trs_hw_sqcq.h b/rms/trs_drv/trs_core/trs_hw_sqcq.h +index b32cd64..b6affdf 100755 +--- a/rms/trs_drv/trs_core/trs_hw_sqcq.h ++++ b/rms/trs_drv/trs_core/trs_hw_sqcq.h +@@ -32,7 +32,8 @@ int trs_sqcq_config(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_i + int trs_sqcq_query(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqQueryInfo *para); + + int trs_hw_sqcq_send(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halTaskSendInfo *para); +-int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para); ++int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched); + void trs_proc_diable_sq_status(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, + int res_type, u32 res_id); + void trs_hw_sqcq_recycle(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, int res_type, u32 res_id); +diff --git a/rms/trs_drv/trs_core/trs_logic_cq.c b/rms/trs_drv/trs_core/trs_logic_cq.c +index d35b8d5..72cf64a 100755 +--- a/rms/trs_drv/trs_core/trs_logic_cq.c ++++ b/rms/trs_drv/trs_core/trs_logic_cq.c +@@ -265,13 +265,15 @@ static bool trs_logic_is_cqe_match(struct trs_logic_cq *logic_cq, void *cqe, u32 + } + + static int trs_logic_cq_recv_para_check(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + struct trs_id_inst *inst = &ts_inst->inst; + +- if (!trs_proc_has_res(proc_ctx, ts_inst, TRS_LOGIC_CQ, para->cqId)) { +- trs_err("Not proc owner cq. (devid=%u; tsid=%u; logic_cqid=%u)\n", inst->devid, inst->tsid, para->cqId); +- return -EINVAL; ++ if (!is_xsched) { ++ if (!trs_proc_has_res(proc_ctx, ts_inst, TRS_LOGIC_CQ, para->cqId)) { ++ trs_err("Not proc owner cq. 
(devid=%u; tsid=%u; logic_cqid=%u)\n", inst->devid, inst->tsid, para->cqId); ++ return -EINVAL; ++ } + } + + if (((para->timeout < 0) && (para->timeout != -1)) || (para->cqe_num == 0) || (para->cqe_addr == NULL)) { +@@ -441,7 +443,7 @@ static void trs_logic_cq_eliminate_holes(struct trs_logic_cq *logic_cq, u32 star + } + + static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_logic_cq *logic_cq, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + u32 start, report_cnt, tail; + u32 rollback = 0; +@@ -463,11 +465,17 @@ static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_ + } + + trs_logic_cq_copy_trace("Logic Cq Recv Match", ts_inst, logic_cq, start, report_cnt); +- ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), +- (unsigned long)report_cnt * logic_cq->cqe_size); +- if (ret != 0) { +- trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); +- return ret; ++ ++ if (is_xsched) { ++ memcpy((void *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ } else { ++ ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ if (ret != 0) { ++ trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); ++ return ret; ++ } + } + + para->report_cqe_num = report_cnt; +@@ -480,7 +488,7 @@ static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_ + } + + static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_logic_cq *logic_cq, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + u32 start, report_cnt, tail; + int ret; +@@ -490,11 +498,17 @@ static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct + report_cnt = (tail > start) ? 
tail - start : logic_cq->cq_depth - start; + + trs_logic_cq_copy_trace("Logic Cq Recv NoMatch", ts_inst, logic_cq, start, report_cnt); +- ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), +- (unsigned long)report_cnt * logic_cq->cqe_size); +- if (ret != 0) { +- trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); +- return ret; ++ ++ if (is_xsched) { ++ memcpy((void *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ } else { ++ ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ if (ret != 0) { ++ trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); ++ return ret; ++ } + } + + para->report_cqe_num = report_cnt; +@@ -503,7 +517,7 @@ static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct + } + + static int trs_logic_cq_copy_report(struct trs_core_ts_inst *ts_inst, +- struct trs_logic_cq *logic_cq, struct halReportRecvInfo *para) ++ struct trs_logic_cq *logic_cq, struct halReportRecvInfo *para, bool is_xsched) + { + u32 version = para->res[0]; + int full_flag = 0; +@@ -522,9 +536,9 @@ static int trs_logic_cq_copy_report(struct trs_core_ts_inst *ts_inst, + } + + if (version == 1) { +- ret = trs_logic_cq_match_copy(ts_inst, logic_cq, para); // runtime new version ++ ret = trs_logic_cq_match_copy(ts_inst, logic_cq, para, is_xsched); // runtime new version + } else { +- ret = trs_logic_cq_non_match_copy(ts_inst, logic_cq, para); ++ ret = trs_logic_cq_non_match_copy(ts_inst, logic_cq, para, is_xsched); + } + if (ret != 0) { + return ret; +@@ -553,8 +567,8 @@ static int trs_logic_cq_wait_event(struct trs_logic_cq *logic_cq, int timeout) + long ret, tm; + + atomic_inc(&logic_cq->wait_thread_num); +- trs_debug("Wake wait start. (logic_cqid=%u; timeout=%d; wait_thread_num=%d)\n", +- logic_cq->cqid, timeout, atomic_read(&logic_cq->wait_thread_num)); ++ trs_debug("Wake wait start. (logic_cqid=%u; timeout=%d; wait_thread_num=%d, wakeup_num=%d)\n", ++ logic_cq->cqid, timeout, atomic_read(&logic_cq->wait_thread_num), atomic_read(&logic_cq->wakeup_num)); + + tm = (timeout == -1) ? 
MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies((u32)timeout); + (void)prepare_to_wait_exclusive(&logic_cq->wait_queue, &wq_entry, TASK_INTERRUPTIBLE); +@@ -592,12 +606,13 @@ static int trs_logic_cq_wait_event(struct trs_logic_cq *logic_cq, int timeout) + return ret; + } + +-int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) ++int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched) + { + struct trs_logic_cq *logic_cq = NULL; + int ret; + +- ret = trs_logic_cq_recv_para_check(proc_ctx, ts_inst, para); ++ ret = trs_logic_cq_recv_para_check(proc_ctx, ts_inst, para, is_xsched); + if (ret != 0) { + return ret; + } +@@ -609,24 +624,35 @@ int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts + return trs_thread_bind_irq_wait(logic_cq, para->timeout); + } + trs_logic_cq_recv_trace("Recv start", ts_inst, para); +- do { +- mutex_lock(&logic_cq->mutex); +- ret = trs_logic_cq_copy_report(ts_inst, logic_cq, para); +- mutex_unlock(&logic_cq->mutex); +- if (ret == 0) { +- logic_cq->stat.recv++; +- trs_logic_cq_recv_trace("Recv finish", ts_inst, para); +- return ret; +- } + +- if (ret == -EAGAIN) { ++ if (is_xsched) { + if (para->timeout == 0) { +- para->report_cqe_num = 0; +- return 0; ++ para->report_cqe_num = 0; ++ return 0; + } + ret = trs_logic_cq_wait_event(logic_cq, para->timeout); +- } +- } while (ret >= 0); ++ trs_debug("Skip reading report for xsched, waiting for cq irq: logic_cqid=%u, timeout=%u, ret=%u)\n", ++ para->cqId, para->timeout, ret); ++ } else { ++ do { ++ mutex_lock(&logic_cq->mutex); ++ ret = trs_logic_cq_copy_report(ts_inst, logic_cq, para, is_xsched); ++ mutex_unlock(&logic_cq->mutex); ++ if (ret == 0) { ++ logic_cq->stat.recv++; ++ trs_logic_cq_recv_trace("Recv finish", ts_inst, para); ++ return ret; ++ } ++ ++ if (ret == -EAGAIN) { ++ if (para->timeout == 0) { ++ para->report_cqe_num = 0; ++ return 0; ++ } ++ ret = trs_logic_cq_wait_event(logic_cq, para->timeout); ++ } ++ } while (ret >= 0); ++ } + + return ret; + } +diff --git a/rms/trs_drv/trs_core/trs_logic_cq.h b/rms/trs_drv/trs_core/trs_logic_cq.h +index a45b110..b776b7f 100755 +--- a/rms/trs_drv/trs_core/trs_logic_cq.h ++++ b/rms/trs_drv/trs_core/trs_logic_cq.h +@@ -90,7 +90,8 @@ struct trs_core_ts_inst; + + int trs_logic_cq_alloc(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqInputInfo *para); + int trs_logic_cq_free(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqFreeInfo *para); +-int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para); ++int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched); + + void trs_logic_set_cqe_version(struct trs_core_ts_inst *ts_inst, u32 logic_cqid, u32 cqe_verion); + int trs_logic_cq_enque(struct trs_core_ts_inst *ts_inst, u32 logic_cq_id, u32 stream_id, u32 task_id, void *cqe); +diff --git a/rms/trs_drv/trs_core/trs_sqcq_map.c b/rms/trs_drv/trs_core/trs_sqcq_map.c +index 8103d65..998ecfb 100755 +--- a/rms/trs_drv/trs_core/trs_sqcq_map.c ++++ b/rms/trs_drv/trs_core/trs_sqcq_map.c +@@ -305,6 +305,10 @@ int trs_sq_remap(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst + int sq_reg_type = TRS_MAP_TYPE_REG; + int ret; + ++ ret = 0; ++ goto out; ++ ++ + if ((sq_info->sq_phy_addr == 0) || (sq_info->db_addr == 0) 
|| (uio_info->sq_que_addr == 0)) { + ret = 0; + goto out; +diff --git a/ts_agent/src/ts_agent_update_sqe.c b/ts_agent/src/ts_agent_update_sqe.c +index bb4e3b2..01fe60c 100755 +--- a/ts_agent/src/ts_agent_update_sqe.c ++++ b/ts_agent/src/ts_agent_update_sqe.c +@@ -1146,6 +1146,12 @@ static void cqe_set_drop_flag(ts_stars_cqe_t *cqe) + if (cqe->warn || (cqe->sqe_type == TS_STARS_SQE_TYPE_PCIE_DMA)) { + /* cqe has been processed in ts_agent, no need to send to runtime */ + cqe->drop_flag = 1U; ++ ts_agent_debug("cqe has been processed in ts_agent, no need to send to runtime, drop_flag=%u\n", cqe->drop_flag); ++ ++ /* no drop, xsched needs to proc cqe */ ++ cqe->drop_flag = 0U; ++ ts_agent_debug("send cqe to runtime/xsched anyway, drop_flag=%u\n", cqe->drop_flag); ++ + return; + } + cqe->drop_flag = 0U; +-- +2.34.1 diff --git a/drivers/xcu/Makefile b/drivers/xcu/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..575115b148ecb689af0cb3047e443a95958e21f7 --- /dev/null +++ b/drivers/xcu/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_XCU_SCHEDULER) += xcu_group.o diff --git a/drivers/xcu/xcu_group.c b/drivers/xcu/xcu_group.c new file mode 100644 index 0000000000000000000000000000000000000000..891ce70bb36f728ecacc7bd3c3fb22b4ad495865 --- /dev/null +++ b/drivers/xcu/xcu_group.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Code for NPU driver support + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ +#include +#include +#include +#include + +static DECLARE_RWSEM(xcu_group_rwsem); + +struct xcu_group *xcu_group_init(int id) +{ + struct xcu_group *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->id = id; + node->type = XCU_TYPE_XPU; + idr_init(&node->next_layer); + + return node; +} +EXPORT_SYMBOL(xcu_group_init); + +int __xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group) +{ + int id = new_group->id; + + if (id == -1) + id = idr_alloc(&previous_group->next_layer, new_group, 0, + INT_MAX, GFP_KERNEL); + else + id = idr_alloc(&previous_group->next_layer, new_group, id, + id + 1, GFP_KERNEL); + + if (id < 0) { + XSCHED_ERR("Fail to attach xcu_group: id conflict @ %s\n", + __func__); + return -EEXIST; + } + new_group->id = id; + new_group->previous_layer = previous_group; + + return 0; +} + +int xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group) +{ + int ret; + + down_write(&xcu_group_rwsem); + ret = __xcu_group_attach(new_group, previous_group); + up_write(&xcu_group_rwsem); + + return ret; +} +EXPORT_SYMBOL(xcu_group_attach); + +static inline void __xcu_group_detach(struct xcu_group *group) +{ + if (!group || !group->previous_layer) + return; + + idr_remove(&group->previous_layer->next_layer, group->id); + group->previous_layer = NULL; +} + +void xcu_group_detach(struct xcu_group *group) +{ + down_write(&xcu_group_rwsem); + __xcu_group_detach(group); + up_write(&xcu_group_rwsem); +} +EXPORT_SYMBOL(xcu_group_detach); + +void xcu_group_free(struct xcu_group *group) +{ + idr_destroy(&group->next_layer); + if (group != xcu_group_root) + kfree(group); +} +EXPORT_SYMBOL(xcu_group_free); + +static struct xcu_group *__xcu_group_find_nolock(struct xcu_group *group, + int id) +{ + return idr_find(&group->next_layer, id); +} + +struct xcu_group *xcu_group_find(struct xcu_group *group, int id) +{ + struct xcu_group *result; + + down_read(&xcu_group_rwsem); + result = __xcu_group_find_nolock(group, id); + up_read(&xcu_group_rwsem); + + return result; +} +EXPORT_SYMBOL(xcu_group_find); + +/* This function runs "run" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object + */ +int xcu_run(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->run) { + XSCHED_ERR("No function [run] called.\n"); + return -EINVAL; + } + + return params->group->opt->run(params); +} + +/* This function runs "wait" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object + */ +int xcu_wait(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->wait) { + XSCHED_ERR("No function [wait] called.\n"); + return -EINVAL; + } + + return params->group->opt->wait(params); +} + +/* This function runs "complete" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + */ +int xcu_complete(struct xcu_op_handler_params *params) +{ + return 0; +} + +/* This function runs "finish" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement deallocation + * and freeing memory for SQ and CQ buffers. 
+ */ +int xcu_finish(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->finish) { + XSCHED_ERR("No function [finish] called.\n"); + return -EINVAL; + } + + return params->group->opt->finish(params); +} + +/* This function runs a "alloc" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement allocation + * and registering memory for SQ and CQ buffers. + */ +int xcu_alloc(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->alloc) { + XSCHED_ERR("No function [alloc] called.\n"); + return -EINVAL; + } + + return params->group->opt->alloc(params); +} + +/* This function runs a "logic_alloc" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement allocation + * and registering memory of logic CQ buffer. + */ +int xcu_logic_alloc(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->logic_alloc) { + XSCHED_ERR("No function [logic_alloc] called.\n"); + return -EINVAL; + } + + return params->group->opt->logic_alloc(params); +} + +/* This function runs a "logic_free" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement deallocation + * and unregistering memory of a logic CQ buffer. + */ +int xcu_logic_free(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->logic_free) { + XSCHED_ERR("No function [logic_free] called.\n"); + return -EINVAL; + } + + return params->group->opt->logic_free(params); +} + +/* This function runs a "sqe_op" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to set or get sqe info. 
+ */ +int xcu_sqe_op(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->sqe_op) { + XSCHED_ERR("No function [sqe_op] called.\n"); + return -EINVAL; + } + + return params->group->opt->sqe_op(params); +} + +static struct xcu_group __xcu_group_root = { + .id = 0, + .type = XCU_TYPE_ROOT, + .next_layer = IDR_INIT(next_layer), +}; + +struct xcu_group *xcu_group_root = &__xcu_group_root; +EXPORT_SYMBOL(xcu_group_root); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 85fa78049bd0fbebee59db79363c962541a15c92..e65ae90946c2d3e27f724f4b965157b404b7b849 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -61,6 +61,10 @@ SUBSYS(pids) SUBSYS(rdma) #endif +#if IS_ENABLED(CONFIG_CGROUP_XCU) +SUBSYS(xcu) +#endif + #if IS_ENABLED(CONFIG_CGROUP_MISC) SUBSYS(misc) #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 36c592e43d65208f6d1b3099fa6805a24d5961de..119aabc72a2d403596ff695208e3fc02c9de350e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -74,6 +74,7 @@ struct landlock_ruleset_attr; enum landlock_rule_type; struct cachestat_range; struct cachestat; +struct vstream_args; #include #include @@ -948,6 +949,7 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); +asmlinkage long sys_vstream_manage(struct vstream_args __user *arg, int cmd); /* * Architecture-specific system calls */ diff --git a/include/linux/vstream.h b/include/linux/vstream.h new file mode 100644 index 0000000000000000000000000000000000000000..f0c290dc184c5b11ee8686921a231e7708da81cd --- /dev/null +++ b/include/linux/vstream.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VSTREAM_H +#define _LINUX_VSTREAM_H + +#include +#include + +#define MAX_VSTREAM_SIZE 2048 + +/* Vstream metadata describes each incoming kick + * that gets stored into a list of pending kicks + * inside a vstream to keep track of what is left + * to be processed by a driver. + */ +typedef struct vstream_metadata { + /* A value of SQ tail that has been passed with the + * kick that is described by this exact metadata object. + */ + uint32_t sq_tail; + uint32_t sqe_num; + uint32_t sq_id; + uint8_t sqe[XCU_SQE_SIZE_MAX]; + + /* Report buffer for fake read. */ + int8_t cqe[XCU_CQE_BUF_SIZE]; + uint32_t cqe_num; + int32_t timeout; + + /* A node for metadata list */ + struct list_head node; + + struct vstream_info *parent; + + /* Time of list insertion */ + ktime_t add_time; +} vstream_metadata_t; + +typedef struct vstream_info { + uint32_t user_stream_id; + uint32_t id; + uint32_t vcq_id; + uint32_t logic_vcq_id; + uint32_t dev_id; + uint32_t channel_id; + uint32_t fd; + uint32_t task_type; + int tgid; + int sqcq_type; + + void *drv_ctx; + + int inode_fd; + + /* Pointer to corresponding context. */ + struct xsched_context *ctx; + + /* List node in context's vstream list. */ + struct list_head ctx_node; + + /* Pointer to an CU object on which this + * vstream is currently being processed. + * NULL if vstream is not being processed. + */ + struct xsched_cu *xcu; + + /* List node in an CU list of vstreams that + * are currently being processed by this specific CU. + */ + struct list_head xcu_node; + + /* Private vstream data. */ + void *data; + + spinlock_t stream_lock; + + uint32_t kicks_count; + + /* List of metadata a.k.a. 
all recorded unprocesed + * kicks for this exact vstream. + */ + struct list_head metadata_list; +} vstream_info_t; + +typedef int vstream_manage_t(struct vstream_args *arg); + +#endif /* _LINUX_VSTREAM_H */ diff --git a/include/linux/xcu_group.h b/include/linux/xcu_group.h new file mode 100644 index 0000000000000000000000000000000000000000..e73c64f6c5209aaef7fc0e891a6400062132c440 --- /dev/null +++ b/include/linux/xcu_group.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __XSCHED_XCU_GROUP_H__ +#define __XSCHED_XCU_GROUP_H__ + +#include +#include + +#ifndef CONFIG_XSCHED_NR_CUS +#define CONFIG_XSCHED_NR_CUS 1 +#endif /* !CONFIG_XSCHED_NR_CUS */ +#define XSCHED_NR_CUS CONFIG_XSCHED_NR_CUS + +extern struct xcu_group *xcu_group_root; + +enum xcu_type { + XCU_TYPE_ROOT, + XCU_TYPE_XPU, +}; + +enum xcu_sqe_op_type { + SQE_SET_NOTIFY, + SQE_IS_NOTIFY, +}; + +/** + * @group: value for this entry. + * @hash_node: hash node list. + * @dev_id: device id to bind with ctx. + */ +struct ctx_devid_revmap_data { + unsigned int dev_id; + struct xcu_group *group; + struct hlist_node hash_node; +}; + +struct xcu_op_handler_params { + int fd; + struct xcu_group *group; + void *payload; + union { + struct { + void *param_1; + void *param_2; + void *param_3; + void *param_4; + void *param_5; + void *param_6; + void *param_7; + void *param_8; + }; + }; +}; + +typedef int (*xcu_op_handler_fn_t)(struct xcu_op_handler_params *params); + +struct xcu_operation { + xcu_op_handler_fn_t run; + xcu_op_handler_fn_t finish; + xcu_op_handler_fn_t wait; + xcu_op_handler_fn_t complete; + xcu_op_handler_fn_t alloc; + xcu_op_handler_fn_t logic_alloc; + xcu_op_handler_fn_t logic_free; + xcu_op_handler_fn_t sqe_op; +}; + +struct xcu_group { + /* sq id. */ + uint32_t id; + + /* Type of XCU group. */ + enum xcu_type type; + + /* IDR for the next layer of XCU group tree. */ + struct idr next_layer; + + /* Pointer to the previous XCU group in the XCU group tree. */ + struct xcu_group *previous_layer; + + /* Pointer to operation fn pointers object describing + * this XCU group's callbacks. + */ + struct xcu_operation *opt; + + /* Pointer to the XCU related to this XCU group. */ + struct xsched_cu *xcu; + + /* Mask of XCU ids associated with this XCU group + * and this group's children's XCUs. 
+ */ + DECLARE_BITMAP(xcu_mask, XSCHED_NR_CUS); +}; + +#ifdef CONFIG_XCU_SCHEDULER +int xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group); +void xcu_group_detach(struct xcu_group *group); +struct xcu_group *xcu_group_find(struct xcu_group *group, int id); +struct xcu_group *xcu_group_init(int id); +void xcu_group_free(struct xcu_group *group); + +extern int xcu_run(struct xcu_op_handler_params *params); +extern int xcu_wait(struct xcu_op_handler_params *params); +extern int xcu_complete(struct xcu_op_handler_params *params); +extern int xcu_finish(struct xcu_op_handler_params *params); +extern int xcu_alloc(struct xcu_op_handler_params *params); +extern int xcu_logic_alloc(struct xcu_op_handler_params *params); +extern int xcu_logic_free(struct xcu_op_handler_params *params); +extern int xcu_sqe_op(struct xcu_op_handler_params *params); +#endif /* !CONFIG_XCU_SCHEDULER */ + +#endif /* __XSCHED_XCU_GROUP_H__ */ diff --git a/include/linux/xsched.h b/include/linux/xsched.h new file mode 100644 index 0000000000000000000000000000000000000000..3a6007f33387105d335f5917387891545f6a750d --- /dev/null +++ b/include/linux/xsched.h @@ -0,0 +1,625 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_XSCHED_H__ +#define __LINUX_XSCHED_H__ + +#include +#include +#include +#include +#include + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#ifdef CONFIG_XCU_VSTREAM +#define MAX_VSTREAM_NUM (512) +#endif + +#define XSCHED_ERR_PREFIX "XSched [ERROR]: " +#define XSCHED_ERR(fmt, ...) \ + pr_err(pr_fmt(XSCHED_ERR_PREFIX fmt), ##__VA_ARGS__) + +#define XSCHED_WARN_PREFIX "XSched [WARNING]: " +#define XSCHED_WARN(fmt, ...) \ + pr_warn(pr_fmt(XSCHED_WARN_PREFIX fmt), ##__VA_ARGS__) + +/* + * Debug specific prints for XSched + */ + +#define XSCHED_DEBUG_PREFIX "XSched [DEBUG]: " +#define XSCHED_DEBUG(fmt, ...) \ + pr_debug(pr_fmt(XSCHED_DEBUG_PREFIX fmt), ##__VA_ARGS__) + +#define XSCHED_CALL_STUB() \ + XSCHED_DEBUG(" -----* %s @ %s called *-----\n", __func__, __FILE__) + +#define XSCHED_EXIT_STUB() \ + XSCHED_DEBUG(" -----* %s @ %s exited *-----\n", __func__, __FILE__) + +#define XCU_HASH_ORDER 6 + +#define RUNTIME_INF ((u64)~0ULL) +#define XSCHED_TIME_INF RUNTIME_INF +#define XSCHED_CFS_ENTITY_WEIGHT_DFLT 1 +#define XSCHED_CFS_QUOTA_PERIOD_MS (100 * NSEC_PER_MSEC) +#define XSCHED_CFG_SHARE_DFLT 1024 + +#define __GET_VS_TASK_TYPE(t) ((t)&0xFF) +#define __GET_VS_TASK_PRIO_RT(t) (((t) >> 8) & 0xFF) +#define GET_VS_TASK_TYPE(vs_ptr) __GET_VS_TASK_TYPE((vs_ptr)->task_type) +#define GET_VS_TASK_PRIO_RT(vs_ptr) __GET_VS_TASK_PRIO_RT((vs_ptr)->task_type) + +/* + * A default kick slice for RT class XSEs. + */ +#define XSCHED_RT_KICK_SLICE 20 +/* + * A default kick slice for CFS class XSEs. 
+ */ +#define XSCHED_CFS_KICK_SLICE 10 + +extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; + +enum xcu_sched_type { + XSCHED_TYPE_RT, + XSCHED_TYPE_DFLT = XSCHED_TYPE_RT, + XSCHED_TYPE_CFS, + XSCHED_TYPE_NUM, +}; + +enum xse_prio { + XSE_PRIO_LOW, + XSE_PRIO_HIGH, + NR_XSE_PRIO, +}; + +enum xsched_rq_state { + XRQ_STATE_INACTIVE = 0x00, + XRQ_STATE_IDLE = 0x01, + XRQ_STATE_BUSY = 0x02, + XRQ_STATE_SUBMIT = 0x04, + XRQ_STATE_WAIT_RUNNING = 0x08, +}; + +enum xse_state { + XSE_PREPARE, + XSE_READY, + XSE_RUNNING, + XSE_BLOCK, + XSE_DEAD, +}; + +enum xse_flag { + XSE_TIF_NONE, + XSE_TIF_PREEMPT, + XSE_TIF_BALANCE, /* Unused so far */ +}; + +extern const struct xsched_class rt_xsched_class; +extern const struct xsched_class fair_xsched_class; + +#define xsched_first_class (&rt_xsched_class) +#define for_each_xsched_class(class) \ + for (class = xsched_first_class; class; class = class->next) +#define for_each_xse_prio(prio) \ + for (prio = XSE_PRIO_LOW; prio < NR_XSE_PRIO; prio++) +#define for_each_vstream_in_ctx(vs, ctx) \ + list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node) + +/* Manages xsched CFS-like class rbtree based runqueue. */ +struct xsched_rq_cfs { + unsigned int nr_running; + unsigned int load; + u64 min_xruntime; + struct rb_root_cached ctx_timeline; +}; + +/* Manages xsched RT-like class linked list based runqueue. + * + * Now RT-like class runqueue structs is identical + * but will most likely grow different in the + * future as the Xsched evolves. + */ +struct xsched_rq_rt { + struct list_head rq[NR_XSE_PRIO]; + unsigned int nr_running; + int prio_nr_running[NR_XSE_PRIO]; + atomic_t prio_nr_kicks[NR_XSE_PRIO]; + DECLARE_BITMAP(curr_prios, NR_XSE_PRIO); +}; + +/* Base XSched runqueue object structure that contains both mutual and + * individual parameters for different scheduling classes. + */ +struct xsched_rq { + struct xsched_entity *curr_xse; + const struct xsched_class *class; + + int state; + + /* RT class run queue.*/ + struct xsched_rq_rt rt; + /* CFS class run queue.*/ + struct xsched_rq_cfs cfs; +}; + +enum xcu_state { + XCU_INACTIVE, + XCU_IDLE, + XCU_BUSY, + XCU_SUBMIT, +}; + +enum xsched_cu_status { + /* Worker not initialized. */ + XSCHED_XCU_NONE, + + /* Worker is sleeping in idle state. */ + XSCHED_XCU_WAIT_IDLE, + + /* Worker is sleeping in running state. */ + XSCHED_XCU_WAIT_RUNNING, + + /* Worker is active but not processing anything. */ + XSCHED_XCU_ACTIVE, + + NR_XSCHED_XCU_STATUS, +}; + +/* This is the abstraction object of the xcu computing unit. */ +struct xsched_cu { + uint32_t id; + uint32_t state; + + /* RT class kick counter. */ + atomic_t pending_kicks_rt; + /* CFS class kick counter. 
*/ + atomic_t pending_kicks_cfs; + + struct task_struct *worker; + + /* Storage list for contexts associated with this xcu */ + uint32_t nr_ctx; + struct list_head ctx_list; + struct mutex ctx_list_lock; + +#ifdef CONFIG_XCU_VSTREAM + vstream_info_t *vs_array[MAX_VSTREAM_NUM]; + struct mutex vs_array_lock; +#endif + + struct xsched_rq xrq; + struct list_head vsm_list; + + struct xcu_group *group; + + struct mutex xcu_lock; + + wait_queue_head_t wq_xcu_idle; + wait_queue_head_t wq_xcu_running; +}; + +extern int num_active_xcu; +#define for_each_active_xcu(xcu, id) \ + for ((id) = 0, xcu = xsched_cu_mgr[(id)]; \ + (id) < num_active_xcu && (xcu = xsched_cu_mgr[(id)]); (id)++) + +struct xsched_entity_rt { + struct list_head list_node; + enum xse_state state; + enum xse_flag flag; + enum xse_prio prio; + + ktime_t timeslice; + s64 kick_slice; +}; + +struct xsched_entity_cfs { + struct rb_node run_node; + + /* Rq on which this entity is (to be) queued. */ + struct xsched_rq_cfs *cfs_rq; + + /* Value of "virtual" runtime to sort entities in rbtree */ + u64 xruntime; + u32 weight; + + /* Execution time of scheduling entity */ + u64 exec_start; + u64 sum_exec_runtime; +}; + +struct xsched_entity { + uint32_t task_type; + + bool on_rq; + + pid_t owner_pid; + pid_t tgid; + + /* Amount of pending kicks currently sitting on this context. */ + atomic_t kicks_pending_ctx_cnt; + + /* Amount of submitted kicks context, used for resched decision. */ + atomic_t submitted_one_kick; + + size_t total_scheduled; + size_t total_submitted; + + /* File descriptor coming from an associated context + * used for identifying a given xsched entity in + * info and error prints. + */ + uint32_t fd; + + /* Xsched class for this xse. */ + const struct xsched_class *class; + + /* RT class entity. */ + struct xsched_entity_rt rt; + /* CFS class entity. */ + struct xsched_entity_cfs cfs; + + /* Pointer to context object. */ + struct xsched_context *ctx; + + /* Xsched entity execution statistics */ + u64 last_exec_runtime; + + /* Pointer to an XCU object that represents an XCU + * on which this xse is to be processed or is being + * processed currently. + */ + struct xsched_cu *xcu; + + /* Link to list of xsched_group items */ + struct list_head group_node; + struct xsched_group *parent_grp; + bool is_group; + + /* General purpose xse lock. 
*/ + spinlock_t xse_lock; +}; + +static inline bool xse_is_rt(const struct xsched_entity *xse) +{ + return xse && xse->class == &rt_xsched_class; +} + +static inline bool xse_is_cfs(const struct xsched_entity *xse) +{ + return xse && xse->class == &fair_xsched_class; +} + +/* xsched_group's xcu related stuff */ +struct xsched_group_xcu_priv { + /* Owner of this group */ + struct xsched_group *self; + + /* xcu id */ + int xcu_id; + + /* Link to scheduler */ + struct xsched_entity xse; /* xse of this group on runqueue */ + struct xsched_rq_cfs *cfs_rq; /* cfs runqueue "owned" by this group */ + struct xsched_rq_rt *rt_rq; /* rt runqueue "owned" by this group */ + + /* Statistics */ + int nr_throttled; + u64 throttled_time; + u64 overrun_time; +}; + +/* Xsched scheduling control group */ +struct xsched_group { + /* Cgroups controller structure */ + struct cgroup_subsys_state css; + + /* Control group settings: */ + int sched_type; + int prio; + + /* Bandwidth setting: shares value set by user */ + u64 shares_cfg; + u64 shares_cfg_red; + u32 weight; + u64 children_shares_sum; + + /* Bandwidth setting: maximal quota in period */ + s64 quota; + /* record the runtime of operators during the period */ + s64 runtime; + s64 period; + struct hrtimer quota_timeout; + struct work_struct refill_work; + u64 qoslevel; + + struct xsched_group_xcu_priv perxcu_priv[XSCHED_NR_CUS]; + + /* Groups hierarchcy */ + struct xsched_group *parent; + struct list_head children_groups; + struct list_head group_node; + + spinlock_t lock; + + /* for XSE to move in perxcu */ + struct list_head members; +}; + +#define XSCHED_RQ_OF(xse) \ + (container_of(((xse)->cfs.cfs_rq), struct xsched_rq, cfs)) + +#define XSCHED_RQ_OF_CFS_XSE(cfs_xse) \ + (container_of(((cfs_xse)->cfs_rq), struct xsched_rq, cfs)) + +#define XSCHED_SE_OF(cfs_xse) \ + (container_of((cfs_xse), struct xsched_entity, cfs)) + +#define xcg_parent_grp_xcu(xcg) \ + ((xcg)->self->parent->perxcu_priv[(xcg)->xcu_id]) + +#define xse_parent_grp_xcu(xse_cfs) \ + (&((XSCHED_SE_OF(xse_cfs) \ + ->parent_grp->perxcu_priv[(XSCHED_SE_OF(xse_cfs))->xcu->id]))) + +static inline struct xsched_group_xcu_priv * +xse_this_grp_xcu(struct xsched_entity_cfs *xse_cfs) +{ + struct xsched_entity *xse; + + xse = xse_cfs ? container_of(xse_cfs, struct xsched_entity, cfs) : NULL; + return xse ? container_of(xse, struct xsched_group_xcu_priv, xse) : NULL; +} + +static inline struct xsched_group * +xse_this_grp(struct xsched_entity_cfs *xse_cfs) +{ + return xse_cfs ? xse_this_grp_xcu(xse_cfs)->self : NULL; +} + +/* Returns a pointer to an atomic_t variable representing a counter + * of currently pending vstream kicks on a given XCU and for a + * given xsched class. + */ +static inline atomic_t * +xsched_get_pending_kicks_class(const struct xsched_class *class, + struct xsched_cu *xcu) +{ + /* Right now for testing purposes we have only XCU running streams. 
*/ + if (!xcu) { + XSCHED_ERR("Try to get pending kicks with xcu=NULL.\n"); + return NULL; + } + + if (!class) { + XSCHED_ERR("Try to get pending kicks with class=NULL.\n"); + return NULL; + } + + if (class == &rt_xsched_class) + return &xcu->pending_kicks_rt; + if (class == &fair_xsched_class) + return &xcu->pending_kicks_cfs; + + XSCHED_ERR("Xsched entity has an invalid class @ %s\n", __func__); + return NULL; +} + +/* Returns a pointer to an atomic_t variable representing a counter of + * currently pending vstream kicks for an XCU on which a given xsched + * entity is enqueued on and for a xsched class that assigned to a + * given xsched entity. + */ +static inline atomic_t * +xsched_get_pending_kicks_xse(const struct xsched_entity *xse) +{ + if (!xse) { + XSCHED_ERR("Try to get pending kicks with xse=NULL\n"); + return NULL; + } + + if (!xse->xcu) { + XSCHED_ERR("Try to get pending kicks with xse->xcu=NULL\n"); + return NULL; + } + + return xsched_get_pending_kicks_class(xse->class, xse->xcu); +} + +/* Increments pending kicks counter for an XCU that the given + * xsched entity is attached to and for xsched entity's xsched + * class. + */ +static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) +{ + atomic_t *kicks_class = NULL; + + kicks_class = xsched_get_pending_kicks_xse(xse); + if (!kicks_class) + return -EINVAL; + + /* Incrementing pending kicks for XSE's sched class */ + atomic_inc(kicks_class); + + /* Icrement pending kicks for current XSE. */ + atomic_inc(&xse->kicks_pending_ctx_cnt); + + /* Incrementing prio based pending kicks counter for RT class */ + if (xse_is_rt(xse)) + atomic_inc(&xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]); + + return 0; +} + +/* Decrements pending kicks counter for an XCU that the given + * xsched entity is attached to and for XSched entity's sched + * class. + */ +static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) +{ + atomic_t *kicks_class = NULL; + atomic_t *kicks_prio_rt = NULL; + + kicks_class = xsched_get_pending_kicks_xse(xse); + if (!kicks_class) + return -EINVAL; + + if (!atomic_read(kicks_class)) { + XSCHED_ERR("Try to decrement pending kicks beyond 0!\n"); + return -EINVAL; + } + + /* Decrementing pending kicks for XSE's sched class. */ + atomic_dec(kicks_class); + + /* Decrementing pending kicks for current XSE. */ + atomic_dec(&xse->kicks_pending_ctx_cnt); + + /* Decrementing prio based pending kicks counter for RT class. */ + if (xse_is_rt(xse)) { + kicks_prio_rt = &xse->xcu->xrq.rt.prio_nr_kicks[xse->rt.prio]; + if (!atomic_read(kicks_prio_rt)) { + XSCHED_ERR( + "Try to decrement prio pending kicks beyond 0!\n"); + return -EINVAL; + } + atomic_dec(kicks_prio_rt); + } + + return 0; +} + +/* Checks if there are pending kicks left on a given XCU for all + * xsched classes. 
+ */ +static inline bool xsched_check_pending_kicks_xcu(struct xsched_cu *xcu) +{ + atomic_t *kicks_rt; + atomic_t *kicks_cfs; + + kicks_rt = xsched_get_pending_kicks_class(&rt_xsched_class, xcu); + kicks_cfs = xsched_get_pending_kicks_class(&fair_xsched_class, xcu); + if (!kicks_rt || !kicks_cfs) + return false; + + return (!!atomic_read(kicks_rt) || !!atomic_read(kicks_cfs)); +} + +static inline int xse_integrity_check(const struct xsched_entity *xse) +{ + if (!xse) { + XSCHED_ERR("xse is null @ %s\n", __func__); + return -EINVAL; + } + + if (!xse->class) { + XSCHED_ERR("xse->class is null @ %s\n", __func__); + return -EINVAL; + } + + return 0; +} + +struct xsched_context { + uint32_t fd; + uint32_t dev_id; + pid_t tgid; + + struct list_head vstream_list; + struct list_head ctx_node; + + struct xsched_entity xse; + + spinlock_t ctx_lock; + struct mutex ctx_mutex; + struct kref kref; +}; + +extern struct list_head xsched_ctx_list; +extern struct mutex xsched_ctx_list_mutex; + +/* Returns a pointer to xsched_context object corresponding to a given + * tgid and xcu. + */ +static inline struct xsched_context * +ctx_find_by_tgid_and_xcu(pid_t tgid, struct xsched_cu *xcu) +{ + struct xsched_context *ctx; + struct xsched_context *ret = NULL; + + list_for_each_entry(ctx, &xcu->ctx_list, ctx_node) { + if (ctx->tgid == tgid) { + ret = ctx; + break; + } + } + return ret; +} + +/* Xsched class. */ +struct xsched_class { + const struct xsched_class *next; + + /* Removes a given XSE from it's runqueue. */ + void (*dequeue_ctx)(struct xsched_entity *xse); + + /* Places a given XSE on a runqueue on a given XCU. */ + void (*enqueue_ctx)(struct xsched_entity *xse, struct xsched_cu *xcu); + + /* Returns a next XSE to be submitted on a given XCU. */ + struct xsched_entity *(*pick_next_ctx)(struct xsched_cu *xcu); + + /* Put a XSE back into rq during preemption. */ + void (*put_prev_ctx)(struct xsched_entity *xse); + + /* Prepares a given XSE for submission on a given XCU. */ + int (*submit_prepare_ctx)(struct xsched_entity *xse, + struct xsched_cu *xcu); + + /* Check context preemption. 
*/ + bool (*check_preempt)(struct xsched_entity *xse); + + /* Select jobs from XSE to submit on XCU */ + size_t (*select_work)(struct xsched_cu *xcu, struct xsched_entity *xse); +}; + +static inline void xsched_init_vsm(struct vstream_metadata *vsm, + struct vstream_info *vs, vstream_args_t *arg) +{ + vsm->sq_id = arg->sq_id; + vsm->sqe_num = arg->vk_args.sqe_num; + vsm->timeout = arg->vk_args.timeout; + memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); + vsm->parent = vs; + INIT_LIST_HEAD(&vsm->node); +} + +int xsched_xcu_register(struct xcu_group *group, int phys_id); +void xsched_task_free(struct kref *kref); +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs); +int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx); +int vstream_bind_to_xcu(vstream_info_t *vstream_info); +struct xsched_cu *xcu_find(uint32_t *type, + uint32_t dev_id, uint32_t channel_id); + +/* Vstream metadata proccesing functions.*/ +int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); +struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); +/* Xsched group manage functions */ +int xsched_group_inherit(struct task_struct *tsk, struct xsched_entity *xse); +void xcu_cg_init_common(struct xsched_group *xcg); +void xcu_grp_shares_update(struct xsched_group *xg); +void xsched_group_xse_detach(struct xsched_entity *xse); + +void xsched_quota_init(void); +void xsched_quota_timeout_init(struct xsched_group *xg); +void xsched_quota_timeout_update(struct xsched_group *xg); +void xsched_quota_account(struct xsched_group *xg, s64 exec_time); +bool xsched_quota_exceed(struct xsched_group *xg); +void xsched_quota_refill(struct work_struct *work); +void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); +void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); +#endif /* __LINUX_XSCHED_H__ */ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index bf2b30463784e026092d3dcde784237f4b5a49f0..9c7ef260593abad0e0704e58d3067378c74cfe34 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -830,8 +830,9 @@ __SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) __SYSCALL(__NR_kabi_reserved454, sys_ni_syscall) #define __NR_kabi_reserved455 455 __SYSCALL(__NR_kabi_reserved455, sys_ni_syscall) -#define __NR_kabi_reserved456 456 -__SYSCALL(__NR_kabi_reserved456, sys_ni_syscall) +#define __IGNORE_kabi_reserved456 +#define __NR_vstream_manage 456 +__SYSCALL(__NR_vstream_manage, sys_vstream_manage) #define __NR_kabi_reserved457 457 __SYSCALL(__NR_kabi_reserved457, sys_ni_syscall) #define __NR_kabi_reserved458 458 diff --git a/include/uapi/linux/xcu_vstream.h b/include/uapi/linux/xcu_vstream.h new file mode 100644 index 0000000000000000000000000000000000000000..f72d4720e9934a3f3f2c629a44ecba8579bd7b3d --- /dev/null +++ b/include/uapi/linux/xcu_vstream.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_XCU_VSTREAM_H +#define _UAPI_XCU_VSTREAM_H + +#include + +#define PAYLOAD_SIZE_MAX 512 +#define XCU_SQE_SIZE_MAX 64 +#define XCU_CQE_SIZE_MAX 32 +#define XCU_CQE_REPORT_NUM 4 +#define XCU_CQE_BUF_SIZE (XCU_CQE_REPORT_NUM * XCU_CQE_SIZE_MAX) + +/* + * VSTREAM_ALLOC: alloc a vstream, buffer for tasks + * VSTREAM_FREE: free a vstream + * VSTREAM_KICK: there are tasks to be executed in the vstream + */ +typedef enum VSTREAM_COMMAND { + VSTREAM_ALLOC = 0, + VSTREAM_FREE, + VSTREAM_KICK, + MAX_COMMAND +} vstream_command_t; + +typedef 
struct vstream_alloc_args { + __s32 type; + __u32 user_stream_id; +} vstream_alloc_args_t; + +typedef struct vstream_free_args { } vstream_free_args_t; + +typedef struct vstream_kick_args { + __u32 sqe_num; + __s32 timeout; + __s8 sqe[XCU_SQE_SIZE_MAX]; +} vstream_kick_args_t; + +typedef struct vstream_args { + __u32 channel_id; + __u32 fd; + __u32 dev_id; + __u32 task_type; + __u32 sq_id; + __u32 cq_id; + + /* Device related structures. */ + union { + vstream_alloc_args_t va_args; + vstream_free_args_t vf_args; + vstream_kick_args_t vk_args; + }; + + __u32 payload_size; + __s8 payload[PAYLOAD_SIZE_MAX]; +} vstream_args_t; + +#endif /* _UAPI_LINUX_SCHED_H */ diff --git a/init/Kconfig b/init/Kconfig index 5af21834fbff4db476b7730a1f428b3f0cb7ea9d..52290ec7c8dbf5f950af65a3fe45b86f901bf05d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -478,6 +478,7 @@ source "kernel/irq/Kconfig" source "kernel/time/Kconfig" source "kernel/bpf/Kconfig" source "kernel/Kconfig.preempt" +source "kernel/xsched/Kconfig" menu "CPU/Task time and stats accounting" diff --git a/kernel/Makefile b/kernel/Makefile index 1fe46db40806212be0f6b554d15992c73a87ed93..0baddecc349f232d7e4ee39b268b6a30e31fe949 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -50,6 +50,7 @@ obj-y += rcu/ obj-y += livepatch/ obj-y += dma/ obj-y += entry/ +obj-y += xsched/ obj-$(CONFIG_MODULES) += module/ obj-$(CONFIG_KCMP) += kcmp.o diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index c26a9b3a35768df637484f1549f5ca3165255af7..b632590eae0fc083b9c9f5908080016a33589811 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6190,7 +6190,7 @@ int __init cgroup_init(void) struct cgroup_subsys *ss; int ssid; - BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 17); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); diff --git a/kernel/xsched/Kconfig b/kernel/xsched/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..776c3dfbdaa43627cbd6fa0ee5261ddb621b2471 --- /dev/null +++ b/kernel/xsched/Kconfig @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0 + +config XCU_SCHEDULER + bool "Enable XSched functionality" + select CGROUP_XCU + default n + help + This option enables the XSched scheduler, a custom scheduling mechanism + designed for heterogeneous compute units (e.g., XPUs). It provides: + - Priority-based task scheduling with latency-sensitive optimizations. + - Integration with cgroups (via CGROUP_XCU) for resource isolation. + + Enable this only if your system requires advanced scheduling for XPU workloads. + If unsure, say N. + +config XCU_VSTREAM + bool "Enable vstream SQ/CQ buffers maintaining for XPU" + default n + depends on XCU_SCHEDULER + help + This option enables virtual stream (vstream) support for XPUs, managing + submission queues (SQ) and completion queues (CQ) in kernel space. Key features: + - Zero-copy buffer management between user and kernel space. + - Batch processing of XPU commands to reduce MMIO overhead. + + Requires XCU_SCHEDULER to be enabled. May increase kernel memory usage. + Recommended for high-throughput XPU workloads. If unsure, say N. + +config XSCHED_NR_CUS + int "Number of CUs (a.k.a. 
XCUs) available to XSched mechanism" + default 8 + depends on XCU_SCHEDULER + help + This option defines the maximum number of Compute Units (CUs) that can be + managed by the XSched scheduler. Consider changing this value in proportion + to the number of available XCU cores. + +config CGROUP_XCU + bool "XCU bandwidth control and group scheduling for xsched_cfs" + default n + depends on XCU_SCHEDULER + help + This option enables the extended Compute Unit (XCU) resource controller for + CFS task groups, providing hierarchical scheduling and fine-grained bandwidth + allocation capabilities. Key features include: + - Proportional XCU time distribution across cgroups based on shares/quotas + - Nested group scheduling with latency isolation + - Integration with xsched_cfs for fair XCU resource management + + Required for systems that need fine-grained resource control in cgroups. + If unsure, say N. diff --git a/kernel/xsched/Makefile b/kernel/xsched/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8ab32b086b3de29458b8c0d26a70966032343e01 --- /dev/null +++ b/kernel/xsched/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += vstream.o +obj-$(CONFIG_XCU_SCHEDULER) += core.o rt.o cfs.o cfs_quota.o +obj-$(CONFIG_CGROUP_XCU) += cgroup.o diff --git a/kernel/xsched/cfs.c b/kernel/xsched/cfs.c new file mode 100644 index 0000000000000000000000000000000000000000..94189d8088ac5794c60f1f2088060233eafde8bf --- /dev/null +++ b/kernel/xsched/cfs.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Completely Fair Scheduling (CFS) Class for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
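As background for the CFS-like class implemented in cfs.c, here is a minimal userspace sketch (an illustration, not part of the patch): the runqueue always picks the entity with the smallest xruntime, and after a slice that entity's xruntime advances by delta * weight, as xs_update() does. Because xcu_grp_shares_update() in cgroup.c turns larger cgroup shares into smaller weights for a group's scheduling entities, a smaller weight means slower xruntime growth and therefore more frequent selection. The entity names, the fixed delta, and the plain array scan standing in for the kernel's cached rbtree are all assumptions of the sketch.

#include <stdio.h>
#include <stdint.h>

struct ent { const char *name; uint64_t xruntime; uint32_t weight; int picked; };

int main(void)
{
	/* Two entities; the smaller weight models a larger configured share. */
	struct ent e[2] = { { "A", 0, 1, 0 }, { "B", 0, 2, 0 } };
	const uint64_t delta = 10;	/* pretend each slice runs 10 time units */
	int i, j;

	for (i = 0; i < 300; i++) {
		struct ent *min = &e[0];

		for (j = 1; j < 2; j++)
			if (e[j].xruntime < min->xruntime)
				min = &e[j];

		/* mirrors xs_update(): xruntime grows by delta * weight */
		min->xruntime += delta * min->weight;
		min->picked++;
	}

	printf("%s picked %d times, %s picked %d times\n",
	       e[0].name, e[0].picked, e[1].name, e[1].picked);
	return 0;
}

Running the sketch picks A about twice as often as B, which is the service ratio the inverse weights are meant to express.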
+ * + */ +#include + +#define CFS_INNER_RQ_EMPTY(cfs_xse) \ + ((cfs_xse)->xruntime == XSCHED_TIME_INF) + +extern struct xsched_group *root_xcg; + +void xs_rq_add(struct xsched_entity_cfs *xse) +{ + struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; + struct rb_node **link = &cfs_rq->ctx_timeline.rb_root.rb_node; + struct rb_node *parent = NULL; + struct xsched_entity_cfs *entry; + bool leftmost = true; + + while (*link) { + parent = *link; + entry = rb_entry(parent, struct xsched_entity_cfs, run_node); + if (xse->xruntime <= entry->xruntime) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(&xse->run_node, parent, link); + rb_insert_color_cached(&xse->run_node, &cfs_rq->ctx_timeline, leftmost); +} + +void xs_rq_remove(struct xsched_entity_cfs *xse) +{ + struct xsched_rq_cfs *cfs_rq = xse->cfs_rq; + + rb_erase_cached(&xse->run_node, &cfs_rq->ctx_timeline); +} + +/** + * xs_cfs_rq_update() - Update entity's runqueue position with new xruntime + */ +static void xs_cfs_rq_update(struct xsched_entity_cfs *xse_cfs, u64 new_xrt) +{ + xs_rq_remove(xse_cfs); + xse_cfs->xruntime = new_xrt; + xs_rq_add(xse_cfs); +} + +static inline struct xsched_entity_cfs * +xs_pick_first(struct xsched_rq_cfs *cfs_rq) +{ + struct xsched_entity_cfs *xse_cfs; + struct rb_node *left = rb_first_cached(&cfs_rq->ctx_timeline); + + if (!left) + return NULL; + + xse_cfs = rb_entry(left, struct xsched_entity_cfs, run_node); + return xse_cfs; +} + +/** + * xs_update() - Account xruntime and runtime metrics. + * @xse_cfs: Point to CFS scheduling entity. + * @delta: Execution time in last period + */ +static void xs_update(struct xsched_entity_cfs *xse_cfs, u64 delta) +{ + struct xsched_group_xcu_priv *xg = xse_parent_grp_xcu(xse_cfs); + + for (; xg; xse_cfs = &xg->xse.cfs, xg = &xcg_parent_grp_xcu(xg)) { + u64 new_xrt = xse_cfs->xruntime + delta * xse_cfs->weight; + + xs_cfs_rq_update(xse_cfs, new_xrt); + xse_cfs->sum_exec_runtime += delta; + + if (xg->self->parent == NULL) + break; + } +} + +/** + * xg_update() - Update container group's xruntime + * @gxcu: Descendant xsched group's private xcu control structure + * + * No locks required to access xsched_group_xcu_priv members, + * because only one worker thread works for one XCU. + */ +static void xg_update(struct xsched_group_xcu_priv *xg, int task_delta) +{ + u64 new_xrt; + struct xsched_entity_cfs *entry; + + for (; xg; xg = &xcg_parent_grp_xcu(xg)) { + xg->cfs_rq->nr_running += task_delta; + entry = xs_pick_first(xg->cfs_rq); + new_xrt = entry ? entry->xruntime * xg->xse.cfs.weight : XSCHED_TIME_INF; + + xg->cfs_rq->min_xruntime = new_xrt; + xg->xse.cfs.xruntime = new_xrt; + + if (!xg->xse.on_rq) + break; + if (!xg->self->parent) + break; + + xs_cfs_rq_update(&xg->xse.cfs, new_xrt); + } +} + +/* + * Xsched Fair class methods + * For rq manipulation we rely on root runqueue lock already acquired in core. + * Access xsched_group_xcu_priv requires no locks because one thread per XCU. + */ +static void dequeue_ctx_fair(struct xsched_entity *xse) +{ + int task_delta; + struct xsched_cu *xcu = xse->xcu; + struct xsched_entity_cfs *first; + struct xsched_entity_cfs *xse_cfs = &xse->cfs; + + task_delta = + (xse->is_group) ? -(xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running) : -1; + + xs_rq_remove(xse_cfs); + xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); + + first = xs_pick_first(&xcu->xrq.cfs); + xcu->xrq.cfs.min_xruntime = (first) ? 
first->xruntime : XSCHED_TIME_INF; +} + +/** + * enqueue_ctx_fair() - Add context to the runqueue + * @xse: xsched entity of context + * @xcu: executor + * + * In contrary to enqueue_task it is called once on context init. + * Although groups reside in tree, their nodes not counted in nr_running. + * The xruntime of a group xsched entitry represented by min xruntime inside. + */ +static void enqueue_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + int task_delta; + struct xsched_entity_cfs *first; + struct xsched_rq_cfs *rq; + struct xsched_entity_cfs *xse_cfs = &xse->cfs; + + xse_cfs->weight = XSCHED_CFS_ENTITY_WEIGHT_DFLT; + rq = xse_cfs->cfs_rq = xse_parent_grp_xcu(xse_cfs)->cfs_rq; + task_delta = + (xse->is_group) ? xse_this_grp_xcu(xse_cfs)->cfs_rq->nr_running : 1; + + /* If no XSE or only empty groups */ + if (xs_pick_first(rq) == NULL || rq->min_xruntime == XSCHED_TIME_INF) + rq->min_xruntime = xse_cfs->xruntime; + else + xse_cfs->xruntime = max(xse_cfs->xruntime, rq->min_xruntime); + + xs_rq_add(xse_cfs); + xg_update(xse_parent_grp_xcu(xse_cfs), task_delta); + + first = xs_pick_first(&xcu->xrq.cfs); + xcu->xrq.cfs.min_xruntime = (first) ? first->xruntime : XSCHED_TIME_INF; +} + +static struct xsched_entity *pick_next_ctx_fair(struct xsched_cu *xcu) +{ + struct xsched_entity_cfs *xse; + struct xsched_rq_cfs *rq = &xcu->xrq.cfs; + + xse = xs_pick_first(rq); + if (!xse) + return NULL; + + for (; XSCHED_SE_OF(xse)->is_group; xse = xs_pick_first(rq)) { + if (!xse || CFS_INNER_RQ_EMPTY(xse)) + return NULL; + rq = xse_this_grp_xcu(xse)->cfs_rq; + } + + return container_of(xse, struct xsched_entity, cfs); +} + +static inline bool +xs_should_preempt_fair(struct xsched_entity *xse) +{ + return (atomic_read(&xse->submitted_one_kick) >= XSCHED_CFS_KICK_SLICE); +} + +static void put_prev_ctx_fair(struct xsched_entity *xse) +{ + struct xsched_entity_cfs *prev = &xse->cfs; + + xsched_quota_account(xse->parent_grp, (s64)xse->last_exec_runtime); + xs_update(prev, xse->last_exec_runtime); +} + +int submit_prepare_ctx_fair(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + return 0; +} + +const struct xsched_class fair_xsched_class = { + .next = NULL, + .dequeue_ctx = dequeue_ctx_fair, + .enqueue_ctx = enqueue_ctx_fair, + .pick_next_ctx = pick_next_ctx_fair, + .put_prev_ctx = put_prev_ctx_fair, + .submit_prepare_ctx = submit_prepare_ctx_fair, + .check_preempt = xs_should_preempt_fair, +}; diff --git a/kernel/xsched/cfs_quota.c b/kernel/xsched/cfs_quota.c new file mode 100644 index 0000000000000000000000000000000000000000..5bded83b3561a365e23286dfa88e561126203c69 --- /dev/null +++ b/kernel/xsched/cfs_quota.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Bandwidth provisioning for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
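The quota logic in cfs_quota.c can be summarized as a simple budget counter: put_prev_ctx_fair() feeds execution time into xsched_quota_account(), xsched_quota_exceed() reports the group as throttled once the accumulated runtime reaches the configured quota, and the period timer's refill work subtracts one quota's worth of budget (clamping at zero) before re-enqueueing the group's per-XCU entities. The standalone sketch below reproduces only that arithmetic with made-up numbers; it is an illustration, not kernel code.

#include <stdio.h>

int main(void)
{
	long long quota = 30, runtime = 0;	/* e.g. 30 ms of XCU time per period */
	long long demand = 50;			/* the group asks for 50 ms per period */
	int period;

	for (period = 0; period < 4; period++) {
		long long ran = 0;

		/* account in 10 ms chunks until the quota check throttles us */
		while (runtime < quota && ran < demand) {
			runtime += 10;		/* xsched_quota_account() */
			ran += 10;
		}

		printf("period %d: ran %lld ms, throttled=%d\n",
		       period, ran, runtime >= quota);

		/* xsched_quota_refill(): runtime = max(runtime - quota, 0) */
		runtime = runtime > quota ? runtime - quota : 0;
	}
	return 0;
}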
+ * + */ +#include +#include + +static struct workqueue_struct *quota_workqueue; + +void xsched_quota_refill(struct work_struct *work) +{ + uint32_t id; + struct xsched_cu *xcu; + struct xsched_group *xg; + + xg = container_of(work, struct xsched_group, refill_work); + + spin_lock(&xg->lock); + xg->runtime = max((xg->runtime - xg->quota), (s64)0); + hrtimer_start(&xg->quota_timeout, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); + spin_unlock(&xg->lock); + + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + if (!READ_ONCE(xg->perxcu_priv[id].xse.on_rq)) { + enqueue_ctx(&xg->perxcu_priv[id].xse, xcu); + wake_up_interruptible(&xcu->wq_xcu_idle); + } + mutex_unlock(&xcu->xcu_lock); + } +} + +static enum hrtimer_restart quota_timer_cb(struct hrtimer *hrtimer) +{ + struct xsched_group *xg; + + xg = container_of(hrtimer, struct xsched_group, quota_timeout); + queue_work(quota_workqueue, &xg->refill_work); + + return HRTIMER_NORESTART; +} + +void xsched_quota_account(struct xsched_group *xg, s64 exec_time) +{ + spin_lock(&xg->lock); + xg->runtime += exec_time; + spin_unlock(&xg->lock); +} + +bool xsched_quota_exceed(struct xsched_group *xg) +{ + bool ret; + + spin_lock(&xg->lock); + ret = (xg->quota > 0) ? (xg->runtime >= xg->quota) : false; + spin_unlock(&xg->lock); + + return ret; +} + +void xsched_quota_init(void) +{ + quota_workqueue = create_singlethread_workqueue("xsched_quota_workqueue"); +} + +void xsched_quota_timeout_init(struct xsched_group *xg) +{ + hrtimer_init(&xg->quota_timeout, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + xg->quota_timeout.function = quota_timer_cb; +} + +void xsched_quota_timeout_update(struct xsched_group *xg) +{ + struct hrtimer *t = &xg->quota_timeout; + + hrtimer_cancel(t); + + if (xg->quota > 0 && xg->period > 0) + hrtimer_start(t, ns_to_ktime(xg->period), HRTIMER_MODE_REL_SOFT); +} diff --git a/kernel/xsched/cgroup.c b/kernel/xsched/cgroup.c new file mode 100644 index 0000000000000000000000000000000000000000..aa675a0139279640f2f91f5f5229e0c3741e2ef2 --- /dev/null +++ b/kernel/xsched/cgroup.c @@ -0,0 +1,676 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Support cgroup for xpu device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
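Later in this file, xcu_grp_shares_update() converts the sibling groups' shares settings into small inverse integer weights: the shares are reduced by their common GCD, a common multiple of the reduced values is accumulated, each sibling's weight is that multiple divided by its reduced share, and the weights are normalized once more by their own GCD. The standalone sketch below (the share values are invented) reproduces that arithmetic so the resulting weights can be inspected; larger shares end up with smaller weights, which the CFS-like class then uses to slow xruntime growth.

#include <stdio.h>

static unsigned long long gcd_ull(unsigned long long a, unsigned long long b)
{
	while (b) {
		unsigned long long t = a % b;

		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	unsigned long long shares[] = { 2000, 3000, 5000 };
	unsigned long long red[3], weight[3];
	unsigned long long sh_gcd = 0, prod = 1, w_gcd = 0;
	int i;

	for (i = 0; i < 3; i++)
		sh_gcd = gcd_ull(sh_gcd, shares[i]);

	for (i = 0; i < 3; i++) {
		red[i] = shares[i] / sh_gcd;
		if (prod % red[i])
			prod *= red[i];		/* build a common multiple */
	}

	for (i = 0; i < 3; i++) {
		weight[i] = prod / red[i];	/* inverse of the share */
		w_gcd = gcd_ull(w_gcd, weight[i]);
	}

	for (i = 0; i < 3; i++)
		printf("shares %llu -> weight %llu\n", shares[i], weight[i] / w_gcd);
	return 0;
}

With these inputs the sketch prints weights 15, 10 and 6 for shares 2000, 3000 and 5000, i.e. weight is proportional to 1/shares.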
+ * + */ +#include +#include +#include +#include +#include + +enum xcu_file_type { + XCU_FILE_PERIOD_MS, + XCU_FILE_QUOTA_MS, + XCU_FILE_SHARES, +}; + +static struct xsched_group root_xsched_group; +struct xsched_group *root_xcg = &root_xsched_group; +static bool root_cg_inited; + +static struct xsched_group *old_xcg; +static DECLARE_WAIT_QUEUE_HEAD(xcg_attach_wq); +static bool attach_in_progress; +static DEFINE_MUTEX(xcg_mutex); + +static const char xcu_sched_name[XSCHED_TYPE_NUM][4] = { + [XSCHED_TYPE_RT] = "rt", + [XSCHED_TYPE_CFS] = "cfs" +}; + +void xcu_cg_init_common(struct xsched_group *xcg) +{ + spin_lock_init(&xcg->lock); + INIT_LIST_HEAD(&xcg->members); + INIT_LIST_HEAD(&xcg->children_groups); + xsched_quota_timeout_init(xcg); + INIT_WORK(&xcg->refill_work, xsched_quota_refill); +} + +static void xcu_cfs_root_cg_init(void) +{ + int id; + struct xsched_cu *xcu; + + for_each_active_xcu(xcu, id) { + root_xcg->perxcu_priv[id].xcu_id = id; + root_xcg->perxcu_priv[id].self = root_xcg; + root_xcg->perxcu_priv[id].cfs_rq = &xcu->xrq.cfs; + root_xcg->perxcu_priv[id].xse.cfs.weight = 1; + } + + root_xcg->sched_type = XSCHED_TYPE_DFLT; + root_xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + root_xcg->quota = XSCHED_TIME_INF; + root_xcg->runtime = 0; + xsched_quota_init(); +} + +/** + * xcu_cfs_cg_init() - Initialize xsched_group cfs runqueues and bw control. + * @xcg: new xsched_cgroup + * @parent_xg: parent's group + * + * One xsched_group can host many processes with contexts on different devices. + * Function creates xsched_entity for every XCU, and places it in runqueue + * of parent group. Create new cfs rq for xse inside group. + */ +static int xcu_cfs_cg_init(struct xsched_group *xcg, + struct xsched_group *parent_xg) +{ + int id = 0, err, i; + struct xsched_cu *xcu; + struct xsched_rq_cfs *sub_cfs_rq; + + if (unlikely(!root_cg_inited)) { + xcu_cfs_root_cg_init(); + root_cg_inited = true; + } + + for_each_active_xcu(xcu, id) { + xcg->perxcu_priv[id].xcu_id = id; + xcg->perxcu_priv[id].self = xcg; + + sub_cfs_rq = kzalloc(sizeof(struct xsched_rq_cfs), GFP_KERNEL); + if (!sub_cfs_rq) { + XSCHED_ERR("Fail to alloc cfs runqueue on xcu %d\n", id); + err = -ENOMEM; + goto alloc_error; + } + xcg->perxcu_priv[id].cfs_rq = sub_cfs_rq; + xcg->perxcu_priv[id].cfs_rq->ctx_timeline = RB_ROOT_CACHED; + + xcg->perxcu_priv[id].xse.is_group = true; + xcg->perxcu_priv[id].xse.xcu = xcu; + xcg->perxcu_priv[id].xse.class = &fair_xsched_class; + + /* Put new empty groups to the right in parent's rbtree: */ + xcg->perxcu_priv[id].xse.cfs.xruntime = XSCHED_TIME_INF; + xcg->perxcu_priv[id].xse.cfs.weight = + XSCHED_CFS_ENTITY_WEIGHT_DFLT; + xcg->perxcu_priv[id].xse.parent_grp = parent_xg; + + mutex_lock(&xcu->xcu_lock); + enqueue_ctx(&xcg->perxcu_priv[id].xse, xcu); + mutex_unlock(&xcu->xcu_lock); + } + + xcg->shares_cfg = XSCHED_CFG_SHARE_DFLT; + xcu_grp_shares_update(parent_xg); + xcg->period = XSCHED_CFS_QUOTA_PERIOD_MS; + xcg->quota = XSCHED_TIME_INF; + xcg->runtime = 0; + + return 0; + +alloc_error: + for (i = 0; i < id; i++) + kfree(xcg->perxcu_priv[i].cfs_rq); + return err; +} + +static void xcu_cfs_cg_deinit(struct xsched_group *xcg) +{ + uint32_t id; + struct xsched_cu *xcu; + + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + dequeue_ctx(&xcg->perxcu_priv[id].xse, xcu); + mutex_unlock(&xcu->xcu_lock); + kfree(xcg->perxcu_priv[id].cfs_rq); + } + xcu_grp_shares_update(xcg->parent); +} + +/** + * xcu_cg_init() - Initialize non-root xsched_group structure. 
+ * @xcg: new xsched_cgroup + * @parent_xg: parent's group + */ +static int xcu_cg_init(struct xsched_group *xcg, + struct xsched_group *parent_xg) +{ + xcu_cg_init_common(xcg); + xcg->parent = parent_xg; + list_add_tail(&xcg->group_node, &parent_xg->children_groups); + xcg->sched_type = parent_xg->sched_type; + + switch (xcg->sched_type) { + case XSCHED_TYPE_CFS: + return xcu_cfs_cg_init(xcg, parent_xg); + default: + pr_info("xcu_cgroup: init RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } + + return 0; +} + +inline struct xsched_group *xcu_cg_from_css(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct xsched_group, css) : NULL; +} + +/** + * xcu_css_alloc() - Allocate and init xcu cgroup. + * @parent_css: css of parent xcu cgroup + * + * Called from kernel/cgroup.c with cgroup_lock() held. + * First called in subsys initialization to create root xcu cgroup, when + * XCUs haven't been initialized yet. Func used on every new cgroup creation, + * on second call to set root xsched_group runqueue. + * + * Return: pointer of new xcu cgroup css on success, -ENOMEM otherwise. + */ +static struct cgroup_subsys_state * +xcu_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct xsched_group *parent_xg; + struct xsched_group *xg; + int err; + + if (!parent_css) + return &root_xsched_group.css; + + xg = kzalloc(sizeof(*xg), GFP_KERNEL); + if (!xg) + return ERR_PTR(-ENOMEM); + + mutex_lock(&xcg_mutex); + parent_xg = xcu_cg_from_css(parent_css); + err = xcu_cg_init(xg, parent_xg); + mutex_unlock(&xcg_mutex); + if (err) { + kfree(xg); + XSCHED_ERR("Fail to alloc new xcu group %s\n", __func__); + return ERR_PTR(err); + } + + return &xg->css; +} + +static void xcu_css_free(struct cgroup_subsys_state *css) +{ + struct xsched_group *xcg; + + mutex_lock(&xcg_mutex); + xcg = xcu_cg_from_css(css); + if (xcg->parent != NULL) { + switch (xcg->sched_type) { + case XSCHED_TYPE_CFS: + xcu_cfs_cg_deinit(xcg); + break; + default: + pr_info("xcu_cgroup: deinit RT group css=0x%lx\n", + (uintptr_t)&xcg->css); + break; + } + } + hrtimer_cancel(&xcg->quota_timeout); + cancel_work_sync(&xcg->refill_work); + list_del(&xcg->group_node); + mutex_unlock(&xcg_mutex); + + kfree(xcg); +} + +int xcu_css_online(struct cgroup_subsys_state *css) +{ + return 0; +} + +static void xcu_css_offline(struct cgroup_subsys_state *css) +{ + ; +} + +static void xsched_group_xse_attach(struct xsched_group *xg, + struct xsched_entity *xse) +{ + spin_lock(&xg->lock); + list_add_tail(&xse->group_node, &xg->members); + spin_unlock(&xg->lock); + xse->parent_grp = xg; +} + +void xsched_group_xse_detach(struct xsched_entity *xse) +{ + struct xsched_group *xcg = xse->parent_grp; + + spin_lock(&xcg->lock); + list_del(&xse->group_node); + spin_unlock(&xcg->lock); +} + +static int xcu_task_can_attach(struct task_struct *task, + struct xsched_group *old, struct xsched_group *dst) +{ + struct xsched_entity *xse; + bool has_xse = false; + + spin_lock(&old->lock); + list_for_each_entry(xse, &old->members, group_node) { + if (xse->owner_pid == task_pid_nr(task)) { + has_xse = true; + break; + } + } + spin_unlock(&old->lock); + + return has_xse ? 
-EINVAL : 0; +} + +static int xcu_can_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *dst_css, *old_css; + struct xsched_group *dst_xcg; + int ret = 0; + + mutex_lock(&xcg_mutex); + cgroup_taskset_for_each(task, dst_css, tset) { + old_css = task_css(task, xcu_cgrp_id); + dst_xcg = xcu_cg_from_css(dst_css); + old_xcg = xcu_cg_from_css(old_css); + ret = xcu_task_can_attach(task, old_xcg, dst_xcg); + if (ret) + break; + } + if (!ret) + attach_in_progress = true; + mutex_unlock(&xcg_mutex); + return ret; +} + +static void xcu_cancel_attach(struct cgroup_taskset *tset) +{ + mutex_lock(&xcg_mutex); + attach_in_progress = false; + wake_up(&xcg_attach_wq); + mutex_unlock(&xcg_mutex); +} + +void xcu_move_task(struct task_struct *task, struct xsched_group *old_xcg, + struct xsched_group *new_xcg) +{ + struct xsched_entity *xse, *tmp; + struct xsched_cu *xcu; + + spin_lock(&old_xcg->lock); + list_for_each_entry_safe(xse, tmp, &old_xcg->members, group_node) { + if (xse->owner_pid != task_pid_nr(task)) + continue; + + xcu = xse->xcu; + BUG_ON(old_xcg != xse->parent_grp); + + /* delete from the old_xcg */ + list_del(&xse->group_node); + + mutex_lock(&xcu->xcu_lock); + /* dequeue from the current runqueue */ + dequeue_ctx(xse, xcu); + /* attach to the new_xcg */ + xsched_group_xse_attach(new_xcg, xse); + /* enqueue to the runqueue in new_xcg */ + enqueue_ctx(xse, xcu); + mutex_unlock(&xcu->xcu_lock); + } + spin_unlock(&old_xcg->lock); +} + +static void xcu_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + + mutex_lock(&xcg_mutex); + cgroup_taskset_for_each(task, css, tset) { + xcu_move_task(task, old_xcg, xcu_cg_from_css(css)); + } + attach_in_progress = false; + wake_up(&xcg_attach_wq); + mutex_unlock(&xcg_mutex); +} + +/** + * xsched_group_inherit() - Attach new entity to task's xsched_group. + * @task: task_struct + * @xse: xsched entity + * + * Called in xsched context initialization to attach xse to task's group + * and inherit its xse scheduling class and bandwidth control policy. + * + * Return: Zero on success. + */ +int xsched_group_inherit(struct task_struct *task, struct xsched_entity *xse) +{ + struct cgroup_subsys_state *css; + struct xsched_group *xg; + +retry: + wait_event(xcg_attach_wq, !attach_in_progress); + + mutex_lock(&xcg_mutex); + if (attach_in_progress) { + mutex_unlock(&xcg_mutex); + goto retry; + } + xse->owner_pid = task_pid_nr(task); + css = task_get_css(task, xcu_cgrp_id); + xg = xcu_cg_from_css(css); + xsched_group_xse_attach(xg, xse); + css_put(css); + mutex_unlock(&xcg_mutex); + + return 0; +} + +static int xcu_sched_show(struct seq_file *sf, void *v) +{ + struct cgroup_subsys_state *css = seq_css(sf); + struct xsched_group *xg = xcu_cg_from_css(css); + + seq_printf(sf, "%s\n", xcu_sched_name[xg->sched_type]); + return 0; +} + +/** + * xcu_cg_set_sched() - Set scheduling type for group. + * @xg: xsched group + * @type: scheduler type + * + * Scheduler type can be changed if task is child of root group + * and haven't got scheduling entities. 
+ * + * Return: Zero on success or -EINVAL + */ +int xcu_cg_set_sched(struct xsched_group *xg, int type) +{ + if (type == xg->sched_type) + return 0; + + if (xg->parent != root_xcg) + return -EINVAL; + + if (!list_empty(&xg->members)) + return -EBUSY; + + if (xg->sched_type == XSCHED_TYPE_CFS) + xcu_cfs_cg_deinit(xg); + + xg->sched_type = type; + if (type != XSCHED_TYPE_CFS) + return 0; + + /* type is XSCHED_TYPE_CFS */ + return xcu_cfs_cg_init(xg, xg->parent); +} + +static ssize_t xcu_sched_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct cgroup_subsys_state *css = of_css(of); + struct xsched_group *xg = xcu_cg_from_css(css); + char type_name[4]; + int type = -1; + + ssize_t ret = sscanf(buf, "%3s", type_name); + + if (ret < 1) + return -EINVAL; + + for (type = 0; type < XSCHED_TYPE_NUM; type++) { + if (!strcmp(type_name, xcu_sched_name[type])) + break; + } + + if (type == XSCHED_TYPE_NUM) + return -EINVAL; + + if (!list_empty(&css->children)) + return -EBUSY; + + mutex_lock(&xcg_mutex); + ret = xcu_cg_set_sched(xg, type); + mutex_unlock(&xcg_mutex); + + return (ret) ? ret : nbytes; +} + +static s64 xcu_read_s64(struct cgroup_subsys_state *css, struct cftype *cft) +{ + s64 ret = 0; + struct xsched_group *xcucg = xcu_cg_from_css(css); + + spin_lock(&xcucg->lock); + switch (cft->private) { + case XCU_FILE_PERIOD_MS: + ret = xcucg->period / NSEC_PER_MSEC; + break; + case XCU_FILE_QUOTA_MS: + ret = (xcucg->quota > 0) ? xcucg->quota / NSEC_PER_MSEC : xcucg->quota; + break; + case XCU_FILE_SHARES: + ret = xcucg->shares_cfg; + break; + default: + break; + } + spin_unlock(&xcucg->lock); + return ret; +} + +static inline u64 gcd(u64 a, u64 b) +{ + while (a != 0 && b != 0) { + if (a > b) + a %= b; + else + b %= a; + } + return (a) ? 
a : b; +} + +void xcu_grp_shares_update(struct xsched_group *xg) +{ + int id; + struct xsched_cu *xcu; + struct xsched_group *xgi, *parent = xg; + u64 sh_sum = 0, sh_gcd = 0, w_gcd = 0, sh_prod_red = 1; + + spin_lock(&parent->lock); + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) + sh_gcd = gcd(sh_gcd, xgi->shares_cfg); + } + + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) { + sh_sum += xgi->shares_cfg; + xgi->shares_cfg_red = div_u64(xgi->shares_cfg, sh_gcd); + + if ((sh_prod_red % xgi->shares_cfg_red) != 0) + sh_prod_red *= xgi->shares_cfg_red; + } + } + + parent->children_shares_sum = sh_sum; + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) { + xgi->weight = div_u64(sh_prod_red, xgi->shares_cfg_red); + w_gcd = gcd(w_gcd, xgi->weight); + } + } + + list_for_each_entry((xgi), &(parent)->children_groups, group_node) { + if ((xgi)->sched_type == XSCHED_TYPE_CFS) { + xgi->weight = div_u64(xgi->weight, w_gcd); + for_each_active_xcu(xcu, id) { + mutex_lock(&xcu->xcu_lock); + xgi->perxcu_priv[id].xse.cfs.weight = xgi->weight; + mutex_unlock(&xcu->xcu_lock); + } + } + } + spin_unlock(&parent->lock); +} + +static int xcu_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, + s64 val) +{ + int ret = 0; + struct xsched_group *xcucg = xcu_cg_from_css(css); + s64 quota_ns; + + if (xcucg->sched_type != XSCHED_TYPE_CFS) + return -EINVAL; + + spin_lock(&xcucg->lock); + switch (cft->private) { + case XCU_FILE_PERIOD_MS: + if (val < 1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + xcucg->period = val * NSEC_PER_MSEC; + xsched_quota_timeout_update(xcucg); + break; + case XCU_FILE_QUOTA_MS: + if (val < -1 || val > (S64_MAX / NSEC_PER_MSEC)) { + ret = -EINVAL; + break; + } + /* Runtime should be updated when modifying quota_ms configuration */ + quota_ns = (val > 0) ? 
val * NSEC_PER_MSEC : val; + if (xcucg->quota > 0 && quota_ns > 0) + xcucg->runtime = max((xcucg->runtime - quota_ns), (s64)0); + else + xcucg->runtime = 0; + xcucg->quota = quota_ns; + xsched_quota_timeout_update(xcucg); + break; + case XCU_FILE_SHARES: + if (val <= 0 || val > U64_MAX) { + ret = -EINVAL; + break; + } + xcucg->shares_cfg = val; + xcu_grp_shares_update(xcucg->parent); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock(&xcucg->lock); + + return ret; +} + +static int xcu_stat(struct seq_file *sf, void *v) +{ + struct cgroup_subsys_state *css = seq_css(sf); + struct xsched_group *xcucg = xcu_cg_from_css(css); + + u64 nr_throttled = 0; + u64 throttled_time = 0; + u64 exec_runtime = 0; + + int xcu_id; + struct xsched_cu *xcu; + + if (xcucg->sched_type == XSCHED_TYPE_RT) { + seq_printf(sf, "RT group stat is not supported\n"); + return 0; + } + + for_each_active_xcu(xcu, xcu_id) { + nr_throttled += xcucg->perxcu_priv[xcu_id].nr_throttled; + throttled_time += xcucg->perxcu_priv[xcu_id].throttled_time; + exec_runtime += + xcucg->perxcu_priv[xcu_id].xse.cfs.sum_exec_runtime; + } + + seq_printf(sf, "exec_runtime: %llu\n", exec_runtime); + seq_printf(sf, "shares cfg: %llu/%llu x%u\n", xcucg->shares_cfg, + xcucg->parent->children_shares_sum, xcucg->weight); + seq_printf(sf, "quota: %lld\n", xcucg->quota); + seq_printf(sf, "used: %lld\n", xcucg->runtime); + seq_printf(sf, "period: %lld\n", xcucg->period); + + return 0; +} + +static struct cftype xcu_cg_files[] = { + { + .name = "period_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_PERIOD_MS, + }, + { + .name = "quota_ms", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_QUOTA_MS, + }, + { + .name = "shares", + .flags = CFTYPE_NOT_ON_ROOT, + .read_s64 = xcu_read_s64, + .write_s64 = xcu_write_s64, + .private = XCU_FILE_SHARES, + }, + { + .name = "stat", + .seq_show = xcu_stat, + }, + { + .name = "sched", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = xcu_sched_show, + .write = xcu_sched_write, + }, + {} /* terminate */ +}; + +struct cgroup_subsys xcu_cgrp_subsys = { + .css_alloc = xcu_css_alloc, + .css_online = xcu_css_online, + .css_offline = xcu_css_offline, + .css_free = xcu_css_free, + .can_attach = xcu_can_attach, + .cancel_attach = xcu_cancel_attach, + .attach = xcu_attach, + .dfl_cftypes = xcu_cg_files, + .legacy_cftypes = xcu_cg_files, + .early_init = false, +}; diff --git a/kernel/xsched/core.c b/kernel/xsched/core.c new file mode 100644 index 0000000000000000000000000000000000000000..6e5cf060a612cd917af014daf1ca6492c6dc8031 --- /dev/null +++ b/kernel/xsched/core.c @@ -0,0 +1,754 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Core kernel scheduler code for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
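core.c drives one worker thread per XCU: it waits for pending kicks, asks the scheduling classes for the next entity, and select_work_def() then moves at most XSCHED_CFS_KICK_SLICE pending kicks from that entity's vstreams onto the XCU's submission list before the entity can be preempted. The sketch below illustrates only that batching cap, with invented kick counts and a trivial "first context with work" pick that stands in for the real class logic.

#include <stdio.h>

#define KICK_SLICE 10	/* mirrors XSCHED_CFS_KICK_SLICE */

int main(void)
{
	int pending[2] = { 25, 4 };	/* pending kicks of two contexts */
	int round = 0;

	while (pending[0] || pending[1]) {
		int ctx = pending[0] ? 0 : 1;	/* toy pick: first context with work */
		int batch = pending[ctx] < KICK_SLICE ? pending[ctx] : KICK_SLICE;

		pending[ctx] -= batch;
		printf("round %d: ctx %d submitted %d kicks, %d left\n",
		       round++, ctx, batch, pending[ctx]);
	}
	return 0;
}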
+ * + */ +#include +#include +#include +#include +#include +#include +#include + +int num_active_xcu; +spinlock_t xcu_mgr_lock; +extern struct xsched_group *root_xcg; + +/* Xsched XCU array and bitmask that represents which XCUs + * are present and online. + */ +DECLARE_BITMAP(xcu_online_mask, XSCHED_NR_CUS); +struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; + +static DEFINE_MUTEX(revmap_mutex); +static DEFINE_HASHTABLE(ctx_revmap, XCU_HASH_ORDER); + +static void put_prev_ctx(struct xsched_entity *xse) +{ + struct xsched_cu *xcu = xse->xcu; + + lockdep_assert_held(&xcu->xcu_lock); + xse->class->put_prev_ctx(xse); + xse->last_exec_runtime = 0; + atomic_set(&xse->submitted_one_kick, 0); + XSCHED_DEBUG("Put current xse %d @ %s\n", xse->tgid, __func__); +} + +static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + int kick_count, scheduled = 0, not_empty; + struct vstream_info *vs; + struct xcu_op_handler_params params; + struct vstream_metadata *vsm; + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + if (kick_count == 0) { + XSCHED_WARN("Try to select xse that has 0 kicks @ %s\n", + __func__); + return 0; + } + + do { + not_empty = 0; + for_each_vstream_in_ctx(vs, xse->ctx) { + if (scheduled >= XSCHED_CFS_KICK_SLICE) + break; + + spin_lock(&vs->stream_lock); + vsm = xsched_vsm_fetch_first(vs); + spin_unlock(&vs->stream_lock); + + if (!vsm) + continue; + list_add_tail(&vsm->node, &xcu->vsm_list); + scheduled++; + xsched_dec_pending_kicks_xse(xse); + not_empty++; + } + } while ((scheduled < XSCHED_CFS_KICK_SLICE) && (not_empty)); + + /* + * Iterate over all vstreams in context: + * Set wr_cqe bit in last computing task in vsm_list + */ + for_each_vstream_in_ctx(vs, xse->ctx) { + list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) { + if (vsm->parent == vs) { + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_SET_NOTIFY}; + params.param_2 = &vsm->sqe; + xcu_sqe_op(¶ms); + break; + } + } + } + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + xse->total_scheduled += scheduled; + return scheduled; +} + +static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) +{ + const struct xsched_class *class; + struct xsched_entity *next = NULL; + size_t scheduled; + + lockdep_assert_held(&xcu->xcu_lock); + for_each_xsched_class(class) { + next = class->pick_next_ctx(xcu); + if (next) { + scheduled = class->select_work ? 
+ class->select_work(xcu, next) : select_work_def(xcu, next); + + XSCHED_DEBUG("xse %d scheduled=%zu total=%zu @ %s\n", + next->tgid, scheduled, next->total_scheduled, __func__); + break; + } + } + + return next; +} + +void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + lockdep_assert_held(&xcu->xcu_lock); + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return; + } + + if (!xse->on_rq) { + xse->on_rq = true; + xse->class->enqueue_ctx(xse, xcu); + XSCHED_DEBUG("Enqueue xse %d @ %s\n", xse->tgid, __func__); + } +} + +void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + lockdep_assert_held(&xcu->xcu_lock); + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return; + } + + if (xse->on_rq) { + xse->class->dequeue_ctx(xse); + xse->on_rq = false; + XSCHED_DEBUG("Dequeue xse %d @ %s\n", xse->tgid, __func__); + } +} + +static int delete_ctx(struct xsched_context *ctx) +{ + struct xsched_cu *xcu = ctx->xse.xcu; + struct xsched_entity *curr_xse = xcu->xrq.curr_xse; + struct xsched_entity *xse = &ctx->xse; + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return -EINVAL; + } + + if (!xse->xcu) { + XSCHED_ERR("Try to delete ctx that is not attached to xcu @ %s\n", + __func__); + return -EINVAL; + } + + /* Wait till context has been submitted. */ + while (atomic_read(&xse->kicks_pending_ctx_cnt)) { + XSCHED_DEBUG("Deleting ctx %d, xse->kicks_pending_ctx_cnt=%d @ %s\n", + xse->tgid, atomic_read(&xse->kicks_pending_ctx_cnt), + __func__); + usleep_range(100, 200); + } + + if (atomic_read(&xse->kicks_pending_ctx_cnt)) { + XSCHED_ERR("Deleting ctx %d that has pending kicks left @ %s\n", + xse->tgid, __func__); + return -EINVAL; + } + + mutex_lock(&xcu->xcu_lock); + if (curr_xse == xse) + xcu->xrq.curr_xse = NULL; + + dequeue_ctx(xse, xcu); + mutex_unlock(&xcu->xcu_lock); + XSCHED_DEBUG("Deleting ctx %d, pending kicks left=%d @ %s\n", xse->tgid, + atomic_read(&xse->kicks_pending_ctx_cnt), __func__); + + xsched_group_xse_detach(xse); + return 0; +} + +/* Frees a given vstream and also frees and dequeues it's context + * if a given vstream is the last and only vstream attached to it's + * corresponding context object. + */ +void xsched_task_free(struct kref *kref) +{ + struct xsched_context *ctx; + vstream_info_t *vs, *tmp; + struct xsched_cu *xcu; + + ctx = container_of(kref, struct xsched_context, kref); + xcu = ctx->xse.xcu; + + /* Wait till xse dequeues */ + while (READ_ONCE(ctx->xse.on_rq)) + usleep_range(100, 200); + + mutex_lock(&xcu->ctx_list_lock); + list_for_each_entry_safe(vs, tmp, &ctx->vstream_list, ctx_node) { + list_del(&vs->ctx_node); + kfree(vs->data); + kfree(vs); + } + + delete_ctx(ctx); + list_del(&ctx->ctx_node); + --xcu->nr_ctx; + mutex_unlock(&xcu->ctx_list_lock); + + kfree(ctx); +} + +int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx) +{ + struct ctx_devid_revmap_data *revmap_data; + struct xsched_cu *xcu_found = NULL; + uint32_t type = XCU_TYPE_XPU; + + /* Find XCU history. 
*/ + hash_for_each_possible(ctx_revmap, revmap_data, hash_node, + (unsigned long)ctx->dev_id) { + if (revmap_data && revmap_data->group) { + /* Bind ctx to group xcu.*/ + ctx->xse.xcu = revmap_data->group->xcu; + return 0; + } + } + + revmap_data = kzalloc(sizeof(struct ctx_devid_revmap_data), GFP_KERNEL); + if (revmap_data == NULL) { + XSCHED_ERR("Revmap_data is NULL @ %s\n", __func__); + return -ENOMEM; + } + + xcu_found = xcu_find(&type, ctx->dev_id, vstream_info->channel_id); + if (!xcu_found) + return -EINVAL; + + /* Bind ctx to an XCU from channel group. */ + revmap_data->group = xcu_found->group; + ctx->xse.xcu = xcu_found; + vstream_info->xcu = xcu_found; + revmap_data->dev_id = vstream_info->dev_id; + XSCHED_DEBUG("Ctx bind to xcu %u @ %s\n", xcu_found->id, __func__); + + hash_add(ctx_revmap, &revmap_data->hash_node, + (unsigned long)ctx->dev_id); + + return 0; +} + +int vstream_bind_to_xcu(vstream_info_t *vstream_info) +{ + struct xsched_cu *xcu_found = NULL; + uint32_t type = XCU_TYPE_XPU; + + xcu_found = xcu_find(&type, vstream_info->dev_id, vstream_info->channel_id); + if (!xcu_found) + return -EINVAL; + + /* Bind vstream to a xcu. */ + vstream_info->xcu = xcu_found; + vstream_info->dev_id = xcu_found->id; + XSCHED_DEBUG("XCU bound to a vstream: type=%u, dev_id=%u, chan_id=%u.\n", + type, vstream_info->dev_id, vstream_info->channel_id); + + return 0; +} + +struct xsched_cu *xcu_find(uint32_t *type, + uint32_t dev_id, uint32_t channel_id) +{ + struct xcu_group *group = NULL; + uint32_t local_type = *type; + + /* Find xcu by type. */ + group = xcu_group_find(xcu_group_root, local_type); + if (group == NULL) { + XSCHED_ERR("Fail to find type group.\n"); + return NULL; + } + + /* Find device id group. */ + group = xcu_group_find(group, dev_id); + if (group == NULL) { + XSCHED_ERR("Fail to find device group.\n"); + return NULL; + } + /* Find channel id group. 
*/ + group = xcu_group_find(group, channel_id); + if (group == NULL) { + XSCHED_ERR("Fail to find channel group.\n"); + return NULL; + } + + *type = local_type; + XSCHED_DEBUG("XCU found: type=%u, dev_id=%u, chan_id=%u.\n", + local_type, dev_id, channel_id); + + return group->xcu; +} + +int xsched_xse_set_class(struct xsched_entity *xse) +{ +#ifdef CONFIG_CGROUP_XCU + xsched_group_inherit(current, xse); +#endif + switch (xse->parent_grp->sched_type) { + case XSCHED_TYPE_RT: + xse->class = &rt_xsched_class; + XSCHED_DEBUG("Context is in RT class %s\n", __func__); + break; + case XSCHED_TYPE_CFS: + xse->class = &fair_xsched_class; + XSCHED_DEBUG("Context is in CFS class %s\n", __func__); + break; + default: + XSCHED_ERR("Xse has incorrect class @ %s\n", __func__); + return -EINVAL; + } + return 0; +} + +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) +{ + int err = 0; + struct xsched_entity *xse = &ctx->xse; + + atomic_set(&xse->kicks_pending_ctx_cnt, 0); + atomic_set(&xse->submitted_one_kick, 0); + + xse->total_scheduled = 0; + xse->total_submitted = 0; + xse->last_exec_runtime = 0; + xse->task_type = GET_VS_TASK_TYPE(vs); + xse->fd = ctx->fd; + xse->tgid = ctx->tgid; + + err = ctx_bind_to_xcu(vs, ctx); + if (err) { + XSCHED_ERR( + "Couldn't find valid xcu for vstream %u dev_id %u @ %s\n", + vs->id, vs->dev_id, __func__); + return -EINVAL; + } + + xse->ctx = ctx; + BUG_ON(vs->xcu == NULL); + xse->xcu = vs->xcu; + + err = xsched_xse_set_class(xse); + if (err) { + XSCHED_ERR("Fail to set xse class @ %s\n", __func__); + return err; + } + + if (xse_is_cfs(xse)) { + xse->cfs.sum_exec_runtime = 0; + } + + if (xse_is_rt(xse)) { + xse->rt.state = XSE_PREPARE; + xse->rt.flag = XSE_TIF_NONE; + xse->rt.prio = GET_VS_TASK_PRIO_RT(vs); + xse->rt.kick_slice = XSCHED_RT_KICK_SLICE; + + /* XSE priority is being decreased by 1 here because + * in libucc priority counter starts from 1 while in the + * kernel counter starts with 0. + * + * This inconsistency has to be solve in libucc in the + * future rather that having this confusing decrement to + * priority inside the kernel. + */ + if (xse->rt.prio > 0) + xse->rt.prio -= 1; + + INIT_LIST_HEAD(&xse->rt.list_node); + } + WRITE_ONCE(xse->on_rq, false); + + spin_lock_init(&xse->xse_lock); + return err; +} + +static void submit_kick(struct vstream_metadata *vsm) +{ + struct vstream_info *vs = vsm->parent; + struct xcu_op_handler_params params; + params.group = vs->xcu->group; + params.fd = vs->fd; + params.param_1 = &vs->id; + params.param_2 = &vs->channel_id; + params.param_3 = vsm->sqe; + params.param_4 = &vsm->sqe_num; + params.param_5 = &vsm->timeout; + params.param_6 = &vs->sqcq_type; + params.param_7 = vs->drv_ctx; + params.param_8 = &vs->logic_vcq_id; + + /* Send vstream on a device for processing. */ + if (xcu_run(¶ms)) { + XSCHED_ERR( + "Fail to send Vstream id %u tasks to a device for processing.\n", + vs->id); + } + + XSCHED_DEBUG("Vstream id %u submit vsm: sq_tail %u\n", vs->id, vsm->sq_tail); +} + +static void submit_wait(struct vstream_metadata *vsm) +{ + struct vstream_info *vs = vsm->parent; + struct xcu_op_handler_params params; + /* Wait timeout in ms. */ + int32_t timeout = 500; + + params.group = vs->xcu->group; + params.param_1 = &vs->channel_id; + params.param_2 = &vs->logic_vcq_id; + params.param_3 = &vs->user_stream_id; + params.param_4 = &vsm->sqe; + params.param_5 = vsm->cqe; + params.param_6 = vs->drv_ctx; + params.param_7 = &timeout; + + /* Wait for a device to complete processing. 
*/ + if (xcu_wait(¶ms)) { + XSCHED_ERR("Fail to wait Vstream id %u tasks, logic_cq_id %u.\n", + vs->id, vs->logic_vcq_id); + } + + XSCHED_DEBUG("Vstream id %u wait finish, logic_cq_id %u\n", + vs->id, vs->logic_vcq_id); +} + +static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + struct vstream_metadata *vsm, *tmp; + int submitted = 0; + long submit_exec_time = 0; + ktime_t t_start = 0; + struct xcu_op_handler_params params; + + XSCHED_DEBUG("%s called for xse %d on xcu %u\n", + __func__, xse->tgid, xcu->id); + + list_for_each_entry_safe(vsm, tmp, &xcu->vsm_list, node) { + submit_kick(vsm); + XSCHED_DEBUG("Xse %d vsm %u sched_delay: %lld ns\n", + xse->tgid, vsm->sq_id, ktime_to_ns(ktime_sub(ktime_get(), vsm->add_time))); + + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_IS_NOTIFY}; + params.param_2 = &vsm->sqe; + if (xcu_sqe_op(¶ms)) { + mutex_unlock(&xcu->xcu_lock); + t_start = ktime_get(); + submit_wait(vsm); + submit_exec_time += ktime_to_ns(ktime_sub(ktime_get(), t_start)); + mutex_lock(&xcu->xcu_lock); + } + submitted++; + list_del(&vsm->node); + kfree(vsm); + } + + xse->last_exec_runtime += submit_exec_time; + xse->total_submitted += submitted; + atomic_add(submitted, &xse->submitted_one_kick); + INIT_LIST_HEAD(&xcu->vsm_list); + XSCHED_DEBUG("Xse %d submitted=%d total=%zu, exec_time=%ld @ %s\n", + xse->tgid, submitted, xse->total_submitted, + submit_exec_time, __func__); + + return submitted; +} + +static inline bool should_preempt(struct xsched_entity *xse) +{ + return xse->class->check_preempt(xse); +} + +static int xsched_schedule(void *input_xcu) +{ + struct xsched_cu *xcu = input_xcu; + int err = 0; + struct xsched_entity *curr_xse = NULL; + struct xsched_entity *next_xse = NULL; + + while (!kthread_should_stop()) { + mutex_unlock(&xcu->xcu_lock); + wait_event_interruptible(xcu->wq_xcu_idle, + xcu->xrq.cfs.nr_running || xcu->xrq.rt.nr_running); + XSCHED_DEBUG("%s: rt nr_running = %u, cfs nr_running = %u\n", + __func__, xcu->xrq.rt.nr_running, xcu->xrq.cfs.nr_running); + + mutex_lock(&xcu->xcu_lock); + if (!xsched_check_pending_kicks_xcu(xcu)) { + XSCHED_WARN("%s: No pending kicks on xcu %u\n", __func__, xcu->id); + continue; + } + + next_xse = __raw_pick_next_ctx(xcu); + if (!next_xse) { + XSCHED_WARN("%s: Couldn't find next xse on xcu %u\n", __func__, xcu->id); + continue; + } + + xcu->xrq.curr_xse = next_xse; + + if (__xsched_submit(xcu, next_xse) == 0) + continue; + + curr_xse = xcu->xrq.curr_xse; + if (!curr_xse) + continue; + + /* if not deleted yet */ + put_prev_ctx(curr_xse); + if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt)) + dequeue_ctx(curr_xse, xcu); + + if (xsched_quota_exceed(curr_xse->parent_grp)) + dequeue_ctx(&curr_xse->parent_grp->perxcu_priv[xcu->id].xse, xcu); + + xcu->xrq.curr_xse = NULL; + } + + return err; +} + +/* Initialize xsched rt runqueue during kernel init. + * Should only be called from xsched_rq_init function. + */ +static inline void xsched_rt_rq_init(struct xsched_cu *xcu) +{ + int prio = 0; + + xcu->xrq.rt.nr_running = 0; + + for_each_xse_prio(prio) { + INIT_LIST_HEAD(&xcu->xrq.rt.rq[prio]); + xcu->xrq.rt.prio_nr_running[prio] = 0; + atomic_set(&xcu->xrq.rt.prio_nr_kicks[prio], 0); + } +} + +/* Initialize xsched cfs runqueue during kernel init. + * Should only be called from xsched_rq_init function. + */ +static inline void xsched_cfs_rq_init(struct xsched_cu *xcu) +{ + xcu->xrq.cfs.nr_running = 0; + xcu->xrq.cfs.ctx_timeline = RB_ROOT_CACHED; +} + +/* Initialize xsched classes' runqueues. 
*/ +static inline void xsched_rq_init(struct xsched_cu *xcu) +{ + xcu->xrq.curr_xse = NULL; + xcu->xrq.class = &rt_xsched_class; + xcu->xrq.state = XRQ_STATE_IDLE; + xsched_rt_rq_init(xcu); + xsched_cfs_rq_init(xcu); +} + +/* Initializes all xsched XCU objects. + * Should only be called from xsched_xcu_register function. + */ +static void xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, + int xcu_id) +{ + bitmap_clear(xcu_group_root->xcu_mask, 0, XSCHED_NR_CUS); + + xcu->id = xcu_id; + xcu->state = XSCHED_XCU_NONE; + xcu->group = group; + xcu->nr_ctx = 0; + + atomic_set(&xcu->pending_kicks_rt, 0); + atomic_set(&xcu->pending_kicks_cfs, 0); + + INIT_LIST_HEAD(&xcu->vsm_list); + INIT_LIST_HEAD(&xcu->ctx_list); + init_waitqueue_head(&xcu->wq_xcu_idle); + mutex_init(&xcu->xcu_lock); + mutex_init(&xcu->ctx_list_lock); + +#ifdef CONFIG_XCU_VSTREAM + mutex_init(&xcu->vs_array_lock); +#endif + + /* Mark current XCU in a mask inside XCU root group. */ + set_bit(xcu->id, xcu_group_root->xcu_mask); + + /* Initialize current XCU's runqueue. */ + xsched_rq_init(xcu); + + /* This worker should set XCU to XSCHED_XCU_WAIT_IDLE. + * If after initialization XCU still has XSCHED_XCU_NONE + * status then we can assume that there was a problem + * with XCU kthread job. + */ + xcu->worker = kthread_run(xsched_schedule, xcu, "xcu_%u", xcu->id); +} + +/* Increment xcu id */ +static int nr_active_cu_inc(void) +{ + int cur_num = -1; + + spin_lock(&xcu_mgr_lock); + if (num_active_xcu >= XSCHED_NR_CUS) + goto out_unlock; + + cur_num = num_active_xcu; + num_active_xcu++; + XSCHED_DEBUG("Number of active xcus: %d.\n", num_active_xcu); + +out_unlock: + spin_unlock(&xcu_mgr_lock); + return cur_num; +} + +/* Adds vstream_metadata object to a specified vstream. */ +int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg) +{ + struct vstream_metadata *new_vsm; + + new_vsm = kmalloc(sizeof(struct vstream_metadata), GFP_KERNEL); + if (!new_vsm) { + XSCHED_ERR("Fail to alloc kick metadata for vs %u @ %s\n", + vs->id, __func__); + return -ENOMEM; + } + + if (vs->kicks_count > MAX_VSTREAM_SIZE) { + kfree(new_vsm); + return -EBUSY; + } + + xsched_init_vsm(new_vsm, vs, arg); + list_add_tail(&new_vsm->node, &vs->metadata_list); + new_vsm->add_time = ktime_get(); + vs->kicks_count += 1; + + return 0; +} + +/* Fetch the first vstream metadata from vstream metadata list + * and removes it from that list. Returned vstream metadata pointer + * to be freed after. + */ +struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs) +{ + struct vstream_metadata *vsm; + + if (list_empty(&vs->metadata_list)) { + XSCHED_DEBUG("No metadata to fetch from vs %u @ %s\n", + vs->id, __func__); + return NULL; + } + + vsm = list_first_entry(&vs->metadata_list, struct vstream_metadata, node); + if (!vsm) { + XSCHED_ERR("Corrupted metadata list in vs %u @ %s\n", + vs->id, __func__); + return NULL; + } + + list_del(&vsm->node); + if (vs->kicks_count == 0) + XSCHED_WARN("kicks_count underflow in vs %u @ %s\n", + vs->id, __func__); + else + vs->kicks_count -= 1; + + return vsm; +} + +/* + * Initialize and register xcu in xcu_manager array. + */ +int xsched_xcu_register(struct xcu_group *group, int phys_id) +{ + int xcu_cur_num; + struct xsched_cu *xcu; + + /* Can be refactored in future because it's possible that + * device contains more than 1 hardware task scheduler. 
+ */ + if (phys_id >= XSCHED_NR_CUS) { + XSCHED_ERR("phys_id (%d) >= XSCHED_NR_CUS (%d).\n", + phys_id, XSCHED_NR_CUS); + return -EINVAL; + } + + xcu_cur_num = nr_active_cu_inc(); + if (xcu_cur_num < 0) { + XSCHED_ERR("Number of present XCU's exceeds %d: %d.\n", + XSCHED_NR_CUS, num_active_xcu); + return -ENOSPC; + }; + + xcu = kzalloc(sizeof(struct xsched_cu), GFP_KERNEL); + if (!xcu) { + XSCHED_ERR("Fail to alloc xcu.\n"); + return -ENOMEM; + }; + + group->xcu = xcu; + xsched_cu_mgr[phys_id] = xcu; + + /* Init xcu's internals. */ + xsched_xcu_init(xcu, group, phys_id); + return 0; +} +EXPORT_SYMBOL(xsched_xcu_register); + +int __init xsched_init(void) +{ + xcu_cg_init_common(root_xcg); + return 0; +} +late_initcall(xsched_init); diff --git a/kernel/xsched/rt.c b/kernel/xsched/rt.c new file mode 100644 index 0000000000000000000000000000000000000000..1a0a7f54206060f27c9625adf55e5d6e5f4d7338 --- /dev/null +++ b/kernel/xsched/rt.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Real-Time Scheduling Class for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include + +/* Add xsched entitiy to a run list based on priority, set on_cu flag + * and set a corresponding curr_prios bit if necessary. + */ +static inline void +xse_rt_add(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + list_add_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]); + __set_bit(xse->rt.prio, xcu->xrq.rt.curr_prios); +} + +/* Delete xsched entitiy from a run list, unset on_cu flag and + * unset corresponding curr_prios bit if necessary. + */ +static inline void xse_rt_del(struct xsched_entity *xse) +{ + struct xsched_cu *xcu = xse->xcu; + + list_del_init(&xse->rt.list_node); + if (list_empty(&xcu->xrq.rt.rq[xse->rt.prio])) + __clear_bit(xse->rt.prio, xcu->xrq.rt.curr_prios); +} + +static inline void xse_rt_move_tail(struct xsched_entity *xse) +{ + struct xsched_cu *xcu = xse->xcu; + + list_move_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]); +} + +/* Increase RT runqueue total and per prio nr_running stat. */ +static inline void xrq_inc_nr_running(struct xsched_entity *xse, + struct xsched_cu *xcu) +{ + xcu->xrq.rt.nr_running++; + xcu->xrq.rt.prio_nr_running[xse->rt.prio]++; + set_bit(xse->rt.prio, xcu->xrq.rt.curr_prios); +} + +/* Decrease RT runqueue total and per prio nr_running stat + * and raise a bug if nr_running decrease beyond zero. 
+ */ +static inline void xrq_dec_nr_running(struct xsched_entity *xse) +{ + struct xsched_cu *xcu = xse->xcu; + + xcu->xrq.rt.nr_running--; + xcu->xrq.rt.prio_nr_running[xse->rt.prio]--; + + if (!xcu->xrq.rt.prio_nr_running[xse->rt.prio]) + clear_bit(xse->rt.prio, xcu->xrq.rt.curr_prios); +} + +static void dequeue_ctx_rt(struct xsched_entity *xse) +{ + xse_rt_del(xse); + xrq_dec_nr_running(xse); +} + +static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + xse_rt_add(xse, xcu); + xrq_inc_nr_running(xse, xcu); +} + +static inline struct xsched_entity *xrq_next_xse(struct xsched_cu *xcu, + int prio) +{ + return list_first_entry(&xcu->xrq.rt.rq[prio], struct xsched_entity, + rt.list_node); +} + +/* Return the next priority for pick_next_ctx taking into + * account if there are pending kicks on certain priority. + */ +static inline uint32_t get_next_prio_rt(struct xsched_rq *xrq) +{ + int32_t curr_prio; + bool bit_val; + unsigned long *prios = xrq->rt.curr_prios; + atomic_t *prio_nr_kicks = xrq->rt.prio_nr_kicks; + + /* Using generic for loop instead of for_each_set_bit + * because it will be faster than for_each_set_bit. + */ + for (curr_prio = NR_XSE_PRIO - 1; curr_prio >= 0; curr_prio--) { + bit_val = test_bit(curr_prio, prios); + if (!bit_val && atomic_read(&prio_nr_kicks[curr_prio])) { + XSCHED_ERR( + "kicks > 0 on RT priority with the priority bit unset\n"); + BUG(); + return NR_XSE_PRIO; + } + + if (bit_val && atomic_read(&prio_nr_kicks[curr_prio])) + return curr_prio; + } + return NR_XSE_PRIO; +} + +static struct xsched_entity *pick_next_ctx_rt(struct xsched_cu *xcu) +{ + struct xsched_entity *result; + int next_prio; + + next_prio = get_next_prio_rt(&xcu->xrq); + if (next_prio >= NR_XSE_PRIO) { + XSCHED_DEBUG("No pending kicks in RT class @ %s\n", __func__); + return NULL; + } + + if (!xcu->xrq.rt.prio_nr_running[next_prio]) { + XSCHED_ERR( + "The nr_running of RT is 0 while there are pending kicks for %u prio\n", + next_prio); + return NULL; + } + + result = xrq_next_xse(xcu, next_prio); + if (!result) + XSCHED_ERR("Next XSE not found @ %s\n", __func__); + + return result; +} + +static void put_prev_ctx_rt(struct xsched_entity *xse) +{ + xse->rt.kick_slice -= atomic_read(&xse->submitted_one_kick); + XSCHED_DEBUG( + "Update XSE=%d kick_slice=%lld, XSE submitted=%d in RT class @ %s\n", + xse->tgid, xse->rt.kick_slice, + atomic_read(&xse->submitted_one_kick), __func__); + + if (xse->rt.kick_slice <= 0) { + xse->rt.kick_slice = XSCHED_RT_KICK_SLICE; + XSCHED_DEBUG("Refill XSE=%d kick_slice=%lld in RT class @ %s\n", + xse->tgid, xse->rt.kick_slice, __func__); + xse_rt_move_tail(xse); + } +} + +static int submit_prepare_ctx_rt(struct xsched_entity *xse, + struct xsched_cu *xcu) +{ + if (!atomic_read(&xse->kicks_pending_ctx_cnt)) { + xse->rt.state = XSE_READY; + xse->rt.kick_slice = 0; + return -EAGAIN; + } + xse->rt.state = XSE_RUNNING; + + return 0; +} + +static bool check_preempt_ctx_rt(struct xsched_entity *xse) +{ + return true; +} + +static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + int kick_count, scheduled = 0; + struct vstream_info *vs; + struct vstream_metadata *vsm; + struct xcu_op_handler_params params; + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + if (kick_count == 0) { + XSCHED_WARN("Try to select xse that has 0 kicks @ %s\n", + __func__); + return 0; + } + + for_each_vstream_in_ctx(vs, xse->ctx) { + 
		spin_lock(&vs->stream_lock);
+		while ((vsm = xsched_vsm_fetch_first(vs))) {
+			list_add_tail(&vsm->node, &xcu->vsm_list);
+			scheduled++;
+			xsched_dec_pending_kicks_xse(xse);
+		}
+		spin_unlock(&vs->stream_lock);
+	}
+
+	/*
+	 * Iterate over all vstreams in context:
+	 * Set wr_cqe bit in last computing task in vsm_list
+	 */
+	for_each_vstream_in_ctx(vs, xse->ctx) {
+		list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) {
+			if (vsm->parent == vs) {
+				params.group = vsm->parent->xcu->group;
+				params.param_1 = &(int){SQE_SET_NOTIFY};
+				params.param_2 = &vsm->sqe;
+				xcu_sqe_op(&params);
+				break;
+			}
+		}
+	}
+
+	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
+	XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n",
+		     kick_count, __func__);
+
+	xse->total_scheduled += scheduled;
+	return scheduled;
+}
+
+const struct xsched_class rt_xsched_class = {
+	.next = &fair_xsched_class,
+	.dequeue_ctx = dequeue_ctx_rt,
+	.enqueue_ctx = enqueue_ctx_rt,
+	.pick_next_ctx = pick_next_ctx_rt,
+	.put_prev_ctx = put_prev_ctx_rt,
+	.submit_prepare_ctx = submit_prepare_ctx_rt,
+	.select_work = select_work_rt,
+	.check_preempt = check_preempt_ctx_rt
+};
diff --git a/kernel/xsched/vstream.c b/kernel/xsched/vstream.c
new file mode 100644
index 0000000000000000000000000000000000000000..e47a117497efe9f07f46fccc66fd9f6f3235f287
--- /dev/null
+++ b/kernel/xsched/vstream.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Vstream manage for XPU device
+ *
+ * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd
+ *
+ * Author: Konstantin Meskhidze
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * + */ +#include +#include +#include +#include +#include + +#ifdef CONFIG_XCU_VSTREAM +static int vstream_del(vstream_info_t *vstream, uint32_t vstream_id); +static int vstream_file_release(struct inode *inode, struct file *file); +static const struct file_operations vstreamfd_fops = { + .release = vstream_file_release, +}; + +static inline struct file *vstream_file_get(int vs_fd) +{ + return fget(vs_fd); +} + +static inline void vstream_file_put(struct file *vstream_file) +{ + fput(vstream_file); +} + +static int vstream_file_create(struct vstream_info *vs) +{ + return anon_inode_getfd("[vstreamfd]", &vstreamfd_fops, vs, + O_RDWR | O_CLOEXEC | O_NONBLOCK); +} + +static int vstream_destroy(vstream_info_t *vstream) +{ + int err; + struct xsched_context *ctx = NULL; + struct xsched_entity *xse = NULL; + + err = vstream_del(vstream, vstream->id); + if (err) + return err; + + xse = &vstream->ctx->xse; + ctx = vstream->ctx; + kref_put(&ctx->kref, xsched_task_free); + + return 0; +} + +static int vstream_file_release(struct inode *inode, struct file *file) +{ + vstream_info_t *vstream; + (void) inode; + + if (!file->private_data) + return 0; + + vstream = file->private_data; + return vstream_destroy(vstream); +} + +static void init_xsched_ctx(struct xsched_context *ctx, + const struct vstream_info *vs) +{ + ctx->tgid = vs->tgid; + ctx->fd = vs->fd; + ctx->dev_id = vs->dev_id; + kref_init(&ctx->kref); + + INIT_LIST_HEAD(&ctx->vstream_list); + INIT_LIST_HEAD(&ctx->ctx_node); + + spin_lock_init(&ctx->ctx_lock); + mutex_init(&ctx->ctx_mutex); +} + +/* Allocates a new xsched_context if a new vstream_info is bound + * to a device that no other vstream that is currently present + * is bound to. + */ +static int alloc_ctx_from_vstream(struct vstream_info *vstream_info, + struct xsched_context **ctx) +{ + struct xsched_cu *xcu = vstream_info->xcu; + + *ctx = ctx_find_by_tgid_and_xcu(vstream_info->tgid, xcu); + if (*ctx) + return 0; + + *ctx = kzalloc(sizeof(struct xsched_context), GFP_KERNEL); + if (!*ctx) { + XSCHED_ERR("Fail to alloc xsched context (tgid=%d) @ %s\n", + vstream_info->tgid, __func__); + return -ENOMEM; + } + + init_xsched_ctx(*ctx, vstream_info); + + if (xsched_ctx_init_xse(*ctx, vstream_info) != 0) { + XSCHED_ERR("Fail to initialize XSE for context @ %s\n", + __func__); + kfree(*ctx); + return -EINVAL; + } + list_add(&(*ctx)->ctx_node, &xcu->ctx_list); + ++xcu->nr_ctx; + + return 0; +} + +/* Bounds a new vstream_info object to a corresponding xsched context. 
*/ +static int vstream_bind_to_ctx(struct vstream_info *vs) +{ + struct xsched_context *ctx = NULL; + struct xsched_cu *xcu = vs->xcu; + int err = 0; + + mutex_lock(&xcu->ctx_list_lock); + ctx = ctx_find_by_tgid_and_xcu(vs->tgid, xcu); + if (ctx) { + XSCHED_DEBUG("Ctx %d found @ %s\n", vs->tgid, __func__); + kref_get(&ctx->kref); + } else { + err = alloc_ctx_from_vstream(vs, &ctx); + if (err) + goto out_err; + } + + vs->ctx = ctx; + list_add(&vs->ctx_node, &vs->ctx->vstream_list); + +out_err: + mutex_unlock(&xcu->ctx_list_lock); + return err; +} + +static vstream_info_t *vstream_create(struct vstream_args *arg) +{ + struct vstream_info *vstream = NULL; + + vstream = kzalloc(sizeof(vstream_info_t), GFP_KERNEL); + if (!vstream) { + XSCHED_ERR("Failed to allocate vstream.\n"); + return NULL; + } + + vstream->dev_id = arg->dev_id; + vstream->channel_id = arg->channel_id; + vstream->kicks_count = 0; + vstream->xcu = NULL; + + INIT_LIST_HEAD(&vstream->ctx_node); + INIT_LIST_HEAD(&vstream->xcu_node); + INIT_LIST_HEAD(&vstream->metadata_list); + + spin_lock_init(&vstream->stream_lock); + + return vstream; +} + +static int vstream_add(vstream_info_t *vstream, uint32_t id) +{ + int err = 0; + struct xsched_cu *xcu = vstream->xcu; + + if (id >= MAX_VSTREAM_NUM) { + XSCHED_ERR("Vstream id=%u out of range @ %s.\n", + id, __func__); + return -EINVAL; + } + + mutex_lock(&xcu->vs_array_lock); + if (xcu->vs_array[id] != NULL) { + XSCHED_ERR("Vstream id=%u cell is busy.\n", id); + err = -EINVAL; + goto out_err; + } + xcu->vs_array[id] = vstream; + +out_err: + mutex_unlock(&xcu->vs_array_lock); + return err; +} + +static int vstream_del(vstream_info_t *vstream, uint32_t vstream_id) +{ + struct xsched_cu *xcu = vstream->xcu; + + if (vstream_id >= MAX_VSTREAM_NUM) { + XSCHED_ERR("Vstream id=%u out of range @ %s.\n", + vstream_id, __func__); + return -EINVAL; + } + + mutex_lock(&xcu->vs_array_lock); + xcu->vs_array[vstream_id] = NULL; + mutex_unlock(&xcu->vs_array_lock); + return 0; +} + +static vstream_info_t *vstream_get(struct xsched_cu *xcu, uint32_t vstream_id) +{ + vstream_info_t *vstream = NULL; + + if (vstream_id >= MAX_VSTREAM_NUM) { + XSCHED_ERR("Vstream id=%u out of range @ %s.\n", + vstream_id, __func__); + return NULL; + } + + mutex_lock(&xcu->vs_array_lock); + vstream = xcu->vs_array[vstream_id]; + mutex_unlock(&xcu->vs_array_lock); + + return vstream; +} + +static vstream_info_t * +vstream_get_by_user_stream_id(struct xsched_cu *xcu, uint32_t user_stream_id) +{ + int id; + static vstream_info_t *ret; + + mutex_lock(&xcu->vs_array_lock); + for (id = 0; id < MAX_VSTREAM_NUM; id++) { + if (xcu->vs_array[id] != NULL && + xcu->vs_array[id]->user_stream_id == user_stream_id) { + ret = xcu->vs_array[id]; + break; + } + } + mutex_unlock(&xcu->vs_array_lock); + return ret; +} + +static int sqcq_alloc(struct vstream_args *arg) +{ + vstream_alloc_args_t *va_args = &arg->va_args; + struct xsched_context *ctx = NULL; + struct xcu_op_handler_params params; + uint32_t logic_cq_id = 0; + vstream_info_t *vstream; + int ret = 0; + uint32_t tgid = 0; + uint32_t cq_id = 0; + uint32_t sq_id = 0; + + vstream = vstream_create(arg); + if (!vstream) + return -ENOSPC; + + vstream->fd = arg->fd; + vstream->task_type = arg->task_type; + + ret = vstream_bind_to_xcu(vstream); + if (ret) { + ret = -EINVAL; + goto out_err_vstream_free; + } + + /* Allocates vstream's SQ and CQ memory on a XCU for processing. 
 */
+	params.group = vstream->xcu->group;
+	params.fd = arg->fd;
+	params.payload = arg->payload;
+	params.param_1 = &tgid;
+	params.param_2 = &sq_id;
+	params.param_3 = &cq_id;
+	params.param_4 = &logic_cq_id;
+	ret = xcu_alloc(&params);
+	if (ret) {
+		XSCHED_ERR("Fail to allocate SQ/CQ memory to a vstream.\n");
+		goto out_err_vstream_free;
+	}
+
+	vstream->drv_ctx = params.param_5;
+	vstream->id = sq_id;
+	vstream->vcq_id = cq_id;
+	vstream->logic_vcq_id = logic_cq_id;
+	vstream->user_stream_id = va_args->user_stream_id;
+	vstream->tgid = tgid;
+	vstream->sqcq_type = va_args->type;
+
+	ret = vstream_bind_to_ctx(vstream);
+	if (ret)
+		goto out_err_vstream_free;
+
+	ctx = vstream->ctx;
+	ret = vstream_file_create(vstream);
+	if (ret < 0) {
+		XSCHED_ERR("Fail to alloc anon inode for vstream %u @ %s\n",
+			   vstream->id, __func__);
+		goto out_err_vstream_free;
+	}
+	vstream->inode_fd = ret;
+
+	/* Add new vstream to array after allocating inode */
+	ret = vstream_add(vstream, vstream->id);
+	if (ret)
+		goto out_err_vstream_free;
+
+	return 0;
+
+out_err_vstream_free:
+	kfree(vstream);
+	return ret;
+}
+
+static int logic_cq_alloc(struct vstream_args *arg)
+{
+	int err = 0;
+	struct xcu_op_handler_params params;
+	vstream_info_t *vstream = NULL;
+	vstream_alloc_args_t *logic_cq_alloc_para = &arg->va_args;
+	struct xsched_cu *xcu_found = NULL;
+	uint32_t logic_cq_id = 0;
+	uint32_t type = XCU_TYPE_XPU;
+
+	xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu_found)
+		return -EINVAL;
+
+	vstream = vstream_get_by_user_stream_id(xcu_found,
+						logic_cq_alloc_para->user_stream_id);
+	if (vstream)
+		xcu_found = vstream->xcu;
+	params.group = xcu_found->group;
+	params.fd = arg->fd;
+	params.payload = arg->payload;
+	params.param_1 = &logic_cq_id;
+	err = xcu_logic_alloc(&params);
+	if (err) {
+		XSCHED_ERR("Fail to alloc logic CQ memory to a vstream.\n");
+		return err;
+	}
+	if (vstream)
+		vstream->logic_vcq_id = logic_cq_id;
+
+	return 0;
+}
+
+int vstream_alloc(struct vstream_args *arg)
+{
+	vstream_alloc_args_t *va_args = &arg->va_args;
+	int ret;
+
+	if (!va_args->type)
+		ret = sqcq_alloc(arg);
+	else
+		ret = logic_cq_alloc(arg);
+
+	return ret;
+}
+
+int vstream_free(struct vstream_args *arg)
+{
+	struct file *vs_file;
+	struct xcu_op_handler_params params;
+	struct xsched_cu *xcu_found;
+	uint32_t vstream_id = arg->sq_id;
+	uint32_t type = XCU_TYPE_XPU;
+	vstream_info_t *vstream = NULL;
+	int err = 0;
+
+	xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu_found)
+		return -EINVAL;
+
+	vstream = vstream_get(xcu_found, vstream_id);
+	if (!vstream) {
+		XSCHED_ERR("Fail to free NULL vstream, vstream id=%u\n", vstream_id);
+		return -EINVAL;
+	}
+
+	params.group = vstream->xcu->group;
+	params.fd = arg->fd;
+	params.payload = arg->payload;
+
+	vs_file = vstream_file_get(vstream->inode_fd);
+	if (vs_file) {
+		vs_file->private_data = NULL;
+		vstream_file_put(vs_file);
+	}
+
+	/* After vstream_get(), destroying the vstream may not fail */
+	vstream_destroy(vstream);
+	err = xcu_finish(&params);
+	if (err)
+		XSCHED_ERR("Fail to free vstream sqId=%u, cqId=%u.\n",
+			   arg->sq_id, arg->cq_id);
+
+	return err;
+}
+
+int vstream_kick(struct vstream_args *arg)
+{
+	vstream_info_t *vstream;
+	struct xsched_cu *xcu = NULL;
+	struct xsched_entity *xse;
+	int err = 0;
+	uint32_t vstream_id = arg->sq_id;
+	uint32_t type = XCU_TYPE_XPU;
+
+	xcu = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu)
+		return -EINVAL;
+
+	/* Get vstream. */
+	vstream = vstream_get(xcu, vstream_id);
+	if (!vstream || !vstream->ctx) {
+		XSCHED_ERR("Vstream NULL or doesn't have a context. "
+			   "vstream_id=%u, dev_id=%u\n", vstream_id, arg->dev_id);
+		return -EINVAL;
+	}
+
+	xse = &vstream->ctx->xse;
+	XSCHED_DEBUG("New kick on xse %d @ %s\n", xse->tgid, __func__);
+
+	do {
+		mutex_lock(&xcu->xcu_lock);
+		spin_lock(&vstream->stream_lock);
+
+		/* Add kick metadata. */
+		err = xsched_vsm_add_tail(vstream, arg);
+		if (err == -EBUSY) {
+			spin_unlock(&vstream->stream_lock);
+			mutex_unlock(&xcu->xcu_lock);
+
+			/* Retry after a while */
+			usleep_range(100, 200);
+			continue;
+		}
+
+		/* Locks are released after the loop on this path. */
+		if (err) {
+			XSCHED_ERR("Fail to add kick metadata to vs %u @ %s\n",
+				   vstream->id, __func__);
+			break;
+		}
+
+		enqueue_ctx(xse, xcu);
+		/* Increase the total number of pending kicks on the XCU this
+		 * context is attached to, according to its sched_class.
+		 */
+		xsched_inc_pending_kicks_xse(&vstream->ctx->xse);
+	} while (err == -EBUSY);
+
+	spin_unlock(&vstream->stream_lock);
+	mutex_unlock(&xcu->xcu_lock);
+	if (!err)
+		wake_up_interruptible(&xcu->wq_xcu_idle);
+
+	return err;
+}
+
+/*
+ * vstream_manage_cmd table
+ */
+static vstream_manage_t(*vstream_command_table[MAX_COMMAND + 1]) = {
+	vstream_alloc,	// VSTREAM_ALLOC
+	vstream_free,	// VSTREAM_FREE
+	vstream_kick,	// VSTREAM_KICK
+	NULL		// MAX_COMMAND
+};
+
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+	int res = 0;
+	struct vstream_args vstream_arg;
+
+	/* Reject out-of-range commands before indexing the dispatch table. */
+	if (cmd < 0 || cmd >= MAX_COMMAND)
+		return -EINVAL;
+
+	if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) {
+		XSCHED_ERR("copy_from_user failed\n");
+		return -EFAULT;
+	}
+
+	res = vstream_command_table[cmd](&vstream_arg);
+	if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) {
+		XSCHED_ERR("copy_to_user failed\n");
+		return -EFAULT;
+	}
+
+	XSCHED_DEBUG("vstream_manage: cmd %d\n", cmd);
+	return res;
+}
+#else
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+	return 0;
+}
+#endif
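
For reviewers who want to try the series, a minimal user-space sketch of how the new entry point can be probed. Everything below is illustrative and not part of the patch: the __NR_vstream_manage fallback only covers the x86_64 slot wired up earlier in this series, the VSTREAM_* constants merely mirror the order of vstream_command_table, and a real caller would fill in a struct vstream_args from the kernel uapi headers instead of passing NULL.

/*
 * Hypothetical probe, not part of this patch. With CONFIG_XCU_VSTREAM
 * enabled, a NULL argument is rejected with -EFAULT by the
 * copy_from_user() check in sys_vstream_manage(); with the option
 * disabled the stub returns 0; on kernels without this series the
 * call fails with ENOSYS.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_vstream_manage
#define __NR_vstream_manage 456		/* x86_64 slot used by this series */
#endif

/* Command indices mirror the order of vstream_command_table. */
enum { VSTREAM_ALLOC = 0, VSTREAM_FREE = 1, VSTREAM_KICK = 2 };

int main(void)
{
	long ret = syscall(__NR_vstream_manage, NULL, VSTREAM_ALLOC);

	if (ret < 0 && errno == ENOSYS)
		printf("vstream_manage: not implemented by this kernel\n");
	else if (ret < 0)
		printf("vstream_manage: present, NULL args rejected (errno=%d)\n", errno);
	else
		printf("vstream_manage: returned %ld (vstream support likely disabled)\n", ret);

	return 0;
}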