From 9d1f1cbdbd7c2b1b49124bf73435e7976c6118ee Mon Sep 17 00:00:00 2001
From: Kai
Date: Tue, 30 Sep 2025 15:24:41 +0800
Subject: [PATCH] libXSched: Implement libucc shared library

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB

--------------------------------

Implement interception of NPU IOCTL for XSched

Signed-off-by: Hui Tang
Signed-off-by: Liu Kai
---
 .gitignore                   |   6 ++
 Makefile                     |  22 ++++
 README.en.md                 |  36 --------
 README.en_US.md              |  40 +++++++++
 README.md                    |  77 ++++++++--------
 include/ucc_engine.h         |  53 +++++++++++
 src/ascend_hal_interceptor.c | 166 +++++++++++++++++++++++++++++++++++
 src/fake_ioctl.c             | 126 ++++++++++++++++++++++++++
 src/vstream.c                |  15 ++++
 9 files changed, 468 insertions(+), 73 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 delete mode 100644 README.en.md
 create mode 100644 README.en_US.md
 create mode 100644 include/ucc_engine.h
 create mode 100644 src/ascend_hal_interceptor.c
 create mode 100644 src/fake_ioctl.c
 create mode 100644 src/vstream.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f783a3c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.so*
+*.o
+*.swp
+*.json*
+kernel_meta/
+*.patch
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9961d8c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+# Builds libucc_engine.so from every C file under ./src
+target = libucc_engine.so
+cc = gcc
+srcs = $(wildcard ./src/*.c)
+objs = $(patsubst %.c,%.o,$(srcs))
+INCLUDES = -I./include/ -I/usr/local/Ascend/driver/kernel/inc/driver/
+# Compile flags only; link flags are kept apart so the -c step never sees them
+CFLAGS = -fPIC
+LDFLAGS = -shared
+LDLIBS = -ldl
+
+.PHONY: all clean
+
+all: $(target)
+$(target): $(objs)
+	$(cc) $(LDFLAGS) $(objs) -o $@ $(LDLIBS)
+
+%.o: %.c
+	$(cc) -c $(INCLUDES) $(CFLAGS) $< -o $@
+
+clean:
+	rm -f $(target) $(objs)
diff --git a/README.en.md b/README.en.md
deleted file mode 100644
index 98e8b1e..0000000
--- a/README.en.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# libXSched
-
-#### Description
-A user space component provides seamless support for various XPUs runtimes to use XSched scheduling framework.
- -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.en_US.md b/README.en_US.md new file mode 100644 index 0000000..da1f4d2 --- /dev/null +++ b/README.en_US.md @@ -0,0 +1,40 @@ +# libucc + +
+ +**English** | [中文简体](./README.md) + +
+
+## Description
+
+Encapsulates the vstream_manage syscall and serves as the unified entry point for vstream management on XPU cards.
+
+## Build
+
+Copy the xcu_vstream.h header file from `include/uapi/linux` in the XSched kernel source to `/usr/include/linux`.
+
+```bash
+cp xcu_vstream.h /usr/include/linux
+```
+
+Run the following command (you will probably need sudo rights) to generate the shared library libucc_engine.so:
+
+```bash
+make clean && make
+```
+
+## Usage
+
+Option 1: Preload the shared library when launching a model. This only affects that model's process.
+
+```bash
+LD_PRELOAD=/path/to/libucc_engine.so <command>
+```
+
+Option 2: Export the environment variable. This affects every command started from the current shell session.
+
+```bash
+export LD_PRELOAD=/path/to/libucc_engine.so
+
+```
\ No newline at end of file
diff --git a/README.md b/README.md
index c1e53ca..4828a8e 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,40 @@
-# libXSched
-
-#### 介绍
-A user space component provides seamless support for various XPUs runtimes to use XSched scheduling framework.
-
-#### 软件架构
-软件架构说明
-
-
-#### 安装教程
-
-1. xxxx
-2. xxxx
-3. xxxx
-
-#### 使用说明
-
-1. xxxx
-2. xxxx
-3. xxxx
-
-#### 参与贡献
-
-1. Fork 本仓库
-2. 新建 Feat_xxx 分支
-3. 提交代码
-4. 新建 Pull Request
-
-
-#### 特技
-
-1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md
-2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com)
-3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目
-4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目
-5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
-6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
+# libucc
+
+ +[English](./README.en_US.md) | **中文简体** + +
+
+## 介绍
+
+拦截 npu 的 ioctl 接口并通过 vstream_manage 系统调用转发给 xsched 内核
+
+## 编译
+
+准备头文件 xcu_vstream.h,在 XSched 内核 `include/uapi/linux` 目录下,复制头文件到 `/usr/include/linux`
+
+```bash
+cp xcu_vstream.h /usr/include/linux
+```
+
+执行编译命令,生成 libucc_engine.so 共享库
+
+```bash
+make clean && make
+```
+
+## 使用
+
+方式一:运行模型时加载共享库,仅对当前模型有效
+
+```bash
+LD_PRELOAD=/path/to/libucc_engine.so <command>
+```
+
+方式二:配置环境变量,对当前 shell 会话有效
+
+```bash
+export LD_PRELOAD=/path/to/libucc_engine.so
+
+```
\ No newline at end of file
diff --git a/include/ucc_engine.h b/include/ucc_engine.h
new file mode 100644
index 0000000..ee7cbd2
--- /dev/null
+++ b/include/ucc_engine.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UCC_ENGINE_H
+#define _UCC_ENGINE_H
+
+#ifndef __NR_vstream_manage
+/* Default vstream_manage syscall number for XSched; define it earlier to override */
+#define __NR_vstream_manage 456
+#endif
+
+#include
+#include
+#include
+
+/**
+ * NPU IOCTL Interface Definitions
+ *
+ * IMPORTANT: Review all of these definitions thoroughly before implementing
+ * interception of the NPU IOCTL interface.
+ *
+ * Source: driver/kernel/rms/trs_drv/trs_core/trs_cmd.h
+ */
+#define TRS_SQCQ_ALLOC _IOWR('X', 15, struct halSqCqInputInfo)
+#define TRS_SQCQ_FREE _IOW('X', 16, struct halSqCqFreeInfo)
+#define TRS_SQCQ_SEND _IOW('X', 19, struct halTaskSendInfo)
+
+typedef enum {
+    ASCEND = 0,
+    DEV_TYPE_MAX
+} vstream_device_t;
+
+/**
+ * VSTREAM_ALLOC - allocate a vstream
+ * @args:
+ *     Includes the address, size and vstream_id pointer; the callback
+ *     stream address, size and id are used only for Ascend.
+ * @describe:
+ *     Create a queue in userspace and pass its address and size to the kernel.
+ *     The kernel maps the address to physical memory and returns a vstream_id.
+ *
+ * VSTREAM_FREE - free a vstream
+ * @args:
+ *     Includes the vstream_id.
+ *
+ * VSTREAM_KICK - notify the kernel that there are tasks in a vstream
+ * @args:
+ *     Includes the vstream_id and tail index.
+ * @describe:
+ *     A vstream is used as a circular queue: put tasks in the vstream and
+ *     update the tail index. The kernel schedules these tasks for execution.
+ */
+int handle_syscall(int device_id, vstream_args_t *vargs, vstream_command_t kcmd);
+
+#endif /* _UCC_ENGINE_H */
diff --git a/src/ascend_hal_interceptor.c b/src/ascend_hal_interceptor.c
new file mode 100644
index 0000000..4026ab3
--- /dev/null
+++ b/src/ascend_hal_interceptor.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+/*
+ * NOTE(review): the header names on the #include lines below were lost when
+ * this patch was extracted and are restored from the symbols used here
+ * (fprintf, dlopen/dlsym, uint32_t, drvError_t) - confirm before applying.
+ */
+#include <stdio.h>
+#include <dlfcn.h>
+#include <stdint.h>
+#include "ucc_engine.h"
+
+typedef int (*ioctl_type)(int, unsigned long, void *);
+typedef int (*uda_get_udevid_by_devid_type)(uint32_t, uint32_t *);
+typedef drvError_t (*hal_func_3args)(uint32_t, void *, void *);
+typedef drvError_t (*hal_func_2args)(uint32_t, void *);
+
+extern int fake_ioctl(int fd, unsigned long cmd, void *arg, uint32_t dev_id, int *is_vstream);
+
+static ioctl_type orig_ioctl = NULL;
+static uda_get_udevid_by_devid_type orig_uda_get_udevid_by_devid = NULL;
+static void *libascend_hal = NULL;
+static void *libc = NULL;
+
+/* Original libascend_hal entry points that the wrappers below forward to */
+static struct {
+    hal_func_3args halSqMemGet;
+    hal_func_3args halSqCqAllocate;
+    hal_func_2args halSqMsgSend;
+    hal_func_2args halSqTaskSend;
+    hal_func_2args halSqCqFree;
+} hal_funcs;
+
+/* Device id last stored by map_device_id(); -1 until one has been stored */
+int dev_id_saved = -1;
+static volatile int hooks_initialized = 0;
+
+/* Look up func_name in handle; log to stderr and return NULL if it is missing */
+static void *load_symbol(void *handle, const char *func_name)
+{
+    void *sym = dlsym(handle, func_name);
+
+    if (!sym)
+        fprintf(stderr, "LD_PRELOAD failed: couldn't load %s\n", func_name);
+
+    return sym;
+}
+
+/*
+ * Open libc and libascend_hal and resolve the symbols the hooks forward to.
+ *
+ * Returns 0 once both libraries are open (individual symbols may still be
+ * NULL; every user checks them) and -1 if either library cannot be opened.
+ * The real ioctl is resolved before libascend_hal is opened so that ioctl()
+ * can still pass requests through when the HAL library is unavailable.
+ */
+int init_hooks(void)
+{
+    if (hooks_initialized)
+        return 0;
+
+    /* Reuse the already-mapped libc if there is one, otherwise load it */
+    libc = dlopen("libc.so.6", RTLD_LAZY | RTLD_NOLOAD) ?: dlopen("libc.so.6", RTLD_LAZY);
+    if (!libc) {
+        fprintf(stderr, "LD_PRELOAD failed: couldn't load libc\n");
+        return -1;
+    }
+    if (!orig_ioctl)
+        orig_ioctl = (ioctl_type)load_symbol(libc, "ioctl");
+
+    /* Same lookup order for libascend_hal */
+    libascend_hal = dlopen("libascend_hal.so", RTLD_LAZY | RTLD_NOLOAD) ?: dlopen("libascend_hal.so", RTLD_LAZY);
+    if (!libascend_hal) {
+        fprintf(stderr, "LD_PRELOAD failed: couldn't load libascend_hal\n");
+        return -1;
+    }
+
+    /* Resolve the HAL symbols; a missing one is reported by load_symbol() */
+    orig_uda_get_udevid_by_devid = (uda_get_udevid_by_devid_type)
+        load_symbol(libascend_hal, "uda_get_udevid_by_devid");
+
+    hal_funcs.halSqMemGet = (hal_func_3args)load_symbol(libascend_hal, "halSqMemGet");
+    hal_funcs.halSqCqAllocate = (hal_func_3args)load_symbol(libascend_hal, "halSqCqAllocate");
+    hal_funcs.halSqMsgSend = (hal_func_2args)load_symbol(libascend_hal, "halSqMsgSend");
+    hal_funcs.halSqTaskSend = (hal_func_2args)load_symbol(libascend_hal, "halSqTaskSend");
+    hal_funcs.halSqCqFree = (hal_func_2args)load_symbol(libascend_hal, "halSqCqFree");
+
+    hooks_initialized = 1;
+    return 0;
+}
+
+/* Run init_hooks() if it has not succeeded yet; returns its result */
+static int ensure_initialized(void)
+{
+    if (!hooks_initialized)
+        return init_hooks();
+
+    return 0;
+}
+
+/*
+ * Translate devId with uda_get_udevid_by_devid() and store the result in
+ * dev_id_saved for the ioctl hook. Returns 0 on success, or when the lookup
+ * symbol was not resolved, and -1 when the lookup itself fails.
+ */
+static int map_device_id(uint32_t devId)
+{
+    uint32_t phy_devId = devId;
+
+    if (orig_uda_get_udevid_by_devid) {
+#ifdef __linux__
+        if (orig_uda_get_udevid_by_devid(devId, &phy_devId) != DRV_ERROR_NONE) {
+            fprintf(stderr, "get phys failed, devId(%u)\n", devId);
+            return -1;
+        }
+        dev_id_saved = phy_devId;
+#endif
+    }
+
+    return 0;
+}
+
+/*
+ * Body shared by every HAL wrapper: initialise the hooks, record the device
+ * id, then forward to the original libascend_hal function of the same name.
+ * Relies on a uint32_t parameter named devId in the enclosing function.
+ */
+#define DEFINE_HAL_WRAPPER(func_name, ...) \
+    do { \
+        if (ensure_initialized() != 0) { \
+            fprintf(stderr, "ERROR: Initialization failed in %s\n", #func_name); \
+            return -1; \
+        } \
+        \
+        int ret = map_device_id(devId); \
+        if (ret < 0) \
+            return DRV_ERROR_INVALID_DEVICE; \
+        \
+        if (!hal_funcs.func_name) { \
+            fprintf(stderr, "LD_PRELOAD failed: %s not available\n", #func_name); \
+            return -1; \
+        } \
+        return hal_funcs.func_name(devId, ##__VA_ARGS__); \
+    } while (0)
+
+drvError_t halSqMemGet(uint32_t devId, struct halSqMemGetInput* in, struct halSqMemGetOutput* out)
+{
+    DEFINE_HAL_WRAPPER(halSqMemGet, in, out);
+}
+
+drvError_t halSqCqAllocate(uint32_t devId, struct halSqCqInputInfo* in, struct halSqCqOutputInfo* out)
+{
+    DEFINE_HAL_WRAPPER(halSqCqAllocate, in, out);
+}
+
+drvError_t halSqMsgSend(uint32_t devId, struct halSqMsgInfo* info)
+{
+    DEFINE_HAL_WRAPPER(halSqMsgSend, info);
+}
+
+drvError_t halSqTaskSend(uint32_t devId, struct halTaskSendInfo* info)
+{
+    DEFINE_HAL_WRAPPER(halSqTaskSend, info);
+}
+
+drvError_t halSqCqFree(uint32_t devId, struct halSqCqFreeInfo* info)
+{
+    DEFINE_HAL_WRAPPER(halSqCqFree, info);
+}
+
+/*
+ * Interposes libc ioctl: the request is offered to fake_ioctl() and goes to
+ * the real ioctl when fake_ioctl() reports it did not handle it.
+ */
+int ioctl(int fd, unsigned long cmd, void *ioctl_arg)
+{
+    int is_vstream = 0, ret;
+
+    if (ensure_initialized() != 0) {
+        /* Hooks unavailable (e.g. no HAL library): keep plain ioctl working */
+        if (orig_ioctl)
+            return orig_ioctl(fd, cmd, ioctl_arg);
+        fprintf(stderr, "ERROR: Initialization failed in %s\n", __func__);
+        return -1;
+    }
+
+    if (!orig_ioctl) {
+        fprintf(stderr, "Original ioctl not available\n");
+        return -1;
+    }
+
+    ret = fake_ioctl(fd, cmd, ioctl_arg, dev_id_saved, &is_vstream);
+    if (is_vstream)
+        return ret;
+
+    return orig_ioctl(fd, cmd, ioctl_arg);
+}
diff --git a/src/fake_ioctl.c b/src/fake_ioctl.c
new file mode 100644
index 0000000..5c1ebce
--- /dev/null
+++ b/src/fake_ioctl.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+/* NOTE(review): header names lost in extraction, restored from usage - confirm */
+#include <stdio.h>
+#include <string.h>
+#include "ucc_engine.h"
+
+/*
+ * Forward a TRS_SQCQ_ALLOC request to the vstream_manage syscall.
+ * Only DRV_NORMAL_TYPE and DRV_LOGIC_TYPE queues are intercepted; for any
+ * other type *is_vstream is cleared and 0 is returned. After a successful
+ * syscall the payload is copied back over the caller's struct.
+ * Returns 0 on success and non-zero on failure.
+ */
+static int trs_sqcq_alloc(void *arg, uint32_t dev_id, int fd, int *is_vstream)
+{
+    struct halSqCqInputInfo *input = (struct halSqCqInputInfo *)arg;
+    vstream_args_t v_arg = {0};
+    int ret;
+
+    if (input->type != DRV_NORMAL_TYPE && input->type != DRV_LOGIC_TYPE) {
+        *is_vstream = 0;
+        return 0;
+    }
+
+    v_arg.fd = fd;
+    v_arg.dev_id = dev_id;
+    v_arg.channel_id = input->tsId;
+    v_arg.payload_size = sizeof(struct halSqCqInputInfo);
+    v_arg.va_args.user_stream_id = input->info[0];
+    v_arg.va_args.type = input->type;
+
+    if (v_arg.payload_size > PAYLOAD_SIZE_MAX) {
+        fprintf(stderr, "payload size %u overflow\n", v_arg.payload_size);
+        return -1;
+    }
+
+    memcpy(v_arg.payload, input, sizeof(*input));
+    ret = handle_syscall(ASCEND, &v_arg, VSTREAM_ALLOC);
+    if (ret != 0) {
+        fprintf(stderr, "vstream_alloc failed (ret=%d)\n", ret);
+        return ret;
+    }
+
+    memcpy(input, v_arg.payload, sizeof(*input));
+    return 0;
+}
+
+/*
+ * Forward a TRS_SQCQ_FREE request to the vstream_manage syscall.
+ * Only DRV_NORMAL_TYPE is intercepted; other types clear *is_vstream.
+ * Returns 0 on success and non-zero on failure.
+ */
+static int trs_sqcq_free(void *arg, uint32_t dev_id, int fd, int *is_vstream)
+{
+    struct halSqCqFreeInfo *input = (struct halSqCqFreeInfo *)arg;
+    vstream_args_t v_arg = {0};
+    int ret;
+
+    if (input->type != DRV_NORMAL_TYPE) {
+        *is_vstream = 0;
+        return 0;
+    }
+
+    v_arg.fd = fd;
+    v_arg.dev_id = dev_id;
+    v_arg.channel_id = input->tsId;
+    v_arg.sq_id = input->sqId;
+    v_arg.cq_id = input->cqId;
+
+    v_arg.payload_size = sizeof(struct halSqCqFreeInfo);
+    if (v_arg.payload_size > PAYLOAD_SIZE_MAX) {
+        fprintf(stderr, "payload size %u overflow\n", v_arg.payload_size);
+        return -1;
+    }
+
+    memcpy(v_arg.payload, input, sizeof(*input));
+    ret = handle_syscall(ASCEND, &v_arg, VSTREAM_FREE);
+    if (ret) {
+        fprintf(stderr, "vstream_free failed (ret=%d)\n", ret);
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Forward a TRS_SQCQ_SEND request to the vstream_manage syscall as a kick.
+ * NOTE(review): XCU_SQE_SIZE_MAX bytes are always copied from sqe_addr,
+ * whatever sqe_num is, and v_arg.fd is left 0 here - confirm both.
+ * Returns 0 on success and non-zero on failure.
+ */
+static int trs_sqcq_send(void *arg, uint32_t dev_id, int fd, int *is_vstream)
+{
+    struct halTaskSendInfo *input = (struct halTaskSendInfo *)arg;
+    vstream_args_t v_arg = {0};
+    int ret;
+
+    v_arg.dev_id = dev_id;
+    v_arg.channel_id = input->tsId;
+    v_arg.sq_id = input->sqId;
+    v_arg.vk_args.sqe_num = input->sqe_num;
+    v_arg.vk_args.timeout = input->timeout;
+
+    memcpy(v_arg.vk_args.sqe, input->sqe_addr, XCU_SQE_SIZE_MAX);
+    ret = handle_syscall(ASCEND, &v_arg, VSTREAM_KICK);
+    if (ret) {
+        fprintf(stderr, "vstream_kick failed (ret=%d)\n", ret);
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
+ * Route an ioctl request to the matching vstream handler.
+ *
+ * *is_vstream is 1 on return only when the request was handled here; when it
+ * is 0 the caller is expected to issue the original ioctl (no device id stored
+ * yet, NULL argument, other command, filtered queue type, or a handler
+ * failure).
+ */
+int fake_ioctl(int fd, unsigned long cmd, void *arg, uint32_t dev_id, int *is_vstream)
+{
+    int ret;
+
+    /* The caller's int -1 "no device yet" sentinel arrives as UINT32_MAX */
+    if (!arg || dev_id == (uint32_t)-1) {
+        *is_vstream = 0;
+        return 0;
+    }
+
+    *is_vstream = 1;
+    switch (cmd) {
+    case TRS_SQCQ_ALLOC:
+        ret = trs_sqcq_alloc(arg, dev_id, fd, is_vstream);
+        break;
+    case TRS_SQCQ_FREE:
+        ret = trs_sqcq_free(arg, dev_id, fd, is_vstream);
+        break;
+    case TRS_SQCQ_SEND:
+        ret = trs_sqcq_send(arg, dev_id, fd, is_vstream);
+        break;
+    default:
+        *is_vstream = 0;
+        return 0;
+    }
+
+    /* Fall back to the original driver if interception fails. */
+    if (ret != 0)
+        *is_vstream = 0;
+    return ret;
+}
diff --git a/src/vstream.c b/src/vstream.c
new file mode 100644
index 0000000..f664f08
--- /dev/null
+++ b/src/vstream.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NOTE(review): header names lost in extraction, restored from usage - confirm */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include "ucc_engine.h"
+
+/*
+ * Issue the vstream_manage syscall for the given device type.
+ * Returns the syscall() result for ASCEND and -ENODEV for any other type.
+ */
+int handle_syscall(int device_id, vstream_args_t* vargs, vstream_command_t kcmd)
+{
+    switch (device_id) {
+    case ASCEND:
+        return syscall(__NR_vstream_manage, vargs, kcmd);
+    default:
+        return -ENODEV;
+    }
+}
--
Gitee