From 18fc2cc0ec05d30021c39548b0fea0538d49ac5f Mon Sep 17 00:00:00 2001
From: dayschan
Date: Tue, 15 Jul 2025 14:24:23 +0800
Subject: [PATCH] modify custom_ops documents of vllm-ms

---
 .../developer_guide/operations/custom_ops.md | 175 +++++++++++++++++
 .../developer_guide/operations/npu_ops.md    | 107 -----------
 docs/vllm_mindspore/docs/source_en/index.rst |   2 +-
 .../developer_guide/operations/custom_ops.md | 177 ++++++++++++++++++
 .../developer_guide/operations/npu_ops.md    | 109 -----------
 .../docs/source_zh_cn/index.rst              |   2 +-
 6 files changed, 354 insertions(+), 218 deletions(-)
 create mode 100644 docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md
 delete mode 100644 docs/vllm_mindspore/docs/source_en/developer_guide/operations/npu_ops.md
 create mode 100644 docs/vllm_mindspore/docs/source_zh_cn/developer_guide/operations/custom_ops.md
 delete mode 100644 docs/vllm_mindspore/docs/source_zh_cn/developer_guide/operations/npu_ops.md

diff --git a/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md b/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md
new file mode 100644
index 0000000000..9f29cb0df6
--- /dev/null
+++ b/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md
@@ -0,0 +1,175 @@
# Custom Operator Integration

[![View Source On Gitee](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_source_en.svg)](https://gitee.com/mindspore/docs/blob/master/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md)

When the built-in operators do not meet your requirements, you can use MindSpore's custom operator functionality to integrate your own operators.

This document introduces how to integrate a new custom operator into the vLLM MindSpore project, using the **`advance_step_flashattn`** operator as an example. The focus here is on the integration process into vLLM MindSpore. For the details of custom operator development, please refer to the official MindSpore tutorial: [CustomOpBuilder-Based Custom Operators](https://www.mindspore.cn/tutorials/en/master/custom_program/operation/op_customopbuilder.html), and for AscendC operator development, see the official Ascend documentation: [Ascend C Operator Development](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/developmentguide/opdevg/Ascendcopdevg/atlas_ascendc_10_0001.html).

**Note: Currently, custom operators in vLLM MindSpore are only supported in PyNative Mode.**

## File Structure

The `csrc` directory contains the declarations and implementations of the operators:

```text
vllm-mindspore/
├── csrc/
│   ├── CMakeLists.txt                // Operator build script
│   ├── ascendc/
│   │   ├── CMakeLists.txt            // AscendC operator build script
│   │   ├── adv_step_flash.h          // AscendC AdvanceStepFlashattn operator declaration
│   │   ├── adv_step_flash.c          // AscendC AdvanceStepFlashattn operator implementation
│   │   └── ...
│   └── module/
│       ├── module.h                  // Common module registration header
│       ├── module.cpp                // Common module registration implementation
│       ├── adv_step_flash.cpp        // Integration layer code (Python interface registration)
│       └── ...
└── vllm_mindspore/
    └── _custom_ops.py                // Wrapper for custom operator call interface
```

- **`csrc/ascendc/`**: Contains AscendC custom operator implementation code.
- **`csrc/module/`**: Contains operator integration layer code, including common module registration (`module.h`, `module.cpp`) and operator-specific integration (e.g., `adv_step_flash.cpp`).

## Integration Process

To integrate a custom operator, you need to create the [Operator Interface Declaration](#operator-interface-declaration), the [Operator Implementation](#operator-implementation) and the [Operator Integration](#operator-integration) in the `csrc/` directory. After this initial development and integration, you can add an [Operator Interface](#operator-interface) and then do the [Operator Compilation and Testing](#operator-compilation-and-testing).

### Operator Interface Declaration

Create a header file in `csrc/ascendc/` to declare the operator function and related interfaces. Refer to [adv_step_flash.h](https://gitee.com/mindspore/vllm-mindspore/blob/master/csrc/ascendc/adv_step_flash.h):

```cpp
#ifndef VLLM_MINDSPORE_CSRC_ASCENDC_ADV_STEP_FLASH_H
#define VLLM_MINDSPORE_CSRC_ASCENDC_ADV_STEP_FLASH_H

extern void AdvStepFlashKernelEntry(uint32_t blockDims, void *l2ctrl, void *aclStream, uint8_t *sampledTokenIds,
                                    uint8_t *blockTables, uint8_t *seqLensInput, uint8_t *inputTokens,
                                    uint8_t *inputPositions, uint8_t *seqLensOut, uint8_t *slotMapping,
                                    int32_t num_seqs, int32_t block_size, int32_t block_tables_stride);

#endif  // VLLM_MINDSPORE_CSRC_ASCENDC_ADV_STEP_FLASH_H
```

### Operator Implementation

Create an implementation file in `csrc/ascendc/` for the core logic. Refer to [adv_step_flash.c](https://gitee.com/mindspore/vllm-mindspore/blob/master/csrc/ascendc/adv_step_flash.c):

```cpp
#include "kernel_operator.h"

extern "C" __global__ __aicore__ void adv_step_flash_impl(GM_ADDR sampledTokenIds, GM_ADDR blockTables,
                                                          GM_ADDR seqLensInput, GM_ADDR inputTokens,
                                                          GM_ADDR inputPositions, GM_ADDR seqLensOut,
                                                          GM_ADDR slotMapping, int32_t num_seqs, int32_t block_size,
                                                          int32_t block_tables_stride) {
  // AscendC operator implementation
}

#ifndef __CCE_KT_TEST__
void AdvStepFlashKernelEntry(uint32_t blockDims, void *l2ctrl, void *aclStream, uint8_t *sampledTokenIds,
                             uint8_t *blockTables, uint8_t *seqLensInput, uint8_t *inputTokens, uint8_t *inputPositions,
                             uint8_t *seqLensOut, uint8_t *slotMapping, int32_t num_seqs, int32_t block_size,
                             int32_t block_tables_stride) {
  adv_step_flash_impl<<<blockDims, l2ctrl, aclStream>>>(sampledTokenIds, blockTables, seqLensInput, inputTokens,
                                                        inputPositions, seqLensOut, slotMapping, num_seqs, block_size,
                                                        block_tables_stride);
}
#endif
```

### Operator Integration

Create an integration file in `csrc/module/`.
Refer to [adv_step_flash.cpp](https://gitee.com/mindspore/vllm-mindspore/blob/master/csrc/module/adv_step_flash.cpp):

```cpp
#include "ms_extension/api.h"
#include "ascendc/adv_step_flash.h"
#include "module/module.h"

auto pyboost_adv_step_flash(int32_t num_seqs, int32_t num_queries, int32_t block_size,
                            ms::Tensor input_tokens,
                            ms::Tensor sampled_token_ids,
                            ms::Tensor input_positions,
                            ms::Tensor seq_lens,
                            ms::Tensor slot_mapping,
                            ms::Tensor block_tables) {
  // Use ms::PyboostRunner to call your operator
}

VLLM_MS_EXTENSION_MODULE(m) {
  m.def("advance_step_flashattn", &pyboost_adv_step_flash, "advance_step_flashattn", pybind11::arg("num_seqs"),
        pybind11::arg("num_queries"), pybind11::arg("block_size"), pybind11::arg("input_tokens"),
        pybind11::arg("sampled_token_ids"), pybind11::arg("input_positions"), pybind11::arg("seq_lens"),
        pybind11::arg("slot_mapping"), pybind11::arg("block_tables"));
}
```

In the code above, the first argument of `m.def()`, `"advance_step_flashattn"`, is the Python interface name of the operator.

The `module.h` and `module.cpp` files create the Python module for the operators based on pybind11. Since only one `PYBIND11_MODULE` is allowed per dynamic library, and to let users complete operator integration in a single file, vLLM MindSpore provides a new registration macro, `VLLM_MS_EXTENSION_MODULE`. When the custom operator dynamic library is loaded, all operator interfaces are automatically registered into the same Python module.

### Operator Interface

The custom operators in vLLM MindSpore are compiled into `_C_ops.so`. For convenient calling, you can add a call interface in `vllm_mindspore/_custom_ops.py`. If extra adaptation is needed before or after the operator call, it can be implemented in this interface.

```python
def advance_step_flashattn(num_seqs: int, num_queries: int, block_size: int,
                           input_tokens: torch.Tensor,
                           sampled_token_ids: torch.Tensor,
                           input_positions: torch.Tensor,
                           seq_lens: torch.Tensor,
                           slot_mapping: torch.Tensor,
                           block_tables: torch.Tensor) -> None:
    """Advance a step on Ascend for existing inputs for a multi-step runner"""
    from vllm_mindspore import _C_ops as c_ops
    c_ops.advance_step_flashattn(num_seqs=num_seqs,
                                 num_queries=num_queries,
                                 block_size=block_size,
                                 input_tokens=input_tokens,
                                 sampled_token_ids=sampled_token_ids,
                                 input_positions=input_positions,
                                 seq_lens=seq_lens,
                                 slot_mapping=slot_mapping,
                                 block_tables=block_tables)
```

Here, importing `_C_ops` gives access to the Python module containing the custom operators. It is recommended to import it right before the call, so that it is not loaded unnecessarily.

### Operator Compilation and Testing

1. **Code Integration**: Merge the code into the vLLM MindSpore project.
2. **Project Compilation**: Run `pip install .` in the vllm-mindspore project root to build and install vLLM MindSpore.
3. **Operator Testing**: Call the operator interface via `_custom_ops`. Refer to the test case [test_custom_advstepflash.py](https://gitee.com/mindspore/vllm-mindspore/blob/master/tests/st/python/test_custom_advstepflash.py):

```python
from vllm_mindspore import _custom_ops as custom_ops

custom_ops.advance_step_flashattn(...)
```

## Custom Operator Compilation Project

Currently, MindSpore provides only a [CustomOpBuilder](https://www.mindspore.cn/docs/en/master/api_python/ops/mindspore.ops.CustomOpBuilder.html) interface for online compilation of custom operators, with default compilation and linking options built in.
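For reference, direct online compilation with `CustomOpBuilder` looks roughly like the sketch below; the module name and source path are illustrative only and are not part of the vLLM MindSpore build.

```python
# A minimal online-compilation sketch with mindspore.ops.CustomOpBuilder.
# The module name and source path are hypothetical; vLLM MindSpore drives this step from CMake instead.
from mindspore import ops

builder = ops.CustomOpBuilder(
    name="my_custom_ops",                        # name of the generated extension module (illustrative)
    sources=["csrc/module/adv_step_flash.cpp"],  # C++ integration source(s) to compile (illustrative)
    backend="Ascend",                            # build for the Ascend backend
)
my_ops = builder.load()  # compiles the shared library and imports it as a Python module
```
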
vLLM MindSpore integrates operators based on MindSpore’s custom operator feature and compiles them into a dynamic library for package release. The following introduces the build process: + +### Extension Module + +In `setup.py`, vLLM MindSpore adds a `vllm_mindspore._C_ops` extension and the corresponding build module: + +```python +ext_modules = [Extension("vllm_mindspore._C_ops", sources=[])], +cmdclass = {"build_ext": CustomBuildExt}, +``` + +There is no need to specify `sources` here because vLLM MindSpore triggers the operator build via CMake, which automatically collects the source files. + +### Building Process + +1. `CustomBuildExt` calls CMake to execute `csrc/CMakeLists.txt`, passing necessary environment variables to trigger operator build. +2. Through `ascendc/CMakeLists.txt`, the AscendC compiler is called to compile the source code in the `ascendc` directory. The static library `ascendc_kernels_npu.a` is generated. +3. Recursively collect the list of cpp source files `SRC_FILES`. +4. Generate a temporary script `build_custom_with_ms.py`, which calls `mindspore.CustomOpBuilder` to build the operator interfaces. The script also contains the source file list, header paths, static library paths, etc. +5. Use CMake's `add_custom_target` command to call the Python script to build the custom operator, generating `_C_ops.so`. +6. Rename `_C_ops.so` to the standard Python package extension module name, such as `_C_ops.cpython-39-aarch64-linux-gnu.so`. diff --git a/docs/vllm_mindspore/docs/source_en/developer_guide/operations/npu_ops.md b/docs/vllm_mindspore/docs/source_en/developer_guide/operations/npu_ops.md deleted file mode 100644 index 3e79c32712..0000000000 --- a/docs/vllm_mindspore/docs/source_en/developer_guide/operations/npu_ops.md +++ /dev/null @@ -1,107 +0,0 @@ -# Custom Operator Integration - -[![View Source On Gitee](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_source_en.svg)](https://gitee.com/mindspore/docs/blob/master/docs/vllm_mindspore/docs/source_en/developer_guide/operations/npu_ops.md) - -This document would introduce how to integrate a new custom operator into the vLLM MindSpore project, with the **`adv_step_flash`** operator as an example. The following sections would focus on the integration process, and user can refer to operator implementation introduction in official MindSpore tutorial: [Dynamic Graph Custom Operator Integration](https://www.mindspore.cn/tutorials/en/master/custom_program/operation/op_customopbuilder.html). - -For development, additional features can be extended based on project requirements. Implementation details can be referenced from [MindSpore Custom Operator Implementation](https://www.mindspore.cn/tutorials/en/master/custom_program/operation/op_customopbuilder.html). - -## File Structure - -The directory `vllm_mindspore/ops` contains and declaration and implementation of operations: - -```text -vllm_mindspore/ops/ -├── ascendc/ -│ ├── adv_step_flash.h // AscendC AdvStepFlash operator declaration -│ ├── adv_step_flash.c // AscendC AdvStepFlash operator implementation -│ └── ... -├── module/ -│ ├── module.h // Common module registration header -│ ├── module.cpp // Common module registration implementation -│ ├── adv_step_flash.cpp // Integration layer code (Python interface registration) -│ └── ... -``` - -- **`ops/ascendc/`**: Contains AscendC custom operator implementation code. 
-- **`ops/module/`**: Contains operator integration layer code, including common module registration (`module.h`, `module.cpp`) and operator-specific integration (e.g., `adv_step_flash.cpp`). - -## Integration Process - -To integrate a custom operator, user need to create [Operator Interface Declaration](#operator-interface-declaration), [Operator Implementation](#operator-implementation) and [Operator Integration](#operator-integration) in the directory `ops/ascendc/`. And do [Operator Compilation and Testing](#operator-compilation-and-testing) after declaration and implementation. - -### Operator Interface Declaration - -Create a header file (e.g., `my_custom_op.h`) in `ops/ascendc/` to declare the operator function and related interfaces: - -```cpp -#ifndef VLLM_MINDSPORE_OPS_ASCENDC_MY_CUSTOM_OP_H -#define VLLM_MINDSPORE_OPS_ASCENDC_MY_CUSTOM_OP_H - -extern void MyCustomOpKernelEntry(uint32_t blockDims, void *l2ctrl, void *aclStream, - uint8_t *input, uint8_t *output, int32_t param1, int32_t param2); - -#endif // VLLM_MINDSPORE_OPS_ASCENDC_MY_CUSTOM_OP_H -``` - -### Operator Implementation - -Create an implementation file (e.g., `my_custom_op.c`) in `ops/ascendc/` for the core logic: - -```cpp -#include "my_custom_op.h" -#include "kernel_operator.h" - -extern "C" __global__ __aicore__ void my_custom_op_impl(GM_ADDR input, GM_ADDR output, - int32_t param1, int32_t param2) { - // AscendC operation implement -} - -#ifndef __CCE_KT_TEST__ -void MyCustomOpKernelEntry(uint32_t blockDims, void *l2ctrl, void *aclStream, - uint8_t *input, uint8_t *output, int32_t param1, int32_t param2) { - my_custom_op_impl<<>>(input, output, param1, param2); -} -#endif -``` - -### Operator Integration - -Create an integration file (e.g., `my_custom_op.cpp`) in `module/`. User can refer to `adv_step_flash.cpp` for more details about the integration: - -```cpp -#include "ms_extension.h" -#include "ascendc/my_custom_op.h" -#include "module/module.h" - -using BaseTensorPtr = mindspore::tensor::BaseTensorPtr; - -void MyCustomOpPythonInterface(int32_t param1, int32_t param2, - BaseTensorPtr input, BaseTensorPtr output) { - ... -} - -MS_EXTENSION_MODULE(my_custom_op) { - m.def("my_custom_op", &MyCustomOpPythonInterface, "My custom operator", - pybind11::arg("param1"), pybind11::arg("param2"), - pybind11::arg("input"), pybind11::arg("output")); -} -``` - -### Operator Compilation and Testing - -1. **Code Integration**: Merge the code into the vLLM MindSpore project. -2. **Project Compilation**: run `pip install .` in vllm-mindspore to build and install vLLM MindSpore. -3. 
**Operator Testing**: Invoke the operator in Python: - - ```python - from vllm_mindspore import npu_ops - import numpy as np - import mindspore as ms - - input = ms.Tensor(np.array([1, 2, 3], dtype=np.int32)) - output = ms.Tensor(np.zeros_like(input)) - - npu_ops.my_custom_op(10, 20, input, output) - print("Output:", output) - ``` diff --git a/docs/vllm_mindspore/docs/source_en/index.rst b/docs/vllm_mindspore/docs/source_en/index.rst index 3b83b59792..5b4871aebf 100644 --- a/docs/vllm_mindspore/docs/source_en/index.rst +++ b/docs/vllm_mindspore/docs/source_en/index.rst @@ -124,7 +124,7 @@ Apache License 2.0, as found in the `LICENSE >>(sampledTokenIds, blockTables, seqLensInput, inputTokens, + inputPositions, seqLensOut, slotMapping, num_seqs, block_size, + block_tables_stride); +} +#endif +``` + +### 算子接入 + +在 `csrc/module/` 目录下创建一个新的接入文件(如 `adv_step_flash.cpp`),内容参考 [adv_step_flash.cpp](https://gitee.com/mindspore/vllm-mindspore/blob/master/csrc/module/adv_step_flash.cpp): + +```cpp +#include "ms_extension/api.h" +#include "ascendc/adv_step_flash.h" +#include "module/module.h" + +auto pyboost_adv_step_flash(int32_t num_seqs, int32_t num_queries, int32_t block_size, + ms::Tensor input_tokens, + ms::Tensor sampled_token_ids, + ms::Tensor input_positions, + ms::Tensor seq_lens, + ms::Tensor slot_mapping, + ms::Tensor block_tables) { + // 使用 ms::PyboostRunner 调用你的算子 +} + +VLLM_MS_EXTENSION_MODULE(m) { + m.def("advance_step_flashattn", &pyboost_adv_step_flash, "advance_step_flashattn", pybind11::arg("num_seqs"), + pybind11::arg("num_queries"), pybind11::arg("block_size"), pybind11::arg("input_tokens"), + pybind11::arg("sampled_token_ids"), pybind11::arg("input_positions"), pybind11::arg("seq_lens"), + pybind11::arg("slot_mapping"), pybind11::arg("block_tables")); +} +``` + +上面`m.def()`接口的第一个参数`"advance_step_flashattn"`就是算子的Python接口名。 + +`module.h` 和 `module.cpp` 文件的作用是基于pybind11创建算子的Python模块。因为一个动态库内只能有一个 `PYBIND11_MODULE` ,为了让用户可以在一个文件内完成算子接入工作,vLLM MindSpore提供了一个新的注册接口 `VLLM_MS_EXTENSION_MODULE` 宏。自定义算子动态库加载时,所有算子接口都会被自动注册到同一个Python模块中。 + +### 算子调用接口 + +vLLM MindSpore的自定义算子被编译到了 `_C_ops.so` 里面,为了方便调用,可以在 `vllm_mindspore/_custom_ops.py` 添加一个调用接口。如果在算子调用前后需要做额外适配,也可以在这接口内实现。 + +```python +def advance_step_flashattn(num_seqs: int, num_queries: int, block_size: int, + input_tokens: torch.Tensor, + sampled_token_ids: torch.Tensor, + input_positions: torch.Tensor, + seq_lens: torch.Tensor, + slot_mapping: torch.Tensor, + block_tables: torch.Tensor) -> None: + """Advance a step on Ascend for existing inputs for a multi-step runner""" + from vllm_mindspore import _C_ops as c_ops + c_ops.advance_step_flashattn(num_seqs=num_seqs, + num_queries=num_queries, + block_size=block_size, + input_tokens=input_tokens, + sampled_token_ids=sampled_token_ids, + input_positions=input_positions, + seq_lens=seq_lens, + slot_mapping=slot_mapping, + block_tables=block_tables) +``` + +这里通过 `import _C_ops` 即可导入自定义算子的Python模块,推荐在调用前再导入,未调用时不需要导入。 + +### 算子编译和测试 + +1. **代码集成**:将代码集成至 vLLM MindSpore 项目。 +2. **编译项目**:在项目代码根目录下,执行 `pip install .` ,编译安装vLLM MindSpore。 +3. **测试算子接口**:通过 `_custom_ops` 调用算子接口,可以参考测试用例[test_custom_advstepflash.py](https://gitee.com/mindspore/vllm-mindspore/blob/master/tests/st/python/test_custom_advstepflash.py): + +```python +from vllm_mindspore import _custom_ops as custom_ops + +custom_ops.advance_step_flashattn(...) 
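# 说明:上述调用没有返回值,而是原地更新传入的 input_tokens、input_positions、
# seq_lens 和 slot_mapping 等张量;各参数的具体构造方式(形状与 dtype)
# 可参考上方链接的测试用例。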
+``` + +## 自定义算子编译工程 + +当前MindSpore仅提供了一个 [CustomOpBuilder接口](https://www.mindspore.cn/docs/zh-CN/master/api_python/ops/mindspore.ops.CustomOpBuilder.html) 用于在线编译自定义算子,接口内置了默认的编译和链接选项。vLLM MindSpore基于MindSpore的自定义算子功能接入算子,并编译成动态库随包发布。下面是编译流程介绍: + +### 算子扩展库模块 + +在 `setup.py` 中,vLLM MindSpore添加了一个 `vllm_mindspore._C_ops` 扩展,并添加了相应的编译模块: + +```python +ext_modules = [Extension("vllm_mindspore._C_ops", sources=[])], +cmdclass = {"build_ext": CustomBuildExt}, +``` + +这里不需要指定 `sources` ,是因为vLLM MindSpore通过CMake触发算子编译,自动收集了源文件。 + +### 算子编译流程 + +1. `CustomBuildExt` 调用CMake执行 `csrc/CMakeLists.txt` ,传入必要的环境变量,触发算子编译。 +2. 通过 `ascendc/CMakeLists.txt` 调用AscendC编译器,编译 `ascendc` 目录内的算子算子源码。生成静态库 `ascendc_kernels_npu.a` 。 +3. 递归收集cpp源文件列表 `SRC_FILES` 。 +4. 生成临时脚本 `build_custom_with_ms.py` ,文件内调用 `mindspore.CustomOpBuilder` 编译算子接口。文件里也写入了源文件列表、头文件路径和静态库路径等信息。 +5. 通过CMake的 `add_custom_target` 命令调用Python脚本编译自定义算子,生成 `_C_ops.so` 。 +6. 将 `_C_ops.so` 重命名成Python包扩展模块的标准名称,如 `_C_ops.cpython-39-aarch64-linux-gnu.so` 。 diff --git a/docs/vllm_mindspore/docs/source_zh_cn/developer_guide/operations/npu_ops.md b/docs/vllm_mindspore/docs/source_zh_cn/developer_guide/operations/npu_ops.md deleted file mode 100644 index dcdf341859..0000000000 --- a/docs/vllm_mindspore/docs/source_zh_cn/developer_guide/operations/npu_ops.md +++ /dev/null @@ -1,109 +0,0 @@ -# 自定义算子接入 - -[![查看源文件](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_source.svg)](https://gitee.com/mindspore/docs/blob/master/docs/vllm_mindspore/docs/source_zh_cn/developer_guide/operations/npu_ops.md) - -本文档将以 **`adv_step_flash`** 算子的接入为例,讲解如何在 vLLM MindSpore 项目中接入一个新的自定义算子。本文重点在于接入流程,算子的实现参考 MindSpore 官方教程:[动态图自定义算子接入方式](https://www.mindspore.cn/tutorials/zh-CN/master/custom_program/operation/op_customopbuilder.html)。以下章节将介绍文件的组织结构及接入步骤。 - -实际开发中,可根据项目需求扩展更多功能,算子实现细节可参考 [MindSpore 自定义算子实现方式](https://www.mindspore.cn/tutorials/zh-CN/master/custom_program/operation/op_customopbuilder.html)。 - -**目前该特性只支持动态图场景。** - -## 文件组织结构 - -接入自定义算子需要在 vLLM MindSpore 项目的 `vllm_mindspore/ops` 目录下添加代码,目录结构如下: - -```text -vllm_mindspore/ops/ -├── ascendc/ -│ ├── adv_step_flash.h // AscendC AdvStepFlash 算子声明 -│ ├── adv_step_flash.c // AscendC AdvStepFlash 算子实现 -│ └── ... -├── module/ -│ ├── module.h // 公共模块注册头文件 -│ ├── module.cpp // 公共模块注册实现文件 -│ ├── adv_step_flash.cpp // 接入层代码,注册 AdvStepFlash 算子的 Python 接口 -│ └── ... 
-``` - -- **`ops/ascendc/`**:放置 AscendC 自定义算子的实现代码。 -- **`ops/module/`**:放置算子接入层代码,包括公共模块注册(`module.h`、`module.cpp`)和算子接入代码(如 `adv_step_flash.cpp`)。 - -## 接入流程 - -接入一个自定义算子,在算子实现方面,需在`ops/ascendc/`目录中,创建[算子接口定义](#算子接口声明),[算子实现](#算子实现)与[算子接入](#算子接入)。在完成自定义算子初步的开发与接入后,可进行[算子编译并测试](#算子编译并测试)。 - -### 算子接口声明 - -在 `ops/ascendc/` 目录下,创建头文件(如 `my_custom_op.h`),以声明算子函数及相关接口,内容参考: - -```cpp -#ifndef VLLM_MINDSPORE_OPS_ASCENDC_MY_CUSTOM_OP_H -#define VLLM_MINDSPORE_OPS_ASCENDC_MY_CUSTOM_OP_H - -extern void MyCustomOpKernelEntry(uint32_t blockDims, void *l2ctrl, void *aclStream, - uint8_t *input, uint8_t *output, int32_t param1, int32_t param2); - -#endif // VLLM_MINDSPORE_OPS_ASCENDC_MY_CUSTOM_OP_H -``` - -### 算子实现 - -在 `ops/ascendc/` 目录下创建实现文件(如 `my_custom_op.c`),以实现算子的核心逻辑,内容参考: - -```cpp -#include "my_custom_op.h" -#include "kernel_operator.h" - -extern "C" __global__ __aicore__ void my_custom_op_impl(GM_ADDR input, GM_ADDR output, - int32_t param1, int32_t param2) { - // AscendC operation implement -} - -#ifndef __CCE_KT_TEST__ -void MyCustomOpKernelEntry(uint32_t blockDims, void *l2ctrl, void *aclStream, - uint8_t *input, uint8_t *output, int32_t param1, int32_t param2) { - my_custom_op_impl<<>>(input, output, param1, param2); -} -#endif -``` - -### 算子接入 - -在 `module/` 目录下创建一个新的接入文件(如 `my_custom_op.cpp`),内容参考 `adv_step_flash.cpp`: - -```cpp -#include "ms_extension.h" -#include "ascendc/my_custom_op.h" -#include "module/module.h" - -using BaseTensorPtr = mindspore::tensor::BaseTensorPtr; - -void MyCustomOpPythonInterface(int32_t param1, int32_t param2, - BaseTensorPtr input, BaseTensorPtr output) { - ... -} - -MS_EXTENSION_MODULE(my_custom_op) { - m.def("my_custom_op", &MyCustomOpPythonInterface, "My custom operator", - pybind11::arg("param1"), pybind11::arg("param2"), - pybind11::arg("input"), pybind11::arg("output")); -} -``` - -### 算子编译并测试 - -1. **代码集成**:将代码集成至 vllm-mindspore 项目。 -2. **编译项目**:于vllm-mindspore工程中,执行`pip install .`,编译安装vLLM MindSpore。 -3. **测试算子接口**:使用 Python 调用注册的算子接口: - - ```python - from vllm_mindspore import npu_ops - import numpy as np - import mindspore as ms - - input = ms.Tensor(np.array([1, 2, 3], dtype=np.int32)) - output = ms.Tensor(np.zeros_like(input)) - - npu_ops.my_custom_op(10, 20, input, output) - print("Output:", output) - ``` diff --git a/docs/vllm_mindspore/docs/source_zh_cn/index.rst b/docs/vllm_mindspore/docs/source_zh_cn/index.rst index cd870da99c..3f8b00d7d1 100644 --- a/docs/vllm_mindspore/docs/source_zh_cn/index.rst +++ b/docs/vllm_mindspore/docs/source_zh_cn/index.rst @@ -124,7 +124,7 @@ Apache 许可证 2.0,如 `LICENSE