From 0838bf069a4f60504c6699eed3cfcf0d4d0bc35b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=A6=E6=99=93=E7=8E=B2?= <3174348550@qq.com> Date: Fri, 19 Sep 2025 10:59:18 +0800 Subject: [PATCH] modify contents --- .../advanced/third_party/delegate.md | 10 +++---- .../advanced/third_party/npu_info.md | 2 +- .../advanced/third_party/register_kernel.md | 26 +++++++++---------- .../advanced/third_party/tensorrt_info.md | 6 ++--- .../advanced/third_party/delegate.md | 4 +-- .../advanced/third_party/register_kernel.md | 4 +-- .../source_en/api_python/env_var_list.rst | 2 +- .../source_zh_cn/api_python/env_var_list.rst | 2 +- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/docs/lite/docs/source_en/advanced/third_party/delegate.md b/docs/lite/docs/source_en/advanced/third_party/delegate.md index 42f1b0f7fc..9052e45cbd 100644 --- a/docs/lite/docs/source_en/advanced/third_party/delegate.md +++ b/docs/lite/docs/source_en/advanced/third_party/delegate.md @@ -4,7 +4,7 @@ ## Overview -Delegate of MindSpore Lite is used to support third-party AI frameworks (such as NPU, TensorRT) to quickly access to the inference process in MindSpore Lite. Third-party frameworks can be implemented by users themselves, or other open source frameworks. Generally, the framework has the ability to build model online, that is, multiple operators can be built into a sub-graph and distributed to the device for inference. If the user wants to schedule other inference frameworks through MindSpore Lite, please refer to this article. +Delegate of MindSpore Lite is used to support third-party AI frameworks (such as NPU, TensorRT) to quickly access the inference process in MindSpore Lite. Third-party frameworks can be implemented by users themselves, or other open source frameworks. Generally, the framework has the ability to build model online, that is, multiple operators can be built into a sub-graph and distributed to the device for inference. If the user wants to schedule other inference frameworks through MindSpore Lite, please refer to this article. ## Usage of Delegate @@ -120,14 +120,14 @@ class XXXGraph : public kernel::Kernel { } int ReSize() override { - // Support dynamic shape, and input shape will changed. + // Support dynamic shape, and input shape will be changed. } }; ``` ## Calling Delegate by Lite Framework -MindSpore Lite schedules user-defined delegate by [Context](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Context.html#class-context). Use [SetDelegate](https://www.mindspore.cn/lite/api/zh-CN/r2.7.0/api_cpp/mindspore.html#setdelegate) to set a custom delegate for Context. Delegate will be passed by [Build](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Model.html) to MindSpore Lite. If the Delegate in the Context is a null pointer, the process will call the inner inference of MindSpore Lite. +MindSpore Lite schedules user-defined delegate by [Context](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Context.html#class-context). Use [SetDelegate](https://www.mindspore.cn/lite/api/zh-CN/r2.7.0/api_cpp/mindspore.html#setdelegate) to set a custom delegate for Context. Delegate will be passed by [Build](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Model.html) to MindSpore Lite. If the Delegate in the Context is a null pointer, the process will call the inner inference of MindSpore Lite. 
```cpp auto context = std::make_shared(); @@ -301,7 +301,7 @@ class NPUGraph : public kernel::Kernel { protected: std::vector npu_ops_{}; - NPUManager *npu_manager_ = nullptr; + NPUManager *npu_manager_ = nullptr; NPUExecutor *executor_ = nullptr; // NPU inference executor. }; ``` @@ -325,4 +325,4 @@ int NPUGraph::Execute() { } ``` -> [NPU](https://www.mindspore.cn/lite/docs/en/r2.7.0/advanced/third_party/npu_info.html) is a third-party AI framework that added by MindSpore Lite internal developers. The usage of NPU is slightly different. You can set the [Context](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Context.html#class-context) through [SetDelegate](https://www.mindspore.cn/lite/api/zh-CN/r2.7.0/api_cpp/mindspore.html#setdelegate), or you can add the description of the NPU device [KirinNPUDeviceInfo](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_KirinNPUDeviceInfo.html#class-kirinnpudeviceinfo) to [MutableDeviceInfo](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Context.html) of the Context. +> [NPU](https://www.mindspore.cn/lite/docs/en/r2.7.0/advanced/third_party/npu_info.html) is a third-party AI framework that was added by MindSpore Lite internal developers. The usage of NPU is slightly different. You can set the [Context](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Context.html#class-context) through [SetDelegate](https://www.mindspore.cn/lite/api/zh-CN/r2.7.0/api_cpp/mindspore.html#setdelegate), or you can add the description of the NPU device [KirinNPUDeviceInfo](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_KirinNPUDeviceInfo.html#class-kirinnpudeviceinfo) to [MutableDeviceInfo](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_Context.html) of the Context. diff --git a/docs/lite/docs/source_en/advanced/third_party/npu_info.md b/docs/lite/docs/source_en/advanced/third_party/npu_info.md index 2c51e20186..35b27b069a 100644 --- a/docs/lite/docs/source_en/advanced/third_party/npu_info.md +++ b/docs/lite/docs/source_en/advanced/third_party/npu_info.md @@ -8,7 +8,7 @@ Besides basic [Environment Preparation](https://www.mindspore.cn/lite/docs/en/r2.7.0/build/build.html), using the NPU requires the integration of the HUAWEI HiAI DDK. HUAWEI HiAI DDK, which contains APIs (including building, loading models and calculation processes) and interfaces implemented to encapsulate dynamic libraries (namely libhiai*.so), is required for the use of NPU. -Download [DDK 100.510.010.010](https://developer.huawei.com/consumer/en/doc/development/hiai-Library/ddk-download-0000001053590180), and set the directory of extracted files as `${HWHIAI_DDK}`. Our build script uses this environment viriable to seek DDK. +Download [DDK 100.510.010.010](https://developer.huawei.com/consumer/en/doc/development/hiai-Library/ddk-download-0000001053590180), and set the directory of extracted files as `${HWHIAI_DDK}`. Our build script uses this environment variable to seek DDK. ### Build diff --git a/docs/lite/docs/source_en/advanced/third_party/register_kernel.md b/docs/lite/docs/source_en/advanced/third_party/register_kernel.md index 87ef96e046..2738454ba9 100644 --- a/docs/lite/docs/source_en/advanced/third_party/register_kernel.md +++ b/docs/lite/docs/source_en/advanced/third_party/register_kernel.md @@ -10,7 +10,7 @@ To implement custom operators, perform the following steps: 1. Determine operator types: Classify operators into common and custom operators. 2. 
Implement operators: Inherit the Kernel class to implement custom operators and register them in MindSpore Lite. -3. Implement the InferShape capability: Inherit mindspore::kernel::KernelInteface to implement the InferShape capability of custom operators and register them in MindSpore Lite. +3. Implement the InferShape capability: Inherit mindspore::kernel::KernelInterface to implement the InferShape capability of custom operators and register them in MindSpore Lite. ### Determining Operator Types @@ -94,7 +94,7 @@ REGISTER_KERNEL(CPU, BuiltInTest, kFloat32, PrimitiveType_AddFusion, TestCustomA #### Common Operator InferShape -Reload the Infer function after inheriting KernelInterface to implement the InferShape capability. The implementation procedure is as follows: +Override the Infer function after inheriting KernelInterface to implement the InferShape capability. The implementation procedure is as follows: 1. Inherit [KernelInterface](https://www.mindspore.cn/lite/api/en/r2.7.0/generate/classmindspore_kernel_KernelInterface.html). 2. Overload the Infer function to derive the shape, format, and data_type of the output tensor. @@ -207,7 +207,7 @@ class Test2Fusion : public Pass { if (custom_cnode == nullptr) { return false; } - manager->Replace(node, custom_cnode) // Replace old nodes with new nodes through the manager. + manager->Replace(node, custom_cnode); // Replace old nodes with new nodes through the manager. } return true; } @@ -404,7 +404,7 @@ REGISTER_CUSTOM_KERNEL(GPU, BuiltInTest, kFloat32, Custom_Add, CustomAddCreator) ### Implementing Operators -In this example, the operator is implemented as the `CustomAddKernel` class. This class inherits [mindspore::kernel::Kernel](https://www.mindspore.cn/lite/api/en/r2.7.0/api_cpp/mindspore_kernel.html) and reloads necessary APIs to implement the custom operator computation. +In this example, the operator is implemented as the `CustomAddKernel` class. This class inherits [mindspore::kernel::Kernel](https://www.mindspore.cn/lite/api/en/r2.7.0/api_cpp/mindspore_kernel.html) and overloads necessary APIs to implement the custom operator computation. #### Constructor and Destructor Functions @@ -458,8 +458,8 @@ class CustomAddKernel : public kernel::Kernel { #### Code and Description of the Prepare Implementation -In the graph build phase `mindspore::Model::Build`, the Prepare function of the operator is called. You can perform some time-consuming and one-off operations to save the operator computation time of `mindspore::Model::Predict`. -In this example, the Prepare API is overloaded to load and build the custom OpenCL code. +In the graph build phase `mindspore::Model::Build`, the Prepare function of the operator is called. You can perform some time-consuming and one-off operations to reduce the operator computation time of `mindspore::Model::Predict`. +In this example, the Prepare API is overridden to load and build the custom OpenCL code. 1. Check the environment. @@ -581,7 +581,7 @@ In this example, the Prepare API is overloaded to load and build the custom Open 4. Set the OpenCL working group and work items. - For an operator registered as a GPU, the input data received is in image format except that the input is a constant. The format is NHWC4 (C-axis 4-byte aligned NHWC format data). + For an operator registered as a GPU, the input data received is in image format except when the input is a constant. The format is NHWC4 (C-axis 4-byte aligned NHWC format data). 
In this example, all data is converted to this format for computation and output. In the routine, a simple addition custom operator is implemented. Therefore, the `GpuTensorInfo` function is used to compute the width and height of the `Image` memory used by the output data to set the work items. @@ -798,7 +798,7 @@ In this example, the Prepare API is overloaded to load and build the custom Open 6. Set values of OpenCL kernel runtime parameters. - Some unchanged parameters during the OpenCL kernel running can be set in the `Prepare` phase. + Some unchanged parameters during the OpenCL kernel execution can be set in the `Prepare` phase. In this example, `OpenCLRuntimeWrapper::SetKernelArg` is used to set the third parameter (computation range) of the `ElementAdd` runtime. ```cpp @@ -813,12 +813,12 @@ In this example, the Prepare API is overloaded to load and build the custom Open #### Code and Description of the ReSize and Execute Implementations -By overloading `Execute`, you can customize the computation operations of the operator during inference. +By overriding `Execute`, you can customize the computation operations of the operator during inference. 1. Call the `ReSize` function to support shape changes during running. In this example, `PreProcess` is called to prepare for the computation. - In `PreProcess()`, call the `ReSize` function first. This function is the runtime shape change adaptation API that needs to be overloaded. + In `PreProcess()`, call the `ReSize` function first. This function is the runtime shape change adaptation API that needs to be overridden. In the `ReSize` function, call `CheckOutputs` to check whether the shape of the output tensor of the operator contains invalid values to determine whether shape inference needs to be performed again. If no, the function returns directly. When shape inference is required, call `registry::RegisterKernelInterface::GetKernelInterface` to obtain the shape inference function registered by the operator. The obtained function is the InferShape function implemented and registered by the user in this routine. After re-inference, call the previously implemented `Prepare` API to re-apply for and allocate the memory and related variables required for operator computation. @@ -862,9 +862,9 @@ By overloading `Execute`, you can customize the computation operations of the op 2. Allocate memory for the output tensor. - Before running the operator, you need to apply for GPU memory for the output tensor. Due to the limitation of the framework, the GPU memory needs to be hosted by the framework for management and cannot be manually released. The process is as follows: + Before running the operator, you need to apply for GPU memory for the output tensor. Due to the limitation of the framework, the GPU memory needs to be managed by the framework for management and cannot be manually released. The process is as follows: - 1. Call the `allocator()` API of the output tensor to obtain the memory allocator that manages the tensor in the framework. In the GPU registration operator, the memory allocator is responsible for allocating the GPU memory. + 1. Call the `allocator()` API of the output tensor to obtain the memory allocator that manages the tensor in the framework. In GPU-registered operators, the memory allocator is responsible for allocating the GPU memory. 2. Compute the size of the memory to be allocated. In this example, the `GpuTensorInfo` function is used to compute the size of the memory to be allocated. 3. 
Apply for memory by using the `Malloc` API of the memory allocator. You can obtain the memory in image or buffer format by using the `void *Malloc(size_t weight, size_t height, DataType type)` and `void *Malloc(size_t size)` APIs. 4. Use the `SetData` API to assign the requested memory to the tensor. After that, the memory is managed by the framework in a unified manner and cannot be manually released. @@ -893,7 +893,7 @@ By overloading `Execute`, you can customize the computation operations of the op 3. Run the OpenCL kernel. - The `SetKernelArg` API is used to set parameters for running the OpenCL kernel, and the `RunKernel` API is used to run the OpenCL kernel. + The `SetKernelArg` API is used to set parameters for executing the OpenCL kernel, and the `RunKernel` API is used to run the OpenCL kernel. ```cpp int Execute() override { diff --git a/docs/lite/docs/source_en/advanced/third_party/tensorrt_info.md b/docs/lite/docs/source_en/advanced/third_party/tensorrt_info.md index 138815ec3b..0b59ac0275 100644 --- a/docs/lite/docs/source_en/advanced/third_party/tensorrt_info.md +++ b/docs/lite/docs/source_en/advanced/third_party/tensorrt_info.md @@ -6,11 +6,11 @@ ### Environment Preparation -Besides basic [Environment Preparation](https://www.mindspore.cn/lite/docs/en/r2.7.0/build/build.html), CUDA and TensorRT is required as well. Current version supports [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) and [TensorRT 6.0.1.5](https://developer.nvidia.com/nvidia-tensorrt-6x-download), and [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.1-download-archive) and [TensorRT 8.5.1](https://developer.nvidia.com/nvidia-tensorrt-8x-download). +Besides basic [Environment Preparation](https://www.mindspore.cn/lite/docs/en/r2.7.0/build/build.html), CUDA and TensorRT are required as well. Current version supports [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) and [TensorRT 6.0.1.5](https://developer.nvidia.com/nvidia-tensorrt-6x-download), and [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.1-download-archive) and [TensorRT 8.5.1](https://developer.nvidia.com/nvidia-tensorrt-8x-download). Install the appropriate version of CUDA and set the installed directory as environment variable `${CUDA_HOME}`. Our build script uses this environment variable to seek CUDA. -Install TensorRT of the corresponding CUDA version, and set the installed directory as environment viriable `${TENSORRT_PATH}`. Our build script uses this environment viriable to seek TensorRT. +Install TensorRT of the corresponding CUDA version, and set the installed directory as environment variable `${TENSORRT_PATH}`. Our build script uses this environment variable to seek TensorRT. ### Build @@ -41,7 +41,7 @@ For more information about compilation, see [Linux Environment Compilation](http - Using Benchmark testing TensorRT inference - Users can also test TensorRT inference using MindSpore Lite Benchmark tool. The location of the compiled Benchmark is shown in [Compiled Output](https://www.mindspore.cn/lite/docs/en/r2.7.0/build/build.html). Pass the build package to a device with a TensorRT environment(TensorRT 6.0.1.5) and use the Benchmark tool to test TensorRT inference. Examples are as follows: + Users can also test TensorRT inference using MindSpore Lite Benchmark tool. The location of the compiled Benchmark is shown in [Compiled Output](https://www.mindspore.cn/lite/docs/en/r2.7.0/build/build.html). 
Pass the build package to a device with a TensorRT environment (TensorRT 6.0.1.5) and use the Benchmark tool to test TensorRT inference. Examples are as follows: - Test performance diff --git a/docs/lite/docs/source_zh_cn/advanced/third_party/delegate.md b/docs/lite/docs/source_zh_cn/advanced/third_party/delegate.md index 7ba83fd32a..76005edc6a 100644 --- a/docs/lite/docs/source_zh_cn/advanced/third_party/delegate.md +++ b/docs/lite/docs/source_zh_cn/advanced/third_party/delegate.md @@ -120,7 +120,7 @@ class XXXGraph : public kernel::Kernel { } int ReSize() override { - // Support dynamic shape, and input shape will changed. + // Support dynamic shape, and input shape will be changed. } }; ``` @@ -301,7 +301,7 @@ class NPUGraph : public kernel::Kernel { protected: std::vector npu_ops_{}; - NPUManager *npu_manager_ = nullptr; + NPUManager *npu_manager_ = nullptr; NPUExecutor *executor_ = nullptr; // NPU inference executor. }; ``` diff --git a/docs/lite/docs/source_zh_cn/advanced/third_party/register_kernel.md b/docs/lite/docs/source_zh_cn/advanced/third_party/register_kernel.md index 4c743e0249..3af70d70eb 100644 --- a/docs/lite/docs/source_zh_cn/advanced/third_party/register_kernel.md +++ b/docs/lite/docs/source_zh_cn/advanced/third_party/register_kernel.md @@ -10,7 +10,7 @@ MindSpore Lite当前提供了一套南向的算子注册机制,如果用户想 1. 确定算子类型:分为通用算子与Custom算子。 2. 算子实现:继承Kernel类实现自定义算子,并注册进MindSpore Lite。 -3. 算子InferShape:继承mindspore::kernel::KernelInteface实现自定义算子的InferShape能力,并注册进MindSpore Lite。 +3. 算子InferShape:继承mindspore::kernel::KernelInterface实现自定义算子的InferShape能力,并注册进MindSpore Lite。 ### 确定算子类型 @@ -207,7 +207,7 @@ class Test2Fusion : public Pass { if (custom_cnode == nullptr) { return false; } - manager->Replace(node, custom_cnode) // 通过管理器用新节点替换旧节点 + manager->Replace(node, custom_cnode); // 通过管理器用新节点替换旧节点 } return true; } diff --git a/docs/mindspore/source_en/api_python/env_var_list.rst b/docs/mindspore/source_en/api_python/env_var_list.rst index 134a513a77..4f6465a6fb 100644 --- a/docs/mindspore/source_en/api_python/env_var_list.rst +++ b/docs/mindspore/source_en/api_python/env_var_list.rst @@ -312,7 +312,7 @@ Graph Compilation and Execution * - MS_DEV_GRAPH_KERNEL_FLAGS - Configure the graph kernel fusion strategy. - String - - Configuration items, with the format "--key=value", multiple configuration items separated by space, multiple value items separated by commas, for example, `export MS_DEV_GRAPH_KERNEL_FLAGS="--enable_expand_ops=Square --enable_cluster_ops=MatMul,Add"` + - Configuration items, with the format ``--key=value``, multiple configuration items separated by space, multiple value items separated by commas, for example, `export MS_DEV_GRAPH_KERNEL_FLAGS="--enable_expand_ops=Square --enable_cluster_ops=MatMul,Add"` opt_level: Set the optimization level. Default: `2` . diff --git a/docs/mindspore/source_zh_cn/api_python/env_var_list.rst b/docs/mindspore/source_zh_cn/api_python/env_var_list.rst index 741c175dc3..e4b33cad10 100644 --- a/docs/mindspore/source_zh_cn/api_python/env_var_list.rst +++ b/docs/mindspore/source_zh_cn/api_python/env_var_list.rst @@ -312,7 +312,7 @@ * - MS_DEV_GRAPH_KERNEL_FLAGS - 设置图算融合的融合策略 - String - - 配置项,格式为“--key=value”,多个配置项以空格分隔,多个value以逗号分隔,例如 `export MS_DEV_GRAPH_KERNEL_FLAGS="--enable_expand_ops=Square --enable_cluster_ops=MatMul,Add"` + - 配置项,格式为 ``--key=value``,多个配置项以空格分隔,多个value以逗号分隔,例如 `export MS_DEV_GRAPH_KERNEL_FLAGS="--enable_expand_ops=Square --enable_cluster_ops=MatMul,Add"` opt_level:设置优化级别。默认值: `2` 。 -- Gitee
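The delegate.md hunks above revolve around handing a user-defined delegate to MindSpore Lite through `Context::SetDelegate` and letting `Model::Build` drive it. As a rough sketch of that flow — assuming the r2.7.0 C++ API for `mindspore::Delegate`, `DelegateModel` and `Model::Build`, with `MyDelegate`, the header paths and the model buffer being placeholders rather than anything defined in this patch — the wiring looks like this:

```cpp
#include <memory>
#include <vector>
#include "include/api/context.h"
#include "include/api/delegate.h"
#include "include/api/model.h"

// Placeholder delegate: a real implementation walks the DelegateModel in Build(),
// groups the kernels it supports into a sub-graph kernel and replaces them,
// as delegate.md describes.
class MyDelegate : public mindspore::Delegate {
 public:
  mindspore::Status Init() override { return mindspore::kSuccess; }
  mindspore::Status Build(mindspore::DelegateModel<mindspore::schema::Primitive> *model) override {
    return mindspore::kSuccess;  // no-op: every kernel stays on the built-in backend
  }
};

int BuildWithDelegate(const void *model_data, size_t data_size) {
  auto context = std::make_shared<mindspore::Context>();
  context->MutableDeviceInfo().push_back(std::make_shared<mindspore::CPUDeviceInfo>());
  // A null delegate here means MindSpore Lite falls back to its internal inference path.
  context->SetDelegate(std::make_shared<MyDelegate>());

  mindspore::Model model;
  auto status = model.Build(model_data, data_size, mindspore::kMindIR, context);
  return status == mindspore::kSuccess ? 0 : -1;
}
```

For the Kirin NPU case that the note at the end of delegate.md mentions, pushing a `KirinNPUDeviceInfo` into `MutableDeviceInfo()` instead of calling `SetDelegate` achieves the same effect.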
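The first register_kernel.md hunk fixes the step list for implementing a common operator: subclass `Kernel`, then register a creator. A minimal sketch of those two steps — the class and creator names, the `kFloat32` alias, the constructor signature and the header paths are my assumptions; only the `REGISTER_KERNEL(CPU, BuiltInTest, kFloat32, PrimitiveType_AddFusion, ...)` call shape is taken from the document — could look like this:

```cpp
#include <memory>
#include <vector>
#include "include/api/kernel.h"
#include "include/registry/register_kernel.h"
#include "schema/model_generated.h"

using mindspore::MSTensor;
using mindspore::kernel::Kernel;
using mindspore::schema::Primitive;

const auto kFloat32 = mindspore::DataType::kNumberTypeFloat32;

// Element-wise float add, standing in for the custom Add example in register_kernel.md.
class MyAddKernel : public Kernel {
 public:
  MyAddKernel(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
              const Primitive *primitive, const mindspore::Context *ctx)
      : Kernel(inputs, outputs, primitive, ctx) {}
  int Prepare() override { return 0; }   // one-off work done at Model::Build time
  int ReSize() override { return 0; }    // react to runtime shape changes
  int Execute() override {
    auto in0 = static_cast<const float *>(inputs_[0].MutableData());
    auto in1 = static_cast<const float *>(inputs_[1].MutableData());
    auto out = static_cast<float *>(outputs_[0].MutableData());
    for (int64_t i = 0; i < outputs_[0].ElementNum(); ++i) {
      out[i] = in0[i] + in1[i];
    }
    return 0;
  }
};

std::shared_ptr<Kernel> MyAddCreator(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
                                     const Primitive *primitive, const mindspore::Context *ctx) {
  return std::make_shared<MyAddKernel>(inputs, outputs, primitive, ctx);
}

// Provider "BuiltInTest" and the AddFusion type mirror the macro call quoted in the hunk header.
using mindspore::schema::PrimitiveType_AddFusion;
REGISTER_KERNEL(CPU, BuiltInTest, kFloat32, PrimitiveType_AddFusion, MyAddCreator)
```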
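The `KernelInteface` → `KernelInterface` correction concerns the third step of the same list: registering shape inference. Sketched under similar assumptions (the three-argument `Infer` signature, the header paths, the MSTensor accessor spellings and the `REGISTER_CUSTOM_KERNEL_INTERFACE` macro follow my reading of the r2.7.0 registry API; `Custom_Add` and `BuiltInTest` reuse the identifiers from the document's GPU example):

```cpp
#include <memory>
#include <vector>
#include "include/kernel_interface.h"
#include "include/registry/register_kernel_interface.h"

using mindspore::MSTensor;
using mindspore::Status;

// Shape inference for the Custom_Add example: the single output mirrors input 0.
class CustomAddInfer : public mindspore::kernel::KernelInterface {
 public:
  Status Infer(std::vector<MSTensor> *inputs, std::vector<MSTensor> *outputs,
               const mindspore::schema::Primitive *primitive) override {
    (*outputs)[0].SetFormat((*inputs)[0].format());
    (*outputs)[0].SetDataType((*inputs)[0].DataType());
    (*outputs)[0].SetShape((*inputs)[0].Shape());
    return mindspore::kSuccess;
  }
};

std::shared_ptr<mindspore::kernel::KernelInterface> CustomAddInferCreator() {
  return std::make_shared<CustomAddInfer>();
}

REGISTER_CUSTOM_KERNEL_INTERFACE(BuiltInTest, Custom_Add, CustomAddInferCreator)
```

At runtime this is the function that `registry::RegisterKernelInterface::GetKernelInterface` hands back when `ReSize` re-infers shapes, as described in the ReSize/Execute section of the patch.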
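For the GPU custom-kernel runtime path, the patch also touches the wording of the four-step output-memory procedure (allocator lookup, size calculation with `GpuTensorInfo`, `Malloc`, `SetData`). A compressed sketch of those steps — `AllocOutputMemory`, the `opencl_runtime_` member, the `GpuTensorInfo` call form and the `width`/`height` fields are assumed for illustration; only the `Malloc(size_t width, size_t height, DataType type)` overload and `SetData` are quoted from the text — might read:

```cpp
// Member-function sketch inside the doc's CustomAddKernel GPU example.
int CustomAddKernel::AllocOutputMemory() {
  // 1. Allocator that manages the output tensor inside the framework.
  auto allocator = outputs_[0].allocator();
  if (allocator == nullptr) {
    return -1;
  }
  // 2. Image width/height the output needs, computed from the tensor shape.
  auto img_info = GpuTensorInfo(&outputs_[0], &opencl_runtime_);
  // 3. Image-format GPU memory from the framework allocator.
  void *gpu_mem = allocator->Malloc(img_info.width, img_info.height,
                                    mindspore::DataType::kNumberTypeFloat32);
  if (gpu_mem == nullptr) {
    return -1;
  }
  // 4. Hand the memory to the tensor; the framework owns it from here on,
  //    so it must not be freed manually.
  outputs_[0].SetData(gpu_mem);
  return 0;
}
```

The subsequent `SetKernelArg`/`RunKernel` calls in `Execute` then only have to bind this tensor memory and launch the OpenCL kernel.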