From 3ed5d1f6a98565c7c1ea87a4cbbdbe5157f97034 Mon Sep 17 00:00:00 2001
From: qiuleilei
Date: Sat, 9 Aug 2025 17:58:42 +0800
Subject: [PATCH] Model conversion tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../docs/source_en/mindir/converter_python.md | 41 ++++++++++------
 .../docs/source_en/mindir/converter_tool.md | 11 +++--
 .../source_en/mindir/converter_tool_ascend.md | 8 ++-
 .../source_zh_cn/mindir/converter_python.md | 49 ++++++++++---------
 .../source_zh_cn/mindir/converter_tool.md | 13 ++---
 .../mindir/converter_tool_ascend.md | 8 ++-
 6 files changed, 70 insertions(+), 60 deletions(-)

diff --git a/docs/lite/docs/source_en/mindir/converter_python.md b/docs/lite/docs/source_en/mindir/converter_python.md
index 988e692e9f..580978a771 100644
--- a/docs/lite/docs/source_en/mindir/converter_python.md
+++ b/docs/lite/docs/source_en/mindir/converter_python.md
@@ -12,7 +12,7 @@ When the input model type is MindSpore, since it is already a `mindir` model, tw

1. Inference is performed directly without offline conversion.

-2. When using offline conversion, setting `optimize` to `general` in CPU/GPU hardware backend (for general optimization), setting `optimize` to `gpu_oriented` in GPU hardware (for GPU extra optimization based on general optimization), setting `optimize` to `ascend_oriented` in Ascend hardware. The relevant optimization is done in the offline phase to reduce the initialization time of inference execution.
+2. When using offline conversion, set `optimize` to `general` for the CPU hardware backend (to enable general optimization), or set `optimize` to `ascend_oriented` for the Ascend hardware backend. The relevant optimization is done in the offline phase to reduce the initialization time of inference execution.
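+
+   For example, a minimal sketch of this offline conversion with the Python API looks as follows; the file names are placeholders, and the attributes are described in detail in the sections below:
+
+   ```python
+   import mindspore_lite as mslite
+
+   converter = mslite.Converter()
+   # "general" targets the CPU backend; "ascend_oriented" targets the Ascend backend
+   converter.optimize = "ascend_oriented"
+   converter.convert(fmk_type=mslite.FmkType.MINDIR, model_file="model.mindir", output_file="model_ascend")
+   ```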
## Linux Environment Usage Instructions

@@ -43,10 +43,9 @@ After successful installation, you can use the `pip show mindspore_lite` command
```text
mindspore_lite
├── __pycache__
-├── akg # AKG-related interfaces
├── include
+├── custom_kernels
├── lib
-| ├── libakg.so # Dynamic link libraries used by AKG
│ ├── _c_lite_wrapper.cpython-37m-x86_64-linux-gnu.so # MindSpore Lite cloud-side inference python module encapsulates the dynamic library of the C++ interface framework
│ ├── libmindspore_converter.so # Dynamic library for model conversion
│ ├── libmindspore_core.so # MindSpore Core Dynamic Library
@@ -56,17 +55,28 @@ mindspore_lite
│ ├── libascend_pass_plugin.so # Register for Ascend Backend Graph Optimization Plugin Dynamic Library
│ ├── libmslite_shared_lib.so # Adaptation of the dynamic library in the backend of Ascend
│ ├── libascend_kernel_plugin.so # Ascend backend kernel plugin
-│ ├── libtensorrt_plugin.so # tensorrt backend kernel plugin
+│ ├── libascend_ge_plugin.so # Ascend GE (GraphEngine) backend plugin
+│ ├── libdvpp_utils.so # DVPP image processing utilities
+│ ├── liblite-unified-executor.so # Unified executor dynamic library
+│ ├── libllm_engine_plugin.so # LLM engine plugin
+│ ├── libmindspore_graph_ir.so # MindSpore graph IR dynamic library
+│ ├── libmindspore_ops.so # MindSpore operators dynamic library
+│ ├── libmsplugin_ge_litert.so # GE LiteRT plugin dynamic library
+│ ├── libruntime_convert_plugin.so # Runtime conversion plugin
│ ├── libopencv_core.so.4.5 # Dynamic library for OpenCV
│ ├── libopencv_imgcodecs.so.4.5 # Dynamic library for OpenCV
│ └── libopencv_imgproc.so.4.5 # Dynamic library for OpenCV
-├── __init__.py # Initialization package
-├── _checkparam.py # Check parameter tool
-├── context.py # Code related to context interface
-├── converter.py # Code related to converter interface, conversion portal
-├── model.py # Code related to model, inference portal
-├── tensor.py # Code related to tensor interface
-└── version.py # MindSpore Lite cloud-side inference version number
+├── __init__.py # Initialization package
+├── _check_ascend.py # Ascend environment check tool
+├── lite_infer.py # Code related to lite inference
+├── llm_engine.py # Code related to the LLM engine interface
+├── _parse_update_weights_name.py # Tool for parsing the names of weights to be updated
+├── _checkparam.py # Check parameter tool
+├── context.py # Code related to context interface
+├── converter.py # Code related to converter interface, conversion portal
+├── model.py # Code related to model, inference portal
+├── tensor.py # Code related to tensor interface
+└── version.py # MindSpore Lite cloud-side inference version number
```

### Description of Attributes

@@ -86,7 +96,7 @@ Detailed descriptions of the parameters and their correspondence to the paramete
| input_data_type | DataType | `--inputDataType=` | Set the data type of the quantized model input Tensor. Only valid if the quantization parameters (`scale` and `zero point`) of the model input Tensor are available. The default is to keep the same data type as the original model input Tensor. | DataType.FLOAT32, DataType.INT8, DataType.UINT8, DataType.UNKNOWN | - |
| input_format | Format | `--inputDataFormat=` | Set the input format of the exported model, valid only for 4-dimensional inputs. | Format.NCHW, Format.NHWC | - |
| input_shape | dict{string:list\[int]} | `--inputShape=` | Set the dimensions of the model input, and the order of the input dimensions is kept the same as the original model. For example: {"inTensor1": \[1, 32, 32, 32], "inTensor2": \[1, 1, 32, 32]} | - |
-| optimize | str | `--optimize=` | Set the mode of optimization during the offline conversion. | "none", "general", "gpu_oriented", "ascend_oriented" | - |
+| optimize | str | `--optimize=` | Set the mode of optimization during the offline conversion. 
| "none", "general", "ascend_oriented" | - | | output_data_type | DataType | `--outputDataType=` | Set the data type of the quantized model output Tensor. Only valid if the quantization parameters (`scale` and `zero point`) of the model output Tensor are available. The default is to keep the same data type as the original model output Tensor. | DataType.FLOAT32, DataType.INT8, DataType.UINT8, DataType.UNKNOWN | - | | save_type | ModelType | `--saveType=` | Required | Set the model type needs to be export. | ModelType.MINDIR | The MINDIR model uses the MindSpore Lite cloud-side inference installation package | | weight_fp16 | bool | `--fp16=` | Set whether the weights in float32 data format need to be stored in float16 data format during model serialization. | True, False | - | @@ -100,15 +110,14 @@ Detailed descriptions of the parameters and their correspondence to the paramete > > - `optimize` is an attribute, it used to set the mode of optimization during the offline conversion. > -> - If this attribute is set to "none", no relevant graph optimization operations will be performed during the offline conversion phase of the model, and the relevant graph optimization operations will be performed during the execution of the inference phase. The advantage of this attribute is that the converted model can be deployed directly to any CPU/GPU/Ascend hardware backend since it is not optimized in a specific way, while the disadvantage is that the initialization time of the model increases during inference execution. -> - If this attribute is set to "general", general optimization will be performed, such as constant folding and operator fusion (the converted model only supports CPU/GPU hardware backend, not Ascend backend). -> - If this parameter is set to "gpu_oriented", the general optimization and extra optimization for GPU hardware will be performed (the converted model only supports GPU hardware backend). +> - If this attribute is set to "none", no relevant graph optimization operations will be performed during the offline conversion phase of the model, and the relevant graph optimization operations will be performed during the execution of the inference phase. The advantage of this attribute is that the converted model can be deployed directly to any CPU/Ascend hardware backend since it is not optimized in a specific way, while the disadvantage is that the initialization time of the model increases during inference execution. +> - If this attribute is set to "general", general optimization will be performed, such as constant folding and operator fusion (the converted model only supports CPU hardware backend, not Ascend backend). > - If this attribute is set to "ascend_oriented", the optimization for Ascend hardware will be performed (the converted model only supports Ascend hardware backend). > ### Method of convert -Usage scenario: Convert a third-party model into a MindSpore model. You can call the convert method multiple times to convert multiple models. +Usage scenario: Convert a third-party model into a MindSpore Lite cloud-side inference model. You can call the convert method multiple times to convert multiple models. Detailed descriptions of the parameters and their correspondence to the parameters in [Offline Conversion of Inference Models](https://www.mindspore.cn/lite/docs/en/master/mindir/converter_tool.html) are provided below. 
diff --git a/docs/lite/docs/source_en/mindir/converter_tool.md b/docs/lite/docs/source_en/mindir/converter_tool.md
index 959bb7d3d9..520956de20 100644
--- a/docs/lite/docs/source_en/mindir/converter_tool.md
+++ b/docs/lite/docs/source_en/mindir/converter_tool.md
@@ -39,10 +39,11 @@ mindspore-lite-{version}-linux-x64
└── lib # Dynamic libraries that the converter depends on
├── libmindspore_glog.so.0 # Glog dynamic libraries
├── libascend_pass_plugin.so # Register for Ascend Backend Graph Optimization Plugin Dynamic Library
- ├── libmslite_shared_lib.so # Adaptation of the dynamic library in the backend of Ascend
+ ├── libmindspore_core_lite.so # MindSpore Core dynamic library
+ ├── libmindspore_graph_ir_lite.so # MindSpore graph IR dynamic library
+ ├── libmindspore_ops_lite.so # MindSpore operators dynamic library
├── libmindspore_converter.so # Dynamic library for model conversion
├── libmslite_converter_plugin.so # Model conversion plugin
- ├── libmindspore_core.so # MindSpore Core dynamic libraries
├── libopencv_core.so.4.5 # Dynamic libraries for OpenCV
├── libopencv_imgcodecs.so.4.5 # Dynamic libraries for OpenCV
└── libopencv_imgproc.so.4.5 # Dynamic libraries for OpenCV
@@ -65,7 +66,7 @@ Detailed parameter descriptions are provided below.
| `--configFile=` | Not | 1. can be used as a post-training quantization profile path; 2. can be used as an extended function profile path. | - | - | - |
| `--inputShape=` | Not | Set the dimensions of the model inputs, and keep the order of the input dimensions the same as the original model. The model structure can be further optimized for some specific models, but the converted model will probably lose the dynamic shape properties. Multiple inputs are split by `;`, along with double quotes `""`. | e.g. "inTensorName_1: 1,32,32,4;inTensorName_2:1,64,64,4;" | - | - |
| `--saveType=` | Not | Set the exported model as `mindir` model or `ms` model. | MINDIR, MINDIR_LITE | MINDIR | This version can only be reasoned with models turned out by setting to MINDIR |
-| `--optimize=` | Not | Set the optimization accomplished in the process of converting model. | none, general, gpu_oriented, ascend_oriented| general | - |
+| `--optimize=` | Not | Set the optimization accomplished in the process of converting the model. | none, general, ascend_oriented | general | - |
| `--decryptKey=` | Not | Set the key used to load the cipher text MindIR. The key is expressed in hexadecimal and is only valid when `fmk` is MINDIR. | - | - | - |
| `--decryptMode=` | Not | Set the mode to load the cipher MindIR, valid only when decryptKey is specified. | AES-GCM, AES-CBC | AES-GCM | - |
| `--encryptKey=` | Not | Set the key to export the encryption `mindir` model. The key is expressed in hexadecimal. Only AES-GCM is supported, and the key length is only 16Byte. | - | - | - |
@@ -83,13 +84,13 @@ Notes:
- The parameter name and the parameter value are connected by an equal sign without any space between them.
- Caffe models are generally divided into two files: `*.prototxt` model structure, corresponding to the `--modelFile` parameter, and `*.caffemodel` model weights, corresponding to the `--weightFile` parameter.
- The `configFile` configuration file uses the `key=value` approach to define the relevant parameters.
-- `--optimize` parameter is used to set the mode of optimization during the offline conversion. If this parameter is set to none, no relevant graph optimization operations will be performed during the offline conversion phase of the model, and the relevant graph optimization operations will be done during the execution of the inference phase. 
The advantage of this parameter is that the converted model can be deployed directly to any CPU/GPU/Ascend hardware backend since it is not optimized in a specific way, while the disadvantage is that the initialization time of the model increases during inference execution. If this parameter is set to general, general optimization will be performed, such as constant folding and operator fusion (the converted model only supports CPU/GPU hardware backend, not Ascend backend). If this parameter is set to gpu_oriented, the general optimization and extra optimization for GPU hardware will be performed (the converted model only supports GPU hardware backend). If this parameter is set to ascend_oriented, the optimization for Ascend hardware will be performed (the converted model only supports Ascend hardware backend).
+- The `--optimize` parameter is used to set the mode of optimization during the offline conversion. If this parameter is set to none, no relevant graph optimization operations will be performed during the offline conversion phase of the model, and the relevant graph optimization operations will be done during the execution of the inference phase. The advantage of this parameter is that the converted model can be deployed directly to any CPU/Ascend hardware backend since it is not optimized in a specific way, while the disadvantage is that the initialization time of the model increases during inference execution. If this parameter is set to general, general optimization will be performed, such as constant folding and operator fusion (the converted model only supports the CPU hardware backend, not the Ascend backend). If this parameter is set to ascend_oriented, the optimization for Ascend hardware will be performed (the converted model only supports the Ascend hardware backend).
- The encryption and decryption function only takes effect when `MSLITE_ENABLE_MODEL_ENCRYPTION=on` is set at [compile](https://www.mindspore.cn/lite/docs/en/master/mindir/build.html) time and only supports Linux x86 platforms. `decrypt_key` and `encrypt_key` are string expressed in hexadecimal. Linux platform users can use the' xxd 'tool to convert the key expressed in bytes into hexadecimal expressions.
- For the MindSpore model, since it is already a `mindir` model, two approaches are suggested:

  Inference is performed directly without offline conversion.

-  When using offline conversion, setting `--optimize` to general in CPU/GPU hardware backend (for general optimization), setting `--optimize` to gpu_oriented in GPU hardware (for GPU extra optimization based on general optimization), setting `--optimize` to ascend_oriented in Ascend hardware. The relevant optimization is done in the offline phase to reduce the initialization time of inference execution.
+  When using offline conversion, set `--optimize` to general for the CPU hardware backend (to enable general optimization), or set `--optimize` to ascend_oriented for the Ascend hardware backend. The relevant optimization is done in the offline phase to reduce the initialization time of inference execution.

### Usage Examples

diff --git a/docs/lite/docs/source_en/mindir/converter_tool_ascend.md b/docs/lite/docs/source_en/mindir/converter_tool_ascend.md
index 82fec57ecf..35371d5fb6 100644
--- a/docs/lite/docs/source_en/mindir/converter_tool_ascend.md
+++ b/docs/lite/docs/source_en/mindir/converter_tool_ascend.md
@@ -362,7 +362,7 @@ AOE API tuning needs to be done through converter tool. When `optimize=ascend_or

## Deploying Ascend Custom Operators

-MindSpore Lite converter supports converting models with MindSpore Lite custom Ascend operators to MindSpore Lite models. Custom operators can be used to optimize model inference performance in special scenarios, such as using custom MatMul to achieve higher matrix multiplication, using the transformer fusion operators provided by MindSpore Lite to improve transformer model performance (to be launched) and using the AKG graph fusion operator to automatically fuse models to improve inference performance.
+MindSpore Lite converter supports converting models with MindSpore Lite custom Ascend operators to MindSpore Lite models. Custom operators can be used to optimize model inference performance in special scenarios, such as using a custom MatMul to achieve higher-performance matrix multiplication, or using the transformer fusion operators provided by MindSpore Lite to improve transformer model performance (to be launched).

If MindSpore Lite converts Ascend models with custom operators, user needs to deploy the custom operators to the ACL operator library before calling the converter in order to complete the conversion properly. The following describes the key steps to deploy Ascend custom operators:

@@ -393,7 +393,7 @@ If MindSpore Lite converts Ascend models with custom operators, user needs to de
4. Check the Ascend library directory to see if the installation is successful

-   After deploying the custom operator, go to the Ascend operator library directory ``/usr/local/Ascend/latest/opp/vendors/`` and check whether there are corresponding custom operator files in the directory. At present, we mainly provide the basic operator sample and the AKG graph fusion operator implementation. The specific file structure is as follows:
+   After deploying the custom operator, go to the Ascend operator library directory ``/usr/local/Ascend/latest/opp/vendors/`` and check whether there are corresponding custom operator files in the directory. At present, we mainly provide the basic operator samples. The specific file structure is as follows:

```text
/usr/local/Ascend/latest/opp/vendors/
@@ -415,8 +415,6 @@ If MindSpore Lite converts Ascend models with custom operators, user needs to de
│ │ └── mslite_impl # Implementation logic directory of operators
│ │ ├── add_dsl.py # add sample logic implementation file based on dsl development
│ │ ├── add_tik.py # add sample logic implementation file based on tik development
- │ │ ├── compiler.py # Operator compilation logic file needed for akg graph
- │ │ ├── custom.py # akg custom operator implementation file
│ │ ├── matmul_tik.py # matmul sample logic implementation file based on tik development
│ ├── cpu # aicpu custom operator subdirectory, not required
│ │ └── aicpu_kernel
@@ -428,5 +426,5 @@ If MindSpore Lite converts Ascend models with custom operators, user needs to de
│ ├── add_tik.py # add sample logic implementation file based on tik development
│ └── matmul_tik.py # matmul sample logic implementation file based on tik development
└── op_proto # Operator prototype definition package directory
- └── libcust_op_proto.so # operator prototype definition so file. akg custom operator is registered by default, and do not need this file
+ └── libcust_op_proto.so # operator prototype definition .so file
``` \ No newline at end of file diff --git a/docs/lite/docs/source_zh_cn/mindir/converter_python.md b/docs/lite/docs/source_zh_cn/mindir/converter_python.md index 975e4b5aab..f8b39cd940 100644 --- a/docs/lite/docs/source_zh_cn/mindir/converter_python.md +++ b/docs/lite/docs/source_zh_cn/mindir/converter_python.md @@ -12,7 +12,7 @@ MindSpore Lite云侧推理支持通过Python接口进行模型转换,支持多 1. 不需要经过离线转换,直接进行推理执行。 -2. 使用离线转换,CPU/GPU后端设置optimize为"general"(使能通用优化),GPU后端设置optimize为"gpu_oriented"(在通用优化的基础上,使能针对GPU的额外优化),NPU后端设置optimize为"ascend_oriented",在离线阶段完成相关优化,减少推理执行的初始化时间。 +2. 使用离线转换,CPU后端设置optimize为"general"(使能通用优化),Ascend后端设置optimize为"ascend_oriented",在离线阶段完成相关优化,减少推理执行的初始化时间。 ## Linux环境使用说明 @@ -20,7 +20,7 @@ MindSpore Lite云侧推理支持通过Python接口进行模型转换,支持多 使用MindSpore Lite云侧推理的Python接口进行模型转换,需要进行如下环境准备工作。 -- [编译](https://www.mindspore.cn/lite/docs/zh-CN/master/build/build.html)或[下载](https://www.mindspore.cn/lite/docs/zh-CN/master/use/downloads.html)含Converter组件的MindSpore Lite云侧推理的Whl安装包。 +- [编译](https://www.mindspore.cn/lite/docs/zh-CN/master/mindir/build.html)或[下载](https://www.mindspore.cn/lite/docs/zh-CN/master/use/downloads.html)含Converter组件的MindSpore Lite云侧推理的Whl安装包。 > 当前,提供下载Python3.7版本对应的安装包,若需要其他Python版本,请使用编译功能生成安装包。 @@ -30,12 +30,6 @@ MindSpore Lite云侧推理支持通过Python接口进行模型转换,支持多 python -c "import mindspore_lite" ``` -- 安装后可以使用以下命令检查MindSpore Lite内置的AKG是否安装成功:若无报错,则表示安装成功。 - - ```bash - python -c "import mindspore_lite.akg" - ``` - ### 目录结构 安装成功后,可使用`pip show mindspore_lite`命令查看MindSpore Lite云侧推理的Python模块的安装位置。 @@ -43,10 +37,9 @@ MindSpore Lite云侧推理支持通过Python接口进行模型转换,支持多 ```text mindspore_lite ├── __pycache__ -├── akg # AKG相关的接口 ├── include +├── custom_kernels ├── lib -| ├── libakg.so # AKG使用的动态链接库 │ ├── _c_lite_wrapper.cpython-37m-x86_64-linux-gnu.so # MindSpore Lite 云侧推理python模块封装C++接口的框架的动态库 │ ├── libmindspore_converter.so # 模型转换动态库 │ ├── libmindspore_core.so # MindSpore Core动态库 @@ -56,17 +49,28 @@ mindspore_lite │ ├── libascend_pass_plugin.so # 注册昇腾后端图优化插件动态库 │ ├── libmslite_shared_lib.so # 适配昇腾后端的动态库 │ ├── libascend_kernel_plugin.so # 昇腾后端kernel插件 -│ ├── libtensorrt_plugin.so # tensorrt后端kernel插件 +│ ├── libascend_ge_plugin.so # +│ ├── libdvpp_utils.so # +│ ├── liblite-unified-executor.so # +│ ├── libllm_engine_plugin.so # +│ ├── libmindspore_graph_ir.so # +│ ├── libmindspore_ops.so # +│ ├── libmsplugin_ge_litert.so # +│ ├── libruntime_convert_plugin.so # │ ├── libopencv_core.so.4.5 # OpenCV的动态库 │ ├── libopencv_imgcodecs.so.4.5 # OpenCV的动态库 │ └── libopencv_imgproc.so.4.5 # OpenCV的动态库 -├── __init__.py # 初始化包 -├── _checkparam.py # 校验参数工具 -├── context.py # context接口相关代码 -├── converter.py # converter接口相关代码,转换入口 -├── model.py # model接口相关代码,推理入口 -├── tensor.py # tensor接口相关代码 -└── version.py # MindSpore Lite云侧推理版本号 +├── __init__.py # 初始化包 +├── _check_ascend.py # +├── lite_infer.py # +├── llm_engine.py # +├── _parse_update_weights_name.py # +├── _checkparam.py # 校验参数工具 +├── context.py # context接口相关代码 +├── converter.py # converter接口相关代码,转换入口 +├── model.py # model接口相关代码,推理入口 +├── tensor.py # tensor接口相关代码 +└── version.py # MindSpore Lite云侧推理版本号 ``` ### 属性说明 @@ -86,7 +90,7 @@ MindSpore Lite云侧推理的Python接口模型转换提供了多种属性设置 | input_data_type | DataType | `--inputDataType=` | 设置量化模型输入Tensor的data type。仅当模型输入Tensor的量化参数(`scale`和`zero point`)都具备时有效。默认与原始模型输入Tensor的data type保持一致。 | DataType.FLOAT32、DataType.INT8、DataType.UINT8、DataType.UNKNOWN | - | | input_format | Format | `--inputDataFormat=` | 设置导出模型的输入format,只对四维输入有效。 | Format.NCHW、Format.NHWC | - | | input_shape | dict{string:list\[int]} | `--inputShape=` | 
设置模型输入的维度,输入维度的顺序和原始模型保持一致。如:{"inTensor1": \[1, 32, 32, 32], "inTensor2": \[1, 1, 32, 32]} | - | - | -| optimize | str | `--optimize=` | 设定转换模型的过程所完成的优化。 | "none"、"general"、"gpu_oriented"、"ascend_oriented" | - | +| optimize | str | `--optimize=` | 设定转换模型的过程所完成的优化。 | "none"、"general"、"ascend_oriented" | - | | output_data_type | DataType | `--outputDataType=` | 设置量化模型输出Tensor的data type。仅当模型输出Tensor的量化参数(`scale`和`zero point`)都具备时有效。默认与原始模型输出Tensor的data type保持一致。 | DataType.FLOAT32、DataType.INT8、DataType.UINT8、DataType.UNKNOWN | - | | save_type | ModelType | `--saveType=` | 设置导出模型文件的类型。| ModelType.MINDIR | MINDIR模型使用MindSpore Lite云侧推理安装包 | | weight_fp16 | bool | `--fp16=` | 设置在模型序列化时是否需要将float32数据格式的权重存储为float16数据格式。 | True、False | - | @@ -100,15 +104,14 @@ MindSpore Lite云侧推理的Python接口模型转换提供了多种属性设置 > > - `optimize` 该属性是用来设定在离线转换的过程中需要完成哪些特定的优化。 > -> - 如果该属性设置为"none",那么在模型的离线转换阶段将不进行相关的图优化操作,相关的图优化操作将会在执行推理阶段完成。该属性的优点在于转换出来的模型由于没有经过特定的优化,可以直接部署到CPU/GPU/Ascend任意硬件后端;而带来的缺点是推理执行时模型的初始化时间增长。 -> - 如果设置成"general",表示离线转换过程会完成通用优化,包括常量折叠,算子融合等(转换出的模型只支持CPU/GPU后端,不支持Ascend后端)。 -> - 如果设置成"gpu_oriented",表示转换过程中会完成通用优化和针对GPU后端的额外优化(转换出来的模型只支持GPU后端)。 +> - 如果该属性设置为"none",那么在模型的离线转换阶段将不进行相关的图优化操作,相关的图优化操作将会在执行推理阶段完成。该属性的优点在于转换出来的模型由于没有经过特定的优化,可以直接部署到CPU/Ascend任意硬件后端;而带来的缺点是推理执行时模型的初始化时间增长。 +> - 如果设置成"general",表示离线转换过程会完成通用优化,包括常量折叠,算子融合等(转换出的模型只支持CPU后端,不支持Ascend后端)。 > - 如果设置成"ascend_oriented",表示转换过程中只完成针对Ascend后端的优化(转换出来的模型只支持Ascend后端)。 > ### convert方法 -方法使用场景:将第三方模型转换生成MindSpore模型,可多次调用convert方法,转换多个模型。 +方法使用场景:将第三方模型转换生成MindSpore Lite云侧推理模型,可多次调用convert方法,转换多个模型。 下面提供详细的参数说明以及与[推理模型离线转换](https://www.mindspore.cn/lite/docs/zh-CN/master/mindir/converter_tool.html)中参数的对应关系。 diff --git a/docs/lite/docs/source_zh_cn/mindir/converter_tool.md b/docs/lite/docs/source_zh_cn/mindir/converter_tool.md index 8ba643656b..718dbcf7f3 100644 --- a/docs/lite/docs/source_zh_cn/mindir/converter_tool.md +++ b/docs/lite/docs/source_zh_cn/mindir/converter_tool.md @@ -39,14 +39,15 @@ mindspore-lite-{version}-linux-x64 └── lib # 转换工具依赖的动态库 ├── libmindspore_glog.so.0 # Glog动态库 ├── libascend_pass_plugin.so # 注册昇腾后端图优化插件动态库 - ├── libmslite_shared_lib.so # 适配昇腾后端的动态库 + ├── libmindspore_core_lite.so # + ├── libmindspore_graph_ir_lite.so # + ├── libmindspore_ops_lite.so # ├── libmindspore_converter.so # 模型转换动态库 ├── libmslite_converter_plugin.so # 模型转换插件 - ├── libmindspore_core.so # MindSpore Core动态库 ├── libopencv_core.so.4.5 # OpenCV的动态库 ├── libopencv_imgcodecs.so.4.5 # OpenCV的动态库 └── libopencv_imgproc.so.4.5 # OpenCV的动态库 - ├── third_party # 第三方模型proto定义 + ├── third_party # 第三方模型proto定义 ``` ### 参数说明 @@ -65,7 +66,7 @@ MindSpore Lite云侧推理模型转换工具提供了多种参数设置,用户 | `--configFile=` | 否 | 1)可作为训练后量化配置文件路径;2)可作为扩展功能配置文件路径。 | - | - | - | | `--inputShape=` | 否 | 设定模型输入的维度,输入维度的顺序和原始模型保持一致。对某些特定的模型可以进一步优化模型结构,但是转化后的模型将可能失去动态shape的特性。多个输入用`;`分割,同时加上双引号`""`。 | e.g. 
"inTensorName_1: 1,32,32,4;inTensorName_2:1,64,64,4;" | - | - | | `--saveType=` | 否 | 设定导出的模型为`mindir`模型或者`ms`模型。 | MINDIR、MINDIR_LITE | MINDIR | 云侧推理版本只有设置为MINDIR转出的模型才可以推理 | -| `--optimize=` | 否 | 设定转换模型的过程所完成的优化。 | none、general、gpu_oriented、ascend_oriented| general | - | +| `--optimize=` | 否 | 设定转换模型的过程所完成的优化。 | none、general、ascend_oriented| general | - | | `--decryptKey=` | 否 | 设定用于加载密文MindIR时的密钥,密钥用十六进制表示,只对`fmk`为MINDIR时有效。 | - | - | - | | `--decryptMode=` | 否 | 设定加载密文MindIR的模式,只在指定了decryptKey时有效。 | AES-GCM、AES-CBC | AES-GCM | - | | `--encryptKey=` | 否 | 设定导出加密`mindir`模型的密钥,密钥用十六进制表示。仅支持 AES-GCM,密钥长度仅支持16Byte。 | - | - | - | @@ -83,13 +84,13 @@ MindSpore Lite云侧推理模型转换工具提供了多种参数设置,用户 - 参数名和参数值之间用等号连接,中间不能有空格。 - Caffe模型一般分为两个文件:`*.prototxt`模型结构,对应`--modelFile`参数;`*.caffemodel`模型权值,对应`--weightFile`参数。 - `configFile`配置文件采用`key=value`的方式定义相关参数。 -- `--optimize`该参数是用来设定在离线转换的过程中需要完成哪些特定的优化。如果该参数设置为none,那么在模型的离线转换阶段将不进行相关的图优化操作,相关的图优化操作将会在执行推理阶段完成。该参数的优点在于转换出来的模型由于没有经过特定的优化,可以直接部署到CPU/GPU/Ascend任意硬件后端;而带来的缺点是推理执行时模型的初始化时间增长。如果设置成general,表示离线转换过程会完成通用优化,包括常量折叠,算子融合等(转换出的模型只支持CPU/GPU后端,不支持Ascend后端)。如果设置成gpu_oriented,表示转换过程中会完成通用优化和针对GPU后端的额外优化(转换出来的模型只支持GPU后端)。如果设置成ascend_oriented,表示转换过程中只完成针对Ascend后端的优化(转换出来的模型只支持Ascend后端)。 +- `--optimize`该参数是用来设定在离线转换的过程中需要完成哪些特定的优化。如果该参数设置为none,那么在模型的离线转换阶段将不进行相关的图优化操作,相关的图优化操作将会在执行推理阶段完成。该参数的优点在于转换出来的模型由于没有经过特定的优化,可以直接部署到CPU/Ascend任意硬件后端;而带来的缺点是推理执行时模型的初始化时间增长。如果设置成general,表示离线转换过程会完成通用优化,包括常量折叠,算子融合等(转换出的模型只支持CPU后端,不支持Ascend后端)。如果设置成ascend_oriented,表示转换过程中只完成针对Ascend后端的优化(转换出来的模型只支持Ascend后端)。 - 加解密功能仅在[编译](https://www.mindspore.cn/lite/docs/zh-CN/master/mindir/build.html)时设置为`MSLITE_ENABLE_MODEL_ENCRYPTION=on`时生效,并且仅支持Linux x86平台。其中密钥为十六进制表示的字符串,Linux平台用户可以使用`xxd`工具对字节表示的密钥进行十六进制表达转换。需要注意的是,加解密算法在1.7版本进行了更新,导致新版的Python接口不支持对1.6及其之前版本的MindSpore Lite加密导出的模型进行转换。 - 针对MindSpore模型,由于已经是`mindir`模型,建议两种做法: 不需要经过离线转换,直接进行推理执行。 - 使用离线转换,CPU/GPU后端设置`--optimize`为general(使能通用优化),GPU后端设置`--optimize`为gpu_oriented(在通用优化的基础上,使能针对GPU的额外优化),NPU后端设置`--optimize`为ascend_oriented,在离线阶段完成相关优化,减少推理执行的初始化时间。 + 使用离线转换,CPU后端设置`--optimize`为general(使能通用优化),Ascend后端设置`--optimize`为ascend_oriented,在离线阶段完成相关优化,减少推理执行的初始化时间。 ### 使用示例 diff --git a/docs/lite/docs/source_zh_cn/mindir/converter_tool_ascend.md b/docs/lite/docs/source_zh_cn/mindir/converter_tool_ascend.md index 4619cdc750..af69fef4d4 100644 --- a/docs/lite/docs/source_zh_cn/mindir/converter_tool_ascend.md +++ b/docs/lite/docs/source_zh_cn/mindir/converter_tool_ascend.md @@ -364,7 +364,7 @@ Ascend推理时,运行时指定 `provider` 为 ``ge`` 时,支持多个模型 ## 部署Ascend自定义算子 -MindSpore Lite converter支持将带有MindSpore Lite自定义Ascend算子的模型转换为MindSpore Lite的模型,通过自定义算子,可以在特殊场景下使用自定义算子对模型推理性能进行优化,如使用自定义的MatMul实现更高的矩阵乘法计算,使用MindSpore Lite提供的transformer融合算子提升transformer模型性能(待上线)以及使用AKG图算融合算子对模型进行自动融合优化提升推理性能等。 +MindSpore Lite converter支持将带有MindSpore Lite自定义Ascend算子的模型转换为MindSpore Lite的模型,通过自定义算子,可以在特殊场景下使用自定义算子对模型推理性能进行优化,如使用自定义的MatMul实现更高的矩阵乘法计算,使用MindSpore Lite提供的transformer融合算子提升transformer模型性能(待上线)。 如果MindSpore Lite转换Ascend模型时有自定义算子,用户需要在调用converter之前部署自定义算子到ACL的算子库中才能正常完成转换,以下描述了部署Ascend自定义算子的关键步骤: @@ -395,7 +395,7 @@ MindSpore Lite converter支持将带有MindSpore Lite自定义Ascend算子的模 4. 
查看昇腾算子库目录检查是否安装成功 - 完成部署自定义算子之后,进入昇腾算子库目录``/usr/local/Ascend/latest/opp/vendors/``,查看其下目录是否有对应的自定义算子文件,当前主要提供了基本算子样例和AKG图算融合算子实现,具体文件结构如下: + 完成部署自定义算子之后,进入昇腾算子库目录``/usr/local/Ascend/latest/opp/vendors/``,查看其下目录是否有对应的自定义算子文件,当前主要提供了基本算子样例,具体文件结构如下: ```text /usr/local/Ascend/latest/opp/vendors/ @@ -417,8 +417,6 @@ MindSpore Lite converter支持将带有MindSpore Lite自定义Ascend算子的模 │ │ └── mslite_impl # 算子的实现逻辑目录 │ │ ├── add_dsl.py # 基于dsl开发的add样例逻辑实现文件 │ │ ├── add_tik.py # 基于tik开发的add样例逻辑实现文件 - │ │ ├── compiler.py # akg图算需要的算子编译逻辑文件 - │ │ ├── custom.py # akg自定义算子实现文件 │ │ ├── matmul_tik.py # 基于tik开发的matmul样例逻辑实现文件 │ ├── cpu # aicpu自定义算子目录,非必需 │ │ └── aicpu_kernel @@ -430,5 +428,5 @@ MindSpore Lite converter支持将带有MindSpore Lite自定义Ascend算子的模 │ ├── add_tik.py # 基于tik开发的add样例逻辑实现文件 │ └── matmul_tik.py # 基于tik开发的matmul样例逻辑实现文件 └── op_proto # 算子原型定义包目录 - └── libcust_op_proto.so # 算子原型定义so文件,akg自定义算子默认注册,不需要此文件 + └── libcust_op_proto.so # 算子原型定义so文件 ``` -- Gitee