From 137c23b6913d33dd70487993b35ab90b75bbead6 Mon Sep 17 00:00:00 2001 From: qiuleilei Date: Tue, 12 Aug 2025 09:04:09 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=9F=BA=E7=A1=80=E6=8E=A8=E7=90=86docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs/source_zh_cn/mindir/runtime_cpp.md | 70 ++++--------------- 1 file changed, 13 insertions(+), 57 deletions(-) diff --git a/docs/lite/docs/source_zh_cn/mindir/runtime_cpp.md b/docs/lite/docs/source_zh_cn/mindir/runtime_cpp.md index fd8c19355b..3502c8b36c 100644 --- a/docs/lite/docs/source_zh_cn/mindir/runtime_cpp.md +++ b/docs/lite/docs/source_zh_cn/mindir/runtime_cpp.md @@ -6,7 +6,7 @@ 本教程介绍如何使用[C++接口](https://www.mindspore.cn/lite/api/zh-CN/master/index.html)执行MindSpore Lite云侧推理。 -MindSpore Lite云侧推理仅支持在Linux环境部署运行。支持Atlas 200/300/500推理产品、Atlas推理系列产品、Atlas训练系列产品、Nvidia GPU和CPU硬件后端。 +MindSpore Lite云侧推理仅支持在Linux环境部署运行。支持Atlas 200/300/500推理产品、Atlas推理系列产品、Atlas训练系列产品和CPU硬件后端。 如需体验MindSpore Lite端侧推理流程,请参考文档[使用C++接口执行端侧推理](https://www.mindspore.cn/lite/docs/zh-CN/master/infer/runtime_cpp.html)。 @@ -26,7 +26,7 @@ MindSpore Lite云侧推理仅支持在Linux环境部署运行。支持Atlas 200/ 2. 通过MindSpore导出MindIR模型,或者由[模型转换工具](https://www.mindspore.cn/lite/docs/zh-CN/master/mindir/converter_tool.html)转换获得MindIR模型,并将其拷贝到`mindspore-lite/examples/cloud_infer/runtime_cpp/model`目录,可以下载MobileNetV2模型文件[mobilenetv2.mindir](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.mindir)。 -3. 从[官网](https://www.mindspore.cn/lite/docs/zh-CN/master/use/downloads.html)下载Ascend、Nvidia GPU、CPU三合一的MindSpore Lite云侧推理包`mindspore-lite-{version}-linux-{arch}.tar.gz`,并存放到`mindspore-lite/examples/cloud_infer/runtime_cpp`目录。 +3. 从[官网](https://www.mindspore.cn/lite/docs/zh-CN/master/use/downloads.html)下载Ascend、CPU二合一的MindSpore Lite云侧推理包`mindspore-lite-{version}-linux-{arch}.tar.gz`,并存放到`mindspore-lite/examples/cloud_infer/runtime_cpp`目录。 ## 创建配置上下文 @@ -43,7 +43,7 @@ if (context == nullptr) { auto &device_list = context->MutableDeviceInfo(); ``` -通过[MutableDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#mutabledeviceinfo)返回后端信息列表的引用,指定运行的设备。`MutableDeviceInfo`中支持用户设置设备信息,包括[CPUDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#cpudeviceinfo)、[GPUDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#gpudeviceinfo)、[AscendDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#ascenddeviceinfo)。设置的设备个数当前只能为其中一个。 +通过[MutableDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#mutabledeviceinfo)返回后端信息列表的引用,指定运行的设备。`MutableDeviceInfo`中支持用户设置设备信息,包括[CPUDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#cpudeviceinfo)、[AscendDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#ascenddeviceinfo)。设置的设备个数当前只能为其中一个。 ### 配置使用CPU后端 @@ -94,40 +94,6 @@ device_list.push_back(device_info); context->SetInterOpParallelNum(2); ``` -### 配置使用GPU后端 - -当需要执行的后端为GPU时,需要设置[GPUDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#gpudeviceinfo)为推理后端。其中GPUDeviceInfo通过`SetDeviceID`来设置设备ID,通过`SetEnableFP16`或者`SetPrecisionMode`使能float16推理。 - -下面示例代码演示如何创建GPU推理后端,同时设备ID设置为0: - -```c++ -auto context = std::make_shared(); -if (context == nullptr) { - std::cerr << "New context failed." 
<< std::endl; - return nullptr; -} -auto &device_list = context->MutableDeviceInfo(); - -auto device_info = std::make_shared(); -if (device_info == nullptr) { - std::cerr << "New GPUDeviceInfo failed." << std::endl; - return nullptr; -} -// Set NVIDIA device id. -device_info->SetDeviceID(0); -// The GPU device context needs to be push_back into device_list to work. -device_list.push_back(device_info); -``` - -`SetEnableFP16`属性是否设置成功取决于当前设备的[CUDA计算能力](https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix)。 - -用户可通过调用 `SetPrecisionMode()`接口配置精度模式,设置 `SetPrecisionMode("preferred_fp16")` 时,同时 `SetEnableFP16(true)` 会自动设置,反之亦然。 - -| SetPrecisionMode() | SetEnableFP16() | -| ------------------ | --------------- | -| enforce_fp32 | false | -| preferred_fp16 | true | - ### 配置使用Ascend后端 当需要执行的后端为Ascend时(目前支持Atlas 200/300/500推理产品、Atlas推理系列产品、Atlas训练系列产品),需要设置[AscendDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#ascenddeviceinfo)为推理后端。其中AscendDeviceInfo通过`SetDeviceID`来设置设备ID。Ascend默认使能float16精度,可通过`AscendDeviceInfo.SetPrecisionMode`更改精度模式。 @@ -190,8 +156,6 @@ std::shared_ptr BuildModel(const std::string &model_path, cons std::shared_ptr device_info = nullptr; if (device_type == "CPU") { device_info = CreateCPUDeviceInfo(); - } else if (device_type == "GPU") { - device_info = CreateGPUDeviceInfo(device_id); } else if (device_type == "Ascend") { device_info = CreateAscendDeviceInfo(device_id); } @@ -345,11 +309,11 @@ tensor name is:shape1 tensor size is:4000 tensor elements num is:1000 ### 动态shape输入 -Lite云侧推理框架支持动态shape输入的模型,GPU和Ascend硬件后端,需要在模型转换和模型推理时配置动态输入信息。 +MindSpore Lite云侧推理框架支持动态shape输入的模型,Ascend硬件后端需要在模型转换和模型推理时配置动态输入信息。 -动态输入信息的配置与离线和在线场景有关。离线场景,模型转换工具参数`--optimize=general`,`--optimize=gpu_oriented`或`--optimize=ascend_oriented`,即经历和硬件相关的融合和优化,产生的MindIR模型仅能在对应硬件后端上运行,比如,在Atlas 200/300/500推理产品环境上,模型转换工具指定`--optimize=ascend_oriented`,则产生的模型仅支持在Atlas 200/300/500推理产品上运行,如果指定`--optimize=general`,则支持在GPU和CPU上运行。在线场景,加载的MindIR没有经历和硬件相关的融合和优化,支持在Ascend、GPU和CPU上运行,模型转换工具参数`--optimize=none`,或MindSpore导出的MindIR模型没有经过转换工具处理。 +动态输入信息的配置与离线和在线场景有关。离线场景,模型转换工具参数`--optimize=general`或`--optimize=ascend_oriented`,即经历和硬件相关的融合和优化,产生的MindIR模型仅能在对应硬件后端上运行,比如,在Atlas 200/300/500推理产品环境上,模型转换工具指定`--optimize=ascend_oriented`,则产生的模型仅支持在Atlas 200/300/500推理产品上运行,如果指定`--optimize=general`,则支持在CPU上运行。在线场景,加载的MindIR没有经历和硬件相关的融合和优化,支持在Ascend和CPU上运行,模型转换工具参数`--optimize=none`,或MindSpore导出的MindIR模型没有经过转换工具处理。 -Ascend硬件后端离线场景下,需要在模型转换阶段配置动态输入信息。Ascend硬件后端在线场景下,以及GPU硬件后端离线和在线场景下,需要在模型加载阶段通过[LoadConfig](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#loadconfig)接口配置动态输入信息。 +Ascend硬件后端离线场景下,需要在模型转换阶段配置动态输入信息。Ascend硬件后端在线场景下,需要在模型加载阶段通过[LoadConfig](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#loadconfig)接口配置动态输入信息。 通过`LoadConfig`加载的配置文件示例如下所示: @@ -357,24 +321,17 @@ Ascend硬件后端离线场景下,需要在模型转换阶段配置动态输 [ascend_context] input_shape=input_1:[-1,3,224,224] dynamic_dims=[1~4],[8],[16] - -[gpu_context] -input_shape=input_1:[-1,3,224,224] -dynamic_dims=[1~16] -opt_dims=[1] ``` -`[ascend_context]`和`[gpu_context]`分别作用于Ascend和GPU硬件后端。 +`[ascend_context]`表示作用于Ascend后端。 -1. Ascend和GPU硬件后端需要通过动态输入信息进行图的编译和优化,CPU硬件后端不需要配置动态维度信息。 +1. Ascend后端需要通过动态输入信息进行图的编译和优化,CPU硬件后端不需要配置动态维度信息。 2. `input_shape`用于指示输入shape信息,格式为`input_name1:[shape1];input_name2:[shape2]`,如果有动态输入,则需要将相应的维度设定为-1,多个输入通过英文分号`;`隔开。 -3. 
`dynamic_dims`用于指示动态维度的值范围,多个非连续的值范围通过英文逗号`,`隔开。上例子中,Ascend的batch维度值范围为`1,2,3,4,8,16`,GPU的batch维度值范围为1到16。Ascend硬件后端,动态输入为多档模式,动态输入范围越大,模型编译时间越长。 - -4. 对于GPU硬件后端,需要额外配置`opt_dims`用于指示`dynamic_dims`范围中最优的值。 +3. `dynamic_dims`用于指示动态维度的值范围,多个非连续的值范围通过英文逗号`,`隔开。上例子中,Ascend的batch维度值范围为`1,2,3,4,8,16`。Ascend硬件后端,动态输入为多档模式,动态输入范围越大,模型编译时间越长。 -5. 如果`input_shape`配置的为静态shape,则不需要配置`dynamic_dims`和`opt_dims`。 +4. 如果`input_shape`配置的为静态shape,则不需要配置`dynamic_dims`。 在模型`Build`前,通过`LoadConfig`加载配置文件信息: @@ -424,7 +381,7 @@ int ResizeModel(std::shared_ptr model, int32_t batch_size) { ### 指定输入输出host内存 -指定设备内存支持CPU、Ascend和GPU硬件后端。指定的输入host内存,缓存中的数据将直接拷贝到设备(device)内存上,指定的输出host内存,设备(device)内存的数据将直接拷贝到这块缓存中。避免了额外的host之间的数据拷贝,提升推理性能。 +指定设备内存支持CPU、Ascend硬件后端。指定的输入host内存,缓存中的数据将直接拷贝到设备(device)内存上,指定的输出host内存,设备(device)内存的数据将直接拷贝到这块缓存中。避免了额外的host之间的数据拷贝,提升推理性能。 通过[SetData](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#setdata-1)可单独或者同时指定输入和输出host内存。建议参数`own_data`为false,当`own_data`为false,用户需要维护host内存的生命周期,负责host内存的申请和释放。当参数`own_data`为true时,在MSTensor析构时释放指定的内存。 @@ -473,7 +430,7 @@ int ResizeModel(std::shared_ptr model, int32_t batch_size) { ### 指定输入输出设备(device)内存 -指定设备内存支持Ascend和GPU硬件后端。指定输入输出设备内存可以避免device到host内存之间的相互拷贝,比如经过芯片dvpp预处理产生的device内存输入直接作为模型推理的输入,避免预处理结果从device内存拷贝到host内存,host结果作为模型推理输入,推理前重新拷贝到device上。 +指定设备内存支持Ascend硬件后端。指定输入输出设备内存可以避免device到host内存之间的相互拷贝,比如经过芯片dvpp预处理产生的device内存输入直接作为模型推理的输入,避免预处理结果从device内存拷贝到host内存,host结果作为模型推理输入,推理前重新拷贝到device上。 指定输入输出设备内存样例可参考[设备内存样例](https://gitee.com/mindspore/mindspore-lite/tree/master/mindspore-lite/examples/cloud_infer/device_example_cpp)。 @@ -638,7 +595,7 @@ ge.dynamicNodeType=1 ### 多线程加载模型 -硬件后端为Ascend,provider为默认时,支持多线程并发加载多个Ascend优化后模型,以提升模型加载性能。使用[模型转换工具](https://www.mindspore.cn/lite/docs/zh-CN/master/converter/converter_tool.html),指定 `--optimize=ascend_oriented` 可将MindSpore导出的 `MindIR` 模型、TensorFlow和ONNX等第三方框架模型转换为Ascend优化后模型。MindSpore导出的 `MindIR` 模型未进行Ascend优化,对于第三方框架模型,转换工具中如果指定 `--optimize=none` 产生的 `MindIR` 模型也未进行Ascend优化。 +硬件后端为Ascend,provider为默认时,支持多线程并发加载多个Ascend优化后模型,以提升模型加载性能。使用[模型转换工具](https://www.mindspore.cn/lite/docs/zh-CN/master/mindir/converter_tool.html),指定 `--optimize=ascend_oriented` 可将MindSpore导出的 `MindIR` 模型、TensorFlow和ONNX等第三方框架模型转换为Ascend优化后模型。MindSpore导出的 `MindIR` 模型未进行Ascend优化,对于第三方框架模型,转换工具中如果指定 `--optimize=none` 产生的 `MindIR` 模型也未进行Ascend优化。 ### 多模型共享权重 @@ -755,7 +712,6 @@ std::vector LoadModel(const std::string &model_path0, const std::string & } device_info->SetDeviceID(device_id); device_info->SetRankID(rank_id); - device_info->SetProvider("ge"); device_list.push_back(device_info); mindspore::Model model0; -- Gitee From f15731065d896136d29ffe4ac7c1d3408a9fb1bd Mon Sep 17 00:00:00 2001 From: qiuleilei Date: Tue, 12 Aug 2025 14:05:45 +0800 Subject: [PATCH 2/2] bugfix1 --- .../lite/docs/source_en/mindir/runtime_cpp.md | 70 ++++--------------- .../docs/source_en/mindir/runtime_java.md | 14 +--- .../docs/source_zh_cn/mindir/runtime_java.md | 14 +--- .../source_zh_cn/mindir/runtime_python.md | 14 +--- 4 files changed, 16 insertions(+), 96 deletions(-) diff --git a/docs/lite/docs/source_en/mindir/runtime_cpp.md b/docs/lite/docs/source_en/mindir/runtime_cpp.md index f5c54cd949..f83d86cfbb 100644 --- a/docs/lite/docs/source_en/mindir/runtime_cpp.md +++ b/docs/lite/docs/source_en/mindir/runtime_cpp.md @@ -6,7 +6,7 @@ This tutorial describes how to perform cloud-side inference with MindSpore Lite by using the [C++ interface](https://www.mindspore.cn/lite/api/en/master/index.html). 
-MindSpore Lite cloud-side inference is supported to run in Linux environment deployment only. Atlas 200/300/500 inference product, Atlas inference series, Atlas training series, Nvidia GPU and CPU hardware backends are supported. +MindSpore Lite cloud-side inference is supported to run in Linux environment deployment only. Atlas 200/300/500 inference product, Atlas inference series, Atlas training series and CPU hardware backends are supported. To experience the MindSpore Lite device-side inference process, please refer to the document [Using C++ Interface to Perform Cloud-side Inference](https://www.mindspore.cn/lite/docs/en/master/infer/runtime_cpp.html). @@ -26,7 +26,7 @@ Using the MindSpore Lite inference framework consists of the following main step 2. Export the MindIR model via MindSpore, or get the MindIR model by converting it with [model conversion tool](https://www.mindspore.cn/lite/docs/en/master/mindir/converter_tool.html) and copy it to the `mindspore-lite/examples/cloud_infer/runtime_cpp/model` directory. You can download the MobileNetV2 model file [mobilenetv2.mindir](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.mindir). -3. Download the Ascend, Nvidia GPU, CPU triplet MindSpore Lite cloud-side inference package `mindspore- lite-{version}-linux-{arch}.tar.gz` in the [official website](https://www.mindspore.cn/lite/docs/en/master/use/downloads.html) and save it to `mindspore-lite/examples/cloud_infer/runtime_cpp` directory. +3. Download the Ascend, CPU dual-purpose MindSpore Lite cloud-side inference package `mindspore- lite-{version}-linux-{arch}.tar.gz` in the [official website](https://www.mindspore.cn/lite/docs/en/master/use/downloads.html) and save it to `mindspore-lite/examples/cloud_infer/runtime_cpp` directory. ## Creating Configuration Context @@ -43,7 +43,7 @@ if (context == nullptr) { auto &device_list = context->MutableDeviceInfo(); ``` -Return a reference to the list of backend information for specifying the running device via [MutableDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_Context.html). User-set device information is supported in `MutableDeviceInfo`, including [CPUDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_CPUDeviceInfo.html), [GPUDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_GPUDeviceInfo.html), [AscendDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_AscendDeviceInfo.html). The number of devices set can only be one of them currently. +Return a reference to the list of backend information for specifying the running device via [MutableDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_Context.html). User-set device information is supported in `MutableDeviceInfo`, including [CPUDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_CPUDeviceInfo.html), [AscendDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_AscendDeviceInfo.html). The number of devices set can only be one of them currently. ### Configuring to Use the CPU Backend @@ -94,40 +94,6 @@ Optionally, you can additionally set the number of threads, thread affinity, par context->SetInterOpParallelNum(2); ``` -### Configuring Using GPU Backend - -When the backend to be executed is GPU, you need to set [GPUDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_GPUDeviceInfo.html#class-gpudeviceinfo) as the inference backend. 
GPUDeviceInfo sets the device ID by `SetDeviceID` and enables float16 inference by `SetEnableFP16` or `SetPrecisionMode`. - -The following sample code demonstrates how to create a GPU inference backend while the device ID is set to 0: - -```c++ -auto context = std::make_shared(); -if (context == nullptr) { - std::cerr << "New context failed." << std::endl; - return nullptr; -} -auto &device_list = context->MutableDeviceInfo(); - -auto device_info = std::make_shared(); -if (device_info == nullptr) { - std::cerr << "New GPUDeviceInfo failed." << std::endl; - return nullptr; -} -// Set NVIDIA device id. -device_info->SetDeviceID(0); -// The GPU device context needs to be push_back into device_list to work. -device_list.push_back(device_info); -``` - -Whether the `SetEnableFP16` is set successfully depends on the [CUDA computing power] of the current device (https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix). - -`SetPrecisionMode()` has two parameters to control float16 inference, `SetPrecisionMode("preferred_fp16")` equals to `SetEnableFP16(true)`, vice versa. - -| SetPrecisionMode() | SetEnableFP16() | -| ------------------ | --------------- | -| enforce_fp32 | false | -| preferred_fp16 | true | - ### Configuring Using Ascend Backend When the backend to be executed is Ascend (Atlas 200/300/500 inference product, Atlas inference series, or Atlas training series are currently supported), you need to set [AscendDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_AscendDeviceInfo.html#class-ascenddeviceinfo) as the inference backend. AscendDeviceInfo sets the device ID by `SetDeviceID`. Ascend enables float16 precision by default, and the precision mode can be changed by `AscendDeviceInfo.SetPrecisionMode`. @@ -190,8 +156,6 @@ std::shared_ptr BuildModel(const std::string &model_path, cons std::shared_ptr device_info = nullptr; if (device_type == "CPU") { device_info = CreateCPUDeviceInfo(); - } else if (device_type == "GPU") { - device_info = CreateGPUDeviceInfo(device_id); } else if (device_type == "Ascend") { device_info = CreateAscendDeviceInfo(device_id); } @@ -345,11 +309,11 @@ tensor name is:shape1 tensor size is:4000 tensor elements num is:1000 ### Dynamic Shape Input -Lite cloud-side inference framework supports dynamic shape input for models. GPU and Ascend hardware backend needs to be configured with dynamic input information during model conversion and model inference. +Lite cloud-side inference framework supports dynamic shape input for models. Ascend hardware backend needs to be configured with dynamic input information during model conversion and model inference. -The configuration of dynamic input information is related to offline and online scenarios. For offline scenarios, the model conversion tool parameter `--optimize=general`, `--optimize=gpu_oriented` or `--optimize=ascend_oriented`, i.e. experiencing the hardware-related fusion and optimization. The generated MindIR model can only run on the corresponding hardware backend. For example, in Atlas 200/300/500 inference product environment, if the model conversion tool specifies `--optimize=ascend_oriented`, the generated model will only support running on Atlas 200/300/500 inference product. If `--optimize=general` is specified, running on GPU and CPU is supported. For online scenarios, the loaded MindIR has not experienced hardware-related fusion and optimization, supports running on Ascend, GPU, and CPU. 
The model conversion tool parameter `--optimize=none`, or the MindSpore-exported MindIR model has not been processed by the conversion tool. +The configuration of dynamic input information is related to offline and online scenarios. For offline scenarios, the model conversion tool parameter `--optimize=general` or `--optimize=ascend_oriented`, i.e. experiencing the hardware-related fusion and optimization. The generated MindIR model can only run on the corresponding hardware backend. For example, in Atlas 200/300/500 inference product environment, if the model conversion tool specifies `--optimize=ascend_oriented`, the generated model will only support running on Atlas 200/300/500 inference product. If `--optimize=general` is specified, running on CPU is supported. For online scenarios, the loaded MindIR has not experienced hardware-related fusion and optimization, supports running on Ascend and CPU. The model conversion tool parameter `--optimize=none`, or the MindSpore-exported MindIR model has not been processed by the conversion tool. -Ascend hardware backend offline scenarios require dynamic input information to be configured during the model conversion phase. Ascend hardware backend online scenarios, as well as GPU hardware backend offline and online scenarios, require dynamic input information to be configured during the model loading phase via the [LoadConfig](https://www.mindspore.cn/lite/api/en/master/api_cpp/mindspore.html# loadconfig) interface. +Ascend hardware backend offline scenarios require dynamic input information to be configured during the model conversion phase. Ascend hardware backend online scenarios require dynamic input information to be configured during the model loading phase via the [LoadConfig](https://www.mindspore.cn/lite/api/en/master/api_cpp/mindspore.html# loadconfig) interface. An example configuration file loaded via `LoadConfig` is shown below: @@ -357,24 +321,17 @@ An example configuration file loaded via `LoadConfig` is shown below: [ascend_context] input_shape=input_1:[-1,3,224,224] dynamic_dims=[1~4],[8],[16] - -[gpu_context] -input_shape=input_1:[-1,3,224,224] -dynamic_dims=[1~16] -opt_dims=[1] ``` -The `[ascend_context]` and `[gpu_context]` act on the Ascend and GPU hardware backends, respectively. +The `[ascend_context]` indicates that it acts on the Ascend hardware backend. -1. Ascend and GPU hardware backends require dynamic input information for graph compilation and optimization, while CPU hardware backends do not require configuration of dynamic dimensional information. +1. Ascend hardware backends require dynamic input information for graph compilation and optimization, while CPU hardware backends do not require configuration of dynamic dimensional information. 2. `input_shape` is used to indicate the input shape information in the format `input_name1:[shape1];input_name2:[shape2]`. If there are dynamic inputs, the corresponding dimension needs to be set to -1. Multiple inputs are separated by the English semicolon `;`. -3. `dynamic_dims` is used to indicate the value range of the dynamic dimension, with multiple non-contiguous ranges of values separated by the comma `,`. In the above example, Ascend batch dimension values range in `1,2,3,4,8,16` and GPU batch dimension values range from 1 to 16. Ascend hardware backend with dynamic inputs are in multi-step mode, the larger the dynamic input range, the longer the model compilation time. - -4. 
For the GPU hardware backend, additional configuration of `opt_dims` is required to indicate the optimal value in the `dynamic_dims` range. +3. `dynamic_dims` is used to indicate the value range of the dynamic dimension, with multiple non-contiguous ranges of values separated by the comma `,`. In the above example, Ascend batch dimension values range in `1,2,3,4,8,16`. Ascend hardware backend with dynamic inputs are in multi-step mode, the larger the dynamic input range, the longer the model compilation time. -5. If `input_shape` is configured as a static shape, `dynamic_dims` and `opt_dims` do not need to be configured. +4. If `input_shape` is configured as a static shape, `dynamic_dims` do not need to be configured. Load the configuration file information via `LoadConfig` before the model `Build`: @@ -424,7 +381,7 @@ int ResizeModel(std::shared_ptr model, int32_t batch_size) { ### Specifying Input and Output Host Memory -Specify that the device memory supports the CPU, Ascend, and GPU hardware backend. The specified input host memory, the data in the cache will be directly copied to the device memory, and the specified output host memory, the data in the device memory will be directly copied to this cache. Unnecessary data copying between hosts is avoided and inference performance is improved. +Specify that the device memory supports the CPU and Ascend hardware backend. The specified input host memory, the data in the cache will be directly copied to the device memory, and the specified output host memory, the data in the device memory will be directly copied to this cache. Unnecessary data copying between hosts is avoided and inference performance is improved. Input and output host memory can be specified separately or simultaneously by [SetData](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_MSTensor.html). It is recommended that the parameter `own_data` be false. When `own_data` is false, the user needs to maintain the life cycle of host memory and is responsible for the request and release of host memory. When the parameter `own_data` is true, the specified memory is freed at the MSTensor destruct. @@ -473,7 +430,7 @@ Input and output host memory can be specified separately or simultaneously by [S ### Specifying the Memory of the Input and Output Devices -Specifying device memory supports Ascend and GPU hardware backends. Specifying input and output device memory can avoid mutual copying from device to host memory, for example, the device memory input generated by chip dvpp preprocessing is directly used as input for model inference, avoiding preprocessing results copied from device memory to host memory and host results used as model inference input and re-copied to device before inference. +Specifying device memory supports Ascend hardware backends. Specifying input and output device memory can avoid mutual copying from device to host memory, for example, the device memory input generated by chip dvpp preprocessing is directly used as input for model inference, avoiding preprocessing results copied from device memory to host memory and host results used as model inference input and re-copied to device before inference. Sample memory for specified input and output devices can be found in [sample device memory](https://gitee.com/mindspore/mindspore-lite/tree/master/mindspore-lite/examples/cloud_infer/device_example_cpp). 
@@ -637,7 +594,7 @@ ge.dynamicNodeType=1 ### Loading Models through Multiple Threads -When the backend is Ascend and the provider is the default, it supports loading multiple Ascend optimized models through multiple threads to improve model loading performance. Using the [Model converting tool](https://www.mindspore.cn/lite/docs/en/master/converter/converter_tool.html), we can specify `--optimize=ascend_oriented` to convert `MindIR` models exported from MindSpore, third-party framework models such as TensorFlow and ONNX into Ascend optimized models. The `MindIR` models exported by MindSpore have not undergone Ascend optimization. For third-party framework models, the `MindIR` model generated by specifying `--optimize=none` in the converting tool has not undergone Ascend optimization. +When the backend is Ascend and the provider is the default, it supports loading multiple Ascend optimized models through multiple threads to improve model loading performance. Using the [Model converting tool](https://www.mindspore.cn/lite/docs/en/master/mindir/converter_tool.html), we can specify `--optimize=ascend_oriented` to convert `MindIR` models exported from MindSpore, third-party framework models such as TensorFlow and ONNX into Ascend optimized models. The `MindIR` models exported by MindSpore have not undergone Ascend optimization. For third-party framework models, the `MindIR` model generated by specifying `--optimize=none` in the converting tool has not undergone Ascend optimization. ### Multiple Models Sharing Weights @@ -753,7 +710,6 @@ std::vector LoadModel(const std::string &model_path0, const std::string & } device_info->SetDeviceID(device_id); device_info->SetRankID(rank_id); - device_info->SetProvider("ge"); device_list.push_back(device_info); mindspore::Model model0; diff --git a/docs/lite/docs/source_en/mindir/runtime_java.md b/docs/lite/docs/source_en/mindir/runtime_java.md index c9c86ad606..6f49ed87b4 100644 --- a/docs/lite/docs/source_en/mindir/runtime_java.md +++ b/docs/lite/docs/source_en/mindir/runtime_java.md @@ -44,7 +44,7 @@ To perform model inference with MindSpore Lite, you need to get the path of the Create a configuration context [MSContext](https://www.mindspore.cn/lite/api/en/master/api_java/mscontext.html#mscontext) and save some basic configuration parameters required for the session, which is used to guide graph compilation and graph execution. Configure the number of threads, thread affinity and whether to enable heterogeneous parallel inference via the [init](https://www.mindspore.cn/lite/api/en/master/api_java/mscontext.html#init) interface. MindSpore Lite has a built-in thread pool shared by processes. The maximum number of threads in the pool is specified by `threadNum` when inference, and the default is 2 threads. -The backend of MindSpore Lite inference can call `deviceType` in the [AddDeviceInfo](https://www.mindspore.cn/lite/api/en/master/api_java/mscontext.html#adddeviceinfo) interface to specify, currently supporting CPU, GPU and Ascend. When graph compilation is performed, the operator selection is scheduled based on the main selection backend. If the backend supports float16, float16 operator can be used in preference by setting `isEnableFloat16` to `true`. +The backend of MindSpore Lite inference can call `deviceType` in the [AddDeviceInfo](https://www.mindspore.cn/lite/api/en/master/api_java/mscontext.html#adddeviceinfo) interface to specify, currently supporting CPU and Ascend. 
When graph compilation is performed, the operator selection is scheduled based on the main selection backend. If the backend supports float16, float16 operator can be used in preference by setting `isEnableFloat16` to `true`. ### Configuring to Use the CPU Backend @@ -58,18 +58,6 @@ context.init(2, CpuBindMode.HIGHER_CPU); context.addDeviceInfo(DeviceType.DT_CPU, true); ``` -### Configuring to Use the GPU Backend - -When the backend to be executed is GPU, after `MSContext` is created, you need to add [GPUDeviceInfo](https://www.mindspore.cn/lite/api/en/master/generate/classmindspore_GPUDeviceInfo.html#class-gpudeviceinfo) in the [addDeviceInfo](https://www.mindspore.cn/lite/api/en/master/api_java/mscontext.html#adddeviceinfo). If float16 inference is enabled, the GPU will use the float16 operator in preference. - -The following code demonstrates how to create a GPU inference backend: - -```java -MSContext context = new MSContext(); -context.init(); -context.addDeviceInfo(DeviceType.DT_GPU, true); -``` - ### Configuring to Use the Ascend Backend When the backend to be executed is Ascend, after `MSContext` is created, you need to add [AscendDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#ascenddeviceinfo) in the [addDeviceInfo](https://www.mindspore.cn/lite/api/en/master/api_java/mscontext.html#adddeviceinfo). diff --git a/docs/lite/docs/source_zh_cn/mindir/runtime_java.md b/docs/lite/docs/source_zh_cn/mindir/runtime_java.md index 45a339ce19..9d89b391ce 100644 --- a/docs/lite/docs/source_zh_cn/mindir/runtime_java.md +++ b/docs/lite/docs/source_zh_cn/mindir/runtime_java.md @@ -44,7 +44,7 @@ 创建配置上下文[MSContext](https://www.mindspore.cn/lite/api/zh-CN/master/api_java/mscontext.html#mscontext),保存会话所需的一些基本配置参数,用于指导图编译和图执行。通过[init](https://www.mindspore.cn/lite/api/zh-CN/master/api_java/mscontext.html#init)接口配置线程数,线程亲和性,以及是否开启异构并行推理。MindSpore Lite内置一个进程共享的线程池,推理时通过`threadNum`指定线程池的最大线程数,默认为2线程。 -MindSpore Lite推理时的后端可调用[AddDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_java/mscontext.html#adddeviceinfo)接口中的`deviceType`指定,目前支持CPU、GPU和Ascend。在进行图编译时,会根据主选后端进行算子选型调度。如果后端支持float16,可通过设置`isEnableFloat16`为`true`后,优先使用float16算子。 +MindSpore Lite推理时的后端可调用[AddDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_java/mscontext.html#adddeviceinfo)接口中的`deviceType`指定,目前支持CPU和Ascend。在进行图编译时,会根据主选后端进行算子选型调度。如果后端支持float16,可通过设置`isEnableFloat16`为`true`后,优先使用float16算子。 ### 配置使用CPU后端 @@ -58,18 +58,6 @@ context.init(2, CpuBindMode.HIGHER_CPU); context.addDeviceInfo(DeviceType.DT_CPU, true); ``` -### 配置使用GPU后端 - -当需要执行的后端为GPU时,`MSContext`创建后需要在[addDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_java/mscontext.html#adddeviceinfo)中添加[GPUDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#gpudeviceinfo)。如果使能float16推理,GPU会优先使用float16算子。 - -下面代码演示了如何创建GPU推理后端: - -```java -MSContext context = new MSContext(); -context.init(); -context.addDeviceInfo(DeviceType.DT_GPU, true); -``` - ### 配置使用Ascend后端 当需要执行的后端为Ascend时,`MSContext`初始化后需要在[addDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_java/mscontext.html#adddeviceinfo)中添加[AscendDeviceInfo](https://www.mindspore.cn/lite/api/zh-CN/master/api_cpp/mindspore.html#ascenddeviceinfo)。 diff --git a/docs/lite/docs/source_zh_cn/mindir/runtime_python.md b/docs/lite/docs/source_zh_cn/mindir/runtime_python.md index 4ddc0d92c5..8f43979397 100644 --- a/docs/lite/docs/source_zh_cn/mindir/runtime_python.md +++ b/docs/lite/docs/source_zh_cn/mindir/runtime_python.md @@ -6,7 +6,7 
@@ -6,7 +6,7 @@
 
 本教程提供了MindSpore Lite执行云侧推理的示例程序,通过文件输入、执行推理、打印推理结果的方式,演示了[Python接口](https://mindspore.cn/lite/api/zh-CN/master/mindspore_lite.html)进行云侧推理的基本流程,用户能够快速了解MindSpore Lite执行云侧推理相关API的使用。相关代码放置在[mindspore-lite/examples/cloud_infer/quick_start_python](https://gitee.com/mindspore/mindspore-lite/tree/master/mindspore-lite/examples/cloud_infer/quick_start_python)目录。
 
-MindSpore Lite云侧推理仅支持在Linux环境部署运行。支持Atlas 200/300/500推理产品、Atlas推理系列产品、Atlas训练系列产品、Nvidia GPU和CPU硬件后端。
+MindSpore Lite云侧推理仅支持在Linux环境部署运行。支持Atlas 200/300/500推理产品、Atlas推理系列产品、Atlas训练系列产品和CPU硬件后端。
 
 下面以Ubuntu 18.04为例,介绍了在Linux X86操作系统配合CPU硬件平台下如何使用Python云侧推理Demo:
 
@@ -108,18 +108,6 @@ context.ascend.device_id = 0
 context.ascend.provider = "ge"
 ```
 
-如果用户需要在GPU设备上运行推理时,因此需要设置上下文的目标设备为gpu。
-
-```python
-import numpy as np
-import mindspore_lite as mslite
-
-# init context, and set target is gpu.
-context = mslite.Context()
-context.target = ["gpu"]
-context.gpu.device_id = 0
-```
-
 ### 模型加载与编译
 
 模型加载与编译可以调用`Model`的[build_from_file](https://www.mindspore.cn/lite/api/zh-CN/master/mindspore_lite/mindspore_lite.Model.html#mindspore_lite.Model.build_from_file)接口,直接从文件缓存加载、编译得到运行时的模型。
--
Gitee