diff --git a/api/source_en/api/python/mindspore/mindspore.nn.probability.rst b/api/source_en/api/python/mindspore/mindspore.nn.probability.rst index 9ed8e3699313bcc607274c7616faa39cd49d6f23..2235f574850eceaf0f26a4d6a1b2c4927a2a247e 100644 --- a/api/source_en/api/python/mindspore/mindspore.nn.probability.rst +++ b/api/source_en/api/python/mindspore/mindspore.nn.probability.rst @@ -12,7 +12,14 @@ mindspore.nn.probability.bnn_layers .. automodule:: mindspore.nn.probability.bnn_layers :members: + :exclude-members: ConvReparam , DenseReparam + .. autoclass:: ConvReparam(in_channels, out_channels, kernel_size,stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + + .. autoclass:: DenseReparam(in_channels, out_channels, activation=None, has_bias=True, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + .. autoclass:: WithBNNLossCell :members: diff --git a/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst b/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst index 729903dfca591c264e4d1b5b093bf3356203c67a..70e8f69bf9f2a3e6d8d6f1be8636cad79b61fb69 100644 --- a/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst +++ b/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst @@ -12,7 +12,14 @@ mindspore.nn.probability.bnn_layers .. automodule:: mindspore.nn.probability.bnn_layers :members: + :exclude-members: ConvReparam , DenseReparam + .. autoclass:: ConvReparam(in_channels, out_channels, kernel_size,stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + + .. 
autoclass:: DenseReparam(in_channels, out_channels, activation=None, has_bias=True, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + .. autoclass:: WithBNNLossCell :members: diff --git a/api/source_zh_cn/programming_guide/ops.md b/api/source_zh_cn/programming_guide/ops.md index eb7d4a3352ff0f63bc3b813b30c1dfb1ecd957a7..53bb69f5dfd011be5b4ed47d1f440da45df482a6 100644 --- a/api/source_zh_cn/programming_guide/ops.md +++ b/api/source_zh_cn/programming_guide/ops.md @@ -1,4 +1,4 @@ -# ops模块 +# ops模块 @@ -20,7 +20,7 @@ ops主要包括operations、functional和composite,可通过ops直接获取到 ## mindspore.ops.operations -operations提供了所有的Primitive算子接口,是开放给用户的最低阶算子接口。 +operations提供了所有的Primitive算子接口,是开放给用户的最低阶算子接口。算子支持情况可查询[算子支持列表](https://www.mindspore.cn/docs/zh-CN/master/operator_list.html#mindspore-ops-operations)。 Primitive算子也称为算子原语,它直接封装了底层的Ascend、GPU、AICPU、CPU等多种算子的具体实现,为用户提供基础算子能力。 @@ -47,25 +47,10 @@ output = [ 1. 8. 64.] ## mindspore.ops.functional -为了简化没有属性的算子的调用流程,MindSpore提供了一些算子的functional版本。入参要求参考原算子的输入输出要求。算子支持情况可以查询[算子支持列表](https://www.mindspore.cn/docs/zh-CN/master/operator_list.html#mindspore-ops-operations)。 +为了简化没有属性的算子的调用流程,MindSpore提供了一些算子的functional版本。入参要求参考原算子的输入输出要求。算子支持情况可以查询[算子支持列表](https://www.mindspore.cn/docs/zh-CN/master/operator_list.html#mindspore-ops-functional)。 例如`P.Pow`算子,我们提供了functional版本的`F.tensor_pow`算子。 -使用operations的代码样例如下: - -```python -import numpy as np -import mindspore -from mindspore import Tensor -from mindspore.ops import operations as P - -input_x = mindspore.Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32) -input_y = 3.0 -pow = P.Pow() -output = pow(input_x, input_y) -print("output =", output) -``` - 使用functional的代码样例如下: ```python @@ -138,4 +123,4 @@ tensor [[2.4, 4.2] scalar 3 ``` -此外,高阶函数`GradOperation`提供了根据输入的函数,求这个函数对应的求梯度的函数的方式,详细可以参阅[API文档](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.composite.html#mindspore.ops.composite.GradOperation)。 \ No 
newline at end of file +此外,高阶函数`GradOperation`提供了根据输入的函数,求这个函数对应的梯度函数的方式,详细可以参阅[API文档](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.composite.html#mindspore.ops.composite.GradOperation)。 \ No newline at end of file diff --git a/api/source_zh_cn/programming_guide/tensor.md b/api/source_zh_cn/programming_guide/tensor.md index 08f9ec6d7f5ed1c450964eb15e5f613831607152..c935dc71bba6a42e0174e03048f7e4d704db9bb4 100644 --- a/api/source_zh_cn/programming_guide/tensor.md +++ b/api/source_zh_cn/programming_guide/tensor.md @@ -55,22 +55,22 @@ print(x, "\n\n", y, "\n\n", z, "\n\n", m, "\n\n", n, "\n\n", p) ``` [[1 2] - [3 4]] + [3 4]] -1 +1 -2 +2 -True +True -[1 2 3] +[1 2 3] [4. 5. 6.] ``` ## 变量张量 -变量张量的值在网络中可以被更新,用来表示需要被更新的参数,MindSpore使用Tensor的子类Parameter构造变量张量,构造时支持传入Tensor或者Initializer。 +变量张量的值在网络中可以被更新,用来表示需要被更新的参数,MindSpore使用Tensor的子类Parameter构造变量张量,构造时支持传入Tensor、Initializer或者Number。 代码样例如下: @@ -83,21 +83,24 @@ from mindspore.common.initializer import initializer x = Tensor(np.arange(2*3).reshape((2, 3))) y = Parameter(x, name="x") z = Parameter(initializer('ones', [1, 2, 3], mstype.float32), name='y') +m = Parameter(2.0, name='m') -print(x, "\n\n", y, "\n\n", z) +print(x, "\n\n", y, "\n\n", z, "\n\n", m) ``` 输出如下: ``` [[0 1 2] - [3 4 5]] + [3 4 5]] -Parameter (name=x, value=[[0 1 2] - [3 4 5]]) +Parameter (name=x, value=[[0 1 2] + [3 4 5]]) Parameter (name=y, value=[[[1. 1. 1.] [1. 1. 1.]]]) + +Parameter (name=m, value=2.0) ``` ## 张量的属性和方法 @@ -152,9 +155,9 @@ print(x_all, "\n\n", x_any, "\n\n", x_array) 输出如下: ``` -False +False -True +True [[ True True] [False False]] @@ -190,9 +193,9 @@ True ``` [[1. 1.] - [1. 1.]] + [1. 
1.]] - 1.0 + 1.0 [1 2 3] ``` @@ -270,17 +273,17 @@ True ``` [[[0 1 2] - [3 4 5]]] + [3 4 5]]] [[[0 1] [2 3] - [4 5]]] + [4 5]]] [[[[0 1 2] - [3 4 5]]]] + [3 4 5]]]] [[0 1 2] - [3 4 5]] + [3 4 5]] [[[0 3]] [[1 4]] @@ -315,17 +318,17 @@ True ``` [[0 1 2] - [3 4 5]] + [3 4 5]] [[[0 1 2] [3 4 5]] [[0 1 2] - [3 4 5]]] + [3 4 5]]] [[0 1 2] [3 4 5] [0 1 2] - [3 4 5]] + [3 4 5]] (Tensor(shape=[1, 3], dtype=Int64, [[0 1 2]]), Tensor(shape=[1, 3], dtype=Int64, [[3 4 5]])) ``` @@ -357,7 +360,7 @@ print(x, "\n\n", y) ``` [[0 1 2] - [3 4 5]] + [3 4 5]] [[0 1 2 0 1 2 0 1 2] [3 4 5 3 4 5 3 4 5] diff --git a/docs/source_en/FAQ.md b/docs/source_en/FAQ.md index 681d68614b8840f572c53221c748d1fe4bb01157..6478958c1882e17cafa51e5d0586c4132527e972 100644 --- a/docs/source_en/FAQ.md +++ b/docs/source_en/FAQ.md @@ -1,5 +1,7 @@ # FAQ +`Ascend` `GPU` `CPU` `Environmental Setup` `Model Export` `Model Training` `Beginner` `Intermediate` `Expert` + - [FAQ](#faq) @@ -16,6 +18,7 @@ - [Supported Features](#supported-features) + ## Installation diff --git a/docs/source_en/design/mindinsight/graph_visual_design.md b/docs/source_en/design/mindinsight/graph_visual_design.md index d4d4efaf3a47cf51c0b77522bde76dc94ae5f8e7..1aa1ab694d04bbb8d7d4026780b254cb6add957e 100644 --- a/docs/source_en/design/mindinsight/graph_visual_design.md +++ b/docs/source_en/design/mindinsight/graph_visual_design.md @@ -1,6 +1,6 @@ # Computational Graph Visualization Design -`Ascend` `GPU` `Model Development` `Model Optimization` `Framework Development` `Intermediate` `Expert` `Contributor` +`Ascend` `GPU` `CPU` `Model Development` `Model Optimization` `Framework Development` `Intermediate` `Expert` `Contributor` diff --git a/docs/source_en/design/mindinsight/tensor_visual_design.md b/docs/source_en/design/mindinsight/tensor_visual_design.md index 3f3a21e5eeccf1bdc1b926b28cde8ba048acb67f..8117ef8e0e0e19e2353c933b4c0d8998e9c83e4d 100644 --- a/docs/source_en/design/mindinsight/tensor_visual_design.md +++ 
b/docs/source_en/design/mindinsight/tensor_visual_design.md @@ -1,6 +1,6 @@ # Tensor Visualization Design -`Ascend` `GPU` `Model Development` `Model Optimization` `Framework Development` `Intermediate` `Expert` `Contributor` +`Ascend` `GPU` `CPU` `Model Development` `Model Optimization` `Framework Development` `Intermediate` `Expert` `Contributor` diff --git a/docs/source_en/design/mindinsight/training_visual_design.md b/docs/source_en/design/mindinsight/training_visual_design.md index fdc47ea1e516d92e509fa6d49a50b5f83ac7ad38..0b19c78cff668b7ad76c49b1c3980aaebd82a3de 100644 --- a/docs/source_en/design/mindinsight/training_visual_design.md +++ b/docs/source_en/design/mindinsight/training_visual_design.md @@ -1,5 +1,7 @@ # Overall Design of Training Visualization +`Ascend` `GPU` `CPU` `Model Development` `Model Optimization` `Framework Development` `Intermediate` `Expert` `Contributor` + - [Overall Design of Training Visualization](#overall-design-of-training-visualization) diff --git a/docs/source_en/network_list.md b/docs/source_en/network_list.md index 916efdde0c895d7038a1c16cc6e821e6784ad9cd..d175f22cdbd380534450f4314fcc698c39f6e787 100644 --- a/docs/source_en/network_list.md +++ b/docs/source_en/network_list.md @@ -23,6 +23,7 @@ |Computer Vision (CV) | Image Classification | [ResNet-101](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnet/src/resnet.py) | Supported |Doing | Doing |Computer Vision (CV) | Image Classification | [ResNext50](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnext50/src/image_classification.py) | Supported | Supported | Doing | Computer Vision (CV) | Image Classification | [VGG16](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/vgg16/src/vgg.py) | Supported | Doing | Doing +| Computer Vision (CV) | Image Classification | [InceptionV3](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/inceptionv3/src/inception_v3.py) | Supported | 
Supported | Doing | Computer Vision (CV) | Mobile Image Classification
Image Classification
Semantic Segmentation | [MobileNetV2](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/mobilenetv2/src/mobilenetV2.py) | Supported | Supported | Doing | Computer Vision (CV) | Mobile Image Classification
Image Classification
Semantic Segmentation | [MobileNetV3](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/mobilenetv3/src/mobilenetV3.py) | Doing | Supported | Doing |Computer Vision (CV) | Targets Detection | [SSD](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/ssd/src/ssd.py) | Supported |Doing | Doing diff --git a/docs/source_en/operator_list.md b/docs/source_en/operator_list.md index b4c760c3175107added8c5376538b050a492c89f..3a79b0a2b9e79d7375608e4ed9a421cf1360a1f4 100644 --- a/docs/source_en/operator_list.md +++ b/docs/source_en/operator_list.md @@ -102,7 +102,7 @@ | [mindspore.ops.operations.Acosh](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Acosh) | Doing | Doing | Doing | nn_ops | [mindspore.ops.operations.FloorMod](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FloorMod) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.Elu](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Elu) | Supported | Doing | Doing | nn_ops -| [mindspore.ops.operations.MirrorPad](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.MirrorPad) | Doing | Doing | Doing | nn_ops +| [mindspore.ops.operations.MirrorPad](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.MirrorPad) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.Unpack](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Unpack) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.Pack](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pack) | Supported | Doing | Doing | nn_ops | 
[mindspore.ops.operations.L2Loss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.L2Loss) | Supported | Doing | Doing | nn_ops @@ -184,8 +184,8 @@ | [mindspore.ops.operations.Mul](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Mul) | Supported | Supported | Supported | math_ops | [mindspore.ops.operations.Square](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Square) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.SquareSumAll](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SquareSumAll) | Supported | Doing | Doing | math_ops -| [mindspore.ops.operations.Rsqrt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Rsqrt) | Supported | Supported | Doing | math_ops -| [mindspore.ops.operations.Sqrt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Sqrt) | Supported | Supported | Doing | math_ops +| [mindspore.ops.operations.Rsqrt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Rsqrt) | Supported | Doing | Doing | math_ops +| [mindspore.ops.operations.Sqrt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Sqrt) | Supported | Doing | Doing | math_ops | [mindspore.ops.operations.Reciprocal](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Reciprocal) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.Pow](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pow) | Supported | 
Supported | Doing | math_ops | [mindspore.ops.operations.Exp](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Exp) | Supported | Supported | Doing | math_ops diff --git a/docs/source_en/roadmap.md b/docs/source_en/roadmap.md index a9525b2e967e301fdc33201d7263ac9a1c896a9e..befe13ad85fa0bcbd51743290f7a8b7af42498dc 100644 --- a/docs/source_en/roadmap.md +++ b/docs/source_en/roadmap.md @@ -69,11 +69,14 @@ We sincerely hope that you can join the discussion in the user community and con * Protect data privacy during training and inference. ## Inference Framework -* Support TensorFlow, Caffe, and ONNX model formats. -* Support iOS. -* Improve more CPU operators. -* Support more CV/NLP models. -* Online learning. -* Support deployment on IoT devices. -* Low-bit quantization. -* CPU and NPU heterogeneous scheduling. +* Continuously optimize operators and add more operators. +* Support NLP neural networks. +* Visualization for MindSpore Lite models. +* MindSpore Micro, an ultra-lightweight solution that supports ARM Cortex-A and Cortex-M. +* Support re-training and federated learning on mobile devices. +* Support auto-parallel. +* MindData on mobile devices, which supports image resizing and pixel data transformation. +* Support post-training quantization, which enables mixed-precision inference to improve performance. +* Support Kirin NPU and MTK APU. +* Support pipelined inference for multiple models. +* C++ API for model construction. 
diff --git a/docs/source_zh_cn/FAQ.md b/docs/source_zh_cn/FAQ.md index 9873b933d666d18b00026515f9cfd14a228f23f5..1698974891db256511ea819d05b944e46ef69ae7 100644 --- a/docs/source_zh_cn/FAQ.md +++ b/docs/source_zh_cn/FAQ.md @@ -1,5 +1,7 @@ # FAQ +`Ascend` `GPU` `CPU` `环境准备` `模型导出` `模型训练` `初级` `中级` `高级` + - [FAQ](#faq) @@ -16,6 +18,7 @@ - [特性支持](#特性支持) + ## 安装类 diff --git a/docs/source_zh_cn/design/mindinsight/graph_visual_design.md b/docs/source_zh_cn/design/mindinsight/graph_visual_design.md index b90f8d882ceccfc679c2f91d77fc8d2351cc9a0e..fe3e6d334cb57caef2d76f013dd150f9c1f39414 100644 --- a/docs/source_zh_cn/design/mindinsight/graph_visual_design.md +++ b/docs/source_zh_cn/design/mindinsight/graph_visual_design.md @@ -1,6 +1,6 @@ # 计算图可视设计 -`Ascend` `GPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` +`Ascend` `GPU` `CPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` diff --git a/docs/source_zh_cn/design/mindinsight/profiler_design.md b/docs/source_zh_cn/design/mindinsight/profiler_design.md index cc13bf9eea14a67d40f2672f517e078d6764e526..8bfd00397831e4fc25bab87fd25af3b27acc28fe 100644 --- a/docs/source_zh_cn/design/mindinsight/profiler_design.md +++ b/docs/source_zh_cn/design/mindinsight/profiler_design.md @@ -1,5 +1,7 @@ # Profiler设计文档 +`Ascend` `GPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` + - [Profiler设计文档](#profiler设计文档) diff --git a/docs/source_zh_cn/design/mindinsight/tensor_visual_design.md b/docs/source_zh_cn/design/mindinsight/tensor_visual_design.md index d84cb8ba7cd23c97dd2a5ca4398128f36b3105a5..eca40e518ca471120ad52ed0b78abb40ab4c00a6 100644 --- a/docs/source_zh_cn/design/mindinsight/tensor_visual_design.md +++ b/docs/source_zh_cn/design/mindinsight/tensor_visual_design.md @@ -1,6 +1,6 @@ # 张量可视设计 -`Ascend` `GPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` +`Ascend` `GPU` `CPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` diff --git a/docs/source_zh_cn/design/mindinsight/training_visual_design.md b/docs/source_zh_cn/design/mindinsight/training_visual_design.md index 
1c86233723b7bd456efd5b7790279f828351d841..8dae35eef0244c8f66322912bf1464e53ade5965 100644 --- a/docs/source_zh_cn/design/mindinsight/training_visual_design.md +++ b/docs/source_zh_cn/design/mindinsight/training_visual_design.md @@ -1,6 +1,6 @@ # 训练可视总体设计 -`Ascend` `GPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` +`Ascend` `GPU` `CPU` `模型开发` `模型调优` `框架开发` `中级` `高级` `贡献者` diff --git a/docs/source_zh_cn/design/mindspore/distributed_training_design.md b/docs/source_zh_cn/design/mindspore/distributed_training_design.md index ae38fdd6bc47fb2215bcdc931fa0d46c953f9af0..ab026a6526ac6fc4d4d113e102e24e0fa945eb68 100644 --- a/docs/source_zh_cn/design/mindspore/distributed_training_design.md +++ b/docs/source_zh_cn/design/mindspore/distributed_training_design.md @@ -29,7 +29,7 @@ ### 集合通信 -集合通信指在一组进程间通信,组内所有进程满足一定规则的发送和接收数据。MindSpore通过集合通信的方式进行并行训练过程中的数据传输工作,在Ascend芯片上它依赖于华为集合通信库HCCL完成。 +集合通信指在一组进程间通信,组内所有进程满足一定规则的发送和接收数据。MindSpore通过集合通信的方式进行并行训练过程中的数据传输工作,在Ascend芯片上它依赖于华为集合通信库`HCCL`完成,在GPU上它依赖于英伟达集合通信库`NCCL`完成。 ### 同步模式 @@ -41,11 +41,11 @@ ### 数据并行原理 -
数据并行图解
+![数据并行图解](./images/data_parallel.png) 1. 环境依赖 - 每次开始进行并行训练前,通过调用`mindspore.communication.init`接口初始化通信资源,并自动创建全局通信组`HCCL_WORLD_GROUP`。 + 每次开始进行并行训练前,通过调用`mindspore.communication.init`接口初始化通信资源,并自动创建全局通信组`WORLD_COMM_GROUP`。 2. 数据分发 @@ -77,28 +77,28 @@ ## 自动并行 -自动并行作为MindSpore的关键特性,用于实现自动的数据并行加模型并行的混合并行训练方式,旨在帮助用户以单机的脚本表达并行算法逻辑,降低分布式训练难度,提高算法研发效率,同时又能保持训练的高性能。 +自动并行作为MindSpore的关键特性,用于实现自动的数据并行加模型并行的混合并行训练方式,旨在帮助用户以单机的脚本表达并行算法逻辑,降低分布式训练难度,提高算法研发效率,同时又能保持训练的高性能。这个小节介绍了在MindSpore中`ParallelMode.AUTO_PARALLEL`自动并行模式及`ParallelMode.SEMI_AUTO_PARALLEL`半自动并行模式是如何工作的。 ### 自动并行原理 -自动并行架构图 +![自动并行图解](./images/auto_parallel_design.png) 1. 通用的张量排布模型 在上面的架构图中,自动并行流程会对单机的正向计算图(ANF Graph)进行遍历,以算子(Distributed Operator)为单位对张量进行切分建模,表示一个算子的输入输出张量如何分布到集群各个卡上(Tensor Layout)。这种模型充分地表达了张量和设备间的映射关系,并且可以通过算法推导得到任意排布的张量间通信转换方式(Tensor Redistribution)。 为了得到张量的排布模型,每个算子都具有切分策略(Parallel Strategy),它表示算子的各个输入在相应维度的切分情况。通常情况下只要满足以2为基、均匀分配的原则,张量的任意维度均可切分。以下图为例,这是一个三维矩阵乘操作,它的切分策略由两个元组构成,分别表示`input`和`weight`的切分形式。其中元组中的元素与张量维度一一对应,`2^N`为切分份数,`1`表示不切。当我们想表示一个数据并行切分策略时,即`input`的`batch`维度切分,其他维度不切,可以表达为`strategy=((2^N, 1, 1),(1, 1, 1))`;当表示一个模型并行切分策略时,即`weight`的`channel`维度切分,其他维度不切,可以表达为`strategy=((1, 1, 1),(1, 1, 2^N))`;当表示一个混合并行切分策略时,可以表达为`strategy=((2^N, 1, 1),(1, 1, 2^N))`。 - 算子切分定义 - + ![算子切分定义](./images/operator_split.png) + 依据算子的切分策略,框架将自动推导得到算子输入张量和输出张量的排布模型。这个排布模型由`device_matrix`,`tensor_shape`和`tensor map`组成,分别表示设备矩阵形状、张量形状、设备和张量维度间的映射关系。根据排布模型框架可以自动实现对整图的切分,并推导插入算子内张量重复计算及算子间不同排布的张量变换所需要的通信操作。以数据并行转模型并行为例,第一个数据并行矩阵乘的输出在`batch`维度存在切分,而第二个模型并行矩阵乘的输入需要全量张量,框架将会自动插入`AllGather`算子实现排布变换。 - 张量排布变换 + ![张量排布变换](./images/tensor_redistribution.png) 总体来说这种分布式表达打破了数据并行和模型并行的边界,轻松实现混合并行。并且用户无需感知模型各切片放到哪个设备上运行,框架会自动调度分配。从脚本层面上,用户仅需构造单机网络,即可表达并行算法逻辑。 2. 
高效的并行策略搜索算法 - 当用户熟悉了算子的切分表达,并手动对算子配置切分策略,这就是`SEMI_AUTO_PARALLEL`半自动并行模式。这种方式对手动调优有帮助,但还是具有一定的调试难度,用户需要掌握并行原理,并根据网络结构、集群拓扑等计算分析得到高性能的并行方案。为了进一步帮助用户加速并行网络训练过程,在半自动并行模式的基础上,`AUTO_PARALLEL`自动并行模式引入了并行切分策略自动搜索的特性。自动并行围绕昇腾AI处理器构建代价函数模型(Cost Model),计算出一定数据量、一定算子在不同切分策略下的计算开销(Computation Cost),内存开销(Memory Cost)及通信开销(Communication Cost)。然后通过动态规划算法(Dynamic Programming),以单卡的内存上限为约束条件,高效地搜索出性能较优的切分策略。 + 当用户熟悉了算子的切分表达,并手动对算子配置切分策略,这就是`SEMI_AUTO_PARALLEL`半自动并行模式。这种方式对手动调优有帮助,但还是具有一定的调试难度,用户需要掌握并行原理,并根据网络结构、集群拓扑等计算分析得到高性能的并行方案。为了进一步帮助用户加速并行网络训练过程,在半自动并行模式的基础上,`AUTO_PARALLEL`自动并行模式引入了并行切分策略自动搜索的特性。自动并行围绕硬件平台构建相应的代价函数模型(Cost Model),计算出一定数据量、一定算子在不同切分策略下的计算开销(Computation Cost),内存开销(Memory Cost)及通信开销(Communication Cost)。然后通过动态规划算法(Dynamic Programming)或者递归规划算法(Recursive Programming),以单卡的内存上限为约束条件,高效地搜索出性能较优的切分策略。 策略搜索这一步骤代替了用户手动指定模型切分,在短时间内可以得到较高性能的切分方案,极大降低了并行训练的使用门槛。 diff --git a/docs/source_zh_cn/design/mindspore/images/auto_parallel.png b/docs/source_zh_cn/design/mindspore/images/auto_parallel.png deleted file mode 100644 index 544e65eee9b5a6ac984ff2315022135ce7cd4456..0000000000000000000000000000000000000000 Binary files a/docs/source_zh_cn/design/mindspore/images/auto_parallel.png and /dev/null differ diff --git a/docs/source_zh_cn/design/mindspore/images/auto_parallel_design.png b/docs/source_zh_cn/design/mindspore/images/auto_parallel_design.png new file mode 100644 index 0000000000000000000000000000000000000000..7f1006775ed0006208ae1086b8cd062b463237d6 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/auto_parallel_design.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/operator_split.png b/docs/source_zh_cn/design/mindspore/images/operator_split.png index 1fe2fda44fc148c7443b5c6dd6f95a3a0a2a1e99..51dd93cd853cda7b2b284e4955bbc941f3c1b1dd 100644 Binary files a/docs/source_zh_cn/design/mindspore/images/operator_split.png and b/docs/source_zh_cn/design/mindspore/images/operator_split.png differ diff --git 
a/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png b/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png index 86b4630bb52146479ec4c0f766059d22db12bf10..ce4485c8cbf91c721c9dd19ab105a58cd3d18d22 100644 Binary files a/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png and b/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png differ diff --git a/docs/source_zh_cn/network_list.md b/docs/source_zh_cn/network_list.md index 6e5954b63a4f07f43f47273607a92e8ed130fea1..af294ee4e3e894ca2d75f9026e44dcd4df18ed0f 100644 --- a/docs/source_zh_cn/network_list.md +++ b/docs/source_zh_cn/network_list.md @@ -23,6 +23,7 @@ |计算机视觉(CV) | 图像分类(Image Classification) | [ResNet-101](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnet/src/resnet.py) | Supported |Doing | Doing |计算机视觉(CV) | 图像分类(Image Classification) | [ResNext50](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnext50/src/image_classification.py) | Supported | Supported | Doing | 计算机视觉(CV) | 图像分类(Image Classification) | [VGG16](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/vgg16/src/vgg.py) | Supported | Doing | Doing +| 计算机视觉(CV) | 图像分类(Image Classification) | [InceptionV3](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/inceptionv3/src/inception_v3.py) | Supported | Supported | Doing | 计算机视觉(CV) | 移动端图像分类(Mobile Image Classification)
目标检测(Image Classification)
语义分割(Semantic Segmentation) | [MobileNetV2](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/mobilenetv2/src/mobilenetV2.py) | Supported | Supported | Doing | 计算机视觉(CV) | 移动端图像分类(Mobile Image Classification)
目标检测(Image Classification)
语义分割(Semantic Segmentation) | [MobileNetV3](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/mobilenetv3/src/mobilenetV3.py) | Doing | Supported | Doing |计算机视觉(CV) | 目标检测(Targets Detection) | [SSD](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/ssd/src/ssd.py) | Supported |Doing | Doing diff --git a/docs/source_zh_cn/operator_list.md b/docs/source_zh_cn/operator_list.md index 6244a7101f59c9705ec876ff3271bc7774026c14..db8e29e3935556c95e4aabc477cc297d2561c8d0 100644 --- a/docs/source_zh_cn/operator_list.md +++ b/docs/source_zh_cn/operator_list.md @@ -102,7 +102,7 @@ | [mindspore.ops.operations.Acosh](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Acosh) | Doing | Doing | Doing | nn_ops | [mindspore.ops.operations.FloorMod](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FloorMod) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.Elu](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Elu) | Supported | Doing | Doing | nn_ops -| [mindspore.ops.operations.MirrorPad](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.MirrorPad) | Doing | Doing | Doing | nn_ops +| [mindspore.ops.operations.MirrorPad](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.MirrorPad) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.Unpack](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Unpack) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.Pack](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pack) | Supported| Doing | 
Doing | nn_ops | [mindspore.ops.operations.L2Loss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.L2Loss) | Supported | Doing | Doing | nn_ops @@ -184,8 +184,8 @@ | [mindspore.ops.operations.Mul](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Mul) | Supported | Supported | Supported | math_ops | [mindspore.ops.operations.Square](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Square) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.SquareSumAll](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SquareSumAll) | Supported | Doing | Doing | math_ops -| [mindspore.ops.operations.Rsqrt](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Rsqrt) | Supported | Supported | Doing | math_ops -| [mindspore.ops.operations.Sqrt](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Sqrt) | Supported | Supported | Doing | math_ops +| [mindspore.ops.operations.Rsqrt](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Rsqrt) | Supported | Doing | Doing | math_ops +| [mindspore.ops.operations.Sqrt](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Sqrt) | Supported | Doing | Doing | math_ops | [mindspore.ops.operations.Reciprocal](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Reciprocal) | Supported | Supported | Doing | math_ops | 
[mindspore.ops.operations.Pow](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pow) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.Exp](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Exp) | Supported | Supported | Doing | math_ops diff --git a/docs/source_zh_cn/roadmap.md b/docs/source_zh_cn/roadmap.md index a985100c4d335268130a07263065889bf6cdf7bb..1dfd7945b1079c44d1d312414d8a0f2368fd7e87 100644 --- a/docs/source_zh_cn/roadmap.md +++ b/docs/source_zh_cn/roadmap.md @@ -70,11 +70,14 @@ * 保护训练和推理过程中的数据隐私 ## 推理框架 -* 提供Tensorflow/Caffe/ONNX模型格式支持 -* IOS系统支持 -* 完善更多的CPU算子 -* 更多CV/NLP模型支持 -* 在线学习 -* 支持部署在IOT设备 -* 低比特量化 -* CPU和NPU异构调度 +* 算子性能与完备度的持续优化 +* 支持语音模型推理 +* 端侧模型的可视化 +* Micro方案,适用于嵌入式系统的超轻量化推理, 支持ARM Cortex-A、Cortex-M硬件 +* 支持端侧重训及联邦学习 +* 端侧自动并行特性 +* 端侧MindData,包含图片Resize、像素数据转换等功能 +* 配套MindSpore混合精度量化训练(或训练后量化),实现混合精度推理,提升推理性能 +* 支持Kirin NPU、MTK APU等AI加速硬件 +* 支持多模型推理pipeline +* C++构图接口 diff --git a/install/mindspore_cpu_install.md b/install/mindspore_cpu_install.md index e31b2a72edd0edfc3a4879fc958d6267adc19651..721cad66b18b2db7b7fdc312765fc60c4a5db594 100644 --- a/install/mindspore_cpu_install.md +++ b/install/mindspore_cpu_install.md @@ -97,7 +97,7 @@ | 版本号 | 操作系统 | 可执行文件安装依赖 | 源码编译安装依赖 | | ---------------------- | :------------------ | :----------------------------------------------------------- | :----------------------- | -| MindArmour master | Ubuntu 18.04 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- 其他依赖项参见[setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py) | 与可执行文件安装依赖相同 | +| MindArmour master | - Ubuntu 18.04 x86_64
- Ubuntu 18.04 aarch64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- 其他依赖项参见[setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py) | 与可执行文件安装依赖相同 | - 在联网状态下,安装whl包时会自动下载`setup.py`中的依赖项,其余情况需自行安装。 diff --git a/install/mindspore_cpu_install_en.md b/install/mindspore_cpu_install_en.md index 3da8d9eff691e60b198680f084259888b8dffd20..d21f05bdeb3d451ff36588a8346a9753bdf831d6 100644 --- a/install/mindspore_cpu_install_en.md +++ b/install/mindspore_cpu_install_en.md @@ -97,7 +97,7 @@ If you need to conduct AI model security research or enhance the security of the | Version | Operating System | Executable File Installation Dependencies | Source Code Compilation and Installation Dependencies | | ---- | :--- | :--- | :--- | -| MindArmour master | Ubuntu 18.04 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- For details about other dependency items, see [setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py). | Same as the executable file installation dependencies. | +| MindArmour master | - Ubuntu 18.04 x86_64
- Ubuntu 18.04 aarch64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- For details about other dependency items, see [setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py). | Same as the executable file installation dependencies. | - When the network is connected, dependency items in the `setup.py` file are automatically downloaded during .whl package installation. In other cases, you need to manually install dependency items. diff --git a/install/mindspore_d_install.md b/install/mindspore_d_install.md index f355690cddd86d88ba89a4b8b417f14917acdd05..a0d6eb70a9f6d7f7c4452a593695e95c024c135f 100644 --- a/install/mindspore_d_install.md +++ b/install/mindspore_d_install.md @@ -32,7 +32,7 @@ | 版本号 | 操作系统 | 可执行文件安装依赖 | 源码编译安装依赖 | | ---- | :--- | :--- | :--- | -| MindSpore master | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- CentOS 7.6 aarch64
- CentOS 7.6 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- 其他依赖项参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt) | **编译依赖:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**安装依赖:**
与可执行文件安装依赖相同 | +| MindSpore master | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- 其他依赖项参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt) | **编译依赖:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**安装依赖:**
与可执行文件安装依赖相同 | - 确认当前用户有权限访问Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))的安装路径`/usr/local/Ascend`,若无权限,需要root用户将当前用户添加到`/usr/local/Ascend`所在的用户组,具体配置请详见配套软件包的说明文档。 - GCC 7.3.0可以直接通过apt命令安装。 diff --git a/install/mindspore_d_install_en.md b/install/mindspore_d_install_en.md index 4eb1e3ae067791ac98787314d78af52cfc1999f6..827f23bb76e748d6304eabf3444d7974956bc0a1 100644 --- a/install/mindspore_d_install_en.md +++ b/install/mindspore_d_install_en.md @@ -32,7 +32,7 @@ This document describes how to quickly install MindSpore in an Ascend AI process | Version | Operating System | Executable File Installation Dependencies | Source Code Compilation and Installation Dependencies | | ---- | :--- | :--- | :--- | -| MindSpore master | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- CentOS 7.6 aarch64
- CentOS 7.6 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt). | **Compilation dependencies:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**Installation dependencies:**
same as the executable file installation dependencies. | +| MindSpore master | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package (Version: [Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt). | **Compilation dependencies:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package (Version: [Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**Installation dependencies:**
same as the executable file installation dependencies. | - Confirm that the current user has the right to access the installation path `/usr/local/Ascend `of Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816)). If not, the root user needs to add the current user to the user group where `/usr/local/Ascend` is located. For the specific configuration, please refer to the software package instruction document. - GCC 7.3.0 can be installed by using apt command. diff --git a/lite/docs/source_en/operator_list.md b/lite/docs/source_en/operator_list.md index 1c7383aad2f3457a31b7c89de4efbfbfce1d2d73..ec17368005a3b9f7edba9850c45ddae53229d32c 100644 --- a/lite/docs/source_en/operator_list.md +++ b/lite/docs/source_en/operator_list.md @@ -4,121 +4,108 @@ > √ The checked items are the operators supported by MindSpore Lite。 -| Operation | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | Operator category | Tensorflow
Lite op supported | Caffe
Lite op supported | Onnx
Lite op supported | -|-----------------------|----------|----------|----------|-----------|----------|----------|------------------|----------|----------|----------| -| Abs | | √ | √ | √ | | | math_ops | Abs | | Abs | -| Add | | | | | | √ | | Add | | Add | -| AddN | | √ | | | | | math_ops | AddN | | | -| Argmax | | √ | √ | √ | | | array_ops | Argmax | ArgMax | ArgMax | -| Argmin | | √ | | | | | array_ops | Argmin | | | -| Asin | | | | | | | | | | Asin | -| Atan | | | | | | | | | | Atan | -| AvgPool | | √ | √ | √ | | √ | nn_ops | MeanPooling | Pooling | AveragePool | -| BatchMatMul | √ | √ | √ | √ | | | math_ops | | | | -| BatchNorm | | √ | | | | √ | nn_ops | | BatchNorm | BatchNormalization | -| BatchToSpace | | | | | | | array_ops | BatchToSpace, BatchToSpaceND | | | -| BatchToSpaceND | | | | | | | | | | | -| BiasAdd | | √ | | √ | | √ | nn_ops | | | BiasAdd | -| Broadcast | | √ | | | | | comm_ops | BroadcastTo | | Expand | -| Cast | | √ | | | | | array_ops | Cast, DEQUANTIZE* | | Cast | -| Ceil | | √ | | √ | | | math_ops | Ceil | | Ceil | -| Concat | | √ | √ | √ | | √ | array_ops | Concat | Concat | Concat | -| Constant | | | | | | | | | | Constant | -| Conv1dTranspose | | | | √ | | | layer/conv | | | | -| Conv2d | √ | √ | √ | √ | | √ | layer/conv | Conv2D | Convolution | Conv | -| Conv2dTranspose | | √ | √ | √ | | √ | layer/conv | DeConv2D | Deconvolution | ConvTranspose | -| Cos | | √ | √ | √ | | | math_ops | Cos | | Cos | -| Crop | | | | | | | | | Crop | | -| DeDepthwiseConv2D | | | | | | | | | Deconvolution| ConvTranspose | -| DepthToSpace | | | | | | | | DepthToSpace | | DepthToSpace | -| DepthwiseConv2dNative | √ | √ | √ | √ | | √ | nn_ops | DepthwiseConv2D | Convolution | Convolution | -| Div | | √ | √ | √ | | √ | math_ops | Div | | Div | -| Dropout | | | | | | | | | | Dropout | -| Eltwise | | | | | | | | | Eltwise | | -| Elu | | | | | | | | Elu | | Elu | -| Equal | | √ | √ | √ | | | math_ops | Equal | | Equal | -| Exp | | √ | | | | | math_ops | Exp | | Exp 
| -| ExpandDims | | √ | | | | | array_ops | | | | -| Fill | | √ | | | | | array_ops | Fill | | | -| Flatten | | | | | | | | | Flatten | | -| Floor | | √ | √ | √ | | | math_ops | flOOR | | Floor | -| FloorDiv | | √ | | | | | math_ops | FloorDiv | | | -| FloorMod | | √ | | | | | nn_ops | FloorMod | | | -| FullConnection | | √ | | | | | layer/basic | FullyConnected | InnerProduct | | -| GatherNd | | √ | | | | | array_ops | GatherND | | | -| GatherV2 | | √ | | | | | array_ops | Gather | | Gather | -| Greater | | √ | √ | √ | | | math_ops | Greater | | Greater | -| GreaterEqual | | √ | √ | √ | | | math_ops | GreaterEqual | | | -| Hswish | | | | | | | | HardSwish | | | -| L2norm | | | | | | | | L2_NORMALIZATION | | | -| LeakyReLU | | √ | | | | √ | layer/activation | LeakyRelu | | LeakyRelu | -| Less | | √ | √ | √ | | | math_ops | Less | | Less | -| LessEqual | | √ | √ | √ | | | math_ops | LessEqual | | | -| LocalResponseNorm | | | | | | | | LocalResponseNorm | | Lrn | -| Log | | √ | √ | √ | | | math_ops | Log | | Log | -| LogicalAnd | | √ | | | | | math_ops | LogicalAnd | | | -| LogicalNot | | √ | √ | √ | | | math_ops | LogicalNot | | | -| LogicalOr | | √ | | | | | math_ops | LogicalOr | | | -| LSTM | | √ | | | | | layer/lstm | | | | -| MatMul | √ | √ | √ | √ | | √ | math_ops | | | MatMul | -| Maximum | | | | | | | math_ops | Maximum | | Max | -| MaxPool | | √ | √ | √ | | √ | nn_ops | MaxPooling | Pooling | MaxPool | -| Minimum | | | | | | | math_ops | Minimum | | Min | -| Mul | | √ | √ | √ | | √ | math_ops | Mul | | Mul | -| Neg | | | | | | | math_ops | | | Neg | -| NotEqual | | √ | √ | √ | | | math_ops | NotEqual | | | -| OneHot | | √ | | | | | layer/basic | OneHot | | | -| Pack | | √ | | | | | nn_ops | | | | -| Pad | | √ | √ | √ | | | nn_ops | Pad | | Pad | -| Pow | | √ | √ | √ | | | math_ops | Pow | Power | Power | -| PReLU | | √ | √ | √ | | √ | layer/activation | Prelu | PReLU | PRelu | -| Range | | √ | | | | | layer/basic | Range | | | -| Rank | | √ | | | | | 
array_ops | Rank | | | -| RealDiv | | √ | √ | √ | | √ | math_ops | RealDiv | | | -| ReduceMax | | √ | √ | √ | | | math_ops | ReduceMax | | ReduceMax | -| ReduceMean | | √ | √ | √ | | | math_ops | Mean | | ReduceMean | -| ReduceMin | | √ | √ | √ | | | math_ops | ReduceMin | | ReduceMin | -| ReduceProd | | √ | √ | √ | | | math_ops | ReduceProd | | | -| ReduceSum | | √ | √ | √ | | | math_ops | Sum | | ReduceSum | -| ReLU | | √ | √ | √ | | √ | layer/activation | Relu | ReLU | Relu | -| ReLU6 | | √ | | | | √ | layer/activation | Relu6 | ReLU6 | Clip* | -| Reshape | | √ | √ | √ | | √ | array_ops | Reshape | Reshape | Reshape,Flatten | -| Resize | | | | | | | | ResizeBilinear, NearestNeighbor | Interp | | -| Reverse | | | | | | | | reverse | | | -| ReverseSequence | | √ | | | | | array_ops | ReverseSequence | | | -| Round | | √ | | √ | | | math_ops | Round | | | -| Rsqrt | | √ | √ | √ | | | math_ops | Rsqrt | | | -| Scale | | | | | | | | | Scale | | -| ScatterNd | | √ | | | | | array_ops | ScatterNd | | | -| Shape | | √ | | √ | | | array_ops | Shape | | Shape | -| Sigmoid | | √ | √ | √ | | √ | nn_ops | Logistic | Sigmoid | Sigmoid | -| Sin | | | | | | | | Sin | | Sin | -| Slice | | √ | √ | √ | | √ | array_ops | Slice | | Slice | -| Softmax | | √ | √ | √ | | √ | layer/activation | Softmax | Softmax | Softmax | -| SpaceToBatchND | | √ | | | | | array_ops | SpaceToBatchND | | | -| SpareToDense | | | | | | | | SpareToDense | | | -| SpaceToDepth | | √ | | | | | array_ops | SpaceToDepth | | SpaceToDepth | -| Split | | √ | √ | √ | | | array_ops | Split, SplitV | | | -| Sqrt | | √ | √ | √ | | | math_ops | Sqrt | | Sqrt | -| Square | | √ | √ | √ | | | math_ops | Square | | | -| SquaredDifference | | | | | | | | SquaredDifference | | | -| Squeeze | | √ | √ | √ | | | array_ops | Squeeze | | Squeeze | -| StridedSlice | | √ | √ | √ | | | array_ops | StridedSlice | | | -| Stack | | | | | | | | Stack | | | -| Sub | | √ | √ | √ | | √ | math_ops | Sub | | Sub | -| Tan | | | | | | | | | | 
Tan | -| Tanh | | √ | | | | | layer/activation | Tanh | TanH | | -| TensorAdd | | √ | √ | √ | | √ | math_ops | | | | -| Tile | | √ | | | | | array_ops | Tile | | Tile | -| TopK | | √ | √ | √ | | | nn_ops | TopKV2 | | | -| Transpose | | √ | √ | √ | | √ | array_ops | Transpose | Permute | Transpose | -| Unique | | | | | | | | Unique | | | -| Unpack | | √ | | | | | nn_ops | | | | -| Unsample | | | | | | | | | | Unsample | -| Unsqueeze | | | | | | | | | | Unsqueeze | -| Unstack | | | | | | | | Unstack | | | -| Where | | | | | | | | Where | | | -| ZerosLike | | √ | | | | | array_ops | ZerosLike | | | +| Operation | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | Tensorflow
Lite op supported | Caffe
Lite op supported | Onnx
Lite op supported | +|-----------------------|----------|----------|-----------|----------|----------|------------------|----------|----------|----------| +| Abs | | √ | √ | √ | | | Abs | | Abs | +| Add | √ | √ | √ | √ | | √ | Add | | Add | +| AddN | | √ | | | | | AddN | | | +| Argmax | | √ | √ | √ | | | Argmax | ArgMax | ArgMax | +| Argmin | | √ | √ | √ | | | Argmin | | | +| AvgPool | √ | √ | √ | √ | | √ | MeanPooling| Pooling | AveragePool | +| BatchNorm | √ | √ | √ | √ | | √ | | BatchNorm | BatchNormalization | +| BatchToSpace | | √ | √ | √ | | | BatchToSpace, BatchToSpaceND | | | +| BiasAdd | | √ | √ | √ | | √ | | | BiasAdd | +| Broadcast | | √ | | | | | BroadcastTo | | Expand | +| Cast | √ | √ | | √ | | | Cast, DEQUANTIZE* | | Cast | +| Ceil | | √ | √ | √ | | | Ceil | | Ceil | +| Concat | √ | √ | √ | √ | | √ | Concat | Concat | Concat | +| Conv2d | √ | √ | √ | √ | | √ | Conv2D | Convolution | Conv | +| Conv2dTranspose | √ | √ | √ | √ | | √ | DeConv2D | Deconvolution | ConvTranspose | +| Cos | | √ | √ | √ | | | Cos | | Cos | +| Crop | | √ | √ | √ | | | | Crop | | +| DeDepthwiseConv2D | | √ | √ | √ | | | | Deconvolution| ConvTranspose | +| DepthToSpace | | √ | √ | √ | | | DepthToSpace| | DepthToSpace | +| DepthwiseConv2dNative | √ | √ | √ | √ | | √ | DepthwiseConv2D | Convolution | Convolution | +| Div | √ | √ | √ | √ | | √ | Div, RealDiv | | Div | +| Eltwise | √ | √ | | | | | | Eltwise | | +| Elu | | √ | | | | | Elu | | Elu | +| Equal | √ | √ | √ | √ | | | Equal | | Equal | +| Exp | | √ | | | | | Exp | | Exp | +| ExpandDims | | √ | | | | | | | | +| Fill | | √ | | | | | Fill | | | +| Flatten | | √ | | | | | | Flatten | | +| Floor | | √ | √ | √ | | | flOOR | | Floor | +| FloorDiv | √ | √ | | | | | FloorDiv | | | +| FloorMod | √ | √ | | | | | FloorMod | | | +| FullConnection | | √ | √ | √ | | | FullyConnected | InnerProduct | | +| GatherNd | | √ | √ | √ | | | GatherND | | | +| GatherV2 | | √ | √ | √ | | | Gather | | Gather | +| Greater | √ | √ | √ | √ | | | 
Greater | | Greater | +| GreaterEqual | √ | √ | √ | √ | | | GreaterEqual| | | +| Hswish | √ | √ | √ | √ | | | HardSwish | | | +| LeakyReLU | √ | √ | | | | √ | LeakyRelu | | LeakyRelu | +| Less | √ | √ | √ | √ | | | Less | | Less | +| LessEqual | √ | √ | √ | √ | | | LessEqual | | | +| LRN | | √ | | | | | LocalResponseNorm | | Lrn | +| Log | | √ | √ | √ | | | Log | | Log | +| LogicalAnd | √ | √ | | | | | LogicalAnd | | | +| LogicalNot | | √ | √ | √ | | | LogicalNot | | | +| LogicalOr | √ | √ | | | | | LogicalOr | | | +| LSTM | | √ | | | | | | | | +| MatMul | | √ | √ | √ | | √ | | | MatMul | +| Maximum | √ | √ | | | | | Maximum | | Max | +| MaxPool | √ | √ | √ | √ | | √ | MaxPooling | Pooling | MaxPool | +| Minimum | √ | √ | | | | | Minimum | | Min | +| Mul | √ | √ | √ | √ | | √ | Mul | | Mul | +| NotEqual | √ | √ | √ | √ | | | NotEqual | | | +| OneHot | | √ | | | | | OneHot | | | +| Pad | | √ | √ | √ | | | Pad | | Pad | +| Pow | | √ | √ | √ | | | Pow | Power | Power | +| PReLU | | √ | | | | √ | | PReLU | | +| Range | | √ | | | | | Range | | | +| Rank | | √ | | | | | Rank | | | +| ReduceMax | √ | √ | √ | √ | | | ReduceMax | | ReduceMax | +| ReduceMean | √ | √ | √ | √ | | | Mean | | ReduceMean | +| ReduceMin | √ | √ | √ | √ | | | ReduceMin | | ReduceMin | +| ReduceProd | √ | √ | √ | √ | | | ReduceProd | | | +| ReduceSum | √ | √ | √ | √ | | | Sum | | ReduceSum | +| ReduceSumSquare | √ | √ | √ | √ | | | | | | +| ReLU | √ | √ | √ | √ | | √ | Relu | ReLU | Relu | +| ReLU6 | √ | √ | √ | √ | | √ | Relu6 | ReLU6 | Clip* | +| Reshape | √ | √ | √ | √ | | √ | Reshape | Reshape | Reshape,Flatten | +| Resize | | √ | √ | √ | | | ResizeBilinear, NearestNeighbor | Interp | | +| Reverse | | √ | | | | | reverse | | | +| ReverseSequence | | √ | | | | | ReverseSequence | | | +| Round | | √ | √ | √ | | | Round | | | +| Rsqrt | | √ | √ | √ | | | Rsqrt | | | +| Scale | | √ | | | | | | Scale | | +| ScatterNd | | √ | | | | | ScatterNd | | | +| Shape | | √ | | | | | Shape | | Shape | +| 
Sigmoid | √ | √ | √ | √ | | √ | Logistic | Sigmoid | Sigmoid | +| Sin | | √ | √ | √ | | | Sin | | Sin | +| Slice | | √ | √ | √ | | √ | Slice | | Slice | +| Softmax | √ | √ | √ | √ | | √ | Softmax | Softmax | Softmax | +| SpaceToBatch | | √ | | | | | | | | +| SpaceToBatchND | | √ | | | | | SpaceToBatchND | | | +| SpaceToDepth | | √ | | | | | SpaceToDepth | | SpaceToDepth | +| SparseToDense | | √ | | | | | SparseToDense | | | +| Split | √ | √ | √ | √ | | | Split, SplitV | | | +| Sqrt | | √ | √ | √ | | | Sqrt | | Sqrt | +| Square | | √ | √ | √ | | | Square | | | +| SquaredDifference | | √ | | | | | SquaredDifference | | | +| Squeeze | | √ | √ | √ | | | Squeeze | | Squeeze | +| StridedSlice | | √ | √ | √ | | | StridedSlice| | | +| Stack | | √ | | | | | Stack | | | +| Sub | √ | √ | √ | √ | | √ | Sub | | Sub | +| Tanh | √ | √ | | | | | Tanh | TanH | | +| Tile | | √ | | | | | Tile | | Tile | +| TopK | | √ | √ | √ | | | TopKV2 | | | +| Transpose | √ | √ | | | | √ | Transpose | Permute | Transpose | +| Unique | | √ | | | | | Unique | | | +| Unsqueeze | | √ | √ | √ | | | | | Unsqueeze | +| Unstack | | √ | | | | | Unstack | | | +| Where | | √ | | | | | Where | | | +| ZerosLike | | √ | | | | | ZerosLike | | | * Clip: only support convert clip(0, 6) to Relu6. * DEQUANTIZE: only support to convert fp16 to fp32. 
diff --git a/lite/docs/source_zh_cn/index.rst b/lite/docs/source_zh_cn/index.rst index 08270a72e46b955616944d50149024f3765bf318..28634fb696c434f747949830a37d3efb2b0436e4 100644 --- a/lite/docs/source_zh_cn/index.rst +++ b/lite/docs/source_zh_cn/index.rst @@ -11,6 +11,5 @@ MindSpore端侧文档 :maxdepth: 1 architecture - roadmap operator_list glossary \ No newline at end of file diff --git a/lite/docs/source_zh_cn/operator_list.md b/lite/docs/source_zh_cn/operator_list.md index 0864989bc46b9182b5b90bb624021a1e756f0647..8f8d97056d4fd29ce8ed9f99122e873499ebd972 100644 --- a/lite/docs/source_zh_cn/operator_list.md +++ b/lite/docs/source_zh_cn/operator_list.md @@ -4,121 +4,108 @@ > √勾选的项为MindSpore Lite所支持的算子。 -| 操作名 | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | 算子类别 | 支持的Tensorflow
Lite op | 支持的Caffe
Lite op | 支持的Onnx
Lite op | -|-----------------------|----------|----------|----------|-----------|----------|----------|------------------|----------|----------|----------| -| Abs | | √ | √ | √ | | | math_ops | Abs | | Abs | -| Add | | | | | | √ | | Add | | Add | -| AddN | | √ | | | | | math_ops | AddN | | | -| Argmax | | √ | √ | √ | | | array_ops | Argmax | ArgMax | ArgMax | -| Argmin | | √ | | | | | array_ops | Argmin | | | -| Asin | | | | | | | | | | Asin | -| Atan | | | | | | | | | | Atan | -| AvgPool | | √ | √ | √ | | √ | nn_ops | MeanPooling | Pooling | AveragePool | -| BatchMatMul | √ | √ | √ | √ | | | math_ops | | | | -| BatchNorm | | √ | | | | √ | nn_ops | | BatchNorm | BatchNormalization | -| BatchToSpace | | | | | | | array_ops | BatchToSpace, BatchToSpaceND | | | -| BatchToSpaceND | | | | | | | | | | | -| BiasAdd | | √ | | √ | | √ | nn_ops | | | BiasAdd | -| Broadcast | | √ | | | | | comm_ops | BroadcastTo | | Expand | -| Cast | | √ | | | | | array_ops | Cast, DEQUANTIZE* | | Cast | -| Ceil | | √ | | √ | | | math_ops | Ceil | | Ceil | -| Concat | | √ | √ | √ | | √ | array_ops | Concat | Concat | Concat | -| Constant | | | | | | | | | | Constant | -| Conv1dTranspose | | | | √ | | | layer/conv | | | | -| Conv2d | √ | √ | √ | √ | | √ | layer/conv | Conv2D | Convolution | Conv | -| Conv2dTranspose | | √ | √ | √ | | √ | layer/conv | DeConv2D | Deconvolution | ConvTranspose | -| Cos | | √ | √ | √ | | | math_ops | Cos | | Cos | -| Crop | | | | | | | | | Crop | | -| DeDepthwiseConv2D | | | | | | | | | Deconvolution| ConvTranspose | -| DepthToSpace | | | | | | | | DepthToSpace | | DepthToSpace | -| DepthwiseConv2dNative | √ | √ | √ | √ | | √ | nn_ops | DepthwiseConv2D | Convolution | Convolution | -| Div | | √ | √ | √ | | √ | math_ops | Div | | Div | -| Dropout | | | | | | | | | | Dropout | -| Eltwise | | | | | | | | | Eltwise | | -| Elu | | | | | | | | Elu | | Elu | -| Equal | | √ | √ | √ | | | math_ops | Equal | | Equal | -| Exp | | √ | | | | | math_ops | Exp | | Exp | -| 
ExpandDims | | √ | | | | | array_ops | | | | -| Fill | | √ | | | | | array_ops | Fill | | | -| Flatten | | | | | | | | | Flatten | | -| Floor | | √ | √ | √ | | | math_ops | flOOR | | Floor | -| FloorDiv | | √ | | | | | math_ops | FloorDiv | | | -| FloorMod | | √ | | | | | nn_ops | FloorMod | | | -| FullConnection | | √ | | | | | layer/basic | FullyConnected | InnerProduct | | -| GatherNd | | √ | | | | | array_ops | GatherND | | | -| GatherV2 | | √ | | | | | array_ops | Gather | | Gather | -| Greater | | √ | √ | √ | | | math_ops | Greater | | Greater | -| GreaterEqual | | √ | √ | √ | | | math_ops | GreaterEqual | | | -| Hswish | | | | | | | | HardSwish | | | -| L2norm | | | | | | | | L2_NORMALIZATION | | | -| LeakyReLU | | √ | | | | √ | layer/activation | LeakyRelu | | LeakyRelu | -| Less | | √ | √ | √ | | | math_ops | Less | | Less | -| LessEqual | | √ | √ | √ | | | math_ops | LessEqual | | | -| LocalResponseNorm | | | | | | | | LocalResponseNorm | | Lrn | -| Log | | √ | √ | √ | | | math_ops | Log | | Log | -| LogicalAnd | | √ | | | | | math_ops | LogicalAnd | | | -| LogicalNot | | √ | √ | √ | | | math_ops | LogicalNot | | | -| LogicalOr | | √ | | | | | math_ops | LogicalOr | | | -| LSTM | | √ | | | | | layer/lstm | | | | -| MatMul | √ | √ | √ | √ | | √ | math_ops | | | MatMul | -| Maximum | | | | | | | math_ops | Maximum | | Max | -| MaxPool | | √ | √ | √ | | √ | nn_ops | MaxPooling | Pooling | MaxPool | -| Minimum | | | | | | | math_ops | Minimum | | Min | -| Mul | | √ | √ | √ | | √ | math_ops | Mul | | Mul | -| Neg | | | | | | | math_ops | | | Neg | -| NotEqual | | √ | √ | √ | | | math_ops | NotEqual | | | -| OneHot | | √ | | | | | layer/basic | OneHot | | | -| Pack | | √ | | | | | nn_ops | | | | -| Pad | | √ | √ | √ | | | nn_ops | Pad | | Pad | -| Pow | | √ | √ | √ | | | math_ops | Pow | Power | Power | -| PReLU | | √ | √ | √ | | √ | layer/activation | Prelu | PReLU | PRelu | -| Range | | √ | | | | | layer/basic | Range | | | -| Rank | | √ | | | | | array_ops | 
Rank | | | -| RealDiv | | √ | √ | √ | | √ | math_ops | RealDiv | | | -| ReduceMax | | √ | √ | √ | | | math_ops | ReduceMax | | ReduceMax | -| ReduceMean | | √ | √ | √ | | | math_ops | Mean | | ReduceMean | -| ReduceMin | | √ | √ | √ | | | math_ops | ReduceMin | | ReduceMin | -| ReduceProd | | √ | √ | √ | | | math_ops | ReduceProd | | | -| ReduceSum | | √ | √ | √ | | | math_ops | Sum | | ReduceSum | -| ReLU | | √ | √ | √ | | √ | layer/activation | Relu | ReLU | Relu | -| ReLU6 | | √ | | | | √ | layer/activation | Relu6 | ReLU6 | Clip* | -| Reshape | | √ | √ | √ | | √ | array_ops | Reshape | Reshape | Reshape,Flatten | -| Resize | | | | | | | | ResizeBilinear, NearestNeighbor | Interp | | -| Reverse | | | | | | | | reverse | | | -| ReverseSequence | | √ | | | | | array_ops | ReverseSequence | | | -| Round | | √ | | √ | | | math_ops | Round | | | -| Rsqrt | | √ | √ | √ | | | math_ops | Rsqrt | | | -| Scale | | | | | | | | | Scale | | -| ScatterNd | | √ | | | | | array_ops | ScatterNd | | | -| Shape | | √ | | √ | | | array_ops | Shape | | Shape | -| Sigmoid | | √ | √ | √ | | √ | nn_ops | Logistic | Sigmoid | Sigmoid | -| Sin | | | | | | | | Sin | | Sin | -| Slice | | √ | √ | √ | | √ | array_ops | Slice | | Slice | -| Softmax | | √ | √ | √ | | √ | layer/activation | Softmax | Softmax | Softmax | -| SpaceToBatchND | | √ | | | | | array_ops | SpaceToBatchND | | | -| SpareToDense | | | | | | | | SpareToDense | | | -| SpaceToDepth | | √ | | | | | array_ops | SpaceToDepth | | SpaceToDepth | -| Split | | √ | √ | √ | | | array_ops | Split, SplitV | | | -| Sqrt | | √ | √ | √ | | | math_ops | Sqrt | | Sqrt | -| Square | | √ | √ | √ | | | math_ops | Square | | | -| SquaredDifference | | | | | | | | SquaredDifference | | | -| Squeeze | | √ | √ | √ | | | array_ops | Squeeze | | Squeeze | -| StridedSlice | | √ | √ | √ | | | array_ops | StridedSlice | | | -| Stack | | | | | | | | Stack | | | -| Sub | | √ | √ | √ | | √ | math_ops | Sub | | Sub | -| Tan | | | | | | | | | | Tan | -| 
Tanh | | √ | | | | | layer/activation | Tanh | TanH | | -| TensorAdd | | √ | √ | √ | | √ | math_ops | | | | -| Tile | | √ | | | | | array_ops | Tile | | Tile | -| TopK | | √ | √ | √ | | | nn_ops | TopKV2 | | | -| Transpose | | √ | √ | √ | | √ | array_ops | Transpose | Permute | Transpose | -| Unique | | | | | | | | Unique | | | -| Unpack | | √ | | | | | nn_ops | | | | -| Unsample | | | | | | | | | | Unsample | -| Unsqueeze | | | | | | | | | | Unsqueeze | -| Unstack | | | | | | | | Unstack | | | -| Where | | | | | | | | Where | | | -| ZerosLike | | √ | | | | | array_ops | ZerosLike | | | +| 操作名 | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | 支持的Tensorflow
Lite op | 支持的Caffe
Lite op | 支持的Onnx
Lite op | +|-----------------------|----------|----------|----------|-----------|----------|-------------------|----------|----------|---------| +| Abs | | √ | √ | √ | | | Abs | | Abs | +| Add | √ | √ | √ | √ | | √ | Add | | Add | +| AddN | | √ | | | | | AddN | | | +| Argmax | | √ | √ | √ | | | Argmax | ArgMax | ArgMax | +| Argmin | | √ | √ | √ | | | Argmin | | | +| AvgPool | √ | √ | √ | √ | | √ | MeanPooling| Pooling | AveragePool | +| BatchNorm | √ | √ | √ | √ | | √ | | BatchNorm | BatchNormalization | +| BatchToSpace | | √ | √ | √ | | | BatchToSpace, BatchToSpaceND | | | +| BiasAdd | | √ | √ | √ | | √ | | | BiasAdd | +| Broadcast | | √ | | | | | BroadcastTo | | Expand | +| Cast | √ | √ | | √ | | | Cast, DEQUANTIZE* | | Cast | +| Ceil | | √ | √ | √ | | | Ceil | | Ceil | +| Concat | √ | √ | √ | √ | | √ | Concat | Concat | Concat | +| Conv2d | √ | √ | √ | √ | | √ | Conv2D | Convolution | Conv | +| Conv2dTranspose | √ | √ | √ | √ | | √ | DeConv2D | Deconvolution | ConvTranspose | +| Cos | | √ | √ | √ | | | Cos | | Cos | +| Crop | | √ | √ | √ | | | | Crop | | +| DeDepthwiseConv2D | | √ | √ | √ | | | | Deconvolution| ConvTranspose | +| DepthToSpace | | √ | √ | √ | | | DepthToSpace| | DepthToSpace | +| DepthwiseConv2dNative | √ | √ | √ | √ | | √ | DepthwiseConv2D | Convolution | Convolution | +| Div | √ | √ | √ | √ | | √ | Div, RealDiv | | Div | +| Eltwise | √ | √ | | | | | | Eltwise | | +| Elu | | √ | | | | | Elu | | Elu | +| Equal | √ | √ | √ | √ | | | Equal | | Equal | +| Exp | | √ | | | | | Exp | | Exp | +| ExpandDims | | √ | | | | | | | | +| Fill | | √ | | | | | Fill | | | +| Flatten | | √ | | | | | | Flatten | | +| Floor | | √ | √ | √ | | | flOOR | | Floor | +| FloorDiv | √ | √ | | | | | FloorDiv | | | +| FloorMod | √ | √ | | | | | FloorMod | | | +| FullConnection | | √ | √ | √ | | | FullyConnected | InnerProduct | | +| GatherNd | | √ | √ | √ | | | GatherND | | | +| GatherV2 | | √ | √ | √ | | | Gather | | Gather | +| Greater | √ | √ | √ | √ | | | Greater | | 
Greater | +| GreaterEqual | √ | √ | √ | √ | | | GreaterEqual| | | +| Hswish | √ | √ | √ | √ | | | HardSwish | | | +| LeakyReLU | √ | √ | | | | √ | LeakyRelu | | LeakyRelu | +| Less | √ | √ | √ | √ | | | Less | | Less | +| LessEqual | √ | √ | √ | √ | | | LessEqual | | | +| LRN | | √ | | | | | LocalResponseNorm | | Lrn | +| Log | | √ | √ | √ | | | Log | | Log | +| LogicalAnd | √ | √ | | | | | LogicalAnd | | | +| LogicalNot | | √ | √ | √ | | | LogicalNot | | | +| LogicalOr | √ | √ | | | | | LogicalOr | | | +| LSTM | | √ | | | | | | | | +| MatMul | | √ | √ | √ | | √ | | | MatMul | +| Maximum | √ | √ | | | | | Maximum | | Max | +| MaxPool | √ | √ | √ | √ | | √ | MaxPooling | Pooling | MaxPool | +| Minimum | √ | √ | | | | | Minimum | | Min | +| Mul | √ | √ | √ | √ | | √ | Mul | | Mul | +| NotEqual | √ | √ | √ | √ | | | NotEqual | | | +| OneHot | | √ | | | | | OneHot | | | +| Pad | | √ | √ | √ | | | Pad | | Pad | +| Pow | | √ | √ | √ | | | Pow | Power | Power | +| PReLU | | √ | | | | √ | | PReLU | | +| Range | | √ | | | | | Range | | | +| Rank | | √ | | | | | Rank | | | +| ReduceMax | √ | √ | √ | √ | | | ReduceMax | | ReduceMax | +| ReduceMean | √ | √ | √ | √ | | | Mean | | ReduceMean | +| ReduceMin | √ | √ | √ | √ | | | ReduceMin | | ReduceMin | +| ReduceProd | √ | √ | √ | √ | | | ReduceProd | | | +| ReduceSum | √ | √ | √ | √ | | | Sum | | ReduceSum | +| ReduceSumSquare | √ | √ | √ | √ | | | | | | +| ReLU | √ | √ | √ | √ | | √ | Relu | ReLU | Relu | +| ReLU6 | √ | √ | √ | √ | | √ | Relu6 | ReLU6 | Clip* | +| Reshape | √ | √ | √ | √ | | √ | Reshape | Reshape | Reshape,Flatten | +| Resize | | √ | √ | √ | | | ResizeBilinear, NearestNeighbor | Interp | | +| Reverse | | √ | | | | | reverse | | | +| ReverseSequence | | √ | | | | | ReverseSequence | | | +| Round | | √ | √ | √ | | | Round | | | +| Rsqrt | | √ | √ | √ | | | Rsqrt | | | +| Scale | | √ | | | | | | Scale | | +| ScatterNd | | √ | | | | | ScatterNd | | | +| Shape | | √ | | | | | Shape | | Shape | +| Sigmoid | √ | √ | 
√ | √ | | √ | Logistic | Sigmoid | Sigmoid | +| Sin | | √ | √ | √ | | | Sin | | Sin | +| Slice | | √ | √ | √ | | √ | Slice | | Slice | +| Softmax | √ | √ | √ | √ | | √ | Softmax | Softmax | Softmax | +| SpaceToBatch | | √ | | | | | | | | +| SpaceToBatchND | | √ | | | | | SpaceToBatchND | | | +| SpaceToDepth | | √ | | | | | SpaceToDepth | | SpaceToDepth | +| SparseToDense | | √ | | | | | SparseToDense | | | +| Split | √ | √ | √ | √ | | | Split, SplitV | | | +| Sqrt | | √ | √ | √ | | | Sqrt | | Sqrt | +| Square | | √ | √ | √ | | | Square | | | +| SquaredDifference | | √ | | | | | SquaredDifference | | | +| Squeeze | | √ | √ | √ | | | Squeeze | | Squeeze | +| StridedSlice | | √ | √ | √ | | | StridedSlice| | | +| Stack | | √ | | | | | Stack | | | +| Sub | √ | √ | √ | √ | | √ | Sub | | Sub | +| Tanh | √ | √ | | | | | Tanh | TanH | | +| Tile | | √ | | | | | Tile | | Tile | +| TopK | | √ | √ | √ | | | TopKV2 | | | +| Transpose | √ | √ | | | | √ | Transpose | Permute | Transpose | +| Unique | | √ | | | | | Unique | | | +| Unsqueeze | | √ | √ | √ | | | | | Unsqueeze | +| Unstack | | √ | | | | | Unstack | | | +| Where | | √ | | | | | Where | | | +| ZerosLike | | √ | | | | | ZerosLike | | | -* Clip: only support convert clip(0, 6) to Relu6. -* DEQUANTIZE: only support to convert fp16 to fp32. +* Clip: 仅支持将clip(0, 6)转换为Relu6。 +* DEQUANTIZE: 仅支持将fp16转换为fp32。 diff --git a/lite/docs/source_zh_cn/roadmap.md b/lite/docs/source_zh_cn/roadmap.md deleted file mode 100644 index 6bafce4c91194936f9e2715a5896819b72ee99a8..0000000000000000000000000000000000000000 --- a/lite/docs/source_zh_cn/roadmap.md +++ /dev/null @@ -1,15 +0,0 @@ -# RoadMap - - - -1. 增加更多的FP16、INT8和UINT8 CPU算子; -2. 增加更多的openCL、openGL、vulkan和metal GPU算子; -3. 增加控制流算子支持; -4. 增加NPU支持; -5. 增加部署在IoT设备的推理框架; -6. 增加图像分割、文字识别、人脸检测等预制模型; -7. 增加Lite的图像分割、文字识别、人脸检测等预置样例; -8. 增加Micro的样例; -9. 端侧训练支持; -10. pipeline数据处理丰富; -11. 
模型转换工具支持windows和MAC。 \ No newline at end of file diff --git a/lite/tutorials/source_en/deploy.md b/lite/tutorials/source_en/deploy.md index 350654ea725fc9a286be6f113d007e4b5ce62ff6..4429c78551c378dcb83ada80bf596f578841d7be 100644 --- a/lite/tutorials/source_en/deploy.md +++ b/lite/tutorials/source_en/deploy.md @@ -81,9 +81,11 @@ Generally, the compiled output files include the following types. The architectu | third_party | Header file and library of the third-party library | Yes | Yes | Take the 0.7.0-beta version and CPU as an example. The contents of `third party` and `lib` vary depending on the architecture as follows: -- `mindspore-lite-0.7.0-converter-ubuntu`: include `protobuf` (Protobuf dynamic library). -- `mindspore-lite-0.7.0-runtime-x86-cpu`: include `flatbuffers` (FlatBuffers header file). -TODO: Add document content. +- `mindspore-lite-0.7.0-converter-ubuntu`: `third party` includes `protobuf` (Protobuf dynamic library). +- `mindspore-lite-0.7.0-runtime-x86-cpu`: `third party` includes `flatbuffers` (FlatBuffers header file), and `lib` includes `libmindspore-lite.so` (dynamic library of the MindSpore Lite inference framework). +- `mindspore-lite-0.7.0-runtime-arm64-cpu`: `third party` includes `flatbuffers` (FlatBuffers header file), and `lib` includes `libmindspore-lite.so` (dynamic library of the MindSpore Lite inference framework) and `liboptimize.so` (dynamic library of MindSpore Lite advanced operators). + +> `liboptimize.so` only exists in runtime-arm64 outputs, and can only be used on CPUs that support armv8.2 and fp16. > Before running the tools in the `converter`, `benchmark`, or `time_profiler` directory, you need to configure environment variables and set the paths of the dynamic libraries of MindSpore Lite and Protobuf to the paths of the system dynamic libraries. The following uses the 0.7.0-beta version as an example: `export LD_LIBRARY_PATH=./mindspore-lite-0.7.0/lib:./mindspore-lite-0.7.0/third_party/protobuf/lib:${LD_LIBRARY_PATH}`. 
diff --git a/lite/tutorials/source_en/index.rst b/lite/tutorials/source_en/index.rst index ac48c19eeb0dc9ee7e406a857c3f3bd7d89a31f1..8d538a482f590e27536f975f1b2486e33ab64bb7 100644 --- a/lite/tutorials/source_en/index.rst +++ b/lite/tutorials/source_en/index.rst @@ -12,7 +12,7 @@ MindSpore Lite Tutorials :caption: Quick Start deploy - quick_start/quick_start_lite + quick_start/quick_start .. toctree:: :glob: @@ -20,4 +20,6 @@ MindSpore Lite Tutorials :caption: Use use/converter_tool - use/tools + use/runtime + use/benchmark_tool + use/timeprofiler_tool diff --git a/lite/tutorials/source_en/quick_start/quick_start_lite.md b/lite/tutorials/source_en/quick_start/quick_start.md similarity index 100% rename from lite/tutorials/source_en/quick_start/quick_start_lite.md rename to lite/tutorials/source_en/quick_start/quick_start.md diff --git a/lite/tutorials/source_en/use/benchmark_tool.md b/lite/tutorials/source_en/use/benchmark_tool.md index e96f6d25948fe288b48379577abd27beb6916e62..d7df06074a13d8a654339daa1b825a94dbacbc9a 100644 --- a/lite/tutorials/source_en/use/benchmark_tool.md +++ b/lite/tutorials/source_en/use/benchmark_tool.md @@ -1,3 +1,97 @@ # Benchmark Tool + + +- [Benchmark Tool](#benchmark-tool) + - [Overview](#overview) + - [Environment Preparation](#environment-preparation) + - [Parameter Description](#parameter-description) + - [Example](#example) + - [Performance Test](#performance-test) + - [Accuracy Test](#accuracy-test) + + + + +## Overview + +The Benchmark tool is used to perform benchmark testing on a MindSpore Lite model and is implemented using the C++ language. It can not only perform quantitative analysis (performance) on the forward inference execution duration of a MindSpore Lite model, but also perform comparative error analysis (accuracy) based on the output of the specified model. 
+ +## Environment Preparation + +To use the Benchmark tool, you need to prepare the environment as follows: + +- Compilation: Install compilation dependencies and perform compilation. The code of the Benchmark tool is stored in the `mindspore/lite/tools/benchmark` directory of the MindSpore source code. For details about the compilation operations, see the [Environment Requirements](https://www.mindspore.cn/lite/docs/en/master/deploy.html#id2) and [Compilation Example](https://www.mindspore.cn/lite/docs/en/master/deploy.html#id5) in the deployment document. + +- Run: Obtain the `Benchmark` tool and configure environment variables. For details, see [Output Description](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id4) in the deployment document. + +## Parameter Description + +The command used for benchmark testing based on the compiled Benchmark tool is as follows: + +```bash +./benchmark --modelPath= [--accuracyThreshold=] + [--calibDataPath=] [--cpuBindMode=] + [--device=] [--help] [--inDataPath=] + [--inDataType=] [--loopCount=] + [--numThreads=] [--omModelPath=] + [--resizeDims=] [--warmUpLoopCount=] + [--fp16Priority=] +``` + +The following describes the parameters in detail. + +| Parameter | Attribute | Function | Parameter Type | Default Value | Value Range | +| ----------------- | ---- | ------------------------------------------------------------ | ------ | -------- | ---------------------------------- | +| `--modelPath=` | Mandatory | Specifies the file path of the MindSpore Lite model for benchmark testing. | String | Null | - | +| `--accuracyThreshold=` | Optional | Specifies the accuracy threshold. | Float | 0.5 | - | +| `--calibDataPath=` | Optional | Specifies the file path of the benchmark data. The benchmark data, as the comparison output of the tested model, is output from the forward inference of the tested model under other deep learning frameworks using the same input. 
| String | Null | - | +| `--cpuBindMode=` | Optional | Specifies the type of the CPU core bound to the model inference program. | Integer | 1 | −1: medium core
1: large core
0: not bound | +| `--device=` | Optional | Specifies the type of the device on which the model inference program runs. | String | CPU | CPU, NPU, or GPU | +| `--help` | Optional | Displays the help information about the `benchmark` command. | - | - | - | +| `--inDataPath=` | Optional | Specifies the file path of the input data of the tested model. If this parameter is not set, a random value will be used. | String | Null | - | +| `--inDataType=` | Optional | Specifies the file type of the input data of the tested model. | String | Bin | Img: The input data is an image. Bin: The input data is a binary file.| +| `--loopCount=` | Optional | Specifies the number of forward inference times of the tested model when the Benchmark tool is used for the benchmark testing. The value is a positive integer. | Integer | 10 | - | +| `--numThreads=` | Optional | Specifies the number of threads for running the model inference program. | Integer | 2 | - | +| `--omModelPath=` | Optional | Specifies the file path of the OM model. This parameter is optional only when the `device` type is NPU. | String | Null | - | +| `--resizeDims=` | Optional | Specifies the size to be adjusted for the input data of the tested model. | String | Null | - | +| `--warmUpLoopCount=` | Optional | Specifies the number of preheating inference times of the tested model before multiple rounds of the benchmark test are executed. | Integer | 3 | - | +| `--fp16Priority=` | Optional | Specifies whether the float16 operator is preferred. | Bool | false | true, false | + +## Example + +When using the Benchmark tool to perform benchmark testing on different MindSpore Lite models, you can set different parameters to implement different test functions. The testing is classified into performance test and accuracy test. + +### Performance Test + +The main test indicator of the performance test performed by the Benchmark tool is the duration of a single forward inference. 
In a performance test, you do not need to set benchmark data parameters such as `calibDataPath`. For example: + +```bash +./benchmark --modelPath=./models/face_age.ms +``` + +This command uses a random input, and other parameters use default values. After this command is executed, the following statistics are displayed. The statistics include the minimum duration, maximum duration, and average duration of a single inference after the tested model runs for the specified number of inference rounds. + +``` +Model = face_age.ms, numThreads = 2, MinRunTime = 72.228996 ms, MaxRuntime = 73.094002 ms, AvgRunTime = 72.556000 ms +``` + +### Accuracy Test + +The accuracy test performed by the Benchmark tool is to verify the accuracy of the MindSpore Lite model output by setting benchmark data. In an accuracy test, in addition to the `modelPath` parameter, the `calibDataPath` parameter must be set. For example: + +```bash +./benchmark --modelPath=./models/face_age.ms --inDataPath=./input/face_age.bin --device=NPU --accuracyThreshold=3 --calibDataPath=./output/face_age.out +``` + +This command specifies the input data and benchmark data of the tested model, specifies that the model inference program runs on the NPU, and sets the accuracy threshold to 3%. After this command is executed, the following statistics are displayed, including the single input data of the tested model, output result and average deviation rate of the output node, and average deviation rate of all nodes. 
+ +``` +InData0: 139.947 182.373 153.705 138.945 108.032 164.703 111.585 227.402 245.734 97.7776 201.89 134.868 144.851 236.027 18.1142 22.218 5.15569 212.318 198.43 221.853 +================ Comparing Output data ================ +Data of node age_out : 5.94584e-08 6.3317e-08 1.94726e-07 1.91809e-07 8.39805e-08 7.66035e-08 1.69285e-07 1.46246e-07 6.03796e-07 1.77631e-07 1.54343e-07 2.04623e-07 8.89609e-07 3.63487e-06 4.86876e-06 1.23939e-05 3.09981e-05 3.37098e-05 0.000107102 0.000213932 0.000533579 0.00062465 0.00296401 0.00993984 0.038227 0.0695085 0.162854 0.123199 0.24272 0.135048 0.169159 0.0221256 0.013892 0.00502971 0.00134921 0.00135701 0.000383242 0.000163475 0.000136294 9.77864e-05 8.00793e-05 5.73874e-05 3.53858e-05 2.18535e-05 2.04467e-05 1.85286e-05 1.05075e-05 9.34751e-06 6.12732e-06 4.55476e-06 +Mean bias of node age_out : 0% +Mean bias of all nodes: 0% +======================================================= +``` diff --git a/lite/tutorials/source_en/use/converter_tool.md b/lite/tutorials/source_en/use/converter_tool.md index 54d11d2ca99cf9d2a8596aca1a951c31d6e3bb21..185696c5834a7f25285ee656167d4d5c4c67af0a 100644 --- a/lite/tutorials/source_en/use/converter_tool.md +++ b/lite/tutorials/source_en/use/converter_tool.md @@ -6,7 +6,6 @@ - [Overview](#overview) - [Environment Preparation](#environment-preparation) - [Parameter Description](#parameter-description) - - [Model Visualization](#model-visualization) - [Example](#example) @@ -15,7 +14,7 @@ ## Overview -MindSpore Lite provides a tool for offline model conversion. It supports conversion of multiple types of models and visualization of converted models. The converted models can be used for inference. The command line parameters contain multiple personalized options, providing a convenient conversion method for users. +MindSpore Lite provides a tool for offline model conversion. It supports conversion of multiple types of models. The converted models can be used for inference. 
The command line parameters contain multiple personalized options, providing a convenient conversion method for users. Currently, the following input formats are supported: MindSpore, TensorFlow Lite, Caffe, and ONNX. @@ -47,11 +46,6 @@ The following describes the parameters in detail. > - The parameter name and parameter value are separated by an equal sign (=) and no space is allowed between them. > - The Caffe model is divided into two files: model structure `*.prototxt`, corresponding to the `--modelFile` parameter; model weight `*.caffemodel`, corresponding to the `--weightFile` parameter -## Model Visualization - -The model visualization tool provides a method for checking the model conversion result. You can run the JSON command to generate a `*.json` file and compare it with the original model to determine the conversion effect. - -TODO: This function is under development now. ## Example @@ -103,6 +97,4 @@ The following describes how to use the conversion command by using several commo ``` INFO [converter/converter.cc:190] Runconverter] CONVERTER RESULT: SUCCESS! ``` - - -You can use the model visualization tool to visually check the converted MindSpore Lite model. This function is under development. 
\ No newline at end of file + \ No newline at end of file diff --git a/lite/tutorials/source_en/use/timeprofiler_tool.md b/lite/tutorials/source_en/use/timeprofiler_tool.md index f779cfbda5a3f4f3c25735e5cc68e48a089c92d8..00b6dc69d25658c13491ef2537384c493b0bfe3c 100644 --- a/lite/tutorials/source_en/use/timeprofiler_tool.md +++ b/lite/tutorials/source_en/use/timeprofiler_tool.md @@ -1,3 +1,93 @@ # TimeProfiler Tool + + +- [TimeProfiler Tool](#timeprofiler-tool) + - [Overview](#overview) + - [Environment Preparation](#environment-preparation) + - [Parameter Description](#parameter-description) + - [Example](#example) + + + + +## Overview + +The TimeProfiler tool can be used to analyze the time consumption of forward inference at the network layer of a MindSpore Lite model. The analysis is implemented using the C++ language. + +## Environment Preparation + +To use the TimeProfiler tool, you need to prepare the environment as follows: + +- Compilation: Install compilation dependencies and perform compilation. The code of the TimeProfiler tool is stored in the `mindspore/lite/tools/time_profiler` directory of the MindSpore source code. For details about the compilation operations, see the [Environment Requirements](https://www.mindspore.cn/lite/docs/en/master/deploy.html#id2) and [Compilation Example](https://www.mindspore.cn/lite/docs/en/master/deploy.html#id5) in the deployment document. + +- Run: Obtain the `time_profiler` tool and configure environment variables by referring to [Output Description](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id4) in the deployment document. + +## Parameter Description + +The command used for analyzing the time consumption of forward inference at the network layer based on the compiled TimeProfiler tool is as follows: + +```bash +./timeprofiler --modelPath= [--help] [--loopCount=] [--numThreads=] [--cpuBindMode=] [--inDataPath=] [--fp16Priority=] +``` + +The following describes the parameters in detail. 
+ +| Parameter | Attribute | Function | Parameter Type | Default Value | Value Range | +| ----------------- | ---- | ------------------------------------------------------------ | ------ | -------- | ---------------------------------- | +| `--help` | Optional | Displays the help information about the `timeprofiler` command. | - | - | - | +| `--modelPath= ` | Mandatory | Specifies the file path of the MindSpore Lite model for time consumption analysis. | String | Null | - | +| `--loopCount=` | Optional | Specifies the number of times that model inference is executed when the TimeProfiler tool is used for time consumption analysis. The value is a positive integer. | Integer | 100 | - | +| `--numThreads=` | Optional | Specifies the number of threads for running the model inference program. | Integer | 4 | - | +| `--cpuBindMode=` | Optional | Specifies the type of the CPU core bound to the model inference program. | Integer | 1 | −1: medium core
1: large core
0: not bound | +| `--inDataPath=` | Optional | Specifies the file path of the input data of the specified model. If this parameter is not set, a random value will be used. | String | Null | - | +| `--fp16Priority=` | Optional | Specifies whether the float16 operator is preferred. | Bool | false | true, false | + +## Example + +Take the `tcpclassify.ms` model as an example and set the number of model inference cycles to 10. The command for using TimeProfiler to analyze the time consumption at the network layer is as follows: + +```bash +./timeprofiler --modelPath=./models/tcpclassify.ms --loopCount=10 +``` + +After this command is executed, the TimeProfiler tool outputs the statistics on the running time of the model at the network layer. In this example, the command output is shown below. The statistics are displayed by `opName` and `optype`: `opName` indicates the operator name, `optype` indicates the operator type, `avg` indicates the average running time of the operator per single run, `percent` indicates the ratio of the operator running time to the total operator running time, `calledTimess` indicates the number of times that the operator is run, and `opTotalTime` indicates the total time that the operator is run for a specified number of times. Finally, `total time` and `kernel cost` show the average time consumed by a single inference operation of the model and the sum of the average time consumed by all operators in the model inference, respectively. 
+ +``` +----------------------------------------------------------------------------------------- +opName avg(ms) percent calledTimess opTotalTime +conv2d_1/convolution 2.264800 0.824012 10 22.648003 +conv2d_2/convolution 0.223700 0.081390 10 2.237000 +dense_1/BiasAdd 0.007500 0.002729 10 0.075000 +dense_1/MatMul 0.126000 0.045843 10 1.260000 +dense_1/Relu 0.006900 0.002510 10 0.069000 +max_pooling2d_1/MaxPool 0.035100 0.012771 10 0.351000 +max_pooling2d_2/MaxPool 0.014300 0.005203 10 0.143000 +max_pooling2d_2/MaxPool_nchw2nhwc_reshape_1/Reshape_0 0.006500 0.002365 10 0.065000 +max_pooling2d_2/MaxPool_nchw2nhwc_reshape_1/Shape_0 0.010900 0.003966 10 0.109000 +output/BiasAdd 0.005300 0.001928 10 0.053000 +output/MatMul 0.011400 0.004148 10 0.114000 +output/Softmax 0.013300 0.004839 10 0.133000 +reshape_1/Reshape 0.000900 0.000327 10 0.009000 +reshape_1/Reshape/shape 0.009900 0.003602 10 0.099000 +reshape_1/Shape 0.002300 0.000837 10 0.023000 +reshape_1/strided_slice 0.009700 0.003529 10 0.097000 +----------------------------------------------------------------------------------------- +opType avg(ms) percent calledTimess opTotalTime +Activation 0.006900 0.002510 10 0.069000 +BiasAdd 0.012800 0.004657 20 0.128000 +Conv2D 2.488500 0.905401 20 24.885004 +MatMul 0.137400 0.049991 20 1.374000 +Nchw2Nhwc 0.017400 0.006331 20 0.174000 +Pooling 0.049400 0.017973 20 0.494000 +Reshape 0.000900 0.000327 10 0.009000 +Shape 0.002300 0.000837 10 0.023000 +SoftMax 0.013300 0.004839 10 0.133000 +Stack 0.009900 0.003602 10 0.099000 +StridedSlice 0.009700 0.003529 10 0.097000 + +total time : 2.90800 ms, kernel cost : 2.74851 ms + +----------------------------------------------------------------------------------------- +``` \ No newline at end of file diff --git a/lite/tutorials/source_en/use/tools.rst b/lite/tutorials/source_en/use/tools.rst deleted file mode 100644 index 722a938d6916890fc2e03545f926c6092aff85cd..0000000000000000000000000000000000000000 --- 
a/lite/tutorials/source_en/use/tools.rst +++ /dev/null @@ -1,8 +0,0 @@ -Other Tools -=========== - -.. toctree:: - :maxdepth: 1 - - benchmark_tool - timeprofiler_tool \ No newline at end of file diff --git a/lite/tutorials/source_zh_cn/deploy.md b/lite/tutorials/source_zh_cn/deploy.md index 2b6177026eba1ddf58b772aec5e4771b1e38dac5..28748fbca58dcd15b962c16bf294c8ad1ba80ab3 100644 --- a/lite/tutorials/source_zh_cn/deploy.md +++ b/lite/tutorials/source_zh_cn/deploy.md @@ -89,10 +89,11 @@ tar -xvf mindspore-lite-{version}-{function}-{OS}.tar.gz 以0.7.0-beta版本,CPU编译为例,不同包名下,`third party`与`lib`的内容不同: -- `mindspore-lite-0.7.0-converter-ubuntu`:包含`protobuf`(Protobuf的动态库)。 -- `mindspore-lite-0.7.0-runtime-x86-cpu`:`third party`包含`flatbuffers`(FlatBuffers头文件),`lib`包含`libmindspore-lite.so`(MindSpore Lite的动态库)。 -- `mindspore-lite-0.7.0-runtime-arm64-cpu`:`third party`包含`flatbuffers`(FlatBuffers头文件),`lib`包含`libmindspore-lite.so`(MindSpore Lite的动态库)和`liboptimize.so`。 -TODO:补全文件内容 +- `mindspore-lite-0.7.0-converter-ubuntu`:`third party`包含`protobuf`(Protobuf的动态库)。 +- `mindspore-lite-0.7.0-runtime-x86-cpu`:`third party`包含`flatbuffers`(FlatBuffers头文件),`lib`包含`libmindspore-lite.so`(MindSpore Lite推理框架的动态库)。 +- `mindspore-lite-0.7.0-runtime-arm64-cpu`:`third party`包含`flatbuffers`(FlatBuffers头文件),`lib`包含`libmindspore-lite.so`(MindSpore Lite推理框架的动态库)和`liboptimize.so`(MindSpore Lite算子性能优化库)。 + +> `liboptimize.so`仅在runtime-arm64的输出包中存在,仅在armv8.2和支持fp16特性的CPU上使用。 > 运行converter、benchmark或time_profiler目录下的工具前,都需配置环境变量,将MindSpore Lite和Protobuf的动态库所在的路径配置到系统搜索动态库的路径中。以0.7.0-beta版本为例:`export LD_LIBRARY_PATH=./mindspore-lite-0.7.0/lib:./mindspore-lite-0.7.0/third_party/protobuf/lib:${LD_LIBRARY_PATH}`。 diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.jpg b/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.jpg deleted file mode 100644 index ca1f0ff80d553333f78a89bf132bbeab7666043d..0000000000000000000000000000000000000000 Binary files 
a/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.jpg and /dev/null differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.png new file mode 100644 index 0000000000000000000000000000000000000000..6dd7a9027299144fb6264127ed207366de2ce347 Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.png differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png index 29e954a425c3b42e61353b97394d774f646cada7..c48cf581b33afbc15dbf27be495215b999e1be60 100644 Binary files a/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png and b/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_install.jpg b/lite/tutorials/source_zh_cn/images/lite_quick_start_install.jpg deleted file mode 100644 index c98ee71dae722be180a8b88c1661eabf85c97dce..0000000000000000000000000000000000000000 Binary files a/lite/tutorials/source_zh_cn/images/lite_quick_start_install.jpg and /dev/null differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_install.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_install.png new file mode 100644 index 0000000000000000000000000000000000000000..cc66708f0633c537e111d65a4b4e8a411a9322af Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_install.png differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png index 6f71294479c4cd91dd983136d7f13960227c3c57..ade37a61ef97a479401240215e302011c014824c 100644 Binary files a/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png and b/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png differ diff --git 
a/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png index faf694bd2e69ec1e4b33ddfe944612e8472b7600..1fcb8acabc9ba9d289efbe7e82ee5e2da8bfe073 100644 Binary files a/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png and b/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png differ diff --git a/lite/tutorials/source_zh_cn/quick_start/quick_start.md b/lite/tutorials/source_zh_cn/quick_start/quick_start.md index b3730d2ea44d6cecbce11b98df19aeba20a4d9ac..c0332b81056d6149675e9fbf6c5a32d1e00704e3 100644 --- a/lite/tutorials/source_zh_cn/quick_start/quick_start.md +++ b/lite/tutorials/source_zh_cn/quick_start/quick_start.md @@ -17,6 +17,8 @@ + + ## 概述 我们推荐你从端侧Android图像分类demo入手,了解MindSpore Lite应用工程的构建、依赖项配置以及相关API的使用。 @@ -79,16 +81,16 @@ MindSpore Model Zoo中图像分类模型可[在此下载](#TODO)。 3. 在Android设备上,点击“继续安装”,安装完即可查看到设备摄像头捕获的内容和推理结果。 - ![install](../images/lite_quick_start_install.jpg) + ![install](../images/lite_quick_start_install.png) 如下图所示,成功识别出图中内容是键盘和鼠标。 - ![result](../images/lite_quick_start_app_result.jpg) + ![result](../images/lite_quick_start_app_result.png) ## 示例程序详细说明 -本端侧图像分类Android示例程序分为JAVA层和JNI层,其中,JAVA层主要通过Android Camera 2 API实现摄像头获取图像帧,以及相应的图像处理等功能;JNI层在[Runtime](https://www.mindspore.cn/tutorial/zh-CN/master/use/lite_runtime.html)中完成模型推理的过程。 +本端侧图像分类Android示例程序分为JAVA层和JNI层,其中,JAVA层主要通过Android Camera 2 API实现摄像头获取图像帧,以及相应的图像处理等功能;JNI层在[Runtime](https://www.mindspore.cn/lite/tutorial/zh-CN/master/use/runtime.html)中完成模型推理的过程。 > 此处详细说明示例程序的JNI层实现,JAVA层运用Android Camera 2 API实现开启设备摄像头以及图像帧处理等功能,需读者具备一定的Android开发基础知识。 diff --git a/lite/tutorials/source_zh_cn/use/benchmark_tool.md b/lite/tutorials/source_zh_cn/use/benchmark_tool.md index 7c3df6300ab35b77ab3e0354590e2fc8ef25b3df..20c32881feeef915065bb279e9420a4d6cfcb1fc 100644 --- a/lite/tutorials/source_zh_cn/use/benchmark_tool.md +++ b/lite/tutorials/source_zh_cn/use/benchmark_tool.md @@ -68,13 +68,13 @@ Benchmark工具是一款可以对MindSpore 
Lite模型进行基准测试的工具 Benchmark工具进行的性能测试主要的测试指标为模型单次前向推理的耗时。在性能测试任务中,不需要设置`calibDataPath`等标杆数据参数。例如: ```bash -./benchmark --modelPath=./models/face/age/ml_face_age.ms +./benchmark --modelPath=./models/face_age.ms ``` 这条命令使用随机输入,其他参数使用默认值。该命令执行后会输出如下统计信息,该信息显示了测试模型在运行指定推理轮数后所统计出的单次推理最短耗时、单次推理最长耗时和平均推理耗时。 ``` -Model = ml_face_age.ms, numThreads = 2, MinRunTime = 72.228996 ms, MaxRuntime = 73.094002 ms, AvgRunTime = 72.556000 ms +Model = face_age.ms, numThreads = 2, MinRunTime = 72.228996 ms, MaxRuntime = 73.094002 ms, AvgRunTime = 72.556000 ms ``` ### 精度测试 @@ -82,7 +82,7 @@ Model = ml_face_age.ms, numThreads = 2, MinRunTime = 72.228996 ms, MaxRuntime = Benchmark工具进行的精度测试主要是通过设置标杆数据来对比验证MindSpore Lite模型输出的精确性。在精确度测试任务中,除了需要设置`modelPath`参数以外,还必须设置`calibDataPath`参数。例如: ```bash -./benchmark --modelPath=./models/face/age/ml_face_age.ms --inDataPath=./data/input/ml_face_age.ms.bin --device=NPU --accuracyThreshold=3 --calibDataPath=./data/output/face/ml_face_age.ms.out +./benchmark --modelPath=./models/face_age.ms --inDataPath=./input/face_age.bin --device=NPU --accuracyThreshold=3 --calibDataPath=./output/face_age.out ``` 这条命令指定了测试模型的输入数据、标杆数据,同时指定了模型推理程序在NPU上运行,并指定了准确度阈值为3%。该命令执行后会输出如下统计信息,该信息显示了测试模型的单条输入数据、输出节点的输出结果和平均偏差率以及所有节点的平均偏差率。 diff --git a/lite/tutorials/source_zh_cn/use/converter_tool.md b/lite/tutorials/source_zh_cn/use/converter_tool.md index d5402a61523a45e9fdfce61be2ddd73b4f1d157c..93b65513484d5550c804da8edec6a9d688c2b0ec 100644 --- a/lite/tutorials/source_zh_cn/use/converter_tool.md +++ b/lite/tutorials/source_zh_cn/use/converter_tool.md @@ -7,12 +7,10 @@ - [Linux环境使用说明](#linux环境使用说明) - [环境准备](#环境准备) - [参数说明](#参数说明) - - [模型可视化](#模型可视化) - [使用示例](#使用示例) - [Windows环境使用说明](#windows环境使用说明) - [环境准备](#环境准备-1) - [参数说明](#参数说明-1) - - [模型可视化](#模型可视化-1) - [使用示例](#使用示例-1) @@ -21,7 +19,7 @@ ## 概述 -MindSpore Lite提供离线转换模型功能的工具,支持多种类型的模型转换,同时提供转化后模型可视化的功能,转换后的模型可用于推理。命令行参数包含多种个性化选项,为用户提供方便的转换途径。 +MindSpore Lite提供离线转换模型功能的工具,支持多种类型的模型转换,转换后的模型可用于推理。命令行参数包含多种个性化选项,为用户提供方便的转换途径。 
目前支持的输入格式有:MindSpore、TensorFlow Lite、Caffe和ONNX。 @@ -54,11 +52,6 @@ MindSpore Lite提供离线转换模型功能的工具,支持多种类型的模 > - 参数名和参数值之间用等号连接,中间不能有空格。 > - Caffe模型一般分为两个文件:`*.prototxt`模型结构,对应`--modelFile`参数;`*.caffemodel`模型权值,对应`--weightFile`参数。 -### 模型可视化 - -模型可视化工具提供了一种查验模型转换结果的方法。用户可使用Json命令生成`*.json`文件,与原模型相对比,确定转化效果。 - -TODO: 此功能还在开发中。 ### 使用示例 @@ -110,8 +103,7 @@ bash build.sh -I x86_64 ``` INFO [converter/converter.cc:190] Runconverter] CONVERTER RESULT: SUCCESS! ``` - -你可以选择使用模型打印工具,可视化查验上述转化后生成的MindSpore Lite模型。本部分功能开发中。 + ## Windows环境使用说明 @@ -127,9 +119,6 @@ bash build.sh -I x86_64 参考Linux环境模型转换工具的[参数说明](https://www.mindspore.cn/lite/docs/zh-CN/master/converter_tool.html#id4) -### 模型可视化 - -参考Linux环境模型转换工具的[模型可视化](https://www.mindspore.cn/lite/docs/zh-CN/master/converter_tool.html#id5) ### 使用示例 @@ -186,4 +175,3 @@ set MSLOG=INFO INFO [converter/converter.cc:190] Runconverter] CONVERTER RESULT: SUCCESS! ``` -你可以选择使用模型打印工具,可视化查验上述转化后生成的MindSpore Lite模型。本部分功能开发中。 diff --git a/lite/tutorials/source_zh_cn/use/post_training_quantization.md b/lite/tutorials/source_zh_cn/use/post_training_quantization.md index c09d9b933b64fea4b51663d1a141ab3ba93aa2f2..93ba0889434710d8f6143a0469b970b8e1feb2d7 100644 --- a/lite/tutorials/source_zh_cn/use/post_training_quantization.md +++ b/lite/tutorials/source_zh_cn/use/post_training_quantization.md @@ -32,9 +32,9 @@ | 参数名 | 属性 | 功能描述 | 参数类型 | 默认值 | 取值范围 | | -------- | ------- | ----- | ----- | ----- | ----- | | image_path | 必选 | 存放校准数据集的目录 | String | - | 该目录存放可直接用于执行推理的输入数据。由于目前框架还不支持数据预处理,所有数据必须事先完成所需的转换,使得它们满足推理的输入要求。 | -| batch_count | 可选 | 使用的输入数目 | Integer | 1000 | 大于0 | +| batch_count | 可选 | 使用的输入数目 | Integer | 100 | 大于0 | | method_x | 可选 | 网络层输入输出数据量化算法 | String | KL | KL,MAX_MIN。 KL: 基于[KL散度](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf)对数据范围作量化校准; MAX_MIN:基于最大值、最小值计算数据的量化参数。 在模型以及数据集比较较简单的情况下,推荐使用MAX_MIN | - +| thread_num | 可选 | 使用校准数据集执行推理流程时的线程数 | Integer | 1 | 大于0 | ## 使用示例 @@ -44,20 
+44,20 @@ image_path=/dir/images batch_count=100 method_x=MAX_MIN + thread_num=1 ``` 校准数据集可以选择测试数据集的子集,要求`/dir/images`目录下存放的每个文件均是预处理好的输入数据,每个文件都可以直接用于推理的输入。 -3. 以TensorFlow Lite模型mnist.tflite为例,执行带训练后量化的模型转换命令: +3. 以MindSpore模型为例,执行带训练后量化的模型转换命令: ``` - ./converter_lite --fmk=TFLITE --modelFile=mnist.tflite --outputFile=mnist_quant --quantType=PostTraining --config_file=config.cfg + ./converter_lite --fmk=MS --modelFile=lenet.ms --outputFile=lenet_quant --quantType=PostTraining --config_file=config.cfg ``` -4. 上述命令执行成功后,便可得到量化后的模型mnist_quant.ms,通常量化后的模型大小会下降到FP32模型的1/4。 +4. 上述命令执行成功后,便可得到量化后的模型lenet_quant.ms,通常量化后的模型大小会下降到FP32模型的1/4。 ## 部分模型精度结果 | 模型 | 测试数据集 | method_x | FP32模型精度 | 训练后量化精度 | 说明 | | -------- | ------- | ----- | ----- | ----- | ----- | - | mnist.tflite | [MNIST](http://yann.lecun.com/exdb/mnist/) | MAX_MIN | 97.61% | 97.83% | 校准数据集选择MNIST Test数据集的前100张 | - | mobilenet_v1.tflite | [MNIST](http://yann.lecun.com/exdb/mnist/) | MAX_MIN | 98.36% | 98.40% | 校准数据集选择MNIST Test数据集的前100张 | - - + | [Inception_V3](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz) | [ImageNet](http://image-net.org/) | KL | 77.92% | 77.95% | 校准数据集随机选择ImageNet Validation数据集中的100张 | + | [Mobilenet_V1_1.0_224](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz) | [ImageNet](http://image-net.org/) | KL | 70.96% | 70.69% | 校准数据集随机选择ImageNet Validation数据集中的100张 | +> 以上所有结果均在x86环境上测得。 diff --git a/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md b/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md index 840d20f707e444135ce2b4e402baf4a4fe828b27..55baa3aee31fb0ab4aaa7596d96922d451105462 100644 --- a/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md +++ b/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md @@ -49,7 +49,7 @@ TimeProfiler工具可以对MindSpore Lite模型网络层的前向推理进行耗 
使用TimeProfiler对`tcpclassify.ms`模型的网络层进行耗时分析,并且设置模型推理循环运行次数为10,则其命令代码如下: ```bash 
-./timeprofiler --modelPath=./models/siteAI/tcpclassify.ms --loopCount=10 +./timeprofiler --modelPath=./models/tcpclassify.ms --loopCount=10 ``` 该条命令执行后,TimeProfiler工具会输出模型网络层运行耗时的相关统计信息。对于本例命令,输出的统计信息如下。其中统计信息按照`opName`和`optype`两种划分方式分别显示,`opName`表示算子名,`optype`表示算子类别,`avg`表示该算子的平均单次运行时间,`percent`表示该算子运行耗时占所有算子运行总耗时的比例,`calledTimess`表示该算子的运行次数,`opTotalTime`表示该算子运行指定次数的总耗时。最后,`total time`和`kernel cost`分别显示了该模型单次推理的平均耗时和模型推理中所有算子的平均耗时之和。 diff --git a/resource/faq/FAQ_en.md b/resource/faq/FAQ_en.md index 681d68614b8840f572c53221c748d1fe4bb01157..30335b436d65f797ba361674801d8a513125e8bc 100644 --- a/resource/faq/FAQ_en.md +++ b/resource/faq/FAQ_en.md @@ -16,6 +16,8 @@ - [Supported Features](#supported-features) +This document has been moved to a [new location](https://www.mindspore.cn/docs/en/master/FAQ.html). This page will be taken offline later. + ## Installation diff --git a/resource/faq/FAQ_zh_cn.md b/resource/faq/FAQ_zh_cn.md index 9873b933d666d18b00026515f9cfd14a228f23f5..1e06e6c30f88b04ec5a368a69fa5f30c3e748e1f 100644 --- a/resource/faq/FAQ_zh_cn.md +++ b/resource/faq/FAQ_zh_cn.md @@ -16,6 +16,7 @@ - [特性支持](#特性支持) +此文档已经转移到[新的位置](https://www.mindspore.cn/docs/zh-CN/master/FAQ.html),此页面后续会下线。 ## 安装类 diff --git a/tutorials/notebook/optimize_the_performance_of_data_preparation/images/compose.png b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/compose.png new file mode 100644 index 0000000000000000000000000000000000000000..32d2e39f26899212ccc078aaeef9dd12c693e989 Binary files /dev/null and b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/compose.png differ diff --git a/tutorials/notebook/optimize_the_performance_of_data_preparation/images/data_enhancement_performance_scheme.png b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/data_enhancement_performance_scheme.png new file mode 100644 index 0000000000000000000000000000000000000000..bb80a6f71865357fb0f19aca128490b27b2ed21c Binary files 
/dev/null and b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/data_enhancement_performance_scheme.png differ diff --git a/tutorials/notebook/optimize_the_performance_of_data_preparation/images/data_loading_performance_scheme.png b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/data_loading_performance_scheme.png new file mode 100644 index 0000000000000000000000000000000000000000..b9d3cb9f6877e7c97a6b5d3bfa5378421d22ca75 Binary files /dev/null and b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/data_loading_performance_scheme.png differ diff --git a/tutorials/notebook/optimize_the_performance_of_data_preparation/images/operator_fusion.png b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/operator_fusion.png new file mode 100644 index 0000000000000000000000000000000000000000..892a3a61d2f75611577bc195f00eb5723adc0975 Binary files /dev/null and b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/operator_fusion.png differ diff --git a/tutorials/notebook/optimize_the_performance_of_data_preparation/images/pipeline.png b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/pipeline.png new file mode 100644 index 0000000000000000000000000000000000000000..01d453d75a479f31fb5355a55d1405b2d5d54dc3 Binary files /dev/null and b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/pipeline.png differ diff --git a/tutorials/notebook/optimize_the_performance_of_data_preparation/images/shuffle_performance_scheme.png b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/shuffle_performance_scheme.png new file mode 100644 index 0000000000000000000000000000000000000000..bff68233948ffd5b2d731e3fb62270ff16d79e9a Binary files /dev/null and b/tutorials/notebook/optimize_the_performance_of_data_preparation/images/shuffle_performance_scheme.png differ diff --git 
a/tutorials/notebook/optimize_the_performance_of_data_preparation/optimize_the_performance_of_data_preparation.ipynb b/tutorials/notebook/optimize_the_performance_of_data_preparation/optimize_the_performance_of_data_preparation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..61449ff49c6a2d30066618b52e4c5305a59cefe9 --- /dev/null +++ b/tutorials/notebook/optimize_the_performance_of_data_preparation/optimize_the_performance_of_data_preparation.ipynb @@ -0,0 +1,773 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#
优化数据准备的性能" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 概述\n", + "\n", + "数据是整个深度学习中最重要的一环,因为数据的好坏决定了最终结果的上限,模型的好坏只是去无限逼近这个上限,所以高质量的数据输入,会在整个深度神经网络中起到积极作用,数据在整个数据处理和数据增强的过程像经过pipeline管道的水一样,源源不断地流向训练系统,如图所示:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![title](./images/pipeline.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MindSpore为用户提供了数据处理以及数据增强的功能,在数据的整个pipeline过程中,其中的每一步骤,如果都能够进行合理的运用,那么数据的性能会得到很大的优化和提升。本次体验将基于CIFAR-10数据集来为大家展示如何在数据加载、数据处理和数据增强的过程中进行性能的优化。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 整体流程\n", + "- 准备环节。\n", + "- 数据加载性能优化。\n", + "- shuffle性能优化。\n", + "- 数据增强性能优化。\n", + "- 性能优化方案总结。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 准备环节" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 导入模块" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`dataset`模块提供API用来加载和处理数据集。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import mindspore.dataset as ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`numpy`模块用于生成ndarray数组。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 下载所需数据集" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. 在jupyter工作目录下创建`./dataset/Cifar10Data`目录,本次体验所用的数据集存放在该目录下。\n", + "2. 在jupyter工作目录下创建`./transform`目录,本次体验转换生成的数据集存放在该目录下。\n", + "3. 下载[CIFAR-10二进制格式数据集](https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz),并将数据集文件解压到`./dataset/Cifar10Data/cifar-10-batches-bin`目录下,数据加载的时候使用该数据集。\n", + "4. 
下载[CIFAR-10 Python文件格式数据集](https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz),并将数据集文件解压到`./dataset/Cifar10Data/cifar-10-batches-py`目录下,数据转换的时候使用该数据集。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "目录结构如下所示:\n", + "\n", + "\n", + " dataset/Cifar10Data\n", + " ├── cifar-10-batches-bin\n", + " │   ├── batches.meta.txt\n", + " │   ├── data_batch_1.bin\n", + " │   ├── data_batch_2.bin\n", + " │   ├── data_batch_3.bin\n", + " │   ├── data_batch_4.bin\n", + " │   ├── data_batch_5.bin\n", + " │   ├── readme.html\n", + " │   └── test_batch.bin\n", + " └── cifar-10-batches-py\n", + " ├── batches.meta\n", + " ├── data_batch_1\n", + " ├── data_batch_2\n", + " ├── data_batch_3\n", + " ├── data_batch_4\n", + " ├── data_batch_5\n", + " ├── readme.html\n", + " └── test_batch\n", + "\n", + "其中:\n", + "- `cifar-10-batches-bin`目录为CIFAR-10二进制格式数据集目录。\n", + "- `cifar-10-batches-py`目录为CIFAR-10 Python文件格式数据集目录。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据加载性能优化" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "MindSpore为用户提供了多种数据加载方式,其中包括常用数据集加载、用户自定义数据集加载、MindSpore数据格式加载,详情内容请参考[加载数据集](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html)。对于数据集加载,底层实现方式的不同,会导致数据集加载的性能存在差异,如下所示:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| | 常用数据集 | 用户自定义 | MindRecord |\n", + "| :----: | :----: | :----: | :----: |\n", + "| 底层实现 | C++ | Python | C++ |\n", + "| 性能 | 高 | 中 | 高|" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 性能优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![title](./images/data_loading_performance_scheme.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "数据加载性能优化建议如下:\n", + "- 
已经支持的数据集格式优选内置加载算子,具体内容请参考[内置加载算子](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.dataset.html),如果性能仍无法满足需求,则可采取多线程并发方案,请参考本文[多线程优化方案](#thread)。\n", + "- 不支持的数据集格式,优选转换为MindSpore数据格式后再使用`MindDataset`类进行加载,具体内容请参考[将数据集转换为MindSpore数据格式](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/converting_datasets.html),如果性能仍无法满足需求,则可采取多线程并发方案,请参考本文[多线程优化方案](#thread)。\n", + "- 不支持的数据集格式,算法快速验证场景,优选用户自定义`GeneratorDataset`类实现,如果性能仍无法满足需求,则可采取多进程并发方案,请参考本文[多进程优化方案](#process)。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 代码示例\n", + "\n", + "基于以上的数据加载性能优化建议,本次体验分别使用内置加载算子`Cifar10Dataset`类、数据转换后使用`MindDataset`类、使用`GeneratorDataset`类进行数据加载,代码演示如下:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. 使用内置算子`Cifar10Dataset`类加载CIFAR-10数据集,这里使用的是CIFAR-10二进制格式的数据集,加载数据时采取多线程优化方案,开启了4个线程并发完成任务,最后对数据创建了字典迭代器,并通过迭代器读取了一条数据记录。" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0005214214324951172\n", + "{'image': array([[[235, 235, 235],\n", + " [230, 230, 230],\n", + " [234, 234, 234],\n", + " ...,\n", + " [248, 248, 248],\n", + " [248, 248, 248],\n", + " [249, 249, 249]],\n", + "\n", + " [[216, 216, 216],\n", + " [213, 213, 213],\n", + " [215, 215, 215],\n", + " ...,\n", + " [254, 254, 254],\n", + " [253, 253, 253],\n", + " [253, 253, 253]],\n", + "\n", + " [[213, 213, 213],\n", + " [217, 217, 217],\n", + " [215, 215, 215],\n", + " ...,\n", + " [255, 255, 255],\n", + " [254, 254, 254],\n", + " [254, 254, 254]],\n", + "\n", + " ...,\n", + "\n", + " [[195, 195, 195],\n", + " [200, 200, 200],\n", + " [202, 202, 202],\n", + " ...,\n", + " [138, 138, 138],\n", + " [143, 143, 143],\n", + " [172, 171, 176]],\n", + "\n", + " [[205, 205, 205],\n", + " [205, 205, 205],\n", + " [211, 211, 211],\n", + " ...,\n", + " [112, 112, 112],\n", + " [130, 130, 132],\n", + " [167, 163, 184]],\n", + 
"\n", + " [[210, 210, 210],\n", + " [209, 209, 209],\n", + " [213, 213, 213],\n", + " ...,\n", + " [120, 120, 119],\n", + " [146, 146, 146],\n", + " [177, 174, 190]]], dtype=uint8), 'label': array(9, dtype=uint32)}\n" + ] + } + ], + "source": [ + "cifar10_path = \"./dataset/Cifar10Data/cifar-10-batches-bin/\"\n", + "\n", + "# create Cifar10Dataset for reading data\n", + "cifar10_dataset = ds.Cifar10Dataset(cifar10_path,num_parallel_workers=4)\n", + "# create a dictionary iterator and read a data record through the iterator\n", + "print(next(cifar10_dataset.create_dict_iterator()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. 使用`Cifar10ToMR`这个类将CIFAR-10数据集转换为MindSpore数据格式,这里使用的是CIFAR-10 python文件格式的数据集,然后使用`MindDataset`类加载MindSpore数据格式数据集,加载数据采取多线程优化方案,开启了4个线程并发完成任务,最后对数据创建了字典迭代器,并通过迭代器读取了一条数据记录。" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': array([255, 216, 255, ..., 63, 255, 217], dtype=uint8), 'id': array(30474, dtype=int64), 'label': array(2, dtype=int64)}\n" + ] + } + ], + "source": [ + "from mindspore.mindrecord import Cifar10ToMR\n", + "\n", + "cifar10_path = './dataset/Cifar10Data/cifar-10-batches-py/'\n", + "cifar10_mindrecord_path = './transform/cifar10.record'\n", + "\n", + "cifar10_transformer = Cifar10ToMR(cifar10_path,cifar10_mindrecord_path)\n", + "# executes transformation from Cifar10 to MindRecord\n", + "cifar10_transformer.transform(['label'])\n", + "\n", + "# create MindDataset for reading data\n", + "cifar10_mind_dataset = ds.MindDataset(dataset_file=cifar10_mindrecord_path,num_parallel_workers=4)\n", + "# create a dictionary iterator and read a data record through the iterator\n", + "print(next(cifar10_mind_dataset.create_dict_iterator()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. 
使用`GeneratorDataset`类加载自定义数据集,并且采取多进程优化方案,开启了4个进程并发完成任务,最后对数据创建了字典迭代器,并通过迭代器读取了一条数据记录。" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': array([0], dtype=int64)}\n" + ] + } + ], + "source": [ + "def generator_func(num):\n", + " for i in range(num):\n", + " yield (np.array([i]),)\n", + "\n", + "# create GeneratorDataset for reading data\n", + "dataset = ds.GeneratorDataset(source=generator_func(5),column_names=[\"data\"],num_parallel_workers=4)\n", + "# create a dictionary iterator and read a data record through the iterator\n", + "print(next(dataset.create_dict_iterator()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## shuffle性能优化" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "shuffle操作主要是对有序的数据集或者进行过repeat的数据集进行混洗,MindSpore专门为用户提供了`shuffle`函数,其中设定的`buffer_size`参数越大,混洗程度越大,但时间、计算资源消耗也会更大。该接口支持用户在整个pipeline的任何时候都可以对数据进行混洗,具体内容请参考[shuffle处理](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/data_processing_and_augmentation.html#shuffle)。但是因为底层的实现方式不同,该方式的性能不如直接在[内置加载算子](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.dataset.html)中设置`shuffle`参数直接对数据进行混洗。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 性能优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![title](./images/shuffle_performance_scheme.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "shuffle性能优化建议如下:\n", + "- 直接使用内置加载算子的`shuffle`参数进行数据的混洗。\n", + "- 如果使用的是`shuffle`函数,当性能仍无法满足需求,可通过调大`buffer_size`参数的值来优化提升性能。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 代码示例\n", + "\n", + "基于以上的shuffle性能优化建议,本次体验分别使用内置加载算子`Cifar10Dataset`类的`shuffle`参数和`shuffle`函数进行数据的混洗,代码演示如下:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. 
使用内置算子`Cifar10Dataset`类加载CIFAR-10数据集,这里使用的是CIFAR-10二进制格式的数据集,并且设置`shuffle`参数为True来进行数据混洗,最后对数据创建了字典迭代器,并通过迭代器读取了一条数据记录。" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'image': array([[[254, 254, 254],\n", + " [255, 255, 254],\n", + " [255, 255, 254],\n", + " ...,\n", + " [232, 234, 244],\n", + " [226, 230, 242],\n", + " [228, 232, 243]],\n", + "\n", + " [[251, 251, 251],\n", + " [253, 253, 254],\n", + " [255, 255, 255],\n", + " ...,\n", + " [225, 227, 235],\n", + " [227, 231, 241],\n", + " [229, 233, 243]],\n", + "\n", + " [[250, 250, 250],\n", + " [251, 251, 251],\n", + " [253, 253, 253],\n", + " ...,\n", + " [233, 235, 241],\n", + " [233, 236, 245],\n", + " [238, 242, 250]],\n", + "\n", + " ...,\n", + "\n", + " [[ 67, 64, 71],\n", + " [ 65, 62, 69],\n", + " [ 64, 61, 68],\n", + " ...,\n", + " [ 71, 67, 70],\n", + " [ 71, 68, 70],\n", + " [ 69, 65, 68]],\n", + "\n", + " [[ 62, 58, 64],\n", + " [ 59, 55, 61],\n", + " [ 61, 58, 64],\n", + " ...,\n", + " [ 64, 62, 64],\n", + " [ 61, 58, 59],\n", + " [ 62, 60, 61]],\n", + "\n", + " [[ 66, 60, 65],\n", + " [ 64, 59, 64],\n", + " [ 66, 60, 65],\n", + " ...,\n", + " [ 64, 61, 63],\n", + " [ 63, 58, 60],\n", + " [ 61, 56, 58]]], dtype=uint8), 'label': array(9, dtype=uint32)}\n" + ] + } + ], + "source": [ + "cifar10_path = \"./dataset/Cifar10Data/cifar-10-batches-bin/\"\n", + "\n", + "# create Cifar10Dataset for reading data\n", + "cifar10_dataset = ds.Cifar10Dataset(cifar10_path,shuffle=True)\n", + "# create a dictionary iterator and read a data record through the iterator\n", + "print(next(cifar10_dataset.create_dict_iterator()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. 
使用`shuffle`函数进行数据混洗,参数`buffer_size`设置为3,数据采用`GeneratorDataset`类自定义生成。" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before shuffle:\n", + "[0 1 2 3 4]\n", + "[1 2 3 4 5]\n", + "[2 3 4 5 6]\n", + "[3 4 5 6 7]\n", + "[4 5 6 7 8]\n", + "after shuffle:\n", + "[2 3 4 5 6]\n", + "[0 1 2 3 4]\n", + "[4 5 6 7 8]\n", + "[1 2 3 4 5]\n", + "[3 4 5 6 7]\n" + ] + } + ], + "source": [ + "def generator_func():\n", + " for i in range(5):\n", + " yield (np.array([i,i+1,i+2,i+3,i+4]),)\n", + "\n", + "ds1 = ds.GeneratorDataset(source=generator_func,column_names=[\"data\"])\n", + "print(\"before shuffle:\")\n", + "for data in ds1.create_dict_iterator():\n", + " print(data[\"data\"])\n", + "\n", + "ds2 = ds1.shuffle(buffer_size=3)\n", + "print(\"after shuffle:\")\n", + "for data in ds2.create_dict_iterator():\n", + " print(data[\"data\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据增强性能优化" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在图片分类的训练中,尤其是当数据集比较小的时候,用户可以使用数据增强的方式来预处理图片,从而丰富数据集。MindSpore为用户提供了多种数据增强的方式,其中包括:\n", + "- 使用内置C算子(`c_transforms`模块)进行数据增强。\n", + "- 使用内置Python算子(`py_transforms`模块)进行数据增强。\n", + "- 用户可根据自己的需求,自定义Python函数进行数据增强。\n", + "\n", + "具体的内容请参考[数据增强](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/data_processing_and_augmentation.html#id3)。因为底层的实现方式不同,所以性能还是有一定的差异,如下所示:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| 模块 | 底层接口 | 说明 |\n", + "| :----: | :----: | :----: |\n", + "| c_transforms | C++(基于OpenCV)| 性能高 |\n", + "| py_transforms | Python(基于PIL) | 该模块提供了多种图像增强功能,并提供了PIL Image和Numpy数组之间的传输方法 |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 性能优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"![title](./images/data_enhancement_performance_scheme.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "数据增强性能优化建议如下:\n", + "- 优先使用`c_transforms`模块进行数据增强,因为性能最高,如果性能仍无法满足需求,可采取[多线程优化方案](#thread)、[Compose优化方案](#compose)或者[算子融合优化方案](#fusion)。\n", + "- 如果使用了`py_transforms`模块进行数据增强,当性能仍无法满足需求,可采取[多线程优化方案](#thread)、[多进程优化方案](#process)、[Compose优化方案](#compose)或者[算子融合优化方案](#fusion)。\n", + "- `c_transforms`模块是在C++内维护buffer管理,`py_transforms`模块是在Python内维护buffer管理。因为Python和C++切换的性能成本,建议不要混用算子。\n", + "- 如果用户使用了自定义Python函数进行数据增强,当性能仍无法满足需求,可采取[多线程优化方案](#thread)或者[多进程优化方案](#process),如果还是无法提升性能,就需要对自定义的Python代码进行优化。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 代码示例\n", + "\n", + "基于以上的数据增强性能优化建议,本次体验分别使用`c_transforms`模块和自定义Python函数进行了数据增强,演示代码如下所示:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. 使用`c_transforms`模块进行数据增强,数据增强时采用多线程优化方案,开启了4个线程并发完成任务,并且采用了算子融合优化方案,使用`RandomResizedCrop`融合类替代`RandomResize`类和`RandomCrop`类。" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import mindspore.dataset.transforms.c_transforms as c_transforms\n", + "import mindspore.dataset.transforms.vision.c_transforms as C\n", + "import matplotlib.pyplot as plt\n", + "cifar10_path = \"./dataset/Cifar10Data/cifar-10-batches-bin/\"\n", + "\n", + "# create Cifar10Dataset for reading data\n", + "cifar10_dataset = ds.Cifar10Dataset(cifar10_path,num_parallel_workers=4)\n", + "transforms = C.RandomResizedCrop((800,800))\n", + "# apply the transform to the dataset through dataset.map()\n", + "cifar10_dataset = cifar10_dataset.map(input_columns=\"image\",operations=transforms,num_parallel_workers=4)\n", + "\n", + "data = next(cifar10_dataset.create_dict_iterator())\n", + "plt.imshow(data[\"image\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. 使用自定义Python函数进行数据增强,数据增强时采用多进程优化方案,开启了4个进程并发完成任务。" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before map:\n", + "[0 1 2 3 4]\n", + "[1 2 3 4 5]\n", + "[2 3 4 5 6]\n", + "[3 4 5 6 7]\n", + "[4 5 6 7 8]\n", + "after map:\n", + "[ 0 1 4 9 16]\n", + "[ 1 4 9 16 25]\n", + "[ 4 9 16 25 36]\n", + "[ 9 16 25 36 49]\n", + "[16 25 36 49 64]\n" + ] + } + ], + "source": [ + "def generator_func():\n", + " for i in range(5):\n", + " yield (np.array([i,i+1,i+2,i+3,i+4]),)\n", + "\n", + "ds3 = ds.GeneratorDataset(source=generator_func,column_names=[\"data\"])\n", + "print(\"before map:\")\n", + "for data in ds3.create_dict_iterator():\n", + " print(data[\"data\"])\n", + "\n", + "func = lambda x:x**2\n", + "ds4 = ds3.map(input_columns=\"data\",operations=func,python_multiprocessing=True,num_parallel_workers=4)\n", + "print(\"after map:\")\n", + "for data in ds4.create_dict_iterator():\n", + " print(data[\"data\"])" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## 性能优化方案总结" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 多线程优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在数据pipeline过程中,相关算子一般都有线程数设置参数,来提升处理并发度,提升性能,例如:\n", + "- 在数据加载的过程中,内置数据加载类有`num_parallel_workers`参数用来设置线程数。\n", + "- 在数据增强的过程中,`map`函数有`num_parallel_workers`参数用来设置线程数。\n", + "- 在Batch的过程中,`batch`函数有`num_parallel_workers`参数用来设置线程数。\n", + "\n", + "具体内容请参考[内置加载算子](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.dataset.html)。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 多进程优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "数据处理中Python实现的算子均支持多进程的模式,例如:\n", + "- `GeneratorDataset`这个类默认是多进程模式,它的`num_parallel_workers`参数表示的是开启的进程数,默认为1,具体内容请参考[GeneratorDataset](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.dataset.html#mindspore.dataset.GeneratorDataset)。\n", + "- 如果使用Python自定义函数或者`py_transforms`模块进行数据增强的时候,当`map`函数的参数`python_multiprocessing`设置为True时,此时参数`num_parallel_workers`表示的是进程数,参数`python_multiprocessing`默认为False,此时参数`num_parallel_workers`表示的是线程数,具体的内容请参考[内置加载算子](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.dataset.html)。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compose优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Map算子可以接收Tensor算子列表,并将按照顺序应用所有的这些算子,与为每个Tensor算子使用的Map算子相比,此类“胖Map算子”可以获得更好的性能,如图所示:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![title](./images/compose.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 算子融合优化方案" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"提供某些融合算子,这些算子将两个或多个算子的功能聚合到一个算子中。具体内容请参考[数据增强算子](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.dataset.transforms.vision.html),与它们各自组件的流水线相比,这种融合算子提供了更好的性能。如图所示:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![title](./images/operator_fusion.png)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tutorials/notebook/synchronization_training_and_evaluation.ipynb b/tutorials/notebook/synchronization_training_and_evaluation.ipynb index 486fd4cf2f762b193054db536c9576b5bdc5512f..236ae433c882d620ead10a0247dc321bab8122d3 100644 --- a/tutorials/notebook/synchronization_training_and_evaluation.ipynb +++ b/tutorials/notebook/synchronization_training_and_evaluation.ipynb @@ -230,22 +230,22 @@ "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", "from mindspore.train.callback import Callback\n", "\n", "class EvalCallBack(Callback):\n", - " def __init__(self, model, eval_dataset, eval_per_epoch):\n", + " def __init__(self, model, eval_dataset, eval_per_epoch, epoch_per_eval):\n", " self.model = model\n", " self.eval_dataset = eval_dataset\n", " self.eval_per_epoch = eval_per_epoch\n", + " self.epoch_per_eval = epoch_per_eval\n", " \n", " def epoch_end(self, run_context):\n", " cb_param = run_context.original_args()\n", " cur_epoch = cb_param.cur_epoch_num\n", " if cur_epoch % self.eval_per_epoch == 0:\n", - " acc = self.model.eval(self.eval_dataset,dataset_sink_mode = True)\n", - " epoch_per_eval[\"epoch\"].append(cur_epoch)\n", - " epoch_per_eval[\"acc\"].append(acc[\"Accuracy\"])\n", + " acc = 
self.model.eval(self.eval_dataset, dataset_sink_mode=True)\n", + " self.epoch_per_eval[\"epoch\"].append(cur_epoch)\n", + " self.epoch_per_eval[\"acc\"].append(acc[\"Accuracy\"])\n", " print(acc)\n" ] }, @@ -351,7 +351,6 @@ } ], "source": [ - "from mindspore.train.serialization import load_checkpoint, load_param_into_net\n", "from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor\n", "from mindspore.train import Model\n", "from mindspore import context\n", @@ -368,21 +367,21 @@ " repeat_size = 1\n", " network = LeNet5()\n", " \n", - " train_data = create_dataset(train_data_path,repeat_size = repeat_size)\n", - " eval_data = create_dataset(eval_data_path,repeat_size = repeat_size)\n", + " train_data = create_dataset(train_data_path, repeat_size=repeat_size)\n", + " eval_data = create_dataset(eval_data_path, repeat_size=repeat_size)\n", " \n", " # define the loss function\n", " net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')\n", " # define the optimizer\n", " net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.01, momentum=0.9)\n", " config_ck = CheckpointConfig(save_checkpoint_steps=eval_per_epoch*1875, keep_checkpoint_max=15)\n", - " ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_lenet\",directory=ckpt_save_dir, config=config_ck)\n", + " ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_lenet\", directory=ckpt_save_dir, config=config_ck)\n", " model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", " \n", - " epoch_per_eval = {\"epoch\":[],\"acc\":[]}\n", - " eval_cb = EvalCallBack(model,eval_data,eval_per_epoch)\n", + " epoch_per_eval = {\"epoch\": [], \"acc\": []}\n", + " eval_cb = EvalCallBack(model, eval_data, eval_per_epoch, epoch_per_eval)\n", " \n", - " model.train(epoch_size, train_data, callbacks=[ckpoint_cb, LossMonitor(375),eval_cb],\n", + " model.train(epoch_size, train_data, callbacks=[ckpoint_cb, LossMonitor(375), eval_cb],\n", " 
dataset_sink_mode=True)" ] }, @@ -441,11 +440,13 @@ } ], "source": [ + "import matplotlib.pyplot as plt\n", + "\n", "def eval_show(epoch_per_eval):\n", " plt.xlabel(\"epoch number\")\n", " plt.ylabel(\"Model accuracy\")\n", " plt.title(\"Model accuracy variation chart\")\n", - " plt.plot(epoch_per_eval[\"epoch\"],epoch_per_eval[\"acc\"],\"red\")\n", + " plt.plot(epoch_per_eval[\"epoch\"], epoch_per_eval[\"acc\"], \"red\")\n", " plt.show()\n", " \n", "eval_show(epoch_per_eval)" diff --git a/tutorials/source_en/advanced_use/customized_debugging_information.md b/tutorials/source_en/advanced_use/customized_debugging_information.md index 208991c20c35fb5c169882c72e99a59d1a5ae3c8..0e2b71e874e68fd5c967ba61584668a0af117958 100644 --- a/tutorials/source_en/advanced_use/customized_debugging_information.md +++ b/tutorials/source_en/advanced_use/customized_debugging_information.md @@ -31,7 +31,7 @@ For example, you can monitor the loss, save model parameters, dynamically adjust MindSpore provides the callback capabilities to allow users to insert customized operations in a specific phase of training or inference, including: -- Callback functions such as `ModelCheckpoint`, `LossMonitor`, and `SummaryStep` provided by the MindSpore framework +- Callback functions such as `ModelCheckpoint`, `LossMonitor`, and `SummaryCollector` provided by the MindSpore framework - Custom callback functions Usage: Transfer the callback object in the `model.train` method. The callback object can be a list, for example: @@ -39,13 +39,13 @@ Usage: Transfer the callback object in the `model.train` method. The callback ob ```python ckpt_cb = ModelCheckpoint() loss_cb = LossMonitor() -summary_cb = SummaryStep() +summary_cb = SummaryCollector(summary_dir='./summary_dir') model.train(epoch, dataset, callbacks=[ckpt_cb, loss_cb, summary_cb]) ``` `ModelCheckpoint` can save model parameters for retraining or inference. `LossMonitor` can output loss information in logs for users to view. 
In addition, `LossMonitor` monitors the loss value change during training. When the loss value is `Nan` or `Inf`, the training terminates. -SummaryStep can save the training information to a file for later use. +`SummaryCollector` can save the training information to files for later use. During the training process, the callback list will execute the callback function in the defined order. Therefore, in the definition process, the dependency between callbacks needs to be considered. ### Custom Callback diff --git a/tutorials/source_en/advanced_use/dashboard.md b/tutorials/source_en/advanced_use/dashboard.md index b41b8c70c90dc07af5b2d7e3b036b6b10fc1c5bb..6287ce87cd8322b7dddc18868112d280eb658662 100644 --- a/tutorials/source_en/advanced_use/dashboard.md +++ b/tutorials/source_en/advanced_use/dashboard.md @@ -1,6 +1,6 @@ # Dashboard -`Ascend` `GPU` `Model Optimization` `Intermediate` `Expert` +`Ascend` `GPU` `CPU` `Model Optimization` `Intermediate` `Expert` diff --git a/tutorials/source_en/advanced_use/images/data_chart.png b/tutorials/source_en/advanced_use/images/data_chart.png index 1c8d6995bc15ec28ecfca059b237a0d123dbde4f..f698c682119efc886b46a911d3c61f50ab017879 100644 Binary files a/tutorials/source_en/advanced_use/images/data_chart.png and b/tutorials/source_en/advanced_use/images/data_chart.png differ diff --git a/tutorials/source_en/advanced_use/images/data_function.png b/tutorials/source_en/advanced_use/images/data_function.png index 5af8030e1ad01f10e0c8b5636ceb5985fb5d8153..14dd75ba77452e938e75f46b65d49b6f593c543f 100644 Binary files a/tutorials/source_en/advanced_use/images/data_function.png and b/tutorials/source_en/advanced_use/images/data_function.png differ diff --git a/tutorials/source_en/advanced_use/images/data_label.png b/tutorials/source_en/advanced_use/images/data_label.png index 8d20a3d46e9f802fee99b5663b38d489fffd6d60..f76c645e26b28401285f00dd0613d27e3506982c 100644 Binary files a/tutorials/source_en/advanced_use/images/data_label.png and 
b/tutorials/source_en/advanced_use/images/data_label.png differ diff --git a/tutorials/source_en/advanced_use/images/data_op_profile.png b/tutorials/source_en/advanced_use/images/data_op_profile.png index b83408e92777181f6447ec20239fc92e28084a6a..6fc146a688c4670594cfc40e20e9180cfa4eacfc 100644 Binary files a/tutorials/source_en/advanced_use/images/data_op_profile.png and b/tutorials/source_en/advanced_use/images/data_op_profile.png differ diff --git a/tutorials/source_en/advanced_use/images/data_table.png b/tutorials/source_en/advanced_use/images/data_table.png index c3e44c634f72f8e4a89282bcb482f83d3b04da1c..65dcd39049b2754ef9ed22641981743f985e2b85 100644 Binary files a/tutorials/source_en/advanced_use/images/data_table.png and b/tutorials/source_en/advanced_use/images/data_table.png differ diff --git a/tutorials/source_en/advanced_use/images/gpu_activity_profiler.png b/tutorials/source_en/advanced_use/images/gpu_activity_profiler.png index 0269025e33b8e7365024b423bfc9d91e895de0ea..57b54ebee6ddaa2dc72bc9124d054a59e59f1b80 100644 Binary files a/tutorials/source_en/advanced_use/images/gpu_activity_profiler.png and b/tutorials/source_en/advanced_use/images/gpu_activity_profiler.png differ diff --git a/tutorials/source_en/advanced_use/images/gpu_op_ui_profiler.png b/tutorials/source_en/advanced_use/images/gpu_op_ui_profiler.png index 8567c50f8b29c1dcae5219ff085459e260242b36..515bff5e998dac83359a84ad1a5b0540663f9544 100644 Binary files a/tutorials/source_en/advanced_use/images/gpu_op_ui_profiler.png and b/tutorials/source_en/advanced_use/images/gpu_op_ui_profiler.png differ diff --git a/tutorials/source_en/advanced_use/images/graph.png b/tutorials/source_en/advanced_use/images/graph.png index 55ca7d7183c818a15b69a3a6ee2c4ef29655460c..1660bc677ad8870b0bdcdb3d64e0a569477d8209 100644 Binary files a/tutorials/source_en/advanced_use/images/graph.png and b/tutorials/source_en/advanced_use/images/graph.png differ diff --git 
a/tutorials/source_en/advanced_use/images/graph_sidebar.png b/tutorials/source_en/advanced_use/images/graph_sidebar.png index 90e8d868b5ff9d68ae14d55d8f3ff188db412556..72c98d6008931a50a8376d9d6b03e48e8f57ba5f 100644 Binary files a/tutorials/source_en/advanced_use/images/graph_sidebar.png and b/tutorials/source_en/advanced_use/images/graph_sidebar.png differ diff --git a/tutorials/source_en/advanced_use/images/histogram_func.png b/tutorials/source_en/advanced_use/images/histogram_func.png index c4e2c3c9dce7cde09f12141cf9cc19b1f59cebaf..84dfd7f82e667b45d80fc7cf28761b4177d5df80 100644 Binary files a/tutorials/source_en/advanced_use/images/histogram_func.png and b/tutorials/source_en/advanced_use/images/histogram_func.png differ diff --git a/tutorials/source_en/advanced_use/images/image_function.png b/tutorials/source_en/advanced_use/images/image_function.png index d51b8e226f3a13b9707e6bba9abfa4edef6eaaea..4a43b649c106e81b70a0a5bb824bc6563cd2a66b 100644 Binary files a/tutorials/source_en/advanced_use/images/image_function.png and b/tutorials/source_en/advanced_use/images/image_function.png differ diff --git a/tutorials/source_en/advanced_use/images/image_vi.png b/tutorials/source_en/advanced_use/images/image_vi.png index d15ece27f4566f7afbe02ee16b2e5f330b9f402f..1fe3ee2c28367d5fc5d7b322e49b3a731c91f620 100644 Binary files a/tutorials/source_en/advanced_use/images/image_vi.png and b/tutorials/source_en/advanced_use/images/image_vi.png differ diff --git a/tutorials/source_en/advanced_use/images/lineage_label.png b/tutorials/source_en/advanced_use/images/lineage_label.png index 93834d8cedcf41aa1da496598f5eff802274b980..56f6eb7dfd4cd39ce7c8ebf6fa5e2b0d61ea5871 100644 Binary files a/tutorials/source_en/advanced_use/images/lineage_label.png and b/tutorials/source_en/advanced_use/images/lineage_label.png differ diff --git a/tutorials/source_en/advanced_use/images/lineage_model_chart.png b/tutorials/source_en/advanced_use/images/lineage_model_chart.png index 
d0d0a9a30d0ff0f653b92886b9cafb5b3a12a1b2..32e307551e210a48cfbd5022fc2901e841dd9b8a 100644 Binary files a/tutorials/source_en/advanced_use/images/lineage_model_chart.png and b/tutorials/source_en/advanced_use/images/lineage_model_chart.png differ diff --git a/tutorials/source_en/advanced_use/images/lineage_model_table.png b/tutorials/source_en/advanced_use/images/lineage_model_table.png index 7fa384e1c6f6637a3530b3354a6d3b266ff5d319..923b3ee95c08f2a32437988aae99c1aba6d191ef 100644 Binary files a/tutorials/source_en/advanced_use/images/lineage_model_table.png and b/tutorials/source_en/advanced_use/images/lineage_model_table.png differ diff --git a/tutorials/source_en/advanced_use/images/minddata_profile.png b/tutorials/source_en/advanced_use/images/minddata_profile.png index 79dfad25e6828769a2efc697bb7b02a171dbbdd0..a5698394aa7e68fbe1592f1ab19555e7820589fa 100644 Binary files a/tutorials/source_en/advanced_use/images/minddata_profile.png and b/tutorials/source_en/advanced_use/images/minddata_profile.png differ diff --git a/tutorials/source_en/advanced_use/images/multi_scalars_select.png b/tutorials/source_en/advanced_use/images/multi_scalars_select.png index 80bbc7822bf6f86ed5f1ad4f24ffc8039b655c56..7153bd3002aad05fc68e4a879aa07f021af70e0a 100644 Binary files a/tutorials/source_en/advanced_use/images/multi_scalars_select.png and b/tutorials/source_en/advanced_use/images/multi_scalars_select.png differ diff --git a/tutorials/source_en/advanced_use/images/op_statistics.PNG b/tutorials/source_en/advanced_use/images/op_statistics.PNG index 05a146e1ffd5f732ad0fb8c80bd9abe81fb65ab4..ac22f98dac493a5221481b9029e7539a95b29d85 100644 Binary files a/tutorials/source_en/advanced_use/images/op_statistics.PNG and b/tutorials/source_en/advanced_use/images/op_statistics.PNG differ diff --git a/tutorials/source_en/advanced_use/images/op_type_statistics.PNG b/tutorials/source_en/advanced_use/images/op_type_statistics.PNG index 
6d18ccaa0f393938c8f89ca7c20e21e5ff496b4a..92cf3c96eca35ddf7ddc76430c24884526dbaafa 100644 Binary files a/tutorials/source_en/advanced_use/images/op_type_statistics.PNG and b/tutorials/source_en/advanced_use/images/op_type_statistics.PNG differ diff --git a/tutorials/source_en/advanced_use/images/performance_overall.png b/tutorials/source_en/advanced_use/images/performance_overall.png index 923c87df07361e35ae7429b0da2736edd6a2880c..67d1dc36e9c2867071825663a295eab842ce8294 100644 Binary files a/tutorials/source_en/advanced_use/images/performance_overall.png and b/tutorials/source_en/advanced_use/images/performance_overall.png differ diff --git a/tutorials/source_en/advanced_use/images/resources_cpu.png b/tutorials/source_en/advanced_use/images/resources_cpu.png index cd62ad294855f6ee11d1503aaa12c565dbc1c312..4bc7a3b935924f70f411084de787d34d7561b52d 100644 Binary files a/tutorials/source_en/advanced_use/images/resources_cpu.png and b/tutorials/source_en/advanced_use/images/resources_cpu.png differ diff --git a/tutorials/source_en/advanced_use/images/resources_mem.png b/tutorials/source_en/advanced_use/images/resources_mem.png index a222700035f14c08f1979cec7914a976a3633070..8da0662d114089dd3955b044d73377ea43a0c826 100644 Binary files a/tutorials/source_en/advanced_use/images/resources_mem.png and b/tutorials/source_en/advanced_use/images/resources_mem.png differ diff --git a/tutorials/source_en/advanced_use/images/resources_npu.png b/tutorials/source_en/advanced_use/images/resources_npu.png index 51cc63a7c5d1272d226e19c014b0974302f06d12..69440764b00af75f5cc0da8c61fd1a17c5bbcbdd 100644 Binary files a/tutorials/source_en/advanced_use/images/resources_npu.png and b/tutorials/source_en/advanced_use/images/resources_npu.png differ diff --git a/tutorials/source_en/advanced_use/images/scalar.png b/tutorials/source_en/advanced_use/images/scalar.png index f783fec6ccdf67a53a58b4cd1355d75d0cb03879..15a2a4889288b5153fd26c28f6d7a12b5eef4f98 100644 Binary files 
a/tutorials/source_en/advanced_use/images/scalar.png and b/tutorials/source_en/advanced_use/images/scalar.png differ diff --git a/tutorials/source_en/advanced_use/images/scalar_compound.png b/tutorials/source_en/advanced_use/images/scalar_compound.png index 8813a59f7551f7ab239e6103e1e9ef14ec4e2add..c248af843f3e850eda275d33eaadcfaba4304840 100644 Binary files a/tutorials/source_en/advanced_use/images/scalar_compound.png and b/tutorials/source_en/advanced_use/images/scalar_compound.png differ diff --git a/tutorials/source_en/advanced_use/images/scalar_select.png b/tutorials/source_en/advanced_use/images/scalar_select.png index 11c06aecf6b012f6033414ce5beb2d4600bc3a91..056797d9da760ad9878c86e09732eac6c6bac303 100644 Binary files a/tutorials/source_en/advanced_use/images/scalar_select.png and b/tutorials/source_en/advanced_use/images/scalar_select.png differ diff --git a/tutorials/source_en/advanced_use/images/step_trace.png b/tutorials/source_en/advanced_use/images/step_trace.png index cd82cd8ade3a577c7578cb804fe9967c9c27e541..6eac06f3d1ffc34c176c6a52d979e5e135571507 100644 Binary files a/tutorials/source_en/advanced_use/images/step_trace.png and b/tutorials/source_en/advanced_use/images/step_trace.png differ diff --git a/tutorials/source_en/advanced_use/images/tensor_function.png b/tutorials/source_en/advanced_use/images/tensor_function.png index b6c5e8aba5b098590c168b6b5acb4c698a0f6922..43dbda65cbe55a6e7e3388808087469f11186dde 100644 Binary files a/tutorials/source_en/advanced_use/images/tensor_function.png and b/tutorials/source_en/advanced_use/images/tensor_function.png differ diff --git a/tutorials/source_en/advanced_use/images/tensor_histogram.png b/tutorials/source_en/advanced_use/images/tensor_histogram.png index 4d3ca16b63261eca5e8318cb47ec4050539eca51..967a452efde4efc9f464782244f4e790417b7122 100644 Binary files a/tutorials/source_en/advanced_use/images/tensor_histogram.png and b/tutorials/source_en/advanced_use/images/tensor_histogram.png differ diff --git 
a/tutorials/source_en/advanced_use/images/tensor_table.png b/tutorials/source_en/advanced_use/images/tensor_table.png index 725bd9f8481826d682b593c2224a766854e9b4f8..70d1949ebf3fd4d2614ed8dd87346cfb454a7123 100644 Binary files a/tutorials/source_en/advanced_use/images/tensor_table.png and b/tutorials/source_en/advanced_use/images/tensor_table.png differ diff --git a/tutorials/source_en/advanced_use/lineage_and_scalars_comparision.md b/tutorials/source_en/advanced_use/lineage_and_scalars_comparision.md index 1e79792b6cf3659c3c04e34f6e497cb6f3a9a175..c6a1274665f22082e9cf3ea220caf4389bc18bc9 100644 --- a/tutorials/source_en/advanced_use/lineage_and_scalars_comparision.md +++ b/tutorials/source_en/advanced_use/lineage_and_scalars_comparision.md @@ -1,6 +1,6 @@ # Lineage and Scalars Comparision -`Ascend` `GPU` `Model Optimization` `Intermediate` `Expert` +`Ascend` `GPU` `CPU` `Model Optimization` `Intermediate` `Expert` diff --git a/tutorials/source_en/advanced_use/mindinsight_commands.md b/tutorials/source_en/advanced_use/mindinsight_commands.md index 42b9af599466ce92bd3515c711f7eba24d8b92c0..c65d3d3aea20eb8ea5def821f1d4c8319126c0e3 100644 --- a/tutorials/source_en/advanced_use/mindinsight_commands.md +++ b/tutorials/source_en/advanced_use/mindinsight_commands.md @@ -1,6 +1,6 @@ # MindInsight Commands -`Ascend` `GPU` `Model Optimization` `Intermediate` `Expert` +`Ascend` `GPU` `CPU` `Model Optimization` `Intermediate` `Expert` diff --git a/tutorials/source_en/advanced_use/serving.md b/tutorials/source_en/advanced_use/serving.md index c3fa19ab5384b6856c4909723c6d2cfec59aead8..a726e427cbdc9febb4b52c3892aa7770e87666de 100644 --- a/tutorials/source_en/advanced_use/serving.md +++ b/tutorials/source_en/advanced_use/serving.md @@ -13,22 +13,21 @@ - [Client Samples](#client-samples) - [Python Client Sample](#python-client-sample) - [C++ Client Sample](#cpp-client-sample) + - [REST API Client Sample](#rest-api-client-sample) - ## Overview MindSpore Serving is a lightweight and 
high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment. After completing model training using MindSpore, you can export the MindSpore model and use MindSpore Serving to create an inference service for the model. Currently, only Ascend 910 is supported. - ## Starting Serving After MindSpore is installed using `pip`, the Serving executable program is stored in `/{your python path}/lib/python3.7/site-packages/mindspore/ms_serving`. Run the following command to start Serving: -```bash -ms_serving [--help] [--model_path ] [--model_name ] - [--port ] [--device_id ] +```bash +ms_serving [--help] [--model_path=] [--model_name=] [--port=] + [--rest_api_port=] [--device_id=] ``` Parameters are described as follows: @@ -37,15 +36,19 @@ Parameters are described as follows: |`--help`|Optional|Displays the help information about the startup command. |-|-|-| |`--model_path=`|Mandatory|Path for storing the model to be loaded. |String|Null|-| |`--model_name=`|Mandatory|Name of the model file to be loaded. |String|Null|-| -|`--=port `|Optional|Specifies the external Serving port number. |Integer|5500|1–65535| +|`--port=`|Optional|Specifies the external Serving gRPC port number. |Integer|5500|1–65535| +|`--rest_api_port=`|Optional|Specifies the external Serving REST API port number. |Integer|5501|1–65535| |`--device_id=`|Optional|Specifies device ID to be used.|Integer|0|0 to 7| > Before running the startup command, add the path `/{your python path}/lib:/{your python path}/lib/python3.7/site-packages/mindspore/lib` to the environment variable `LD_LIBRARY_PATH`. + > `port` and `rest_api_port` cannot be the same. ## Application Example The following uses a simple network as an example to describe how to use MindSpore Serving. ### Exporting Model + > Before exporting the model, you need to configure MindSpore [base environment](https://www.mindspore.cn/install/en).
+ Use [add_model.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model. ```python @@ -115,7 +118,7 @@ The client code consists of the following parts: explicit MSClient(std::shared_ptr channel) : stub_(MSService::NewStub(channel)) {} private: std::unique_ptr stub_; - };MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials())); + }; MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials())); @@ -151,3 +154,27 @@ The client code consists of the following parts: ``` For details about the complete code, see [ms_client](https://gitee.com/mindspore/mindspore/blob/master/serving/example/cpp_client/ms_client.cc). + +### REST API Client Sample +1. Send data in the form of `data`: + `data` field: each input of the network model is flattened into one-dimensional data. If the network model has n inputs, the final data structure is a two-dimensional list of 1 * n. + As in this example, flatten the model input data `[[1.0, 2.0], [3.0, 4.0]]` and `[[1.0, 2.0], [3.0, 4.0]]` to form `[[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]`. + ``` + curl -X POST -d '{"data": [[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]}' http://127.0.0.1:5501 + ``` + The following return values are displayed, indicating that the Serving service has correctly executed inference on the Add network, and the output data structure is similar to that of the input: + ``` + {"data":[[2.0,4.0,6.0,8.0]]} + ``` + +2. Send data in the form of `tensor`: + `tensor` field: composed of all inputs of the network model, each keeping its original shape. + As in this example, the model input data `[[1.0, 2.0], [3.0, 4.0]]` and `[[1.0, 2.0], [3.0, 4.0]]` are combined into `[[[1.0, 2.0], [3.0, 4.0]], [[1.0, 2.0], [3.0, 4.0]]]`.
+ ``` + curl -X POST -d '{"tensor": [[[1.0, 2.0], [3.0, 4.0]], [[1.0, 2.0], [3.0, 4.0]]]}' http://127.0.0.1:5501 + ``` + The following return values are displayed, indicating that the Serving service has correctly executed inference on the Add network, and the output data structure is similar to that of the input: + ``` + {"tensor":[[2.0,4.0], [6.0,8.0]]} + ``` + > Currently, the REST API supports only int32 and fp32 input data. \ No newline at end of file diff --git a/tutorials/source_en/advanced_use/summary_record.md b/tutorials/source_en/advanced_use/summary_record.md index cebe6c393def9b71faaa6e6966a9f70ee1b202f0..6ce5581ee586f4fc37a60efc981ec5b2f202d2d6 100644 --- a/tutorials/source_en/advanced_use/summary_record.md +++ b/tutorials/source_en/advanced_use/summary_record.md @@ -1,6 +1,6 @@ # Summary Record -`Ascend` `GPU` `Model Optimization` `Intermediate` `Expert` +`Ascend` `GPU` `CPU` `Model Optimization` `Intermediate` `Expert` diff --git a/tutorials/source_zh_cn/advanced_use/customized_debugging_information.md b/tutorials/source_zh_cn/advanced_use/customized_debugging_information.md index d9708232b94726eb3f2af64d2e7abcecc63fac2f..b4d6da9a82d15848681dd01e67121b43be37afe6 100644 --- a/tutorials/source_zh_cn/advanced_use/customized_debugging_information.md +++ b/tutorials/source_zh_cn/advanced_use/customized_debugging_information.md @@ -33,7 +33,7 @@ Callback是回调函数的意思,但它其实不是一个函数而是一个类 MindSpore提供Callback能力,支持用户在训练/推理的特定阶段,插入自定义的操作。包括: -- MindSpore框架提供的`ModelCheckpoint`、`LossMonitor`、`SummaryStep`等Callback函数。 +- MindSpore框架提供的`ModelCheckpoint`、`LossMonitor`、`SummaryCollector`等Callback函数。 - MindSpore支持用户自定义Callback。 使用方法:在`model.train`方法中传入Callback对象,它可以是一个Callback列表,例: @@ -41,13 +41,13 @@ MindSpore提供Callback能力,支持用户在训练/推理的特定阶段, ```python ckpt_cb = ModelCheckpoint() loss_cb = LossMonitor() -summary_cb = SummaryStep() +summary_cb = SummaryCollector(summary_dir='./summary_dir') model.train(epoch, dataset, callbacks=[ckpt_cb, loss_cb, summary_cb]) ```
`ModelCheckpoint`可以保存模型参数,以便进行再训练或推理。 `LossMonitor`可以在日志中输出loss,方便用户查看,同时它还会监控训练过程中的loss值变化情况,当loss值为`Nan`或`Inf`时终止训练。 -SummaryStep可以把训练过程中的信息存储到文件中,以便后续进行查看或可视化展示。 +`SummaryCollector` 可以把训练过程中的信息存储到文件中,以便后续进行查看或可视化展示。 在训练过程中,Callback列表会按照定义的顺序执行Callback函数。因此在定义过程中,需考虑Callback之间的依赖关系。 ### 自定义Callback diff --git a/tutorials/source_zh_cn/advanced_use/dashboard.md b/tutorials/source_zh_cn/advanced_use/dashboard.md index 9f79c106488bff9e67850f62307803e3d84f09b0..817e994f35e0c3afe1e4f05bd34a0eee9254b8f2 100644 --- a/tutorials/source_zh_cn/advanced_use/dashboard.md +++ b/tutorials/source_zh_cn/advanced_use/dashboard.md @@ -1,6 +1,6 @@ # 训练看板 -`Ascend` `GPU` `模型调优` `中级` `高级` +`Ascend` `GPU` `CPU` `模型调优` `中级` `高级` diff --git a/tutorials/source_zh_cn/advanced_use/images/data_chart.png b/tutorials/source_zh_cn/advanced_use/images/data_chart.png index 1c8d6995bc15ec28ecfca059b237a0d123dbde4f..f698c682119efc886b46a911d3c61f50ab017879 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/data_chart.png and b/tutorials/source_zh_cn/advanced_use/images/data_chart.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/data_function.png b/tutorials/source_zh_cn/advanced_use/images/data_function.png index 5af8030e1ad01f10e0c8b5636ceb5985fb5d8153..ac5978dee9272bc492bf04f9362c3bed20baf96f 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/data_function.png and b/tutorials/source_zh_cn/advanced_use/images/data_function.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/data_label.png b/tutorials/source_zh_cn/advanced_use/images/data_label.png index 8d20a3d46e9f802fee99b5663b38d489fffd6d60..c761a9008c5b814da1913c84d2b113174d3f1947 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/data_label.png and b/tutorials/source_zh_cn/advanced_use/images/data_label.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/data_op_profile.png b/tutorials/source_zh_cn/advanced_use/images/data_op_profile.png index 
b83408e92777181f6447ec20239fc92e28084a6a..6c657998a745deecb229298fce02108d831e1aa9 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/data_op_profile.png and b/tutorials/source_zh_cn/advanced_use/images/data_op_profile.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/data_table.png b/tutorials/source_zh_cn/advanced_use/images/data_table.png index c3e44c634f72f8e4a89282bcb482f83d3b04da1c..e368080648f1da89696efdd3fe280a371d5909c4 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/data_table.png and b/tutorials/source_zh_cn/advanced_use/images/data_table.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/gpu_activity_profiler.png b/tutorials/source_zh_cn/advanced_use/images/gpu_activity_profiler.png index 0269025e33b8e7365024b423bfc9d91e895de0ea..053fa9687af1c91d248149f770c3faf8c8acc10b 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/gpu_activity_profiler.png and b/tutorials/source_zh_cn/advanced_use/images/gpu_activity_profiler.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/gpu_op_ui_profiler.png b/tutorials/source_zh_cn/advanced_use/images/gpu_op_ui_profiler.png index d1ee2c6b0f6d1d59d33550496083b27bc58aacde..28a2962688445f4cc8404df76f8f1aad5dc66ef6 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/gpu_op_ui_profiler.png and b/tutorials/source_zh_cn/advanced_use/images/gpu_op_ui_profiler.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/graph.png b/tutorials/source_zh_cn/advanced_use/images/graph.png index 55ca7d7183c818a15b69a3a6ee2c4ef29655460c..1660bc677ad8870b0bdcdb3d64e0a569477d8209 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/graph.png and b/tutorials/source_zh_cn/advanced_use/images/graph.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/graph_sidebar.png b/tutorials/source_zh_cn/advanced_use/images/graph_sidebar.png index 
ea9515857e23d9a55ad56a88a4a21d232734ffb5..b0c3b177dac43d3f105a36dd85245ad4a873569d 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/graph_sidebar.png and b/tutorials/source_zh_cn/advanced_use/images/graph_sidebar.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/histogram_func.png b/tutorials/source_zh_cn/advanced_use/images/histogram_func.png index c4e2c3c9dce7cde09f12141cf9cc19b1f59cebaf..5e30875d0efdab22a326207b4d4c65c8867fefeb 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/histogram_func.png and b/tutorials/source_zh_cn/advanced_use/images/histogram_func.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/image_function.png b/tutorials/source_zh_cn/advanced_use/images/image_function.png index d51b8e226f3a13b9707e6bba9abfa4edef6eaaea..214e6b3927a1098456cabc6b70083b6365c85298 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/image_function.png and b/tutorials/source_zh_cn/advanced_use/images/image_function.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/image_vi.png b/tutorials/source_zh_cn/advanced_use/images/image_vi.png index d15ece27f4566f7afbe02ee16b2e5f330b9f402f..d1924d71c670e02f22eb878a8c3794bde630f178 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/image_vi.png and b/tutorials/source_zh_cn/advanced_use/images/image_vi.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/lineage_label.png b/tutorials/source_zh_cn/advanced_use/images/lineage_label.png index 93834d8cedcf41aa1da496598f5eff802274b980..eabd2dae20664cda83cc46d3d958a07e941a03f6 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/lineage_label.png and b/tutorials/source_zh_cn/advanced_use/images/lineage_label.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/lineage_model_chart.png b/tutorials/source_zh_cn/advanced_use/images/lineage_model_chart.png index d0d0a9a30d0ff0f653b92886b9cafb5b3a12a1b2..3c31840c8c2c89e849e71314b87ada0ba019eb44 
100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/lineage_model_chart.png and b/tutorials/source_zh_cn/advanced_use/images/lineage_model_chart.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/lineage_model_table.png b/tutorials/source_zh_cn/advanced_use/images/lineage_model_table.png index 7fa384e1c6f6637a3530b3354a6d3b266ff5d319..4103eee6ee25a9aa602addc616b7d200f082bbca 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/lineage_model_table.png and b/tutorials/source_zh_cn/advanced_use/images/lineage_model_table.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/minddata_profile.png b/tutorials/source_zh_cn/advanced_use/images/minddata_profile.png index 79dfad25e6828769a2efc697bb7b02a171dbbdd0..9cf3a923f33c0bedc188f425d72b845a4c730dbf 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/minddata_profile.png and b/tutorials/source_zh_cn/advanced_use/images/minddata_profile.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/multi_scalars_select.png b/tutorials/source_zh_cn/advanced_use/images/multi_scalars_select.png index 80bbc7822bf6f86ed5f1ad4f24ffc8039b655c56..104eae240dbd7518adc85ff2cb265c22dd6cb39c 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/multi_scalars_select.png and b/tutorials/source_zh_cn/advanced_use/images/multi_scalars_select.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/op_statistics.PNG b/tutorials/source_zh_cn/advanced_use/images/op_statistics.PNG index 05a146e1ffd5f732ad0fb8c80bd9abe81fb65ab4..fb9c9da03ed16976877539b9a75f0591463a1dc3 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/op_statistics.PNG and b/tutorials/source_zh_cn/advanced_use/images/op_statistics.PNG differ diff --git a/tutorials/source_zh_cn/advanced_use/images/op_type_statistics.PNG b/tutorials/source_zh_cn/advanced_use/images/op_type_statistics.PNG index 
6d18ccaa0f393938c8f89ca7c20e21e5ff496b4a..c4aea613f27f0bcda34e0b1ae1cf19a3c7b71f75 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/op_type_statistics.PNG and b/tutorials/source_zh_cn/advanced_use/images/op_type_statistics.PNG differ diff --git a/tutorials/source_zh_cn/advanced_use/images/performance_overall.png b/tutorials/source_zh_cn/advanced_use/images/performance_overall.png index 923c87df07361e35ae7429b0da2736edd6a2880c..e6846a725cff0e61a0beb92e93502312eee8483c 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/performance_overall.png and b/tutorials/source_zh_cn/advanced_use/images/performance_overall.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/resources_cpu.png b/tutorials/source_zh_cn/advanced_use/images/resources_cpu.png index cd62ad294855f6ee11d1503aaa12c565dbc1c312..a679d20b7199b40ab4dd57e7099d79a652e6344f 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/resources_cpu.png and b/tutorials/source_zh_cn/advanced_use/images/resources_cpu.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/resources_mem.png b/tutorials/source_zh_cn/advanced_use/images/resources_mem.png index eb4af42b58d9f2331b2a517a03af73165a3172ed..b0cc79ef731dcf867d2c31b15208df69a96b5253 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/resources_mem.png and b/tutorials/source_zh_cn/advanced_use/images/resources_mem.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/resources_npu.png b/tutorials/source_zh_cn/advanced_use/images/resources_npu.png index 6fc8635e0c6940f1d2660332031d704cc205b7c3..9ad6cc876b0fde63458c364d409bcd43567fbefc 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/resources_npu.png and b/tutorials/source_zh_cn/advanced_use/images/resources_npu.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/scalar.png b/tutorials/source_zh_cn/advanced_use/images/scalar.png index 
f783fec6ccdf67a53a58b4cd1355d75d0cb03879..2e3482e03523cc21c7a9873feaf207d333397c95 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/scalar.png and b/tutorials/source_zh_cn/advanced_use/images/scalar.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/scalar_select.png b/tutorials/source_zh_cn/advanced_use/images/scalar_select.png index 11c06aecf6b012f6033414ce5beb2d4600bc3a91..a74f1f651338718a4e8f5ba171c47069e569ee2f 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/scalar_select.png and b/tutorials/source_zh_cn/advanced_use/images/scalar_select.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/step_trace.png b/tutorials/source_zh_cn/advanced_use/images/step_trace.png index cd82cd8ade3a577c7578cb804fe9967c9c27e541..1feace7a12db61c4da2b04b149715239dbe8db60 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/step_trace.png and b/tutorials/source_zh_cn/advanced_use/images/step_trace.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/tensor_function.png b/tutorials/source_zh_cn/advanced_use/images/tensor_function.png index b6c5e8aba5b098590c168b6b5acb4c698a0f6922..ab0ad58219181c782c65c396577d2b030b6a8d19 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/tensor_function.png and b/tutorials/source_zh_cn/advanced_use/images/tensor_function.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/tensor_histogram.png b/tutorials/source_zh_cn/advanced_use/images/tensor_histogram.png index 4d3ca16b63261eca5e8318cb47ec4050539eca51..967a452efde4efc9f464782244f4e790417b7122 100644 Binary files a/tutorials/source_zh_cn/advanced_use/images/tensor_histogram.png and b/tutorials/source_zh_cn/advanced_use/images/tensor_histogram.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/tensor_table.png b/tutorials/source_zh_cn/advanced_use/images/tensor_table.png index 725bd9f8481826d682b593c2224a766854e9b4f8..d4978b786c691140b3cad2d0d2257a7c0e448162 100644 
Binary files a/tutorials/source_zh_cn/advanced_use/images/tensor_table.png and b/tutorials/source_zh_cn/advanced_use/images/tensor_table.png differ diff --git a/tutorials/source_zh_cn/advanced_use/lineage_and_scalars_comparision.md b/tutorials/source_zh_cn/advanced_use/lineage_and_scalars_comparision.md index d768f30ee4134dc0d73b03304b2bf855965020a7..fae22dbbcf27c6e5ce5b12b62eb4c74aaeee0cba 100644 --- a/tutorials/source_zh_cn/advanced_use/lineage_and_scalars_comparision.md +++ b/tutorials/source_zh_cn/advanced_use/lineage_and_scalars_comparision.md @@ -1,6 +1,6 @@ # 溯源和对比看板 -`Ascend` `GPU` `模型调优` `中级` `高级` +`Ascend` `GPU` `CPU` `模型调优` `中级` `高级` diff --git a/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md b/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md index 12905b823e717b992144dff97a4c1e9b0b15cb0b..4f685599e0c553b7fe47e3359cac8333d8da0e7b 100644 --- a/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md +++ b/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md @@ -1,6 +1,6 @@ # MindInsight相关命令 -`Ascend` `GPU` `模型调优` `中级` `高级` +`Ascend` `GPU` `CPU` `模型调优` `中级` `高级` diff --git a/tutorials/source_zh_cn/advanced_use/serving.md b/tutorials/source_zh_cn/advanced_use/serving.md index 1c069b8debaa74205cbf33b143e5fc851b7254f1..37f24306f0976a5447337c2279e7ff456acb819d 100644 --- a/tutorials/source_zh_cn/advanced_use/serving.md +++ b/tutorials/source_zh_cn/advanced_use/serving.md @@ -23,13 +23,12 @@ MindSpore Serving是一个轻量级、高性能的服务模块,旨在帮助MindSpore开发者在生产环境中高效部署在线推理服务。当用户使用MindSpore完成模型训练后,导出MindSpore模型,即可使用MindSpore Serving创建该模型的推理服务。当前Serving仅支持Ascend 910。 - ## 启动Serving服务 -通过pip安装MindSpore后,Serving可执行程序位于`/{your python path}/lib/python3.7/site-packages/mindspore/ms_serving` 。 +通过pip安装MindSpore后,Serving可执行程序位于`/{your python path}/lib/python3.7/site-packages/mindspore/ms_serving`。 启动Serving服务命令如下 -```bash -ms_serving [--help] [--model_path ] [--model_name ] - [--port ] [--device_id ] +```bash +ms_serving [--help] [--model_path=] 
[--model_name=] [--port=] + [--rest_api_port=] [--device_id=] ``` 参数含义如下 @@ -38,8 +37,8 @@ ms_serving [--help] [--model_path ] [--model_name ] |`--help`|可选|显示启动命令的帮助信息。|-|-|-| |`--model_path=`|必选|指定待加载模型的存放路径。|String|空|-| |`--model_name=`|必选|指定待加载模型的文件名。|String|空|-| -|`--port=`|可选|指定Serving对外的gRPC端口号。|Integer|5500|1~65535| -|`--rest_api_port=`|可选|指定Serving对外的REST API端口号。|Integer|5501|1~65535| +|`--port=`|可选|指定Serving对外的gRPC端口号。|Integer|5500|1~65535| +|`--rest_api_port=`|可选|指定Serving对外的REST API端口号。|Integer|5501|1~65535| |`--device_id=`|可选|指定使用的设备号|Integer|0|0~7| > 执行启动命令前,需将`/{your python path}/lib:/{your python path}/lib/python3.7/site-packages/mindspore/lib`对应的路径加入到环境变量LD_LIBRARY_PATH中 。 @@ -49,23 +48,27 @@ ms_serving [--help] [--model_path ] [--model_name ] 下面以一个简单的网络为例,演示MindSpore Serving如何使用。 ### 导出模型 + > 导出模型之前,需要配置MindSpore[基础环境](https://www.mindspore.cn/install)。 + 使用[add_model.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/export_model/add_model.py),构造一个只有Add算子的网络,并导出MindSpore推理部署模型。 -```python +```python python add_model.py ``` + 执行脚本,生成`tensor_add.mindir`文件,该模型的输入为两个shape为[2,2]的二维Tensor,输出结果是两个输入Tensor之和。 ### 启动Serving推理服务 -```bash +```bash ms_serving --model_path={model directory} --model_name=tensor_add.mindir ``` -当服务端打印日志`MS Serving grpc Listening on 0.0.0.0:5500`时,表示Serving gRPC服务已加载推理模型完毕。 -当服务端打印日志`MS Serving restful Listening on 0.0.0.0:5501`时,表示Serving REST服务已加载推理模型完毕。 + +当服务端打印日志`MS Serving gRPC start success, listening on 0.0.0.0:5500`时,表示Serving gRPC服务已加载推理模型完毕。 +当服务端打印日志`MS Serving RESTful start, listening on 0.0.0.0:5501`时,表示Serving REST服务已加载推理模型完毕。 ### gRPC客户端示例 #### Python客户端示例 - > 执行客户端前,需将`/{your python path}/lib/python3.7/site-packages/mindspore`对应的路径加入到环境变量PYTHONPATH中。 + > 执行客户端前,需将`/{your python path}/lib/python3.7/site-packages/mindspore`对应的路径添加到环境变量PYTHONPATH中。 获取[ms_client.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/python_client/ms_client.py),启动Python客户端。 ```bash @@ -73,7 +76,7 @@ 
python ms_client.py ``` 显示如下返回值说明Serving服务已正确执行Add网络的推理。 -``` +```bash ms client received: [[2. 2.] [2. 2.]] @@ -104,8 +107,7 @@ ms client received: ./ms_client --target=localhost:5500 ``` 显示如下返回值说明Serving服务已正确执行Add网络的推理。 - ``` - Compute [[1, 2], [3, 4]] + [[1, 2], [3, 4]] + ```Compute [[1, 2], [3, 4]] + [[1, 2], [3, 4]] Add result is 2 4 6 8 client received: RPC OK ``` @@ -119,7 +121,7 @@ ms client received: explicit MSClient(std::shared_ptr channel) : stub_(MSService::NewStub(channel)) {} private: std::unique_ptr stub_; - };MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials())); + }; MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials())); @@ -151,36 +153,37 @@ ms client received: *request.add_data() = data; ``` 3. 调用gRPC接口和已经启动的Serving服务通信,并取回返回值。 - ``` - Status status = stub_->Predict(&context, request, &reply); - ``` + ```Status status = stub_->Predict(&context, request, &reply);``` 完整代码参考[ms_client](https://gitee.com/mindspore/mindspore/blob/master/serving/example/cpp_client/ms_client.cc)。 ### REST API客户端示例 1. `data`形式发送数据: - data字段:由网络模型每个输入/输出数据展平后组合而成。 + data字段:将网络模型每个输入数据展平成一维数据,假设网络模型有n个输入,最后data数据结构为1*n的二维list。 + + 如本例中,将模型输入数据`[[1.0, 2.0], [3.0, 4.0]]`和`[[1.0, 2.0], [3.0, 4.0]]`展平后组合成data形式的数据`[[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]` - 如本例中,将模型输入数据`[[1, 2], [3, 4]]`和`[[1, 2], [3, 4]]`展平组合成data形式的数据`[[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]` ``` curl -X POST -d '{"data": [[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]}' http://127.0.0.1:5501 ``` - 显示如下返回值,说明Serving服务已正确执行Add网络的推理: + + 显示如下返回值,说明Serving服务已正确执行Add网络的推理,输出数据结构同输入类似: ``` {"data":[[2.0,4.0,6.0,8.0]]} ``` 2. 
`tensor`形式发送数据: - tensor字段:由网络模型每个输入/输出组合而成。 + tensor字段:由网络模型每个输入组合而成,保持输入的原始shape。 - 如本例中,将模型输入数据`[[1, 2], [3, 4]]`和`[[1, 2], [3, 4]]`组合成tensor形式的数据`[[[1.0, 2.0], [3.0, 4.0]], [[1.0, 2.0], [3.0, 4.0]]]` + 如本例中,将模型输入数据`[[1.0, 2.0], [3.0, 4.0]]`和`[[1.0, 2.0], [3.0, 4.0]]`组合成tensor形式的数据`[[[1.0, 2.0], [3.0, 4.0]], [[1.0, 2.0], [3.0, 4.0]]]` ``` curl -X POST -d '{"tensor": [[[1.0, 2.0], [3.0, 4.0]], [[1.0, 2.0], [3.0, 4.0]]]}' http://127.0.0.1:5501 ``` - 显示如下返回值,说明Serving服务已正确执行Add网络的推理: + 显示如下返回值,说明Serving服务已正确执行Add网络的推理,输出数据结构同输入类似: ``` {"tensor":[[2.0,4.0], [6.0,8.0]]} ``` + > REST API当前只支持int32和fp32数据输入。 diff --git a/tutorials/source_zh_cn/advanced_use/summary_record.md b/tutorials/source_zh_cn/advanced_use/summary_record.md index a1fc1bbcae338e7353b5ca6b43455992feeeffae..e76ff8e730246d65547f08a1bec986b74c4371fe 100644 --- a/tutorials/source_zh_cn/advanced_use/summary_record.md +++ b/tutorials/source_zh_cn/advanced_use/summary_record.md @@ -1,6 +1,6 @@ # Summary数据收集 -`Ascend` `GPU` `模型调优` `中级` `高级` +`Ascend` `GPU` `CPU` `模型调优` `中级` `高级` diff --git a/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md b/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md index 6e6932f1894f5a6caa018e5a7684e738a323f294..773b4c4535380e34e324b64ef2abf4b429ed0b2d 100644 --- a/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md +++ b/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md @@ -1,4 +1,6 @@ -# 同步训练和验证模型 +# 同步训练和验证模型 + +`Ascend` `GPU` `CPU` `初级` `中级` `高级` `模型导出` `模型训练` @@ -41,25 +43,25 @@ - `model`:即是MindSpore中的`Model`函数。 - `eval_dataset`:验证数据集。 -- `epoch_per_eval`:记录验证模型的精度和相应的epoch数,其数据形式为`{"epoch":[],"acc":[]}`。 +- `epoch_per_eval`:记录验证模型的精度和相应的epoch数,其数据形式为`{"epoch": [], "acc": []}`。 ```python -import matplotlib.pyplot as plt from mindspore.train.callback import Callback class EvalCallBack(Callback): - def __init__(self, model, eval_dataset, eval_per_epoch): + def 
__init__(self, model, eval_dataset, eval_per_epoch, epoch_per_eval): self.model = model self.eval_dataset = eval_dataset self.eval_per_epoch = eval_per_epoch + self.epoch_per_eval = epoch_per_eval def epoch_end(self, run_context): cb_param = run_context.original_args() cur_epoch = cb_param.cur_epoch_num if cur_epoch % self.eval_per_epoch == 0: - acc = self.model.eval(self.eval_dataset,dataset_sink_mode = True) - epoch_per_eval["epoch"].append(cur_epoch) - epoch_per_eval["acc"].append(acc["Accuracy"]) + acc = self.model.eval(self.eval_dataset, dataset_sink_mode=True) + self.epoch_per_eval["epoch"].append(cur_epoch) + self.epoch_per_eval["acc"].append(acc["Accuracy"]) print(acc) ``` @@ -79,12 +81,10 @@ class EvalCallBack(Callback): - `epoch_per_eval`:定义收集`epoch`数和对应模型精度信息的字典。 ```python -from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor from mindspore.train import Model from mindspore import context from mindspore.nn.metrics import Accuracy -from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target="GPU") @@ -98,10 +98,10 @@ if __name__ == "__main__": ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",directory=ckpt_save_dir, config=config_ck) model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) - epoch_per_eval = {"epoch":[],"acc":[]} - eval_cb = EvalCallBack(model,eval_data,eval_per_epoch) + epoch_per_eval = {"epoch": [], "acc": []} + eval_cb = EvalCallBack(model, eval_data, eval_per_epoch, epoch_per_eval) - model.train(epoch_size, train_data, callbacks=[ckpoint_cb, LossMonitor(375),eval_cb], + model.train(epoch_size, train_data, callbacks=[ckpoint_cb, LossMonitor(375), eval_cb], dataset_sink_mode=True) ``` @@ -152,11 +152,13 @@ lenet_ckpt ```python +import matplotlib.pyplot as plt + def eval_show(epoch_per_eval): plt.xlabel("epoch number") 
plt.ylabel("Model accuracy") plt.title("Model accuracy variation chart") - plt.plot(epoch_per_eval["epoch"],epoch_per_eval["acc"],"red") + plt.plot(epoch_per_eval["epoch"], epoch_per_eval["acc"], "red") plt.show() eval_show(epoch_per_eval)