diff --git a/api/source_en/api/python/mindspore/mindspore.nn.probability.rst b/api/source_en/api/python/mindspore/mindspore.nn.probability.rst index 9ed8e3699313bcc607274c7616faa39cd49d6f23..2235f574850eceaf0f26a4d6a1b2c4927a2a247e 100644 --- a/api/source_en/api/python/mindspore/mindspore.nn.probability.rst +++ b/api/source_en/api/python/mindspore/mindspore.nn.probability.rst @@ -12,7 +12,14 @@ mindspore.nn.probability.bnn_layers .. automodule:: mindspore.nn.probability.bnn_layers :members: + :exclude-members: ConvReparam , DenseReparam + .. autoclass:: ConvReparam(in_channels, out_channels, kernel_size,stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + + .. autoclass:: DenseReparam(in_channels, out_channels, activation=None, has_bias=True, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + .. autoclass:: WithBNNLossCell :members: diff --git a/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst b/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst index 729903dfca591c264e4d1b5b093bf3356203c67a..70e8f69bf9f2a3e6d8d6f1be8636cad79b61fb69 100644 --- a/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst +++ b/api/source_zh_cn/api/python/mindspore/mindspore.nn.probability.rst @@ -12,7 +12,14 @@ mindspore.nn.probability.bnn_layers .. automodule:: mindspore.nn.probability.bnn_layers :members: + :exclude-members: ConvReparam , DenseReparam + .. autoclass:: ConvReparam(in_channels, out_channels, kernel_size,stride=1, pad_mode='same', padding=0, dilation=1, group=1, has_bias=False, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + + .. autoclass:: DenseReparam(in_channels, out_channels, activation=None, has_bias=True, weight_prior_fn=NormalPrior, weight_posterior_fn=, bias_prior_fn=NormalPrior, bias_posterior_fn=) + :members: + .. 
autoclass:: WithBNNLossCell :members: diff --git a/api/source_zh_cn/programming_guide/nn.md b/api/source_zh_cn/programming_guide/nn.md new file mode 100644 index 0000000000000000000000000000000000000000..a1bb61ec965da01e16ed19d5d850ee92594e9ea5 --- /dev/null +++ b/api/source_zh_cn/programming_guide/nn.md @@ -0,0 +1,28 @@ +# nn模块 + + + +MindSpore的nn模块是Python实现的模型组件,是对低阶API的封装,主要包括各种模型层、损失函数、优化器等。 + +同时nn也提供了部分与Primitive算子同名的接口,主要作用是对Primitive算子进行进一步封装,为用户提供更友好的API。 + +代码样例如下: +```python +import numpy as np +from mindspore.common.tensor import Tensor +import mindspore.nn as nn +import mindspore + +net = nn.PSNR() +img1 = Tensor(np.random.random((1,3,16,16)), mindspore.float32) +img2 = Tensor(np.random.random((1,3,16,16)), mindspore.float32) +output = net(img1, img2) +print("output =", output) +``` + +输出如下: +``` +output = [7.6338434] +``` + +各种模型层、损失函数、优化器等代码样例正在完善中。 diff --git a/api/source_zh_cn/programming_guide/ops.md b/api/source_zh_cn/programming_guide/ops.md new file mode 100644 index 0000000000000000000000000000000000000000..53bb69f5dfd011be5b4ed47d1f440da45df482a6 --- /dev/null +++ b/api/source_zh_cn/programming_guide/ops.md @@ -0,0 +1,126 @@ +# ops模块 + + + +- [ops模块](#ops模块) + - [mindspore.ops.operations](#mindsporeopsoperations) + - [mindspore.ops.functional](#mindsporeopsfunctional) + - [mindspore.ops.composite](#mindsporeopscomposite) + + + + + +MindSpore的ops模块主要存放算子相关接口,同时包含算子的校验和正反向关联的逻辑。 + +ops主要包括operations、functional和composite,可通过ops直接获取到这三类算子。 +- operations提供单个的Primtive算子。一个算子对应一个原语,是最小的执行对象,需要实例化之后使用。 +- composite提供一些预定义的组合算子,以及复杂的涉及图变换的算子,如`GradOperation`。 +- functional提供operations和composite实例化后的对象,简化算子的调用流程。 + +## mindspore.ops.operations + +operations提供了所有的Primitive算子接口,是开放给用户的最低阶算子接口。算子支持情况可查询[算子支持列表](https://www.mindspore.cn/docs/zh-CN/master/operator_list.html#mindspore-ops-operations)。 + +Primitive算子也称为算子原语,它直接封装了底层的Ascend、GPU、AICPU、CPU等多种算子的具体实现,为用户提供基础算子能力。 + +Primitive算子接口是构建高阶接口、自动微分、网络模型等能力的基础。 + +代码样例如下: +```python +import numpy as np +import mindspore +from mindspore import Tensor +import mindspore.ops.operations as P + +input_x = mindspore.Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32) +input_y = 3.0 +pow = P.Pow() +output = pow(input_x, input_y) +print("output =", output) +``` + +输出如下: +``` +output = [ 1. 8. 64.] +``` + +## mindspore.ops.functional + +为了简化没有属性的算子的调用流程,MindSpore提供了一些算子的functional版本。入参要求参考原算子的输入输出要求。算子支持情况可以查询[算子支持列表](https://www.mindspore.cn/docs/zh-CN/master/operator_list.html#mindspore-ops-functional)。 + +例如`P.Pow`算子,我们提供了functional版本的`F.tensor_pow`算子。 + +使用functional的代码样例如下: + +```python +import numpy as np +import mindspore +from mindspore import Tensor +from mindspore.ops import functional as F + +input_x = mindspore.Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32) +input_y = 3.0 +output = F.tensor_pow(input_x, input_y) +print("output =", output) +``` + +输出如下: +``` +output = [ 1. 8. 64.] 
+``` + +## mindspore.ops.composite + +composite提供了一些算子的组合,包括clip_by_value和random相关的一些算子,以及涉及图变换的函数(`GradOperation`、`HyperMap`和`Map`等)。 + +算子的组合可以直接像一般函数一样使用,例如使用`normal`生成一个随机分布: +```python +from mindspore.common import dtype as mstype +from mindspore.ops import composite as C +from mindspore import Tensor + +mean = Tensor(1.0, mstype.float32) +stddev = Tensor(1.0, mstype.float32) +output = C.normal((2, 3), mean, stddev, seed=5) +print("ouput =", output) +``` +输出如下: +``` +output = [[2.4911082 0.7941146 1.3117087] + [0.30582333 1.772938 1.525996]] +``` + +> 以上代码运行于MindSpore的GPU版本。 + +针对涉及图变换的函数,用户可以使用`MultitypeFuncGraph`定义一组重载的函数,根据不同类型,走到不同实现。 + +代码样例如下: +```python +import numpy as np +from mindspore.ops.composite import MultitypeFuncGraph +from mindspore import Tensor +from mindspore.ops import functional as F + +add = MultitypeFuncGraph('add') +@add.register("Number", "Number") +def add_scalar(x, y): + return F.scalar_add(x, y) + +@add.register("Tensor", "Tensor") +def add_tensor(x, y): + return F.tensor_add(x, y) + +tensor1 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32')) +tensor2 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32')) +print('tensor', add(tensor1, tensor2)) +print('scalar', add(1, 2)) +``` +输出如下: +``` +tensor [[2.4, 4.2] + [4.4, 6.4]] +scalar 3 +``` + +此外,高阶函数`GradOperation`提供了根据输入的函数,求这个函数对应的梯度函数的方式,详细可以参阅[API文档](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.composite.html#mindspore.ops.composite.GradOperation)。 \ No newline at end of file diff --git a/api/source_zh_cn/programming_guide/tensor.md b/api/source_zh_cn/programming_guide/tensor.md index 5199be093ff60ec952450a63d73caf68890e4af2..08f9ec6d7f5ed1c450964eb15e5f613831607152 100644 --- a/api/source_zh_cn/programming_guide/tensor.md +++ b/api/source_zh_cn/programming_guide/tensor.md @@ -25,6 +25,8 @@ 张量里的数据分为不同的类型,支持的类型有int8、int16、int32、int64、uint8、uint16、uint32、uint64、float16、float32、float64、bool_,与NumPy里的数据类型一一对应。 不同维度的张量分别表示不同的数据,0维张量表示标量,1维张量表示向量,2维张量表示矩阵,3维张量可以表示彩色图像的RGB三通道等等。 + +> 本文档中的所有示例,都是在PyNative模式下运行的,暂不支持CPU。 ## 常量张量 @@ -39,34 +41,36 @@ import numpy as np from mindspore import Tensor from mindspore.common import dtype as mstype -x = Tensor(np.array([1, 2], [3, 4]]), mstype.int32) +x = Tensor(np.array([[1, 2], [3, 4]]), mstype.int32) y = Tensor(1.0, mstype.int32) z = Tensor(2, mstype.int32) m = Tensor(True, mstype.bool_) n = Tensor((1, 2, 3), mstype.int16) p = Tensor([4.0, 5.0, 6.0], mstype.float64) -print(x, "\n\n", y, "\n\n", z, "\n\n", m, "\n\n", n, "\n\n", p, "\n\n", q) +print(x, "\n\n", y, "\n\n", z, "\n\n", m, "\n\n", n, "\n\n", p) ``` 输出如下: ``` [[1 2] - [3 4]] + [3 4]] + +1 -1.0 +2 -2 +True -True +[1 2 3] -[1, 2, 3] +[4. 5. 6.] ``` ## 变量张量 -变量张量的值在网络中可以被更新,用来表示需要被更新的参数,MindSpore使用Tensor的子类Parameter构造变量张量,构造时支持传入Tensor Initializer。 +变量张量的值在网络中可以被更新,用来表示需要被更新的参数,MindSpore使用Tensor的子类Parameter构造变量张量,构造时支持传入Tensor或者Initializer。 代码样例如下: @@ -87,11 +91,13 @@ print(x, "\n\n", y, "\n\n", z) ``` [[0 1 2] - [3 4 5]] + [3 4 5]] -Parameter (name=x, value=[[0 1 2] [3 4 5]]) +Parameter (name=x, value=[[0 1 2] + [3 4 5]]) -Parameter (name=y, value=[[1. 1. 1.] [1. 1. 1.]] +Parameter (name=y, value=[[[1. 1. 1.] + [1. 1. 
1.]]]) ``` ## 张量的属性和方法 @@ -108,7 +114,7 @@ import numpy as np from mindspore import Tensor from mindspore.common import dtype as mstype -x = Tensor(np.array([1, 2], [3, 4]]), mstype.int32) +x = Tensor(np.array([[1, 2], [3, 4]]), mstype.int32) x_shape = x.shape x_dtype = x.dtype @@ -125,7 +131,7 @@ print(x_shape, x_dtype) 张量的方法包括`all`、`any`和`asnumpy`。 - all(axis, keep_dims):在指定维度上通过“and”操作进行归约,axis代表归约维度,keep_dims表示是否保留归约后的维度。 -- any(axis, keep_dims):在指定维度上通过“any”操作进行归约,axis代表归约维度,keep_dims表示是否保留归约后的维度。 +- any(axis, keep_dims):在指定维度上通过“or”操作进行归约,axis代表归约维度,keep_dims表示是否保留归约后的维度。 - asnumpy():将Tensor转换为NumPy的array。 代码样例如下: @@ -135,24 +141,24 @@ import numpy as np from mindspore import Tensor from mindspore.common import dtype as mstype -x = Tensor(np.array([1, 2], [3, 4]]), mstype.int32) +x = Tensor(np.array([[True, True], [False, False]]), mstype.bool_) x_all = x.all() -x_any = a.any() +x_any = x.any() x_array = x.asnumpy() print(x_all, "\n\n", x_any, "\n\n", x_array) - ``` 输出如下: ``` -False +False + +True -True +[[ True True] + [False False]] -[[True True] - [False True]] ``` ## 张量操作 @@ -184,9 +190,9 @@ True ``` [[1. 1.] - [1. 1.]] + [1. 1.]] - 1.0 + 1.0 [1 2 3] ``` @@ -203,22 +209,35 @@ True import numpy as np from mindspore import Tensor from mindspore.common import dtype as mstype - - x = Tensor(np.arange(3*4*5).reshape((3, 4, 5))) + + x = Tensor(np.arange(3*4*5).reshape((3, 4, 5)), mstype.int32) indices = Tensor(np.array([[0, 1], [1, 2]]), mstype.int32) - y = [:3, indices, 3] - + y = x[:3, indices, 3] + print(x, "\n\n", y) ``` 输出如下: ``` - [[[3 8] - [8 13]] - [[23 28] + [[[ 0 1 2 3 4] + [ 5 6 7 8 9] + [10 11 12 13 14] + [15 16 17 18 19]] + [[20 21 22 23 24] + [25 26 27 28 29] + [30 31 32 33 34] + [35 36 37 38 39]] + [[40 41 42 43 44] + [45 46 47 48 49] + [50 51 52 53 54] + [55 56 57 58 59]]] + + [[[ 3 8] + [ 8 13]] + [[23 28] [28 33]] - [[43 48] + [[43 48] [48 53]]] ``` @@ -237,12 +256,36 @@ True import numpy as np from mindspore import Tensor from mindspore.ops import operations as P - - x = Tensor(np.arange(2*3).reshape((2, 3))) - y = P.Reshape()(x, (4, 3, 5)) + + x = Tensor(np.arange(2*3).reshape((1, 2, 3))) + y = P.Reshape()(x, (1, 3, 2)) z = P.ExpandDims()(x, 1) - m = P.Squeeze(axis=3)(x) - n = P.Transpose()(x, (0, 2, 3, 1)) + m = P.Squeeze(axis=0)(x) + n = P.Transpose()(x, (2, 0, 1)) + + print(x, "\n\n", y, "\n\n", z, "\n\n", m, "\n\n", n) + ``` + + 输出如下: + + ``` + [[[0 1 2] + [3 4 5]]] + + [[[0 1] + [2 3] + [4 5]]] + + [[[[0 1 2] + [3 4 5]]]] + + [[0 1 2] + [3 4 5]] + + [[[0 3]] + [[1 4]] + [[2 5]]] + ``` - 合并分割 @@ -258,31 +301,33 @@ True import numpy as np from mindspore import Tensor from mindspore.ops import operations as P - - x = Tensor(np.arange(2*3).reshape((2, 3))) + x = Tensor(np.arange(2*3).reshape((2, 3))) + y = Tensor(np.arange(2*3).reshape((2, 3))) z = P.Pack(axis=0)((x, y)) m = P.Concat(axis=0)((x, y)) n = P.Split(0, 2)(x) - - print(z, "\n\n", m, "\n\n", n[0], "\n", n[1]) + + print(x, "\n\n", z, "\n\n", m, "\n\n", n) ``` 输出如下: ``` + [[0 1 2] + [3 4 5]] + [[[0 1 2] [3 4 5]] - [[0 1 2] + [[0 1 2] [3 4 5]]] [[0 1 2] - [3 4 5] - [0 1 2] - [3 4 5]] + [3 4 5] + [0 1 2] + [3 4 5]] - [[0 1 2]] - [[3 4 5]] + (Tensor(shape=[1, 3], dtype=Int64, [[0 1 2]]), Tensor(shape=[1, 3], dtype=Int64, [[3 4 5]])) ``` ### 数学运算 @@ -299,8 +344,9 @@ MindSpore支持对张量进行广播,包括显式广播和隐式广播。显 import numpy as np from mindspore import Tensor from mindspore.ops import operations as P +from mindspore.common import dtype as mstype -x = Tensor(np.arange(2*3).reshape((2, 3))) +x = 
Tensor(np.arange(2*3).reshape((2, 3)), mstype.int32) y = P.Tile()(x, (2, 3)) print(x, "\n\n", y) diff --git a/docs/source_en/FAQ.md b/docs/source_en/FAQ.md new file mode 100644 index 0000000000000000000000000000000000000000..6478958c1882e17cafa51e5d0586c4132527e972 --- /dev/null +++ b/docs/source_en/FAQ.md @@ -0,0 +1,262 @@ +# FAQ + +`Ascend` `GPU` `CPU` `Environmental Setup` `Model Export` `Model Training` `Beginner` `Intermediate` `Expert` + + + +- [FAQ](#faq) + - [Installation](#installation) + - [Installing Using pip](#installing-using-pip) + - [Source Code Compilation Installation](#source-code-compilation-installation) + - [Environment Variables](#environment-variables) + - [Verifying the Installation](#verifying-the-installation) + - [Supported Operators](#supported-operators) + - [Network Models](#network-models) + - [Platform and System](#platform-and-system) + - [Backend Running](#backend-running) + - [Programming Language Extensions](#programming-language-extensions) + - [Supported Features](#supported-features) + + + + +## Installation + +### Installing Using pip +Q: What should I do if an error message `SSL:CERTIFICATE_VERIFY_FATLED` is displayed when I use pip to install MindSpore? + +A: Add the `--trusted-host=ms-release.obs.cn-north-4.myhuaweicloud.com` parameter to the pip installation command and try again. + +
+ +Q: Are there any specific requirements for the Python version when installing MindSpore via pip? + +A: MindSpore utilizes many of the new features in Python 3.7+, therefore we recommend you set up a Python 3.7.5 development environment via `conda`. + +
+ +Q: What should I do if the error `ProxyError(Cannot connect to proxy)` is reported during pip installation? + +A: This is usually a proxy configuration problem. You can configure the proxy with `export http_proxy={your_proxy}` in an Ubuntu environment, or with `set http_proxy={your_proxy}` in cmd in a Windows environment. + +
+ +Q: What should I do if an error is reported during pip installation? + +A: Please execute `pip -V` to check whether pip is linked to Python 3.7+. If not, we recommend you +use `python3.7 -m pip install` instead of the `pip install` command. + +
+ +Q: What should I do if I cannot find the whl package for MindInsight or MindArmour on the installation page of the MindSpore website? + +A: You can download the whl package from the official [MindSpore Website download page](https://www.mindspore.cn/versions) and manually install it via `pip install`. + +### Source Code Compilation Installation + +Q: What should I do if the compilation of MindSpore source code takes too long or the process is constantly interrupted by errors? + +A: MindSpore imports third-party dependencies through the submodule mechanism, among which `protobuf` v3.8.0 might not have an optimal or steady download speed; it is recommended that you cache the packages in advance. + +
+ +Q: How do I change the installation directory of the third-party libraries? + +A: The third-party libraries are installed in build/mindspore/.mslib by default. You can change the installation directory by setting the environment variable MSLIBS_CACHE_PATH, e.g. `export MSLIBS_CACHE_PATH=~/.mslib`. + +
+ +Q: What should I do if the software version required by MindSpore is not the same as the default software version of Ubuntu? + +A: At the moment some software might need a manual upgrade. (**Note**: MindSpore requires Python 3.7.5 and gcc 7.3; the default versions in Ubuntu 16.04 are Python 3.5 and gcc 5, whereas the ones in Ubuntu 18.04 are Python 3.7.3 and gcc 7.4.) + +
+ +Q: What should I do if there is a prompt `tclsh not found` when I compile MindSpore from source code? + +A: Please install the software manually if there is any suggestion of certain `software not found`. + +### Environment Variables + +Q: Some frequently-used environment settings need to be reset in each newly started terminal window, which is easy to forget. What should I do? + +A: You can write the frequently-used environment settings to `~/.bash_profile` or `~/.bashrc` so that the settings take effect immediately when you start a new terminal window. + +### Verifying the Installation + +Q: After MindSpore is installed on a CPU of a PC, an error message `the pointer[session] is null` is displayed during code verification. The specific code is as follows. How do I verify whether MindSpore is successfully installed? +```python +import numpy as np +from mindspore import Tensor +from mindspore.ops import functional as F +import mindspore.context as context + +context.set_context(device_target="Ascend") +x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) +y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) +print(F.tensor_add(x,y)) +``` + +A: After MindSpore is installed on a CPU hardware platform, run the `python -c 'import mindspore'` command to check whether MindSpore is successfully installed. If no error message such as `No module named 'mindspore'` is displayed, MindSpore is successfully installed. The verification code above is used only to verify whether an Ascend platform is successfully installed. + +## Supported Operators + +Q: What can I do if the LSTM example on the official website cannot run on Ascend? + +A: Currently, the LSTM runs only on a GPU or CPU and does not support the Ascend hardware environment. You can click [here](https://www.mindspore.cn/docs/en/master/operator_list.html) to view the supported operators. + +
+ +Q: When conv2d is set to (3,10) and the input is Tensor[2,2,10,10], running on Ascend on ModelArts reports the error message `FM_W+pad_left+pad_right-KW>=strideW`, while no error message is reported when it runs on a CPU. What should I do? + +A: This is a TBE operator restriction: the width of the input x must be greater than that of the kernel. The CPU does not have this operator restriction, therefore no error is reported. + +## Network Models + +Q: Which framework models can be directly read by MindSpore? What formats are supported? + +A: MindSpore uses protocol buffers (protobuf) to store training parameters and cannot directly read models from other frameworks. If you want to use a .ckpt file trained by another framework, read the parameters and then call the save_checkpoint API of MindSpore to save them as a .ckpt file that MindSpore can read. + +
+ +Q: How do I use models trained by MindSpore on Ascend 310? + +A: Ascend 310 supports the offline model (OM) format. Therefore, you need to export the model in Open Neural Network Exchange (ONNX) or Ascend intermediate representation (AIR) format and then convert it into the OM format supported by Ascend 310. For details, see [Multi-Platform Inference](https://www.mindspore.cn/tutorial/en/master/use/multi_platform_inference.html). + +
+ +Q: How do I modify parameters (such as the dropout value) on MindSpore? + +A: When building a network, use `if self.training: x = dropout(x)`. During verification, call `network.set_train(False)` before execution to disable the dropout function. During training, call `network.set_train(True)` to enable the dropout function. + +
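+A minimal sketch of the behavior described in the previous answer is shown below; the layer and values are illustrative only and not part of the original answer:
+
+```python
+import numpy as np
+import mindspore
+import mindspore.nn as nn
+from mindspore import Tensor
+
+dropout = nn.Dropout(keep_prob=0.5)
+x = Tensor(np.ones([2, 4]), mindspore.float32)
+
+dropout.set_train(True)   # training: dropout randomly zeroes elements and rescales the rest
+print(dropout(x))
+
+dropout.set_train(False)  # evaluation: dropout is bypassed and x passes through unchanged
+print(dropout(x))
+```
+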
+ +Q: Where can I view the sample code or tutorial of MindSpore training and inference? + +A: Please visit the [MindSpore official website](https://www.mindspore.cn/tutorial/en/master/index.html). + +
+ +Q: What types of models are currently supported by MindSpore for training? + +A: MindSpore has basic support for common training scenarios. Please refer to the [Release note](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md) for detailed information. + +
+ +Q: What are the available recommendation or text generation networks or models provided by MindSpore? + +A: Currently, recommendation models such as Wide & Deep, DeepFM, and NCF are under development. In the natural language processing (NLP) field, Bert\_NEZHA is available and models such as MASS are under development. You can rebuild the network into a text generation network based on the scenario requirements. Please stay tuned for updates on the [MindSpore Model Zoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). + +
+ +Q: How simple can the MindSpore model training code be? + +A: MindSpore provides high-level Model APIs. Apart from the network definition, model training can be completed using only a few lines of code in most scenarios. + +## Platform and System + +Q: Can MindSpore be installed on Ascend 310? + +A: Ascend 310 can only be used for inference. MindSpore supports training on Ascend 910. The trained model can be converted into an .om model for inference on Ascend 310. + +
+ +Q: Does MindSpore require computing units such as GPUs and NPUs? What hardware support is required? + +A: MindSpore currently supports CPU, GPU, Ascend, and NPU. You can try out MindSpore through Docker images on laptops or in environments with GPUs. Some models in the MindSpore Model Zoo support GPU-based training and inference, and other models are being improved. For distributed parallel training, MindSpore supports multi-GPU training. You can obtain the latest information from the [Road Map](https://www.mindspore.cn/docs/en/master/roadmap.html) and [project release notes](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md). + +
+ +Q: Does MindSpore have any plan to support other types of heterogeneous computing hardware? + +A: MindSpore provides a pluggable device management interface so that developers can easily integrate other types of heterogeneous computing hardware, such as FPGA, into MindSpore. We welcome more backend support in MindSpore from the community. + +
+ +Q: What is the relationship between MindSpore and ModelArts? Can MindSpore be used on ModelArts? + +A: ModelArts is an online training and inference platform on HUAWEI CLOUD. MindSpore is a Huawei deep learning framework. You can view the tutorials on the [MindSpore official website](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/use_on_the_cloud.html) to learn how to train MindSpore models on ModelArts. + +
+ +Q: Does MindSpore support Windows 10? + +A: The MindSpore CPU version can be installed on Windows 10. For details about the installation procedure, please refer to the [MindSpore official website tutorial](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/mindspore_cpu_win_install.html). + +## Backend Running + +Q: What can I do if the error message `device target [CPU] is not supported in pynative mode` is displayed when I run a MindSpore operator? + +A: Currently, the PyNative mode supports only Ascend and GPU and does not support the CPU. + +
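+A minimal sketch of selecting a supported mode/target combination is shown below; this workaround is illustrative only (it assumes a CPU build of MindSpore) and is not part of the original answer:
+
+```python
+import numpy as np
+import mindspore
+from mindspore import Tensor, context
+from mindspore.ops import functional as F
+
+# PyNative mode is not available on CPU, so fall back to graph mode.
+context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+x = Tensor(np.ones([2, 2]), mindspore.float32)
+y = Tensor(np.ones([2, 2]), mindspore.float32)
+print(F.tensor_add(x, y))
+```
+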
+ +Q: What can I do if the error message `Pynative run op ExpandDims failed` is displayed when the ExpandDims operator is used? The code is as follows: + +```python +import numpy as np +import mindspore +from mindspore import Tensor, context +from mindspore.ops import operations as P + +context.set_context( +mode=context.GRAPH_MODE, +device_target='ascend') +input_tensor=Tensor(np.array([[2,2],[2,2]]),mindspore.float32) +expand_dims=P.ExpandDims() +output=expand_dims(input_tensor,0) +``` + +A: The problem is that Graph mode is selected in `set_context`, but the operator is then executed in the PyNative way. As a result, an error is reported. MindSpore supports the following running modes, each optimized for either debugging or running: + +- PyNative mode: dynamic graph mode. In this mode, operators in the neural network are delivered and executed one by one, facilitating the compilation and debugging of the neural network model. + +- Graph mode: static graph mode. In this mode, the neural network model is compiled into an entire graph and then delivered for execution. This mode uses technologies such as graph optimization to improve the running performance and facilitates large-scale deployment and cross-platform running. +You can select a proper mode and coding style to complete the training by referring to the official website [tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/debugging_in_pynative_mode.html). + +## Programming Language Extensions + +Q: Recently announced programming languages such as taichi provide Python extensions that can be used directly, for example `import taichi as ti`. Does MindSpore have similar support? + +A: MindSpore supports Python native expression via `import mindspore`. + +
+ +Q: Does MindSpore plan to support more programming languages other than Python? + +A: MindSpore currently supports Python extensions; bindings for languages like C++, Rust, and Julia are on the way. + +## Supported Features + +Q: Does MindSpore have a lightweight on-device inference engine? + +A: MindSpore has its own on-device inference engine. In the current version, some functions of on-device inference have been open-sourced. The MindSpore on-device inference engine is expected to be updated at the end of August. By then, it will be more comprehensive and powerful in terms of usability, performance, operator completeness, and third-party model support. + +Q: How does MindSpore implement semantic collaboration and processing? Is the popular Formal Concept Analysis (FCA) used? + +A: The MindSpore framework does not support FCA. For semantic models, you can call third-party tools to perform FCA in the data preprocessing phase. MindSpore supports Python, so importing a third-party FCA package via `import FCA` could do the trick. + +
+ +Q: Now that the training and inference functions on the cloud are relatively mature, does MindSpore have any plans for the edge and device side? + +A: MindSpore is a unified cloud-edge-device training and inference framework. Edge has been considered in its design, so MindSpore can perform inference at the edge. The open-source version will support Ascend 310-based inference. The optimizations supported in the current inference stage include quantization, operator fusion, and memory overcommitment. + +
+ +Q: How does MindSpore support automatic parallelism? + +A: Automatic parallelism on CPUs and GPUs is still being improved. You are advised to use the automatic parallelism feature on the Ascend 910 AI processor. Follow our open source community and apply for a MindSpore developer experience environment for trial use. + +
+ +Q: Does MindSpore have a module that can implement object detection algorithms as TensorFlow does? + +A: TensorFlow's object detection pipeline API belongs to TensorFlow's Model module. After MindSpore's detection models are complete, similar pipeline APIs will be provided. + +
+ +Q: How do I migrate scripts or models of other frameworks to MindSpore? + +A: For details about script or model migration, please visit the [MindSpore official website](https://www.mindspore.cn/tutorial/en/master/advanced_use/network_migration.html). + +
+ +Q: Does MindSpore provide open-source e-commerce datasets? + +A: No. Please stay tuned for updates on the [MindSpore official website](https://www.mindspore.cn/en). \ No newline at end of file diff --git a/docs/source_en/design/mindinsight/images/training_visualization_architecture.png b/docs/source_en/design/mindinsight/images/training_visualization_architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..6b104eb964711f7855f5343c6ed66ed7573ca2cb Binary files /dev/null and b/docs/source_en/design/mindinsight/images/training_visualization_architecture.png differ diff --git a/docs/source_en/design/mindinsight/images/training_visualization_data_flow.png b/docs/source_en/design/mindinsight/images/training_visualization_data_flow.png new file mode 100644 index 0000000000000000000000000000000000000000..102475c05471b2c19bb645d56a5a05952295b501 Binary files /dev/null and b/docs/source_en/design/mindinsight/images/training_visualization_data_flow.png differ diff --git a/docs/source_en/design/mindinsight/images/training_visualization_data_model.png b/docs/source_en/design/mindinsight/images/training_visualization_data_model.png new file mode 100644 index 0000000000000000000000000000000000000000..9efad73f9ab5f70f6afaa4ac39953b487d815137 Binary files /dev/null and b/docs/source_en/design/mindinsight/images/training_visualization_data_model.png differ diff --git a/docs/source_en/design/mindinsight/training_visual_design.md b/docs/source_en/design/mindinsight/training_visual_design.md new file mode 100644 index 0000000000000000000000000000000000000000..fdc47ea1e516d92e509fa6d49a50b5f83ac7ad38 --- /dev/null +++ b/docs/source_en/design/mindinsight/training_visual_design.md @@ -0,0 +1,130 @@ +# Overall Design of Training Visualization + + + +- [Overall Design of Training Visualization](#overall-design-of-training-visualization) + - [Logical Architecture of Training Visualization](#logical-architecture-of-training-visualization) + - [Architecture of Training Information Collection](#architecture-of-training-information-collection) + - [Architecture of Training Information Analysis and Display](#architecture-of-training-information-analysis-and-display) + - [Code Organization](#code-organization) + - [Training Visualization Data Model](#training-visualization-data-model) + - [Training Information Data Flow](#training-information-data-flow) + - [Data Model](#data-model) + - [Training Job](#training-job) + - [Lineage Data](#lineage-data) + - [Training Process Data](#training-process-data) + + + + + +[MindInsight](https://gitee.com/mindspore/mindinsight) is a visualized debugging and tuning component of MindSpore. MindInsight can be used to complete tasks such as training visualization, performance tuning, and precision tuning. + +Training visualization includes functions such as training dashboard, model lineage, and data lineage. Training dashboard includes functions such as scalar, parameter distribution, computational graph, data graph, and data sampling. + +This document describes the logical architecture, code organization, and data model of the MindInsight training visualization function. + +## Logical Architecture of Training Visualization + +The logical architecture of training visualization is divided into two parts: architecture of training information collection and architecture of training information analysis and display. 
+ +![Logical architecture of MindInsight training visualization](./images/training_visualization_architecture.png) + +Figure 1 Logical architecture of MindInsight training visualization + +### Architecture of Training Information Collection + +The training information collection function in MindSpore consists of training information collection API module and training information persistence module. + +Training information collection APIs include: + +- Training information collection API based on the summary operator. This API contains four summary operators, that is, the ScalarSummary operator for recording scalar data, the ImageSummary operator for recording image data, the HistogramSummary operator for recording parameter distribution histogram data, and the TensorSummary operator for recording tensor data. For details about the operators, see [Operator List](https://www.mindspore.cn/docs/en/master/operator_list.html). + +- Training information collection API based on the Python API. You can use the [SummaryRecord.add_value](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.train.html#mindspore.train.summary.SummaryRecord.add_value) method to collect training information in Python code. + +- Easy-to-use training information collection callback. The [SummaryCollector](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.train.html#mindspore.train.callback.SummaryCollector) callback function can be used to conveniently collect common training information to training logs. + +The training information persistence module mainly includes a summary_record module used to manage a cache and a write_pool module used to process data in parallel and write data into a file. After the training information is made persistent, it is stored in the training log file (summary file). + +### Architecture of Training Information Analysis and Display + +The architecture of training information analysis and display in MindInsight consists of the WebUI and backend. The backend can be divided into the data loading and cache layer, service logic layer, and API layer from bottom to top. The data loading and cache layer consists of the training log discovery module, training log parsing module, and cache management module. The service logic layer consists of the training dashboard service module and lineage service module. The API layer consists of the RESTful API module. Functions of each module are as follows: + +- Training log discovery module: scans and discovers training log directories that contain training log files in the specified training log root directory (summary-base-dir). Only directories containing training log files are identified as training log directories. + +- Training log parsing module: parses training log files. + +- Cache management module: manages training log parsing tasks and caches the parsing results. It periodically calls the training log discovery module to scan the latest training log directory list. Then, it calls the parsing module to parse the file content and stores the parsing result in the cache for query on the UI. + +- Training dashboard module: provides the service logic of the training dashboard function to support the training dashboard data query on the UI. + +- Lineage module: provides service logic of model lineage and data lineage to support lineage data query on the UI. + +- RESTful API module: encapsulates an API provided by a service module into a RESTful API. 
+ +## Code Organization + +The following describes some important directories in MindInsight code repository. + +|Level-1 Directory|Level-2 Directory|Level-3 Directory|Description| +|---|---|---|---| +|build|||Code related to compilation and building | +|mindinsight||| +||backend||RESTful API | +|||datavisual|RESTful API related to training dashboard | +|||lineagemgr|RESTful API related to lineage | +||datavisual||Training dashboard module, including the data loading and cache layer code | +|||data_transform|Data loading and cache layer | +||lineagemgr||Lineage module | +||ui||MindInsight WebUI | +|tests|||Test case directory | + +## Training Visualization Data Model + +### Training Information Data Flow + +The training information is generated during training process. You can use the training information collection API to collect the training information and save it to a disk using the training information persistence module to generate a training log file (summary file). After the training log file is generated, you can use MindInsight to visualize the file information. + +![Training information data flow](./images/training_visualization_data_flow.png) + +Figure 2 Training information data flow + +### Data Model + +Figure 3 shows a brief data model of MindInsight. MindInsight identifies a training log directory as a training job. A training job is the minimum management unit of MindInsight. A training job can be associated with 0 to 1 piece of lineage data and 0 to 1 piece of training process data. The training process data has a rich structure. Each piece of specific data can be uniquely determined based on the given plugin name, tag, and step. These concepts are described in the following. + +![Data model](./images/training_visualization_data_model.png) + +Figure 3 Data model represented by a UML class diagram + +#### Training Job + +MindInsight uses directories to distinguish different training jobs. To distinguish training log files of different training jobs, you need to specify the directory for storing training log files for both `SummaryCollector` and `SummaryRecord`. Training log files in the same directory are considered as the training data generated in the same training job. Training log files in different directories are considered as the training data generated in different training jobs. + +In MindInsight code, a training job is called a TrainJob. A TrainJob ID is the name of the directory where the training log is located, for example, ./train_my_lenet_1. + +During a training process, a lineage data file (whose name ends with _lineage) and a training process data file (whose name ends with _MS) are generated. The lineage data mainly describes an invariant attribute of the training from a global perspective, for example, a dataset path used for training, an optimizer used for training, and user-defined lineage information. The most prominent feature of the lineage data file is that it does not change during the training process. The training process data mainly describes a change status of the training, for example, a loss value, parameter distribution, and image data sent to the model in a step. The most prominent feature of the training process data file is that each step changes. + +It should be noted that the classification about whether the training information changes is not absolute. For example, the training process data file contains computational graph data, which is determined when the training starts. 
+ +#### Lineage Data + +The lineage data describes the invariant attribute of a training from a global perspective. When MindInsight identifies multiple training log directories, the lineage data of these trainings is organized and displayed in a table for comparison and analysis. + +#### Training Process Data + +- Plugin Name (plugin_name) + + The training data is classified into scalar, histogram, image, and tensor by type. In MindInsight, these types are called plugin names (plugin_name) which are defined in the `mindinsight.datavisual.common.enums.PluginNameEnum` file. + +- Tag + + No matter which type the data belongs to, the data is further divided into different sequences according to the tag. Generally, tags are named by users to distinguish data. For example, the tag of a scalar that records a loss value can be named loss. When processing data, MindInsight automatically adds a suffix to the tag based on the plugin name. For example, if a scalar's tag is loss, the tag is automatically renamed loss/scalar. + +- Step + + The training process data is generated in each training step. To distinguish them, data is marked with the corresponding step number. + +- Data Query and Display + + When displaying data, you might want to see how the data under a tag changes with the training process. Therefore, when querying data, you do not need to specify the step number. Instead, you can specify the training job, plugin name, and tag to query data of all steps under the tag. \ No newline at end of file diff --git a/docs/source_en/index.rst b/docs/source_en/index.rst index fbaa777911af3061e6c82f6d035e114c87ed9f6c..b998ceb9e7171ee985df6dad5a31ce4ef7528c5f 100644 --- a/docs/source_en/index.rst +++ b/docs/source_en/index.rst @@ -17,5 +17,6 @@ MindSpore Documentation operator_list constraints_on_network_construction glossary + FAQ help_seeking_path community diff --git a/docs/source_en/operator_list.md b/docs/source_en/operator_list.md index c6eb5188b5d2a33841156c7fe87d2b36e04a47fb..b4c760c3175107added8c5376538b050a492c89f 100644 --- a/docs/source_en/operator_list.md +++ b/docs/source_en/operator_list.md @@ -7,7 +7,11 @@ - [Operator List](#operator-list) - [mindspore.nn](#mindsporenn) - [mindspore.ops.operations](#mindsporeopsoperations) + - [mindspore.ops.functional](#mindsporeopsfunctional) - [Implicit Type Conversion](#implicit-type-conversion) + - [conversion rules](#conversion-rules) + - [data types involved in conversion](#data-types-involved-in-conversion) + - [support ops](#support-ops) @@ -23,7 +27,7 @@ | [mindspore.nn.ReLU6](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.ReLU6) |Supported | Supported | Supported |layer/activation | [mindspore.nn.HSwish](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.HSwish) | Doing | Supported | Doing |layer/activation | [mindspore.nn.HSigmoid](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.HSigmoid) | Doing | Supported | Doing |layer/activation -| [mindspore.nn.LeakyReLU](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LeakyReLU) | Supported |Doing | Doing |layer/activation +| [mindspore.nn.LeakyReLU](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LeakyReLU) | Supported |Supported | Doing |layer/activation | [mindspore.nn.Tanh](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Tanh) | Supported | 
Supported | Doing |layer/activation | [mindspore.nn.GELU](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.GELU) | Supported | Supported | Doing |layer/activation | [mindspore.nn.Sigmoid](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Sigmoid) | Supported |Supported | Doing |layer/activation @@ -41,12 +45,12 @@ | [mindspore.nn.Conv2d](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv2d) | Supported | Supported | Supported |layer/conv | [mindspore.nn.Conv2dTranspose](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv2dTranspose) | Supported | Supported | Doing |layer/conv | [mindspore.nn.Conv2dBnAct](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv2dBnAct) | Supported | Supported | Supported |layer/conv -| [mindspore.nn.Conv1d](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1d) | Supported | Doing | Doing |layer/conv -| [mindspore.nn.Conv1dTranspose](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1dTranspose) | Supported | Doing | Doing |layer/conv -| [mindspore.nn.Embedding](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Embedding) |Doing | Supported | Doing |layer/embedding +| [mindspore.nn.Conv1d](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1d) | Supported | Supported | Doing |layer/conv +| [mindspore.nn.Conv1dTranspose](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1dTranspose) | Supported | Supported | Doing |layer/conv +| [mindspore.nn.Embedding](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Embedding) |Supported | Supported | Doing |layer/embedding | [mindspore.nn.ImageGradients](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.ImageGradients) | Doing |Doing | Doing |layer/image | [mindspore.nn.SSIM](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SSIM) | Supported | Supported | Doing |layer/image -| [mindspore.nn.PSNR](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.PSNR) | Doing |Doing | Doing |layer/image +| [mindspore.nn.PSNR](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.PSNR) | Supported |Doing | Doing |layer/image | [mindspore.nn.CentralCrop](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.CentralCrop) | Supported |Doing | Doing |layer/image | [mindspore.nn.LSTM](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LSTM) | Doing | Supported | Supported |layer/lstm | [mindspore.nn.GlobalBatchNorm](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.GlobalBatchNorm) | Supported |Doing | Doing |layer/normalization @@ -60,11 +64,12 @@ | [mindspore.nn.LinSpace](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LinSpace) | Supported | Doing | Doing | layer/normalization | [mindspore.nn.MaxPool2d](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.MaxPool2d) | Supported | Supported | Supported |layer/pooling | 
[mindspore.nn.AvgPool2d](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.AvgPool2d) | Supported | Supported | Doing |layer/pooling -| [mindspore.nn.L1Loss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.L1Loss) |Doing |Doing | Doing |loss/loss +| [mindspore.nn.L1Loss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.L1Loss) |Supported |Supported | Doing |loss/loss | [mindspore.nn.MSELoss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.MSELoss) | Supported |Doing | Doing |loss/loss | [mindspore.nn.SmoothL1Loss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SmoothL1Loss) |Supported |Doing | Doing |loss/loss | [mindspore.nn.SoftmaxCrossEntropyWithLogits](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SoftmaxCrossEntropyWithLogits) | Supported | Supported | Doing |loss/loss -| [mindspore.nn.SoftmaxCrossEntropyExpand](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SoftmaxCrossEntropyExpand) | Supported |Doing | Doing |loss/loss +| [mindspore.nn.SoftmaxCrossEntropyExpand](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SoftmaxCrossEntropyExpand) | Supported |Supported | Doing |loss/loss +| [mindspore.nn.CosineEmbeddingLoss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.CosineEmbeddingLoss) |Supported |Supported | Doing |loss/loss | [mindspore.nn.ProximalAdagrad](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.ProximalAdagrad) | Supported | Doing | Doing |optim/ProximalAdagrad | [mindspore.nn.LazyAdam](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LazyAdam) | Supported | Doing | Doing |optim/lazyadam | [mindspore.nn.Adam](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Adam) | Supported |Doing | Doing |optim/adam @@ -139,7 +144,7 @@ | [mindspore.ops.operations.ApplyProximalAdagrad](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ApplyProximalAdagrad) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.FusedSparseLazyAdam](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FusedSparseLazyAdam) | Doing | Doing | Supported | nn_ops | [mindspore.ops.operations.FusedSparseAdam](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FusedSparseAdam) | Doing | Doing | Supported | nn_ops -| [mindspore.ops.operations.SmoothL1Loss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SmoothL1Loss) | Supported | Doing | Doing | nn_ops +| [mindspore.ops.operations.SmoothL1Loss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SmoothL1Loss) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.SGD](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SGD) | Supported | Supported | Doing | nn_ops | 
[mindspore.ops.operations.LayerNorm](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.LayerNorm) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.L2Normalize](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.L2Normalize) | Supported | Doing | Doing | nn_ops @@ -157,7 +162,7 @@ | [mindspore.ops.operations.ROIAlign](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ROIAlign) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.Adam](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Adam) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.BinaryCrossEntropy](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.BinaryCrossEntropy) | Supported | Supported | Doing | nn_ops -| [mindspore.ops.operations.KLDivLoss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.KLDivLoss) | Doing | Supported | Doing | nn_ops +| [mindspore.ops.operations.KLDivLoss](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.KLDivLoss) | Doing | Supported | Doing | nn_ops | [mindspore.ops.operations.LARSUpdate](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.LARSUpdate) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.Softsign](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Softsign) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.TensorAdd](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.TensorAdd) | Supported | Supported | Supported | math_ops @@ -189,7 +194,7 @@ | [mindspore.ops.operations.Minimum](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Minimum) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.Maximum](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Maximum) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.RealDiv](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.RealDiv) | Supported | Supported | Doing | math_ops -| [mindspore.ops.operations.Div](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Div) | Supported | Doing | Doing | math_ops +| [mindspore.ops.operations.Div](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Div) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.DivNoNan](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.DivNoNan) | Supported | Doing | Doing | math_ops | [mindspore.ops.operations.FloorDiv](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FloorDiv) | Supported | Supported | Doing | math_ops | 
[mindspore.ops.operations.Floor](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Floor) | Supported | Supported | Doing | math_ops @@ -258,7 +263,7 @@ | [mindspore.ops.operations.TupleToArray](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.TupleToArray) | Supported | Supported | Supported | array_ops | [mindspore.ops.operations.ScalarToArray](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScalarToArray) | Supported | Supported | Supported | array_ops | [mindspore.ops.operations.ScalarToTensor](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScalarToTensor) | Supported | Supported | Supported | array_ops -| [mindspore.ops.operations.InvertPermutation](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.InvertPermutation) | Supported | Doing | Doing | array_ops +| [mindspore.ops.operations.InvertPermutation](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.InvertPermutation) | Supported | Supported | Doing | array_ops | [mindspore.ops.operations.Argmax](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Argmax) | Supported | Supported | Supported | array_ops | [mindspore.ops.operations.Argmin](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Argmin) | Supported | Doing | Doing | array_ops | [mindspore.ops.operations.ArgMaxWithValue](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ArgMaxWithValue) | Supported | Supported | Doing | array_ops @@ -327,8 +332,8 @@ | [mindspore.ops.operations.StandardNormal](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.StandardNormal) | Supported | Supported | Doing | random_ops | [mindspore.ops.operations.Gamma](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Gamma) | Supported | Doing | Doing | random_ops | [mindspore.ops.operations.Poisson](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Poisson) | Supported | Doing | Doing | random_ops -| [mindspore.ops.operations.UniformInt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformInt) | Supported | Doing | Doing | random_ops -| [mindspore.ops.operations.UniformReal](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformReal) | Supported | Doing | Doing | random_ops +| [mindspore.ops.operations.UniformInt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformInt) | Supported | Supported | Doing | random_ops +| [mindspore.ops.operations.UniformReal](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformReal) | Supported | Supported | Doing | random_ops | 
[mindspore.ops.operations.RandomChoiceWithMask](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.RandomChoiceWithMask) | Doing| Supported | Doing | random_ops | [mindspore.ops.operations.RandomCategorical](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.RandomCategorical) | Supported| Doing | Doing | random_ops | [mindspore.ops.operations.ScalarCast](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScalarCast) | Supported | Supported | Supported | inner_ops @@ -338,6 +343,42 @@ | [mindspore.ops.operations.Xdivy](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Xdivy) | Supported | Doing | Doing | math_ops | [mindspore.ops.operations.Xlogy](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Xlogy) | Supported | Doing | Doing | math_ops +## mindspore.ops.functional + +| Operation | functional Operation +| :----------- | :----------- +| [mindspore.ops.operations.Pack](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pack) | pack +| [mindspore.ops.operations.TensorAdd](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.TensorAdd) | tensor_add +| [mindspore.ops.operations.AssignSub](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.AssignSub) | assign_sub +| [mindspore.ops.operations.AddN](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.AddN) | addn +| [mindspore.ops.operations.Square](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Square) | square +| [mindspore.ops.operations.Sqrt](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Sqrt) | sqrt +| [mindspore.ops.operations.Equal](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Equal) | equal +| [mindspore.ops.operations.NotEqual](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.NotEqual) | not_equal +| [mindspore.ops.operations.LogicalNot](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.LogicalNot) | logical_not +| [mindspore.ops.operations.LogicalAnd](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.LogicalAnd) | logical_and +| [mindspore.ops.operations.LogicalOr](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.LogicalOr) | logical_or +| [mindspore.ops.operations.ExpandDims](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ExpandDims) | expand_dims +| [mindspore.ops.operations.DType](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.DType) | dtype +| 
[mindspore.ops.operations.Cast](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Cast) | cast +| [mindspore.ops.operations.Reshape](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Reshape) | reshape +| [mindspore.ops.operations.Shape](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Shape) | shape +| [mindspore.ops.operations.GatherV2](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.GatherV2) | gather +| [mindspore.ops.operations.Rank](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Rank) | rank +| [mindspore.ops.operations.Size](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Size) | size +| [mindspore.ops.operations.Fill](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Fill) | fill +| [mindspore.ops.operations.OnesLike](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.OnesLike) | ones_like +| [mindspore.ops.operations.Tile](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Tile) | tile +| [mindspore.ops.operations.Select](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Select) | select +| [mindspore.ops.operations.ScatterNd](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScatterNd) | scatter_nd +| [mindspore.ops.operations.GatherNd](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.GatherNd) | gather_nd +| [mindspore.ops.operations.ControlDepend](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ControlDepend) | control_depend +| [mindspore.ops.operations.Print](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Print) | print +| [mindspore.ops.operations.Assign](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Assign) | assign +| [mindspore.ops.operations.Pow](https://www.mindspore.cn/api/en/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pow) | tensor_pow + +> At present, functional supports some operators without attributes, which will be further completed in the future. 
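For example, the mapping above pairs `P.TensorAdd` with `tensor_add`. A minimal sketch comparing the two calling styles (the tensor values below are illustrative only):

```python
import numpy as np
import mindspore
from mindspore import Tensor
from mindspore.ops import functional as F
import mindspore.ops.operations as P

x = Tensor(np.array([1.0, 2.0, 3.0]), mindspore.float32)
y = Tensor(np.array([4.0, 5.0, 6.0]), mindspore.float32)

# operations style: instantiate the Primitive first, then call it
add = P.TensorAdd()
out_ops = add(x, y)

# functional style: call the pre-instantiated object directly
out_func = F.tensor_add(x, y)

print(out_ops)   # [5. 7. 9.]
print(out_func)  # [5. 7. 9.]
```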
+ ## Implicit Type Conversion ### conversion rules * Scalar and Tensor operations: during operation, the scalar is automatically converted to Tensor, and the data type is consistent with the Tensor data type involved in the operation; diff --git a/docs/source_zh_cn/FAQ.md b/docs/source_zh_cn/FAQ.md new file mode 100644 index 0000000000000000000000000000000000000000..1698974891db256511ea819d05b944e46ef69ae7 --- /dev/null +++ b/docs/source_zh_cn/FAQ.md @@ -0,0 +1,263 @@ +# FAQ + +`Ascend` `GPU` `CPU` `环境准备` `模型导出` `模型训练` `初级` `中级` `高级` + + + +- [FAQ](#faq) + - [安装类](#安装类) + - [pip安装](#pip安装) + - [源码编译安装](#源码编译安装) + - [环境变量](#环境变量) + - [安装验证](#安装验证) + - [算子支持](#算子支持) + - [网络模型](#网络模型) + - [平台系统](#平台系统) + - [后端运行](#后端运行) + - [编程语言拓展](#编程语言拓展) + - [特性支持](#特性支持) + + + + +## 安装类 + +### pip安装 +Q:使用pip安装时报错:`SSL:CERTIFICATE_VERIFY_FATLED`应该怎么办? + +A:在pip安装命令后添加参数 `--trusted-host=ms-release.obs.cn-north-4.myhuaweicloud.com`重试即可。 + +
+ +Q:pip安装MindSpore对Python版本是否有特别要求? + +A:MindSpore开发过程中用到了Python3.7+的新特性,因此建议您通过`conda`工具添加Python3.7.5的开发环境。 + +
+ +Q:使用pip安装时报错`ProxyError(Cannot connect to proxy)`,应该怎么办? + +A:此问题一般是代理配置问题,Ubuntu环境下可通过`export http_proxy={your_proxy}`设置代理;Windows环境可以在cmd中通过`set http_proxy={your_proxy}`进行代理设置。 + +
+ +Q:使用pip安装时提示错误,应该怎么办? + +A:请执行`pip -V`查看是否绑定了Python3.7+。如果绑定的版本不对,建议使用`python3.7 -m pip install`代替`pip install`命令。 + +
+ +Q:MindSpore网站安装页面找不到MindInsight和MindArmour的whl包,无法安装怎么办? + +A:您可以从[MindSpore网站下载地址](https://www.mindspore.cn/versions)下载whl包,通过`pip install`命令进行安装。 + +### 源码编译安装 + +Q:源码编译MindSpore过程时间过长,或时常中断该怎么办? + +A:MindSpore通过submodule机制引入第三方依赖包,其中`protobuf`依赖包(v3.8.0)下载速度不稳定,建议您提前进行包缓存。 + +
+ +Q:如何改变第三方依赖库安装路径? + +A:第三方依赖库的包默认安装在build/mindspore/.mslib目录下,可以设置环境变量MSLIBS_CACHE_PATH来改变安装目录,比如 `export MSLIBS_CACHE_PATH=~/.mslib`。 + +
+ +Q:MindSpore要求的配套软件版本与Ubuntu默认版本不一致怎么办? + +A:当前MindSpore只提供版本配套关系,需要您手动进行配套软件的安装升级。(**注明**:MindSpore要求Python3.7.5和gcc7.3,Ubuntu 16.04默认为Python3.5和gcc5,Ubuntu 18.04默认自带Python3.7.3和gcc7.4)。 + +
+ +Q:当源码编译MindSpore,提示`tclsh not found`时,应该怎么办? + +A:当有此提示时说明需要安装`tclsh`;如果仍提示缺少其他软件,同样需要安装其他软件。 + +### 环境变量 + +Q: 一些常用的环境变量设置,在新启动的终端窗口中需要重新设置,容易忘记应该怎么办? + +A: 常用的环境变量设置写入到`~/.bash_profile` 或 `~/.bashrc`中,可让环境变量设置在新启动的终端窗口中立即生效。 + +### 安装验证 + +Q:个人电脑CPU环境安装MindSpore后验证代码时报错:`the pointer[session] is null`,具体代码如下,该如何验证是否安装成功呢? +```python +import numpy as np +from mindspore import Tensor +from mindspore.ops import functional as F +import mindspore.context as context + +context.set_context(device_target="Ascend") +x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) +y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) +print(F.tensor_add(x,y)) +``` + +A:CPU硬件平台安装MindSpore后测试是否安装成功,只需要执行命令:`python -c 'import mindspore'`,如果没有显示`No module named 'mindspore'`等错误即安装成功。问题中的验证代码仅用于验证Ascend平台安装是否成功。 + +## 算子支持 + +Q:官网的LSTM示例在Ascend上跑不通 + +A:目前LSTM只支持在GPU和CPU上运行,暂不支持Ascend硬件环境,您可以[点击这里](https://www.mindspore.cn/docs/zh-CN/master/operator_list.html)查看算子支持情况。 + +
+ +Q:conv2d设置为(3,10),Tensor[2,2,10,10],在ModelArts上利用Ascend跑,报错:`FM_W+pad_left+pad_right-KW>=strideW`,CPU下不报错。 + +A:这是该算子TBE实现的限制,x的width必须大于kernel的width。CPU上该算子的实现没有这个限制,所以不报错。 + +## 网络模型 + +Q:MindSpore现支持直接读取哪些其他框架的模型?支持哪些格式呢? + +A:MindSpore采用protobuf存储训练参数,无法直接读取其他框架的模型。如果想用其他框架训练好的ckpt文件,可以先把参数读取出来,再调用MindSpore的save_checkpoint接口,就可以保存成MindSpore可以读取的ckpt文件格式了。 + +
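下面给出一个示意性的转换草图(仅为示例,假设权重已经用其他框架自身的接口读取为NumPy数组,且`weights`中的参数名与MindSpore网络中的参数名一致,接口形式请以当前版本的API文档为准):

```python
import numpy as np
from mindspore import Tensor
from mindspore.train.serialization import save_checkpoint

# 假设weights是从其他框架读取出来的"参数名 -> NumPy数组"字典(这里用示例数据代替)
weights = {"conv1.weight": np.ones([16, 3, 3, 3], np.float32),
           "fc1.bias": np.zeros([10], np.float32)}

# 组装成save_checkpoint所需的[{"name": ..., "data": Tensor}, ...]参数列表
param_list = [{"name": name, "data": Tensor(value)} for name, value in weights.items()]

# 保存为MindSpore可以读取的ckpt文件
save_checkpoint(param_list, "converted.ckpt")
```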
+ +Q:MindSpore训练的模型如何在Ascend 310 上使用? + +A:Ascend 310 支持OM模型,所以先导出ONNX或AIR模型,再转化为Ascend 310 支持的OM模型,具体步骤参考[多平台推理](https://www.mindspore.cn/tutorial/zh-CN/master/use/multi_platform_inference.html)。 + +
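下面给出一个导出AIR模型的示意草图(仅为示例,其中`LeNet5`网络、checkpoint文件名和输入shape均为假设值,请替换为实际的网络和数据;导出后再按上述教程转换为OM模型):

```python
import numpy as np
from mindspore import Tensor
from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
from lenet import LeNet5  # 假设的网络定义,仅作示意

# 构建网络并加载训练好的参数(checkpoint文件名为假设值)
net = LeNet5()
param_dict = load_checkpoint("lenet.ckpt")
load_param_into_net(net, param_dict)

# 构造一个与真实输入shape一致的样例输入,导出AIR格式模型
input_data = Tensor(np.ones([1, 1, 32, 32], np.float32))
export(net, input_data, file_name="lenet", file_format="AIR")
```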
+ +Q:MindSpore如何进行参数(如dropout值)修改? + +A:在构造网络的时候可以通过 `if self.training: x = dropout(x)` 的方式控制dropout是否生效,验证的时候,执行前设置`network.set_train(False)`,就可以不使用dropout;训练时设置`network.set_train(True)`,就可以使用dropout。 + +
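控制dropout是否生效的一个最简示意如下(仅为示例草图,网络结构为假设):

```python
import numpy as np
import mindspore
import mindspore.nn as nn
from mindspore import Tensor

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.dense = nn.Dense(4, 4)
        self.dropout = nn.Dropout(keep_prob=0.8)

    def construct(self, x):
        x = self.dense(x)
        x = self.dropout(x)  # dropout是否生效由网络的训练/推理状态决定
        return x

net = Net()
x = Tensor(np.ones([2, 4], np.float32))

net.set_train(True)   # 训练模式,dropout生效
out_train = net(x)

net.set_train(False)  # 验证/推理模式,dropout不生效
out_eval = net(x)
```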
+ +Q:从哪里可以查看MindSpore训练及推理的样例代码或者教程? + +A:可以访问[MindSpore官网教程](https://www.mindspore.cn/tutorial/zh-CN/master/index.html)。 + +
+ +Q:MindSpore支持哪些模型的训练? + +A:MindSpore针对典型场景均有模型训练支持,支持情况详见[Release note](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md)。 + +
+ +Q:MindSpore有哪些现成的推荐类或生成类网络或模型可用? + +A:目前正在开发Wide & Deep、DeepFM、NCF等推荐类模型,NLP领域已经支持Bert_NEZHA,正在开发MASS等模型,用户可根据场景需要改造为生成类网络,可以关注[MindSpore Model Zoo](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)。 + +
+ +Q:MindSpore模型训练代码能有多简单? + +A:除去网络定义,MindSpore提供了Model类的接口,大多数场景只需几行代码就可完成模型训练。 + +## 平台系统 + +Q:Ascend 310 不能安装MindSpore么? + +A:Ascend 310只能用作推理,MindSpore支持在Ascend 910训练,训练出的模型转化为.om模型可用于Ascend 310上推理。 + +
+ +Q:安装运行MindSpore时,是否要求平台有GPU、NPU等计算单元?需要什么硬件支持? + +A:MindSpore当前支持CPU、GPU和Ascend(NPU)。目前笔记本电脑或者有GPU的环境,都可以通过Docker镜像来试用。当前MindSpore Model Zoo中有部分模型已经支持GPU的训练和推理,其他模型也在不断地进行完善。在分布式并行训练方面,MindSpore当前支持GPU多卡训练。你可以通过[RoadMap](https://www.mindspore.cn/docs/zh-CN/master/roadmap.html)和项目[Release note](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md)获取最新信息。 + +
+ +Q:针对异构计算单元的支持,MindSpore有什么计划? + +A:MindSpore提供了可插拔式的设备管理接口,其他计算单元(比如FPGA)可快速灵活地实现与MindSpore的对接,欢迎您参与社区进行异构计算后端的开发工作。 + +
+ +Q:MindSpore与ModelArts是什么关系,在ModelArts中能使用MindSpore吗? + +A:ModelArts是华为公有云线上训练及推理平台,MindSpore是华为深度学习框架,可以查阅[MindSpore官网教程](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/use_on_the_cloud.html),教程中详细展示了用户如何使用ModelArts来做MindSpore的模型训练。 + +
+ +Q:MindSpore是否支持Windows 10? + +A:MindSpore CPU版本已经支持在Windows 10系统中安装,具体安装步骤可以查阅[MindSpore官网教程](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/mindspore_cpu_win_install.html)。 + +## 后端运行 + +Q:MindSpore的operation算子报错:`device target [CPU] is not supported in pynative mode` + +A:PyNative模式目前只支持Ascend和GPU,暂时还不支持CPU。 + +
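如果只有CPU环境,可以切换到Graph模式,并把算子封装在`Cell`中执行,示意如下(仅为示例草图):

```python
import numpy as np
import mindspore
import mindspore.nn as nn
from mindspore import Tensor, context

# CPU后端暂不支持PyNative模式,这里显式切换到Graph模式
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.relu = nn.ReLU()

    def construct(self, x):
        return self.relu(x)

net = Net()
x = Tensor(np.array([-1.0, 2.0, -3.0]), mindspore.float32)
print(net(x))  # 预期输出 [0. 2. 0.]
```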
+ +Q:使用ExpandDims算子报错:`Pynative run op ExpandDims failed`。具体代码: + +```python +import numpy as np +import mindspore +from mindspore import Tensor +from mindspore import context +import mindspore.ops.operations as P + +context.set_context( +mode=context.GRAPH_MODE, +device_target='Ascend') +input_tensor=Tensor(np.array([[2,2],[2,2]]),mindspore.float32) +expand_dims=P.ExpandDims() +output=expand_dims(input_tensor,0) +``` + +A:这里的问题是选择了Graph模式却使用了PyNative的写法,所以导致报错。MindSpore支持两种运行模式,在调试或者运行方面做了不同的优化: + +- PyNative模式:也称动态图模式,将神经网络中的各个算子逐一下发执行,方便用户编写和调试神经网络模型。 + +- Graph模式:也称静态图模式或者图模式,将神经网络模型编译成一整张图,然后下发执行。该模式利用图优化等技术提高运行性能,同时有助于规模部署和跨平台运行。 +用户可以参考[官网教程](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/debugging_in_pynative_mode.html)选择合适、统一的模式和写法来完成训练。 + +## 编程语言拓展 + +Q:最近出来的taichi编程语言有Python扩展,类似`import taichi as ti`就能直接用了,MindSpore是否也支持? + +A:MindSpore支持Python原生表达,`import mindspore`相关包即可使用。 + +
+ +Q:MindSpore是否(计划)支持多语言扩展? + +A:MindSpore目前支持Python扩展,针对C++、Rust、Julia等语言的支持正在开发中。 + +## 特性支持 + +Q:MindSpore有轻量的端侧推理引擎么? + +A:MindSpore有自己的端侧推理引擎,当前版本中已开源了端侧推理的部分功能。预计8月底会有更新,届时MindSpore端侧推理引擎在易用性,性能,算子完备度,以及第三方模型支持方面会更加完善和强大。 + +
+ +Q:MindSpore在语义协同和处理上是如何实现的?是否利用当前学术界流行的FCA理论? + +A:MindSpore框架本身并不需要支持FCA。对于语义类模型,用户可以调用第三方的工具在数据预处理阶段做FCA数学分析。MindSpore本身支持Python语言,`import FCA`相关包即可使用。 + +
+ +Q:当前在云上MindSpore的训练和推理功能是比较完备的,至于边端场景(尤其是终端设备)MindSpore有什么计划? + +A:MindSpore是端边云统一的训练和推理框架,支持将云侧训练的模型导出到Ascend AI处理器和终端设备进行推理。当前推理阶段支持的优化包括量化、算子融合、内存复用等。 + +
+ +Q:MindSpore自动并行支持情况如何? + +A:自动并行特性对CPU、GPU的支持还在完善中。推荐用户在Ascend 910 AI处理器上使用自动并行,可以关注开源社区,申请MindSpore开发者体验环境进行试用。 + +
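在Ascend 910多卡环境上开启自动并行的一个配置示意如下(仅为示例草图,`device_num`取值为假设,且需要提前完成分布式组网等环境配置):

```python
from mindspore import context
from mindspore.communication.management import init

# 以8卡Ascend 910环境为例(卡数为假设值)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
init()
context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=8)

# 之后按单机脚本构建网络并训练,框架会自动搜索并行切分策略
```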
+ +Q:MindSpore有没有类似基于TensorFlow实现的对象检测算法的模块? + +A:TensorFlow的对象检测Pipeline接口属于TensorFlow Model模块。待MindSpore检测类模型完备后,会提供类似的Pipeline接口。 + +
+ +Q:其他框架的脚本或者模型怎么迁移到MindSpore? + +A:关于脚本或者模型迁移,可以查询MindSpore官网中关于[网络迁移](https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/network_migration.html)的介绍。 + +
+ +Q:MindSpore是否附带开源电商类数据集? + +A:暂时还没有,可以持续关注[MindSpore官网](https://www.mindspore.cn)。 \ No newline at end of file diff --git a/docs/source_zh_cn/design.rst b/docs/source_zh_cn/design.rst index e975826526cc837b0c7071c673638556c7a43db9..5db1a3815235941f9a2a74420fe85dd36e1afad5 100644 --- a/docs/source_zh_cn/design.rst +++ b/docs/source_zh_cn/design.rst @@ -1,14 +1,16 @@ -设计文档 -=========== - -.. toctree:: - :maxdepth: 1 - - architecture - technical_white_paper - design/mindspore/distributed_training_design - design/mindinsight/profiler_design - design/mindinsight/training_visual_design - design/mindinsight/graph_visual_design - design/mindinsight/tensor_visual_design - design/mindarmour/differential_privacy_design +设计文档 +=========== + +.. toctree:: + :maxdepth: 1 + + architecture + technical_white_paper + design/mindspore/ir + design/mindspore/distributed_training_design + design/mindinsight/profiler_design + design/mindinsight/training_visual_design + design/mindinsight/graph_visual_design + design/mindinsight/tensor_visual_design + design/mindarmour/differential_privacy_design + design/mindarmour/fuzzer_design diff --git a/docs/source_zh_cn/design/mindarmour/fuzzer_design.md b/docs/source_zh_cn/design/mindarmour/fuzzer_design.md new file mode 100644 index 0000000000000000000000000000000000000000..022fc5b0fae557ebb62c4544395172c016ab855f --- /dev/null +++ b/docs/source_zh_cn/design/mindarmour/fuzzer_design.md @@ -0,0 +1,70 @@ +# AI模型安全测试 + + +- [AI模型安全测试](#ai模型安全测试) + - [背景](#背景) + - [Fuzzer设计图](#Fuzzer设计图) + - [Fuzzer流程](#Fuzzer流程) + - [代码实现](#代码实现) + - [参考文献](#参考文献) + + + + + +## 背景 + +不同于[传统程序的Fuzz安全测试](https://zhuanlan.zhihu.com/p/43432370),MindArmour针对深度神经网络,提供AI模型安全测试模块Fuzzer。根据神经网络的特点,引入神经元覆盖率[1]的概念,作为Fuzz的测试指导,引导Fuzz朝神经元覆盖率增加的方向生成样本,让输入能够激活更多的神经元,神经元值的分布范围更广,以充分测试DNN,探索不同类型的模型输出结果、模型错误行为。 + +## Fuzzer设计图 + +AI模型安全测试设计图如下。 + +![fuzz_architecture](./images/fuzz_architecture.png) + +在用户接口层,需要用户提供原始数据集`DataSet`、被测试模型`Model`和配置Fuzzer参数`Fuzzer configuration`。Fuzzer模块对模型和数据进行Fuzz测试后,返回安全评估报告`Security Report`。 + +Fuzzer架构主要包括三个模块: + +1. Natural Threat/Adversarial Example Generator(数据变异模块): + + 随机选择变异方法对种子数据变异生成多个变种。支持多种样本的变异策略, 包括: + + - 图像仿射变换方法如:平移、旋转、缩放、错切。 + - 基于图像像素值变化的方法如:改变对比度、亮度、模糊、加噪。 + - 基于对抗攻击的白盒、黑盒对抗样本生成方法,如FGSM、PGD、MDIIM。 + +2. Fuzzer moduler(变异指导模块): + + 对变异生成的数据进行fuzz测试,观察神经元覆盖率的变化情况,如果生成的数据使得神经元覆盖率增加,则加入变异的种子队列,用于下一轮的数据变异。目前支持的神经元覆盖率指标包括KMNC、NBC、SNAC[2]。 + +3. Evaluation(评估模块): + + 评估Fuzzer效果,生成数据的质量,变异方法的强度。支持3个类型5种指标,包括通用评价指标:accuracy,神经元覆盖率指标:kmnc, nbc,snac,对抗攻击评价指标:attack_success_rate。 + +## Fuzzer流程 + +![fuzz_process](./images/fuzz_process.png) + +具体的Fuzzer流程如下: + +1. 根据策略从种子队列中选择一个种子A。 +2. 随机选择变异策略,对种子A进行变异,生成多个变种数据A1,A2... +3. 用目标模型对变种A1,A2...进行预测,如果变种使得目标模型预测错误,则改变种进入Failed tests。 +4. 若目标模型对于变种的预测结果是正确的,用神经元覆盖率指标进行分析。 +5. 如果变种使得覆盖率增加,那么将该变种放入种子队列,用于下一轮变异。 + +通过多轮循环,我们获得一系列变异数据Fuzzed Tests,并进一步分析,从多个角度给出安全报告。可以用于深入分析神经网络模型的缺陷,从而针对这些缺陷,进行模型增强等,改善提升模型的通用性、鲁棒性。 + +## 代码实现 + +1. [fuzzing.py](https://gitee.com/mindspore/mindarmour/blob/master/mindarmour/fuzzing/fuzzing.py):Fuzzer总体流程。 +2. [model_coverage_metrics.py](https://gitee.com/mindspore/mindarmour/blob/master/mindarmour/fuzzing/model_coverage_metrics.py):神经元覆盖率指标,包括KMNC,NBC,SNAC。 +3. [image_transform.py](https://gitee.com/mindspore/mindarmour/blob/master/mindarmour/fuzzing/image_transform.py):图像变异方法,包括基于像素值的变化方法和仿射变化方法。 +4. [adversarial attacks](https://gitee.com/mindspore/mindarmour/tree/master/mindarmour/attacks):对抗样本攻击方法,包含多种黑盒、白盒攻击方法。 + +## 参考文献 + +[1] Pei K, Cao Y, Yang J, et al. 
Deepxplore: Automated whitebox testing of deep learning systems[C]//Proceedings of the 26th Symposium on Operating Systems Principles. ACM, 2017: 1-18. + +[2]Ma L, Juefei-Xu F, Zhang F, et al. Deepgauge: Multi-granularity testing criteria for deep learning systems[C]//Proceedings of the 33rd ACM/IEEE International Conference on Automated Software Engineering. ACM, 2018: 120-131. \ No newline at end of file diff --git a/docs/source_zh_cn/design/mindarmour/images/fuzz_architecture.png b/docs/source_zh_cn/design/mindarmour/images/fuzz_architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..d4e8b89bd9a9f4844c59790f5b2114d1d477f927 Binary files /dev/null and b/docs/source_zh_cn/design/mindarmour/images/fuzz_architecture.png differ diff --git a/docs/source_zh_cn/design/mindarmour/images/fuzz_process.png b/docs/source_zh_cn/design/mindarmour/images/fuzz_process.png new file mode 100644 index 0000000000000000000000000000000000000000..2e04347f7cfb0819562578a6be1e91b5cc7ce9d5 Binary files /dev/null and b/docs/source_zh_cn/design/mindarmour/images/fuzz_process.png differ diff --git a/docs/source_zh_cn/design/mindinsight/images/analyser_class_profiler.png b/docs/source_zh_cn/design/mindinsight/images/analyser_class_profiler.png index 3ece3c9f06b7f2c5f18bec0c490c42aeecdc40c3..3f785786eb8652e8d1cfc09795e48895da80eef9 100644 Binary files a/docs/source_zh_cn/design/mindinsight/images/analyser_class_profiler.png and b/docs/source_zh_cn/design/mindinsight/images/analyser_class_profiler.png differ diff --git a/docs/source_zh_cn/design/mindinsight/images/context_profiler.png b/docs/source_zh_cn/design/mindinsight/images/context_profiler.png index ea927428eeb5c975ab405fa80ce0163e7bd2242c..b3cfd166214f1e37a26c5ebc45b85bd24d52f4bb 100644 Binary files a/docs/source_zh_cn/design/mindinsight/images/context_profiler.png and b/docs/source_zh_cn/design/mindinsight/images/context_profiler.png differ diff --git a/docs/source_zh_cn/design/mindinsight/images/proposer_class_profiler.png b/docs/source_zh_cn/design/mindinsight/images/proposer_class_profiler.png index e5eb10df1fe3c67aff1b04d288eb7537113b6041..3e2d4363e92821b05cafc330573c981a1ab99bbf 100644 Binary files a/docs/source_zh_cn/design/mindinsight/images/proposer_class_profiler.png and b/docs/source_zh_cn/design/mindinsight/images/proposer_class_profiler.png differ diff --git a/docs/source_zh_cn/design/mindinsight/images/proposer_module_profiler.png b/docs/source_zh_cn/design/mindinsight/images/proposer_module_profiler.png index 73be9651a537055d9564fa6acc05eb24ec132ee7..909dd42c89715d49a11c35764d84aab231b91fb4 100644 Binary files a/docs/source_zh_cn/design/mindinsight/images/proposer_module_profiler.png and b/docs/source_zh_cn/design/mindinsight/images/proposer_module_profiler.png differ diff --git a/docs/source_zh_cn/design/mindspore/distributed_training_design.md b/docs/source_zh_cn/design/mindspore/distributed_training_design.md index 3346eb0db4df28c763b6b98554038315d0b00044..ae38fdd6bc47fb2215bcdc931fa0d46c953f9af0 100644 --- a/docs/source_zh_cn/design/mindspore/distributed_training_design.md +++ b/docs/source_zh_cn/design/mindspore/distributed_training_design.md @@ -10,9 +10,11 @@ - [集合通信](#集合通信) - [同步模式](#同步模式) - [数据并行](#数据并行) - - [设计原理](#设计原理) - - [代码实现](#代码实现) - - [其他并行](#其他并行) + - [数据并行原理](#数据并行原理) + - [数据并行代码](#数据并行代码) + - [自动并行](#自动并行) + - [自动并行原理](#自动并行原理) + - [自动并行代码](#自动并行代码) @@ -37,9 +39,9 @@ 这个小节介绍了在MindSpore中`ParallelMode.DATA_PARALLEL`数据并行模式是如何工作的。 -### 设计原理 +### 数据并行原理 -![数据并行图解](./images/data_parallel.png) +
数据并行图解
1. 环境依赖 @@ -61,7 +63,7 @@ 因为引入了梯度聚合操作,所以各卡的模型会以相同的梯度值一起进入参数更新步骤。因此MindSpore实现的是一种同步数据并行训练方式。理论上最终每卡训练出来的模型是相同的,如果网络中含有在样本维度的归约类型操作,网络的输出可能会有所差别,这是由数据并行的切分性质决定的。 -### 代码实现 +### 数据并行代码 1. 集合通信 @@ -73,8 +75,56 @@ - [grad_reducer.py](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/wrap/grad_reducer.py): 这个文件实现了梯度聚合的过程。对入参`grads`用`HyperMap`展开后插入`AllReduce`算子,这里采用的是全局通信组,用户也可以根据自己网络的需求仿照这个模块进行自定义开发。MindSpore中单机和分布式执行共用一套网络封装接口,在`Cell`内部通过`ParallelMode`来区分是否要对梯度做聚合操作,网络封装接口建议参考`TrainOneStepCell`代码实现。 -## 其他并行 +## 自动并行 -建设中,即将上线。 +自动并行作为MindSpore的关键特性,用于实现自动的数据并行加模型并行的混合并行训练方式,旨在帮助用户以单机的脚本表达并行算法逻辑,降低分布式训练难度,提高算法研发效率,同时又能保持训练的高性能。 +### 自动并行原理 + +自动并行架构图 + +1. 通用的张量排布模型 + + 在上面的架构图中,自动并行流程会对单机的正向计算图(ANF Graph)进行遍历,以算子(Distributed Operator)为单位对张量进行切分建模,表示一个算子的输入输出张量如何分布到集群各个卡上(Tensor Layout)。这种模型充分地表达了张量和设备间的映射关系,并且可以通过算法推导得到任意排布的张量间通信转换方式(Tensor Redistribution)。 + + 为了得到张量的排布模型,每个算子都具有切分策略(Parallel Strategy),它表示算子的各个输入在相应维度的切分情况。通常情况下只要满足以2为基、均匀分配的原则,张量的任意维度均可切分。以下图为例,这是一个三维矩阵乘操作,它的切分策略由两个元组构成,分别表示`input`和`weight`的切分形式。其中元组中的元素与张量维度一一对应,`2^N`为切分份数,`1`表示不切。当我们想表示一个数据并行切分策略时,即`input`的`batch`维度切分,其他维度不切,可以表达为`strategy=((2^N, 1, 1),(1, 1, 1))`;当表示一个模型并行切分策略时,即`weight`的`channel`维度切分,其他维度不切,可以表达为`strategy=((1, 1, 1),(1, 1, 2^N))`;当表示一个混合并行切分策略时,可以表达为`strategy=((2^N, 1, 1),(1, 1, 2^N))`。 + 算子切分定义 + + 依据算子的切分策略,框架将自动推导得到算子输入张量和输出张量的排布模型。这个排布模型由`device_matrix`,`tensor_shape`和`tensor map`组成,分别表示设备矩阵形状、张量形状、设备和张量维度间的映射关系。根据排布模型框架可以自动实现对整图的切分,并推导插入算子内张量重复计算及算子间不同排布的张量变换所需要的通信操作。以数据并行转模型并行为例,第一个数据并行矩阵乘的输出在`batch`维度存在切分,而第二个模型并行矩阵乘的输入需要全量张量,框架将会自动插入`AllGather`算子实现排布变换。 + + 张量排布变换 + + 总体来说这种分布式表达打破了数据并行和模型并行的边界,轻松实现混合并行。并且用户无需感知模型各切片放到哪个设备上运行,框架会自动调度分配。从脚本层面上,用户仅需构造单机网络,即可表达并行算法逻辑。 + +2. 高效的并行策略搜索算法 + + 当用户熟悉了算子的切分表达,并手动对算子配置切分策略,这就是`SEMI_AUTO_PARALLEL`半自动并行模式。这种方式对手动调优有帮助,但还是具有一定的调试难度,用户需要掌握并行原理,并根据网络结构、集群拓扑等计算分析得到高性能的并行方案。为了进一步帮助用户加速并行网络训练过程,在半自动并行模式的基础上,`AUTO_PARALLEL`自动并行模式引入了并行切分策略自动搜索的特性。自动并行围绕昇腾AI处理器构建代价函数模型(Cost Model),计算出一定数据量、一定算子在不同切分策略下的计算开销(Computation Cost),内存开销(Memory Cost)及通信开销(Communication Cost)。然后通过动态规划算法(Dynamic Programming),以单卡的内存上限为约束条件,高效地搜索出性能较优的切分策略。 + + 策略搜索这一步骤代替了用户手动指定模型切分,在短时间内可以得到较高性能的切分方案,极大降低了并行训练的使用门槛。 + + +3. 便捷的分布式自动微分 + + 传统的手动模型切分除了需要关注正向网络通信还需要考虑网络反向的并行运算,MindSpore通过将通信操作包装为算子,并利用框架原有的自动微分操作自动生成通信算子反向,所以即便在进行分布式训练时,用户同样只需关注网络的前向传播,真正实现训练的全自动并行。 + +### 自动并行代码 + +1. 张量排布模型 + - [tensor_layout](https://gitee.com/mindspore/mindspore/tree/master/mindspore/ccsrc/frontend/parallel/tensor_layout):这个目录下包含了张量排布模型相关功能的定义及实现。其中`tensor_layout.h`中声明了一个张量排布模型需要具备的成员变量`tensor_map_origin_`,`tensor_shape_`和`device_arrangement_`等。在`tensor_redistribution.h`中声明了实现张量排布间`from_origin_`和`to_origin_`变换的相关方法,将推导得到的重排布操作保存在`operator_list_`中返回,并计算得到重排布所需的通信开销`comm_cost_`, 内存开销`memory_cost_`及计算开销`computation_cost_`。 + +2. 分布式算子 + - [ops_info](https://gitee.com/mindspore/mindspore/tree/master/mindspore/ccsrc/frontend/parallel/ops_info):这个目录下包含了分布式算子的具体实现。在`operator_info.h`中定义了分布式算子实现的基类`OperatorInfo`,开发一个分布式算子需要继承于这个基类并显式实现相关的虚函数。其中`InferTensorInfo`,`InferTensorMap`和`InferDevMatrixShape`函数定义了推导该算子输入、输出张量排布模型的算法。`InferForwardCommunication`,`InferMirrorOps`等函数定义了切分该算子需要插入的额外计算、通信操作。`CheckStrategy`和`GenerateStrategies`函数定义了算子切分策略校验和生成。根据切分策略`SetCostUnderStrategy`将会产生该策略下分布式算子的并行开销值`operator_cost_`。 + +3. 
策略搜索算法 + - [auto_parallel](https://gitee.com/mindspore/mindspore/tree/master/mindspore/ccsrc/frontend/parallel/auto_parallel):这个目录下实现了并行策略搜索的算法。`graph_costmodel.h`定义了构图信息,其中每个点表示一个算子`OperatorInfo`,有向边`edge_costmodel.h`表示算子的输入输出关系及重排布的代价。`operator_costmodel.h`中定义了每个算子的代价模型,包括计算代价、通信代价和内存代价。`dp_algorithm_costmodel.h`主要描述了动态规划算法的主要流程,由一系列图操作组成。在`costmodel.h`中定义了cost和图操作的数据结构。 + +4. 设备管理 + - [device_manager.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/ccsrc/frontend/parallel/device_manager.h):这个文件实现了集群设备通信组的创建及管理。其中设备矩阵模型由`device_matrix.h`定义,通信域由`group_manager.h`管理。 + +5. 整图切分 + - [step_auto_parallel.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/ccsrc/frontend/parallel/step_auto_parallel.h), [step_parallel.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/ccsrc/frontend/parallel/step_parallel.h):这两个文件包含了自动并行流程的核心实现。首先由`step_auto_parallel.h`调用策略搜索流程并产生分布式算子的`OperatorInfo`,然后在`step_parallel.h`中处理算子切分和张量重排布等流程,对单机计算图进行分布式改造。 + + +6. 通信算子反向 + - [grad_comm_ops.py](https://gitee.com/mindspore/mindspore/blob/master/mindspore/ops/_grad/grad_comm_ops.py): 这个文件定义了`AllReduce`和`AllGather`等通信算子的反向操作。 diff --git a/docs/source_zh_cn/design/mindspore/images/auto_parallel.png b/docs/source_zh_cn/design/mindspore/images/auto_parallel.png new file mode 100644 index 0000000000000000000000000000000000000000..544e65eee9b5a6ac984ff2315022135ce7cd4456 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/auto_parallel.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/ir/cf.dot b/docs/source_zh_cn/design/mindspore/images/ir/cf.dot new file mode 100644 index 0000000000000000000000000000000000000000..9da78b45beb7ea56365a300e601c79af4a55e130 --- /dev/null +++ b/docs/source_zh_cn/design/mindspore/images/ir/cf.dot @@ -0,0 +1,183 @@ +digraph mindspore { +compound=true +subgraph cluster_0x8b8cc30{ +id=cluster_0x8b8cc30 +label="fibonacci[managed]" +fontname="Courier New" +node0x8bde4b0_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]0)
>,] +node0x8bee780_0[fontname="Courier New",shape=plaintext,label=< + + +
0
CNode([CNode]1)
>,] +node0x8bee900_0[fontname="Courier New",shape=plaintext,label=< + + +
0123
CNode([CNode]2)
>,] +node0x8b702a0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]54)
>,] +node0x8b6db30_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]37)
>,] +node0x8bc0bb0_0[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x8b768b0_1[fontname="Courier New",shape=plaintext,label=<
Primitive
switch
>,] +node0x8b6c9f0_2[fontname="Courier New",shape=oval,label="✓fibonacci",style=filled,fillcolor=palegreen,URL="#cluster_0x8b91500",] +node0x8bd9410_3[fontname="Courier New",shape=plaintext,label=<
Primitive
Partial
>,] +node0x8b85110_4[fontname="Courier New",shape=oval,label="✗fibonacci",style=filled,fillcolor=palegreen,URL="#cluster_0x8bda550",] +node0x8b7bab0_5[fontname="Courier New",shape=octagon,label="n",style=filled,fillcolor=paleturquoise,] +node0x8b76120_29[fontname="Courier New",shape=plaintext,label=<
PrimitivePy
scalar_lt
>,] +node0x8b7bab0_30[fontname="Courier New",shape=octagon,label="n",style=filled,fillcolor=paleturquoise,] +node0x8b90f50_31[fontname="Courier New",shape=plaintext,label=<
Int32Imm
1
>,] +parameters_0x8b8cc30[shape=plaintext label=<
parameters
n
>,];} +subgraph cluster_0x8bda550{ +id=cluster_0x8bda550 +label="✗fibonacci[managed]" +fontname="Courier New" +node0x8b6acd0_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]3)
>,] +node0x8b6dff0_0[fontname="Courier New",shape=plaintext,label=< + + +
0
CNode([CNode]4)
>,] +node0x8b7d410_0[fontname="Courier New",shape=plaintext,label=< + + +
0123
CNode([CNode]5)
>,] +node0x8b83a80_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]55)
>,] +node0x8b8c2a0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]35)
>,] +node0x8b62c70_6[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x8bbe5f0_7[fontname="Courier New",shape=plaintext,label=<
Primitive
switch
>,] +node0x8b8a0f0_8[fontname="Courier New",shape=oval,label="✓✗fibonacci",style=filled,fillcolor=palegreen,URL="#cluster_0x8b64c50",] +node0x8b8dbb0_9[fontname="Courier New",shape=plaintext,label=<
Primitive
Partial
>,] +node0x8bc0680_10[fontname="Courier New",shape=oval,label="✗✗fibonacci",style=filled,fillcolor=palegreen,URL="#cluster_0x8bedfe0",] +node0x8b76290_11[fontname="Courier New",shape=octagon,label="n",style=filled,fillcolor=paleturquoise,] +node0x8b90c20_24[fontname="Courier New",shape=plaintext,label=<
PrimitivePy
scalar_eq
>,] +node0x8b76290_25[fontname="Courier New",shape=octagon,label="n",style=filled,fillcolor=paleturquoise,] +node0x8b7da70_26[fontname="Courier New",shape=plaintext,label=<
Int32Imm
1
>,] +parameters_0x8bda550[shape=plaintext label=<
parameters
n
>,];} +subgraph cluster_0x8bedfe0{ +id=cluster_0x8bedfe0 +label="✗✗fibonacci[managed]" +fontname="Courier New" +node0x8b8e4a0_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]6)
>,] +node0x8bb9b70_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]33)
>,] +node0x8b7d610_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]8)
>,] +node0x8beae20_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]31)
>,] +node0x8b76cd0_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]11)
>,] +node0x8b849b0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]31)
>,] +node0x8b85200_12[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x8b84310_13[fontname="Courier New",shape=plaintext,label=<
PrimitivePy
scalar_add
>,] +node0x8bc14b0_14[fontname="Courier New",shape=oval,label="fibonacci",style=filled,fillcolor=palegreen,URL="#cluster_0x8b8cc30",] +node0x8b8d2e0_15[fontname="Courier New",shape=plaintext,label=<
PrimitivePy
scalar_sub
>,] +node0x8bbc810_16[fontname="Courier New",shape=octagon,label="n",style=filled,fillcolor=paleturquoise,] +node0x8b8d3d0_17[fontname="Courier New",shape=plaintext,label=<
Int32Imm
2
>,] +node0x8bd5920_18[fontname="Courier New",shape=oval,label="fibonacci",style=filled,fillcolor=palegreen,URL="#cluster_0x8b8cc30",] +node0x8bc15a0_19[fontname="Courier New",shape=plaintext,label=<
PrimitivePy
scalar_sub
>,] +node0x8bbc810_20[fontname="Courier New",shape=octagon,label="n",style=filled,fillcolor=paleturquoise,] +node0x8b83990_21[fontname="Courier New",shape=plaintext,label=<
Int32Imm
1
>,] +parameters_0x8bedfe0[shape=plaintext label=<
parameters
n
>,];} +subgraph cluster_0x8b64c50{ +id=cluster_0x8b64c50 +label="✓✗fibonacci[managed]" +fontname="Courier New" +node0x8be8e20_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]15)
>,] +node0x8bd5440_22[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x8b89ee0_23[fontname="Courier New",shape=plaintext,label=<
Int32Imm
1
>,] +parameters_0x8b64c50[shape=plaintext label=<
parameters
>,];} +subgraph cluster_0x8b91500{ +id=cluster_0x8b91500 +label="✓fibonacci[managed]" +fontname="Courier New" +node0x8bdacb0_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]18)
>,] +node0x8b7d900_27[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x8bb9e90_28[fontname="Courier New",shape=plaintext,label=<
Int32Imm
0
>,] +parameters_0x8b91500[shape=plaintext label=<
parameters
>,];} +node0x8bc0bb0_0:core->node0x8bde4b0_0:0[arrowhead=vee,style=dashed] +node0x8bee780_0:core->node0x8bde4b0_0:1[arrowhead=vee,] +node0x8bee900_0:core->node0x8bee780_0:0[arrowhead=vee,] +node0x8b768b0_1:core->node0x8bee900_0:0[arrowhead=vee,style=dashed] +node0x8b6db30_0:core->node0x8bee900_0:1[arrowhead=vee,] +node0x8b6c9f0_2->node0x8bee900_0:2[arrowhead=vee,] +node0x8b6c9f0_2->node0x8bdacb0_0[lhead=cluster_0x8b91500,dir=both,arrowhead=dot,style=filled,color=blue] +node0x8b702a0_0:core->node0x8bee900_0:3[arrowhead=vee,] +node0x8bd9410_3:core->node0x8b702a0_0:0[arrowhead=vee,style=dashed] +node0x8b85110_4->node0x8b702a0_0:1[arrowhead=vee,] +node0x8b85110_4->node0x8b6acd0_0[lhead=cluster_0x8bda550,dir=both,arrowhead=dot,style=filled,color=blue] +node0x8b7bab0_5->node0x8b702a0_0:2[arrowhead=vee,] +node0x8b62c70_6:core->node0x8b6acd0_0:0[arrowhead=vee,style=dashed] +node0x8b6dff0_0:core->node0x8b6acd0_0:1[arrowhead=vee,] +node0x8b7d410_0:core->node0x8b6dff0_0:0[arrowhead=vee,] +node0x8bbe5f0_7:core->node0x8b7d410_0:0[arrowhead=vee,style=dashed] +node0x8b8c2a0_0:core->node0x8b7d410_0:1[arrowhead=vee,] +node0x8b8a0f0_8->node0x8b7d410_0:2[arrowhead=vee,] +node0x8b8a0f0_8->node0x8be8e20_0[lhead=cluster_0x8b64c50,dir=both,arrowhead=dot,style=filled,color=blue] +node0x8b83a80_0:core->node0x8b7d410_0:3[arrowhead=vee,] +node0x8b8dbb0_9:core->node0x8b83a80_0:0[arrowhead=vee,style=dashed] +node0x8bc0680_10->node0x8b83a80_0:1[arrowhead=vee,] +node0x8bc0680_10->node0x8b8e4a0_0[lhead=cluster_0x8bedfe0,dir=both,arrowhead=dot,style=filled,color=blue] +node0x8b76290_11->node0x8b83a80_0:2[arrowhead=vee,] +node0x8b85200_12:core->node0x8b8e4a0_0:0[arrowhead=vee,style=dashed] +node0x8bb9b70_0:core->node0x8b8e4a0_0:1[arrowhead=vee,] +node0x8b84310_13:core->node0x8bb9b70_0:0[arrowhead=vee,style=dashed] +node0x8b76cd0_0:core->node0x8bb9b70_0:1[arrowhead=vee,] +node0x8b7d610_0:core->node0x8bb9b70_0:2[arrowhead=vee,] +node0x8bc14b0_14->node0x8b7d610_0:0[arrowhead=vee,style=dashed] +node0x8bc14b0_14->node0x8bde4b0_0[lhead=cluster_0x8b8cc30,dir=both,arrowhead=dot,style=filled,color=blue] +node0x8beae20_0:core->node0x8b7d610_0:1[arrowhead=vee,] +node0x8b8d2e0_15:core->node0x8beae20_0:0[arrowhead=vee,style=dashed] +node0x8bbc810_16->node0x8beae20_0:1[arrowhead=vee,] +node0x8b8d3d0_17:core->node0x8beae20_0:2[arrowhead=vee,] +node0x8bd5920_18->node0x8b76cd0_0:0[arrowhead=vee,style=dashed] +node0x8bd5920_18->node0x8bde4b0_0[lhead=cluster_0x8b8cc30,dir=both,arrowhead=dot,style=filled,color=blue] +node0x8b849b0_0:core->node0x8b76cd0_0:1[arrowhead=vee,] +node0x8bc15a0_19:core->node0x8b849b0_0:0[arrowhead=vee,style=dashed] +node0x8bbc810_20->node0x8b849b0_0:1[arrowhead=vee,] +node0x8b83990_21:core->node0x8b849b0_0:2[arrowhead=vee,] +node0x8bd5440_22:core->node0x8be8e20_0:0[arrowhead=vee,style=dashed] +node0x8b89ee0_23:core->node0x8be8e20_0:1[arrowhead=vee,] +node0x8b90c20_24:core->node0x8b8c2a0_0:0[arrowhead=vee,style=dashed] +node0x8b76290_25->node0x8b8c2a0_0:1[arrowhead=vee,] +node0x8b7da70_26:core->node0x8b8c2a0_0:2[arrowhead=vee,] +node0x8b7d900_27:core->node0x8bdacb0_0:0[arrowhead=vee,style=dashed] +node0x8bb9e90_28:core->node0x8bdacb0_0:1[arrowhead=vee,] +node0x8b76120_29:core->node0x8b6db30_0:0[arrowhead=vee,style=dashed] +node0x8b7bab0_30->node0x8b6db30_0:1[arrowhead=vee,] +node0x8b90f50_31:core->node0x8b6db30_0:2[arrowhead=vee,] +} diff --git a/docs/source_zh_cn/design/mindspore/images/ir/cf.png b/docs/source_zh_cn/design/mindspore/images/ir/cf.png new file mode 100644 index 
0000000000000000000000000000000000000000..196be66c223022c34fe34848d30c10985efa94c7 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/ir/cf.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/ir/closure.dot b/docs/source_zh_cn/design/mindspore/images/ir/closure.dot new file mode 100644 index 0000000000000000000000000000000000000000..fd3d402bf48b3410e2d92964c1e1ef5e289dda40 --- /dev/null +++ b/docs/source_zh_cn/design/mindspore/images/ir/closure.dot @@ -0,0 +1,93 @@ +digraph mindspore { +compound=true +subgraph cluster_0x19e608f0{ +id=cluster_0x19e608f0 +label="ms_closure[managed]" +fontname="Courier New" +node0x19269490_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]0)
>,] +node0x1976cf00_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]1)
>,] +node0x1963d630_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode(out2)
>,] +node0x196d87f0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode(closure)
>,] +node0x196c2270_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode(out1)
>,] +node0x19e328a0_0[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x19e5e7c0_1[fontname="Courier New",shape=plaintext,label=<
DoSignaturePrimitive
S-Prim-make_tuple
>,] +node0x19b6a3d0_2[fontname="Courier New",shape=plaintext,label=<
Int32Imm
2
>,] +node0x19e68e20_3[fontname="Courier New",shape=oval,label="func_outer[func_outer]",style=filled,fillcolor=palegreen,URL="#cluster_0x19e63830",] +node0x19e38e00_4[fontname="Courier New",shape=plaintext,label=<
Int32Imm
1
>,] +node0x19e23c10_5[fontname="Courier New",shape=plaintext,label=<
Int32Imm
2
>,] +node0x19e1c020_14[fontname="Courier New",shape=plaintext,label=<
Int32Imm
1
>,] +parameters_0x19e608f0[shape=plaintext label=<
parameters
>,];} +subgraph cluster_0x19e63830{ +id=cluster_0x19e63830 +label="func_outer[managed]" +fontname="Courier New" +node0x19e69550_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]5)
>,] +node0x19e68f90_6[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x19e69100_7[fontname="Courier New",shape=oval,label="func_inner",style=filled,fillcolor=palegreen,URL="#cluster_0x19e64130",] +node0x19e035b0_12[fontname="Courier New",shape=octagon,label="a",style=filled,fillcolor=paleturquoise,] +node0x19e036b0_13[fontname="Courier New",shape=octagon,label="b",style=filled,fillcolor=paleturquoise,] +parameters_0x19e63830[shape=plaintext label=<
parameters
a
b
>,];} +subgraph cluster_0x19e64130{ +id=cluster_0x19e64130 +label="func_inner[managed]" +fontname="Courier New" +node0x19e68c80_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]6)
>,] +node0x19e68ae0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]7)
>,] +node0x19e682c0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]8)
>,] +node0x19e50a00_8[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x19c7ced0_9[fontname="Courier New",shape=plaintext,label=<
DoSignaturePrimitive
S-Prim-add
>,] +node0x19e645e0_10[fontname="Courier New",shape=octagon,label="c",style=filled,fillcolor=paleturquoise,] +node0x19e68790_11[fontname="Courier New",shape=plaintext,label=<
DoSignaturePrimitive
S-Prim-add
>,] +parameters_0x19e64130[shape=plaintext label=<
parameters
c
>,];} +node0x19e328a0_0:core->node0x19269490_0:0[arrowhead=vee,style=dashed] +node0x1976cf00_0:core->node0x19269490_0:1[arrowhead=vee,] +node0x19e5e7c0_1:core->node0x1976cf00_0:0[arrowhead=vee,style=dashed] +node0x196c2270_0:core->node0x1976cf00_0:1[arrowhead=vee,] +node0x1963d630_0:core->node0x1976cf00_0:2[arrowhead=vee,] +node0x196d87f0_0:core->node0x1963d630_0:0[arrowhead=vee,style=dashed] +node0x19b6a3d0_2:core->node0x1963d630_0:1[arrowhead=vee,] +node0x19e68e20_3->node0x196d87f0_0:0[arrowhead=vee,style=dashed] +node0x19e68e20_3->node0x19e69550_0[lhead=cluster_0x19e63830,dir=both,arrowhead=dot,style=filled,color=blue] +node0x19e38e00_4:core->node0x196d87f0_0:1[arrowhead=vee,] +node0x19e23c10_5:core->node0x196d87f0_0:2[arrowhead=vee,] +node0x19e68f90_6:core->node0x19e69550_0:0[arrowhead=vee,style=dashed] +node0x19e69100_7->node0x19e69550_0:1[arrowhead=vee,] +node0x19e69100_7->node0x19e68c80_0[lhead=cluster_0x19e64130,dir=both,arrowhead=dot,style=filled,color=blue] +node0x19e50a00_8:core->node0x19e68c80_0:0[arrowhead=vee,style=dashed] +node0x19e68ae0_0:core->node0x19e68c80_0:1[arrowhead=vee,] +node0x19c7ced0_9:core->node0x19e68ae0_0:0[arrowhead=vee,style=dashed] +node0x19e682c0_0:core->node0x19e68ae0_0:1[arrowhead=vee,] +node0x19e645e0_10->node0x19e68ae0_0:2[arrowhead=vee,] +node0x19e68790_11:core->node0x19e682c0_0:0[arrowhead=vee,style=dashed] +node0x19e035b0_12->node0x19e682c0_0:1[arrowhead=vee,] +node0x19e036b0_13->node0x19e682c0_0:2[arrowhead=vee,] +node0x196d87f0_0:core->node0x196c2270_0:0[arrowhead=vee,style=dashed] +node0x19e1c020_14:core->node0x196c2270_0:1[arrowhead=vee,] +} diff --git a/docs/source_zh_cn/design/mindspore/images/ir/closure.png b/docs/source_zh_cn/design/mindspore/images/ir/closure.png new file mode 100644 index 0000000000000000000000000000000000000000..6a618dd46d4bceeabb0b68ddbd187babc24a16aa Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/ir/closure.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/ir/hof.dot b/docs/source_zh_cn/design/mindspore/images/ir/hof.dot new file mode 100644 index 0000000000000000000000000000000000000000..c0102eef9d47393572e241610f6dd1a2c303fb57 --- /dev/null +++ b/docs/source_zh_cn/design/mindspore/images/ir/hof.dot @@ -0,0 +1,85 @@ +digraph mindspore { +compound=true +subgraph cluster_0x1b3c23b0{ +id=cluster_0x1b3c23b0 +label="hof[managed]" +fontname="Courier New" +node0x1b32ae50_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]0)
>,] +node0x1b064930_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode(res)
>,] +node0x1b3c0040_0[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x1b3bfbf0_1[fontname="Courier New",shape=oval,label="g",style=filled,fillcolor=palegreen,URL="#cluster_0x1b3be6c0",] +node0x1b3bfed0_2[fontname="Courier New",shape=oval,label="f",style=filled,fillcolor=palegreen,URL="#cluster_0x1b3c50c0",] +node0x1b3c6870_3[fontname="Courier New",shape=octagon,label="x",style=filled,fillcolor=paleturquoise,] +parameters_0x1b3c23b0[shape=plaintext label=<
parameters
x
>,];} +subgraph cluster_0x1b3c50c0{ +id=cluster_0x1b3c50c0 +label="f[managed]" +fontname="Courier New" +node0x1ab4e190_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]1)
>,] +node0x1ab61220_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]2)
>,] +node0x1b3c59e0_4[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x1b3bf5e0_5[fontname="Courier New",shape=plaintext,label=<
DoSignaturePrimitive
S-Prim-add
>,] +node0x1b348630_6[fontname="Courier New",shape=octagon,label="x",style=filled,fillcolor=paleturquoise,] +node0x1b3c60f0_7[fontname="Courier New",shape=plaintext,label=<
Int32Imm
3
>,] +parameters_0x1b3c50c0[shape=plaintext label=<
parameters
x
>,];} +subgraph cluster_0x1b3be6c0{ +id=cluster_0x1b3be6c0 +label="g[managed]" +fontname="Courier New" +node0x1b3bfa50_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]4)
>,] +node0x1a9fb8c0_0[fontname="Courier New",shape=plaintext,label=< + + +
012
CNode([CNode]5)
>,] +node0x1a39f7a0_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]6)
>,] +node0x1a4daa20_0[fontname="Courier New",shape=plaintext,label=< + + +
01
CNode([CNode]7)
>,] +node0x1b3adfd0_8[fontname="Courier New",shape=plaintext,label=<
Primitive
return
>,] +node0x1b3c2920_9[fontname="Courier New",shape=plaintext,label=<
DoSignaturePrimitive
S-Prim-mul
>,] +node0x1b3120e0_10[fontname="Courier New",shape=octagon,label="function",style=filled,fillcolor=paleturquoise,] +node0x1b3121e0_11[fontname="Courier New",shape=octagon,label="x",style=filled,fillcolor=paleturquoise,] +node0x1b3120e0_12[fontname="Courier New",shape=octagon,label="function",style=filled,fillcolor=paleturquoise,] +node0x1b3121e0_13[fontname="Courier New",shape=octagon,label="x",style=filled,fillcolor=paleturquoise,] +parameters_0x1b3be6c0[shape=plaintext label=<
parameters
function
x
>,];} +node0x1b3c0040_0:core->node0x1b32ae50_0:0[arrowhead=vee,style=dashed] +node0x1b064930_0:core->node0x1b32ae50_0:1[arrowhead=vee,] +node0x1b3bfbf0_1->node0x1b064930_0:0[arrowhead=vee,style=dashed] +node0x1b3bfbf0_1->node0x1b3bfa50_0[lhead=cluster_0x1b3be6c0,dir=both,arrowhead=dot,style=filled,color=blue] +node0x1b3bfed0_2->node0x1b064930_0:1[arrowhead=vee,] +node0x1b3bfed0_2->node0x1ab4e190_0[lhead=cluster_0x1b3c50c0,dir=both,arrowhead=dot,style=filled,color=blue] +node0x1b3c6870_3->node0x1b064930_0:2[arrowhead=vee,] +node0x1b3c59e0_4:core->node0x1ab4e190_0:0[arrowhead=vee,style=dashed] +node0x1ab61220_0:core->node0x1ab4e190_0:1[arrowhead=vee,] +node0x1b3bf5e0_5:core->node0x1ab61220_0:0[arrowhead=vee,style=dashed] +node0x1b348630_6->node0x1ab61220_0:1[arrowhead=vee,] +node0x1b3c60f0_7:core->node0x1ab61220_0:2[arrowhead=vee,] +node0x1b3adfd0_8:core->node0x1b3bfa50_0:0[arrowhead=vee,style=dashed] +node0x1a9fb8c0_0:core->node0x1b3bfa50_0:1[arrowhead=vee,] +node0x1b3c2920_9:core->node0x1a9fb8c0_0:0[arrowhead=vee,style=dashed] +node0x1a4daa20_0:core->node0x1a9fb8c0_0:1[arrowhead=vee,] +node0x1a39f7a0_0:core->node0x1a9fb8c0_0:2[arrowhead=vee,] +node0x1b3120e0_10->node0x1a39f7a0_0:0[arrowhead=vee,style=dashed] +node0x1b3121e0_11->node0x1a39f7a0_0:1[arrowhead=vee,] +node0x1b3120e0_12->node0x1a4daa20_0:0[arrowhead=vee,style=dashed] +node0x1b3121e0_13->node0x1a4daa20_0:1[arrowhead=vee,] +} diff --git a/docs/source_zh_cn/design/mindspore/images/ir/hof.png b/docs/source_zh_cn/design/mindspore/images/ir/hof.png new file mode 100644 index 0000000000000000000000000000000000000000..b7aed07a68798c31561de9461c94814ecec17d33 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/ir/hof.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/ir/ir.dot b/docs/source_zh_cn/design/mindspore/images/ir/ir.dot new file mode 100644 index 0000000000000000000000000000000000000000..50faab23bdcd63c5199303cb8fdcbef1ccb3163c --- /dev/null +++ b/docs/source_zh_cn/design/mindspore/images/ir/ir.dot @@ -0,0 +1,73 @@ +digraph mindspore { +compound=true +subgraph cluster_0x55c9669c3c70{ +id=cluster_0x55c9669c3c70 +label="test_f" +fontname="HuaweiSans" +node0x55c9669c6cc0_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
01
CNode([CNode]0)
>,] +node0x55c9669c66a0_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
012
CNode(c)
>,] +node0x55c9669c6960_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
012
CNode([CNode]1)
>,] +node0x55c9669c58a0_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
012
CNode(b)
>,] +node0x55c9669c4fb0_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
012
CNode(a)
>,] +node0x55c9669c6b60_0[fontname="HuaweiSans",shape=plaintext,label=<
Primitive
return
>,] +node0x55c9669c9720_1[fontname="HuaweiSans",shape=plaintext,label=<
MultitypeFuncGraph
mul
>,] +node0x55c9669c9dd0_2[fontname="HuaweiSans",shape=oval,label="func",style=filled,fillcolor="palegreen",URL="#cluster_0x55c9669c7310",] +node0x55c9669c9800_3[fontname="HuaweiSans",shape=plaintext,label=<
MultitypeFuncGraph
add
>,] +node0x55c9669c4430_4[fontname="HuaweiSans",shape=octagon,label="y",style=filled,fillcolor=paleturquoise,] +node0x55c9669c9e80_5[fontname="HuaweiSans",shape=plaintext,label=<
MultitypeFuncGraph
sub
>,] +node0x55c9669c3fc0_6[fontname="HuaweiSans",shape=octagon,label="x",style=filled,fillcolor=paleturquoise,] +node0x55c96692eeb0_7[fontname="HuaweiSans",shape=plaintext,label=<
Int32Imm
1
>,] +parameters_0x55c9669c3c70[shape=plaintext label=<
parameters
x
y
>,];} +subgraph cluster_0x55c9669c7310{ +id=cluster_0x55c9669c7310 +label="func" +fontname="HuaweiSans" +node0x55c9669cc740_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
01
CNode([CNode]7)
>,] +node0x55c9669cc5c0_0[fontname="HuaweiSans",shape=plaintext,label=< + + +
012
CNode([CNode]8)
>,] +node0x55c9669cafc0_8[fontname="HuaweiSans",shape=plaintext,label=<
Primitive
return
>,] +node0x55c9669cc930_9[fontname="HuaweiSans",shape=plaintext,label=<
MultitypeFuncGraph
div
>,] +node0x55c9669cab20_10[fontname="HuaweiSans",shape=octagon,label="x",style=filled,fillcolor=paleturquoise,] +node0x55c9669cacf0_11[fontname="HuaweiSans",shape=octagon,label="y",style=filled,fillcolor=paleturquoise,] +parameters_0x55c9669c7310[shape=plaintext label=<
parameters
x
y
>,];} +node0x55c9669c6b60_0:core->node0x55c9669c6cc0_0:0[arrowhead=vee,style=dashed] +node0x55c9669c66a0_0:core->node0x55c9669c6cc0_0:1[arrowhead=vee,] +node0x55c9669c9720_1:core->node0x55c9669c66a0_0:0[arrowhead=vee,style=dashed] +node0x55c9669c58a0_0:core->node0x55c9669c66a0_0:1[arrowhead=vee,] +node0x55c9669c6960_0:core->node0x55c9669c66a0_0:2[arrowhead=vee,] +node0x55c9669c9dd0_2->node0x55c9669c6960_0:0[arrowhead=vee,style=dashed] +node0x55c9669c9dd0_2->node0x55c9669cc740_0[lhead=cluster_0x55c9669c7310,dir=both,arrowhead=dot,style=filled,color="#444444"] +node0x55c9669c4fb0_0:core->node0x55c9669c6960_0:1[arrowhead=vee,] +node0x55c9669c58a0_0:core->node0x55c9669c6960_0:2[arrowhead=vee,] +node0x55c9669c9800_3:core->node0x55c9669c58a0_0:0[arrowhead=vee,style=dashed] +node0x55c9669c4fb0_0:core->node0x55c9669c58a0_0:1[arrowhead=vee,] +node0x55c9669c4430_4->node0x55c9669c58a0_0:2[arrowhead=vee,] +node0x55c9669c9e80_5:core->node0x55c9669c4fb0_0:0[arrowhead=vee,style=dashed] +node0x55c9669c3fc0_6->node0x55c9669c4fb0_0:1[arrowhead=vee,] +node0x55c96692eeb0_7:core->node0x55c9669c4fb0_0:2[arrowhead=vee,] +node0x55c9669cafc0_8:core->node0x55c9669cc740_0:0[arrowhead=vee,style=dashed] +node0x55c9669cc5c0_0:core->node0x55c9669cc740_0:1[arrowhead=vee,] +node0x55c9669cc930_9:core->node0x55c9669cc5c0_0:0[arrowhead=vee,style=dashed] +node0x55c9669cab20_10->node0x55c9669cc5c0_0:1[arrowhead=vee,] +node0x55c9669cacf0_11->node0x55c9669cc5c0_0:2[arrowhead=vee,] +} diff --git a/docs/source_zh_cn/design/mindspore/images/ir/ir.png b/docs/source_zh_cn/design/mindspore/images/ir/ir.png new file mode 100644 index 0000000000000000000000000000000000000000..364c5de500557324c8af86e4d1c5bc0a8f347bf5 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/ir/ir.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/operator_split.png b/docs/source_zh_cn/design/mindspore/images/operator_split.png new file mode 100644 index 0000000000000000000000000000000000000000..1fe2fda44fc148c7443b5c6dd6f95a3a0a2a1e99 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/operator_split.png differ diff --git a/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png b/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png new file mode 100644 index 0000000000000000000000000000000000000000..86b4630bb52146479ec4c0f766059d22db12bf10 Binary files /dev/null and b/docs/source_zh_cn/design/mindspore/images/tensor_redistribution.png differ diff --git a/docs/source_zh_cn/design/mindspore/ir.md b/docs/source_zh_cn/design/mindspore/ir.md new file mode 100644 index 0000000000000000000000000000000000000000..a9a266e51313b5603592e28d95635314a836ca34 --- /dev/null +++ b/docs/source_zh_cn/design/mindspore/ir.md @@ -0,0 +1,165 @@ +# MindSpore IR(MindIR) + +`框架开发` `中级` `高级` `贡献者` + + + +- [MindSpore IR(MindIR)](#mindspore-irmindir) + - [简介](#简介) + - [文法定义](#文法定义) + - [示例](#示例) + - [如何保存IR](#如何保存ir) + - [函数式语义](#函数式语义) + - [高阶函数](#高阶函数) + - [控制流](#控制流) + - [自由变量和闭包](#自由变量和闭包) + - [参考文献](#参考文献) + + + + + +## 简介 +中间表示(IR)是程序编译过程中介于源语言和目标语言之间的程序表示,以方便编译器进行程序分析和优化,因此IR的设计需要考虑从源语言到目标语言的转换难度,同时考虑程序分析和优化的易用性和性能。 + +MindIR是一种基于图表示的函数式IR,其最核心的目的是服务于自动微分变换。自动微分采用的是基于函数式编程框架的变换方法,因此IR采用了接近于ANF函数式的语义。此外,借鉴Sea of Nodes[1]和Thorin[2]的优秀设计,采用了一种基于显性依赖图的表示方式。 + +## 文法定义 +ANF是函数式编程中常用且简洁的中间表示,其文法定义如下所示: +``` + ::= NUMBER | STRING | VAR | BOOLEAN | PRIMOP + | (lambda (VAR …) ) + ::= ( …) + | (if ) + ::= (let ([VAR ]) ) | | + +``` 
+ANF中表达式分为原子表达式(aexp)和复合表达式(cexp),原子表达式表示一个常数值或一个变量或一个匿名函数;复合表达式由多个原子表达式复合组成,表示一个匿名函数或原语函数调用,组合的第一个输入是调用的函数,其余输入是调用的参数。 + +MindIR文法继承于ANF,其定义如下所示: +``` + ::= | + ::= Parameter + ::= Scalar | Named | Tensor | Type | Shape + | Primitive | MetaFuncGraph | FuncGraph + ::= ( …) + ::= | +``` +MindIR中的ANode对应于ANF的原子表达式,ANode有两个子类分别为ValueNode和ParameterNode。ValueNode表示常数节点,可承载一个常数值(标量、符号、张量、类型、维度等),也可以是一个原语函数(Primitive)或一个元函数(MetaFuncGraph)或一个普通函数(FuncGraph),因为在函数式编程中函数定义本身也是一个值。ParameterNode是参数节点,表示函数的形参。 + +MindIR中CNode对应于ANF的复合表达式,表示一次函数调用。 + +在MindSpore自动微分时,会计算ParameterNode和CNode的梯度贡献,并返回最终ParameterNode的梯度,而不计算ValueNode的梯度。 + +## 示例 +下面以一段程序作为示例,对比理解MindIR。 +```python +def func(x, y): + return x / y + +@ms_function +def test_f(x, y): + a = x - 1 + b = a + y + c = b * func(a, b) + return c +``` +这段Python代码对应的ANF表达为: +``` +lambda (x, y) + let a = x - 1 in + let b = a + y in + let func = lambda (x, y) + let ret = x / y in + ret end in + let %1 = func(a, b) in + let c = b * %1 in + c end +``` +对应的MindIR为[ir.dot](./images/ir/ir.dot): + +![](./images/ir/ir.png) + +在MindIR中,一个函数图(FuncGraph)表示一个普通函数的定义,函数图一般由ParameterNode、ValueNode和CNode组成有向无环图,可以清晰地表达出从参数到返回值的计算过程。在上图中可以看出,python代码中两个函数`test_f`和`func`转换成了两个函数图,其参数`x`和`y`转换为函数图的ParameterNode,每一个表达式转换为一个CNode。CNode的第一个输入链接着调用的函数,例如图中的`add`、`func`、`return`。值得注意的是这些节点均是`ValueNode`,因为它们被理解为常数函数值。CNode的其他输入链接这调用的参数,参数值可以来自于ParameterNode、ValueNode和其他CNode。 + +在ANF中每个表达式都用let表达式绑定为一个变量,通过对变量的引用来表示对表达式输出的依赖,而在MindIR中每个表达式都绑定为一个节点,通过节点与节点之间的有向边表示依赖关系。 + +## 如何保存IR +通过`context.set_context(save_graphs=True)`来保存各个编译阶段的中间代码。被保存的中间代码有两种格式,一个是后缀名为`.ir`的文本格式,一个是后缀名为`.dot`的图形化格式。当网络规模不大时,建议使用更直观的图形化格式来查看,当网络规模较大时建议使用更高效的文本格式来查看。 + +DOT文件可以通过graphviz转换为图片格式来查看,例如将dot转换为png的命令是`dot -Tpng *.dot -o *.png`。 + +## 函数式语义 +MindIR较传统计算图的一个重要特性是不仅可以表达算子之间的数据依赖,还可以表达丰富的函数式语义。 +### 高阶函数 +在MindIR中,函数的定义是由一个子图来定义,但其本身可以是一个被传递的值,作为其他高阶函数的输入或输出。 +例如下面一个简单的示例中,函数`f`作为参数传入了函数`g`,因此函数`g`是一个接收函数输入的高阶函数,函数`f`真正的调用点是在函数`g`内部。 + +``` +@ms_function +def hof(x): + def f(x): + return x + 3 + def g(function, x): + return function(x) * function(x) + res = g(f, x) + return res +``` + +对应的MindIR为[hof.dot](./images/ir/hof.dot): +![](./images/ir/hof.png) + +在实际网络训练脚本中,自动求导泛函`GradOperation`和优化器中常用到的`Partial`和`HyperMap`都是典型的高阶函数。高阶语义极大地提升了MindSpore表达的灵活性和简洁性。 + +### 控制流 +控制流在MindIR中是以高阶函数选择调用的形式表达。这样的形式把控制流转换为高阶函数的数据流,从而使得自动微分算法更加强大。不仅可以支持数据流的自动微分,还可以支持条件跳转、循环和递归等控制流的自动微分。 + +下面以一个简单的斐波那契用例来演示说明。 +```python +@ms_function +def fibonacci(n): + if(n < 1): + return 0 + elif(n == 1): + return 1 + else: + return fibonacci(n-1) + fibonacci(n-2) +``` + +对应的MindIR为[cf.dot](./images/ir/cf.dot): +![](./images/ir/cf.png) + +其中`fibonacci`是顶层函数图,在顶层中有两个函数图被`switch`选择调用。`✓fibonacci`是第一个`if`的True分支,`✗fibonacci`是第一个`if`的False分支。在`✗fibonacci`中被调用的`✓✗fibonacci`是`elif`的True分支,`✗✗fibonacci`是`elif`的False分支。这里需要理解的关键是在MindIR中,条件跳转和递归是以高阶控制流的形式表达的。例如,`✓fibonacci`和`✗fibonacci`是作为`switch`算子的参数传入,`switch`根据条件参数选择哪一个函数作为返回值。因此,`switch`是把输入的函数当成普通的值做了一个二元选择操作,并没有调用,而真正的函数调用是在紧随`switch`后的CNode上完成。 + + +### 自由变量和闭包 +闭包(closure)是一种编程语言特性,它指的是代码块和作用域环境的结合。自由变量(free variable)是指在代码块中引用作用域环境中的变量而非局部变量。在MindIR中,代码块是以函数图呈现的,而作用域环境可以理解为该函数被调用时的上下文环境,自由变量的捕获方式是值拷贝而非引用。 + +一个典型的闭包用例如下: +```python +@ms_function +def func_outer(a, b): + def func_inner(c): + return a + b + c + return func_inner + +@ms_function +def ms_closure(): + closure = func_outer(1, 2) + out1 = closure(1) + out2 = closure(2) + return out1, out2 +``` + +对应的MindIR为[closure.dot](./images/ir/closure.dot): +![](./images/ir/closure.png) + 
+在例子中,`a`和`b`是自由变量,因为`func_inner`中变量`a`和`b`是引用的其父图`func_outer`中定义的参数。变量`closure`是一个闭包,它是函数`func_inner`与其上下文`func_outer(1, 2)`的结合。因此,`out1`的结果是4,因为其等价于`1+2+1`,`out2`的结果是5,因为其等价于`1+2+2`。 + +## 参考文献 +[1] C. Click and M. Paleczny. A simple graph-based intermediate representation. +SIGPLAN Not., 30:35–49, March 1995. + +[2] Roland Leißa, Marcel Köster, and Sebastian Hack. A graph-based higher-order intermediate +representation. In Proceedings of the 13th Annual IEEE/ACM International Symposium on +Code Generation and Optimization, pages 202–212. IEEE Computer Society, 2015. \ No newline at end of file diff --git a/docs/source_zh_cn/index.rst b/docs/source_zh_cn/index.rst index c390f290a5e14074a50c6725c0835a5cdcfb7b62..91ddd47bd41b271296d377dcb5044aa6af87df45 100644 --- a/docs/source_zh_cn/index.rst +++ b/docs/source_zh_cn/index.rst @@ -17,5 +17,6 @@ MindSpore文档 operator_list constraints_on_network_construction glossary + FAQ help_seeking_path - community \ No newline at end of file + community diff --git a/docs/source_zh_cn/operator_list.md b/docs/source_zh_cn/operator_list.md index 4bdddafe41c9b6a1206e96c3eed3204bdfa86f8f..6244a7101f59c9705ec876ff3271bc7774026c14 100644 --- a/docs/source_zh_cn/operator_list.md +++ b/docs/source_zh_cn/operator_list.md @@ -5,14 +5,13 @@ - [算子支持](#算子支持) - - [mindspore.nn](#mindsporenn) - - [mindspore.ops.operations](#mindsporeopsoperations) - - [functional 算子](#functional-算子) - - [functional算子列表](#functional算子列表) - - [隐式类型转换](#隐式类型转换) - - [转换规则](#转换规则) - - [参与转换的数据类型](#参与转换的数据类型) - - [支持算子](#支持算子) + - [mindspore.nn](#mindsporenn) + - [mindspore.ops.operations](#mindsporeopsoperations) + - [mindspore.ops.functional](#mindsporeopsfunctional) + - [隐式类型转换](#隐式类型转换) + - [转换规则](#转换规则) + - [参与转换的数据类型](#参与转换的数据类型) + - [支持算子](#支持算子) @@ -28,7 +27,7 @@ | [mindspore.nn.ReLU6](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.ReLU6) |Supported | Supported | Supported |layer/activation | [mindspore.nn.HSwish](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.HSwish) | Doing | Supported | Doing |layer/activation | [mindspore.nn.HSigmoid](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.HSigmoid) | Doing | Supported | Doing |layer/activation -| [mindspore.nn.LeakyReLU](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LeakyReLU) | Supported |Doing | Doing |layer/activation +| [mindspore.nn.LeakyReLU](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LeakyReLU) | Supported |Supported | Doing |layer/activation | [mindspore.nn.Tanh](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Tanh) | Supported | Supported | Doing |layer/activation | [mindspore.nn.GELU](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.GELU) | Supported | Supported | Doing |layer/activation | [mindspore.nn.Sigmoid](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Sigmoid) | Supported |Supported | Doing |layer/activation @@ -46,12 +45,12 @@ | [mindspore.nn.Conv2d](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv2d) | Supported | Supported | Supported |layer/conv | [mindspore.nn.Conv2dTranspose](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv2dTranspose) | Supported 
| Supported | Doing |layer/conv | [mindspore.nn.Conv2dBnAct](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv2dBnAct) | Supported | Supported | Supported |layer/conv -| [mindspore.nn.Conv1d](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1d) | Supported | Doing | Doing |layer/conv -| [mindspore.nn.Conv1dTranspose](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1dTranspose) | Supported | Doing | Doing |layer/conv -| [mindspore.nn.Embedding](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Embedding) |Doing | Supported | Doing |layer/embedding +| [mindspore.nn.Conv1d](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1d) | Supported | Supported | Doing |layer/conv +| [mindspore.nn.Conv1dTranspose](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Conv1dTranspose) | Supported | Supported | Doing |layer/conv +| [mindspore.nn.Embedding](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Embedding) |Supported | Supported | Doing |layer/embedding | [mindspore.nn.ImageGradients](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.ImageGradients) | Doing |Doing | Doing |layer/image | [mindspore.nn.SSIM](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SSIM) | Supported | Supported | Doing |layer/image -| [mindspore.nn.PSNR](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.PSNR) | Doing |Doing | Doing |layer/image +| [mindspore.nn.PSNR](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.PSNR) | Supported |Doing | Doing |layer/image | [mindspore.nn.CentralCrop](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.CentralCrop) | Supported |Doing | Doing |layer/image | [mindspore.nn.LSTM](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LSTM) | Doing | Supported | Supported |layer/lstm | [mindspore.nn.GlobalBatchNorm](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.GlobalBatchNorm) | Supported |Doing | Doing |layer/normalization @@ -65,11 +64,12 @@ | [mindspore.nn.LinSpace](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LinSpace) | Supported | Doing | Doing | layer/normalization | [mindspore.nn.MaxPool2d](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.MaxPool2d) | Supported | Supported | Supported |layer/pooling | [mindspore.nn.AvgPool2d](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.AvgPool2d) | Supported | Supported | Doing |layer/pooling -| [mindspore.nn.L1Loss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.L1Loss) |Doing |Doing | Doing |loss/loss +| [mindspore.nn.L1Loss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.L1Loss) |Supported |Supported | Doing |loss/loss | [mindspore.nn.MSELoss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.MSELoss) | Supported |Doing | Doing |loss/loss | 
[mindspore.nn.SmoothL1Loss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SmoothL1Loss) | Supported |Doing | Doing |loss/loss | [mindspore.nn.SoftmaxCrossEntropyWithLogits](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SoftmaxCrossEntropyWithLogits) | Supported | Supported | Doing |loss/loss -| [mindspore.nn.SoftmaxCrossEntropyExpand](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SoftmaxCrossEntropyExpand) | Supported |Doing | Doing |loss/loss +| [mindspore.nn.SoftmaxCrossEntropyExpand](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.SoftmaxCrossEntropyExpand) | Supported |Supported | Doing |loss/loss +| [mindspore.nn.CosineEmbeddingLoss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.CosineEmbeddingLoss) |Supported |Supported | Doing |loss/loss | [mindspore.nn.ProximalAdagrad](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.ProximalAdagrad) | Supported |Doing | Doing |optim/ProximalAdagrad | [mindspore.nn.LazyAdam](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.LazyAdam) | Supported |Doing | Doing |optim/lazyadam | [mindspore.nn.Adam](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.nn.html#mindspore.nn.Adam) | Supported |Doing | Doing |optim/adam @@ -144,7 +144,7 @@ | [mindspore.ops.operations.ApplyProximalAdagrad](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ApplyProximalAdagrad) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.FusedSparseLazyAdam](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FusedSparseLazyAdam) | Doing | Doing | Supported | nn_ops | [mindspore.ops.operations.FusedSparseAdam](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FusedSparseAdam) | Doing | Doing | Supported | nn_ops -| [mindspore.ops.operations.SmoothL1Loss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SmoothL1Loss) | Supported | Doing | Doing | nn_ops +| [mindspore.ops.operations.SmoothL1Loss](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SmoothL1Loss) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.SGD](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.SGD) | Supported | Doing | Doing | nn_ops | [mindspore.ops.operations.LayerNorm](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.LayerNorm) | Supported | Supported | Doing | nn_ops | [mindspore.ops.operations.L2Normalize](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.L2Normalize) | Supported | Doing | Doing | nn_ops @@ -194,7 +194,7 @@ | [mindspore.ops.operations.Minimum](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Minimum) | Supported | Supported | Doing | math_ops | 
[mindspore.ops.operations.Maximum](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Maximum) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.RealDiv](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.RealDiv) | Supported | Supported | Doing | math_ops -| [mindspore.ops.operations.Div](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Div) | Supported | Doing | Doing | math_ops +| [mindspore.ops.operations.Div](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Div) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.DivNoNan](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.DivNoNan) | Supported | Doing | Doing | math_ops | [mindspore.ops.operations.FloorDiv](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.FloorDiv) | Supported | Supported | Doing | math_ops | [mindspore.ops.operations.Floor](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Floor) | Supported | Supported | Doing | math_ops @@ -263,7 +263,7 @@ | [mindspore.ops.operations.TupleToArray](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.TupleToArray) | Supported | Supported | Supported | array_ops | [mindspore.ops.operations.ScalarToArray](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScalarToArray) | Supported | Supported | Supported | array_ops | [mindspore.ops.operations.ScalarToTensor](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScalarToTensor) | Supported | Supported | Supported | array_ops -| [mindspore.ops.operations.InvertPermutation](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.InvertPermutation) | Supported | Doing | Doing | array_ops +| [mindspore.ops.operations.InvertPermutation](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.InvertPermutation) | Supported | Supported | Doing | array_ops | [mindspore.ops.operations.Argmax](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Argmax) | Supported | Supported | Supported | array_ops | [mindspore.ops.operations.Argmin](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Argmin) | Supported | Doing | Doing | array_ops | [mindspore.ops.operations.ArgMaxWithValue](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ArgMaxWithValue) | Supported | Supported | Doing | array_ops @@ -332,8 +332,8 @@ | [mindspore.ops.operations.StandardNormal](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.StandardNormal) | Supported | Supported | Doing | random_ops | 
[mindspore.ops.operations.Gamma](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Gamma) | Supported | Doing | Doing | random_ops | [mindspore.ops.operations.Poisson](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Poisson) | Supported | Doing | Doing | random_ops -| [mindspore.ops.operations.UniformInt](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformInt) | Supported | Doing | Doing | random_ops -| [mindspore.ops.operations.UniformReal](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformReal) | Supported | Doing | Doing | random_ops +| [mindspore.ops.operations.UniformInt](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformInt) | Supported | Supported | Doing | random_ops +| [mindspore.ops.operations.UniformReal](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.UniformReal) | Supported | Supported | Doing | random_ops | [mindspore.ops.operations.RandomChoiceWithMask](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.RandomChoiceWithMask) | Doing| Supported | Doing | random_ops | [mindspore.ops.operations.RandomCategorical](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.RandomCategorical) | Supported| Doing | Doing | random_ops | [mindspore.ops.operations.ScalarCast](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ScalarCast) | Supported | Supported | Supported | inner_ops @@ -343,31 +343,7 @@ | [mindspore.ops.operations.Xdivy](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Xdivy) | Supported | Doing | Doing | math_ops | [mindspore.ops.operations.Xlogy](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Xlogy) | Supported | Doing | Doing | math_ops -## functional 算子 - -为了简化没有属性的算子的调用流程,我们提供了一些算子的functional版本。入参要求参考原算子的输入输出要求。算子本身支持情况可以查询算子支持情况[mindspore.ops.operations](#mindsporeopsoperations)。 - -例如`P.Pow`算子,我们提供了functional版本的`F.pow`算子。可以直接使用 - -```python -import mindspore -from mindspore.ops import operations as P -input_x = mindspore.Tensor(np.array([1.0, 2.0, 4.0]), mindspore.float32) -input_y = 3.0 -pow = P.Pow() -pow(input_x, input_y) -``` - --> - -```python -from mindspore.ops import functional as F -F.pow(input_x, input_y) -``` - -### functional算子列表 - -当前functional支持了一部分没有属性的算子,后续会进一步补齐完整。 +## mindspore.ops.functional | 操作名 | 对应functional算子 | :----------- | :----------- @@ -399,6 +375,9 @@ F.pow(input_x, input_y) | [mindspore.ops.operations.ControlDepend](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.ControlDepend) | control_depend | [mindspore.ops.operations.Print](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Print) | print | 
[mindspore.ops.operations.Assign](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Assign) | assign +| [mindspore.ops.operations.Pow](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html#mindspore.ops.operations.Pow) | tensor_pow + +> 当前functional支持了一部分没有属性的算子,后续会进一步补齐完整。 ## 隐式类型转换 ### 转换规则 diff --git a/docs/source_zh_cn/roadmap.md b/docs/source_zh_cn/roadmap.md index a985100c4d335268130a07263065889bf6cdf7bb..5887013c6496750c8c570ad0fd8d0b3cc210521c 100644 --- a/docs/source_zh_cn/roadmap.md +++ b/docs/source_zh_cn/roadmap.md @@ -77,4 +77,3 @@ * 在线学习 * 支持部署在IOT设备 * 低比特量化 -* CPU和NPU异构调度 diff --git a/docs/source_zh_cn/technical_white_paper.md b/docs/source_zh_cn/technical_white_paper.md index 1b32c70320151262f45f54d3a481bf89eaf2efa2..b23cfaf698364c3321431ce250af151d2896115b 100644 --- a/docs/source_zh_cn/technical_white_paper.md +++ b/docs/source_zh_cn/technical_white_paper.md @@ -20,4 +20,4 @@ ## 简介 MindSpore作为新一代深度学习框架,是源于全产业的最佳实践,最佳匹配昇腾处理器算力,支持终端、边缘、云全场景灵活部署,开创全新的AI编程范式,降低AI开发门槛。MindSpore是一种全新的深度学习计算框架,旨在实现易开发、高效执行、全场景覆盖三大目标。为了实现易开发的目标,MindSpore采用基于源码转换(Source Code Transformation,SCT)的自动微分(Automatic Differentiation,AD)机制,该机制可以用控制流表示复杂的组合。函数被转换成函数中间表达(Intermediate Representation,IR),中间表达构造出一个能够在不同设备上解析和执行的计算图。在执行前,计算图上应用了多种软硬件协同优化技术,以提升端、边、云等不同场景下的性能和效率。MindSpore支持动态图,更易于检查运行模式。由于采用了基于源码转换的自动微分机制,所以动态图和静态图之间的模式切换非常简单。为了在大型数据集上有效训练大模型,通过高级手动配置策略,MindSpore可以支持数据并行、模型并行和混合并行训练,具有很强的灵活性。此外,MindSpore还有“自动并行”能力,它通过在庞大的策略空间中进行高效搜索来找到一种快速的并行策略。MindSpore框架的具体优势,请查看详细介绍。 -[查看技术白皮书](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com:443/%E7%99%BD%E7%9A%AE%E4%B9%A6/MindSpore%EF%BC%9A%E4%B8%80%E7%A7%8D%E5%85%A8%E5%9C%BA%E6%99%AF%E8%A6%86%E7%9B%96%E7%9A%84%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E8%AE%A1%E7%AE%97%E6%A1%86%E6%9E%B6.pdf) \ No newline at end of file +[查看技术白皮书](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com:443/white_paper/MindSpore_white_paper.pdf) \ No newline at end of file diff --git a/install/mindspore_cpu_install.md b/install/mindspore_cpu_install.md index 49b23fbde9e75ddd6d51850d9b3d2a6563325380..721cad66b18b2db7b7fdc312765fc60c4a5db594 100644 --- a/install/mindspore_cpu_install.md +++ b/install/mindspore_cpu_install.md @@ -21,7 +21,7 @@ | 版本号 | 操作系统 | 可执行文件安装依赖 | 源码编译安装依赖 | | ---- | :--- | :--- | :--- | -| MindSpore master | Ubuntu 18.04 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- 其他依赖项参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt) | **编译依赖:**
- [Python](https://www.python.org/downloads/) 3.7.5
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
**安装依赖:**
与可执行文件安装依赖相同 | +| MindSpore master | - Ubuntu 18.04 x86_64
- Ubuntu 18.04 aarch64 | - [Python](https://www.python.org/downloads/) 3.7.5
- 其他依赖项参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt) | **编译依赖:**
- [Python](https://www.python.org/downloads/) 3.7.5
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
**安装依赖:**
与可执行文件安装依赖相同 | - GCC 7.3.0可以直接通过apt命令安装。 - 在联网状态下,安装whl包时会自动下载`requirements.txt`中的依赖项,其余情况需自行安装。 @@ -97,7 +97,7 @@ | 版本号 | 操作系统 | 可执行文件安装依赖 | 源码编译安装依赖 | | ---------------------- | :------------------ | :----------------------------------------------------------- | :----------------------- | -| MindArmour master | Ubuntu 18.04 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- 其他依赖项参见[setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py) | 与可执行文件安装依赖相同 | +| MindArmour master | - Ubuntu 18.04 x86_64
- Ubuntu 18.04 aarch64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- 其他依赖项参见[setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py) | 与可执行文件安装依赖相同 | - 在联网状态下,安装whl包时会自动下载`setup.py`中的依赖项,其余情况需自行安装。 diff --git a/install/mindspore_cpu_install_en.md b/install/mindspore_cpu_install_en.md index 1c3aa95fdde5c8b0527e8752ba0529be90bcd2aa..d21f05bdeb3d451ff36588a8346a9753bdf831d6 100644 --- a/install/mindspore_cpu_install_en.md +++ b/install/mindspore_cpu_install_en.md @@ -21,7 +21,7 @@ This document describes how to quickly install MindSpore in a Ubuntu system with | Version | Operating System | Executable File Installation Dependencies | Source Code Compilation and Installation Dependencies | | ---- | :--- | :--- | :--- | -| MindSpore master | Ubuntu 18.04 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt). | **Compilation dependencies:**
- [Python](https://www.python.org/downloads/) 3.7.5
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
same as the executable file installation dependencies. | +| MindSpore master | - Ubuntu 18.04 x86_64
- Ubuntu 18.04 aarch64 | - [Python](https://www.python.org/downloads/) 3.7.5
- For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt). | **Compilation dependencies:**
- [Python](https://www.python.org/downloads/) 3.7.5
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
**Installation dependencies:**<br> same as the executable file installation dependencies. |
- MindSpore master
- For details about other dependency items, see [setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py). | Same as the executable file installation dependencies. | +| MindArmour master | - Ubuntu 18.04 x86_64
- Ubuntu 18.04 aarch64 | - [Python](https://www.python.org/downloads/) 3.7.5
- MindSpore master
- For details about other dependency items, see [setup.py](https://gitee.com/mindspore/mindarmour/blob/master/setup.py). | Same as the executable file installation dependencies. | - When the network is connected, dependency items in the `setup.py` file are automatically downloaded during .whl package installation. In other cases, you need to manually install dependency items. diff --git a/install/mindspore_d_install.md b/install/mindspore_d_install.md index bce12ce71a089cfbd1f4684867aaf95822262deb..a0d6eb70a9f6d7f7c4452a593695e95c024c135f 100644 --- a/install/mindspore_d_install.md +++ b/install/mindspore_d_install.md @@ -32,7 +32,7 @@ | 版本号 | 操作系统 | 可执行文件安装依赖 | 源码编译安装依赖 | | ---- | :--- | :--- | :--- | -| MindSpore 0.6.0-beta | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- 其他依赖项参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt) | **编译依赖:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**安装依赖:**
与可执行文件安装依赖相同 | +| MindSpore master | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- 其他依赖项参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt) | **编译依赖:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**安装依赖:**
与可执行文件安装依赖相同 | - 确认当前用户有权限访问Ascend 910 AI处理器配套软件包(对应版本[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))的安装路径`/usr/local/Ascend`,若无权限,需要root用户将当前用户添加到`/usr/local/Ascend`所在的用户组,具体配置请详见配套软件包的说明文档。 - GCC 7.3.0可以直接通过apt命令安装。 diff --git a/install/mindspore_d_install_en.md b/install/mindspore_d_install_en.md index 6c45406ed8c066fa841c6a3912f03dbf4a27eaf9..827f23bb76e748d6304eabf3444d7974956bc0a1 100644 --- a/install/mindspore_d_install_en.md +++ b/install/mindspore_d_install_en.md @@ -32,7 +32,7 @@ This document describes how to quickly install MindSpore in an Ascend AI process | Version | Operating System | Executable File Installation Dependencies | Source Code Compilation and Installation Dependencies | | ---- | :--- | :--- | :--- | -| MindSpore 0.6.0-beta | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt). | **Compilation dependencies:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**Installation dependencies:**
same as the executable file installation dependencies. | +| MindSpore master | - Ubuntu 18.04 aarch64
- Ubuntu 18.04 x86_64
- EulerOS 2.8 aarch64
- EulerOS 2.5 x86_64 | - [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
- For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/r0.6/requirements.txt). | **Compilation dependencies:**
- [Python](https://www.python.org/downloads/) 3.7.5
- Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816))
- [wheel](https://pypi.org/project/wheel/) >= 0.32.0
- [GCC](https://gcc.gnu.org/releases.html) 7.3.0
- [CMake](https://cmake.org/download/) >= 3.14.1
- [patch](http://ftp.gnu.org/gnu/patch/) >= 2.5
- [gmp](https://gmplib.org/download/gmp/) 6.1.2
**Installation dependencies:**
same as the executable file installation dependencies. | - Confirm that the current user has the right to access the installation path `/usr/local/Ascend `of Ascend 910 AI processor software package(Version:[Atlas Data Center Solution V100R020C10T200](https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251661816)). If not, the root user needs to add the current user to the user group where `/usr/local/Ascend` is located. For the specific configuration, please refer to the software package instruction document. - GCC 7.3.0 can be installed by using apt command. diff --git a/lite/docs/source_en/glossary.md b/lite/docs/source_en/glossary.md index 1a1c7dc5fd8bc7e8b66090d3512ed97cc8b0cdcc..8b596229292567ddb8b24b7a0cc1560c763f224d 100644 --- a/lite/docs/source_en/glossary.md +++ b/lite/docs/source_en/glossary.md @@ -1,3 +1,12 @@ -# Glossary +# Glossary + + + +| Acronym and Abbreviation | Description | +| ----- | ----- | +| MindSpore Lite | MindSpore AI engine is applied to the intelligent terminal and resource constrained scenes on the edge side. | +| MindSpore Micro | MindSpore AI engine with smaller package size for IOT devices. | +| GHLO | Graph high-level optimization. | +| GLLO | Graph low-level optimization. | +| RT | Runtime. | - diff --git a/lite/docs/source_en/index.rst b/lite/docs/source_en/index.rst index 293149dbc33f0281ccbb1d3ee2cc3904f3ed1022..3c2526cf92596f42c67b123a1092c8a0b6350653 100644 --- a/lite/docs/source_en/index.rst +++ b/lite/docs/source_en/index.rst @@ -11,4 +11,5 @@ MindSpore Lite Documentation :maxdepth: 1 architecture + operator_list glossary diff --git a/lite/docs/source_en/operator_list.md b/lite/docs/source_en/operator_list.md new file mode 100644 index 0000000000000000000000000000000000000000..1c7383aad2f3457a31b7c89de4efbfbfce1d2d73 --- /dev/null +++ b/lite/docs/source_en/operator_list.md @@ -0,0 +1,124 @@ +# Operator List + + + +> √ The checked items are the operators supported by MindSpore Lite。 + +| Operation | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | Operator category | Tensorflow
Lite op supported | Caffe
Lite op supported | Onnx
Lite op supported | +|-----------------------|----------|----------|----------|-----------|----------|----------|------------------|----------|----------|----------| +| Abs | | √ | √ | √ | | | math_ops | Abs | | Abs | +| Add | | | | | | √ | | Add | | Add | +| AddN | | √ | | | | | math_ops | AddN | | | +| Argmax | | √ | √ | √ | | | array_ops | Argmax | ArgMax | ArgMax | +| Argmin | | √ | | | | | array_ops | Argmin | | | +| Asin | | | | | | | | | | Asin | +| Atan | | | | | | | | | | Atan | +| AvgPool | | √ | √ | √ | | √ | nn_ops | MeanPooling | Pooling | AveragePool | +| BatchMatMul | √ | √ | √ | √ | | | math_ops | | | | +| BatchNorm | | √ | | | | √ | nn_ops | | BatchNorm | BatchNormalization | +| BatchToSpace | | | | | | | array_ops | BatchToSpace, BatchToSpaceND | | | +| BatchToSpaceND | | | | | | | | | | | +| BiasAdd | | √ | | √ | | √ | nn_ops | | | BiasAdd | +| Broadcast | | √ | | | | | comm_ops | BroadcastTo | | Expand | +| Cast | | √ | | | | | array_ops | Cast, DEQUANTIZE* | | Cast | +| Ceil | | √ | | √ | | | math_ops | Ceil | | Ceil | +| Concat | | √ | √ | √ | | √ | array_ops | Concat | Concat | Concat | +| Constant | | | | | | | | | | Constant | +| Conv1dTranspose | | | | √ | | | layer/conv | | | | +| Conv2d | √ | √ | √ | √ | | √ | layer/conv | Conv2D | Convolution | Conv | +| Conv2dTranspose | | √ | √ | √ | | √ | layer/conv | DeConv2D | Deconvolution | ConvTranspose | +| Cos | | √ | √ | √ | | | math_ops | Cos | | Cos | +| Crop | | | | | | | | | Crop | | +| DeDepthwiseConv2D | | | | | | | | | Deconvolution| ConvTranspose | +| DepthToSpace | | | | | | | | DepthToSpace | | DepthToSpace | +| DepthwiseConv2dNative | √ | √ | √ | √ | | √ | nn_ops | DepthwiseConv2D | Convolution | Convolution | +| Div | | √ | √ | √ | | √ | math_ops | Div | | Div | +| Dropout | | | | | | | | | | Dropout | +| Eltwise | | | | | | | | | Eltwise | | +| Elu | | | | | | | | Elu | | Elu | +| Equal | | √ | √ | √ | | | math_ops | Equal | | Equal | +| Exp | | √ | | | | | math_ops | Exp | | Exp | +| ExpandDims | | √ | | | | | array_ops | | | | +| Fill | | √ | | | | | array_ops | Fill | | | +| Flatten | | | | | | | | | Flatten | | +| Floor | | √ | √ | √ | | | math_ops | flOOR | | Floor | +| FloorDiv | | √ | | | | | math_ops | FloorDiv | | | +| FloorMod | | √ | | | | | nn_ops | FloorMod | | | +| FullConnection | | √ | | | | | layer/basic | FullyConnected | InnerProduct | | +| GatherNd | | √ | | | | | array_ops | GatherND | | | +| GatherV2 | | √ | | | | | array_ops | Gather | | Gather | +| Greater | | √ | √ | √ | | | math_ops | Greater | | Greater | +| GreaterEqual | | √ | √ | √ | | | math_ops | GreaterEqual | | | +| Hswish | | | | | | | | HardSwish | | | +| L2norm | | | | | | | | L2_NORMALIZATION | | | +| LeakyReLU | | √ | | | | √ | layer/activation | LeakyRelu | | LeakyRelu | +| Less | | √ | √ | √ | | | math_ops | Less | | Less | +| LessEqual | | √ | √ | √ | | | math_ops | LessEqual | | | +| LocalResponseNorm | | | | | | | | LocalResponseNorm | | Lrn | +| Log | | √ | √ | √ | | | math_ops | Log | | Log | +| LogicalAnd | | √ | | | | | math_ops | LogicalAnd | | | +| LogicalNot | | √ | √ | √ | | | math_ops | LogicalNot | | | +| LogicalOr | | √ | | | | | math_ops | LogicalOr | | | +| LSTM | | √ | | | | | layer/lstm | | | | +| MatMul | √ | √ | √ | √ | | √ | math_ops | | | MatMul | +| Maximum | | | | | | | math_ops | Maximum | | Max | +| MaxPool | | √ | √ | √ | | √ | nn_ops | MaxPooling | Pooling | MaxPool | +| Minimum | | | | | | | math_ops | Minimum | | Min | +| Mul | | √ | √ | √ | | √ | math_ops | Mul | | Mul | +| Neg | | | 
| | | | math_ops | | | Neg | +| NotEqual | | √ | √ | √ | | | math_ops | NotEqual | | | +| OneHot | | √ | | | | | layer/basic | OneHot | | | +| Pack | | √ | | | | | nn_ops | | | | +| Pad | | √ | √ | √ | | | nn_ops | Pad | | Pad | +| Pow | | √ | √ | √ | | | math_ops | Pow | Power | Power | +| PReLU | | √ | √ | √ | | √ | layer/activation | Prelu | PReLU | PRelu | +| Range | | √ | | | | | layer/basic | Range | | | +| Rank | | √ | | | | | array_ops | Rank | | | +| RealDiv | | √ | √ | √ | | √ | math_ops | RealDiv | | | +| ReduceMax | | √ | √ | √ | | | math_ops | ReduceMax | | ReduceMax | +| ReduceMean | | √ | √ | √ | | | math_ops | Mean | | ReduceMean | +| ReduceMin | | √ | √ | √ | | | math_ops | ReduceMin | | ReduceMin | +| ReduceProd | | √ | √ | √ | | | math_ops | ReduceProd | | | +| ReduceSum | | √ | √ | √ | | | math_ops | Sum | | ReduceSum | +| ReLU | | √ | √ | √ | | √ | layer/activation | Relu | ReLU | Relu | +| ReLU6 | | √ | | | | √ | layer/activation | Relu6 | ReLU6 | Clip* | +| Reshape | | √ | √ | √ | | √ | array_ops | Reshape | Reshape | Reshape,Flatten | +| Resize | | | | | | | | ResizeBilinear, NearestNeighbor | Interp | | +| Reverse | | | | | | | | reverse | | | +| ReverseSequence | | √ | | | | | array_ops | ReverseSequence | | | +| Round | | √ | | √ | | | math_ops | Round | | | +| Rsqrt | | √ | √ | √ | | | math_ops | Rsqrt | | | +| Scale | | | | | | | | | Scale | | +| ScatterNd | | √ | | | | | array_ops | ScatterNd | | | +| Shape | | √ | | √ | | | array_ops | Shape | | Shape | +| Sigmoid | | √ | √ | √ | | √ | nn_ops | Logistic | Sigmoid | Sigmoid | +| Sin | | | | | | | | Sin | | Sin | +| Slice | | √ | √ | √ | | √ | array_ops | Slice | | Slice | +| Softmax | | √ | √ | √ | | √ | layer/activation | Softmax | Softmax | Softmax | +| SpaceToBatchND | | √ | | | | | array_ops | SpaceToBatchND | | | +| SpareToDense | | | | | | | | SpareToDense | | | +| SpaceToDepth | | √ | | | | | array_ops | SpaceToDepth | | SpaceToDepth | +| Split | | √ | √ | √ | | | array_ops | Split, SplitV | | | +| Sqrt | | √ | √ | √ | | | math_ops | Sqrt | | Sqrt | +| Square | | √ | √ | √ | | | math_ops | Square | | | +| SquaredDifference | | | | | | | | SquaredDifference | | | +| Squeeze | | √ | √ | √ | | | array_ops | Squeeze | | Squeeze | +| StridedSlice | | √ | √ | √ | | | array_ops | StridedSlice | | | +| Stack | | | | | | | | Stack | | | +| Sub | | √ | √ | √ | | √ | math_ops | Sub | | Sub | +| Tan | | | | | | | | | | Tan | +| Tanh | | √ | | | | | layer/activation | Tanh | TanH | | +| TensorAdd | | √ | √ | √ | | √ | math_ops | | | | +| Tile | | √ | | | | | array_ops | Tile | | Tile | +| TopK | | √ | √ | √ | | | nn_ops | TopKV2 | | | +| Transpose | | √ | √ | √ | | √ | array_ops | Transpose | Permute | Transpose | +| Unique | | | | | | | | Unique | | | +| Unpack | | √ | | | | | nn_ops | | | | +| Unsample | | | | | | | | | | Unsample | +| Unsqueeze | | | | | | | | | | Unsqueeze | +| Unstack | | | | | | | | Unstack | | | +| Where | | | | | | | | Where | | | +| ZerosLike | | √ | | | | | array_ops | ZerosLike | | | + +* Clip: only support convert clip(0, 6) to Relu6. +* DEQUANTIZE: only support to convert fp16 to fp32. 
diff --git a/lite/docs/source_zh_cn/glossary.md b/lite/docs/source_zh_cn/glossary.md index 2104f4270883b3828646fbbcf27fdb8150075704..b9cf41a4c6908e4e75c927614335a60e9e8b0ac6 100644 --- a/lite/docs/source_zh_cn/glossary.md +++ b/lite/docs/source_zh_cn/glossary.md @@ -1,6 +1,6 @@ -# 术语 +# 术语 - + | 术语/缩略语 | 说明 | | ----- | ----- | diff --git a/lite/docs/source_zh_cn/index.rst b/lite/docs/source_zh_cn/index.rst index 300d20dfa712466366460818e5ab5b057702bd01..08270a72e46b955616944d50149024f3765bf318 100644 --- a/lite/docs/source_zh_cn/index.rst +++ b/lite/docs/source_zh_cn/index.rst @@ -11,4 +11,6 @@ MindSpore端侧文档 :maxdepth: 1 architecture + roadmap + operator_list glossary \ No newline at end of file diff --git a/lite/docs/source_zh_cn/operator_list.md b/lite/docs/source_zh_cn/operator_list.md new file mode 100644 index 0000000000000000000000000000000000000000..0864989bc46b9182b5b90bb624021a1e756f0647 --- /dev/null +++ b/lite/docs/source_zh_cn/operator_list.md @@ -0,0 +1,124 @@ +# 算子支持 + + + +> √勾选的项为MindSpore Lite所支持的算子。 + +| 操作名 | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | 算子类别 | 支持的Tensorflow
Lite op | 支持的Caffe
Lite op | 支持的Onnx
Lite op | +|-----------------------|----------|----------|----------|-----------|----------|----------|------------------|----------|----------|----------| +| Abs | | √ | √ | √ | | | math_ops | Abs | | Abs | +| Add | | | | | | √ | | Add | | Add | +| AddN | | √ | | | | | math_ops | AddN | | | +| Argmax | | √ | √ | √ | | | array_ops | Argmax | ArgMax | ArgMax | +| Argmin | | √ | | | | | array_ops | Argmin | | | +| Asin | | | | | | | | | | Asin | +| Atan | | | | | | | | | | Atan | +| AvgPool | | √ | √ | √ | | √ | nn_ops | MeanPooling | Pooling | AveragePool | +| BatchMatMul | √ | √ | √ | √ | | | math_ops | | | | +| BatchNorm | | √ | | | | √ | nn_ops | | BatchNorm | BatchNormalization | +| BatchToSpace | | | | | | | array_ops | BatchToSpace, BatchToSpaceND | | | +| BatchToSpaceND | | | | | | | | | | | +| BiasAdd | | √ | | √ | | √ | nn_ops | | | BiasAdd | +| Broadcast | | √ | | | | | comm_ops | BroadcastTo | | Expand | +| Cast | | √ | | | | | array_ops | Cast, DEQUANTIZE* | | Cast | +| Ceil | | √ | | √ | | | math_ops | Ceil | | Ceil | +| Concat | | √ | √ | √ | | √ | array_ops | Concat | Concat | Concat | +| Constant | | | | | | | | | | Constant | +| Conv1dTranspose | | | | √ | | | layer/conv | | | | +| Conv2d | √ | √ | √ | √ | | √ | layer/conv | Conv2D | Convolution | Conv | +| Conv2dTranspose | | √ | √ | √ | | √ | layer/conv | DeConv2D | Deconvolution | ConvTranspose | +| Cos | | √ | √ | √ | | | math_ops | Cos | | Cos | +| Crop | | | | | | | | | Crop | | +| DeDepthwiseConv2D | | | | | | | | | Deconvolution| ConvTranspose | +| DepthToSpace | | | | | | | | DepthToSpace | | DepthToSpace | +| DepthwiseConv2dNative | √ | √ | √ | √ | | √ | nn_ops | DepthwiseConv2D | Convolution | Convolution | +| Div | | √ | √ | √ | | √ | math_ops | Div | | Div | +| Dropout | | | | | | | | | | Dropout | +| Eltwise | | | | | | | | | Eltwise | | +| Elu | | | | | | | | Elu | | Elu | +| Equal | | √ | √ | √ | | | math_ops | Equal | | Equal | +| Exp | | √ | | | | | math_ops | Exp | | Exp | +| ExpandDims | | √ | | | | | array_ops | | | | +| Fill | | √ | | | | | array_ops | Fill | | | +| Flatten | | | | | | | | | Flatten | | +| Floor | | √ | √ | √ | | | math_ops | flOOR | | Floor | +| FloorDiv | | √ | | | | | math_ops | FloorDiv | | | +| FloorMod | | √ | | | | | nn_ops | FloorMod | | | +| FullConnection | | √ | | | | | layer/basic | FullyConnected | InnerProduct | | +| GatherNd | | √ | | | | | array_ops | GatherND | | | +| GatherV2 | | √ | | | | | array_ops | Gather | | Gather | +| Greater | | √ | √ | √ | | | math_ops | Greater | | Greater | +| GreaterEqual | | √ | √ | √ | | | math_ops | GreaterEqual | | | +| Hswish | | | | | | | | HardSwish | | | +| L2norm | | | | | | | | L2_NORMALIZATION | | | +| LeakyReLU | | √ | | | | √ | layer/activation | LeakyRelu | | LeakyRelu | +| Less | | √ | √ | √ | | | math_ops | Less | | Less | +| LessEqual | | √ | √ | √ | | | math_ops | LessEqual | | | +| LocalResponseNorm | | | | | | | | LocalResponseNorm | | Lrn | +| Log | | √ | √ | √ | | | math_ops | Log | | Log | +| LogicalAnd | | √ | | | | | math_ops | LogicalAnd | | | +| LogicalNot | | √ | √ | √ | | | math_ops | LogicalNot | | | +| LogicalOr | | √ | | | | | math_ops | LogicalOr | | | +| LSTM | | √ | | | | | layer/lstm | | | | +| MatMul | √ | √ | √ | √ | | √ | math_ops | | | MatMul | +| Maximum | | | | | | | math_ops | Maximum | | Max | +| MaxPool | | √ | √ | √ | | √ | nn_ops | MaxPooling | Pooling | MaxPool | +| Minimum | | | | | | | math_ops | Minimum | | Min | +| Mul | | √ | √ | √ | | √ | math_ops | Mul | | Mul | +| Neg | | | | | | | 
math_ops | | | Neg | +| NotEqual | | √ | √ | √ | | | math_ops | NotEqual | | | +| OneHot | | √ | | | | | layer/basic | OneHot | | | +| Pack | | √ | | | | | nn_ops | | | | +| Pad | | √ | √ | √ | | | nn_ops | Pad | | Pad | +| Pow | | √ | √ | √ | | | math_ops | Pow | Power | Power | +| PReLU | | √ | √ | √ | | √ | layer/activation | Prelu | PReLU | PRelu | +| Range | | √ | | | | | layer/basic | Range | | | +| Rank | | √ | | | | | array_ops | Rank | | | +| RealDiv | | √ | √ | √ | | √ | math_ops | RealDiv | | | +| ReduceMax | | √ | √ | √ | | | math_ops | ReduceMax | | ReduceMax | +| ReduceMean | | √ | √ | √ | | | math_ops | Mean | | ReduceMean | +| ReduceMin | | √ | √ | √ | | | math_ops | ReduceMin | | ReduceMin | +| ReduceProd | | √ | √ | √ | | | math_ops | ReduceProd | | | +| ReduceSum | | √ | √ | √ | | | math_ops | Sum | | ReduceSum | +| ReLU | | √ | √ | √ | | √ | layer/activation | Relu | ReLU | Relu | +| ReLU6 | | √ | | | | √ | layer/activation | Relu6 | ReLU6 | Clip* | +| Reshape | | √ | √ | √ | | √ | array_ops | Reshape | Reshape | Reshape,Flatten | +| Resize | | | | | | | | ResizeBilinear, NearestNeighbor | Interp | | +| Reverse | | | | | | | | reverse | | | +| ReverseSequence | | √ | | | | | array_ops | ReverseSequence | | | +| Round | | √ | | √ | | | math_ops | Round | | | +| Rsqrt | | √ | √ | √ | | | math_ops | Rsqrt | | | +| Scale | | | | | | | | | Scale | | +| ScatterNd | | √ | | | | | array_ops | ScatterNd | | | +| Shape | | √ | | √ | | | array_ops | Shape | | Shape | +| Sigmoid | | √ | √ | √ | | √ | nn_ops | Logistic | Sigmoid | Sigmoid | +| Sin | | | | | | | | Sin | | Sin | +| Slice | | √ | √ | √ | | √ | array_ops | Slice | | Slice | +| Softmax | | √ | √ | √ | | √ | layer/activation | Softmax | Softmax | Softmax | +| SpaceToBatchND | | √ | | | | | array_ops | SpaceToBatchND | | | +| SpareToDense | | | | | | | | SpareToDense | | | +| SpaceToDepth | | √ | | | | | array_ops | SpaceToDepth | | SpaceToDepth | +| Split | | √ | √ | √ | | | array_ops | Split, SplitV | | | +| Sqrt | | √ | √ | √ | | | math_ops | Sqrt | | Sqrt | +| Square | | √ | √ | √ | | | math_ops | Square | | | +| SquaredDifference | | | | | | | | SquaredDifference | | | +| Squeeze | | √ | √ | √ | | | array_ops | Squeeze | | Squeeze | +| StridedSlice | | √ | √ | √ | | | array_ops | StridedSlice | | | +| Stack | | | | | | | | Stack | | | +| Sub | | √ | √ | √ | | √ | math_ops | Sub | | Sub | +| Tan | | | | | | | | | | Tan | +| Tanh | | √ | | | | | layer/activation | Tanh | TanH | | +| TensorAdd | | √ | √ | √ | | √ | math_ops | | | | +| Tile | | √ | | | | | array_ops | Tile | | Tile | +| TopK | | √ | √ | √ | | | nn_ops | TopKV2 | | | +| Transpose | | √ | √ | √ | | √ | array_ops | Transpose | Permute | Transpose | +| Unique | | | | | | | | Unique | | | +| Unpack | | √ | | | | | nn_ops | | | | +| Unsample | | | | | | | | | | Unsample | +| Unsqueeze | | | | | | | | | | Unsqueeze | +| Unstack | | | | | | | | Unstack | | | +| Where | | | | | | | | Where | | | +| ZerosLike | | √ | | | | | array_ops | ZerosLike | | | + +* Clip: only support convert clip(0, 6) to Relu6. +* DEQUANTIZE: only support to convert fp16 to fp32. diff --git a/lite/docs/source_zh_cn/roadmap.md b/lite/docs/source_zh_cn/roadmap.md new file mode 100644 index 0000000000000000000000000000000000000000..6bafce4c91194936f9e2715a5896819b72ee99a8 --- /dev/null +++ b/lite/docs/source_zh_cn/roadmap.md @@ -0,0 +1,15 @@ +# RoadMap + + + +1. 增加更多的FP16、INT8和UINT8 CPU算子; +2. 增加更多的openCL、openGL、vulkan和metal GPU算子; +3. 增加控制流算子支持; +4. 增加NPU支持; +5. 增加部署在IoT设备的推理框架; +6. 
增加图像分割、文字识别、人脸检测等预制模型; +7. 增加Lite的图像分割、文字识别、人脸检测等预置样例; +8. 增加Micro的样例; +9. 端侧训练支持; +10. pipeline数据处理丰富; +11. 模型转换工具支持windows和MAC。 \ No newline at end of file diff --git a/lite/tutorials/source_en/deploy.md b/lite/tutorials/source_en/deploy.md index 791c3d1a6be4fff36671ebec08aba3a840055f1f..350654ea725fc9a286be6f113d007e4b5ce62ff6 100644 --- a/lite/tutorials/source_en/deploy.md +++ b/lite/tutorials/source_en/deploy.md @@ -1,3 +1,127 @@ # Deploy - + + +- [Deployment](#deployment) + - [Environment Requirements](#environment-requirements) + - [Compilation Options](#compilation-options) + - [Output Description](#output-description) + - [Compilation Example](#compilation-example) + + + + + +This document describes how to quickly install MindSpore Lite on the Ubuntu system. + +## Environment Requirements + +- The compilation environment supports Linux x86_64 only. Ubuntu 18.04.02 LTS is recommended. + +- Compilation dependencies (basics): + - [CMake](https://cmake.org/download/) >= 3.14.1 + - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 + - [Android_NDK r20b](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) + + > - `Android_NDK` needs to be installed only when the Arm version is compiled. Skip this dependency when the x86_64 version is compiled. + > - To install and use `Android_NDK`, you need to configure environment variables. The command example is `export ANDROID_NDK={$NDK_PATH}/android-ndk-r20b`. + +- Compilation dependencies (additional dependencies required by the MindSpore Lite model conversion tool, which is required only for compilation of the x86_64 version) + - [Autoconf](http://ftp.gnu.org/gnu/autoconf/) >= 2.69 + - [Libtool](https://www.gnu.org/software/libtool/) >= 2.4.6 + - [LibreSSL](http://www.libressl.org/) >= 3.1.3 + - [Automake](https://www.gnu.org/software/automake/) >= 1.11.6 + - [Libevent](https://libevent.org) >= 2.0 + - [M4](https://www.gnu.org/software/m4/m4.html) >= 1.4.18 + - [OpenSSL](https://www.openssl.org/) >= 1.1.1 + + +## Compilation Options + +MindSpore Lite provides multiple compilation options. You can select different compilation options as required. + +| Parameter | Parameter Description | Value Range | Mandatory or Not | +| -------- | ----- | ---- | ---- | +| -d | If this parameter is set, the debug version is compiled. Otherwise, the release version is compiled. | - | No | +| -i | If this parameter is set, incremental compilation is performed. Otherwise, full compilation is performed. | - | No | +| -j[n] | Sets the number of threads used during compilation. Otherwise, the number of threads is set to 8 by default. | - | No | +| -I | Selects an applicable architecture. | arm64, arm32, or x86_64 | Yes | +| -e | In the Arm architecture, select the backend operator and set the `gpu` parameter. The built-in GPU operator of the framework is compiled at the same time. | GPU | No | +| -h | Displays the compilation help information. | - | No | + +> When the `-I` parameter changes, that is, the applicable architecture is changed, the `-i` parameter cannot be used for incremental compilation. + +## Output Description + +After the compilation is complete, go to the `mindspore/output` directory of the source code to view the file generated after compilation. The file is named `mindspore-lite-{version}-{function}-{OS}.tar.gz`. After decompression, the tool package named `mindspore-lite-{version}-{function}-{OS}` can be obtained. + +> version: version of the output, consistent with that of the MindSpore. 
+> 
+> function: function of the output. `convert` indicates the output of the conversion tool and `runtime` indicates the output of the inference framework. 
+> 
+> OS: OS on which the output will be deployed. 
+ 
+```bash 
+tar -xvf mindspore-lite-{version}-{function}-{OS}.tar.gz 
+``` 
+ 
+For the x86 architecture, you can obtain the output of both the conversion tool and the inference framework, while for the ARM architecture you can obtain only the output of the inference framework. 
+ 
+Generally, the compiled output includes the following subitems. The contents vary depending on the function of the output. 
+ 
+> For the Arm 64-bit architecture, you can obtain the output of the `arm64-cpu` inference framework. If `-e gpu` is added, you can obtain the output of the `arm64-gpu` inference framework. The same applies to the Arm 32-bit compilation. 
+ 
+| Directory | Description | converter | runtime | 
+| --- | --- | --- | --- | 
+| include | Inference framework header file | No | Yes | 
+| lib | Inference framework dynamic library | No | Yes | 
+| benchmark | Benchmark test tool | No | Yes | 
+| time_profiler | Time consumption analysis tool at the model network layer | No | Yes | 
+| converter | Model conversion tool | Yes | No | 
+| third_party | Header file and library of the third-party library | Yes | Yes | 
+ 
+Take the 0.7.0-beta version and CPU as an example. The contents of `third_party` and `lib` vary depending on the architecture as follows: 
+- `mindspore-lite-0.7.0-converter-ubuntu`: includes `protobuf` (Protobuf dynamic library). 
+- `mindspore-lite-0.7.0-runtime-x86-cpu`: includes `flatbuffers` (FlatBuffers header file). 
+TODO: Add document content. 
+ 
+> Before running the tools in the `converter`, `benchmark`, or `time_profiler` directory, you need to configure environment variables and set the paths of the dynamic libraries of MindSpore Lite and Protobuf to the paths of the system dynamic libraries. The following uses the 0.7.0-beta version as an example: `export LD_LIBRARY_PATH=./mindspore-lite-0.7.0/lib:./mindspore-lite-0.7.0/third_party/protobuf/lib:${LD_LIBRARY_PATH}`. 
+ 
+## Compilation Example 
+ 
+First, download the source code from the MindSpore code repository. 
+ 
+```bash 
+git clone https://gitee.com/mindspore/mindspore.git 
+``` 
+ 
+Then, run the following commands in the root directory of the source code to compile MindSpore Lite of different versions: 
+ 
+- Debug version of the x86_64 architecture: 
+  ```bash 
+  bash build.sh -I x86_64 -d 
+  ``` 
+ 
+- Release version of the x86_64 architecture, with the number of threads set: 
+  ```bash 
+  bash build.sh -I x86_64 -j32 
+  ``` 
+ 
+- Release version of the Arm 64-bit architecture in incremental compilation mode, with the number of threads set: 
+  ```bash 
+  bash build.sh -I arm64 -i -j32 
+  ``` 
+ 
+- Release version of the Arm 64-bit architecture, with the built-in GPU operator compiled: 
+  ```bash 
+  bash build.sh -I arm64 -e gpu 
+  ``` 
+ 
+> - The `build.sh` script runs `git clone` to obtain the code of the third-party dependency libraries. Ensure that the network settings of Git are correct and available beforehand. 
+ 
+Take the 0.7.0-beta version as an example. 
After the release version of the x86_64 architecture is compiled, go to the `mindspore/output` directory and run the following decompression command to obtain the output files `include`, `lib`, `benchmark`, `time_profiler`, `converter`, and `third_party`: 
+ 
+```bash 
+tar -xvf mindspore-lite-0.7.0-converter-ubuntu.tar.gz 
+tar -xvf mindspore-lite-0.7.0-runtime-x86-cpu.tar.gz 
+``` 
diff --git a/lite/tutorials/source_en/use/converter_tool.md b/lite/tutorials/source_en/use/converter_tool.md index ccca0bf1716e26711191d69a69e2d37265c4a7f8..54d11d2ca99cf9d2a8596aca1a951c31d6e3bb21 100644 --- a/lite/tutorials/source_en/use/converter_tool.md +++ b/lite/tutorials/source_en/use/converter_tool.md @@ -1,3 +1,108 @@ # Converter Tool 
+ 
+ 
+- [Model Conversion Tool](#model-conversion-tool) 
+  - [Overview](#overview) 
+  - [Environment Preparation](#environment-preparation) 
+  - [Parameter Description](#parameter-description) 
+  - [Model Visualization](#model-visualization) 
+  - [Example](#example) 
+ 
+ 
+ 
+## Overview 
+ 
+MindSpore Lite provides a tool for offline model conversion. It supports conversion of multiple types of models and visualization of the converted models. The converted models can be used for inference. The command line provides multiple parameter options, offering users a convenient conversion method. 
+ 
+Currently, the following input formats are supported: MindSpore, TensorFlow Lite, Caffe, and ONNX. 
+ 
+## Environment Preparation 
+ 
+To use the MindSpore Lite model conversion tool, you need to prepare the environment as follows: 
+ 
+- Compilation: Install basic and additional compilation dependencies and perform compilation. The compilation version is x86_64. The code of the model conversion tool is stored in the `mindspore/lite/tools/converter` directory of the MindSpore source code. For details about the compilation operations, see the [Environment Requirements](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id2) and [Compilation Example](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id5) in the deployment document. 
+ 
+- Run: Obtain the `converter` tool and configure environment variables by referring to [Output Description](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id4) in the deployment document. 
+ 
+## Parameter Description 
+ 
+You can use `./converter_lite` to complete the conversion. In addition, you can set multiple parameters as required. 
+You can enter `./converter_lite --help` to obtain help information in real time. 
+ 
+The following describes the parameters in detail. 
+ 
+ 
+| Parameter | Mandatory or Not | Parameter Description | Value Range | Default Value | 
+| -------- | ------- | ----- | --- | ---- | 
+| `--help` | No | Prints all help information. | - | - | 
+| `--fmk=` | Yes | Original format of the input model. | MS, CAFFE, TFLITE, or ONNX | - | 
+| `--modelFile=` | Yes | Path of the input model. | - | - | 
+| `--outputFile=` | Yes | Path of the output model. (If the path does not exist, a directory will be automatically created.) The suffix `.ms` is generated automatically. | - | - | 
+| `--weightFile=` | Yes (for Caffe models only) | Path of the weight file of the input model. | - | - | 
+| `--quantType=` | No | Sets the training type of the model. | PostTraining: quantization after training<br>
AwareTraining: perceptual quantization | - | + +> - The parameter name and parameter value are separated by an equal sign (=) and no space is allowed between them. +> - The Caffe model is divided into two files: model structure `*.prototxt`, corresponding to the `--modelFile` parameter; model weight `*.caffemodel`, corresponding to the `--weightFile` parameter + +## Model Visualization + +The model visualization tool provides a method for checking the model conversion result. You can run the JSON command to generate a `*.json` file and compare it with the original model to determine the conversion effect. + +TODO: This function is under development now. + +## Example + +First, in the root directory of the source code, run the following command to perform compilation. For details, see `deploy.md`. +```bash +bash build.sh -I x86_64 +``` +> Currently, the model conversion tool supports only the x86_64 architecture. + +The following describes how to use the conversion command by using several common examples. + +- Take the Caffe model LeNet as an example. Run the following conversion command: + + ```bash + ./converter_lite --fmk=CAFFE --modelFile=lenet.prototxt --weightFile=lenet.caffemodel --outputFile=lenet + ``` + + In this example, the Caffe model is used. Therefore, the model structure and model weight files are required. Two more parameters `fmk` and `outputFile` are also required. + + The output is as follows: + ``` + INFO [converter/converter.cc:190] Runconverter] CONVERTER RESULT: SUCCESS! + ``` + This indicates that the Caffe model is successfully converted into the MindSpore Lite model and the new file `lenet.ms` is generated. + +- The following uses the MindSpore, TensorFlow Lite, ONNX and perception quantization models as examples to describe how to run the conversion command. + + - MindSpore model `model.mindir` + ```bash + ./converter_lite --fmk=MS --modelFile=model.mindir --outputFile=model + ``` + + - TensorFlow Lite model `model.tflite` + ```bash + ./converter_lite --fmk=TFLITE --modelFile=model.tflite --outputFile=model + ``` + + - ONNX model `model.onnx` + ```bash + ./converter_lite --fmk=ONNX --modelFile=model.onnx --outputFile=model + ``` + + - TensorFlow Lite perceptual quantization model `model_quant.tflite` + ```bash + ./converter_lite --fmk=TFLITE --modelFile=model.tflite --outputFile=model --quantType=AwareTraining + ``` + + In the preceding scenarios, the following information is displayed, indicating that the conversion is successful. In addition, the target file `model.ms` is obtained. + ``` + INFO [converter/converter.cc:190] Runconverter] CONVERTER RESULT: SUCCESS! + ``` + + +You can use the model visualization tool to visually check the converted MindSpore Lite model. This function is under development. 
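If `converter_lite` cannot find the MindSpore Lite or Protobuf dynamic libraries at run time, the environment variable setup described in the deployment document applies here as well. The following is a minimal sketch, assuming the 0.7.0-beta x86_64 packages have been decompressed into the current directory and using a TensorFlow Lite model named `model.tflite` as a placeholder; adjust the paths to match your actual output:

```bash
# Make the MindSpore Lite and Protobuf dynamic libraries visible to the loader
# (paths taken from the deployment document for the 0.7.0-beta output).
export LD_LIBRARY_PATH=./mindspore-lite-0.7.0/lib:./mindspore-lite-0.7.0/third_party/protobuf/lib:${LD_LIBRARY_PATH}

# Run the converter from the `converter` directory of the decompressed package
# (the model file name here is only an illustrative placeholder).
./converter_lite --fmk=TFLITE --modelFile=model.tflite --outputFile=model
```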
\ No newline at end of file diff --git a/lite/tutorials/source_en/use/runtime_lite.md b/lite/tutorials/source_en/use/runtime.md similarity index 36% rename from lite/tutorials/source_en/use/runtime_lite.md rename to lite/tutorials/source_en/use/runtime.md index 834347308ff5fe95b14d70c2720c0c161522414c..fe1fa8694aeb3750f199f251f86e68839128dafe 100644 --- a/lite/tutorials/source_en/use/runtime_lite.md +++ b/lite/tutorials/source_en/use/runtime.md @@ -1,3 +1,3 @@ -# Runtime (Lite) +# Runtime - + diff --git a/lite/tutorials/source_zh_cn/deploy.md b/lite/tutorials/source_zh_cn/deploy.md index be05c2a5495b939cf95773598d6df17d51eaf4f0..dd973c7790f1090fc031240d24c38737159113a1 100644 --- a/lite/tutorials/source_zh_cn/deploy.md +++ b/lite/tutorials/source_zh_cn/deploy.md @@ -1,32 +1,39 @@ # 部署 -本文档介绍如何在Ubuntu系统上快速安装MindSpore Lite。 - - [部署](#部署) - - [环境要求](#环境要求) - - [编译选项](#编译选项) - - [输出件说明](#输出件说明) - - [编译示例](#编译示例) + - [Linux环境部署](#linux环境部署) + - [环境要求](#环境要求) + - [编译选项](#编译选项) + - [输出件说明](#输出件说明) + - [编译示例](#编译示例) + - [Windows环境部署](#windows环境部署) + - [环境要求](#环境要求-1) + - [编译选项](#编译选项-1) + - [输出件说明](#输出件说明-1) + - [编译示例](#编译示例-1) - + + +本文档介绍如何在Ubuntu和Windows系统上快速安装MindSpore Lite。 + +## Linux环境部署 -## 环境要求 +### 环境要求 - 编译环境仅支持x86_64版本的Linux:推荐使用Ubuntu 18.04.02LTS - 编译依赖(基本项) - [CMake](https://cmake.org/download/) >= 3.14.1 - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Python](https://www.python.org/) >= 3.7 - [Android_NDK r20b](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) - + > - 仅在编译ARM版本时需要安装`Android_NDK`,编译x86_64版本可跳过此项。 > - 如果安装并使用`Android_NDK`,需配置环境变量,命令参考:`export ANDROID_NDK={$NDK_PATH}/android-ndk-r20b`。 - + - 编译依赖(MindSpore Lite模型转换工具所需附加项,仅编译x86_64版本时需要) - [Autoconf](http://ftp.gnu.org/gnu/autoconf/) >= 2.69 - [Libtool](https://www.gnu.org/software/libtool/) >= 2.4.6 @@ -34,10 +41,10 @@ - [Automake](https://www.gnu.org/software/automake/) >= 1.11.6 - [Libevent](https://libevent.org) >= 2.0 - [M4](https://www.gnu.org/software/m4/m4.html) >= 1.4.18 - - [OpenSSL](https://www.openssl.org/) >= 1.1.1 - - -## 编译选项 + - [OpenSSL](https://www.openssl.org/) >= 1.1.1 + + +### 编译选项 MindSpore Lite提供多种编译方式,用户可根据需要选择不同的编译选项。 @@ -52,40 +59,44 @@ MindSpore Lite提供多种编译方式,用户可根据需要选择不同的编 > 在`-I`参数变动时,即切换适用架构时,无法使用`-i`参数进行增量编译。 -## 输出件说明 +### 输出件说明 -编译完成后,进入源码的`mindspore/output`目录,可查看编译后生成的文件,命名为`mindspore-lite-{version}-{function}-{OS}.tar.gz`。解压后,即可获得编译后的工具包,名称为`mindspore-lite-{version}`。 +编译完成后,进入源码的`mindspore/output`目录,可查看编译后生成的文件,命名为`mindspore-lite-{version}-{function}-{OS}.tar.gz`。解压后,即可获得编译后的工具包,名称为`mindspore-lite-{version}-{function}-{OS}`。 > version:输出件版本,与所编译的MindSpore版本一致。 +> > function:输出件功能,`convert`表示为转换工具的输出件,`runtime`表示为推理框架的输出件。 +> > OS:输出件应部署的操作系统。 ```bash tar -xvf mindspore-lite-{version}-{function}-{OS}.tar.gz ``` +编译x86可获得转换工具`converter`与推理框架`runtime`功能的输出件,编译ARM仅能获得推理框架`runtime`。 -编译后的输出件一般包含以下几种,架构的选择会影响输出件的种类。 +输出件中包含以下几类子项,功能不同所含内容也会有所区别。 -> 编译x86可获得转换工具的输出件,编译ARM64默认可获得`arm64-cpu`的推理框架输出件,若添加`-e gpu`则获得`arm64-gpu`的推理框架输出件,编译ARM32同理。 +> 编译ARM64默认可获得`arm64-cpu`的推理框架输出件,若添加`-e gpu`则获得`arm64-gpu`的推理框架输出件,编译ARM32同理。 -编译后的输出件一般包含以下几种,架构的选择会影响输出件的种类。 +| 目录 | 说明 | converter | runtime | +| --- | --- | --- | --- | +| include | 推理框架头文件 | 无 | 有 | +| lib | 推理框架动态库 | 无 | 有 | +| benchmark | 基准测试工具 | 无 | 有 | +| time_profiler | 模型网络层耗时分析工具 | 无 | 有 | +| converter | 模型转换工具 | 有 | 无 | +| third_party | 第三方库头文件和库 | 有 | 有 | -| 目录 | 说明 | x86_64 | arm64 | arm32 | -| --- | --- | --- | --- | --- | -| include | 推理框架头文件 | 无 | 有 | 有 | -| lib | 推理框架动态库 | 有 | 有 | 有 | -| benchmark | 
基准测试工具 | 有 | 有 | 有 | -| time_profiler | 模型网络层耗时分析工具 | 有 | 有 | 有 | -| converter | 模型转换工具 | 有 | 无 | 无 | -| third_party | 第三方库头文件和库 | 有 | 有 | 有 | +以0.7.0-beta版本,CPU编译为例,不同包名下,`third party`与`lib`的内容不同: -在x86_64、ARM两种架构下,`third party`的内容不同。其中: -- x86_64:`protobuf`(Protobuf的动态库)。 -- ARM:`flatbuffers`(FlatBuffers头文件)。 +- `mindspore-lite-0.7.0-converter-ubuntu`:包含`protobuf`(Protobuf的动态库)。 +- `mindspore-lite-0.7.0-runtime-x86-cpu`:`third party`包含`flatbuffers`(FlatBuffers头文件),`lib`包含`libmindspore-lite.so`(MindSpore Lite的动态库)。 +- `mindspore-lite-0.7.0-runtime-arm64-cpu`:`third party`包含`flatbuffers`(FlatBuffers头文件),`lib`包含`libmindspore-lite.so`(MindSpore Lite的动态库)和`liboptimize.so`。 +TODO:补全文件内容 -> 运行converter、benchmark或time_profiler目录下的工具前,都需配置环境变量,将MindSpore Lite和Protobuf的动态库所在的路径配置到系统搜索动态库的路径中。以0.6.0-beta版本为例:`export LD_LIBRARY_PATH=./mindspore-lite-0.6.0/lib:./mindspore-lite-0.6.0/third_party/protobuf/lib:${LD_LIBRARY_PATH}`。 +> 运行converter、benchmark或time_profiler目录下的工具前,都需配置环境变量,将MindSpore Lite和Protobuf的动态库所在的路径配置到系统搜索动态库的路径中。以0.7.0-beta版本为例:`export LD_LIBRARY_PATH=./mindspore-lite-0.7.0/lib:./mindspore-lite-0.7.0/third_party/protobuf/lib:${LD_LIBRARY_PATH}`。 -## 编译示例 +### 编译示例 首先,从MindSpore代码仓下载源码。 @@ -99,26 +110,76 @@ git clone https://gitee.com/mindspore/mindspore.git ```bash bash build.sh -I x86_64 -d ``` - + - 编译x86_64架构Release版本,同时设定线程数。 ```bash bash build.sh -I x86_64 -j32 ``` - + - 增量编译ARM64架构Release版本,同时设定线程数。 ```bash - bash build.sh -I arm64 -i -j32 + bash build.sh -I arm64 -i -j32 ``` - + - 编译ARM64架构Release版本,同时编译内置的GPU算子。 ```bash - bash build.sh -I arm64 -e gpu + bash build.sh -I arm64 -e gpu ``` - + > `build.sh`中会执行`git clone`获取第三方依赖库的代码,请提前确保git的网络设置正确可用。 - -以0.6.0-beta版本为例,x86_64架构Release版本编译完成之后,进入`mindspore/output`目录,执行如下解压缩命令,即可获取输出件`include`、`lib`、`benchmark`、`time_profiler`、`converter`和`third_party`。 - + +以0.7.0-beta版本为例,x86_64架构Release版本编译完成之后,进入`mindspore/output`目录,执行如下解压缩命令,即可获取输出件`include`、`lib`、`benchmark`、`time_profiler`、`converter`和`third_party`。 + ```bash -tar -xvf mindspore-lite-0.6.0-converter-ubuntu.tar.gz +tar -xvf mindspore-lite-0.7.0-converter-ubuntu.tar.gz +tar -xvf mindspore-lite-0.7.0-runtime-x86-cpu.tar.gz ``` + +## Windows环境部署 + +### 环境要求 + +- 编译环境仅支持32位或64位Windows系统 + +- 编译依赖(基本项) + - [CMake](https://cmake.org/download/) >= 3.14.1 + - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 + +### 编译选项 + +MindSpore Lite的编译选项如下。 + +| 参数 | 参数说明 | 取值范围 | 是否必选 | +| -------- | ----- | ---- | ---- | +| lite | 设置该参数,则对Mindspore Lite工程进行编译,否则对Mindspore工程进行编译 | - | 是 | +| [n] | 设定编译时所用的线程数,否则默认设定为6线程 | - | 否 | + +### 输出件说明 + +编译完成后,进入源码的`mindspore/output/`目录,可查看编译后生成的文件,命名为命名为`mindspore-lite-{version}-converter-win-{process_unit}.zip`。解压后,即可获得编译后的工具包,名称为`mindspore-lite-{version}`。 + +> version:输出件版本,与所编译的MindSpore版本一致。 +> process_unit:输出件应部署的处理器类型。 + +### 编译示例 + +首先,使用git工具从MindSpore代码仓下载源码。 + +```bash +git clone https://gitee.com/mindspore/mindspore.git +``` + +然后,使用cmd工具在源码根目录下,执行如下命令即可编译MindSpore Lite。 + +- 以默认线程数(6线程)编译Windows版本。 + ```bash + call build.bat lite + ``` +- 以指定线程数8编译Windows版本。 + ```bash + call build.bat lite 8 + ``` + +> `build.bat`中会执行`git clone`获取第三方依赖库的代码,请提前确保git的网络设置正确可用。 + +编译完成之后,进入`mindspore/output/`目录,解压后即可获取输出件`converter`。 diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.jpg b/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ca1f0ff80d553333f78a89bf132bbeab7666043d Binary files /dev/null and 
b/lite/tutorials/source_zh_cn/images/lite_quick_start_app_result.jpg differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png new file mode 100644 index 0000000000000000000000000000000000000000..29e954a425c3b42e61353b97394d774f646cada7 Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_home.png differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_install.jpg b/lite/tutorials/source_zh_cn/images/lite_quick_start_install.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c98ee71dae722be180a8b88c1661eabf85c97dce Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_install.jpg differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..6f71294479c4cd91dd983136d7f13960227c3c57 Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_project_structure.png differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_run_app.PNG b/lite/tutorials/source_zh_cn/images/lite_quick_start_run_app.PNG new file mode 100644 index 0000000000000000000000000000000000000000..2557b6293de5b3d7fefe7f6e58b57c03deabb55d Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_run_app.PNG differ diff --git a/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png b/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png new file mode 100644 index 0000000000000000000000000000000000000000..faf694bd2e69ec1e4b33ddfe944612e8472b7600 Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/lite_quick_start_sdk.png differ diff --git a/lite/tutorials/source_zh_cn/images/side_infer_process.png b/lite/tutorials/source_zh_cn/images/side_infer_process.png new file mode 100644 index 0000000000000000000000000000000000000000..eb63d0858cbfb92acab10bc62a0ca1ce6a09e512 Binary files /dev/null and b/lite/tutorials/source_zh_cn/images/side_infer_process.png differ diff --git a/lite/tutorials/source_zh_cn/index.rst b/lite/tutorials/source_zh_cn/index.rst index 77d3d333b2373192055526caa82d0928a4a5a24c..52f49f3366457fc17b653fa30071d5cb67c82963 100644 --- a/lite/tutorials/source_zh_cn/index.rst +++ b/lite/tutorials/source_zh_cn/index.rst @@ -12,7 +12,7 @@ MindSpore端侧教程 :caption: 快速入门 deploy - quick_start/quick_start_lite + quick_start/quick_start .. toctree:: :glob: @@ -20,4 +20,7 @@ MindSpore端侧教程 :caption: 使用指南 use/converter_tool - use/tools + use/runtime + use/benchmark_tool + use/timeprofiler_tool + use/post_training_quantization diff --git a/lite/tutorials/source_zh_cn/quick_start/quick_start.md b/lite/tutorials/source_zh_cn/quick_start/quick_start.md new file mode 100644 index 0000000000000000000000000000000000000000..b3730d2ea44d6cecbce11b98df19aeba20a4d9ac --- /dev/null +++ b/lite/tutorials/source_zh_cn/quick_start/quick_start.md @@ -0,0 +1,323 @@ +# 快速入门(Lite) + + + +- [快速入门(Lite)](#快速入门lite) + - [概述](#概述) + - [选择模型](#选择模型) + - [转换模型](#转换模型) + - [部署应用](#部署应用) + - [运行依赖](#运行依赖) + - [构建与运行](#构建与运行) + - [示例程序详细说明](#示例程序详细说明) + - [示例程序结构](#示例程序结构) + - [配置MindSpore Lite依赖项](#配置mindspore-lite依赖项) + - [下载及部署模型文件](#下载及部署模型文件) + - [编写端侧推理代码](#编写端侧推理代码) + + + +## 概述 + +我们推荐你从端侧Android图像分类demo入手,了解MindSpore Lite应用工程的构建、依赖项配置以及相关API的使用。 + +本教程基于MindSpore团队提供的Android“端侧图像分类”示例程序,演示了端侧部署的流程。 +1. 选择图像分类模型。 +2. 
将模型转换成MindSpore Lite模型格式。 +3. 在端侧使用MindSpore Lite推理模型。详细说明如何在端侧利用MindSpore Lite C++ API(Android JNI)和MindSpore Lite图像分类模型完成端侧推理,实现对设备摄像头捕获的内容进行分类,并在APP图像预览界面中,显示出最可能的分类结果。 + +> 你可以在这里找到[Android图像分类模型](https://download.mindspore.cn/model_zoo/official/lite/mobilenetv2_openimage_lite)和[示例代码](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/lite/image_classif)。 + +## 选择模型 + +MindSpore团队提供了一系列预置终端模型,你可以在应用程序中使用这些预置的终端模型。 +MindSpore Model Zoo中图像分类模型可[在此下载](#TODO)。 +同时,你也可以使用预置模型做迁移学习,以实现自己的图像分类任务,操作流程参见[重训练章节](https://www.mindspore.cn/tutorial/zh-CN/master/use/saving_and_loading_model_parameters.html#id6)。 + +## 转换模型 + +如果你需要对MindSpore提供的模型进行重训,重训完成后,需要将模型导出为[.mindir格式](https://www.mindspore.cn/tutorial/zh-CN/master/use/saving_and_loading_model_parameters.html#mindir)。然后使用MindSpore Lite[模型转换工具](https://www.mindspore.cn/lite/tutorial/zh-CN/master/use/converter_tool.html)将.mindir模型转换成.ms格式。 + +以MindSpore MobilenetV2模型为例,如下脚本将其转换为MindSpore Lite模型用于端侧推理。 +```bash +./converter_lite --fmk=MS --modelFile=mobilenet_v2.mindir --outputFile=mobilenet_v2.ms +``` + +## 部署应用 + +接下来介绍如何构建和执行mindspore Lite端侧图像分类任务。 + +### 运行依赖 + +- Android Studio >= 3.2 (推荐4.0以上版本) +- NDK 21.3 +- CMake +- Android SDK >= 26 +- OpenCV >= 4.0.0 (本示例代码已包含) + +### 构建与运行 + +1. 在Android Studio中加载本示例源码,并安装相应的SDK(指定SDK版本后,由Android Studio自动安装)。 + + ![start_home](../images/lite_quick_start_home.png) + + 启动Android Studio后,点击`File->Settings->System Settings->Android SDK`,勾选相应的SDK。如下图所示,勾选后,点击`OK`,Android Studio即可自动安装SDK。 + + ![start_sdk](../images/lite_quick_start_sdk.png) + + (可选)若安装时出现NDK版本问题,可手动下载相应的[NDK版本](https://developer.android.com/ndk/downloads?hl=zh-cn)(本示例代码使用的NDK版本为21.3),并在`Project Structure`的`Android NDK location`设置中指定SDK的位置。 + + ![project_structure](../images/lite_quick_start_project_structure.png) + +2. 连接Android设备,运行图像分类应用程序。 + + 通过USB连接Android设备调试,点击`Run 'app'`即可在你的设备上运行本示例项目。 + + ![run_app](../images/lite_quick_start_run_app.PNG) + + Android Studio连接设备调试操作,可参考。 + +3. 在Android设备上,点击“继续安装”,安装完即可查看到设备摄像头捕获的内容和推理结果。 + + ![install](../images/lite_quick_start_install.jpg) + + 如下图所示,成功识别出图中内容是键盘和鼠标。 + + ![result](../images/lite_quick_start_app_result.jpg) + + +## 示例程序详细说明 + +本端侧图像分类Android示例程序分为JAVA层和JNI层,其中,JAVA层主要通过Android Camera 2 API实现摄像头获取图像帧,以及相应的图像处理等功能;JNI层在[Runtime](https://www.mindspore.cn/tutorial/zh-CN/master/use/lite_runtime.html)中完成模型推理的过程。 + +> 此处详细说明示例程序的JNI层实现,JAVA层运用Android Camera 2 API实现开启设备摄像头以及图像帧处理等功能,需读者具备一定的Android开发基础知识。 + +### 示例程序结构 + +``` +app +| +├── libs # 存放MindSpore Lite依赖的库文件 +│ └── arm64-v8a +│ ├── libopencv_java4.so +│ └── libmindspore-lite.so +│ +├── opencv # opencv 相关依赖文件 +│ └── ... +| +├── src/main +│ ├── assets # 资源文件 +| | └── model.ms # 存放模型文件 +│ | +│ ├── cpp # 模型加载和预测主要逻辑封装类 +| | ├── include # 存放MindSpore调用相关的头文件 +| | | └── ... +│ | | +| | ├── MindSporeNetnative.cpp # MindSpore调用相关的JNI方法 +│ | └── MindSporeNetnative.h # 头文件 +│ | +│ ├── java # java层应用代码 +│ │ └── com.huawei.himindsporedemo +│ │ ├── gallery.classify # 图像处理及MindSpore JNI调用相关实现 +│ │ │ └── ... +│ │ └── obejctdetect # 开启摄像头及绘制相关实现 +│ │ └── ... +│ │ +│ ├── res # 存放Android相关的资源文件 +│ └── AndroidManifest.xml # Android配置文件 +│ +├── CMakeList.txt # cmake编译入口文件 +│ +├── build.gradle # 其他Android配置文件 +└── ... 
+``` + +### 配置MindSpore Lite依赖项 + +Android JNI层调用MindSpore C++ API时,需要相关库文件支持。可通过MindSpore Lite[源码编译](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html)生成`libmindspore-lite.so`库文件,或直接下载MindSpore Lite提供的已编译完成的AMR64、ARM32、x86等[软件包](#TODO)。 + +在Android Studio中将编译完成的`libmindspore-lite.so`库文件(可包含多个兼容架构),分别放置在APP工程的`app/libs/ARM64-V8a`(ARM64)或`app/libs/armeabi-v7a`(ARM32)目录下,并在应用的`build.gradle`文件中配置CMake编译支持,以及`arm64-v8a`和`armeabi-v7a`的编译支持。   + +``` +android{ + defaultConfig{ + externalNativeBuild{ + cmake{ + arguments "-DANDROID_STL=c++_shared" + } + } + + ndk{ + abiFilters'armeabi-v7a', 'arm64-v8a' + } + } +} +``` + +在`app/CMakeLists.txt`文件中建立`.so`或`.a`库文件链接,如下所示。 + +``` +# Set MindSpore Lite Dependencies. +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include/MindSpore) +add_library(mindspore-lite SHARED IMPORTED ) +set_target_properties(mindspore-lite PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/libs/libmindspore-lite.so") + +# Set OpenCV Dependecies. +include_directories(${CMAKE_SOURCE_DIR}/opencv/sdk/native/jni/include) +add_library(lib-opencv SHARED IMPORTED ) +set_target_properties(lib-opencv PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/libs/libopencv_java4.so") + +# Link target library. +target_link_libraries( + ... + mindspore-lite + lib-opencv + ... +) +``` + +### 下载及部署模型文件 + +从MindSpore Model Hub中下载模型文件,本示例程序中使用的终端图像分类模型文件为`mobilenet_v2.ms`,放置在`app/src/main/assets`工程目录下。 + +### 编写端侧推理代码 + +在JNI层调用MindSpore Lite C++ API实现端测推理。 + +推理代码流程如下,完整代码请参见`src/cpp/MindSporeNetnative.cpp`。 + +1. 加载MindSpore Lite模型文件,构建上下文、会话以及用于推理的计算图。 + + - 加载模型文件:创建并配置用于模型推理的上下文 + ```cpp + // Buffer is the model data passed in by the Java layer + jlong bufferLen = env->GetDirectBufferCapacity(buffer); + char *modelBuffer = CreateLocalModelBuffer(env, buffer); + ``` + + - 创建会话 + ```cpp + void **labelEnv = new void *; + MSNetWork *labelNet = new MSNetWork; + *labelEnv = labelNet; + + // Create context. + lite::Context *context = new lite::Context; + context->cpu_bind_mode_ = lite::NO_BIND; + context->device_ctx_.type = lite::DT_CPU; + context->thread_num_ = numThread; //Specify the number of threads to run inference + + // Create the mindspore session. + labelNet->CreateSessionMS(modelBuffer, bufferLen, "device label", context); + delete(context); + + ``` + + - 加载模型文件并构建用于推理的计算图 + ```cpp + void MSNetWork::CreateSessionMS(char* modelBuffer, size_t bufferLen, std::string name, mindspore::lite::Context* ctx) + { + CreateSession(modelBuffer, bufferLen, ctx); + session = mindspore::session::LiteSession::CreateSession(ctx); + auto model = mindspore::lite::Model::Import(modelBuffer, bufferLen); + int ret = session->CompileGraph(model); // Compile Graph + } + ``` + +2. 将输入图片转换为传入MindSpore模型的Tensor格式。 + + 将待检测图片数据转换为输入MindSpore模型的Tensor。 + + ```cpp + // Convert the Bitmap image passed in from the JAVA layer to Mat for OpenCV processing + BitmapToMat(env, srcBitmap, matImageSrc); + // Processing such as zooming the picture size. + matImgPreprocessed = PreProcessImageData(matImageSrc); + + ImgDims inputDims; + inputDims.channel = matImgPreprocessed.channels(); + inputDims.width = matImgPreprocessed.cols; + inputDims.height = matImgPreprocessed.rows; + float *dataHWC = new float[inputDims.channel * inputDims.width * inputDims.height] + + // Copy the image data to be detected to the dataHWC array. + // The dataHWC[image_size] array here is the intermediate variable of the input MindSpore model tensor. 
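+    // Note (assumption, based on the Runtime guide in this tutorial): MindSpore Lite model input tensors expect NHWC layout,
+    // so dataHWC is filled with the preprocessed image data in height-width-channel order.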
+ float *ptrTmp = reinterpret_cast(matImgPreprocessed.data); + for(int i = 0; i < inputDims.channel * inputDims.width * inputDims.height; i++){ + dataHWC[i] = ptrTmp[i]; + } + + // Assign dataHWC[image_size] to the input tensor variable. + auto msInputs = mSession->GetInputs(); + auto inTensor = msInputs.front(); + memcpy(inTensor->MutableData(), dataHWC, + inputDims.channel * inputDims.width * inputDims.height * sizeof(float)); + delete[] (dataHWC); + ``` + +3. 对输入Tensor按照模型进行推理,获取输出Tensor,并进行后处理。 + + - 图执行,端测推理。 + + ```cpp + // After the model and image tensor data is loaded, run inference. + auto status = mSession->RunGraph(); + ``` + + - 获取输出数据。 + ```cpp + auto msOutputs = mSession->GetOutputs(); + std::string retStr = ProcessRunnetResult(msOutputs, ret); + ``` + + - 输出数据的后续处理。 + ```cpp + std::string ProcessRunnetResult(std::unordered_map> msOutputs, + int runnetRet) { + + // Get model output results. + std::unordered_map>::iterator iter; + iter = msOutputs.begin(); + auto brach1_string = iter->first; + auto branch1_tensor = iter->second; + + int OUTPUTS_LEN = branch1_tensor[0]->ElementsNum(); + + + MS_PRINT("OUTPUTS_LEN:%d", OUTPUTS_LEN); + + float *temp_scores = static_cast(branch1_tensor[0]->MutableData()); + + float scores[RET_CATEGORY_SUM]; + for (int i = 0; i < RET_CATEGORY_SUM; ++i) { + if (temp_scores[i] > 0.5){ + MS_PRINT("MindSpore scores[%d] : [%f]", i, temp_scores[i]); + } + scores[i] = temp_scores[i]; + } + + // Converted to text information that needs to be displayed in the APP. + std::string retStr = ""; + if (runnetRet == 0) { + for (int i = 0; i < RET_CATEGORY_SUM; ++i) { + if (scores[i] > 0.3){ + retStr += g_labels_name_map[i]; + retStr += ":"; + std::string score_str = std::to_string(scores[i]); + retStr += score_str; + retStr += ";"; + } + } + } else { + MS_PRINT("MindSpore run net failed!"); + for (int i = 0; i < RET_CATEGORY_SUM; ++i) { + retStr += " :0.0;"; + } + } + + return retStr; + } + ``` diff --git a/lite/tutorials/source_zh_cn/quick_start/quick_start_lite.md b/lite/tutorials/source_zh_cn/quick_start/quick_start_lite.md deleted file mode 100644 index bc962d4f312777e0331e8ccdd73c27ab71ee3a07..0000000000000000000000000000000000000000 --- a/lite/tutorials/source_zh_cn/quick_start/quick_start_lite.md +++ /dev/null @@ -1,11 +0,0 @@ -# 快速入门(Lite) - - - -- [快速入门(Lite)](#快速入门lite) - - - - - - diff --git a/lite/tutorials/source_zh_cn/use/benchmark_tool.md b/lite/tutorials/source_zh_cn/use/benchmark_tool.md index cf2d07c44309c9d962640404f85db942d49fd898..7c3df6300ab35b77ab3e0354590e2fc8ef25b3df 100644 --- a/lite/tutorials/source_zh_cn/use/benchmark_tool.md +++ b/lite/tutorials/source_zh_cn/use/benchmark_tool.md @@ -1,4 +1,4 @@ -# Benchmark工具 +# Benchmark工具 @@ -22,9 +22,9 @@ Benchmark工具是一款可以对MindSpore Lite模型进行基准测试的工具 使用Benchmark工具,需要进行如下环境准备工作。 -- 编译:Benchmark工具代码在MindSpore源码的`mindspore/lite/tools/benchmark`目录中,参考部署文档中的[环境要求](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id2)和[编译示例](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id5),安装编译依赖基本项,并执行编译。 +- 编译:Benchmark工具代码在MindSpore源码的`mindspore/lite/tools/benchmark`目录中,参考部署文档中的[环境要求](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id2)和[编译示例](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id5),安装编译依赖基本项,并执行编译。 -- 运行:参考部署文档中的[输出件说明](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id4),获得`benchmark`工具,并配置环境变量。 +- 运行:参考部署文档中的[输出件说明](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id4),获得`benchmark`工具,并配置环境变量。 ## 参数说明 diff --git 
a/lite/tutorials/source_zh_cn/use/converter_tool.md b/lite/tutorials/source_zh_cn/use/converter_tool.md index d42908342169d7f321e56e34916682dd709afa9a..3c6f00a7fd8b428ab008aca4f0b62c3bd1afeabf 100644 --- a/lite/tutorials/source_zh_cn/use/converter_tool.md +++ b/lite/tutorials/source_zh_cn/use/converter_tool.md @@ -1,4 +1,4 @@ -# 模型转换工具 +# 模型转换工具 @@ -23,9 +23,9 @@ MindSpore Lite提供离线转换模型功能的工具,支持多种类型的模 使用MindSpore Lite模型转换工具,需要进行如下环境准备工作。 -- 编译:模型转换工具代码在MindSpore源码的`mindspore/lite/tools/converter`目录中,参考部署文档中的[环境要求](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id2)和[编译示例](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id5),安装编译依赖基本项与模型转换工具所需附加项,并编译x86_64版本。 +- 编译:模型转换工具代码在MindSpore源码的`mindspore/lite/tools/converter`目录中,参考部署文档中的[环境要求](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id2)和[编译示例](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id5),安装编译依赖基本项与模型转换工具所需附加项,并编译x86_64版本。 -- 运行:参考部署文档中的[输出件说明](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id4),获得`converter`工具,并配置环境变量。 +- 运行:参考部署文档中的[输出件说明](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id4),获得`converter`工具,并配置环境变量。 ## 参数说明 diff --git a/lite/tutorials/source_zh_cn/use/post_training_quantization.md b/lite/tutorials/source_zh_cn/use/post_training_quantization.md new file mode 100644 index 0000000000000000000000000000000000000000..6bc018c65195e29b804fe74da9e9937053c84a14 --- /dev/null +++ b/lite/tutorials/source_zh_cn/use/post_training_quantization.md @@ -0,0 +1,63 @@ +# 训练后量化 + + + +- [训练后量化](#训练后量化) + - [概述](#概述) + - [参数说明](#参数说明) + - [使用示例](#使用示例) + + + + + +## 概述 + +对于已经训练好的`float32`模型,通过训练后量化将模型转为`int8`模型,不仅能减小模型大小,而且能显著提高推理性能。在MindSpore端侧框架中,这部分功能集成在模型转换工具`conveter_lite`中,通过增加命令行参数,便能够转换得到量化后模型。 +目前训练后量化属于alpha阶段(支持部分网络,不支持多输入模型),正在持续完善中。 + +``` +./converter_lite --fmk=ModelType --modelFile=ModelFilePath --outputFile=ConvertedModelPath --quantType=PostTraining --config_file=config.cfg +``` +## 参数说明 + +| 参数 | 属性 | 功能描述 | 参数类型 | 默认值 | 取值范围 | +| -------- | ------- | ----- | ----- |----- | ----- | +| --quantType | 必选 | 设置为PostTraining,启用训练后量化 | String | - | 必须设置为PostTraining | +| --config_file | 必选 | 校准数据集配置文件路径 | String | - | - | + +为了计算激活值的量化参数,用户需要提供校准数据集。校准数据集最好来自真实推理场景,能表征模型的实际输入情况,数量在100个左右。 +校准数据集配置文件采用`key=value`的方式定义相关参数,需要配置的`key`如下: + +| 参数名 | 属性 | 功能描述 | 参数类型 | 默认值 | 取值范围 | +| -------- | ------- | ----- | ----- | ----- | ----- | +| image_path | 必选 | 存放校准数据集的目录 | String | - | 该目录存放可直接用于执行推理的输入数据。由于目前框架还不支持数据预处理,所有数据必须事先完成所需的转换,使得它们满足推理的输入要求。 | +| batch_count | 可选 | 使用的输入数目 | Integer | 100 | 大于0 | +| method_x | 可选 | 网络层输入输出数据量化算法 | String | KL | KL,MAX_MIN。 KL: 基于[KL散度](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf)对数据范围作量化校准; MAX_MIN:基于最大值、最小值计算数据的量化参数。 在模型以及数据集比较较简单的情况下,推荐使用MAX_MIN | +| thread_num | 可选 | 使用校准数据集执行推理流程时的线程数 | Integer | 1 | 大于0 | + +## 使用示例 + +1. 正确编译出`converter_lite`可执行文件。 +2. 准备校准数据集,假设存放在`/dir/images`目录,编写配置文件`config.cfg`,内容如下: + ``` + image_path=/dir/images + batch_count=100 + method_x=MAX_MIN + thread_num=1 + ``` + 校准数据集可以选择测试数据集的子集,要求`/dir/images`目录下存放的每个文件均是预处理好的输入数据,每个文件都可以直接用于推理的输入。 +3. 以TensorFlow Lite模型mnist.tflite为例,执行带训练后量化的模型转换命令: + ``` + ./converter_lite --fmk=TFLITE --modelFile=mnist.tflite --outputFile=mnist_quant --quantType=PostTraining --config_file=config.cfg + ``` +4. 
上述命令执行成功后,便可得到量化后的模型mnist_quant.ms,通常量化后的模型大小会下降到FP32模型的1/4。 + +## 部分模型精度结果 + + | 模型 | 测试数据集 | method_x | FP32模型精度 | 训练后量化精度 | 说明 | + | -------- | ------- | ----- | ----- | ----- | ----- | + | [Inception_V3](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz) | [ImageNet](http://image-net.org/) | KL | 77.92% | 77.95% | 校准数据集随机选择ImageNet Validation数据集中的100张 | + | [Mobilenet_V1_1.0_224](https://torage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz) | [ImageNet](http://image-net.org/) | KL | 70.96% | 70.69% | 校准数据集随机选择ImageNet Validation数据集中的100张 | + +> 以上所有结果均在x86环境上测得。 diff --git a/lite/tutorials/source_zh_cn/use/runtime.md b/lite/tutorials/source_zh_cn/use/runtime.md new file mode 100644 index 0000000000000000000000000000000000000000..36a5af6d4395251fb8ded2ad7bc032fb261baf90 --- /dev/null +++ b/lite/tutorials/source_zh_cn/use/runtime.md @@ -0,0 +1,366 @@ +# Runtime使用指南 + + + +- [Runtime使用指南](#runtime使用指南) + - [概述](#概述) + - [读取模型](#读取模型) + - [创建会话](#创建会话) + - [创建上下文](#创建上下文) + - [创建会话](#创建会话-1) + - [使用示例](#使用示例) + - [图编译](#图编译) + - [可变维度](#可变维度) + - [使用示例](#使用示例-1) + - [图编译](#图编译-1) + - [输入数据](#输入数据) + - [获取输入Tensor](#获取输入tensor) + - [数据拷贝](#数据拷贝) + - [使用示例](#使用示例-2) + - [图执行](#图执行) + - [执行会话](#执行会话) + - [绑核](#绑核) + - [回调运行](#回调运行) + - [使用示例](#使用示例-3) + - [获取输出](#获取输出) + - [获取输出Tensor](#获取输出tensor) + - [使用示例](#使用示例-4) + - [获取版本号](#获取版本号) + - [使用示例](#使用示例-5) + + + + + +## 概述 + +通过MindSpore Lite模型转换后,需在Runtime中完成模型的推理执行流程。 + +Runtime总体使用流程如下图所示: + +![img](../images/side_infer_process.png) + +包含的组件及功能如下所述: +- `Model`:MindSpore Lite使用的模型,通过用户构图或直接加载网络,来实例化算子原型的列表。 +- `Lite Session`:提供图编译的功能,并调用图执行器进行推理。 +- `Scheduler`:算子异构调度器,根据异构调度策略,为每一个算子选择合适的kernel,构造kernel list,并切分子图。 +- `Executor`:图执行器,执行kernel list,动态分配和释放Tensor。 +- `Operator`:算子原型,包含算子的属性,以及shape、data type和format的推导方法。 +- `Kernel`:算子库提供算子的具体实现,提供算子forward的能力。 +- `Tensor`:MindSpore Lite使用的Tensor,提供了Tensor内存操作的功能和接口。 + +## 读取模型 + +在MindSpore Lite中,模型文件是从模型转换工具转换得到的`.ms`文件。进行模型推理时,需要从文件系统加载模型,并进行模型解析,这部分操作主要在Model中实现。Model持有权重数据、算子属性等模型数据。 + +模型通过Model类的静态`Import`方法从内存数据中创建。函数返回的`Model`实例是一个指针,通过`new`创建,不再需要时,需要用户通过`delete`释放。 + +## 创建会话 + +使用MindSpore Lite执行推理时,Session是推理的主入口,通过Session我们可以进行图编译、图执行。 + +### 创建上下文 + +上下文会保存会话所需的一些基本配置参数,用于指导图编译和图执行,其定义如下: + +MindSpore Lite支持异构推理,推理时的主选后端由`Context`中的`device_ctx_`指定,默认为CPU。在进行图编译时,会根据主选后端进行算子选型调度。 + +MindSpore Lite内置一个进程共享的线程池,推理时通过`thread_num_`指定线程池的最大线程数,默认为2线程,推荐最多不超过4个线程,否则可能会影响性能。 + +MindSpore Lite支持动态内存分配和释放,如果没有指定`allocator`,推理时会生成一个默认的`allocator`,也可以通过`Context`方法在多个`Context`中共享内存分配器。 + +如果用户通过`new`创建`Context`,不再需要时,需要用户通过`delete`释放。一般在创建完Session后,Context即可释放。 + +### 创建会话 + +用上一步创建得到的`Context`,调用LiteSession的静态`CreateSession`方法来创建`LiteSession`。函数返回的`LiteSession`实例是一个指针,通过`new`创建,不再需要时,需要用户通过`delete`释放。 + +### 使用示例 + +下面示例代码演示了`Context`的创建,以及在两个`LiteSession`间共享内存池的功能: + +```cpp +auto context = new (std::nothrow) lite::Context; +if (context == nullptr) { + MS_LOG(ERROR) << "New context failed while running %s", modelName.c_str(); + return RET_ERROR; +} +// The preferred backend is GPU, which means, if there is a GPU operator, it will run on the GPU first, otherwise it will run on the CPU. +context->device_ctx_.type = lite::DT_GPU; +// The medium core takes priority in thread and core binding methods. This parameter will work in the BindThread interface. For specific binding effect, see the "Run Graph" section. 
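+// Assumption: besides MID_CPU shown here, the quick start sample in this tutorial uses lite::NO_BIND to leave worker threads unbound.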
+context->cpu_bind_mode_ = MID_CPU; +// Configure the number of worker threads in the thread pool to 2, including the main thread. +context->thread_num_ = 2; +// Allocators can be shared across multiple Contexts. +auto *context2 = new Context(context->thread_num_, context->allocator, context->device_ctx_); +context2->cpu_bind_mode_ = context->cpu_bind_mode_; +// Use Context to create Session. +auto session1 = session::LiteSession::CreateSession(context); +// After the LiteSession is created, the Context can be released. +delete (context); +if (session1 == nullptr) { + MS_LOG(ERROR) << "CreateSession failed while running %s", modelName.c_str(); + return RET_ERROR; +} +// session1 and session2 can share one memory pool. +auto session2 = session::LiteSession::CreateSession(context2); +delete (context2); +if (session2 == nullptr) { + MS_LOG(ERROR) << "CreateSession failed while running %s", modelName.c_str(); + return RET_ERROR; +} +``` + +## 图编译 + +### 可变维度 + +使用MindSpore Lite进行推理时,在已完成会话创建与图编译之后,如果需要对输入的shape进行Resize,则可以通过对输入的tensor重新设置shape,然后调用session的Resize()接口。 + +### 使用示例 + +下面代码演示如何对MindSpore Lite的输入进行Resize(): +```cpp +// Assume we have created a LiteSession instance named session. +auto inputs = session->GetInputs(); +std::vector<int> resize_shape = {1, 128, 128, 3}; +// Assume the model has only one input, resize input shape to [1, 128, 128, 3]. +inputs[0]->set_shape(resize_shape); +session->Resize(inputs); +``` + +### 图编译 + +在图执行前,需要调用`LiteSession`的`CompileGraph`接口进行图编译,进一步解析从文件中加载的Model实例,主要进行子图切分、算子选型调度。这部分会耗费较多时间,所以建议`LiteSession`创建一次,编译一次,多次执行。 + +## 输入数据 + +### 获取输入Tensor + +在图执行前,需要将输入数据拷贝到模型的输入Tensor。 + +MindSpore Lite提供两种方法来获取模型的输入Tensor。 + +1. 使用`GetInputsByName`方法,根据模型输入节点的名称来获取模型输入Tensor中连接到该节点的Tensor的vector。 +2. 使用`GetInputs`方法,直接获取所有的模型输入Tensor的vector。 + +### 数据拷贝 + +当获取到模型的输入,就需要向Tensor中填入数据。通过`MSTensor`的`Size`方法来获取Tensor应该填入的数据大小,通过`data_type`方法来获取Tensor的数据类型,通过`MSTensor`的`MutableData`方法来获取可写的指针。 + +### 使用示例 + +下面示例代码演示了从`LiteSession`中获取整图输入`MSTensor`,并且向其中灌入模型输入数据的过程: + +```cpp +// Assume we have created a LiteSession instance named session. +auto inputs = session->GetInputs(); +// Assume that the model has only one input tensor. +auto in_tensor = inputs.front(); +if (in_tensor == nullptr) { + std::cerr << "Input tensor is nullptr" << std::endl; + return -1; +} +// It is omitted that users have read the model input file and generated a section of memory buffer: input_buf, as well as the byte size of input_buf: data_size. +if (in_tensor->Size() != data_size) { + std::cerr << "Input data size is not suitable for model input" << std::endl; + return -1; +} +auto *in_data = in_tensor->MutableData(); +if (in_data == nullptr) { + std::cerr << "Data of in_tensor is nullptr" << std::endl; + return -1; +} +memcpy(in_data, input_buf, data_size); +// Users need to free input_buf. +// The elements in the inputs are managed by MindSpore Lite so that users do not need to free inputs. +``` + +需要注意的是: +- MindSpore Lite的模型输入Tensor中的数据排布必须是NHWC。 +- 模型的输入`input_buf`是用户从磁盘读取的,当拷贝给模型输入Tensor以后,用户需要自行释放`input_buf`。 +- `GetInputs`和`GetInputsByName`方法返回的vector不需要用户释放。 + +## 图执行 + +### 执行会话 + +MindSpore Lite会话在进行图编译以后,即可使用`LiteSession`的`RunGraph`进行模型推理。 + +### 绑核 + +MindSpore Lite内置线程池支持绑核、解绑操作,通过调用`BindThread`接口,可以将线程池中的工作线程绑定到指定CPU核,用于性能分析。绑核操作与创建`LiteSession`时用户指定的上下文有关,绑核操作会根据上下文中的绑核策略进行线程与CPU的亲和性设置。 + +需要注意的是,绑核是一个亲和性操作,不保证一定能绑定到指定的CPU核,会受到系统调度的影响。而且绑核后,需要在执行完代码后进行解绑操作,示例如下: + +```cpp +// Assume we have created a LiteSession instance named session. 
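+// Assumption: CompileGraph has already been called on this session; BindThread only sets thread-to-core affinity for the RunGraph call that follows.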
+session->BindThread(true); +auto ret = session->RunGraph(); +if (ret != mindspore::lite::RET_OK) { + std::cerr << "RunGraph failed" << std::endl; + delete session; + return -1; +} +session->BindThread(false); +``` + +> 绑核参数有两种选择:大核优先和中核优先。 +> 判定大核和中核的规则其实是根据CPU核的频率而不是根据CPU的架构,对于没有大中小核之分的CPU架构,在该规则下也可以区分大核和中核。 +> 绑定大核优先是指线程池中的线程从频率最高的核开始绑定,第一个线程绑定在频率最高的核上,第二个线程绑定在频率第二高的核上,以此类推。 +> 对于中核优先,中核的定义是根据经验来定义的,默认设定中核是第三和第四高频率的核,当绑定策略为中核优先时,会优先绑定到中核上,当中核不够用时,会往小核上进行绑定。 + +### 回调运行 + +Mindspore Lite可以在调用`RunGraph`时,传入两个`KernelCallBack`函数指针来回调推理模型,相比于一般的图执行,回调运行可以在运行过程中获取额外的信息,帮助开发者进行性能分析、Bug调试等。额外的信息包括: +- 当前运行的节点名称 +- 推理当前节点前的输入输出Tensor +- 推理当前节点后的输入输出Tensor + +### 使用示例 + +下面示例代码演示了使用`LiteSession`进行图编译,并定义了两个回调函数作为前置回调指针和后置回调指针,传入到`RunGraph`接口进行回调推理,并演示了一次图编译,多次图执行的使用场景: + +```cpp +// Assume we have created a LiteSession instance named session and a Model instance named model before. +// The methods of creating model and session can refer to "Import Model" and "Create Session" two sections. +auto ret = session->CompileGraph(model); +if (ret != RET_OK) { + std::cerr << "CompileGraph failed" << std::endl; + // session and model need to be released by users manually. + delete (session); + delete (model); + return ret; +} +// Copy input data into the input tensor. Users can refer to the "Input Data" section. We uses random data here. +auto inputs = session->GetInputs(); +for (auto in_tensor : inputs) { + in_tensor = inputs.front(); + if (in_tensor == nullptr) { + std::cerr << "Input tensor is nullptr" << std::endl; + return -1; + } + // When calling the MutableData method, if the data in MSTensor is not allocated, it will be malloced. After allocation, the data in MSTensor can be considered as random data. + (void) in_tensor->MutableData(); +} +// Definition of callback function before forwarding operator. +auto before_call_back_ = [&](const std::vector &before_inputs, + const std::vector &before_outputs, + const session::CallBackParam &call_param) { + std::cout << "Before forwarding " << call_param.name_callback_param << std::endl; + return true; +}; +// Definition of callback function after forwarding operator. +auto after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, + const session::CallBackParam &call_param) { + std::cout << "After forwarding " << call_param.name_callback_param << std::endl; + return true; +}; +// Call the callback function when performing the model inference process. +ret = session_->RunGraph(before_call_back_, after_call_back_); +if (ret != RET_OK) { + MS_LOG(ERROR) << "Run graph failed."; + return RET_ERROR; +} +// CompileGraph would cost much time, a better solution is calling CompileGraph only once and RunGraph much more times. +for (size_t i = 0; i < 10; i++) { + auto ret = session_->RunGraph(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run graph failed."; + return RET_ERROR; + } +} +// session and model needs to be released by users manually. +delete (session); +delete (model); +``` + +## 获取输出 + +### 获取输出Tensor + +MindSpore Lite在执行完推理后,就可以获取模型的推理结果。 + +MindSpore Lite提供四种方法来获取模型的输出`MSTensor`。 +1. 使用`GetOutputsByNodeName`方法,根据模型输出节点的名称来获取模型输出`MSTensor`中连接到该节点的Tensor的vector。 +2. 使用`GetOutputMapByNode`方法,直接获取所有的模型输出节点的名称和连接到该节点的模型输出`MSTensor`的一个map。 +3. 使用`GetOutputByTensorName`方法,根据模型输出Tensor的名称来获取对应的模型输出`MSTensor`。 +4. 
使用`GetOutputMapByTensor`方法,直接获取所有的模型输出`MSTensor`的名称和`MSTensor`指针的一个map。 + +当获取到模型的输出Tensor,就可以从中读取推理结果数据。通过`MSTensor`的`Size`方法来获取Tensor中数据的字节大小,通过`data_type`方法来获取`MSTensor`的数据类型,通过`MSTensor`的`MutableData`方法来获取可读写的内存指针。 + +### 使用示例 + +下面示例代码演示了使用`GetOutputMapByNode`接口获取输出`MSTensor`,并打印了每个输出`MSTensor`的前十个数据或所有数据: + +```cpp +// Assume we have created a LiteSession instance named session before. +auto output_map = session->GetOutputMapByNode(); +// Assume that the model has only one output node. +auto out_node_iter = output_map.begin(); +std::string name = out_node_iter->first; +// Assume that the unique output node has only one output tensor. +auto out_tensor = out_node_iter->second.front(); +if (out_tensor == nullptr) { + std::cerr << "Output tensor is nullptr" << std::endl; + return -1; +} +// Assume that the data format of output data is float 32. +if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) { + std::cerr << "Output of lenet should be in float32" << std::endl; + return -1; +} +auto *out_data = reinterpret_cast<float *>(out_tensor->MutableData()); +if (out_data == nullptr) { + std::cerr << "Data of out_tensor is nullptr" << std::endl; + return -1; +} +// Print the first 10 float data or all output data of the output tensor. +std::cout << "Output data: "; +for (size_t i = 0; i < 10 && i < out_tensor->ElementsNum(); i++) { + std::cout << " " << out_data[i]; +} +std::cout << std::endl; +// The elements in outputs do not need to be freed by users, because outputs are managed by MindSpore Lite. +``` + +需要注意的是,`GetOutputsByNodeName`、`GetOutputMapByNode`、`GetOutputByTensorName`和`GetOutputMapByTensor`方法返回的vector或map不需要用户释放。 + +下面示例代码演示了使用`GetOutputsByNodeName`接口获取输出`MSTensor`的方法: + +```cpp +// Assume we have created a LiteSession instance named session before. +// Assume that model has an output node named output_node_name_0. +auto output_vec = session->GetOutputsByNodeName("output_node_name_0"); +// Assume that output node named output_node_name_0 has only one output tensor. +auto out_tensor = output_vec.front(); +if (out_tensor == nullptr) { + std::cerr << "Output tensor is nullptr" << std::endl; + return -1; +} +``` + +下面示例代码演示了使用`GetOutputMapByTensor`接口获取输出`MSTensor`的方法: + +```cpp +// Assume we have created a LiteSession instance named session before. +auto output_map = session->GetOutputMapByTensor(); +// Assume that the model has only one output tensor. 
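+// Assumption: GetOutputMapByTensor returns an unordered_map from output tensor name to MSTensor pointer,
+// so begin()->second below picks the first (and here the only) output tensor of the model.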
+auto out_tensor = output_map.begin()->second; +if (out_tensor == nullptr) { + std::cerr << "Output tensor is nullptr" << std::endl; + return -1; +} +``` + +## 获取版本号 +MindSpore Lite提供了`Version`方法可以获取版本号,包含在`include/version.h`头文件中,调用该方法可以得到版本号字符串。 + +### 使用示例 + +下面代码演示如何获取MindSpore Lite的版本号: +```cpp +#include "include/version.h" +std::string version = mindspore::lite::Version(); +``` diff --git a/lite/tutorials/source_zh_cn/use/runtime_lite.md b/lite/tutorials/source_zh_cn/use/runtime_lite.md deleted file mode 100644 index 3c5162b116c3bc64c3b136abf21453d5c934855c..0000000000000000000000000000000000000000 --- a/lite/tutorials/source_zh_cn/use/runtime_lite.md +++ /dev/null @@ -1,11 +0,0 @@ -# Runtime使用指南(Lite) - - - -- [Runtime使用指南(Lite)](#runtime使用指南lite) - - - - - - diff --git a/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md b/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md index f43612d95f10ecdb454b7c1e0a254ecd32038201..840d20f707e444135ce2b4e402baf4a4fe828b27 100644 --- a/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md +++ b/lite/tutorials/source_zh_cn/use/timeprofiler_tool.md @@ -1,4 +1,4 @@ -# TimeProfiler工具 +# TimeProfiler工具 @@ -20,9 +20,9 @@ TimeProfiler工具可以对MindSpore Lite模型网络层的前向推理进行耗 使用TimeProfiler工具,需要进行如下环境准备工作。 -- 编译:TimeProfiler工具代码在MindSpore源码的`mindspore/lite/tools/time_profiler`目录中,参考部署文档中的[环境要求](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id2)和[编译示例](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id5),安装编译依赖基本项,并执行编译。 +- 编译:TimeProfiler工具代码在MindSpore源码的`mindspore/lite/tools/time_profiler`目录中,参考部署文档中的[环境要求](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id2)和[编译示例](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id5),安装编译依赖基本项,并执行编译。 -- 运行:参考部署文档中的[输出件说明](https://www.mindspore.cn/lite/docs/zh-CN/master/deploy.html#id4),获得`time_profiler`工具,并配置环境变量。 +- 运行:参考部署文档中的[输出件说明](https://www.mindspore.cn/lite/tutorial/zh-CN/master/deploy.html#id4),获得`time_profiler`工具,并配置环境变量。 ## 参数说明 diff --git a/lite/tutorials/source_zh_cn/use/tools.rst b/lite/tutorials/source_zh_cn/use/tools.rst deleted file mode 100644 index c9b9e2a8401e7f71da7f3e10b5946d02fa7ff58c..0000000000000000000000000000000000000000 --- a/lite/tutorials/source_zh_cn/use/tools.rst +++ /dev/null @@ -1,8 +0,0 @@ -其他工具 -========== - -.. toctree:: - :maxdepth: 1 - - benchmark_tool - timeprofiler_tool \ No newline at end of file diff --git a/resource/faq/FAQ_en.md b/resource/faq/FAQ_en.md index 681d68614b8840f572c53221c748d1fe4bb01157..30335b436d65f797ba361674801d8a513125e8bc 100644 --- a/resource/faq/FAQ_en.md +++ b/resource/faq/FAQ_en.md @@ -16,6 +16,8 @@ - [Supported Features](#supported-features) +This document has been transferred to a [new location](https://www.mindspore.cn/docs/en/master/FAQ.html). 
This page will be offline later. + ## Installation diff --git a/resource/faq/FAQ_zh_cn.md index 9873b933d666d18b00026515f9cfd14a228f23f5..1e06e6c30f88b04ec5a368a69fa5f30c3e748e1f 100644 --- a/resource/faq/FAQ_zh_cn.md +++ b/resource/faq/FAQ_zh_cn.md @@ -16,6 +16,7 @@ - [特性支持](#特性支持) +此文档已经转移到[新的位置](https://www.mindspore.cn/docs/zh-CN/master/FAQ.html),此页面后续会下线。 ## 安装类 diff --git a/tutorials/notebook/README.md b/tutorials/notebook/README.md index fb999501e7ac9982e36a878b76981277fc0dca92..f7eac5dd4b49ceb37ad71ce574f11d0165e53954 100644 --- a/tutorials/notebook/README.md +++ b/tutorials/notebook/README.md @@ -57,6 +57,7 @@
| 数据处理与数据增强 | [data_loading_enhancement.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb) | 使用指南 | - 学习MindSpore中数据处理和增强的方法<br/>- 展示数据处理、增强方法的实际操作<br/>- 对比展示数据处理前和处理后的效果<br/>- 表述在数据处理、增强后的意义
| 自然语言处理应用 | [nlp_application.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/nlp_application.ipynb) | 应用实践 | - 展示MindSpore在自然语言处理的应用<br/>- 展示自然语言处理中数据集特定的预处理方法<br/>- 展示如何定义基于LSTM的SentimentNet网络
| 计算机视觉应用 | [computer_vision_application.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/computer_vision_application.ipynb) | 应用实践 | - 学习MindSpore卷积神经网络在计算机视觉应用的过程<br/>- 学习下载CIFAR-10数据集,搭建运行环境<br/>- 学习使用ResNet-50构建卷积神经网络<br/>- 学习使用Momentum和SoftmaxCrossEntropyWithLogits构建优化器和损失函数<br/>- 学习调试参数训练模型,判断模型精度
+| 模型的训练及验证同步方法 | [synchronization_training_and_evaluation.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/synchronization_training_and_evaluation.ipynb) | 应用实践 | - 了解模型训练和验证同步进行的方法<br/>- 学习同步训练和验证中参数设置方法<br/>- 利用绘图函数从保存的模型中挑选出最优模型
| 使用PyNative进行神经网络的训练调试体验 | [debugging_in_pynative_mode.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/debugging_in_pynative_mode.ipynb) | 模型调优 | - GPU平台下从数据集获取单个数据进行单个step训练的数据变化全过程解读<br/>- 了解PyNative模式下的调试方法<br/>- 图片数据在训练过程中的变化情况的图形展示<br/>- 了解构建权重梯度计算函数的方法<br/>- 展示1个step过程中权重的变化及数据展示
| 自定义调试信息体验文档 | [customized_debugging_information.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/customized_debugging_information.ipynb) | 模型调优 | - 了解MindSpore的自定义调试算子<br/>- 学习使用自定义调试算子Callback设置定时训练<br/>- 学习设置metrics算子输出相对应的模型精度信息<br/>- 学习设置日志环境变量来控制glog输出日志
| MindInsight的模型溯源和数据溯源体验 | [mindinsight_model_lineage_and_data_lineage.ipynb](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb) | 模型调优 | - 了解MindSpore中训练数据的采集及展示<br/>- 学习使用SummaryRecord记录数据<br/>- 学习使用回调函数SummaryCollector进行数据采集<br/>- 使用MindInsight进行数据可视化<br/>
- 了解数据溯源和模型溯源的使用方法 diff --git a/tutorials/notebook/debugging_in_pynative_mode.ipynb b/tutorials/notebook/debugging_in_pynative_mode.ipynb index d6c6f81bf49ab31481c9ee4f64375f31ff49f9f8..b068dddd05fc8bde544cf34f234b405b02db40dd 100644 --- a/tutorials/notebook/debugging_in_pynative_mode.ipynb +++ b/tutorials/notebook/debugging_in_pynative_mode.ipynb @@ -373,7 +373,7 @@ "\n", " def construct(self, x, label):\n", " weights = self.weights\n", - " return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label)" + " return C.GradOperation(get_by_list=True)(self.network, weights)(x, label)" ] }, { diff --git a/tutorials/notebook/linear_regression.ipynb b/tutorials/notebook/linear_regression.ipynb index 1d301aa1d00b589dcf06d3664424524e848f6339..b67175937c7ec66376c1431e764e81fbfb150e7e 100644 --- a/tutorials/notebook/linear_regression.ipynb +++ b/tutorials/notebook/linear_regression.ipynb @@ -524,7 +524,7 @@ "\n", " def construct(self, data, label):\n", " weights = self.weights\n", - " return C.GradOperation('get_by_list', get_by_list=True) \\\n", + " return C.GradOperation(get_by_list=True) \\\n", " (self.network, weights)(data, label)\n" ] }, diff --git a/tutorials/notebook/mindinsight/calculate_and_datagraphic.ipynb b/tutorials/notebook/mindinsight/calculate_and_datagraphic.ipynb index 5473e1dfa35c82fddd77e30273d36ea84a744740..2eb475d31541d52e7af5edc5c2b76a55cc3374b2 100644 --- a/tutorials/notebook/mindinsight/calculate_and_datagraphic.ipynb +++ b/tutorials/notebook/mindinsight/calculate_and_datagraphic.ipynb @@ -74,51 +74,55 @@ "metadata": {}, "outputs": [], "source": [ - "import urllib.request \n", - "from urllib.parse import urlparse\n", - "import gzip \n", "import os\n", + "import gzip\n", + "import urllib.request\n", + "from urllib.parse import urlparse\n", + "\n", "\n", "def unzip_file(gzip_path):\n", - " \"\"\"unzip dataset file\n", + " \"\"\"\n", + " Unzip a given gzip file.\n", + "\n", " Args:\n", - " gzip_path: dataset file path\n", + " gzip_path (str): The gzip file path\n", " \"\"\"\n", - " open_file = open(gzip_path.replace('.gz',''), 'wb')\n", + " open_file = open(gzip_path.replace('.gz', ''), 'wb')\n", " gz_file = gzip.GzipFile(gzip_path)\n", " open_file.write(gz_file.read())\n", " gz_file.close()\n", - " \n", + "\n", + "\n", "def download_dataset():\n", " \"\"\"Download the dataset from http://yann.lecun.com/exdb/mnist/.\"\"\"\n", " print(\"******Downloading the MNIST dataset******\")\n", - " train_path = \"./MNIST_Data/train/\" \n", + " train_path = \"./MNIST_Data/train/\"\n", " test_path = \"./MNIST_Data/test/\"\n", " train_path_check = os.path.exists(train_path)\n", " test_path_check = os.path.exists(test_path)\n", - " if train_path_check == False and test_path_check == False:\n", + " if not train_path_check and not test_path_check:\n", " os.makedirs(train_path)\n", " os.makedirs(test_path)\n", - " train_url = {\"http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\", \"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\"}\n", - " test_url = {\"http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\", \"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\"}\n", - " \n", + " train_url = {\"http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\",\n", + " \"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\"}\n", + " test_url = {\"http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\",\n", + " \"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\"}\n", + "\n", " for url in train_url:\n", " url_parse 
= urlparse(url)\n", - " \"\"\"split the file name from url\"\"\"\n", - " file_name = os.path.join(train_path,url_parse.path.split('/')[-1])\n", - " if not os.path.exists(file_name.replace('.gz', '')):\n", - " file = urllib.request.urlretrieve(url, file_name)\n", - " unzipfile(file_name)\n", - " os.remove(file_name)\n", - " \n", + " # split the file name from url\n", + " file_name = os.path.join(train_path, url_parse.path.split('/')[-1])\n", + " if not os.path.exists(file_name.replace('.gz', '')) and not os.path.exists(file_name):\n", + " urllib.request.urlretrieve(url, file_name)\n", + " unzip_file(file_name)\n", + "\n", " for url in test_url:\n", " url_parse = urlparse(url)\n", - " \"\"\"split the file name from url\"\"\"\n", - " file_name = os.path.join(test_path,url_parse.path.split('/')[-1])\n", - " if not os.path.exists(file_name.replace('.gz', '')):\n", - " file = urllib.request.urlretrieve(url, file_name)\n", - " unzipfile(file_name)\n", - " os.remove(file_name)\n", + " # split the file name from url\n", + " file_name = os.path.join(test_path, url_parse.path.split('/')[-1])\n", + " if not os.path.exists(file_name.replace('.gz', '')) and not os.path.exists(file_name):\n", + " urllib.request.urlretrieve(url, file_name)\n", + " unzip_file(file_name)\n", "\n", "download_dataset()" ] @@ -127,9 +131,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### 数据集使用\n", - "\n", - "设置正确的数据存放路径,可将数据集读取出来,并对整体数据集做预处理,让数据更能发挥模型性能。MindInsight可视化的数据图,便是显示的数据集预处理时的变化方式和顺序。" + "#### 数据增强\n", + "对数据集进行数据增强操作,可以提升模型精度。\n" ] }, { @@ -148,32 +151,39 @@ "def create_dataset(data_path, batch_size=32, repeat_size=1,\n", " num_parallel_workers=1):\n", " \"\"\"\n", - " create dataset for train or test\n", + " Create dataset for train or test.\n", + "\n", + " Args:\n", + " data_path (str): The absolute path of the dataset\n", + " batch_size (int): The number of data records in each group\n", + " repeat_size (int): The number of replicated data records\n", + " num_parallel_workers (int): The number of parallel workers\n", " \"\"\"\n", - " \"\"\"define dataset\"\"\"\n", + " # define dataset\n", " mnist_ds = ds.MnistDataset(data_path)\n", "\n", + " # define some parameters needed for data enhancement and rough justification\n", " resize_height, resize_width = 32, 32\n", " rescale = 1.0 / 255.0\n", " shift = 0.0\n", " rescale_nml = 1 / 0.3081\n", " shift_nml = -1 * 0.1307 / 0.3081\n", "\n", - " \"\"\"define map operations\"\"\"\n", - " type_cast_op = C.TypeCast(mstype.int32)\n", - " resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) # Bilinear mode\n", + " # according to the parameters, generate the corresponding data enhancement method\n", + " resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)\n", " rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)\n", " rescale_op = CV.Rescale(rescale, shift)\n", " hwc2chw_op = CV.HWC2CHW()\n", + " type_cast_op = C.TypeCast(mstype.int32)\n", "\n", - " \"\"\"apply map operations on images\"\"\"\n", + " # using map method to apply operations to a dataset\n", " mnist_ds = mnist_ds.map(input_columns=\"label\", operations=type_cast_op, num_parallel_workers=num_parallel_workers)\n", " mnist_ds = mnist_ds.map(input_columns=\"image\", operations=resize_op, num_parallel_workers=num_parallel_workers)\n", " mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_op, num_parallel_workers=num_parallel_workers)\n", " mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_nml_op, 
num_parallel_workers=num_parallel_workers)\n", " mnist_ds = mnist_ds.map(input_columns=\"image\", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)\n", - "\n", - " \"\"\"apply DatasetOps\"\"\"\n", + " \n", + " # process the generated dataset\n", " buffer_size = 10000\n", " mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script\n", " mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n", @@ -272,15 +282,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### 主程序运行\n", - "\n", - "1. 首先在主函数之前调用所需要的模块,并在主函数之前使用相应接口。\n", + "#### 执行训练\n", "\n", - "2. 本次体验主要完成计算图与数据图的可视化,定义变量`specified={'collect_graph': True,'collect_dataset_graph': True}`,在`specified`字典中,键名`collect_graph`值设置为`True`,表示记录计算图;键名`collect_dataset_graph`值设置为`True`,表示记录数据图。\n", + "1. 导入所需的代码包,并示例化训练网络。\n", + "2. 通过MindSpore提供的 `SummaryCollector` 接口,实现收集计算图和数据图。在实例化 `SummaryCollector` 时,在 `collect_specified_data` 参数中,通过设置 `collect_graph` 指定收集计算图,设置 `collect_dataset_graph` 指定收集数据图。\n", "\n", - "3. 定义完`specified`变量后,传参到`summary_collector`中,最后将`summary_collector`传参到`model`中。\n", - "\n", - "至此,模型中就有了计算图与数据图的可视化功能。" + "更多 `SummaryCollector` 的用法,请点击[API文档](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector)查看。\n", + "\n" ] }, { @@ -293,9 +301,7 @@ "from mindspore import context\n", "from mindspore.train import Model\n", "from mindspore.nn.metrics import Accuracy\n", - "from mindspore.train.callback import SummaryCollector\n", - "from mindspore.train.serialization import load_checkpoint, load_param_into_net\n", - "from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor\n", + "from mindspore.train.callback import LossMonitor, SummaryCollector\n", "\n", "if __name__ == \"__main__\":\n", " device_target = \"CPU\"\n", @@ -308,18 +314,15 @@ " net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction=\"mean\")\n", " net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.01, momentum=0.9)\n", " time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())\n", - " config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)\n", - " ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_lenet\", config=config_ck)\n", " model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", - " specified={'collect_graph': True,'collect_dataset_graph': True}\n", + "\n", + " specified={'collect_graph': True, 'collect_dataset_graph': True}\n", " summary_collector = SummaryCollector(summary_dir='./summary_dir', collect_specified_data=specified, collect_freq=1, keep_default_action=False)\n", " \n", " print(\"============== Starting Training ==============\")\n", - " model.train(epoch=2, train_dataset=ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=False)\n", + " model.train(epoch=2, train_dataset=ds_train, callbacks=[LossMonitor(), summary_collector], dataset_sink_mode=False)\n", "\n", " print(\"============== Starting Testing ==============\")\n", - " param_dict = load_checkpoint(\"checkpoint_lenet-3_1875.ckpt\")\n", - " load_param_into_net(network, param_dict)\n", " ds_eval = create_dataset(\"./MNIST_Data/test/\")\n", " acc = model.eval(ds_eval, dataset_sink_mode=False)\n", " print(\"============== {} ==============\".format(acc))" @@ -333,6 +336,8 @@ "- 启动MindInsigh服务命令:`mindinsigh start --summary-base-dir=/path/ --port=8080`;\n", "- 
执行完服务命令后,访问给出的地址,查看MindInsigh可视化结果。\n", "\n", + "> 其中 /path/ 为 `SummaryCollector` 中参数 `summary_dir` 所指定的目录。\n", + "\n", "![title](https://gitee.com/mindspore/docs/raw/master/tutorials/notebook/mindinsight/images/mindinsight_map.png)" ] }, @@ -354,45 +359,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 数据图信息\n", + "### 数据图展示\n", "\n", - "数据图所展示的顺序与数据集使用处代码顺序对应\n", + "数据图展示了数据增强中对数据进行操作的流程。\n", "\n", - "1. 首先是从加载数据集`mnist_ds = ds.MnistDataset(data_path)`开始,对应数据图中`MnistDataset`。\n", + "1. 首先是从加载数据集 `mnist_ds = ds.MnistDataset(data_path)` 开始,对应数据图中 `MnistDataset`。\n", "\n", - "2. 在以下所示代码中,是数据预处理的一些方法,顺序与数据图中所示顺序对应。\n", + "2. 下面代码为上面的 `create_dataset` 函数中作数据预处理与数据增强的相关操作。可以从数据图中清晰地看到数据处理的流程。通过查看数据图,可以帮助分析是否存在不恰当的数据处理流程。\n", "\n", "```\n", - "type_cast_op = C.TypeCast(mstype.int32)\n", - "resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)\n", - "rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)\n", - "rescale_op = CV.Rescale(rescale, shift)\n", - "hwc2chw_op = CV.HWC2CHW()\n", "mnist_ds = mnist_ds.map(input_columns=\"label\", operations=type_cast_op, num_parallel_workers=num_parallel_workers)\n", "mnist_ds = mnist_ds.map(input_columns=\"image\", operations=resize_op, num_parallel_workers=num_parallel_workers)\n", "mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_op, num_parallel_workers=num_parallel_workers)\n", "mnist_ds = mnist_ds.map(input_columns=\"image\", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)\n", "mnist_ds = mnist_ds.map(input_columns=\"image\", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)\n", - "```\n", - "\n", - "- `TypeCast`:在数据集`create_data`函数中,使用:`TypeCase(mstype.int32)`,将数据类型转换成我们所设置的类型。\n", - "- `Resize`:在数据集`create_data`函数中,使用:`Resize(resize_height,resize_width = 32,32)`,可以将数据的高和宽做调整。\n", - "- `Rescale`:在数据集`create_data`函数中,使用:`rescale = 1.0 / 255.0`;`Rescale(rescale,shift)`,可以重新数据格式。\n", - "- `HWC2CHW`:在数据集`create_data`函数中,使用:`HWC2CHW()`,此方法可以将数据所带信息与通道结合,一并加载。\n", - "\n", - "\n", - "3. 
前面的几个步骤是数据集的预处理顺序,后面几个步骤是模型加载数据集时要定义的参数,顺序与数据图中对应。\n", "\n", - "```\n", - "buffer_size = 10000\n", "mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script\n", "mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n", "mnist_ds = mnist_ds.repeat(repeat_size)\n", - "```\n", - " \n", - "- `Shuffle`:在数据集`create_data`函数中,使用:`buffer_size = 10000`,后面数值可以支持自行设置,表示一次缓存数据的数量。\n", - "- `Batch`:在数据集`create_data`函数中,使用:`batch_size = 32`。支持自行设置,表示将整体数据集划分成小批量数据集,每一个小批次作为一个整体进行训练。\n", - "- `Repeat`:在数据集`create_data`函数中,使用:`repeat_size = 1`,支持自行设定,表示的是一次运行中要训练的次数。" + "```\n" ] }, { @@ -408,7 +393,7 @@ "source": [ "### 关闭MindInsight\n", "\n", - "- 查看完成后,在命令行中可执行此命令`mindinsight stop --port=8080`,关闭MindInsight。" + "- 查看完成后,在命令行中可执行此命令 `mindinsight stop --port=8080`,关闭MindInsight。" ] } ], @@ -433,4 +418,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb b/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb index 27cec8672a5b2d409055b5a711d1c5a451876627..08a68bdb080a475a114bfbbc57f463113e64f289 100644 --- a/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb +++ b/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb @@ -6,16 +6,16 @@ "source": [ "# 标量、直方图、图像和张量可视化\n", "\n", - "MindInsight可以将神经网络训练过程中的损失值标量、直方图、图像信息和张量信息记录到日志文件中,通过可视化界面解析以供用户查看。\n", + "可以通过MindSpore提供的接口将训练过程中的标量、图像和张量记录到summary日志文件中,并通过MindInsight提供的可视化界面进行查看。\n", "\n", "接下来是本次流程的体验过程。\n", "\n", "## 整体流程\n", "\n", - "1. 准备环节。下载CIFAR-10二进制格式数据集,配置运行信息。\n", - "2. 数据处理。\n", - "3. 初始化AlexNet网络,使用`ImageSummary`记录图像数据和`TensorSummary`记录张量数据。\n", - "4. 训练网络,使用`SummaryCollector`记录损失值标量、权重梯度等参数。同时启动MindInsight服务,实时查看损失值、参数直方图、输入图像和张量的变化。\n", + "1. 下载CIFAR-10二进制格式数据集。\n", + "2. 对数据进行预处理。\n", + "3. 定义AlexNet网络,在网络中使用summary算子记录数据。\n", + "4. 训练网络,使用 `SummaryCollector` 记录损失值标量、权重梯度等参数。同时启动MindInsight服务,实时查看损失值、参数直方图、输入图像和张量的变化。\n", "5. 完成训练后,查看MindInsight看板中记录到的损失值标量、直方图、图像信息、张量信息。\n", "6. 分别单独记录损失值标量、直方图、图像信息和张量信息并查看可视化结果,查看损失值标量对比信息。\n", "7. 相关注意事项,关闭MindInsight服务。" @@ -33,10 +33,7 @@ "\n", "CIFAR-10二进制格式数据集包含10个类别的60000个32x32彩色图像。每个类别6000个图像,包含50000张训练图像和10000张测试图像。数据集分为5个训练批次和1个测试批次,每个批次具有10000张图像。测试批次包含每个类别中1000个随机选择的图像,训练批次按随机顺序包含剩余图像(某个训练批次包含的一类图像可能比另一类更多)。其中,每个训练批次精确地包含对应每个类别的5000张图像。\n", "\n", - "执行下面一段代码下载CIFAR-10二进制格式数据集到当前工作目录,该段代码分为两部分:\n", - "\n", - "1. 判断当前工作目录是否存在CIFAR-10二进制格式数据集目录,不存在则创建目录,存在则跳至[**数据处理**](#数据处理)。\n", - "2. 
判断CIFAT-10数据集目录是否存在CIFAR-10二进制格式数据集,不存在则下载CIFAR-10二进制格式数据集,存在则跳至[**数据处理**](#数据处理)。" + "执行下面一段代码下载CIFAR-10二进制格式数据集到当前工作目录,如果已经下载过数据集,则不重复下载。" ] }, { @@ -143,26 +140,6 @@ "- `data_batch_5.bin`文件为第5批次训练数据集文件。\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 配置运行信息" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from mindspore import context\n", - "\n", - "\n", - "device_target = \"GPU\"\n", - "context.set_context(mode=context.GRAPH_MODE, device_target=device_target)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -284,43 +261,54 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 网络初始化\n", + "## 使用Summary算子记录数据\n", "\n", "在进行训练之前,需定义神经网络模型,本流程采用AlexNet网络,以下一段代码中定义AlexNet网络结构。\n", "\n", - "在AlexNet网络中使用`Summary`算子记录输入图像和张量数据。\n", + "MindSpore提供了两种方法进行记录数据,分别为:\n", + "- 通过Summary算子记录数据\n", + "- 通过 `SummaryCollector` 这个callback进行记录\n", "\n", - "- 使用`ImageSummary`记录输入图像数据。\n", + "下面展示在AlexNet网络中使用Summary算子记录输入图像和张量数据。\n", "\n", - " 1. 在`__init__`方法中初始化`ImageSummary`。\n", + "- 使用 `ImageSummary` 记录输入图像数据。\n", + "\n", + " 1. 在 `__init__` 方法中初始化 `ImageSummary`。\n", " \n", " ```python\n", " # Init ImageSummary\n", - " self.sm_image = P.ImageSummary()\n", + " self.image_summary = P.ImageSummary()\n", " ```\n", " \n", - " 2. 在`construct`方法中使用`ImageSummary`算子记录输入图像。其中\"Image\"为MindInsight展示的记录到的图像信息面板标题。\n", + " 2. 在 `construct` 方法中使用 `ImageSummary` 算子记录输入图像。其中 \"Image\" 为该数据的名称,MindInsight在展示时,会将该名称展示出来以方便识别是哪个数据。\n", " \n", " ```python\n", " # Record image by Summary operator\n", - " self.sm_image(\"Image\", x)\n", + " self.image_summary(\"Image\", x)\n", " ```\n", " \n", - "- 使用`TensorSummary`记录张量数据。\n", + "- 使用 `TensorSummary` 记录张量数据。\n", "\n", - " 1. 在`__init__`方法中初始化`TensorSummary`。\n", + " 1. 在 `__init__` 方法中初始化 `TensorSummary`。\n", " \n", " ```python\n", " # Init TensorSummary\n", - " self.sm_tensor = P.TensorSummary()\n", + " self.tensor_summary = P.TensorSummary()\n", " ```\n", " \n", - " 2. 在`construct`方法中使用`TensorSummary`算子记录张量数据。其中\"Tensor\"为MindInsight展示的记录到的张量信息面板标题。\n", + " 2. 
在`construct`方法中使用`TensorSummary`算子记录张量数据。其中\"Tensor\"为该数据的名称。\n", " \n", " ```python\n", " # Record tensor by Summary operator\n", - " self.sm_tensor(\"Tensor\", x)\n", - " ```" + " self.tensor_summary(\"Tensor\", x)\n", + " ```\n", + "\n", + "当前支持的Summary算子:\n", + "\n", + "- [ScalarSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=scalarsummary#mindspore.ops.operations.ScalarSummary): 记录标量数据\n", + "- [TensorSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=tensorsummary#mindspore.ops.operations.TensorSummary): 记录张量数据\n", + "- [ImageSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=imagesummary#mindspore.ops.operations.ImageSummary): 记录图片数据\n", + "- [HistogramSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=histogramsummar#mindspore.ops.operations.HistogramSummary): 将张量数据转为直方图数据记录" ] }, { @@ -366,16 +354,16 @@ " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init TensorSummary\n", - " self.sm_tensor = P.TensorSummary()\n", + " self.tensor_summary = P.TensorSummary()\n", " # Init ImageSummary\n", - " self.sm_image = P.ImageSummary()\n", + " self.image_summary = P.ImageSummary()\n", "\n", " def construct(self, x):\n", " # Record image by Summary operator\n", - " self.sm_image(\"Image\", x)\n", + " self.image_summary(\"Image\", x)\n", " x = self.conv1(x)\n", " # Record tensor by Summary operator\n", - " self.sm_tensor(\"Tensor\", x)\n", + " self.tensor_summary(\"Tensor\", x)\n", " x = self.relu(x)\n", " x = self.max_pool2d(x)\n", " x = self.conv2(x)\n", @@ -401,36 +389,35 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 记录标量、直方图、图像\n", + "## 使用 `SummaryCollector` 记录数据\n", "\n", - "本次体验中使用`SummaryCollector`来记录标量、直方图信息。\n", + "下面展示使用`SummaryCollector`来记录标量、直方图信息。\n", "\n", - "在MindSpore中通过`Callback`机制,提供支持快速简易地收集损失值、参数权重、梯度等信息的`Callback`, 叫做`SummaryCollector`(详细的用法可以参考API文档中`mindspore.train.callback.SummaryCollector`)。`SummaryCollector`使用方法如下: \n", + "在MindSpore中通过`Callback`机制,提供支持快速简易地收集损失值、参数权重、梯度等信息的`Callback`, 叫做`SummaryCollector`(详细的用法可以参考API文档中[mindspore.train.callback.SummaryCollector](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=summarycollector#mindspore.train.callback.SummaryCollector))。`SummaryCollector`使用方法如下: \n", "\n", - "1. 为了记录损失值标量、直方图信息,在下面一段代码中需要在`specified`参数中指定需要记录的信息。\n", + "`SummaryCollector` 提供 `collect_specified_data` 参数,允许自定义想要收集的数据。\n", + "\n", + "下面的代码展示通过 `SummaryCollector` 收集损失值以及卷积层的参数值,参数值在MindInsight中以直方图展示。\n", "\n", - " ```python\n", - " specified={\"collect_metric\": True, \"histogram_regular\": \"^conv1.*|^conv2.*\"}\n", - " ```\n", - " - 其中:\n", - " - `\"collect_metric\"`为记录损失值标量信息。\n", - " - `\"histogram_regular\"`为记录`conv1`层和`conv2`层直方图信息。\n", "\n", - "2. 
实例化`SummaryCollector`,并将其应用到`model.train`或者`model.eval`中。\n", "\n", - " ```python\n", - " summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_01\", \n", - " collect_specified_data=specified, \n", - " collect_freq=1, \n", - " keep_default_action=False, \n", - " collect_tensor_freq=200)\n", - " ```\n", - " - 其中:\n", - " - `summary_dir`:指定日志保存的路径。\n", - " - `collect_specified_data`:指定需要记录的信息。\n", - " - `collect_freq`:指定使用`SummaryCollector`记录数据的频率。\n", - " - `keep_default_action`:指定是否除记录除指定信息外的其他数据信息。\n", - " - `collect_tensor_freq`:指定记录张量信息的频率。\n", + "\n", + "```python\n", + "specified={\"collect_metric\": True, \"histogram_regular\": \"^conv1.*|^conv2.*\"}\n", + "summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_01\", \n", + " collect_specified_data=specified, \n", + " collect_freq=1, \n", + " keep_default_action=False, \n", + " collect_tensor_freq=200)\n", + "```\n", + "\n", + "- `summary_dir`:指定日志保存的路径。\n", + "- `collect_specified_data`:指定需要记录的信息。\n", + "- `collect_freq`:指定使用`SummaryCollector`记录数据的频率。\n", + "- `keep_default_action`:指定是否除记录除指定信息外的其他数据信息。\n", + "- `collect_tensor_freq`:指定记录张量信息的频率。\n", + "- `\"collect_metric\"`为记录损失值标量信息。\n", + "- `\"histogram_regular\"`为记录`conv1`层和`conv2`层直方图信息。\n", "\n", "  程序运行过程中将在本地`8080`端口自动启动MindInsight服务并自动遍历读取当前notebook目录下`summary_dir`子目录下所有日志文件、解析进行可视化展示。" ] @@ -455,7 +442,11 @@ "from mindspore.nn.metrics import Accuracy\n", "from mindspore.train.callback import SummaryCollector\n", "from mindspore.train.serialization import load_checkpoint, load_param_into_net\n", - "from mindspore import Tensor" + "from mindspore import Tensor\n", + "from mindspore import context\n", + "\n", + "device_target = \"GPU\"\n", + "context.set_context(mode=context.GRAPH_MODE, device_target=device_target)" ] }, { @@ -501,9 +492,7 @@ " lr_each_step = np.array(lr_each_step).astype(np.float32)\n", " learning_rate = lr_each_step[current_step:]\n", "\n", - " return learning_rate\n", - "\n", - "lr = Tensor(get_lr(0, 0.002, 10, ds_train.get_dataset_size()))" + " return learning_rate\n" ] }, { @@ -553,21 +542,26 @@ } ], "source": [ - "summary_base_dir = \"./summary_dir\"\n", + "\n", "network = AlexNet(num_classes=10)\n", "net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction=\"mean\")\n", + "lr = Tensor(get_lr(0, 0.002, 10, ds_train.get_dataset_size()))\n", "net_opt = nn.Momentum(network.trainable_params(), learning_rate=lr, momentum=0.9)\n", "time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())\n", "config_ck = CheckpointConfig(save_checkpoint_steps=1562, keep_checkpoint_max=10)\n", "ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_alexnet\", config=config_ck)\n", "model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + "\n", + "summary_base_dir = \"./summary_dir\"\n", "os.system(f\"mindinsight start --summary-base-dir {summary_base_dir} --port=8080\")\n", + "\n", "# Init a SummaryCollector callback instance, and use it in model.train or model.eval\n", "specified = {\"collect_metric\": True, \"histogram_regular\": \"^conv1.*|^conv2.*\"}\n", "summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_01\", collect_specified_data=specified, collect_freq=1, keep_default_action=False, collect_tensor_freq=200)\n", + "\n", "print(\"============== Starting Training ==============\")\n", - "# Note: dataset_sink_mode should be set to False, else you should modify collect freq in SummaryCollector\n", "model.train(epoch=10, train_dataset=ds_train, callbacks=[time_cb, 
ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=True)\n", + "\n", "print(\"============== Starting Testing ==============\")\n", "param_dict = load_checkpoint(\"checkpoint_alexnet-10_1562.ckpt\")\n", "load_param_into_net(network, param_dict)\n", @@ -591,11 +585,11 @@ "\n", "### 标量可视化\n", "\n", - "标量可视化用于展示训练过程中标量的变化趋势情况,点击打开标量信息展示面板,该面板记录了迭代计算过程中的损失值标量信息,如下图展示了loss值标量趋势图。\n", + "标量可视化用于展示训练过程中标量的变化趋势,点击打开标量信息展示面板,该面板记录了迭代计算过程中的损失值标量信息,如下图展示了损失值标量趋势图。\n", "\n", "![](https://gitee.com/mindspore/docs/raw/master/tutorials/notebook/mindinsight/images/scalar_panel.png)\n", "\n", - "上图展示了神经网络在训练过程中loss值的变化过程。横坐标是训练步骤,纵坐标是loss值。\n", + "上图展示了神经网络在训练过程中损失值的变化过程。横坐标是训练步骤,纵坐标是损失值。\n", "\n", "图中右上角有几个按钮功能,从左到右功能分别是全屏展示,切换Y轴比例,开启/关闭框选,分步回退和还原图形。\n", "\n", @@ -789,11 +783,14 @@ "config_ck = CheckpointConfig(save_checkpoint_steps=1562, keep_checkpoint_max=10)\n", "ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_alexnet\", config=config_ck)\n", "model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + "\n", "# Init a SummaryCollector callback instance, and use it in model.train or model.eval\n", "specified = {\"collect_metric\": True}\n", "summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_loss_only\", collect_specified_data=specified, collect_freq=1, keep_default_action=False)\n", + "\n", "print(\"============== Starting Training ==============\")\n", "model.train(epoch=10, train_dataset=ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=True)\n", + "\n", "print(\"============== Starting Testing ==============\")\n", "param_dict = load_checkpoint(\"checkpoint_alexnet_1-10_1562.ckpt\")\n", "load_param_into_net(network, param_dict)\n", @@ -882,11 +879,14 @@ "config_ck = CheckpointConfig(save_checkpoint_steps=1562, keep_checkpoint_max=10)\n", "ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_alexnet\", config=config_ck)\n", "model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + "\n", "# Init a SummaryCollector callback instance, and use it in model.train or model.eval\n", "specified = {\"histogram_regular\": \"^conv1.*\"}\n", "summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_histogram_only\", collect_specified_data=specified, collect_freq=1, keep_default_action=False)\n", + "\n", "print(\"============== Starting Training ==============\")\n", "model.train(epoch=1, train_dataset=ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=True)\n", + "\n", "print(\"============== Starting Testing ==============\")\n", "param_dict = load_checkpoint(\"checkpoint_alexnet_2-1_1562.ckpt\")\n", "load_param_into_net(network, param_dict)\n", @@ -989,12 +989,12 @@ " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init TensorSummary\n", - " self.sm_tensor = P.TensorSummary()\n", + " self.tensor_summary = P.TensorSummary()\n", "\n", " def construct(self, x):\n", " x = self.conv1(x)\n", " # Record tensor by Summary operator\n", - " self.sm_tensor(\"Tensor\", x)\n", + " self.tensor_summary(\"Tensor\", x)\n", " x = self.relu(x)\n", " x = self.max_pool2d(x)\n", " x = self.conv2(x)\n", @@ -1023,10 +1023,13 @@ "config_ck = CheckpointConfig(save_checkpoint_steps=1562, keep_checkpoint_max=10)\n", "ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_alexnet\", config=config_ck)\n", "model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + "\n", "# Init a 
SummaryCollector callback instance, and use it in model.train or model.eval\n", "summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_tensor_only\", collect_specified_data=None, collect_freq=1, keep_default_action=False, collect_tensor_freq=50)\n", + "\n", "print(\"============== Starting Training ==============\")\n", "model.train(epoch=1, train_dataset=ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=True)\n", + "\n", "print(\"============== Starting Testing ==============\")\n", "param_dict = load_checkpoint(\"checkpoint_alexnet_3-1_1562.ckpt\")\n", "load_param_into_net(network, param_dict)\n", @@ -1122,11 +1125,11 @@ " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init ImageSummary\n", - " self.sm_image = P.ImageSummary()\n", + " self.image_summary = P.ImageSummary()\n", "\n", " def construct(self, x):\n", " # Record image by Summary operator\n", - " self.sm_image(\"Image\", x)\n", + " self.image_summary(\"Image\", x)\n", " x = self.conv1(x)\n", " x = self.relu(x)\n", " x = self.max_pool2d(x)\n", @@ -1156,10 +1159,13 @@ "config_ck = CheckpointConfig(save_checkpoint_steps=1562, keep_checkpoint_max=10)\n", "ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_alexnet\", config=config_ck)\n", "model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + "\n", "# Init a SummaryCollector callback instance, and use it in model.train or model.eval\n", "summary_collector = SummaryCollector(summary_dir=\"./summary_dir/summary_image_only\", collect_specified_data=None, collect_freq=1, keep_default_action=False)\n", + "\n", "print(\"============== Starting Training ==============\")\n", "model.train(epoch=1, train_dataset=ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=True)\n", + "\n", "print(\"============== Starting Testing ==============\")\n", "param_dict = load_checkpoint(\"checkpoint_alexnet_4-1_1562.ckpt\")\n", "load_param_into_net(network, param_dict)\n", @@ -1182,7 +1188,7 @@ "source": [ "### 对比看板\n", "\n", - "对比看板可视用于多次训练之间的标量数据对比。\n", + "对比看板用于多次训练之间的数据对比。\n", "\n", "点击MindInsight看板中的**对比看板**,打开对比看板,可以得到多次(不同)训练搜集到的标量数据对比信息。\n", "\n", @@ -1218,12 +1224,6 @@ "metadata": {}, "source": [ "## 注意事项和规格\n", - "\n", - "- 当前支持的Summary算子:\n", - " - [ScalarSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=scalarsummary#mindspore.ops.operations.ScalarSummary): 记录标量数据\n", - " - [TensorSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=tensorsummary#mindspore.ops.operations.TensorSummary): 记录张量数据\n", - " - [ImageSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=imagesummary#mindspore.ops.operations.ImageSummary): 记录图片数据\n", - " - [HistogramSummary](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.ops.operations.html?highlight=histogramsummar#mindspore.ops.operations.HistogramSummary): 将张量数据转为直方图数据记录\n", "- 在训练中使用Summary算子收集数据时,`HistogramSummary`算子会影响性能,所以请尽量少地使用。\n", "- 不能同时使用多个 `SummaryRecord` 实例 (`SummaryCollector` 中使用了 `SummaryRecord`)。\n", "- 为了控制列出summary文件目录的用时,MindInsight最多支持发现999个summary文件目录。\n", @@ -1255,7 +1255,7 @@ "source": [ "## 总结\n", "\n", - 
"本次体验流程为完整的MindSpore深度学习及MindInsight可视化展示的过程,包括了下载数据集及预处理过程,构建网络、损失函数和优化器过程,生成模型并进行训练、验证的过程,以及启动MindInsight服务进行训练过程可视化展示。读者可以基于本次体验流程构建自己的网络模型进行训练,并使用`SummaryCollector`、`ImageSummary`和`TensorSummary`记录关心的数据,然后在MindInsight服务看板中进行可视化展示,根据MindInsight服务中展示的结果调整相应的参数以提高训练精度。\n", + "本次体验流程为完整的MindSpore深度学习及MindInsight可视化展示的过程,包括了下载数据集及预处理过程,构建网络、损失函数和优化器过程,生成模型并进行训练、验证的过程,以及启动MindInsight服务进行训练过程可视化展示。读者可以基于本次体验流程构建自己的网络模型进行训练,并使用`SummaryCollector`以及Summary算子记录关心的数据,然后在MindInsight服务看板中进行可视化展示,根据MindInsight服务中展示的结果调整相应的参数以提高训练精度。\n", "\n", "以上便完成了标量、直方图、图像和张量可视化的体验,我们通过本次体验全面了解了MindSpore执行训练的过程和MindInsight在标量、直方图、图像和张量可视化的应用,理解了如何使用`SummaryColletor`记录训练过程中的标量、直方图、图像和张量数据。" ] @@ -1282,4 +1282,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb index 663883c8def9b314ae297200ddd45278f987a4cb..3a09e78dc423d7634c1f32c9ba7ff91109afb600 100644 --- a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb +++ b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb @@ -12,20 +12,10 @@ "metadata": {}, "source": [ "## 概述\n", - "在AI训练的过程中,面对陌生的神经网络训练,经常需要事先优化神经网络训练中的参数,毕竟在训练一个十分复杂的神经网络时,有时候需要花费少则几天多则几周甚至更多的时间,为了更好的管理、调试和优化神经网络的训练过程,我们需要一个工具来对训练过程中的计算图、各种指标随着时间的变化趋势以及训练中使用到的图像信息进行分析和记录工作,而MindSpore就提供了一个对用户十分易用友好的可视化工具MindInsight,赋能给用户进行数据溯源和模型溯源的可视化分析,能明显提升用户对网络搭建过程和数据增强过程的纠错调优能力。而本次体验会从MindInsight的数据记录,可视化效果,如何方便用户在模型调优,数据调优上做一次整体流程的体验。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "下面按照MindSpore的训练数据模型的正常步骤进行,当使用`SummaryCollector`进行数据保存操作时,会增加相应的说明,本次体验的整体流程如下:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "在调参的场景下,需要多次调整模型超参并进行多次训练,在这个过程,往往需要手动记录每次训练使用参数以及训练结果。为此,MindSpore提供了自动记录模型参数,训练信息,以及训练结果评估指标的功能,并通过MindInsight进行可视化展示。本次体验会从MindInsight的数据记录,可视化效果,如何方便用户在模型调优,数据调优上做一次整体流程的体验。\n", + "\n", + "下面按照MindSpore的训练数据模型的正常步骤进行,当使用`SummaryCollector`进行数据保存操作时,会增加相应的说明,本次体验的整体流程如下:\n", + "\n", "1. 数据集的准备,这里使用的是MNIST数据集。\n", "\n", "2. 构建一个网络,这里使用LeNet网络。\n", @@ -36,13 +26,9 @@ "\n", "5. 模型溯源的使用。调整模型参数多次训练并存储数据,并使用MindInsight的模型溯源功能对不同优化参数下训练产生的模型作对比,了解MindSpore中的各类优化对训练过程的影响及如何调优训练过程。\n", "\n", - "6. 数据溯源的使用。调整数据参数多次训练并存储数据,并使用MindInsight的数据溯源功能对不同数据集下训练产生的模型进行对比分析,了解如何调优。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "6. 数据溯源的使用。调整数据参数多次训练并存储数据,并使用MindInsight的数据溯源功能对不同数据集下训练产生的模型进行对比分析,了解如何调优。\n", + "\n", + "\n", "本次体验将使用快速入门案例作为基础用例,将MindInsight的模型溯源和数据溯源的数据记录功能加入到案例中,快速入门案例的源码请参考:。" ] }, @@ -57,13 +43,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 数据集准备" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "\n", + "### 准备\n", "#### 方法一:\n", "从以下网址下载,并将数据包解压缩后放至Jupyter的工作目录下:
训练数据集:{\"\", \"\"}\n", "
测试数据集:{\"\", \"\"}
我们用下面代码查询jupyter的工作目录。" @@ -121,7 +102,7 @@ " test_path = \"./MNIST_Data/test/\"\n", " train_path_check = os.path.exists(train_path)\n", " test_path_check = os.path.exists(test_path)\n", - " if train_path_check == False and test_path_check == False:\n", + " if not train_path_check and not test_path_check:\n", " os.makedirs(train_path)\n", " os.makedirs(test_path)\n", " train_url = {\"http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\", \"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\"}\n", @@ -168,18 +149,14 @@ "source": [ "数据集处理对于训练非常重要,好的数据集可以有效提高训练精度和效率。在加载数据集前,我们通常会对数据集进行一些处理。\n", "
我们定义一个函数`create_dataset`来创建数据集。在这个函数中,我们定义好需要进行的数据增强和处理操作:\n", + "\n", "1. 定义数据集。\n", "2. 定义进行数据增强和处理所需要的一些参数。\n", "3. 根据参数,生成对应的数据增强操作。\n", "4. 使用`map`映射函数,将数据操作应用到数据集。\n", - "5. 对生成的数据集进行处理。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> 具体的数据集操作可以在MindInsight的数据溯源中进行可视化分析。另外提取图像需要将`normalize`算子的数据处理(`CV.Rescale`)操作取消,否则取出来的图像为全黑图像。" + "5. 对生成的数据集进行处理。\n", + "\n", + "具体的数据集操作可以在MindInsight的数据溯源中进行可视化分析。" ] }, { @@ -242,21 +219,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 构建LeNet5网络" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 使用ImageSummary记录图像数据" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "在构建LeNet5网络的`__init__`中,初始化`ImageSummary`算子,同时在`construct`中将`ImageSummary`放在第一步,其第一个参数`image`为抽取出来的图片的自定义命名,第二个参数`x`是图像数据。此方法与`SummaryCollector`抽取图像的方法不冲突,可以同时使用。" + "## 定义LeNet5网络" ] }, { @@ -299,11 +262,8 @@ " self.relu = nn.ReLU()\n", " self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)\n", " self.flatten = nn.Flatten()\n", - " # Init ImageSummary\n", - " self.sm_image = P.ImageSummary()\n", "\n", " def construct(self, x):\n", - " self.sm_image(\"image\",x)\n", " x = self.conv1(x)\n", " x = self.relu(x)\n", " x = self.max_pool2d(x)\n", @@ -323,88 +283,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 训练网络和测试网络构建" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 使用SummaryCollector记录训练数据" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`summary_callback`,即是`SummaryCollector`,在`model.train`的回调函数中使用,可以记录训练数据溯源和模型溯源信息。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, SummaryCollector, Callback\n", - "from mindspore.train import Model\n", - "import os\n", - "\n", - "def train_net(model, epoch_size, mnist_path, repeat_size, ckpoint_cb, summary_collector):\n", - " \"\"\"Define the training method.\"\"\"\n", - " print(\"============== Starting Training ==============\")\n", - " # load training dataset\n", - " ds_train = create_dataset(os.path.join(mnist_path, \"train\"), 32, repeat_size)\n", - " model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(), summary_collector], dataset_sink_mode=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 使用SummaryCollector记录测试数据" + "## 记录数据及启动训练" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "`summary_callback`,即是`SummaryCollector`,在`model.eval`的回调函数中使用,可以记录训练精度信息和测试样本数量信息。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mindspore.train.serialization import load_checkpoint, load_param_into_net\n", + "MindSpore 提供 `SummaryCollector` 进行记录训练过程中的信息。通过 `SummaryCollector` 的 `collect_specified_data` 参数,可以自定义记录指定数据。\n", "\n", - "def test_net(network, model, mnist_path, summary_collector):\n", - " \"\"\"Define the evaluation method.\"\"\"\n", - " print(\"============== Starting Testing ==============\")\n", - " # load the saved model for evaluation\n", - " param_dict = load_checkpoint(\"checkpoint_lenet-3_1875.ckpt\")\n", - " # load parameter to the network\n", - " load_param_into_net(network, param_dict)\n", - " # load testing dataset\n", - " ds_eval = create_dataset(os.path.join(mnist_path, \"test\"))\n", - " acc = model.eval(ds_eval, callbacks=[summary_collector], dataset_sink_mode=True)\n", - " 
print(\"============== Accuracy:{} ==============\".format(acc))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 主程序运行入口" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "初始化`SummaryCollector`,使用`collect_specified_data`控制需要记录的数据,我们这里只需要记录模型溯源和数据溯源,所以将`collect_train_lineage`和`collect_eval_lineage`参数设置成`True`,其他的参数使用`keep_default_action`设置成`False`,SummaryCollector能够记录哪些数据,请参考官网:。" + "在本次体验中,我们将记录训练数据与数据集预处理的操作,我们将 `collect_specified_data` 中的 `collect_train_lineage`, `collect_eval_lineage`, `collect_dataset_graph` 设置成 `True`。SummaryCollector的更多用法,请参考[API文档](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=collector#mindspore.train.callback.SummaryCollector)。\n" ] }, { @@ -422,7 +310,7 @@ " context.set_context(mode=context.GRAPH_MODE, device_target = \"GPU\")\n", " lr = 0.01\n", " momentum = 0.9 \n", - " epoch_size = 3\n", + " epoch_size = 10\n", " mnist_path = \"./MNIST_Data\"\n", " \n", " net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')\n", @@ -436,10 +324,22 @@ " ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_lenet\", config=config_ck)\n", " model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", " \n", - " collect_specified_data = {\"collect_eval_lineage\":True,\"collect_train_lineage\":True}\n", + " collect_specified_data = {\"collect_eval_lineage\": True, \"collect_train_lineage\": True, \"collect_dataset_graph\": True}\n", " summary_collector = SummaryCollector(summary_dir=\"./summary_base/quick_start_summary01\", collect_specified_data=collect_specified_data, keep_default_action=False) \n", - " train_net(model, epoch_size, mnist_path, repeat_size, ckpoint_cb, summary_collector)\n", - " test_net(network, model, mnist_path, summary_collector)" + "\n", + " # Start to train\n", + " ds_train = create_dataset(os.path.join(mnist_path, \"train\"), 32, repeat_size)\n", + " model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, summary_collector], dataset_sink_mode=True)\n", + "\n", + " print(\"============== Starting Testing ==============\")\n", + " # load the saved model for evaluation\n", + " param_dict = load_checkpoint(\"checkpoint_lenet-10_1875.ckpt\")\n", + " # load parameter to the network\n", + " load_param_into_net(network, param_dict)\n", + " # load testing dataset\n", + " ds_eval = create_dataset(os.path.join(mnist_path, \"test\"))\n", + " acc = model.eval(ds_eval, callbacks=[summary_collector], dataset_sink_mode=True)\n", + " print(\"============== Accuracy:{} ==============\".format(acc))" ] }, { @@ -453,74 +353,36 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "这里主要展示如何启用及关闭MindInsight,更多的命令集信息,请参考MindSpore官方网站:。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- 启动MindInsight服务\n", + "这里主要展示如何启用及关闭MindInsight,更多的命令集信息,请参考MindSpore官方网站:。\n", "\n", - " 在安装过MindInsight的环境中启动MindInsight服务:\n", - " - `--summary-base-dir`:MindInsight指定启动工作路径的命令;`./summary_base`表示SummaryRecord保存文件夹的目录。\n", - " - `--port`:MindInsight指定启动的端口,数值可以任意为1~65535的范围内。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.system(\"mindinsight start --summary-base-dir=./summary_base --port=8080\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "查询是否启动成功,在网址输入:`127.0.0.1:8080`,如果看到如下界面说明启动成功。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"![image](https://gitee.com/mindspore/docs/raw/master/tutorials/notebook/mindinsight/images/summary_list.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- 关闭MindInsight服务\n", + "启动MindInsight服务命令:\n", "\n", - " 在安装过MindInsight的环境中输入命令:`mindinsight stop --port=8080`\n", - " - `mindinsight stop`:MindInsight关闭服务命令。\n", - " - `--port=8080`:即MindInsight服务开启在`8080`端口,所以这里写成`--port=8080`。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 模型溯源" + "mindinsight start --summary-base-dir=./summary_base --port=8080\n", + "\n", + "\n", + "- `--summary-base-dir`:MindInsight指定启动工作路径的命令;`./summary_base` 为 `SummaryCollector` 的 `summary_dir` 参数所指定的目录。\n", + "- `--port`:MindInsight指定启动的端口,数值可以任意为1~65535的范围内。" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 连接到模型溯源地址" + "停止MindInsight服务命令:`mindinsight stop --port=8080`\n", + "\n", + "- `mindinsight stop`:MindInsight关闭服务命令。\n", + "- `--port=8080`:即MindInsight服务开启在`8080`端口,所以这里写成`--port=8080`。" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "浏览器中输入:`127.0.0.1:8080`,点击模型溯源,如下模型溯源界面:" + "## 模型溯源\n", + "\n", + "### 连接到模型溯源地址\n", + "\n", + "浏览器中输入:`http://127.0.0.1:8080`,点击模型溯源,如下模型溯源界面:" ] }, { @@ -534,46 +396,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "我们可以勾选展示列,由于训练过程涉及的参数很多,在调整训练参数时,一般只会调整少量参数,所以对大部分相同参数可以去掉勾选,不显示出来,使得用户更方便的观察不同参数对模型训练的影响,上图中的不同参数的竖直线段代表的各个参数,数根连接各个参数的折线图代表不同的模型训练过程,其中各参数从左到右如下:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- 训练日志路径:表示存储记录数据的文件夹路径,即`summary_dir`。\n", - "- `Accuracy`:模型的精度值。\n", - "- `loss`:模型的loss值。\n", - "- 网络:表示神经网络名称。\n", - "- 优化器:表示训练过程中采用的优化器。\n", - "- 训练样本数量:训练样本数量。\n", - "- 测试样本数量:测试样本数量。\n", - "- 学习率:learning_rate的值。\n", - "- `epoch`:训练整个数据集的次数。\n", - "- `steps`:训练迭代数。\n", - "- device数目:启用的训练卡数目。\n", - "- 模型大小:生成的模型文件`.ckpt`的大小。\n", - "- 损失函数:表示训练过程中采用的损失函数。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "根据上述记录的信息,我们可以调整模型训练过程中的参数,训练生成模型,然后选择要对比的训练,进行比对观察分析。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 观察分析记录下来的溯源参数" + "如图所示,在页面的左上角中,我们可以选择要查看的训练信息,并通过这个功能挑选出我们要关注的训练信息,而不是要查看所有的训练信息,避免影响对不同训练作业进行对比分析。" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "### 观察分析记录下来的溯源参数\n", + "\n", "下图选择了数条不同参数下训练生成的模型进行对比:" ] }, @@ -588,29 +419,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "在这几次训练的参数中,优化器,epoch和学习率都不一致,可以看到不同的训练生成的模型精度`Accuracy`和loss值是不一致的,当然最好是调整单个参数来观察对模型生成的影响,避免多重因素干扰,难以分辨哪个参数是正影响,哪个参数是负影响。这需要我们调整不同的参数,多训练几遍生成模型,分析各参数对训练产生的影响,这对前期学习AI训练时很有帮助。在以后应对复杂训练时,可以节省不少时间。\n", - "> 在多次训练时,需要将`summary_dir`的指定为不同的文件夹,否则训练记录的数据会生成在同一个文件夹下,而在同一文件夹下MindInsight只会读取最新生成的文件。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 数据溯源" + "在这几次训练的参数中,优化器,epoch和学习率都不一致,可以看到不同的训练生成的模型准确率`Accuracy`和损失值是不一致的,当然最好是调整单个参数来观察对模型生成的影响,避免多重因素干扰,难以分辨哪个参数是正影响,哪个参数是负影响。\n", + "\n", + "在多次训练时,需要为 `SummaryCollector` 的 `summary_dir` 参数的指定不同的文件夹,否则训练记录的数据会生成在同一个文件夹下,会导致MindInsight展示的数据为非预期。" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 连接到数据溯源地址" + "## 数据溯源\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "浏览器中输入:`127.0.0.1:8080`连接上MindInsight的服务,点击模型溯源,如下图数据溯源界面:" + "点击模型溯源,如下图数据溯源界面:" ] }, { @@ -624,60 +449,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "数据溯源的根本是重现数据集从左到右进行数据增强的整个过程,方便自己发现增强过程中是否有遗漏的步骤或者不合理的操作,方便自己查找错误,也方便自己找到最优的数据增强方式,毕竟一个好的数据集对模型训练是有事半功倍的效果的。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- 
训练日志路径:表示存储记录数据的文件夹路径,即`summary_dir`。\n", - "- `MnistDataset`:表示数据集信息,包含数据集路径。\n", - "- `Map_TypeCast`:定义数据集的类型。\n", - "- `Map_Resize`:图像缩放后的尺寸。\n", - "- `Map_Rescale`:图像的缩放比例。\n", - "- `Map_HWC2CHW`:数据集的张量由:高×宽×通道-->通道×高×宽。\n", - "- `Shuffle`:数据集混洗的缓存空间。\n", - "- `Batch`:每组训练样本数量。\n", - "- `Repeat`:数据图片复制次数,用于增强数据的数量。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 观察分析数据溯源参数" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "可以从上图看到数据增强过程由原数据集MnistDataset开始,按照先后顺序经过了下面的操作:label的数据类型转换(`Map_Typecast`),图像的高宽缩放(`Map_Resize`),图像的比例缩放(`Map_Rescale`),图像数据的张量变换(`Map_HWC2CHW`),图像混洗(`Shuffle`),图像成组(`Batch`),图像数量增强(`Repeat`)然后输出训练需要的数据。显然这样的可视化的数据溯源功能,在你检查数据增强操作是否有误的时候,比起一行行的去检查代码效率多了。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 最后关闭MindInsight服务" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.system(\"mindinsight stop --port=8080\")" + "数据溯源的是记录了每次训练中对数据集进行操作的流程,通过分析数据溯源,查看数据预处理过程中是否有遗漏的步骤或者不合理的操作。\n", + "\n", + "如图所示,图中几次训练的数据增强过程由原数据集MnistDataset开始,按照先后顺序经过了下面的操作:\n", + "\n", + "- label的数据类型转换(`Map_Typecast`)\n", + "- 图像的高宽缩放(`Map_Resize`)\n", + "- 图像的比例缩放(`Map_Rescale`)\n", + "- 图像数据的张量变换(`Map_HWC2CHW`)\n", + "- 图像混洗(`Shuffle`)\n", + "- 图像成组(`Batch`)\n", + "- 图像数量增强(`Repeat`)\n", + "\n", + "在数据溯源中,可以对不同训练所使用的数据预处理操作进行对比,快速发现数据预处理中存在的问题。" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "以上就是这次对MindInsight的使用方法,模型溯源和数据溯源的全部过程。" + "以上就是本次对MindInsight的模型溯源和数据溯源的体验全过程。" ] } ], @@ -702,4 +493,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/tutorials/notebook/synchronization_training_and_evaluation.ipynb b/tutorials/notebook/synchronization_training_and_evaluation.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4eff3b90a2fcd915f2ed65e376251a8d04f90083 --- /dev/null +++ b/tutorials/notebook/synchronization_training_and_evaluation.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#
同步训练和验证模型体验" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 概述" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在面对复杂网络时,往往需要进行几十甚至几百次的epoch训练。而在训练之前,往往很难掌握在训练到第几个epoch时,模型的精度能达到满足要求的程度。所以经常会采用一边训练的同时,在相隔固定epoch的位置对模型进行精度验证,并保存相应的模型,等训练完毕后,通过查看对应模型精度的变化就能迅速地挑选出相对最优的模型,本文将采用这种方法,以LeNet网络为样本,进行示例。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "整体流程如下:\n", + "1. 数据集准备。\n", + "2. 构建神经网络。\n", + "3. 定义回调函数EvalCallBack。\n", + "4. 定义训练网络并执行。\n", + "5. 定义绘图函数并对不同epoch下的模型精度绘制出折线图。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据准备" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 数据集的下载" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "训练数据集下载地址:{\"http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz \", \"http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz \"}。\n", + "\n", + "测试数据集:{\"\", \"\"}\n", + "
数据集放在----*Jupyter工作目录+\\MNIST_Data\\*,如下图结构:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "MNIST\n", + "├── test\n", + "│   ├── t10k-images-idx3-ubyte\n", + "│   └── t10k-labels-idx1-ubyte\n", + "└── train\n", + " ├── train-images-idx3-ubyte\n", + " └── train-labels-idx1-ubyte \n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 数据集的增强操作" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "下载下来后的数据集,需要通过`mindspore.dataset`处理成适用于MindSpore框架的数据,再使用一系列框架中提供的工具进行数据增强操作来适应LeNet网络的数据处理需求。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import mindspore.dataset as ds\n", + "import mindspore.dataset.transforms.vision.c_transforms as CV\n", + "import mindspore.dataset.transforms.c_transforms as C\n", + "from mindspore.dataset.transforms.vision import Inter\n", + "from mindspore.common import dtype as mstype\n", + "\n", + "def create_dataset(data_path, batch_size=32, repeat_size=1,\n", + " num_parallel_workers=1):\n", + " # define dataset\n", + " mnist_ds = ds.MnistDataset(data_path)\n", + "\n", + " # define map operations\n", + " resize_op = CV.Resize((32, 32), interpolation=Inter.LINEAR) \n", + " rescale_nml_op = CV.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081) \n", + " rescale_op = CV.Rescale(1/255.0, 0.0) \n", + " hwc2chw_op = CV.HWC2CHW() \n", + " type_cast_op = C.TypeCast(mstype.int32) \n", + "\n", + " # apply map operations on images\n", + " mnist_ds = mnist_ds.map(input_columns=\"label\", operations=type_cast_op, num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(input_columns=\"image\", operations=[resize_op,rescale_op,rescale_nml_op,hwc2chw_op],\n", + " num_parallel_workers=num_parallel_workers)\n", + "\n", + " # apply DatasetOps\n", + " buffer_size = 10000\n", + " mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)\n", + " mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n", + " mnist_ds = mnist_ds.repeat(repeat_size)\n", + " \n", + " return mnist_ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 构建神经网络" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "LeNet网络属于7层神经网络,其中涉及卷积层,全连接层,函数激活等算法,在MindSpore中都已经建成相关算子只需导入使用,如下先将卷积函数,全连接函数,权重等进行初始化,然后在LeNet5中定义神经网络并使用`construct`构建网络。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import mindspore.nn as nn\n", + "from mindspore.common.initializer import TruncatedNormal\n", + "\n", + "\n", + "def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):\n", + " \"\"\"Conv layer weight initial.\"\"\"\n", + " weight = weight_variable()\n", + " return nn.Conv2d(in_channels, out_channels,\n", + " kernel_size=kernel_size, stride=stride, padding=padding,\n", + " weight_init=weight, has_bias=False, pad_mode=\"valid\")\n", + "\n", + "def fc_with_initialize(input_channels, out_channels):\n", + " \"\"\"Fc layer weight initial.\"\"\"\n", + " weight = weight_variable()\n", + " bias = weight_variable()\n", + " return nn.Dense(input_channels, out_channels, weight, bias)\n", + "\n", + "def weight_variable():\n", + " \"\"\"Weight initial.\"\"\"\n", + " return TruncatedNormal(0.02)\n", + "\n", + "class LeNet5(nn.Cell):\n", + " \"\"\"Lenet network structure.\"\"\"\n", + " # define the operator required\n", + " def __init__(self):\n", + " super(LeNet5, self).__init__()\n", + " self.conv1 = conv(1, 6, 5)\n", + " self.conv2 = 
conv(6, 16, 5)\n", + " self.fc1 = fc_with_initialize(16 * 5 * 5, 120)\n", + " self.fc2 = fc_with_initialize(120, 84)\n", + " self.fc3 = fc_with_initialize(84, 10)\n", + " self.relu = nn.ReLU()\n", + " self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)\n", + " self.flatten = nn.Flatten()\n", + "\n", + " # use the preceding operators to construct networks\n", + " def construct(self, x):\n", + " x = self.conv1(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.conv2(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.flatten(x)\n", + " x = self.fc1(x)\n", + " x = self.relu(x)\n", + " x = self.fc2(x)\n", + " x = self.relu(x)\n", + " x = self.fc3(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 定义回调函数EvalCallBack" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "实现思想:每隔n个epoch验证一次模型精度,由于在自定义函数中实现,如需了解自定义回调函数的详细用法,请参考[API说明](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=callback#mindspore.train.callback.Callback)。\n", + "\n", + "核心实现:回调函数的`epoch_end`内设置验证点,如下:\n", + "\n", + "`cur_epoch % eval_per_epoch == 0`:即每`eval_per_epoch`个epoch结束时,验证一次模型精度。\n", + "\n", + "- `cur_epoch`:当前训练过程的epoch数值。\n", + "- `eval_per_epoch`:用户自定义数值,即验证频次。\n", + "\n", + "其他参数解释:\n", + "\n", + "- `model`:即是MindSpore中的`Model`函数。\n", + "- `eval_dataset`:验证数据集。\n", + "- `epoch_per_eval`:记录验证模型的精度和相应的epoch数,其数据形式为`{\"epoch\":[],\"acc\":[]}`。" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from mindspore.train.callback import Callback\n", + "\n", + "class EvalCallBack(Callback):\n", + " def __init__(self, model, eval_dataset, eval_per_epoch, epoch_per_eval):\n", + " self.model = model\n", + " self.eval_dataset = eval_dataset\n", + " self.eval_per_epoch = eval_per_epoch\n", + " self.epoch_per_eval = epoch_per_eval\n", + " \n", + " def epoch_end(self, run_context):\n", + " cb_param = run_context.original_args()\n", + " cur_epoch = cb_param.cur_epoch_num\n", + " if cur_epoch % self.eval_per_epoch == 0:\n", + " acc = self.model.eval(self.eval_dataset, dataset_sink_mode=True)\n", + " epoch_per_eval[\"epoch\"].append(cur_epoch)\n", + " epoch_per_eval[\"acc\"].append(acc[\"Accuracy\"])\n", + " print(acc)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 定义训练网络并执行" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在保存模型的参数`CheckpointConfig`中,需计算好单个epoch中的step数,再根据需要进行验证模型精度的频次对应,\n", + "本次示例为1875个step/epoch,按照每两个epoch验证一次的思想,这里设置`save_checkpoint_steps=eval_per_epoch*1875`,\n", + "其中变量`eval_per_epoch`等于2。\n", + "\n", + "参数解释:\n", + "\n", + "- `train_data_path`:训练数据集地址。\n", + "- `eval_data_path`:验证数据集地址。\n", + "- `train_data`:训练数据集。\n", + "- `eval_data`:验证数据集。\n", + "- `net_loss`:定义损失函数。\n", + "- `net-opt`:定义优化器函数。\n", + "- `config_ck`:定义保存模型信息。\n", + " - `save_checkpoint_steps`:每多少个step保存一次模型。\n", + " - `keep_checkpoint_max`:设置保存模型数量的上限。\n", + "- `ckpoint_cb`:定义模型保存的名称及路径信息。\n", + "- `model`:定义模型。\n", + "- `model.train`:模型训练函数。\n", + "- `epoch_per_eval`:定义收集`epoch`数和对应模型精度信息的字典。" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 1 step: 375, loss is 2.3058078\n", + "epoch: 1 step: 750, loss is 2.3073978\n", + "epoch: 1 step: 1125, loss is 2.3103657\n", + "epoch: 1 step: 1500, loss is 0.65018296\n", + "epoch: 
1 step: 1875, loss is 0.07800862\n", + "epoch: 2 step: 375, loss is 0.010344766\n", + "epoch: 2 step: 750, loss is 0.052723818\n", + "epoch: 2 step: 1125, loss is 0.08183526\n", + "epoch: 2 step: 1500, loss is 0.007430988\n", + "epoch: 2 step: 1875, loss is 0.0076965275\n", + "{'Accuracy': 0.9753605769230769}\n", + "epoch: 3 step: 375, loss is 0.11964749\n", + "epoch: 3 step: 750, loss is 0.04522314\n", + "epoch: 3 step: 1125, loss is 0.018271001\n", + "epoch: 3 step: 1500, loss is 0.006928641\n", + "epoch: 3 step: 1875, loss is 0.15374172\n", + "epoch: 4 step: 375, loss is 0.12120275\n", + "epoch: 4 step: 750, loss is 0.122824214\n", + "epoch: 4 step: 1125, loss is 0.0023852547\n", + "epoch: 4 step: 1500, loss is 0.018273383\n", + "epoch: 4 step: 1875, loss is 0.08102103\n", + "{'Accuracy': 0.9821714743589743}\n", + "epoch: 5 step: 375, loss is 0.12944886\n", + "epoch: 5 step: 750, loss is 0.0010141768\n", + "epoch: 5 step: 1125, loss is 0.0054096584\n", + "epoch: 5 step: 1500, loss is 0.0022614016\n", + "epoch: 5 step: 1875, loss is 0.07229582\n", + "epoch: 6 step: 375, loss is 0.0025749032\n", + "epoch: 6 step: 750, loss is 0.06261393\n", + "epoch: 6 step: 1125, loss is 0.021273317\n", + "epoch: 6 step: 1500, loss is 0.011360342\n", + "epoch: 6 step: 1875, loss is 0.12855275\n", + "{'Accuracy': 0.9853766025641025}\n", + "epoch: 7 step: 375, loss is 0.09330422\n", + "epoch: 7 step: 750, loss is 0.002063415\n", + "epoch: 7 step: 1125, loss is 0.0047940286\n", + "epoch: 7 step: 1500, loss is 0.0052507296\n", + "epoch: 7 step: 1875, loss is 0.018066114\n", + "epoch: 8 step: 375, loss is 0.08678668\n", + "epoch: 8 step: 750, loss is 0.02440551\n", + "epoch: 8 step: 1125, loss is 0.0017507032\n", + "epoch: 8 step: 1500, loss is 0.02957578\n", + "epoch: 8 step: 1875, loss is 0.0023948685\n", + "{'Accuracy': 0.9863782051282052}\n", + "epoch: 9 step: 375, loss is 0.012376097\n", + "epoch: 9 step: 750, loss is 0.029711302\n", + "epoch: 9 step: 1125, loss is 0.017438065\n", + "epoch: 9 step: 1500, loss is 0.015443239\n", + "epoch: 9 step: 1875, loss is 0.0031764025\n", + "epoch: 10 step: 375, loss is 0.0005294987\n", + "epoch: 10 step: 750, loss is 0.0015696918\n", + "epoch: 10 step: 1125, loss is 0.019949459\n", + "epoch: 10 step: 1500, loss is 0.004248183\n", + "epoch: 10 step: 1875, loss is 0.07389321\n", + "{'Accuracy': 0.9824719551282052}\n" + ] + } + ], + "source": [ + "from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor\n", + "from mindspore.train import Model\n", + "from mindspore import context\n", + "from mindspore.nn.metrics import Accuracy\n", + "from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits\n", + "\n", + "if __name__ == \"__main__\":\n", + " context.set_context(mode=context.GRAPH_MODE, device_target=\"GPU\")\n", + " train_data_path = \"./MNIST_Data/train\"\n", + " eval_data_path = \"./MNIST_Data/test\"\n", + " ckpt_save_dir = \"./lenet_ckpt\"\n", + " epoch_size = 10\n", + " eval_per_epoch = 2\n", + " repeat_size = 1\n", + " network = LeNet5()\n", + " \n", + " train_data = create_dataset(train_data_path, repeat_size=repeat_size)\n", + " eval_data = create_dataset(eval_data_path, repeat_size=repeat_size)\n", + " \n", + " # define the loss function\n", + " net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')\n", + " # define the optimizer\n", + " net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.01, momentum=0.9)\n", + " config_ck = CheckpointConfig(save_checkpoint_steps=eval_per_epoch*1875, 
keep_checkpoint_max=15)\n", + " ckpoint_cb = ModelCheckpoint(prefix=\"checkpoint_lenet\", directory=ckpt_save_dir, config=config_ck)\n", + " model = Model(network, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + " \n", + " epoch_per_eval = {\"epoch\": [], \"acc\": []}\n", + " eval_cb = EvalCallBack(model, eval_data, eval_per_epoch, epoch_per_eval)\n", + " \n", + " model.train(epoch_size, train_data, callbacks=[ckpoint_cb, LossMonitor(375), eval_cb],\n", + " dataset_sink_mode=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在同一目录的文件夹中可以看到`lenet_ckpt`文件夹中,保存了5个模型,和一个计算图相关数据,其结构如下:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```\n", + "lenet_ckpt\n", + "├── checkpoint_lenet-10_1875.ckpt\n", + "├── checkpoint_lenet-2_1875.ckpt\n", + "├── checkpoint_lenet-4_1875.ckpt\n", + "├── checkpoint_lenet-6_1875.ckpt\n", + "├── checkpoint_lenet-8_1875.ckpt\n", + "└── checkpoint_lenet-graph.meta\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 绘制不同epoch下模型的精度" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "定义绘图函数`eval_show`,将`epoch_per_eval`载入到`eval_show`中,绘制出不同`epoch`下模型的验证精度折线图。" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3debzWY/7H8ddb0SLZygyisgxiZEiWkMk+wxhb9m1sQxnrjzBmyJKxm9Ega3ZjnRgVmsrUYDohIhQmEsqWpRHV5/fH9T26O85yn5z7fM/yfj4e53Hu+/4u9+c+y/25r+v6Xp9LEYGZmVmxlso7ADMza1ycOMzMrFacOMzMrFacOMzMrFacOMzMrFacOMzMrFacOKzkJHWRFJJaFrHvEZLG1UdczYWkgyU98QOOHy7p8LqMqYbnK/rvxfLhxGGLkfRfSd9I6lDh8Rezf+Yu+URmSyoi7oqInYvZV9J5ku6scPxuETG0NNHVj+xvd52842gqnDisMm8DB5bfkfRToE1+4TQMjfETcGOMuS4199dfKk4cVpk7gMMK7h8O3F64g6TlJd0uabak6ZJ+L2mpbFsLSZdL+kjSW8AvKzn2ZknvS3pP0oWSWhQTmKT7JX0gaY6kpyVtWLCtjaQrsnjmSBonqU22bRtJ/5b0maR3JR2RPT5G0tEF51isqyz7pNpP0lRgavbYNdk5Ppc0UdK2Bfu3kHS2pDclfZFtX0PSYElXVHgtj0o6uZLXeL2kyys89ndJp2a3BxSc/1VJe1WIf7ykqyR9ApxXyWuqNH5JuwJnA/tL+lLSpIo/I0lLZb/r6ZJmZX8Dy2fbyruYDpf0Tvb7P6ea32WVv6/MwZWdR1JPSc9kv8v3JV0raZmqfmeSns42Tcpe1/5VxWRFigh/+eu7L+C/wI7A68AGQAvgXaAzEECXbL/bgb8DywFdgDeAo7JtvwVeA9YAVgJGZ8e2zLY/AtwALAusAvwHOC7bdgQwrpr4fpM9ZyvgauDFgm2DgTHA6lncW2f7rQl8QWpFLQ2sDGySHTMGOLrgHIs9fxb3k9nraJM9dkh2jpbAacAHQOts2/8BLwPrAQK6Z/v2BGYCS2X7dQDmAj+q5DVul/3Mld1fEfgfsFp2fz9gNdIHv/2Br4BVC+KfD5yYxdemktdUXfznAXdWiOe7n1H2858GrAW0Ax4C7si2dcl+Xjdmz9sdmAdsUMXvsqrfV7XnATYDtszi7wJMAU6u4XcWwDp5/381la/cA/BXw/piUeL4PTAI2DX7J2yZ/fN1yf7J5wHdCo47DhiT3f4n8NuCbTtnx7YEfpQd26Zg+4HA6Oz2Ym9yNcS6Qnbe5bM30f8B3SvZ7yzg4SrO8d2bYmXPn52/Tw1xfFr+vKSEu2cV+00Bdspu9wcer2I/Ae8A22X3jwH+Wc3zv1j+nFn871TYXu3PtEL851F94hgFnFCwbT3g24I38QA6FWz/D3BAJc9Z3e+r6PNk204u/P1W9jvDiaNOv9xVZVW5AziI9KZze4VtHYBlgOkFj00nfXKE9Gn43QrbynUmfep/P+tq+IzU+lilpoCybqBLsm6az0lJrjyeDkBr4M1KDl2jiseLVfhakHSapClZ98pnpMRVfjFBdc81lPRpn+z7HZXtFOmd7l4WjTMdBNxV8PyHKV2sUP7z26jg+b8Xb0U1xF+T1fj+7738A0G5DwpuzyW1TCqq7vdV7Xkk/UTSY1mX5efAxZXEX+3PwH4YJw6rVERMJw2S/4LUHVHoI9KnzM4Fj60JvJfdfp/0Blq4rdy7pBZHh4hYIftqHxEbUrODgD1JLaLlSZ9MIX1C/wj4Gli7kuPereJxSN08bQvu/7iSfb4rIZ2NB5wJ9AVWjIgVgDlZDDU9153AnpK6k7oBH6liP4B7gH0ldQa2AB7Mnr8zqQunP7By9vyTC55/sXgrKiL+msplz+T7v/f5wIc1HFdRdb+vmlxH6gpdNyLak8ZlVGEfl/0uIScOq85RpCb/V4UPRsQC4G/ARZKWy97MTiW9MZJt+52kTpJWBAYUHPs+8ARwhaT22WDr2pJ6FxHPcqSk8zHpzf7igvMuBG4BrpS0WtY62UpSK9Kn9R0l9ZXUUtLKkjbJDn0R2FtSW6XLNY8qIob5wGygpaQ/AO0Ltt8EXCBpXSUbS1o5i3EGMIHU0ngwIv5X1
ZNExAvZc9wEjIyIz7JNy5LeFGcDSDqS1OIoVk3xfwh0UXahQyXuAU6R1FVSO9Lv4L6ImF+LGGr6fRXzGj4HvpS0PnB8Ecd8SBqXsTrgxGFViog3I6Ksis0nkj6tvwWMA+4mvRFA+kQ8EpgEPM/3WyyHkbq6XiX1rz8ArFpESLeTukbey459tsL200kD0xOAT4A/kQaj3yG1nE7LHn+RNOAKcBXwDemNZSgFXUJVGAkMJ10MMJ30qbmwW+RKUuJ8gvTmdjOLX8o8FPgpVXRTVXAPqXV1d/kDEfEqcAXwTBbzT4HxRZyr2Pjvz75/LOn5So6/JYv9aVKL9GvS38KSqPT3VeRxB5EueLgRuK+IY84Dhmbde32XKFr7TvlVG2ZWDyRtR2qZdck+dZs1Om5xmNUTSUsDJwE3OWlYY+bEYVYPJG0AfEbqkrs653DMfhB3VZmZWa24xWFmZrXSLAqAdejQIbp06ZJ3GGZmjcrEiRM/ioiOFR9vFomjS5culJVVdVWpmZlVRtL0yh53V5WZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdVKs5jHYWaN3MyZMG4cfPUVHHYYtGiRd0TNmhOHmTUsEfD66ylR/Otf6ftbby3a/thjcNdd0Lp1fjE2c04cZpavb7+FF15YPFF89FHa1rEjbLMN9O+fvo8bB6eeCr/8JTzyCCy3XL6xN1NOHGZWv778Ep59dlGiePZZmDs3bVt77ZQUtt02JYqf/ARUsJz45ptDhw5w5JHQpw88/nhKLlavnDjMrLQ+/BDGj1/UmnjhBViwAJZaCrp3h6OPTkmiVy9YbbWaz3foobDiirDffinBPPEErLlm6V+HfaekiUPSrsA1QAvSqmeXVNjembSGcUfSmsOHRMSMbNulwC9JV349CZwUESFpGeBaYHtgIXBORDxYytdhZkWKgDffXLzb6Y030rbWrWGLLeCss1Ki2GoraN9+yZ5n991Twthjj5RwnngCNtig7l6HVatkiUNSC2AwsBMwA5ggaVhEvFqw2+XA7RExVFIfYBBwqKStgV7Axtl+44DewBjgHGBWRPxE0lLASqV6DWZWg/nz4aWXFiWJcePggw/StpVWSm/q5S2KzTaDZZapu+fedlsYOxZ22SXdfvxx6Nmz7s5vVSpli6MnMC0i3gKQdC+wJ1CYOLoBp2S3RwOPZLcDaA0sAwhYGvgw2/YbYH2AbN3mj0r3EsxsMXPnwn/+syhRPPMMfPFF2ta5M+y4Y0oS224L66+fuqNKqXv31A22005pzOORR1IMVlKlTByrA+8W3J8BbFFhn0nAPqTurL2A5SStHBHPSBoNvE9KHNdGxBRJK2THXSBpe+BNoH9EfFjhvEg6FjgWYE33f5otmY8/Xnx8YuLEdBWUBD/9aRpv2Gab9LXGGvnEuPbaKcZddoFf/ALuvhv23TefWJqJUiYOVfJYxQXOTweulXQE8DTwHjBf0jrABkCnbL8nJW1Haq10AsZHxKmSTiV1dx36vSeKGAIMAejRo4cXVjerSQRMn74oSfzrXzBlStq2zDKpG+i001JrYqut0gB1Q7Hqqqnbao89oG9fuO46OO64vKNqskqZOGYAhR9BOgEzC3eIiJnA3gCS2gH7RMScrLXwbER8mW0bDmwJ/AuYCzycneJ+4KgSvgazpmvBApg8edHYxL/+Be+9l7Ytv3wanzj00JQoevRo+BPuVlwxDZLvtx/89reptXTWWYtfzmt1opSJYwKwrqSupJbEAcBBhTtI6gB8ko1VnEW6wgrgHeAYSYNILZfewNXZVVWPkq6o+iewA4uPmZhZVb7+GiZMWJQk/v1vmDMnbVt99ZQgyudPbLhh4yzr0bZtGuc48kg455w0kfDyy0s/1tLMlCxxRMR8Sf2BkaTLcW+JiFckDQTKImIYKQEMkhSkrqp+2eEPAH2Al0ndWyMi4tFs25nAHZKuBmYDR5bqNZg1ap99lvr+yxPFhAnwzTdpW7dusP/+ixJF585N55P50kvD7benq7quuiq1PG66KT1udUIRTb/7v0ePHlFWVpZ3GGal9e67i8+fmDw5jVu0bJm6msqvdtp66zT7uqmLgAsvhD/8IY193HcftGmTd1SNiqSJEdGj4uOeOW7WGC1cmAauCxPF9OlpW7t2KTn07ZuSRc+eqQunuZHg3HNTkuzXL1119eijafzGfhAnDrPG4Jtv0qWw5Yli/Hj45JO07cc/Tgni1FPT9403Tq0MS44/PnVbHXoo9O4NI0akn5ktMf91mTVEn3+eJteVtyaeey4NbkMq/LfXXou6ntZaq+mMT5TK/vvDCivA3nunn9uTT0LXrnlH1Wg5cZg1BO+/v3i306RJqTuqRQv42c/Sp+byiXarrJJ3tI3TLrvAqFFpkmCvXjByZJrEaLXmxGGWl4kT4S9/SYnizTfTY23bpsl1556bksSWW6YxC6sbW26ZkvPOO8N228E//pHGg6xWnDjM6lsEXHttmoXdrh1svz2ccELqdtpkE182WmobbpjGiHbeOdW1euCB1AqxojlxmNWnOXNStdgHHkiXiN52Wxq4tfrVpUtq6e26K+y5JwwdCgcdVONhlng6pVl9eeGFVFr84Yfhssvg73930sjTKqvAmDFpvOPgg1O3oRXFicOs1CLghhvS2MXXX6difKef7iuhGoL27dPlub/+Nfzud/DHP6bfl1XLicOslL78Eg45JBXd23771Oro1SvvqKxQ69Zw//2pvtXAgdC/f7qizarkMQ6zUnn55VSpdepUuOgiGDDAxfYaqpYt4eab0yzzyy5LkyuHDq3bFQubECcOs1K49dZU5mL55dPcge23zzsiq4kEl16akseZZ8Knn8KDD8Kyy+YdWYPjjz9mdWnu3NTl8ZvfpPkBL77opNHYnHFGqqb75JNpSdry0i72HScOs7oyZUoqKDh0aBpkHTkSfvSjvKOyJXHUUemS6YkT00TB8gWuDHDiMKsbd94Jm28Os2alhHHeeY1zISRbZK+90hVX06enWfxTp+YdUYPhxGH2Q/zvf3Dssany6mabpa6pnXbKOyqrKz//OYwena6O22abdFWcOXGYLbGpU9PcjBtvTGtbjxoFq62Wd1RW13r0SLPMW7VK41Vjx+YdUe6cOMyWxN/+lloY776bCuVdfLHXwGjK1lsv1bdaffVUZXfYsLwjypUTh1ltzJuXJojtvz9stFHqmnKBvOZhjTVSZd3u3dO6HrfdlndEuXHiMCvW22+nWd+DB6fKtmPHpjcTaz5WXjl1Sfbpky67vuKKvCPKhROHWTEeeSQtqPTmm+n25Ze7/Hlz1a5dWrt8v/1SzbGzzmp29a3cKWtWnW++SaVCrroqXW57331ectTSQPk996TqxpdcAh99BNdf32wuwXbiMKvKO+9A375pve8TT0w1jFq1yjsqayhatIDrroOOHeHCC9MM87vuSkUTmzgnDrPK/OMfcNhh8O23qXLqvvvmHZE1RBJccEEa+zjlFPjlL1NX5nLL5R1ZSXmMw6zQ/Pmpa2r33WHNNeH5
5500rGYnnwy3354umOjTB2bPzjuiknLiMCv33ntppvCf/gTHHQfPPAPrrJN3VNZYHHpoam1MnpzWj3/nnbwjKhknDjOAJ56ATTZJJSXuvjsNdDaDvmqrY7vvnv6WPvggXbo9ZUreEZVESROHpF0lvS5pmqQBlWzvLGmUpJckjZHUqWDbpZJekTRF0p+lxdfZlDRM0uRSxm/NwIIF8Ic/wK67wo9/DGVlcOCBeUdljdm226Yuq2+/TbcnTMg7ojpXssQhqQUwGNgN6AYcKKlbhd0uB26PiI2BgcCg7NitgV7AxsBGwOZA74Jz7w18WarYrZn44INUkPCCC+CII9LVU+uvn3dU1hR0755KlLRvn7o/n3oq74jqVClbHD2BaRHxVkR8A9wL7Flhn27AqOz26ILtAbQGlgFaAUsDHwJIagecClxYwtitqRs9OnVNPftsKh1xyy3Qtm3eUVlTsvbaKXmstVa62uqBB/KOqM6UMnGsDrxbcH9G9lihScA+2e29gOUkrRwRz5ASyfvZ18iIKO8svAC4Aphb3ZNLOlZSmaSy2U38CgerhYUL0zX3O+4IK64I//kPHH543lFZU7XqqqnbavPN05ygIUPyjqhOlDJxqJLHKs7LPx3oLekFUlfUe8B8SesAGwCdSMmmj6TtJG0CrBMRD9f05BExJCJ6RESPjh07/qAXYk3E7Nmw225w7rlpHGPChFSo0KyUVlwxDZjvtlu6Wm/QoEZfoqSUEwBnAIUV4DoBMwt3iIiZwN7wXRfUPhExR9KxwLMR8WW2bTiwJfAFsJmk/2axryJpTERsX8LXYU3BuHFwwAGpNMSQIXD00Wnylll9aNs2Xap75JFw9tnpQ8zll8NSjfPC1lJGPQFYV1JXScsABwCLFbGX1EFSeQxnAbdkt98htURaSlqa1BqZEhHXRcRqEdEF2AZ4w0nDqrVwYZqXsf320KZNGtM45hgnDat/Sy+dJgmeeGKqfXbkkenKq0aoZC2OiJgvqT8wEmgB3BIRr0gaCJRFxDBge2CQpACeBvplhz8A9AFeJnVvjYiIR0sVqzVRH3+cxi/+8Y/Uv3zjjekqF7O8LLUUXHNNqm/1hz/Ap5+mwplt2uQdWa0oGnlfWzF69OgRZWVleYdh9enZZ9NiSx98kD7dHX+8WxnWsFx3HfTrl9Yyf/RRWH75vCP6HkkTI6JHxccbZwebWVUiUqLYdttUvXT8eDjhBCcNa3iOPz6VZn/2WejdO33IaSScOKzp+Owz2GcfOPXUVPrh+eehx/c+LJk1HPvvn1obU6emlsfbb+cdUVGcOKxpmDgRNt00/RNedRU89BCssELeUZnVbJdd0nK0n3yS6lu9/HLeEdXIicMat4i0BvjWW6eS6P/6Vypx7a4pa0y23DL97Uqw3Xbw73/nHVG1nDis8fr88zQ3o3//VHPqhRfSP6BZY7ThhmlMrmPHVNlg+PC8I6qSE4c1TpMmpfGLBx9M8zSGDUursJk1Zl26pMmq668Pv/pVGjxvgJw4rHGJSPMxttgCvvoKxoyBM85otDNwzb5nlVXS33WvXnDwwXDttXlH9D3+b7PG48sv0zrgxx6bLl984YV0JYpZU9O+PYwYAXvumWaan3deg6pv5cRhjcMrr6QKo3ffndbPGD48fTIza6pat4b770+lSc4/PyWQhQvzjgoobZFDs7oxdGiaLNW+fVoQ5+c/zzsis/rRsiXcfDN06ACXXZYu2b3tNlhmmXzDyvXZzaozd276lHXLLalI4T33pOVdzZoTCS69NCWPM89M9a0eeACWXTa3kNxVZQ3T66+nAfBbb03rZzz1lJOGNW9nnAE33ZTW9thpp9T6yIkThzU8d98Nm22WaveMGAEDB6a6U2bN3VFHpdbGxIlpouB77+USRo2JI1t+tZ+kFesjIGvGvv4afvvbdAniz34GL74IO++cd1RmDctee6UPVNOnp6sKp06t9xCKaXEcAKwGTJB0r6RdJNdzsDo2bRpstRXccEPqxx09GlavuES9mQHpApHRo9Ml6ttsky5Nr0c1Jo6ImBYR5wA/Ae4mrdL3jqTzJa1U6gCtGXjwwdQ1NX16KlJ4ySXpahIzq1qPHmmWeatW6eKRsWPr7amLGuOQtDFwBXAZ8CCwL/A58M/ShWZN3rx58Lvfwb77QrduqWtq993zjsqs8VhvvVTfavXVU5XdYcNqPqYOFDPGMRG4irSG+MYR8buIeC4irgDeKnWA1kS9/XZabOkvf4FTTkmfltZcM++ozBqfNdZIlXW7d4e9907znkqsmP6A/SKi0gQREXvXcTzWHAwbltYCj0jrZuy1V94RmTVuK6+c1vTYe2844gj4+OO0oFmJFNNVdbSk71bEkbSipAtLFpE1Xd9+C6efnurvrL12WqHPScOsbrRrl8YI99sPTjsNzj67ZPWtikkcu0XEZ+V3IuJT4BclicaarnffTYUJr7gC+vVL/bJrrZV3VGZNS6tWqcLCccfBoEHp+4IFdf40xSSOFpJald+R1AZoVc3+ZosbPjzNy5g8Ge67L5WJbuU/IbOSaNECrrsOfv/7VHnh+efr/CmKSRx3AqMkHSXpN8CTQOlHX6zxmz8/NZd/8Yt01UdZGfTtm3dUZk2flKpIT56cqkrXsRoHxyPiUkkvAzsAAi6IiJF1Hok1LTNnwoEHwtNPwzHHwDXXQJs2eUdl1ryst15JTlvULKuIGA403AVwrWF56ik46KC0Qt8dd8Ahh+QdkZnVoWLmcWwpaYKkLyV9I2mBpM/rIzhrZBYsSCuV7bxzWmSprMxJw6wJKmaM41rgQGAq0AY4GvhLKYOyRuqEE9JKZYcdBs89BxtskHdEZlYCRZUciYhpQIuIWBARtwJFLcEmaVdJr0uaJmlAJds7Sxol6SVJYyR1Kth2qaRXJE2R9GclbSX9Q9Jr2bZLin2hVmJjx8KQIWnS0W235brIjJmVVjGJY66kZYAXszfzU4Aa3xUktQAGA7sB3YADJXWrsNvlwO0RsTEwEBiUHbs10AvYGNgI2BzoXX5MRKwP/AzoJWm3Il6DldK8eakceteu6UoOM2vSikkch2b79Qe+AtYA9iniuJ7AtIh4KyK+Ae4F9qywTzdgVHZ7dMH2AFoDy5DmjCwNfBgRcyNiNEB2zueBTli+Lr0UXnsNBg+Gtm3zjsbMSqzaxJG1Gi6KiK8j4vOIOD8iTs26rmqyOvBuwf0Z2WOFJrEoCe0FLCdp5Yh4hpRI3s++RkbElAqxrQDswaLEUzH2Y7NFqMpmz55dRLi2RKZOhYsuSvMzdnPjz6w5qDZxRMQCoGPWVVVblS32VLFwyulAb0kvkLqi3gPmS1oH2IDUmlgd6CNpu+9OLLUE7gH+XE0BxiER0SMienTs2HEJwrcaRaQB8Vat4Oqr847GzOpJMfM4/guMlzSM1FUFQERcWcNxM0jdWuU6ATMLd4iImcDeAJLaAftExBxJxwLPRsSX2bbhwJbA09mhQ4CpEeF3qzzdfXeaszF4MKy6at7RmFk9KWaMYybwWLbvcgVfNZkArCupa9ZiOQBYbJU
RSR0klcdwFml1QYB3SC2RlpKWJrVGpmTHXAgsD5xcRAxWKp98kq6g6tkzFVIzs2ajmJIj5y/JiSNivqT+wEigBXBLRLwiaSBQFhHDgO2BQZKC1Jrolx3+ANAHeJnUvTUiIh7NLtc9B3gNeD5b+vzaiLhpSWK0H2DAgFTzf+TIVFTNzJoNRQ312iWN5vtjE0REn1IFVdd69OgRZWVleYfRdIwfD9tsk2r+X3553tGYWYlImhgRPSo+XswYx+kFt1uTroKaX1eBWSPz7bdpzsYaa6TyImbW7BTTVTWxwkPjJY0tUTzW0F1xRSrVPGxYWnHMzJqdGhOHpJUK7i4FbAb8uGQRWcP11lswcGBa7nWPPfKOxsxyUkxX1UTSGIdIXVRvA0eVMihrgCLSkq8tWsCf/5x3NGaWo2K6qrrWRyDWwN1/P4wYkSb6dXKVF7PmrJj1OPpl5T3K768o6YTShmUNypw5cNJJsOmm0L9/3tGYWc6KmQB4TER8Vn4nIj4FjildSNbgnH02zJqVyqZ7zoZZs1dM4lhK2Uw7+K7w4ZLUrrLG6Lnn4LrrUktjs83yjsbMGoBiBsdHAn+TdD1pkPy3wIiSRmUNw/z5qZzIqqt6nQ0z+04xieNM4FjgeNKVVU8ALvHRHFxzDUyaBA8+CO3b5x2NmTUQxSSONsCNEXE9fNdV1QqYW8rALGfTp8Mf/gC7757mbZiZZYoZ4xhFSh7l2gBPlSYcaxAi4MQT0+1rrwVVtrSKmTVXxbQ4WpeviwEQEV9K8vqgTdkjj8Cjj8Jll0HnznlHY2YNTDEtjq8kbVp+R9JmwP9KF5Ll6osvUmtj443T3A0zswqKaXGcDNwvqXz1vlWB/UsXkuXq3HNh5kx44AFYeum8ozGzBqiYkiMTJK0PrEe6quq1iPi25JFZ/Zs4Ef7yl1Q2fcst847GzBqoYlockJJGN9J6HD+TRETcXrqwrN4tWJDmbKyyClx8cd7RmFkDVkxZ9T+SlnjtBjwO7AaMA5w4mpLBg1OL4957YYUVat7fzJqtYgbH9wV2AD6IiCOB7qR5HNZUzJgB55wDu+wCffvmHY2ZNXDFJI7/RcRCYL6k9sAsYK3ShmX16qSTUnmRv/7VczbMrEbFjHGUZWXVbyQt6vQl8J+SRmX159FH4aGH0rjGWv48YGY1U0QUv7PUBWgfES+VKqBS6NGjR5SVleUdRsPz1VfQrRsstxw8/zws46LHZraIpIkR0aPi48VeVQVARPy3ziKy/J13HrzzDowb56RhZkUrZozDmqJJk+Cqq+Doo6FXr7yjMbNGxImjOSqfs7HSSvCnP+UdjZk1MlV2VUlaqboDI+KTug/H6sWQIWllvzvuSMnDzKwWqhvjmEha8a+y6zMDX5LbOL3/PgwYADvsAAcfnHc0ZtYIVdlVFRFdI2Kt7HvFr6KShqRdJb0uaZqkAZVs7yxplKSXJI2R1Klg26WSXpE0RdKfy9c9l7SZpJezc373uBXplFNg3ry0jrh/dGa2BGoc41ByiKRzs/trSupZxHEtgMGkEiXdgAMldauw2+XA7RGxMTAQGJQduzXQC9gY2AjYHOidHXMdaSnbdbOvXWuKxTIjRsB998HZZ8O66+YdjZk1UsUMjv8V2Ao4KLv/BSkh1KQnMC0i3oqIb4B7gT0r7NONtMIgwOiC7UEqqLgMqbzJ0sCHklYlzSN5JtIElNuBXxcRi82dCyecAOutB2eemXc0ZtaIFZM4toiIfsDXABHxKekNvSarA+8W3J+RPVZoErBPdnsvYDlJK0fEM6RE8n72NTIipmTHzyDc73oAABM2SURBVKjhnABIOlZSmaSy2bNnFxFuE3fhhfD223D99dDKpcbMbMkVkzi+zbqdAkBSR2BhEcdVNahe6HSgt6QXSF1R75FqYq0DbAB0IiWGPpK2K/Kc6cGIIRHRIyJ6dOzYsYhwm7DJk9MysEccAdtvn3c0ZtbIFZM4/gw8DKwi6SJSSfViFmyYAaxRcL8TMLNwh4iYGRF7R8TPgHOyx+aQWh/PRsSX2Xrnw4Ets3N2qu6cVsHChWlhpuWXT8nDzOwHqjFxRMRdwBmkgev3gV9HxP1FnHsCsK6krpKWAQ4AhhXuIKmDpPIYzgJuyW6/Q2qJtJS0NKk1MiUi3ge+kLRldjXVYcDfi4il+br5Zhg/PiWNDh3yjsbMmoBiJwDOAu4p3FbTBMCImC+pPzASaAHcEhGvSBoIlEXEMNICUYMkBfA00C87/AGgD/AyqStqREQ8mm07HrgNaENqiQwv7qU2Q7NmpYHw7bZL3VRmZnWgyuq4kt5m0QTANYFPs9srAO9ERNf6CvKHarbVcQ89NF1++9JLsP76eUdjZo1MVdVxa5wASGox7BERHSJiZWB34KHShWp14qmn4M470yxxJw0zq0PFDI5vHhGPl9+JiOEsmoxnDdHXX8Pxx8M666TJfmZmdaiY9Tg+kvR74E5S19UhwMcljcp+mIsvhmnT4MknoXXrvKMxsyammBbHgUBH0iW5jwCrZI9ZQ/Taa3DJJamA4Y475h2NmTVBNbY4squnTpLUHliYzauwhigizdlYdlm44oq8ozGzJqqYIoc/zWZ2vwy8ImmipI1KH5rV2tChMHYsXHop/OhHeUdjZk1UMV1VNwCnRkTniOgMnAYMKW1YVmsffQSnn56WgT3qqLyjMbMmrJjEsWxEjC6/ExFjgGVLFpEtmf/7P5gzJxUxXMorAptZ6RRzVdVb2Vocd2T3DwHeLl1IVmtjx8Jtt6U5Gxu5F9HMSquYj6a/IV1V9RDpyqqOwJGlDMpqYd68NCDetSuce27e0ZhZM1DMVVWfAr+rh1hsSVx6aboE9/HHoW3bvKMxs2aguiKHw6raBhARv6r7cKxWpk6Fiy6Cvn1ht93yjsbMmonqWhxbkVbwuwd4jsoXUbK8RKSyIq1awdVX5x2NmTUj1SWOHwM7kWaJHwT8A7gnIl6pj8CsBnffDaNGweDBsOqqeUdjZs1IddVxF0TEiIg4nLT63jRgjKQT6y06q9wnn8Cpp0LPnnDccXlHY2bNTLWD45JaAb8ktTq6kJaRdUn1vA0YAB9/DCNHQosWeUdjZs1MdYPjQ4GNSCvsnR8Rk+stKqva+PFw441w2mmwySZ5R2NmzVB1KwAuBL7K7hbuJCAion2JY6szTWYFwG++gU03hc8/h1dfhXbt8o7IzJqwqlYArLLFERGuW9HQXHklvPIKDBvmpGFmuXFyaCzeegsGDoS99oI99sg7GjNrxpw4GoMI6NcvDYT/+c95R2NmzVwxRQ4tb/ffDyNGpIl+nTrlHY2ZNXNucTR0n30GJ52UBsX79887GjMztzgavHPOgVmz4LHHPGfDzBoEtzgasueeg+uuSy2NzTbLOxozM8CJo+GaPz+VE1ltNbjggryjMTP7TkkTh6RdJb0uaZqkAZVs7yxplKSXJI2R1Cl7/OeSXiz4+lrSr7NtO0h6Pnt8nKR1SvkacnPNNTBpUrqKqn2jmWtpZs1AlTPHf/CJpRbAG6QKuzOACcCBEfFqwT73A49FxF
BJfYAjI+LQCudZiVRgsVNEzJX0BrBnREyRdALQMyKOqC6WRjdzfPp06NYN+vRJk/3kivZmVv+qmjleyhZHT2BaRLwVEd8A9wJ7VtinGzAquz26ku0A+wLDI2Judj+A8o/gywMz6zTqvEXAiVkB4muvddIwswanlIljddJCUOVmZI8VmgTsk93eC1hO0soV9jmAtJhUuaOBxyXNAA4FLqmziBuCRx6BRx+F88+Hzp3zjsbM7HtKmTgq+6hcsV/sdKC3pBeA3sB7wPzvTiCtCvwUGFlwzCnALyKiE3ArcGWlTy4dK6lMUtns2bOX/FXUpy++SK2N7t3T3A0zswaolPM4ZgBrFNzvRIVupYiYCewNIKkdsE9EzCnYpS/wcER8m+3TEegeEc9l2+8DRlT25BExBBgCaYzjB7+a+nDuuTBzJjz4ICy9dN7RmJlVqpQtjgnAupK6SlqG1OU0rHAHSR0klcdwFnBLhXMcyOLdVJ8Cy0v6SXZ/J2BKnUeeh4kT4S9/SeuIb7FF3tGYmVWpZC2OiJgvqT+pm6kFcEtEvCJpIFAWEcOA7YFBkgJ4GuhXfrykLqQWy9gK5zwGeDBbL+RT4Deleg31ZsGCNGdjlVXg4ovzjsbMrFolLTkSEY8Dj1d47A8Ftx8AHqji2P/y/cF0IuJh4OE6DTRvgwenFse998Lyy+cdjZlZtTxzPG8zZqR6VLvuCn375h2NmVmNnDjydtJJqbzI4MGes2FmjYKr4+bp0UfhoYdg0CBYa628ozEzK4pbHHn56qtU9XbDDeG00/KOxsysaG5x5OW88+Cdd2DcOM/ZMLNGxS2OPEyaBFddBcccA7165R2NmVmtOHHUt/I5GyutBJc0rTJbZtY8uKuqvt1wQ1rZ7847U/IwM2tk3OKoT++/D2edBTvuCAcdlHc0ZmZLxImjPp1yCsybB3/9q+dsmFmj5cRRX0aMgPvuS7PE110372jMzJaYE0d9mDsXTjgB1lsPzjgj72jMzH4QD47XhwsugLffhjFjoFWrvKMxM/tB3OIotcmT4fLL4YgjoHfvvKMxM/vBnDhKaeFC+O1vU6n0yy7LOxozszrhrqpSuvlmGD8ebr0VOnTIOxozszrhFkepfPhhGgjv3RsOPzzvaMzM6owTR6mcdlqqgHv99Z6zYWZNihNHKTz1FNx1FwwYAOuvn3c0ZmZ1yomjrn39NRx/PKyzDpx9dt7RmJnVOQ+O17WLL4Zp0+DJJ6F167yjMTOrc25x1KXXXkul0g8+OBUyNDNrgpw46kpEmrPRrh1ceWXe0ZiZlYy7qurK0KEwdiwMGQKrrJJ3NGZmJeMWR1346CM4/fS0DOxRR+UdjZlZSTlx1IX/+z+YMyfN2VjKP1Iza9r8LvdDjRkDt92WWhwbbZR3NGZmJVfSxCFpV0mvS5omaUAl2ztLGiXpJUljJHXKHv+5pBcLvr6W9OtsmyRdJOkNSVMk/a6Ur6Fa8+alAfGuXeHcc3MLw8ysPpVscFxSC2AwsBMwA5ggaVhEvFqw2+XA7RExVFIfYBBwaESMBjbJzrMSMA14IjvmCGANYP2IWCgpv5HoSy+F11+H4cOhbdvcwjAzq0+lbHH0BKZFxFsR8Q1wL7BnhX26AaOy26Mr2Q6wLzA8IuZm948HBkbEQoCImFXnkRdj6lS46CLo2xd23TWXEMzM8lDKxLE68G7B/RnZY4UmAftkt/cClpO0coV9DgDuKbi/NrC/pDJJwyVVuoC3pGOzfcpmz569xC+iUhGprEirVnD11XV7bjOzBq6UiaOykrBR4f7pQG9JLwC9gfeA+d+dQFoV+CkwsuCYVsDXEdEDuBG4pbInj4ghEdEjInp07NhxyV9FZe6+G0aNgkGDYNVV6/bcZmYNXCknAM4gjUWU6wTMLNwhImYCewNIagfsExFzCnbpCzwcEd9WOO+D2e2HgVvrOO7qffIJnHIKbLEFHHdcvT61mVlDUMoWxwRgXUldJS1D6nIaVriDpA6SymM4i++3Hg5k8W4qgEeAPtnt3sAbdRp1TQYMSMnjhhugRYt6fWozs4agZIkjIuYD/UndTFOAv0XEK5IGSvpVttv2wOuS3gB+BFxUfrykLqQWy9gKp74E2EfSy6SrsI4u1Wv4nvHj4cYb4eSToXv3entaM7OGRBEVhx2anh49ekRZWdkPO8k338Cmm8IXX8Arr6RihmZmTZikidl48mJc5LBYV16ZEsawYU4aZtasueRIMd56C84/H/beG/bYI+9ozMxy5cRRkwjo1w9atoRrrsk7GjOz3Lmrqib33w8jRqSJfp065R2NmVnu3OKozmefwUknwWabQf/+eUdjZtYguMVRnXPOgVmz4LHHPGfDzCzjFkd1unaFM85ILQ4zMwPc4qje6afnHYGZWYPjFoeZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdWKE4eZmdVKs1jISdJsYPoSHt4B+KgOw6krjqt2HFftOK7aaapxdY6IjhUfbBaJ44eQVFbZClh5c1y147hqx3HVTnOLy11VZmZWK04cZmZWK04cNRuSdwBVcFy147hqx3HVTrOKy2McZmZWK25xmJlZrThxmJlZrThxVEHSGpJGS5oi6RVJJ+UdE4Ck1pL+I2lSFtf5ecdUTlILSS9IeizvWApJ+q+klyW9KKks73jKSVpB0gOSXsv+zrZqADGtl/2cyr8+l3Ry3nEBSDol+5ufLOkeSa3zjglA0klZTK/k+bOSdIukWZImFzy2kqQnJU3Nvq9YF8/lxFG1+cBpEbEBsCXQT1K3nGMCmAf0iYjuwCbArpK2zDmmcicBU/IOogo/j4hNGti19tcAIyJifaA7DeBnFxGvZz+nTYDNgLnAwzmHhaTVgd8BPSJiI6AFcEC+UYGkjYBjgJ6k3+HuktbNKZzbgF0rPDYAGBUR6wKjsvs/mBNHFSLi/Yh4Prv9BemfevV8o4JIvszuLp195X6Fg6ROwC+Bm/KOpTGQ1B7YDrgZICK+iYjP8o3qe3YA3oyIJa26UNdaAm0ktQTaAjNzjgdgA+DZiJgbEfOBscBeeQQSEU8Dn1R4eE9gaHZ7KPDrunguJ44iSOoC/Ax4Lt9IkqxL6EVgFvBkRDSEuK4GzgAW5h1IJQJ4QtJEScfmHUxmLWA2cGvWvXeTpGXzDqqCA4B78g4CICLeAy4H3gHeB+ZExBP5RgXAZGA7SStLagv8Algj55gK/Sgi3of0YRhYpS5O6sRRA0ntgAeBkyPi87zjAYiIBVlXQiegZ9Zczo2k3YFZETExzziq0SsiNgV2I3U5bpd3QKRPz5sC10XEz4CvqKNuhLogaRngV8D9eccCkPXN7wl0BVYDlpV0SL5RQURMAf4EPAmMACaRurmbNCeOakhampQ07oqIh/KOp6Ksa2MM3+/XrG+9gF9J+i9wL9BH0p35hrRIRMzMvs8i9df3zDciAGYAMwpaiw+QEklDsRvwfER8mHcgmR2BtyNidkR8CzwEbJ1zTABExM0RsWlEbEfqKpqad0wFPpS0KkD2fVZdnNSJo
wqSROp/nhIRV+YdTzlJHSWtkN1uQ/qHei3PmCLirIjoFBFdSN0b/4yI3D8NAkhaVtJy5beBnUndC7mKiA+AdyWtlz20A/BqjiFVdCANpJsq8w6wpaS22f/mDjSAiwkAJK2SfV8T2JuG9XMbBhye3T4c+HtdnLRlXZykieoFHAq8nI0nAJwdEY/nGBPAqsBQSS1Iif9vEdGgLn9tYH4EPJzea2gJ3B0RI/IN6TsnAndl3UJvAUfmHA8AWV/9TsBxecdSLiKek/QA8DypK+gFGk6ZjwclrQx8C/SLiE/zCELSPcD2QAdJM4A/ApcAf5N0FCn57lcnz+WSI2ZmVhvuqjIzs1px4jAzs1px4jAzs1px4jAzs1px4jAzs1px4jCrQ5K2z7M6sKQjJF2b1/Nb8+DEYWbfyeYHmVXLicOaHUmHZGuavCjphvI3S0lfSrpC0vOSRknqmD2+iaRnJb0k6eHyNQ0krSPpqWxtlOclrZ09RbuCdTbuymY6V4xhjKQ/ZXG8IWnb7PHFWgySHpO0fUF8f8qKNT4lqWd2nrck/arg9GtIGiHpdUl/LPJ1D5T0HJD7miDW8DlxWLMiaQNgf1Lhw02ABcDB2eZlSfWZNiWVxy5/070dODMiNgZeLnj8LmBwtjbK1qSqrZAqKZ8MdCNVwe1VRTgtI6Jntu8fq9in0LLAmIjYDPgCuJA0w3svYGDBfj2z17QJsJ+kHkW87skRsUVEjCsiDmvmXHLEmpsdSAsUTcgaAm1YVPhtIXBfdvtO4CFJywMrRMTY7PGhwP1Z/avVI+JhgIj4GiA7538iYkZ2/0WgC1DZG3J54cyJ2T41+YZUgRVSApsXEd9KernC8U9GxMfZ8z8EbEMq01HV615AKuZpVhQnDmtuBAyNiLOK2Le6ejzf634qMK/g9gKq/j+bV8k+81m8J6BwedRvY1GNoIXlx0fEwmxxo6riDqp/3V9HxIIqYjT7HndVWXMzCti3oKLpSpI6Z9uWAvbNbh8EjIuIOcCn5WMQpMKXY7O1WWZI+nV2nlZZccAf6r/AJpKWkrQGS1YCfqfsdbUhrfg2nupft1mtuMVhzUpEvCrp96QVAZciq2gKTCctprShpInAHNKYAKRy1NdniaGwiu2hwA2SBmbnqYvKo+OBt0ldUZNJ1WBraxxwB7AOqRpwGUA1r9usVlwd1ywj6cuIaJd3HGYNnbuqzMysVtziMDOzWnGLw8zMasWJw8zMasWJw8zMasWJw8zMasWJw8zMauX/AWFVESTQvY98AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "def eval_show(epoch_per_eval):\n", + " plt.xlabel(\"epoch number\")\n", + " plt.ylabel(\"Model accuracy\")\n", + " plt.title(\"Model accuracy variation chart\")\n", + " plt.plot(epoch_per_eval[\"epoch\"], epoch_per_eval[\"acc\"], \"red\")\n", + " plt.show()\n", + " \n", + "eval_show(epoch_per_eval)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "从上图可以一目了然地挑选出需要的最优模型。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 总结" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "本例使用MNIST数据集通过卷积神经网络LeNet5进行训练,着重介绍了利用回调函数在进行模型训练的同时进行模型的验证,保存对应`epoch`的模型,并从中挑选出最优模型的方法。" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/source_en/advanced_use/debugging_in_pynative_mode.md b/tutorials/source_en/advanced_use/debugging_in_pynative_mode.md index fd1a93d2764234ec6b91cfae0e07fc6f2fd9bd95..877b07e0a6dcc8e7980a883927f5d1f235bcd213 100644 --- a/tutorials/source_en/advanced_use/debugging_in_pynative_mode.md +++ b/tutorials/source_en/advanced_use/debugging_in_pynative_mode.md @@ -26,6 +26,8 @@ By default, MindSpore is in PyNative mode. You can switch it to the graph mode b In PyNative mode, single operators, common functions, network inference, and separated gradient calculation can be executed. The following describes the usage and precautions. +> In PyNative mode, operators are executed asynchronously on the device to improve performance. Therefore, when an error occurs during operator excution, the error information may be displayed after the program is executed. + ## Executing a Single Operator Execute a single operator and output the result, as shown in the following example. @@ -243,6 +245,7 @@ print(z.asnumpy()) [ 0.0377498 -0.06117418 0.00546303]]]] ``` + ## Debugging Network Train Model In PyNative mode, the gradient can be calculated separately. As shown in the following example, `GradOperation` is used to calculate all input gradients of the function or the network. Note that the inputs have to be Tensor. 
@@ -259,7 +262,7 @@ def mul(x, y): return x * y def mainf(x, y): - return C.GradOperation('get_all', get_all=True)(mul)(x, y) + return C.GradOperation(get_all=True)(mul)(x, y) print(mainf(Tensor(1, mstype.int32), Tensor(2, mstype.int32))) ``` @@ -354,7 +357,7 @@ class GradWrap(nn.Cell): def construct(self, x, label): weights = self.weights - return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label) + return C.GradOperation(get_by_list=True)(self.network, weights)(x, label) net = LeNet5() optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) diff --git a/tutorials/source_en/advanced_use/distributed_training_ascend.md b/tutorials/source_en/advanced_use/distributed_training_ascend.md index bc1de3258cd5b2ffe4adacc774c75e6a6aaacdd9..6cfb3db2427102e17d88aa4ff1c02c56124ba1fb 100644 --- a/tutorials/source_en/advanced_use/distributed_training_ascend.md +++ b/tutorials/source_en/advanced_use/distributed_training_ascend.md @@ -212,10 +212,9 @@ The `Momentum` optimizer is used as the parameter update tool. The definition is ## Training the Network -`context.set_auto_parallel_context` is an API for users to set parallel training parameters and must be called before the initialization of `Model`. If no parameters are specified, MindSpore will automatically set parameters to the empirical values based on the parallel mode. For example, in data parallel mode, `parameter_broadcast` is enabled by default. The related parameters are as follows: +`context.set_auto_parallel_context` is an API for users to set parallel training parameters and must be called before the initialization of networks. The related parameters are as follows: - `parallel_mode`: parallel distributed mode. The default value is `ParallelMode.STAND_ALONE`. The options are `ParallelMode.DATA_PARALLEL` and `ParallelMode.AUTO_PARALLEL`. -- `parameter_broadcast`: whether to broadcast initialized parameters. The default value is `True` in `DATA_PARALLEL` and `HYBRID_PARALLEL` mode. - `mirror_mean`: During backward computation, the framework collects gradients of parameters in data parallel mode across multiple hosts, obtains the global gradient value, and transfers the global gradient value to the optimizer for update. The default value is `False`, which indicates that the `allreduce_sum` operation is applied. The value `True` indicates that the `allreduce_mean` operation is applied. - `enable_parallel_optimizer`: a developing feature. Whether to use optimizer model parallel, which improves performance by distributing the parameters to be updated to each worker, and applying Broadcast among workers to share updated parameters. This feature can be used only in data parallel mode and when the number of parameters is larger than the number of devices. 
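
For readers skimming this hunk, a minimal sketch of the call order the paragraph above describes may help. It is illustrative only and not part of the patch: the `ParallelMode` import location and the `device_target` value are assumptions (the import path differs between MindSpore releases, e.g. `mindspore.train.model` vs `mindspore.context`), while `parallel_mode` and `mirror_mean` are the parameters named in the text.

```python
# Hedged sketch: configure the auto-parallel context before the network/Model is built.
from mindspore import context
from mindspore.communication.management import init
from mindspore.train.model import ParallelMode  # assumed import path; varies by release

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")  # device_target assumed
init()  # initialize collective communication before setting any parallel context
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                  mirror_mean=True)  # average (not sum) gradients across devices
```

With `mirror_mean=True`, the framework applies `allreduce_mean` instead of `allreduce_sum` when aggregating gradients, matching the behaviour described above.
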
diff --git a/tutorials/source_en/advanced_use/system_metrics.md b/tutorials/source_en/advanced_use/hardware_resources.md similarity index 80% rename from tutorials/source_en/advanced_use/system_metrics.md rename to tutorials/source_en/advanced_use/hardware_resources.md index 9eb633605059b98564de44ffd941acd354a8bea2..cb9dc16d3411acf5310897bc405badea0d46e9d3 100644 --- a/tutorials/source_en/advanced_use/system_metrics.md +++ b/tutorials/source_en/advanced_use/hardware_resources.md @@ -1,4 +1,4 @@ -# System Metrics +# Hardware Resources `Ascend` `Model Optimization` `Intermediate` `Expert` @@ -11,20 +11,20 @@ - + ## Overview -Users can view system metrics such as Ascend AI processor, CPU, memory, etc., so as to allocate appropriate resources for training. -Just [Start MindInsight](https://www.mindspore.cn/tutorial/en/master/advanced_use/mindinsight_commands.html#start-the-service), and click "System Metrics" in the navigation bar to view it. +Users can view hardware resources such as Ascend AI processor, CPU, memory, etc., so as to allocate appropriate resources for training. +Just [Start MindInsight](https://www.mindspore.cn/tutorial/en/master/advanced_use/mindinsight_commands.html#start-the-service), and click "Hardware Resources" in the navigation bar to view it. ## Ascend AI Processor Board The Ascend AI processor board is used to view the current information of each NPU chip. -![sysmetric_npu.png](./images/sysmetric_npu.png) +![resources_npu.png](./images/resources_npu.png) -Figure 1: System metrics Ascend AI processor board +Figure 1: Hardware resources Ascend AI processor board Figure 1 is a table, each row shows the information of each NPU chip at a certain time. The metrics in each column are as follows: @@ -43,9 +43,9 @@ Figure 1 is a table, each row shows the information of each NPU chip at a certai The CPU board is used to view the current system CPU total and the information of each core. -![sysmetric_cpu.png](./images/sysmetric_cpu.png) +![resources_cpu.png](./images/resources_cpu.png) -Figure 2: System Metrics CPU board +Figure 2: Hardware resources CPU board The two-dimensional table in Figure 2 shows the percentage of CPU utilization for each core; the following two rows show the detailed metrics of *CPU-total* and *CPU-selected*. @@ -68,8 +68,8 @@ The two-dimensional table in Figure 2 shows the percentage of CPU utilization fo The memory board is used to view the current system memory information. -![sysmetric_mem.png](./images/sysmetric_mem.png) +![resources_mem.png](./images/resources_mem.png) -Figure 3: Hardware resource memory board +Figure 3: Hardware resources memory board Figure 3 shows a pie chart showing used memory and available memory. Other memory types are classified into *others*. 
diff --git a/tutorials/source_en/advanced_use/images/sysmetric_cpu.png b/tutorials/source_en/advanced_use/images/resources_cpu.png similarity index 100% rename from tutorials/source_en/advanced_use/images/sysmetric_cpu.png rename to tutorials/source_en/advanced_use/images/resources_cpu.png
diff --git a/tutorials/source_en/advanced_use/images/resources_mem.png b/tutorials/source_en/advanced_use/images/resources_mem.png new file mode 100644 index 0000000000000000000000000000000000000000..a222700035f14c08f1979cec7914a976a3633070 Binary files /dev/null and b/tutorials/source_en/advanced_use/images/resources_mem.png differ
diff --git a/tutorials/source_en/advanced_use/images/sysmetric_npu.png b/tutorials/source_en/advanced_use/images/resources_npu.png similarity index 100% rename from tutorials/source_en/advanced_use/images/sysmetric_npu.png rename to tutorials/source_en/advanced_use/images/resources_npu.png
diff --git a/tutorials/source_en/advanced_use/mindinsight_commands.md b/tutorials/source_en/advanced_use/mindinsight_commands.md index 462b6adc5c24d4362539850b5a73373ca299fd50..42b9af599466ce92bd3515c711f7eba24d8b92c0 100644 --- a/tutorials/source_en/advanced_use/mindinsight_commands.md +++ b/tutorials/source_en/advanced_use/mindinsight_commands.md @@ -2,8 +2,6 @@ `Ascend` `GPU` `Model Optimization` `Intermediate` `Expert` - - - [MindInsight Commands](#mindinsight-commands) @@ -15,6 +13,8 @@ + + ## View the Command Help Information ```shell
diff --git a/tutorials/source_en/advanced_use/on_device_inference.md b/tutorials/source_en/advanced_use/on_device_inference.md index 0b5cef7eff0700c36855df60836e211e1ba7a84b..fc4bc61442edf9e4066c4f1df87a5f3cc64f05d1 100644 --- a/tutorials/source_en/advanced_use/on_device_inference.md +++ b/tutorials/source_en/advanced_use/on_device_inference.md @@ -90,10 +90,10 @@ The compilation procedure is as follows: bash build.sh -I arm32 ``` -3. Go to the `mindspore/output` directory of the source code to obtain the compilation result. Unzip `mindspore-lite-0.6.0-converter-ubuntu.tar.gz` to get the result `mindspore-lite-0.6.0` after building. +3. Go to the `mindspore/output` directory of the source code to obtain the compilation result. Unzip `mindspore-lite-0.7.0-converter-ubuntu.tar.gz` to obtain the build output `mindspore-lite-0.7.0`. ```bash - tar -xvf mindspore-lite-0.6.0-converter-ubuntu.tar.gz + tar -xvf mindspore-lite-0.7.0-converter-ubuntu.tar.gz ``` ## Use of On-Device Inference @@ -171,7 +171,7 @@ To perform on-device model inference using MindSpore, perform the following step else: print("checkpoint file does not exist.") ``` -3. In `mindspore/output/mindspore-lite-0.6.0/converter` directory, calling MindSpore convert tool named `converter_lite`, convert model file (`.mindir`) to on_device inference model file (`.ms`). +3. In the `mindspore/output/mindspore-lite-0.7.0/converter` directory, call the MindSpore conversion tool `converter_lite` to convert the model file (`.mindir`) into an on-device inference model file (`.ms`).
``` ./converter_lite --fmk=MS --modelFile=./lenet.mindir --outputFile=lenet ```
diff --git a/tutorials/source_en/advanced_use/performance_profiling.md b/tutorials/source_en/advanced_use/performance_profiling.md index 6bf345ba7c316a270593a58d9f6c817fdb2383a1..2ede33a3407d5613822c923b95cdd5baa24082b9 100644 --- a/tutorials/source_en/advanced_use/performance_profiling.md +++ b/tutorials/source_en/advanced_use/performance_profiling.md @@ -29,6 +29,10 @@ Performance data like operators' execution time is recorded in files and can be - Start MindInsight and specify the profiler data directory using startup parameters. After MindInsight is started, access the visualization page based on the IP address and port number. The default access IP address is `http://127.0.0.1:8080`. - Find the training in the list, click the performance profiling link, and view the data on the web page. +## Preparing the Environment + +Before using the Profiler, make sure the ada process is running correctly in the background. The ada process must be started by the root user. The start command is `/usr/local/Ascend/driver/tools/ada`. + ## Preparing the Training Script To enable the performance profiling of neural networks, MindSpore Profiler APIs should be added into the script. At first, the MindSpore `Profiler` object need to be set after set context and before the network and HCCL initialization. Then, at the end of the training, `Profiler.analyse()` should be called to finish profiling and generate the perforamnce analyse results. @@ -183,4 +187,8 @@ W/A/S/D can be applied to zoom in and out of the Timeline graph. ## Specifications - To limit the data size generated by the Profiler, MindInsight suggests that for large neural network, the profiled steps should better below 10. + + > For how to limit the step count, please refer to the data preparation tutorial: + > + - The parse of Timeline data is time consuming, and several step's data is usually enough for analysis. In order to speed up the data parse and UI display, Profiler will show at most 20M data (Contain 10+ step information for large networks).
diff --git a/tutorials/source_en/advanced_use/performance_profiling_gpu.md b/tutorials/source_en/advanced_use/performance_profiling_gpu.md index 3cb758244a7637fb27d6f786debee934f7c050ad..0bbe119e404d30ca4f605240089b3f2ebce72237 100644 --- a/tutorials/source_en/advanced_use/performance_profiling_gpu.md +++ b/tutorials/source_en/advanced_use/performance_profiling_gpu.md @@ -23,7 +23,7 @@ Performance data like operators' execution time is recorded in files and can be > The GPU operation process is the same as that in Ascend chip.
> -> https://www.mindspore.cn/tutorial/en/master/advanced_use/performance_profiling.html#id3 +> ## Preparing the Training Script @@ -31,7 +31,37 @@ To enable the performance profiling of neural networks, MindSpore Profiler APIs > The sample code is the same as that in Ascend chip: > -> https://www.mindspore.cn/tutorial/en/master/advanced_use/performance_profiling.html#id4 +> + +Users can also collect profiling data through a user-defined callback: + +```python +class StopAtStep(Callback): + def __init__(self, start_step, stop_step): + super(StopAtStep, self).__init__() + self.start_step = start_step + self.stop_step = stop_step + self.already_analysed = False + + def step_begin(self, run_context): + cb_params = run_context.original_args() + step_num = cb_params.cur_step_num + if step_num == self.start_step: + self.profiler = Profiler() + + def step_end(self, run_context): + cb_params = run_context.original_args() + step_num = cb_params.cur_step_num + if step_num == self.stop_step and not self.already_analysed: + self.profiler.analyse() + self.already_analysed = True + + def end(self, run_context): + if not self.already_analysed: + self.profiler.analyse() +``` + +The code above is just an example. Users can implement the callback to suit their own scenarios. ## Launch MindInsight
diff --git a/tutorials/source_en/advanced_use/visualization_tutorials.rst b/tutorials/source_en/advanced_use/visualization_tutorials.rst index 4404eb2ff14804d50e913c283954908d7c7231dc..1900ead71703abb8e2135fae7691a3c0327998c1 100644 --- a/tutorials/source_en/advanced_use/visualization_tutorials.rst +++ b/tutorials/source_en/advanced_use/visualization_tutorials.rst @@ -7,7 +7,7 @@ Training Process Visualization summary_record dashboard lineage_and_scalars_comparision - system_metrics + hardware_resources performance_profiling performance_profiling_gpu mindinsight_commands
diff --git a/tutorials/source_en/use/custom_operator.md b/tutorials/source_en/use/custom_operator.md index bbb0ac9b352cf5f88885dbf77f98b9d2b8b9f499..cff9317dd13be9efd5d52200234cc4256eecf862 100644 --- a/tutorials/source_en/use/custom_operator.md +++ b/tutorials/source_en/use/custom_operator.md @@ -232,7 +232,7 @@ def test_grad_net(): x = np.array([1.0, 4.0, 9.0]).astype(np.float32) sens = np.array([1.0, 1.0, 1.0]).astype(np.float32) square = Net() - grad = C.GradOperation('grad_with_sens', sens_param=True) + grad = C.GradOperation(sens_param=True) dx = grad(square)(Tensor(x), Tensor(sens)) print("x: ", x) print("dx: ", dx)
diff --git a/tutorials/source_zh_cn/advanced_use/bert_poetry.md b/tutorials/source_zh_cn/advanced_use/bert_poetry.md new file mode 100644 index 0000000000000000000000000000000000000000..e4fc7f37ca8902a1b3d9a24647a95dc4c9cd08f8 --- /dev/null +++ b/tutorials/source_zh_cn/advanced_use/bert_poetry.md @@ -0,0 +1,308 @@ +# 智能写诗 + +`Ascend` `模型训练` `推理应用` `端侧` `高级` + + + +- [智能写诗](#智能写诗) + - [案例简介](#案例简介) + - [模型介绍](#模型介绍) + - [模型训练](#模型训练) + - [Pre-training](#pre-training) + - [Fine-tuning](#fine-tuning) + - [模型修改](#模型修改) + - [样例代码](#样例代码) + - [实现步骤](#实现步骤) + - [基础信息](#基础信息) + - [数据准备](#数据准备) + - [训练](#训练) + - [推理验证](#推理验证) + - [服务部署](#服务部署) + - [参考资料](#参考资料) + + + + +五千年历史孕育了深厚的中华文化,而诗词是中华文化不可或缺的一部分,欣赏过诗词就可以感受到当中纯净、辽阔的意境,极致的感性,恰恰弥补了节奏紧迫的现代生活带给我们的拥挤感、浮躁感,古语曰:熟读唐诗三百首,不会作诗也会吟,今天理科生MindSpore也来秀一秀文艺范儿!
+ +## 案例简介 + +通过MindSpore训练出智能写诗模型及部署预测服务,具体流程如下图所示: + +![introduce image](images/introduce.PNG) + +图1:案例流程图 + +由于Bert预训练比较费时费力,在本案例中省略了预训练阶段,直接提供MindSpore预训练好的Bert-Base模型,经过Fine-tuning后训练获得最终的模型的训练全流程。 + +除此之外,将展示如何通过MindSpore Serving将该模型部署成一个预测服务,Clients代码可以发送请求给该预测服务并获得预测结果。 + +## 模型介绍 + +和诗词打交道需要用NLP相关的网络,BERT作为NLP领域中里程碑式的模型,极大地推动了NLP社区的发展,BERT模型由Google提出,采用Transformer中的Encoder结构,通过若干层Encoder的堆叠并借由注意力机制,在多项GLUE(General Language Understanding Evaluation)任务中取得了SOTA(State Of The Art)的效果。 + +正是由于这种注意力的机制,不同于以往的循环神经网络的结构,可以做高度的并行计算,这样便可以充分发挥出Ascend 910AI处理器的强大算力,获得极佳的性能表现。 + +## 模型训练 + +分为两个步骤,即Pre-training和Fine-tuning。首先在海量无标签的数据上进行Pre-training,希望通过此过程让模型掌握一般的人类语言语义机制,然后在Fine-tuning阶段会针对特定细分领域的有标签数据进行训练以完成特定任务。 + +### Pre-training + +Pre-training是在无标签数据上进行的自编码训练,因此训练任务的设计尤为重要,BERT中的Pre-training包含两项任务MLM(Masked Language Model)和NSP(Next Sentence Prediction)。 + +- **MLM任务**是在输入时,随机将部分token置换为[MASK]标记,然后通过注意力机制,由其上下文预测出被遮挡位置的原始token。 + +- BERT模型的输入是两“句”话:A与B,构造数据的时候会以50%的概率随机调换A、B的位置,**NSP任务**是预测A与B是否是相连的两“句”话。 + +在MLM基础上再增加一个NSP任务,是考虑到实际任务中并没有MLM这种任务,增加一个更符合实际任务类型的预训练任务。 + +从上述描述中可以看出,Pre-training并不需要任务数据标签,这种MLM的训练任务本质上是去噪自编码模型,因此BERT可以利用海量的无标签数据来进行预训练。通过预训练阶段的任务设置,BERT可以从无标签数据中学到基础语义逻辑,然后配合Finetune过程完成特定任务训练。 + +BERT模型的结构如下图所示,输入两“句”话,如果是中文模型,那么每一个token对应一个汉字,[CLS]和[SEP]是插入的特殊标识位。 + +![Teaser image](images/bert_model.PNG) + +图2:Bert模型结构[1] + +### Fine-tuning + +Fine-tuning是在BERT的预训练模型基础上,在最后增加一层适配实际任务,然后在有标签数据上进行少量的训练。 + +Fine-tuning的模式可以分为两大类,end-to-end Fine-tuning和feature-based approach,两者的区别在于Finetune阶段中是否修改BERT预训练模型中的参数,正常情况下都是使用end-to-end Fine-tuning。 + +### 模型修改 + +BERT采用了Encoder结构,`attention_mask`为全1的向量,即每个token都可以看到其前后的token,此举帮助每一个token都可以了解到整句话信息从而加强语义理解能力,所以BERT天生就不是生成式模型。 + +语句生成任务中,在生成下一个token时,应当只能看到之前token的信息,而不应该看到全局信息,因此需要在修改`attention_mask`为下三角矩阵,这样当前token只能看到自己及之前的token信息。 + +用于Fine-tuning的数据是40000多首诗词,并无标签,因此构造Fine-tuning任务如下图所示,每一个token的输出要接近下一个标签token,使用交叉熵作为损失函数。 + +![Teaser image](images/finetune.PNG) + +图2:训练流程示意图 + +## 样例代码 + +样例代码可[点击下载](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com:443/DemoCode/bert_poetry.rar),可直接运行体验实现写诗效果,代码结构如下: + +``` +└─bert_poetry + ├── src + ├── bert_for_pre_training.py # 封装BERT-Base正反向网络类 + ├── bert_model.py # 定义BERT正向网络结构 + ├── finetune_config.py # Fine-tuning配置文件 + ├── fused_layer_norm.py # 定义fused_layer_norm + ├── __init__.py # __init__ + ├── utils.py # 定义Fine-tuning正向网络结构 + └── poetry_dataset.py # 部分代码取自[2],解析poetry.txt,生成所需dataset + ├── vocab.txt # 词汇表 + ├── generator.py # 部分代码取自[2],推理生成诗句使用函数 + ├── poetry.py # 训练、推理、导出函数 + ├── serving + ├── ms_serving # 启动服务器侧serving + ├── bert_flask.py # 服务器侧接收requests请求 + ├── poetry_client.py # 客户端代码 + ├── ms_service_pb2_grpc.py # 定义了grpc相关函数供bert_flask.py使用 + └── ms_service_pb2.py # 定义了protocol buffer相关函数供bert_flask.py使用 + +``` + +## 实现步骤 + +### 基础信息 + +基于MindSpore 0.6.0-beta版本,在Ascend 910AI处理器平台上进行训练及推理。 + +### 数据准备 + +安装bert4keras [3],需要用到其中的`Tokenizer`和`load_vocab`函数,安装命令如下: +``` +pip install bert4keras +pip install bottle +``` + +数据集为43030首诗词:可[下载](https://github.com/AaronJny/DeepLearningExamples/tree/master/keras-bert-poetry-generator)其中的`poetry.txt`。 + +BERT-Base模型的预训练ckpt:可在[MindSpore官网](https://www.mindspore.cn/docs/zh-CN/master/network_list.html)下载。 + +### 训练 + +在`src/finetune_config.py`中修改`pre_training_ckpt`路径,加载预训练的ckpt,修改`batch_size`为bs,修改`dataset_path`为存放诗词的路径,默认的`BertConfig`为Base模型。 + +``` +'dataset_path': '/your/path/to/poetry.txt', +'batch_size': bs, +'pre_training_ckpt': '/your/path/to/pre_training_ckpt', +``` + +执行训练指令 + +``` +python poetry.py +``` + +### 推理验证 + 
+修改`poetry.py`中`test_eval`函数来控制随机生成、续写诗句或是藏头诗。 + +`generate_random_poetry`函数实现随机生成和续写诗句的功能,如果入参`s`为空则代表随机生成,`s`不为空则为续写诗句。 + +``` + output = generate_random_poetry(poetrymodel, s='') #随机生成 + output = generate_random_poetry(poetrymodel, s='天下为公') #续写诗句 +``` + +`generate_hidden`函数实现生成藏头诗的功能,入参`head`为隐藏的头部语句。 +``` + output = generate_hidden(poetrymodel, head="人工智能") #藏头诗 +``` + +执行推理指令 + +``` +python poetry.py --train=False --ckpt_path=/your/ckpt/path +``` + +会打印出最终生成的诗句,脚本中默认生成一首随机生成、一首续写诗词、一首藏头诗,结果如下所示: + +随机生成: + +``` +大堤柳暗, +春深树根。 +东望一望, +断回还家。 +山色渐风雨, +东风多雨禾。 +无情与去, +万里所思。 +``` + +续写 【天下为公】: + +``` +天下为公少, +唯君北向西。 +远山无路见, +长水见人偏。 +一路巴猿啸, +千峰楚客啼。 +幽深有诗策, +无以话年华。 +``` + +藏头诗 【人工智能】: + +``` +人君离别难堪望, +工部张机自少年。 +智士不知身没处, +能令圣德属何年。 +``` + +### 服务部署 + +通过MindSpore Serving将训练好的模型部署成推理服务。服务端部署包含以下3个步骤:模型导出、Serving服务启动、预处理及后处理的服务启动;客户端发送推理请求给服务端进行模型推理,推理生成的诗句返回给客户端展示。 + +- 模型导出 + +在使用Serving部署服务前,需要导出模型文件,在`poetry.py`中提供了`export_net`函数负责导出MINDIR模型,执行命令: + +``` +python poetry.py --export=True --ckpt_path=/your/ckpt/path +``` + +会在当前路径下生成`poetry.pb`文件。 + +- Serving服务 + +在服务器侧启动Serving服务,并加载导出的MINDIR文件`poetry.pb`。 + +``` +cd serving +./ms_serving --model_path=/path/to/your/MINDIR_file --model_name=your_mindir.pb +``` + +- 预处理及后处理的服务 + +预处理及后处理通过Flask框架来快速实现,在服务器侧运行`bert_flask.py`文件,启动Flask服务。 + +``` +python bert_flask.py +``` + +通过以上步骤,服务端部署就已经完成。 + +- 客户端 + +可用电脑作为客户端,修改`poetry_client.py`中的url请求地址为推理服务启动的服务器IP,并确保端口与服务端`bert_flask.py`中的端口一致,例如: + +``` +url = 'http://10.155.170.71:8080/' +``` + +运行`poetry_client.py`文件 + +``` +python poetry_client.py +``` + +此时在客户端输入指令,即可在远端服务器进行推理,返回生成的诗句。 + +``` +选择模式:0-随机生成,1:续写,2:藏头诗 +0 +``` +``` +一朵黄花叶, +千竿绿树枝。 +含香待夏晚, +澹浩长风时。 +``` + +``` +选择模式:0-随机生成,1:续写,2:藏头诗 +1 +输入首句诗 +明月 +``` +``` +明月照三峡, +长空一片云。 +秋风与雨过, +唯有客舟分。 +寒影出何处, +远林含不闻。 +不知前后事, +何道逐风君。 +``` + +``` +选择模式:0-随机生成,1:续写,2:藏头诗 +2 +输入藏头诗 +人工智能 +``` +``` +人生事太远, +工部与神期。 +智者岂无识, +能文争有疑。 +``` + +细读鉴赏一下,平仄、押韵、意味均有体现,AI诗人已然成形。 + + +> 友情提醒,修改其他类型数据集,也可以完成其他简单的生成类任务,如对春联,简单聊天机器人等,用户可尝试体验实现。 + + +## 参考资料 + +[1] [BERT:Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) + +[2] [https://github.com/AaronJny/DeepLearningExamples/](https://github.com/AaronJny/DeepLearningExamples/) + +[3] [https://github.com/bojone/bert4keras](https://github.com/bojone/bert4keras) + diff --git a/tutorials/source_zh_cn/advanced_use/debugging_in_pynative_mode.md b/tutorials/source_zh_cn/advanced_use/debugging_in_pynative_mode.md index 349b8c85031d37131a78df419323b6635418d101..a8c87f9ba8f6df44d9f5c4193b4d2b14ba1db147 100644 --- a/tutorials/source_zh_cn/advanced_use/debugging_in_pynative_mode.md +++ b/tutorials/source_zh_cn/advanced_use/debugging_in_pynative_mode.md @@ -28,6 +28,8 @@ MindSpore支持两种运行模式,在调试或者运行方面做了不同的 PyNative模式下,支持执行单算子、普通函数和网络,以及单独求梯度的操作。下面将详细介绍使用方法和注意事项。 +> PyNative模式下为了提升性能,算子在device上使用了异步执行方式,因此在算子执行错误的时候,错误信息可能会在程序执行到最后才显示。 + ## 执行单算子 执行单个算子,并打印相关结果,如下例所示。 @@ -245,6 +247,7 @@ print(z.asnumpy()) [ 0.0377498 -0.06117418 0.00546303]]]] ``` + ## 调试网络训练模型 PyNative模式下,还可以支持单独求梯度的操作。如下例所示,可通过`GradOperation`求该函数或者网络所有的输入梯度。需要注意,输入类型仅支持Tensor。 @@ -261,7 +264,7 @@ def mul(x, y): return x * y def mainf(x, y): - return C.GradOperation('get_all', get_all=True)(mul)(x, y) + return C.GradOperation(get_all=True)(mul)(x, y) print(mainf(Tensor(1, mstype.int32), Tensor(2, mstype.int32))) ``` @@ -356,7 +359,7 @@ class GradWrap(nn.Cell): def construct(self, x, label): weights = self.weights - return C.GradOperation('get_by_list', get_by_list=True)(self.network, weights)(x, label) + 
return C.GradOperation(get_by_list=True)(self.network, weights)(x, label) net = LeNet5() optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) diff --git a/tutorials/source_zh_cn/advanced_use/differential_privacy.md b/tutorials/source_zh_cn/advanced_use/differential_privacy.md index bf11a9ee4af89f1e3cc3d3c196381d4cc083ef15..0f09b27154658ff26c1f03bb73613eb28f4f7ca3 100644 --- a/tutorials/source_zh_cn/advanced_use/differential_privacy.md +++ b/tutorials/source_zh_cn/advanced_use/differential_privacy.md @@ -2,6 +2,20 @@ `Ascend` `模型开发` `模型调优` `企业` `高级` + + +- [机器学习中的差分隐私](#机器学习中的差分隐私) + - [概述](#概述) + - [实现阶段](#实现阶段) + - [导入需要的库文件](#导入需要的库文件) + - [参数配置](#参数配置) + - [预处理数据集](#预处理数据集) + - [建立模型](#建立模型) + - [引入差分隐私](#引入差分隐私) + - [引用](#引用) + + + ## 概述 diff --git a/tutorials/source_zh_cn/advanced_use/distributed_training_ascend.md b/tutorials/source_zh_cn/advanced_use/distributed_training_ascend.md index 3ad4ef13f467023abc9ded8d0c3fee153cee4ed0..d02d36a460a235f4c29c86c93503a6957e7f60ca 100644 --- a/tutorials/source_zh_cn/advanced_use/distributed_training_ascend.md +++ b/tutorials/source_zh_cn/advanced_use/distributed_training_ascend.md @@ -215,12 +215,11 @@ class SoftmaxCrossEntropyExpand(nn.Cell): ## 训练网络 -`context.set_auto_parallel_context`是配置并行训练参数的接口,必须在`Model`初始化前调用。如用户未指定参数,框架会自动根据并行模式为用户设置参数的经验值。如数据并行模式下,`parameter_broadcast`默认打开。主要参数包括: +`context.set_auto_parallel_context`是配置并行训练参数的接口,必须在初始化网络之前调用。主要参数包括: - `parallel_mode`:分布式并行模式,默认为单机模式`ParallelMode.STAND_ALONE`。可选数据并行`ParallelMode.DATA_PARALLEL`及自动并行`ParallelMode.AUTO_PARALLEL`。 -- `parameter_broadcast`: 参数初始化广播开关,`DATA_PARALLEL`和`HYBRID_PARALLEL`模式下,默认值为`True`。 - `mirror_mean`:反向计算时,框架内部会将数据并行参数分散在多台机器的梯度值进行收集,得到全局梯度值后再传入优化器中更新。默认值为`False`,设置为True对应`allreduce_mean`操作,False对应`allreduce_sum`操作。 -- `enable_parallel_optimizer`:开发中特性。打开优化器模型并行开关,通过拆分权重到各卡分别进行更新再同步的方式以提升性能。该特性只在数据并行模式和参数量大于机器数时有效。 +- `enable_parallel_optimizer`:开发中特性。打开优化器模型并行开关,通过拆分权重到各卡分别进行更新再同步的方式以提升性能。该参数目前只在数据并行模式和参数量大于机器数时有效,支持`Lamb`和`Adam`优化器。 > `device_num`和`global_rank`建议采用默认值,框架内会调用HCCL接口获取。 diff --git a/tutorials/source_zh_cn/advanced_use/fuzzer.md b/tutorials/source_zh_cn/advanced_use/fuzzer.md index 1ae638da781a7128c7d4ccae992b3cc627970c6b..9ab97020ace0c7433fcc06a82a7c01da091415f8 100644 --- a/tutorials/source_zh_cn/advanced_use/fuzzer.md +++ b/tutorials/source_zh_cn/advanced_use/fuzzer.md @@ -18,7 +18,7 @@ 传统软件的决策逻辑由代码逻辑决定,传统软件通过代码行覆盖率来判断当前测试是否充分,理想情况下覆盖率越高,代码测试越充分。然而,对于深度神经网络而言,程序的决策逻辑由训练数据、网络模型结构和参数通过某种黑盒机制决定,代码行覆盖率已不足以评估测试的充分性。需要根据深度网络的特点选择更为适合的测试评价准则,指导神经网络进行更为充分的测试,发现更多的边缘错误用例,从而确保模型的通用性、鲁棒性。 -MindArmourd的Fuzzer模块以神经元覆盖率作为测试评价准则。神经元覆盖率,是指通过一组输入观察到的、激活的神经元数量和神经元输出值的范围。我们通过神经元覆盖率来指导输入变异,让输入能够激活更多的神经元,神经元值的分布范围更广,从而探索不同类型的模型输出结果、错误行为。 +MindArmour的Fuzzer模块以神经元覆盖率作为测试评价准则。神经元覆盖率,是指通过一组输入观察到的、激活的神经元数量和神经元输出值的范围。我们通过神经元覆盖率来指导输入变异,让输入能够激活更多的神经元,神经元值的分布范围更广,从而探索不同类型的模型输出结果、错误行为。 这里以LeNet模型,MNIST数据集为例,说明如何使用Fuzzer。 @@ -135,13 +135,13 @@ context.set_context(mode=context.GRAPH_MODE, device_target=cfg.device_target) eval_metrics =['accuracy', 'kmnc', 'attack_success_rate'] ``` -3. 初始化种子队列,种子队列中的每个种子,包含3个值:原始图片、图片标签、变异标记。变异标记初始化时均为0。 +3. 
初始化种子队列,种子队列中的每个种子,包含3个值:原始图片、图片标签。 ```python # make initial seeds initial_seeds = [] for img, label in zip(test_images, test_labels): - initial_seeds.append([img, label, 0]) + initial_seeds.append([img, label]) initial_seeds = initial_seeds[:100] ``` @@ -184,7 +184,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target=cfg.device_target) Neural_coverage_KMNC: 0.4797 ``` - Fuzz测试前种子的KMNC神经元覆盖率为8.5%,Fuzz后,KMNC神经元覆盖率为47.97%,神经元覆盖率提升,样本的多样性提升。Fuzz后,模型对于Fuzz生成样本的准确率为96.8%,使用了对抗攻击方法的样本,攻击成功率为79.29%。由于初始化种子、变异方法和相应的参数均为随机选择的,结果有一定的浮动是正常的。 + Fuzz测试前种子的KMNC神经元覆盖率为8.5%,Fuzz后,KMNC神经元覆盖率为47.97%,神经元覆盖率提升,样本的多样性提升。Fuzz后,模型对于Fuzz生成样本的准确率为79.29%,使用了对抗攻击方法的样本,攻击成功率为47.97%。由于初始化种子、变异方法和相应的参数均为随机选择的,结果有一定的浮动是正常的。 原始图片: diff --git a/tutorials/source_zh_cn/advanced_use/gradient_accumulation.md b/tutorials/source_zh_cn/advanced_use/gradient_accumulation.md index dc0a0d6c4d77e2b000fdfd81c782f06e276d05aa..5ea2290e4647d865fdef6ddaf50032efb22a5fe8 100644 --- a/tutorials/source_zh_cn/advanced_use/gradient_accumulation.md +++ b/tutorials/source_zh_cn/advanced_use/gradient_accumulation.md @@ -95,11 +95,12 @@ class TrainForwardBackward(Cell): def __init__(self, network, optimizer, grad_sum, sens=1.0): super(TrainForwardBackward, self).__init__(auto_prefix=False) self.network = network + self.network.set_grad() self.network.add_flags(defer_inline=True) self.weights = ParameterTuple(network.trainable_params()) self.optimizer = optimizer self.grad_sum = grad_sum - self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + self.grad = C.GradOperation(get_by_list=True, sens_param=True) self.sens = sens self.hyper_map = C.HyperMap() diff --git a/tutorials/source_zh_cn/advanced_use/system_metrics.md b/tutorials/source_zh_cn/advanced_use/hardware_resources.md similarity index 88% rename from tutorials/source_zh_cn/advanced_use/system_metrics.md rename to tutorials/source_zh_cn/advanced_use/hardware_resources.md index f66388ced2b5969a02b56cf23c25dd00b4da73e8..406db6384677c418193258a498b005f84bb33553 100644 --- a/tutorials/source_zh_cn/advanced_use/system_metrics.md +++ b/tutorials/source_zh_cn/advanced_use/hardware_resources.md @@ -11,7 +11,7 @@ -   +   ## 概述 @@ -21,7 +21,7 @@ 昇腾AI处理器看板用于查看当前各芯片的信息。 -![sysmetric_npu.png](./images/sysmetric_npu.png) +![resources_npu.png](./images/resources_npu.png) 图1:硬件资源昇腾AI处理器看板 @@ -30,7 +30,7 @@ - **名称**: 芯片名称。 - **编号**: 芯片编号,从`0`到`7`. 
- **是否空闲**: 芯片是否空闲。 -- **健康状态**: 芯片健康指数。 +- **健康状态**: 芯片健康状态。 - **IP地址**: 芯片IP地址。 - **已用HBM内存(MB)**: 芯片已用的HBM内存。 - **功率(W)**: 芯片功率。 @@ -42,7 +42,7 @@ CPU看板用于查看当前系统CPU总计及每个核的信息。 -![sysmetric_cpu.png](./images/sysmetric_cpu.png) +![resources_cpu.png](./images/resources_cpu.png) 图2:硬件资源CPU看板 @@ -67,7 +67,7 @@ CPU看板用于查看当前系统CPU总计及每个核的信息。 内存用于查看当前系统内存的信息。 -![sysmetric_mem.png](./images/sysmetric_mem.png) +![resources_mem.png](./images/resources_mem.png) 图3:硬件资源内存看板 diff --git a/tutorials/source_zh_cn/advanced_use/images/bert_model.PNG b/tutorials/source_zh_cn/advanced_use/images/bert_model.PNG new file mode 100644 index 0000000000000000000000000000000000000000..8dddbe6be41ae9ae4cd5fef0ea7e4ab1b98a46eb Binary files /dev/null and b/tutorials/source_zh_cn/advanced_use/images/bert_model.PNG differ diff --git a/tutorials/source_zh_cn/advanced_use/images/finetune.PNG b/tutorials/source_zh_cn/advanced_use/images/finetune.PNG new file mode 100644 index 0000000000000000000000000000000000000000..cbdc6263669c5a40c910af9f5f2483d0d1137455 Binary files /dev/null and b/tutorials/source_zh_cn/advanced_use/images/finetune.PNG differ diff --git a/tutorials/source_zh_cn/advanced_use/images/introduce.PNG b/tutorials/source_zh_cn/advanced_use/images/introduce.PNG new file mode 100644 index 0000000000000000000000000000000000000000..9a59eda78c4b9782c6b4f3838ed7f881f7b72d20 Binary files /dev/null and b/tutorials/source_zh_cn/advanced_use/images/introduce.PNG differ diff --git a/tutorials/source_zh_cn/advanced_use/images/sysmetric_cpu.png b/tutorials/source_zh_cn/advanced_use/images/resources_cpu.png similarity index 100% rename from tutorials/source_zh_cn/advanced_use/images/sysmetric_cpu.png rename to tutorials/source_zh_cn/advanced_use/images/resources_cpu.png diff --git a/tutorials/source_en/advanced_use/images/sysmetric_mem.png b/tutorials/source_zh_cn/advanced_use/images/resources_mem.png similarity index 100% rename from tutorials/source_en/advanced_use/images/sysmetric_mem.png rename to tutorials/source_zh_cn/advanced_use/images/resources_mem.png diff --git a/tutorials/source_zh_cn/advanced_use/images/sysmetric_npu.png b/tutorials/source_zh_cn/advanced_use/images/resources_npu.png similarity index 100% rename from tutorials/source_zh_cn/advanced_use/images/sysmetric_npu.png rename to tutorials/source_zh_cn/advanced_use/images/resources_npu.png diff --git a/tutorials/source_zh_cn/advanced_use/images/synchronization_training_and_evaluation.png b/tutorials/source_zh_cn/advanced_use/images/synchronization_training_and_evaluation.png new file mode 100644 index 0000000000000000000000000000000000000000..cbecb6c9739eaf047c89ea79f9d596a2793e6283 Binary files /dev/null and b/tutorials/source_zh_cn/advanced_use/images/synchronization_training_and_evaluation.png differ diff --git a/tutorials/source_zh_cn/advanced_use/images/sysmetric_mem.png b/tutorials/source_zh_cn/advanced_use/images/sysmetric_mem.png deleted file mode 100644 index eb4af42b58d9f2331b2a517a03af73165a3172ed..0000000000000000000000000000000000000000 Binary files a/tutorials/source_zh_cn/advanced_use/images/sysmetric_mem.png and /dev/null differ diff --git a/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md b/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md index fb8835d381e69ffd78c83416665d94adb21d4d36..12905b823e717b992144dff97a4c1e9b0b15cb0b 100644 --- a/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md +++ b/tutorials/source_zh_cn/advanced_use/mindinsight_commands.md @@ -2,8 +2,6 @@ `Ascend` `GPU` `模型调优` `中级` `高级` - - - 
[MindInsight相关命令](#mindinsight相关命令) @@ -15,6 +13,8 @@ + + ## 查看命令帮助信息 ```shell diff --git a/tutorials/source_zh_cn/advanced_use/on_device_inference.md b/tutorials/source_zh_cn/advanced_use/on_device_inference.md index 7fd7a75d38330922f800c9a0e07caee5d437fbc1..5676dc2a41297277cfd163f36dfc4294c22f6ce7 100644 --- a/tutorials/source_zh_cn/advanced_use/on_device_inference.md +++ b/tutorials/source_zh_cn/advanced_use/on_device_inference.md @@ -89,10 +89,10 @@ MindSpore Lite的框架主要由Frontend、IR、Backend、Lite RT、Micro构成 bash build.sh -I arm32 ``` -3. 进入源码的`mindspore/output`目录,获取编译结果`mindspore-lite-0.6.0-converter-ubuntu.tar.gz`。执行解压缩命令,获得编译后的工具包`mindspore-lite-0.6.0`: +3. 进入源码的`mindspore/output`目录,获取编译结果`mindspore-lite-0.7.0-converter-ubuntu.tar.gz`。执行解压缩命令,获得编译后的工具包`mindspore-lite-0.7.0`: ```bash - tar -xvf mindspore-lite-0.6.0-converter-ubuntu.tar.gz + tar -xvf mindspore-lite-0.7.0-converter-ubuntu.tar.gz ``` @@ -172,7 +172,7 @@ MindSpore进行端侧模型推理的步骤如下。 else: print("checkpoint file does not exist.") ``` -3. 在`mindspore/output/mindspore-lite-0.6.0/converter`路径下,调用MindSpore端侧转换工具`converter_lite`,将模型文件(`.mindir`)转换为端侧模型文件(`.ms`)。 +3. 在`mindspore/output/mindspore-lite-0.7.0/converter`路径下,调用MindSpore端侧转换工具`converter_lite`,将模型文件(`.mindir`)转换为端侧模型文件(`.ms`)。 ``` ./converter_lite --fmk=MS --modelFile=./lenet.mindir --outputFile=lenet ``` diff --git a/tutorials/source_zh_cn/advanced_use/performance_profiling.md b/tutorials/source_zh_cn/advanced_use/performance_profiling.md index a3e72ecc1a12d29b95f3b088e015db6161994e98..00f65da84e55c7fb1252666a263457bc5bffaab5 100644 --- a/tutorials/source_zh_cn/advanced_use/performance_profiling.md +++ b/tutorials/source_zh_cn/advanced_use/performance_profiling.md @@ -29,6 +29,9 @@ - 启动MindInsight,并通过启动参数指定Profiler文件目录,启动成功后,根据IP和端口访问可视化界面,默认访问地址为 `http://127.0.0.1:8080`。 - 在训练列表找到对应训练,点击性能分析,即可在页面中查看训练性能数据。 +## 环境准备 +在使用性能分析工具之前,要确保后台工具进程(ada)正确启动,要求用户使用root启动ada进程,启动命令为:`/usr/local/Ascend/driver/tools/ada`。 + ## 准备训练脚本 为了收集神经网络的性能数据,需要在训练脚本中添加MindSpore Profiler相关接口。 @@ -183,4 +186,9 @@ Timeline主要包含如下几个部分: ## 规格 - 为了控制性能测试时生成数据的大小,大型网络建议性能调试的step数目限制在10以内。 + + > 如何控制step数目请参考数据准备教程: + > + > + - Timeline数据的解析比较耗时,且一般几个step的数据即足够分析出结果。出于数据解析和UI展示性能的考虑,Profiler最多展示20M数据(对大型网络20M可以显示10+条step的信息)。 diff --git a/tutorials/source_zh_cn/advanced_use/performance_profiling_gpu.md b/tutorials/source_zh_cn/advanced_use/performance_profiling_gpu.md index 484bc37fc4a9b3d718c9f0236cba1ffa9a251a2d..da2ee6df36fb65f3e275b5235f0ed3c48f21401f 100644 --- a/tutorials/source_zh_cn/advanced_use/performance_profiling_gpu.md +++ b/tutorials/source_zh_cn/advanced_use/performance_profiling_gpu.md @@ -23,7 +23,7 @@ > 操作流程可以参考Ascend 910上profiler的操作: > -> https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/performance_profiling.html#id3 +> ## 准备训练脚本 @@ -33,7 +33,37 @@ > 样例代码与Ascend使用方式一致可以参考: > -> https://www.mindspore.cn/tutorial/zh-CN/master/advanced_use/performance_profiling.html#id4 +> + +GPU场景下还可以用自定义callback的方式收集性能数据,示例如下: + +```python +class StopAtStep(Callback): + def __init__(self, start_step, stop_step): + super(StopAtStep, self).__init__() + self.start_step = start_step + self.stop_step = stop_step + self.already_analysed = False + + def step_begin(self, run_context): + cb_params = run_context.original_args() + step_num = cb_params.cur_step_num + if step_num == self.start_step: + self.profiler = Profiler() + + def step_end(self, run_context): + cb_params = run_context.original_args() + step_num = cb_params.cur_step_num + if step_num == self.stop_step and not self.already_analysed: + 
self.profiler.analyse() + self.already_analysed = True + + def end(self, run_context): + if not self.already_analysed: + self.profiler.analyse() +``` + +以上代码仅供参考,用户可根据所需场景自由实现。 ## 启动MindInsight diff --git a/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md b/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md new file mode 100644 index 0000000000000000000000000000000000000000..a6a68fe3f68e279c844d80b13abadefd81ce31d3 --- /dev/null +++ b/tutorials/source_zh_cn/advanced_use/synchronization_training_and_evaluation.md @@ -0,0 +1,174 @@ +# 同步训练和验证模型 + + + +- [同步训练和验证模型](#同步训练和验证模型) + - [概述](#概述) + - [定义回调函数EvalCallBack](#定义回调函数evalcallback) + - [定义训练网络并执行](#定义训练网络并执行) + - [定义函数绘制不同epoch下模型的精度](#定义函数绘制不同epoch下模型的精度) + - [总结](#总结) + + + + +   + + +## 概述 + +在面对复杂网络时,往往需要进行几十甚至几百次的epoch训练。在训练之前,很难掌握在训练到第几个epoch时,模型的精度能达到满足要求的程度,所以经常会采用一边训练的同时,在相隔固定epoch的位置对模型进行精度验证,并保存相应的模型,等训练完毕后,通过查看对应模型精度的变化就能迅速地挑选出相对最优的模型,本文将采用这种方法,以LeNet网络为样本,进行示例。 + +流程如下: +1. 定义回调函数EvalCallBack,实现同步进行训练和验证。 +2. 定义训练网络并执行。 +3. 将不同epoch下的模型精度绘制出折线图并挑选最优模型。 + +完整示例请参考[notebook](https://gitee.com/mindspore/docs/blob/master/tutorials/notebook/synchronization_training_and_evaluation.ipynb)。 + +## 定义回调函数EvalCallBack + +实现思想:每隔n个epoch验证一次模型精度,由于在自定义函数中实现,如需了解详细用法,请参考[API说明](https://www.mindspore.cn/api/zh-CN/master/api/python/mindspore/mindspore.train.html?highlight=callback#mindspore.train.callback.Callback); + +核心实现:回调函数的`epoch_end`内设置验证点,如下: + +`cur_epoch % eval_per_epoch == 0`:即每`eval_per_epoch`个epoch结束时,验证一次模型精度。 + +- `cur_epoch`:当前训练过程的epoch数值。 +- `eval_per_epoch`:用户自定义数值,即验证频次。 + +其他参数解释: + +- `model`:即是MindSpore中的`Model`函数。 +- `eval_dataset`:验证数据集。 +- `epoch_per_eval`:记录验证模型的精度和相应的epoch数,其数据形式为`{"epoch": [], "acc": []}`。 + +```python +from mindspore.train.callback import Callback + +class EvalCallBack(Callback): + def __init__(self, model, eval_dataset, eval_per_epoch, epoch_per_eval): + self.model = model + self.eval_dataset = eval_dataset + self.eval_per_epoch = eval_per_epoch + self.epoch_per_eval = epoch_per_eval + + def epoch_end(self, run_context): + cb_param = run_context.original_args() + cur_epoch = cb_param.cur_epoch_num + if cur_epoch % self.eval_per_epoch == 0: + acc = self.model.eval(self.eval_dataset, dataset_sink_mode=True) + epoch_per_eval["epoch"].append(cur_epoch) + epoch_per_eval["acc"].append(acc["Accuracy"]) + print(acc) + +``` + +## 定义训练网络并执行 + +在保存模型的参数`CheckpointConfig`中,需计算好单个epoch中的step数,再根据需要进行验证模型精度的频次对应,本次示例为1875个step/epoch,按照每两个epoch验证一次的思想,这里设置`save_checkpoint_steps=eval_per_epoch*1875`,其中变量`eval_per_epoch`等于2。 + +参数解释: + +- `config_ck`:定义保存模型信息。 + - `save_checkpoint_steps`:每多少个step保存一次模型。 + - `keep_checkpoint_max`:设置保存模型数量的上限。 +- `ckpoint_cb`:定义模型保存的名称及路径信息。 +- `model`:定义模型。 +- `model.train`:模型训练函数。 +- `epoch_per_eval`:定义收集`epoch`数和对应模型精度信息的字典。 + +```python +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor +from mindspore.train import Model +from mindspore import context +from mindspore.nn.metrics import Accuracy + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + ckpt_save_dir = "./lenet_ckpt" + eval_per_epoch = 2 + + ... ... 
+ + # need to calculate how many steps are in each epoch,in this example, 1875 steps per epoch + config_ck = CheckpointConfig(save_checkpoint_steps=eval_per_epoch*1875, keep_checkpoint_max=15) + ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",directory=ckpt_save_dir, config=config_ck) + model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) + + epoch_per_eval = {"epoch": [], "acc": []} + eval_cb = EvalCallBack(model, eval_data, eval_per_epoch, epoch_per_eval) + + model.train(epoch_size, train_data, callbacks=[ckpoint_cb, LossMonitor(375), eval_cb], + dataset_sink_mode=True) +``` + +输出结果: + + epoch: 1 step: 375, loss is 2.298612 + epoch: 1 step: 750, loss is 2.075152 + epoch: 1 step: 1125, loss is 0.39205977 + epoch: 1 step: 1500, loss is 0.12368304 + epoch: 1 step: 1875, loss is 0.20988345 + epoch: 2 step: 375, loss is 0.20582482 + epoch: 2 step: 750, loss is 0.029070046 + epoch: 2 step: 1125, loss is 0.041760832 + epoch: 2 step: 1500, loss is 0.067035824 + epoch: 2 step: 1875, loss is 0.0050643035 + {'Accuracy': 0.9763621794871795} + + ... ... + + epoch: 9 step: 375, loss is 0.021227183 + epoch: 9 step: 750, loss is 0.005586236 + epoch: 9 step: 1125, loss is 0.029125651 + epoch: 9 step: 1500, loss is 0.00045874066 + epoch: 9 step: 1875, loss is 0.023556218 + epoch: 10 step: 375, loss is 0.0005807788 + epoch: 10 step: 750, loss is 0.02574059 + epoch: 10 step: 1125, loss is 0.108463734 + epoch: 10 step: 1500, loss is 0.01950589 + epoch: 10 step: 1875, loss is 0.10563098 + {'Accuracy': 0.979667467948718} + + +在同一目录找到`lenet_ckpt`文件夹,文件夹中保存了5个模型,和一个计算图相关数据,其结构如下: + +``` +lenet_ckpt +├── checkpoint_lenet-10_1875.ckpt +├── checkpoint_lenet-2_1875.ckpt +├── checkpoint_lenet-4_1875.ckpt +├── checkpoint_lenet-6_1875.ckpt +├── checkpoint_lenet-8_1875.ckpt +└── checkpoint_lenet-graph.meta +``` + +## 定义函数绘制不同epoch下模型的精度 + +定义绘图函数`eval_show`,将`epoch_per_eval`载入到`eval_show`中,绘制出不同`epoch`下模型的验证精度折线图。 + + +```python +import matplotlib.pyplot as plt + +def eval_show(epoch_per_eval): + plt.xlabel("epoch number") + plt.ylabel("Model accuracy") + plt.title("Model accuracy variation chart") + plt.plot(epoch_per_eval["epoch"], epoch_per_eval["acc"], "red") + plt.show() + +eval_show(epoch_per_eval) +``` + +输出结果: + +![png](./images/synchronization_training_and_evaluation.png) + + +从上图可以一目了然地挑选出需要的最优模型。 + +## 总结 + +本次使用MNIST数据集通过卷积神经网络LeNet5进行训练,着重介绍了在进行模型训练的同时进行模型的验证,保存对应`epoch`的模型,并从中挑选出最优模型的方法。 diff --git a/tutorials/source_zh_cn/advanced_use/visualization_tutorials.rst b/tutorials/source_zh_cn/advanced_use/visualization_tutorials.rst index 60bea522f6194c6b1a8459ffacab5f6c11ea3f9c..310e3b37dff39271d6027df31296df92d1277d8d 100644 --- a/tutorials/source_zh_cn/advanced_use/visualization_tutorials.rst +++ b/tutorials/source_zh_cn/advanced_use/visualization_tutorials.rst @@ -7,7 +7,7 @@ summary_record dashboard lineage_and_scalars_comparision - system_metrics + hardware_resources performance_profiling performance_profiling_gpu mindinsight_commands diff --git a/tutorials/source_zh_cn/index.rst b/tutorials/source_zh_cn/index.rst index 33171ea18ab9b7a45a08c40f6aeea694c4cc7efa..c1551225334e0208d3589dd2bfec89b39b26cbaa 100644 --- a/tutorials/source_zh_cn/index.rst +++ b/tutorials/source_zh_cn/index.rst @@ -33,6 +33,8 @@ MindSpore教程 advanced_use/computer_vision_application advanced_use/nlp_application advanced_use/second_order_optimizer_for_resnet50_application + advanced_use/synchronization_training_and_evaluation + advanced_use/bert_poetry .. 
toctree:: :glob: diff --git a/tutorials/source_zh_cn/quick_start/linear_regression.md b/tutorials/source_zh_cn/quick_start/linear_regression.md index 18ea005e760af3f6c6cebb7d218811c6ddb65d71..9f7ab617c692f9e04a200e87f2a5203a9d1bbae8 100644 --- a/tutorials/source_zh_cn/quick_start/linear_regression.md +++ b/tutorials/source_zh_cn/quick_start/linear_regression.md @@ -297,7 +297,7 @@ class GradWrap(nn.Cell): def construct(self, data, label): weights = self.weights - return C.GradOperation('get_by_list', get_by_list=True) \ + return C.GradOperation(get_by_list=True) \ (self.network, weights)(data, label) ``` diff --git a/tutorials/source_zh_cn/use/custom_operator.md b/tutorials/source_zh_cn/use/custom_operator.md index 9eb6dec635b3bbe89d9a723570fc7e6601bc450c..5ca1b6d2103d261a5c0c5fd643583d12c13925ce 100644 --- a/tutorials/source_zh_cn/use/custom_operator.md +++ b/tutorials/source_zh_cn/use/custom_operator.md @@ -232,7 +232,7 @@ def test_grad_net(): x = np.array([1.0, 4.0, 9.0]).astype(np.float32) sens = np.array([1.0, 1.0, 1.0]).astype(np.float32) square = Net() - grad = C.GradOperation('grad_with_sens', sens_param=True) + grad = C.GradOperation(sens_param=True) dx = grad(square)(Tensor(x), Tensor(sens)) print("x: ", x) print("dx: ", dx) diff --git a/tutorials/tutorial_code/gradient_accumulation/train.py b/tutorials/tutorial_code/gradient_accumulation/train.py index c99a6a876a4dd6ded366d4d2fa2664925ff917b2..123bf80a6c88bf324e8b1465e1029b8af68452d8 100644 --- a/tutorials/tutorial_code/gradient_accumulation/train.py +++ b/tutorials/tutorial_code/gradient_accumulation/train.py @@ -1,6 +1,5 @@ import argparse import os -from collections.abc import Iterable import mindspore.nn as nn from mindspore import ParameterTuple @@ -37,11 +36,12 @@ class TrainForwardBackward(Cell): def __init__(self, network, optimizer, grad_sum, sens=1.0): super(TrainForwardBackward, self).__init__(auto_prefix=False) self.network = network + self.network.set_grad() self.network.add_flags(defer_inline=True) self.weights = ParameterTuple(network.trainable_params()) self.optimizer = optimizer self.grad_sum = grad_sum - self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) + self.grad = C.GradOperation(get_by_list=True, sens_param=True) self.sens = sens self.hyper_map = C.HyperMap() @@ -88,17 +88,6 @@ class GradientAccumulation: self._train_optim = self._build_train_optim() self._train_clear = self._build_train_clear() - @staticmethod - def _transform_callbacks(callbacks): - """Transform callback to a list.""" - if callbacks is None: - return [] - - if isinstance(callbacks, Iterable): - return list(callbacks) - - return [callbacks] - def _build_train_forward_backward_network(self): """Build forward and backward network""" network = self._network diff --git a/tutorials/tutorial_code/linear_regression.py b/tutorials/tutorial_code/linear_regression.py index 952607a19d43e1ed0e739d8f606681ab65881d60..bfed5b389e5d1de61e14336c14b5ab1663437784 100644 --- a/tutorials/tutorial_code/linear_regression.py +++ b/tutorials/tutorial_code/linear_regression.py @@ -41,7 +41,7 @@ class GradWrap(nn.Cell): def construct(self, data, label): weights = self.weights - return C.GradOperation('get_by_list', get_by_list=True) \ + return C.GradOperation(get_by_list=True) \ (self.network, weights)(data, label) # Initializing model functions
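Several hunks in this patch move `C.GradOperation` from the old positional-name form (for example `C.GradOperation('get_all', get_all=True)`) to the keyword-only form. The following is a minimal sketch of the updated call pattern, assembled from the PyNative tutorial snippets above; it assumes a MindSpore build matching this release.

```python
from mindspore import Tensor, context
from mindspore.common import dtype as mstype
from mindspore.ops import composite as C

context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")

def mul(x, y):
    return x * y

# Keyword-only construction after this change; the old name argument is removed.
grad_all = C.GradOperation(get_all=True)

# Prints the gradients of mul with respect to both inputs x and y.
print(grad_all(mul)(Tensor(1, mstype.int32), Tensor(2, mstype.int32)))
```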