From 17ba0a60d7a467801f80421295b4989663c230a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E9=9B=B7?= Date: Thu, 22 Oct 2020 17:07:49 +0800 Subject: [PATCH 1/3] unified operator of programming guide 3rd --- .../constraints_on_network_construction.md | 4 ++-- .../constraints_on_network_construction.md | 4 ++-- .../computer_vision_application.ipynb | 2 -- .../customized_debugging_information.ipynb | 1 - .../notebook/debugging_in_pynative_mode.ipynb | 5 ++--- ...nsight_image_histogram_scalar_tensor.ipynb | 22 +++++++++---------- ...sight_model_lineage_and_data_lineage.ipynb | 1 - tutorials/notebook/mixed_precision.ipynb | 15 ++++++------- tutorials/notebook/nlp_application.ipynb | 8 +++---- 9 files changed, 28 insertions(+), 34 deletions(-) diff --git a/docs/note/source_en/constraints_on_network_construction.md b/docs/note/source_en/constraints_on_network_construction.md index 1fa7869dc0..9086c60ed3 100644 --- a/docs/note/source_en/constraints_on_network_construction.md +++ b/docs/note/source_en/constraints_on_network_construction.md @@ -239,7 +239,7 @@ Currently, the following syntax is not supported in network constructors: class ExpandDimsTest(Cell): def __init__(self): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() def construct(self, input_x, input_axis): return self.expandDims(input_x, input_axis) @@ -254,7 +254,7 @@ Currently, the following syntax is not supported in network constructors: class ExpandDimsTest(Cell): def __init__(self, axis): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() self.axis = axis def construct(self, input_x): diff --git a/docs/note/source_zh_cn/constraints_on_network_construction.md b/docs/note/source_zh_cn/constraints_on_network_construction.md index a56bff5b1e..d7e2b8d58d 100644 --- a/docs/note/source_zh_cn/constraints_on_network_construction.md +++ 
b/docs/note/source_zh_cn/constraints_on_network_construction.md @@ -238,7 +238,7 @@ tuple也支持切片取值操作, 但不支持切片类型为Tensor类型,支 class ExpandDimsTest(Cell): def __init__(self): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() def construct(self, input_x, input_axis): return self.expandDims(input_x, input_axis) @@ -253,7 +253,7 @@ tuple也支持切片取值操作, 但不支持切片类型为Tensor类型,支 class ExpandDimsTest(Cell): def __init__(self, axis): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() self.axis = axis def construct(self, input_x): diff --git a/tutorials/notebook/computer_vision_application.ipynb b/tutorials/notebook/computer_vision_application.ipynb index adfeec2c49..78ae171c00 100644 --- a/tutorials/notebook/computer_vision_application.ipynb +++ b/tutorials/notebook/computer_vision_application.ipynb @@ -144,7 +144,6 @@ "import random\n", "import argparse\n", "from mindspore import Tensor\n", - "from mindspore.ops import operations as P\n", "\n", "# Set Training Parameters \n", "random.seed(1)\n", @@ -212,7 +211,6 @@ "source": [ "import mindspore.nn as nn\n", "import mindspore.common.dtype as mstype\n", - "import mindspore.ops.functional as F\n", "import mindspore.dataset as ds\n", "import mindspore.dataset.vision.c_transforms as C\n", "import mindspore.dataset.transforms.c_transforms as C2\n", diff --git a/tutorials/notebook/customized_debugging_information.ipynb b/tutorials/notebook/customized_debugging_information.ipynb index 0ecf5695d2..dafdfc455f 100644 --- a/tutorials/notebook/customized_debugging_information.ipynb +++ b/tutorials/notebook/customized_debugging_information.ipynb @@ -153,7 +153,6 @@ "source": [ "from mindspore.common.initializer import TruncatedNormal\n", "import mindspore.nn as nn\n", - "from mindspore.ops import operations as P\n", "\n", "def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):\n", " \"\"\"Conv layer weight 
initial.\"\"\"\n", diff --git a/tutorials/notebook/debugging_in_pynative_mode.ipynb b/tutorials/notebook/debugging_in_pynative_mode.ipynb index c35d904989..649cb059a8 100644 --- a/tutorials/notebook/debugging_in_pynative_mode.ipynb +++ b/tutorials/notebook/debugging_in_pynative_mode.ipynb @@ -253,8 +253,7 @@ "outputs": [], "source": [ "import mindspore.nn as nn\n", - "import mindspore.ops.operations as P\n", - "from mindspore.ops import composite as C\n", + "import mindspore.ops as ops", "from mindspore.common import dtype as mstype\n", "from mindspore.common.initializer import TruncatedNormal\n", "from mindspore.nn import Dense\n", @@ -373,7 +372,7 @@ "\n", " def construct(self, x, label):\n", " weights = self.weights\n", - " return C.GradOperation(get_by_list=True)(self.network, weights)(x, label)" + " return ops.GradOperation(get_by_list=True)(self.network, weights)(x, label)" ] }, { diff --git a/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb b/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb index 95934934b7..7f644a9642 100644 --- a/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb +++ b/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb @@ -277,7 +277,7 @@ " \n", " ```python\n", " # Init ImageSummary\n", - " self.image_summary = P.ImageSummary()\n", + " self.image_summary = ops.ImageSummary()\n", " ```\n", " \n", " 2. 在 `construct` 方法中使用 `ImageSummary` 算子记录输入图像。其中 \"Image\" 为该数据的名称,MindInsight在展示时,会将该名称展示出来以方便识别是哪个数据。\n", @@ -293,7 +293,7 @@ " \n", " ```python\n", " # Init TensorSummary\n", - " self.tensor_summary = P.TensorSummary()\n", + " self.tensor_summary = ops.TensorSummary()\n", " ```\n", " \n", " 2. 
在`construct`方法中使用`TensorSummary`算子记录张量数据。其中\"Tensor\"为该数据的名称。\n", @@ -319,7 +319,7 @@ "source": [ "import mindspore.nn as nn\n", "from mindspore.common.initializer import TruncatedNormal\n", - "from mindspore.ops import operations as P\n", + "import mindspore.ops as ops\n", "\n", "def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode=\"valid\"):\n", " weight = weight_variable()\n", @@ -348,15 +348,15 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init TensorSummary\n", - " self.tensor_summary = P.TensorSummary()\n", + " self.tensor_summary = ops.TensorSummary()\n", " # Init ImageSummary\n", - " self.image_summary = P.ImageSummary()\n", + " self.image_summary = ops.ImageSummary()\n", "\n", " def construct(self, x):\n", " # Record image by Summary operator\n", @@ -747,7 +747,7 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = fc_with_initialize(4096, 4096)\n", @@ -983,13 +983,13 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = 
fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init TensorSummary\n", - " self.tensor_summary = P.TensorSummary()\n", + " self.tensor_summary = ops.TensorSummary()\n", "\n", " def construct(self, x):\n", " x = self.conv1(x)\n", @@ -1119,13 +1119,13 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init ImageSummary\n", - " self.image_summary = P.ImageSummary()\n", + " self.image_summary = ops.ImageSummary()\n", "\n", " def construct(self, x):\n", " # Record image by Summary operator\n", diff --git a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb index d7b558c859..432e48ebbb 100644 --- a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb +++ b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb @@ -229,7 +229,6 @@ "metadata": {}, "outputs": [], "source": [ - "from mindspore.ops import operations as P\n", "import mindspore.nn as nn\n", "from mindspore.common.initializer import TruncatedNormal\n", "\n", diff --git a/tutorials/notebook/mixed_precision.ipynb b/tutorials/notebook/mixed_precision.ipynb index d74e6e11f9..74c1f6c12c 100644 --- a/tutorials/notebook/mixed_precision.ipynb +++ b/tutorials/notebook/mixed_precision.ipynb @@ -404,22 +404,21 @@ "outputs": [], "source": [ "from mindspore.nn.loss.loss import _Loss\n", - "from mindspore.ops import operations as P\n", - "from mindspore.ops import functional as F\n", + "import mindspore.ops as ops\n", "from 
mindspore import Tensor\n", "import mindspore.nn as nn\n", "\n", "class CrossEntropy(_Loss):\n", " def __init__(self, smooth_factor=0., num_classes=1001):\n", " super(CrossEntropy, self).__init__()\n", - " self.onehot = P.OneHot()\n", + " self.onehot = ops.OneHot()\n", " self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)\n", " self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)\n", " self.ce = nn.SoftmaxCrossEntropyWithLogits()\n", - " self.mean = P.ReduceMean(False)\n", + " self.mean = ops.ReduceMean(False)\n", "\n", " def construct(self, logit, label):\n", - " one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)\n", + " one_hot_label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value)\n", " loss = self.ce(logit, one_hot_label)\n", " loss = self.mean(loss, 0)\n", " return loss" @@ -520,7 +519,7 @@ " if self.down_sample:\n", " self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),\n", " _bn(out_channel)])\n", - " self.add = P.TensorAdd()\n", + " self.add = ops.TensorAdd()\n", "\n", " def construct(self, x):\n", " identity = x\n", @@ -560,7 +559,7 @@ "\n", " self.conv1 = _conv7x7(3, 64, stride=2)\n", " self.bn1 = _bn(64)\n", - " self.relu = P.ReLU()\n", + " self.relu = ops.ReLU()\n", " self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")\n", "\n", " self.layer1 = self._make_layer(block,\n", @@ -584,7 +583,7 @@ " out_channel=out_channels[3],\n", " stride=strides[3])\n", "\n", - " self.mean = P.ReduceMean(keep_dims=True)\n", + " self.mean = ops.ReduceMean(keep_dims=True)\n", " self.flatten = nn.Flatten()\n", " self.end_point = _fc(out_channels[3], num_classes)\n", "\n", diff --git a/tutorials/notebook/nlp_application.ipynb b/tutorials/notebook/nlp_application.ipynb index d2df6a718d..1155b7351e 100644 --- a/tutorials/notebook/nlp_application.ipynb +++ b/tutorials/notebook/nlp_application.ipynb @@ -679,7 +679,7 @@ "\n", "from 
mindspore import Tensor, nn, context, Parameter, ParameterTuple\n", "from mindspore.common.initializer import initializer\n", - "from mindspore.ops import operations as P" + "import mindspore.ops as ops" ] }, { @@ -787,7 +787,7 @@ " super(StackLSTM, self).__init__()\n", " self.num_layers = num_layers\n", " self.batch_first = batch_first\n", - " self.transpose = P.Transpose()\n", + " self.transpose = ops.Transpose()\n", "\n", " # direction number\n", " num_directions = 2 if bidirectional else 1\n", @@ -883,7 +883,7 @@ " embed_size,\n", " embedding_table=weight)\n", " self.embedding.embedding_table.requires_grad = False\n", - " self.trans = P.Transpose()\n", + " self.trans = ops.Transpose()\n", " self.perm = (1, 0, 2)\n", "\n", " if context.get_context(\"device_target\") in STACK_LSTM_DEVICE:\n", @@ -905,7 +905,7 @@ " dropout=0.0)\n", " self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)\n", "\n", - " self.concat = P.Concat(1)\n", + " self.concat = ops.Concat(1)\n", " if bidirectional:\n", " self.decoder = nn.Dense(num_hiddens * 4, num_classes)\n", " else:\n", -- Gitee From 12b9b078903fa67ce85c019b51e02d304eb4a7b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E9=9B=B7?= Date: Wed, 21 Oct 2020 16:51:54 +0800 Subject: [PATCH 2/3] unified operator of programming guide 2rd --- docs/faq/source_en/faq.md | 11 +- docs/faq/source_zh_cn/faq.md | 10 +- .../apply_gradient_accumulation.md | 42 ++-- .../advanced_use/custom_debugging_info.md | 59 +++--- .../advanced_use/custom_operator_ascend.md | 35 ++-- .../advanced_use/debug_in_pynative_mode.md | 63 +++--- .../distributed_training_ascend.md | 68 ++++--- .../enable_graph_kernel_fusion.md | 13 +- .../advanced_use/enable_mixed_precision.md | 15 +- .../migrate_3rd_scripts_mindconverter.md | 23 +-- .../save_load_model_hybrid_parallel.md | 122 ++++++------ .../source_en/advanced_use/summary_record.md | 62 +++--- .../load_model_for_inference_and_transfer.md | 21 +- 
.../apply_deep_probability_programming.md | 172 ++++++++++------- .../apply_gradient_accumulation.md | 41 ++-- .../apply_host_device_training.md | 10 +- .../apply_parameter_server_training.md | 16 +- .../apply_quantization_aware_training.md | 39 ++-- .../advanced_use/convert_dataset.md | 7 +- .../advanced_use/custom_debugging_info.md | 37 ++-- .../advanced_use/custom_operator_ascend.md | 35 ++-- .../advanced_use/debug_in_pynative_mode.md | 61 +++--- .../distributed_training_ascend.md | 54 +++--- .../enable_graph_kernel_fusion.md | 12 +- .../advanced_use/enable_mixed_precision.md | 13 +- .../migrate_3rd_scripts_mindconverter.md | 18 +- .../save_load_model_hybrid_parallel.md | 179 +++++++++--------- .../advanced_use/summary_record.md | 60 +++--- .../quick_start/linear_regression.md | 54 ++---- .../source_zh_cn/quick_start/quick_start.md | 36 ++-- .../source_zh_cn/quick_start/quick_video.md | 9 +- .../quick_start/quick_video/ascend910.md | 2 +- .../ascend910_operator_development.md | 2 +- .../quick_start/quick_video/community.md | 2 +- .../quick_start/quick_video/cpu_ubuntu.md | 2 +- .../quick_start/quick_video/cpu_windows.md | 2 +- .../quick_video/customized_debugging.md | 2 +- .../quick_start/quick_video/gpu.md | 2 +- .../quick_start/quick_video/inference.md | 2 +- ..._the_dataset_and_converting_data_format.md | 2 +- .../quick_video/mindInsight_dashboard.md | 2 +- ...nsight_installation_and_common_commands.md | 2 +- ...Insight_lineage_and_scalars_comparision.md | 2 +- .../mindInsight_performance_profiling.md | 3 +- .../quick_video/quick_start_video.md | 2 +- .../saving_and_loading_model_parameters.md | 2 +- .../source_zh_cn/use/load_dataset_image.md | 2 +- .../source_zh_cn/use/load_dataset_text.md | 14 +- .../load_model_for_inference_and_transfer.md | 19 +- .../source_zh_cn/use/publish_model.md | 9 +- .../training/source_zh_cn/use/save_model.md | 3 +- .../distributed_training/resnet.py | 16 +- .../resnet50_distributed_training.py | 45 ++--- 
.../gradient_accumulation/train.py | 55 +++--- .../tutorial_code/resnet/cifar_resnet50.py | 4 - tutorials/tutorial_code/resnet/resnet.py | 16 +- .../tutorial_code/sample_for_cloud/resnet.py | 12 +- 57 files changed, 861 insertions(+), 762 deletions(-) diff --git a/docs/faq/source_en/faq.md b/docs/faq/source_en/faq.md index 02460fa4f3..20763e8fcf 100644 --- a/docs/faq/source_en/faq.md +++ b/docs/faq/source_en/faq.md @@ -105,16 +105,17 @@ A:You can write the frequently-used environment settings to `~/.bash_profile` ### Verifying the Installation Q: After MindSpore is installed on a CPU of a PC, an error message `the pointer[session] is null` is displayed during code verification. The specific code is as follows. How do I verify whether MindSpore is successfully installed? + ```python import numpy as np from mindspore import Tensor -from mindspore.ops import functional as F +import mindspore.ops as ops import mindspore.context as context context.set_context(device_target="Ascend") x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) -print(F.tensor_add(x,y)) +print(ops.tensor_add(x,y)) ``` A: After MindSpore is installed on a CPU hardware platform, run the `python -c'import mindspore'` command to check whether MindSpore is successfully installed. If no error message such as `No module named'mindspore'` is displayed, MindSpore is successfully installed. The verification code is used only to verify whether a Ascend platform is successfully installed. @@ -201,7 +202,7 @@ A: The MindSpore CPU version can be installed on Windows 10. For details about t Q: What can I do if an error message `wrong shape of image` is displayed when I use a model trained by MindSpore to perform prediction on a `28 x 28` digital image with white text on a black background? -A: The MNIST gray scale image dataset is used for MindSpore training. 
Therefore, when the model is used, the data must be set to a `28 x 28 `gray scale image, that is, a single channel. +A: The MNIST gray scale image dataset is used for MindSpore training. Therefore, when the model is used, the data must be set to a `28 x 28` gray scale image, that is, a single channel.
@@ -224,7 +225,7 @@ context.set_context( mode=cintext.GRAPH_MODE, device_target='ascend') input_tensor=Tensor(np.array([[2,2],[2,2]]),mindspore.float32) -expand_dims=P.ExpandDims() +expand_dims=ops.ExpandDims() output=expand_dims(input_tensor,0) ``` @@ -298,4 +299,4 @@ A: For details about script or model migration, please visit the [MindSpore offi Q: Does MindSpore provide open-source e-commerce datasets? -A: No. Please stay tuned for updates on the [MindSpore official website](https://www.mindspore.cn/en). \ No newline at end of file +A: No. Please stay tuned for updates on the [MindSpore official website](https://www.mindspore.cn/en). diff --git a/docs/faq/source_zh_cn/faq.md b/docs/faq/source_zh_cn/faq.md index 44c918ac4f..3c482c834e 100644 --- a/docs/faq/source_zh_cn/faq.md +++ b/docs/faq/source_zh_cn/faq.md @@ -112,16 +112,17 @@ A: 常用的环境变量设置写入到`~/.bash_profile` 或 `~/.bashrc`中, ### 安装验证 Q:个人电脑CPU环境安装MindSpore后验证代码时报错:`the pointer[session] is null`,具体代码如下,该如何验证是否安装成功呢? + ```python import numpy as np from mindspore import Tensor -from mindspore.ops import functional as F +import mindspore.ops as ops import mindspore.context as context context.set_context(device_target="Ascend") x = Tensor(np.ones([1,3,3,4]).astype(np.float32)) y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) -print(F.tensor_add(x,y)) +print(ops.tensor_add(x,y)) ``` A:CPU硬件平台安装MindSpore后测试是否安装成功,只需要执行命令:`python -c 'import mindspore'`,如果没有显示`No module named 'mindspore'`等错误即安装成功。问题中的验证代码仅用于验证Ascend平台安装是否成功。 @@ -243,6 +244,7 @@ Q:MindSpore如何实现早停功能? A:可以自定义`callback`方法实现早停功能。 例子:当loss降到一定数值后,停止训练。 + ```python class EarlyStop(Callback): def __init__(self, control_loss=1): @@ -287,7 +289,7 @@ context.set_context( mode=cintext.GRAPH_MODE, device_target='ascend') input_tensor=Tensor(np.array([[2,2],[2,2]]),mindspore.float32) -expand_dims=P.ExpandDims() +expand_dims=ops.ExpandDims() output=expand_dims(input_tensor,0) ``` @@ -372,4 +374,4 @@ A:关于脚本或者模型迁移,可以查询MindSpore官网中关于[网络 Q:MindSpore是否附带开源电商类数据集? 
-A:暂时还没有,可以持续关注[MindSpore官网](https://www.mindspore.cn)。 \ No newline at end of file +A:暂时还没有,可以持续关注[MindSpore官网](https://www.mindspore.cn)。 diff --git a/tutorials/training/source_en/advanced_use/apply_gradient_accumulation.md b/tutorials/training/source_en/advanced_use/apply_gradient_accumulation.md index 2fd3ab6c5c..aa4a4921f8 100644 --- a/tutorials/training/source_en/advanced_use/apply_gradient_accumulation.md +++ b/tutorials/training/source_en/advanced_use/apply_gradient_accumulation.md @@ -36,6 +36,7 @@ The ultimate objective is to achieve the same effect as training with N x mini-b The MNIST dataset is used as an example to describe how to customize a simple model to implement gradient accumulation. ### Importing Library Files + The following are the required public modules and MindSpore modules and library files. ```python @@ -47,9 +48,7 @@ import mindspore.nn as nn from mindspore import ParameterTuple from mindspore import context from mindspore.nn import Cell -from mindspore.ops import composite as C -from mindspore.ops import functional as F -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.train.dataset_helper import DatasetHelper from mindspore.train.serialization import save_checkpoint from model_zoo.official.cv.lenet.src.dataset import create_dataset @@ -65,20 +64,22 @@ Use the `MnistDataset` API provided by the dataset of MindSpore to load the MNIS The following uses the LeNet network as an example. You can also use other networks, such as ResNet-50 and BERT. The code is imported from [lenet.py]() in the lenet directory of model_zoo. ### Defining the Training Model + The training process is divided into three parts: forward and backward training, parameter update, and accumulated gradient clearance. + - `TrainForwardBackward` calculates the loss and gradient, and uses grad_sum to implement gradient accumulation. - `TrainOptim` updates parameters. - `TrainClear` clears the gradient accumulation variable grad_sum. 
```python -_sum_op = C.MultitypeFuncGraph("grad_sum_op") -_clear_op = C.MultitypeFuncGraph("clear_op") +_sum_op = ops.MultitypeFuncGraph("grad_sum_op") +_clear_op = ops.MultitypeFuncGraph("clear_op") @_sum_op.register("Tensor", "Tensor") def _cumulative_gard(grad_sum, grad): """Apply gard sum to cumulative gradient.""" - add = P.AssignAdd() + add = ops.AssignAdd() return add(grad_sum, grad) @@ -86,7 +87,7 @@ def _cumulative_gard(grad_sum, grad): def _clear_grad_sum(grad_sum, zero): """Apply zero to clear grad_sum.""" success = True - success = F.depend(success, F.assign(grad_sum, zero)) + success = ops.depend(success, ops.assign(grad_sum, zero)) return success @@ -99,16 +100,16 @@ class TrainForwardBackward(Cell): self.weights = ParameterTuple(network.trainable_params()) self.optimizer = optimizer self.grad_sum = grad_sum - self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.grad = ops.GradOperation(get_by_list=True, sens_param=True) self.sens = sens - self.hyper_map = C.HyperMap() + self.hyper_map = ops.HyperMap() def construct(self, *inputs): weights = self.weights loss = self.network(*inputs) - sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) + sens = ops.Fill()(ops.DType()(loss), ops.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(*inputs, sens) - return F.depend(loss, self.hyper_map(F.partial(_sum_op), self.grad_sum, grads)) + return ops.depend(loss, self.hyper_map(ops.partial(_sum_op), self.grad_sum, grads)) class TrainOptim(Cell): @@ -126,16 +127,16 @@ class TrainClear(Cell): super(TrainClear, self).__init__(auto_prefix=False) self.grad_sum = grad_sum self.zeros = zeros - self.hyper_map = C.HyperMap() + self.hyper_map = ops.HyperMap() def construct(self): - success = self.hyper_map(F.partial(_clear_op), self.grad_sum, self.zeros) + success = self.hyper_map(ops.partial(_clear_op), self.grad_sum, self.zeros) return success ``` ### Defining the Training Process -Each mini-batch calculates the loss and gradient 
through forward and backward training, and uses mini_steps to control the accumulated times before each parameter update. After the number of accumulation times is reached, the parameter is updated and the accumulated gradient variable is cleared. +Each mini-batch calculates the loss and gradient through forward and backward training, and uses mini_steps to control the accumulated times before each parameter update. After the number of accumulation times is reached, the parameter is updated and the accumulated gradient variable is cleared. ```python class GradientAccumulation: @@ -202,6 +203,7 @@ class GradientAccumulation: ``` ### Training and Saving the Model + Call the network, optimizer, and loss function, and then customize the `train_process` API of `GradientAccumulation` to train the model. ```python @@ -226,13 +228,15 @@ if __name__ == "__main__": ``` ## Experiment Result + After 10 epochs, the accuracy on the test set is about 96.31%. -**Training Execution** +**Training Execution:** + 1. Run the training code and view the running result. ```shell - $ python train.py --data_path=./MNIST_Data + python train.py --data_path=./MNIST_Data ``` The output is as follows. The loss value decreases during training. @@ -252,12 +256,12 @@ After 10 epochs, the accuracy on the test set is about 96.31%. The model file `gradient_accumulation.ckpt` is saved during training. -**Model Validation** +**Model Validation:** Use the saved checkpoint file to load the validation dataset through [eval.py]() in the lenet directory of model_zoo. ```shell -$ python eval.py --data_path=./MNIST_Data --ckpt_path=./gradient_accumulation.ckpt --device_target=GPU +python eval.py --data_path=./MNIST_Data --ckpt_path=./gradient_accumulation.ckpt --device_target=GPU ``` The output is as follows. The accuracy of the validation dataset is about 96.31%, which is the same as the result when the value of batch_size is 32. @@ -265,4 +269,4 @@ The output is as follows. 
The accuracy of the validation dataset is about 96.31% ```shell ============== Starting Testing ============== ============== {'Accuracy': 0.9631730769230769} ============== -``` \ No newline at end of file +``` diff --git a/tutorials/training/source_en/advanced_use/custom_debugging_info.md b/tutorials/training/source_en/advanced_use/custom_debugging_info.md index e2d38cdbbd..6ca6a3dd80 100644 --- a/tutorials/training/source_en/advanced_use/custom_debugging_info.md +++ b/tutorials/training/source_en/advanced_use/custom_debugging_info.md @@ -24,7 +24,7 @@ This section describes how to use the customized capabilities provided by MindSpore, such as `callback`, `metrics`,`Print` operator and log printing, to help you quickly debug the training network. -## Introduction to Callback +## Introduction to Callback Callback here is not a function but a class. You can use callback to observe the internal status and related information of the network during training or perform specific actions in a specific period. For example, you can monitor the loss, save model parameters, dynamically adjust parameters, and terminate training tasks in advance. @@ -39,7 +39,7 @@ MindSpore provides the callback capabilities to allow users to insert customized Usage: Transfer the callback object in the `model.train` method. 
The callback object can be a list, for example: ```python -ckpt_cb = ModelCheckpoint() +ckpt_cb = ModelCheckpoint() loss_cb = LossMonitor() summary_cb = SummaryCollector(summary_dir='./summary_dir') model.train(epoch, dataset, callbacks=[ckpt_cb, loss_cb, summary_cb]) @@ -58,7 +58,7 @@ The callback base class is defined as follows: ```python class Callback(): - """Callback base class""" + """Callback base class""" def begin(self, run_context): """Called once before the network executing.""" pass @@ -68,11 +68,11 @@ class Callback(): pass def epoch_end(self, run_context): - """Called after each epoch finished.""" + """Called after each epoch finished.""" pass def step_begin(self, run_context): - """Called before each epoch beginning.""" + """Called before each epoch beginning.""" pass def step_end(self, run_context): @@ -129,7 +129,7 @@ Here are two examples to further understand the usage of custom Callback. The output is as follows: - ``` + ```text epoch: 20 step: 32 loss: 2.298344373703003 ``` @@ -221,16 +221,20 @@ print('Accuracy is ', accuracy) ``` The output is as follows: -``` + +```text Accuracy is 0.6667 ``` + ## MindSpore Print Operator -MindSpore-developed `Print` operator is used to print the tensors or character strings input by users. Multiple strings, multiple tensors, and a combination of tensors and strings are supported, which are separated by comma (,). -The use method of MindSpore `Print` operator is the same that of other operators. You need to assert MindSpore `Print` operator in `__init__` and invoke using `construct`. The following is an example. + +MindSpore-developed `Print` operator is used to print the tensors or character strings input by users. Multiple strings, multiple tensors, and a combination of tensors and strings are supported, which are separated by comma (,). +The use method of MindSpore `Print` operator is the same that of other operators. 
You need to assert MindSpore `Print` operator in `__init__` and invoke using `construct`. The following is an example. + ```python import numpy as np from mindspore import Tensor -from mindspore.ops import operations as P +import mindspore.ops as ops import mindspore.nn as nn import mindspore.context as context @@ -239,7 +243,7 @@ context.set_context(mode=context.GRAPH_MODE) class PrintDemo(nn.Cell): def __init__(self): super(PrintDemo, self).__init__() - self.print = P.Print() + self.print = ops.Print() def construct(self, x, y): self.print('print Tensor x and Tensor y:', x, y) @@ -250,8 +254,10 @@ y = Tensor(np.ones([2, 2]).astype(np.int32)) net = PrintDemo() output = net(x, y) ``` + The output is as follows: -``` + +```text print Tensor x and Tensor y: Tensor shape:[[const vector][2, 1]]Int32 val:[[1] @@ -313,7 +319,7 @@ The input and output of the operator can be saved for debugging through the data You can set `context.set_context(reserve_class_name_in_scope=False)` in your training script to avoid dump failure because of file name is too long. 4. Parse the Dump file - + Call `numpy.fromfile` to parse dump data file. ### Asynchronous Dump @@ -321,6 +327,7 @@ The input and output of the operator can be saved for debugging through the data 1. Create dump json file:`data_dump.json`. The name and location of the JSON file can be customized. + ```json { "common_dump_settings": { @@ -369,30 +376,32 @@ The input and output of the operator can be saved for debugging through the data ``` ## Log-related Environment Variables and Configurations + MindSpore uses glog to output logs. The following environment variables are commonly used: - `GLOG_v` - - The environment variable specifies the log level. + + The environment variable specifies the log level. The default value is 2, indicating the WARNING level. The values are as follows: 0: DEBUG; 1: INFO; 2: WARNING; 3: ERROR. -- `GLOG_logtostderr` +- `GLOG_logtostderr` The environment variable specifies the log output mode. 
When `GLOG_logtostderr` is set to 1, logs are output to the screen. If the value is set to 0, logs are output to a file. The default value is 1. - `GLOG_log_dir` - - The environment variable specifies the log output path. - If `GLOG_logtostderr` is set to 0, value of this variable must be specified. - If `GLOG_log_dir is` specified and the value of `GLOG_logtostderr` is 1, logs are output to the screen but not to a file. - Logs of C++ and Python will be output to different files. The file name of C++ log complies with the naming rule of `GLOG` log file. Here, the name is `mindspore.MachineName.UserName.log.LogLevel.Timestamp`. The file name of Python log is `mindspore.log`. -- `MS_SUBMODULE_LOG_v` + The environment variable specifies the log output path. + If `GLOG_logtostderr` is set to 0, value of this variable must be specified. + If `GLOG_log_dir is` specified and the value of `GLOG_logtostderr` is 1, logs are output to the screen but not to a file. + Logs of C++ and Python will be output to different files. The file name of C++ log complies with the naming rule of `GLOG` log file. Here, the name is `mindspore.MachineName.UserName.log.LogLevel.Timestamp`. + The file name of Python log is `mindspore.log`. + +- `MS_SUBMODULE_LOG_v` The environment variable specifies log levels of C++ sub modules of MindSpore. - The environment variable is assigned as: `MS_SUBMODULE_LOG_v="{SubModule1:LogLevel1,SubModule2:LogLevel2,...}"`. - The specified sub module log level will overwrite the global log level. The meaning of submodule log level is same as `GLOG_v`, the sub modules of MindSpore grouped by source directory is as the bellow table. + The environment variable is assigned as: `MS_SUBMODULE_LOG_v="{SubModule1:LogLevel1,SubModule2:LogLevel2,...}"`. + The specified sub module log level will overwrite the global log level. The meaning of submodule log level is same as `GLOG_v`, the sub modules of MindSpore grouped by source directory is as the bellow table. E.g. 
when set `GLOG_v=1 MS_SUBMODULE_LOG_v="{PARSER:2,ANALYZER:2}"` then log levels of `PARSER` and `ANALYZER` are WARNING, other modules' log levels are INFO. Sub modules of MindSpore grouped by source directory: @@ -424,4 +433,4 @@ Sub modules of MindSpore grouped by source directory: | mindspore/core/gvar | COMMON | | mindspore/core/ | CORE | -> The glog does not support log rotate. To control the disk space occupied by log files, use the log file management tool provided by the operating system, such as: logrotate of Linux. \ No newline at end of file +> The glog does not support log rotate. To control the disk space occupied by log files, use the log file management tool provided by the operating system, such as: logrotate of Linux. diff --git a/tutorials/training/source_en/advanced_use/custom_operator_ascend.md b/tutorials/training/source_en/advanced_use/custom_operator_ascend.md index f312d8fe66..88870321ba 100644 --- a/tutorials/training/source_en/advanced_use/custom_operator_ascend.md +++ b/tutorials/training/source_en/advanced_use/custom_operator_ascend.md @@ -25,6 +25,7 @@ When built-in operators cannot meet requirements during network development, you To add a custom operator, you need to register the operator primitive, implement the operator, and register the operator information. The related concepts are as follows: + - Operator primitive: defines the frontend API prototype of an operator on the network. It is the basic unit for forming a network model and includes the operator name, attribute (optional), input and output names, output shape inference method, and output dtype inference method. - Operator implementation: describes the implementation of the internal computation logic for an operator through the DSL API provided by the Tensor Boost Engine (TBE). The TBE supports the development of custom operators based on the Ascend AI chip. You can apply for Open Beta Tests (OBTs) by visiting . 
- Operator information: describes basic information about a TBE operator, such as the operator name and supported input and output types. It is the basis for the backend to select and map operators. @@ -38,6 +39,7 @@ This section takes a Square operator as an example to describe how to customize The primitive of an operator is a subclass inherited from `PrimitiveWithInfer`. The type name of the subclass is the operator name. The definition of the custom operator primitive is the same as that of the built-in operator primitive. + - The attribute is defined by the input parameter of the constructor function `__init__`. The operator in this test case has no attribute. Therefore, `__init__` has only one input parameter. For details about test cases in which operators have attributes, see [custom add3](https://gitee.com/mindspore/mindspore/blob/r1.0/tests/st/ops/custom_ops_tbe/cus_add3.py) in the MindSpore source code. - The input and output names are defined by the `init_prim_io_names` function. - The shape inference method of the output tensor is defined in the `infer_shape` function, and the dtype inference method of the output tensor is defined in the `infer_dtype` function. @@ -48,7 +50,7 @@ The following code takes the Square operator primitive `cus_square.py` as an exa ```python from mindspore.ops import prim_attr_register, PrimitiveWithInfer -from mindspore.ops import operations as P +import mindspore.ops as ops # y = x^2 class CusSquare(PrimitiveWithInfer): """ @@ -75,10 +77,12 @@ To compile an operator implementation, you need to compile a computable function The computable function of an operator is mainly used to encapsulate the computation logic of the operator for the main function to call. The computation logic is implemented by calling the combined API of the TBE. The entry function of an operator describes the internal process of compiling the operator. The process is as follows: + 1. Prepare placeholders to be input. 
A placeholder will return a tensor object that represents a group of input data. 2. Call the computable function. The computable function uses the API provided by the TBE to describe the computation logic of the operator. 3. Call the scheduling module. The model tiles the operator data based on the scheduling description and specifies the data transfer process to ensure optimal hardware execution. By default, the automatic scheduling module (`auto_schedule`) can be used. 4. Call `cce_build_code` to compile and generate an operator binary file. + > The input parameters of the entry function require the input information of each operator, output information of each operator, operator attributes (optional), and `kernel_name` (name of the generated operator binary file). The input and output information is encapsulated in dictionaries, including the input and output shape and dtype when the operator is called on the network. For details about TBE operator development, visit the [TBE website](https://support.huaweicloud.com/odevg-A800_3000_3010/atlaste_10_0063.html). For details about how to debug and optimize the TBE operator, visit the [Mind Studio website](https://support.huaweicloud.com/usermanual-mindstudioc73/atlasmindstudio_02_0043.html). @@ -93,7 +97,7 @@ The operator information is key for the backend to select the operator implement ### Example -The following takes the TBE implementation `square_impl.py` of the `Square` operator as an example. `square_compute` is a computable function of the operator implementation. It describes the computation logic of `x * x` by calling the API provided by `te.lang.cce`. `cus_square_op_info ` is the operator information, which is defined by `TBERegOp`. For the specific field meaning of the operator information, visit the [TBE website](https://support.huaweicloud.com/odevg-A800_3000_3010/atlaste_10_0096.html). +The following takes the TBE implementation `square_impl.py` of the `Square` operator as an example. 
`square_compute` is a computable function of the operator implementation. It describes the computation logic of `x * x` by calling the API provided by `te.lang.cce`. `cus_square_op_info` is the operator information, which is defined by `TBERegOp`. For the specific field meaning of the operator information, visit the [TBE website](https://support.huaweicloud.com/odevg-A800_3000_3010/atlaste_10_0096.html). Note the following parameters when setting `TBERegOp`: @@ -128,7 +132,7 @@ cus_square_op_info = TBERegOp("CusSquare") \ .output(0, "y", False, "required", "all") \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .get_op_info() + .get_op_info() # Binding kernel info with the kernel implementation. @op_info_register(cus_square_op_info) @@ -185,17 +189,20 @@ def test_net(): ``` Execute the test case. -``` + +```bash pytest -s tests/st/ops/custom_ops_tbe/test_square.py::test_net ``` The execution result is as follows: -``` + +```text x: [1. 4. 9.] output: [1. 16. 81.] ``` ## Defining the bprop Function for an Operator + If an operator needs to support automatic differentiation, the bprop function needs to be defined in the primitive of the operator. In the bprop function, you need to describe the backward computation logic that uses the forward input, forward output, and output gradients to obtain the input gradients. The backward computation logic can be composed of built-in operators or custom backward operators. Note the following points when defining the bprop function: @@ -204,6 +211,7 @@ Note the following points when defining the bprop function: - The return value of the bprop function is tuples consisting of input gradients. The sequence of elements in a tuple is the same as that of the forward input parameters. Even if there is only one input gradient, the return value must be a tuple. 
For example, the `CusSquare` primitive after the bprop function is added is as follows: + ```python class CusSquare(PrimitiveWithInfer): @prim_attr_register @@ -220,33 +228,36 @@ class CusSquare(PrimitiveWithInfer): def get_bprop(self): def bprop(data, out, dout): - twos_like = P.OnesLike()(data) * 2.0 - gradient = P.Mul()(data, twos_like) - dx = P.Mul()(gradient, dout) + twos_like = ops.OnesLike()(data) * 2.0 + gradient = ops.Mul()(data, twos_like) + dx = ops.Mul()(gradient, dout) return (dx,) return bprop ``` Define backward cases in the `test_square.py` file. + ```python -from mindspore.ops import composite as C +import mindspore.ops as ops def test_grad_net(): x = np.array([1.0, 4.0, 9.0]).astype(np.float32) sens = np.array([1.0, 1.0, 1.0]).astype(np.float32) square = Net() - grad = C.GradOperation(sens_param=True) + grad = ops.GradOperation(sens_param=True) dx = grad(square)(Tensor(x), Tensor(sens)) print("x: ", x) print("dx: ", dx) ``` Execute the test case. -``` + +```bash pytest -s tests/st/ops/custom_ops_tbe/test_square.py::test_grad_net ``` The execution result is as follows: -``` + +```text x: [1. 4. 9.] dx: [2. 8. 18.] ``` diff --git a/tutorials/training/source_en/advanced_use/debug_in_pynative_mode.md b/tutorials/training/source_en/advanced_use/debug_in_pynative_mode.md index c1951122be..73609b11ed 100644 --- a/tutorials/training/source_en/advanced_use/debug_in_pynative_mode.md +++ b/tutorials/training/source_en/advanced_use/debug_in_pynative_mode.md @@ -26,7 +26,7 @@ By default, MindSpore is in PyNative mode. You can switch it to the graph mode b In PyNative mode, single operators, common functions, network inference, and separated gradient calculation can be executed. The following describes the usage and precautions. -> In PyNative mode, operators are executed asynchronously on the device to improve performance. Therefore, when an error occurs during operator excution, the error information may be displayed after the program is executed. 
+> In PyNative mode, operators are executed asynchronously on the device to improve performance. Therefore, when an error occurs during operator execution, the error information may be displayed after the program is executed. ## Executing a Single Operator @@ -73,22 +73,22 @@ Output: [ 0.05016355 0.03958241 0.03958241 0.03958241 0.03443141]]]] ``` - ## Executing a Common Function Combine multiple operators into a function, call the function to execute the operators, and output the result, as shown in the following example: -**Example Code** +**Example Code:** + ```python import numpy as np from mindspore import context, Tensor -from mindspore.ops import functional as F +import mindspore.ops as ops context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") def tensor_add_func(x, y): - z = F.tensor_add(x, y) - z = F.tensor_add(z, x) + z = ops.tensor_add(x, y) + z = ops.tensor_add(z, x) return z x = Tensor(np.ones([3, 3], dtype=np.float32)) @@ -97,7 +97,7 @@ output = tensor_add_func(x, y) print(output.asnumpy()) ``` -**Output** +**Output:** ```python [[3. 3. 3.] @@ -107,7 +107,6 @@ print(output.asnumpy()) > Parallel execution and summary are not supported in PyNative mode, so parallel and summary related operators cannot be used. - ### Improving PyNative Performance MindSpore provides the Staging function to improve the execution speed of inference tasks in PyNative mode.
This function compiles Python functions or Python class methods into computational graphs in PyNative mode and improves the execution speed by using graph optimization technologies, as shown in the following example: @@ -116,7 +115,7 @@ MindSpore provides the Staging function to improve the execution speed of infere import numpy as np import mindspore.nn as nn from mindspore import context, Tensor -import mindspore.ops.operations as P +import mindspore.ops as ops from mindspore.common.api import ms_function context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") @@ -124,7 +123,7 @@ context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") class TensorAddNet(nn.Cell): def __init__(self): super(TensorAddNet, self).__init__() - self.add = P.TensorAdd() + self.add = ops.TensorAdd() @ms_function def construct(self, x, y): @@ -136,11 +135,12 @@ y = Tensor(np.ones([4, 4]).astype(np.float32)) net = TensorAddNet() z = net(x, y) # Staging mode -tensor_add = P.TensorAdd() +tensor_add = ops.TensorAdd() res = tensor_add(x, z) # PyNative mode print(res.asnumpy()) ``` -**Output** + +**Output:** ```python [[3. 3. 3. 3.] 
@@ -153,18 +153,18 @@ In the preceding code, the `ms_function` decorator is added before `construct` o It should be noted that, in a function to which the `ms_function` decorator is added, if an operator (such as `pooling` or `tensor_add`) that does not need parameter training is included, the operator can be directly called in the decorated function, as shown in the following example: -**Example Code** +**Example Code:** ```python import numpy as np import mindspore.nn as nn from mindspore import context, Tensor -import mindspore.ops.operations as P +import mindspore.ops as ops from mindspore.common.api import ms_function context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") -tensor_add = P.TensorAdd() +tensor_add = ops.TensorAdd() @ms_function def tensor_add_fn(x, y): @@ -176,7 +176,8 @@ y = Tensor(np.ones([4, 4]).astype(np.float32)) z = tensor_add_fn(x, y) print(z.asnumpy()) ``` -**Output** + +**Output:** ```shell [[2. 2. 2. 2.] @@ -187,7 +188,7 @@ print(z.asnumpy()) If the decorated function contains operators (such as `Convolution` and `BatchNorm`) that require parameter training, these operators must be instantiated before the decorated function is called, as shown in the following example: -**Example Code** +**Example Code:** ```python import numpy as np @@ -209,7 +210,7 @@ z = conv_fn(Tensor(input_data)) print(z.asnumpy()) ``` -**Output** +**Output:** ```shell [[[[ 0.10377571 -0.0182163 -0.05221086] @@ -245,15 +246,14 @@ print(z.asnumpy()) [ 0.0377498 -0.06117418 0.00546303]]]] ``` - ## Debugging Network Train Model In PyNative mode, the gradient can be calculated separately. As shown in the following example, `GradOperation` is used to calculate all input gradients of the function or the network. Note that the inputs have to be Tensor. 
-**Example Code** +**Example Code:** ```python -from mindspore.ops import composite as C +import mindspore.ops as ops import mindspore.context as context context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") @@ -262,12 +262,12 @@ def mul(x, y): return x * y def mainf(x, y): - return C.GradOperation(get_all=True)(mul)(x, y) + return ops.GradOperation(get_all=True)(mul)(x, y) print(mainf(Tensor(1, mstype.int32), Tensor(2, mstype.int32))) ``` -**Output** +**Output:** ```python (2, 1) @@ -275,13 +275,12 @@ print(mainf(Tensor(1, mstype.int32), Tensor(2, mstype.int32))) During network training, obtain the gradient, call the optimizer to optimize parameters (the breakpoint cannot be set during the reverse gradient calculation), and calculate the loss values. Then, network training is implemented in PyNative mode. -**Complete LeNet Sample Code** +**Complete LeNet Sample Code:** ```python import numpy as np import mindspore.nn as nn -import mindspore.ops.operations as P -from mindspore.ops import composite as C +import mindspore.ops as ops from mindspore.common import dtype as mstype from mindspore import context, Tensor, ParameterTuple from mindspore.common.initializer import TruncatedNormal @@ -312,7 +311,7 @@ class LeNet5(nn.Cell): Lenet network Args: num_class (int): Num classes. Default: 10. 
- + Returns: Tensor, output tensor @@ -330,7 +329,7 @@ class LeNet5(nn.Cell): self.fc3 = fc_with_initialize(84, self.num_class) self.relu = nn.ReLU() self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - self.reshape = P.Reshape() + self.reshape = ops.Reshape() def construct(self, x): x = self.conv1(x) @@ -346,8 +345,8 @@ class LeNet5(nn.Cell): x = self.relu(x) x = self.fc3(x) return x - - + + class GradWrap(nn.Cell): """ GradWrap definition """ def __init__(self, network): @@ -357,7 +356,7 @@ class GradWrap(nn.Cell): def construct(self, x, label): weights = self.weights - return C.GradOperation(get_by_list=True)(self.network, weights)(x, label) + return ops.GradOperation(get_by_list=True)(self.network, weights)(x, label) net = LeNet5() optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) @@ -376,7 +375,7 @@ loss = loss_output.asnumpy() print(loss) ``` -**Output** +**Output:** ```python 2.3050091 diff --git a/tutorials/training/source_en/advanced_use/distributed_training_ascend.md b/tutorials/training/source_en/advanced_use/distributed_training_ascend.md index 2a26afe249..2e78c0d699 100644 --- a/tutorials/training/source_en/advanced_use/distributed_training_ascend.md +++ b/tutorials/training/source_en/advanced_use/distributed_training_ascend.md @@ -70,6 +70,7 @@ The following uses the Ascend 910 AI processor as an example. The JSON configura "status": "completed" } ``` + The following parameters need to be modified based on the actual training environment: - `server_count`: number of hosts. @@ -78,11 +79,11 @@ The following parameters need to be modified based on the actual training enviro - `device_ip`: IP address of the integrated NIC. You can run the `cat /etc/hccn.conf` command on the current host. The key value of `address_x` is the IP address of the NIC. - `rank_id`: logical sequence number of a device, which starts from 0. 
- ### Calling the Collective Communication Library The Huawei Collective Communication Library (HCCL) is used for the communication of MindSpore parallel distributed training and can be found in the Ascend 310 AI processor software package. In addition, `mindspore.communication.management` encapsulates the collective communication API provided by the HCCL to help users configure distributed information. > HCCL implements multi-device multi-node communication based on the Ascend AI processor. The common restrictions on using the distributed service are as follows. For details, see the HCCL documentation. +> > - In a single-node system, a cluster of 1, 2, 4, or 8 devices is supported. In a multi-node system, a cluster of 8 x N devices is supported. > - Each host has four devices numbered 0 to 3 and four devices numbered 4 to 7 deployed on two different networks. During training of 2 or 4 devices, the devices must be connected and clusters cannot be created across networks. > - The server hardware architecture and operating system require the symmetrical multi-processing (SMP) mode. @@ -97,10 +98,11 @@ from mindspore.communication.management import init if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=int(os.environ["DEVICE_ID"])) init() - ... + ... ``` In the preceding code: + - `mode=context.GRAPH_MODE`: sets the running mode to graph mode for distributed training. (The PyNative mode does not support parallel running.) - `device_id`: physical sequence number of a device, that is, the actual sequence number of the device on the corresponding host. - `init`: enables HCCL communication and completes the distributed training initialization. @@ -109,7 +111,6 @@ In the preceding code: During distributed training, data is imported in data parallel mode. The following takes the CIFAR-10 dataset as an example to describe how to import the CIFAR-10 dataset in data parallel mode. 
`data_path` indicates the dataset path, which is also the path of the `cifar-10-batches-bin` folder. - ```python import mindspore.common.dtype as mstype import mindspore.dataset as ds @@ -122,12 +123,12 @@ def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size= resize_width = 224 rescale = 1.0 / 255.0 shift = 0.0 - + # get rank_id and rank_size rank_id = get_rank() rank_size = get_group_size() data_set = ds.Cifar10Dataset(data_path, num_shards=rank_size, shard_id=rank_id) - + # define map operations random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4)) random_horizontal_op = vision.RandomHorizontalFlip() @@ -155,7 +156,9 @@ def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size= return data_set ``` + Different from the single-node system, the multi-node system needs to transfer the `num_shards` and `shard_id` parameters to the dataset API. The two parameters correspond to the number of devices and logical sequence numbers of devices, respectively. You are advised to obtain the parameters through the HCCL API. + - `get_rank`: obtains the ID of the current device in the cluster. - `get_group_size`: obtains the number of devices. @@ -172,40 +175,39 @@ Automatic parallelism splits models using the operator granularity and obtains t In the Loss function, the `SoftmaxCrossEntropyWithLogits` is expanded into multiple small operators for implementation according to a mathematical formula. 
The sample code is as follows: ```python -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore import Tensor -import mindspore.ops.functional as F import mindspore.common.dtype as mstype import mindspore.nn as nn class SoftmaxCrossEntropyExpand(nn.Cell): def __init__(self, sparse=False): super(SoftmaxCrossEntropyExpand, self).__init__() - self.exp = P.Exp() - self.sum = P.ReduceSum(keep_dims=True) - self.onehot = P.OneHot() + self.exp = ops.Exp() + self.sum = ops.ReduceSum(keep_dims=True) + self.onehot = ops.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) - self.div = P.Div() - self.log = P.Log() - self.sum_cross_entropy = P.ReduceSum(keep_dims=False) - self.mul = P.Mul() - self.mul2 = P.Mul() - self.mean = P.ReduceMean(keep_dims=False) + self.div = ops.Div() + self.log = ops.Log() + self.sum_cross_entropy = ops.ReduceSum(keep_dims=False) + self.mul = ops.Mul() + self.mul2 = ops.Mul() + self.mean = ops.ReduceMean(keep_dims=False) self.sparse = sparse - self.max = P.ReduceMax(keep_dims=True) - self.sub = P.Sub() - + self.max = ops.ReduceMax(keep_dims=True) + self.sub = ops.Sub() + def construct(self, logit, label): logit_max = self.max(logit, -1) exp = self.exp(self.sub(logit, logit_max)) exp_sum = self.sum(exp, -1) softmax_result = self.div(exp, exp_sum) if self.sparse: - label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) softmax_result_log = self.log(softmax_result) loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) - loss = self.mul2(F.scalar_to_array(-1.0), loss) + loss = self.mul2(ops.scalar_to_array(-1.0), loss) loss = self.mean(loss, -1) return loss @@ -252,11 +254,14 @@ def test_train_cifar(epoch_size=10): model = Model(net, loss_fn=loss, optimizer=opt) model.train(epoch_size, dataset, callbacks=[loss_cb], dataset_sink_mode=True) ``` + In the 
preceding code: + - `dataset_sink_mode=True`: uses the dataset sink mode. That is, the training computing is sunk to the hardware platform for execution. - `LossMonitor`: returns the loss value through the callback function to monitor the loss function. ## Running the Script + After the script required for training is edited, run the corresponding command to call the script. Currently, MindSpore distributed execution uses the single-device single-process running mode. That is, one process runs on each device, and the number of total processes is the same as the number of devices that are being used. For device 0, the corresponding process is executed in the foreground. For other devices, the corresponding processes are executed in the background. You need to create a directory for each process to store log information and operator compilation information. The following takes the distributed training script for eight devices as an example to describe how to run the script: @@ -318,6 +323,7 @@ cd ../ The variables `DATA_PATH` and `RANK_SIZE` need to be transferred to the script, which indicate the path of the dataset and the number of devices, respectively. The necessary environment variables are as follows: + - `RANK_TABLE_FILE`: path for storing the networking information file. - `DEVICE_ID`: actual sequence number of the current device on the corresponding host. - `RANK_ID`: logical sequence number of the current device. @@ -327,7 +333,7 @@ The running time is about 5 minutes, which is mainly occupied by operator compil Log files are saved in the `device` directory. The `env.log` file records environment variable information. The `train.log` file records the loss function information. 
The following is an example: -``` +```text epoch: 1 step: 156, loss is 2.0084016 epoch: 2 step: 156, loss is 1.6407638 epoch: 3 step: 156, loss is 1.6164391 @@ -387,7 +393,7 @@ from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMoni from mindspore.communication.management import get_rank from mindspore.common.parameter import Parameter from mindspore import Tensor -import mindspore.ops.operations as P +import mindspore.ops as ops import numpy as np # define network class DataParallelNet(Cell): @@ -396,7 +402,7 @@ class DataParallelNet(Cell): weight_np = np.full(test_size, 0.1, dtype=np.float32) self.weight = Parameter(Tensor(weight_np), name="fc_weight", layerwise_parallel=layerwise_parallel) self.relu = ReLU() - self.fc = P.MatMul(transpose_a=transpose_a, transpose_b=transpose_b) + self.fc = ops.MatMul(transpose_a=transpose_a, transpose_b=transpose_b) if strategy is not None: self.fc.shard(strategy) @@ -417,7 +423,7 @@ strategy = ((1, 1), (1, 8)) net = DataParallelNet(strategy=strategy) # reset parallel mode context.reset_auto_parallel_context() -# set parallel mode, data parallel mode is selected for training and model saving. If you want to choose auto parallel +# set parallel mode, data parallel mode is selected for training and model saving. If you want to choose auto parallel # mode, you can simply change the value of parallel_mode parameter to ParallelMode.AUTO_PARALLEL. 
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=8) ``` @@ -455,8 +461,8 @@ class SemiAutoParallelNet(Cell): equal_np = np.full(test_size, 0.1, dtype=np.float32) self.mul_weight = Parameter(Tensor(mul_np), name="mul_weight") self.equal_weight = Parameter(Tensor(equal_np), name="equal_weight") - self.mul = P.Mul() - self.equal = P.Equal() + self.mul = ops.Mul() + self.equal = ops.Equal() if strategy is not None: self.mul.shard(strategy) self.equal.shard(strategy2) @@ -478,10 +484,10 @@ strategy = ((1, 1), (1, 8)) net = SemiAutoParallelNet(strategy=strategy, strategy2=strategy) # reset parallel mode context.reset_auto_parallel_context() -# set parallel mode, data parallel mode is selected for training and model saving. If you want to choose auto parallel +# set parallel mode, semi auto parallel mode is selected for training and model saving. If you want to choose auto parallel # mode, you can simply change the value of parallel_mode parameter to ParallelMode.AUTO_PARALLEL. context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, - strategy_ckpt_save_file='./rank_{}_ckpt/strategy.txt'.format(get_rank)) + strategy_ckpt_save_file='./rank_{}_ckpt/strategy.txt'.format(get_rank())) ``` Then set the checkpoint saving policy, optimizer and loss function as required. The code is as follows: @@ -510,12 +516,14 @@ For the three parallel training modes described above, the checkpoint file is sa Only by changing the code that sets the checkpoint saving policy, the checkpoint file of each card can be saved on itself.
The specific changes are as follows: Change the checkpoint configuration policy from: + ```python # config checkpoint ckpt_config = CheckpointConfig(keep_checkpoint_max=1) ``` to: + ```python # config checkpoint ckpt_config = CheckpointConfig(keep_checkpoint_max=1, integrated_save=False) @@ -525,4 +533,4 @@ It should be noted that if users chooses this checkpoint saving policy, users ne ### Hybrid Parallel Mode -For model parameter saving and loading in Hybrid Parallel Mode, please refer to [Saving and Loading Model Parameters in the Hybrid Parallel Scenario](https://www.mindspore.cn/tutorial/training/en/r1.0/advanced_use/save_load_model_hybrid_parallel.html). \ No newline at end of file +For model parameter saving and loading in Hybrid Parallel Mode, please refer to [Saving and Loading Model Parameters in the Hybrid Parallel Scenario](https://www.mindspore.cn/tutorial/training/en/r1.0/advanced_use/save_load_model_hybrid_parallel.html). diff --git a/tutorials/training/source_en/advanced_use/enable_graph_kernel_fusion.md b/tutorials/training/source_en/advanced_use/enable_graph_kernel_fusion.md index 043cf4cd85..0c6378abbe 100644 --- a/tutorials/training/source_en/advanced_use/enable_graph_kernel_fusion.md +++ b/tutorials/training/source_en/advanced_use/enable_graph_kernel_fusion.md @@ -41,7 +41,7 @@ context.set_context(enable_graph_kernel=True) import mindspore.context as context from mindspore import Tensor from mindspore.nn import Cell - from mindspore.ops import operations as P + import mindspore.ops as ops context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") # save graph ir files. 
@@ -53,8 +53,8 @@ context.set_context(enable_graph_kernel=True) class NetBasicFuse(Cell): def __init__(self): super(NetBasicFuse, self).__init__() - self.add = P.TensorAdd() - self.mul = P.Mul() + self.add = ops.TensorAdd() + self.mul = ops.Mul() def construct(self, x): mul_res = self.mul(x, 2.0) @@ -66,9 +66,9 @@ context.set_context(enable_graph_kernel=True) class NetCompositeFuse(Cell): def __init__(self): super(NetCompositeFuse, self).__init__() - self.add = P.TensorAdd() - self.mul = P.Mul() - self.pow = P.Pow() + self.add = ops.TensorAdd() + self.mul = ops.Mul() + self.pow = ops.Pow() def construct(self, x): mul_res = self.mul(x, 2.0) @@ -100,7 +100,6 @@ context.set_context(enable_graph_kernel=True) 2. `BERT-large` training network Take the training model of the `BERT-large` network as an example. For details about the dataset and training script, see . You only need to modify the `context` parameter. - ## Effect Evaluation diff --git a/tutorials/training/source_en/advanced_use/enable_mixed_precision.md b/tutorials/training/source_en/advanced_use/enable_mixed_precision.md index 27b7b242ea..be9515654c 100644 --- a/tutorials/training/source_en/advanced_use/enable_mixed_precision.md +++ b/tutorials/training/source_en/advanced_use/enable_mixed_precision.md @@ -16,7 +16,7 @@ ## Overview -The mixed precision training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. +The mixed precision training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware. 
For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users could check the reduced-precision operators by enabling INFO log and then searching 'reduce precision'. @@ -42,6 +42,7 @@ This document describes the computation process by using examples of automatic a To use the automatic mixed precision, you need to invoke the corresponding API, which takes the network to be trained and the optimizer as the input. This API converts the operators of the entire network into FP16 operators (except the `BatchNorm` and Loss operators). You can use automatic mixed precision through API `amp` or API `Model`. The procedure of using automatic mixed precision by API `amp` is as follows: + 1. Introduce the MindSpore mixed precision API `amp`. 2. Define the network. This step is the same as the common network definition. (You do not need to manually configure the precision of any specific operator.) @@ -55,7 +56,7 @@ import numpy as np import mindspore.nn as nn from mindspore import Tensor, context -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.nn import Momentum # The interface of Auto_mixed precision from mindspore import amp @@ -68,7 +69,7 @@ class Net(nn.Cell): def __init__(self, input_channel, out_channel): super(Net, self).__init__() self.dense = nn.Dense(input_channel, out_channel) - self.relu = P.ReLU() + self.relu = ops.ReLU() def construct(self, x): x = self.dense(x) @@ -93,6 +94,7 @@ output = train_network(predict, label) ``` The procedure of using automatic mixed precision by API `Model` is as follows: + 1. Introduce the MindSpore model API `Model`. 2. Define the network. This step is the same as the common network definition. (You do not need to manually configure the precision of any specific operator.) @@ -169,7 +171,8 @@ model.train(epoch=10, train_dataset=ds_train) MindSpore also supports manual mixed precision. 
It is assumed that only one dense layer in the network needs to be calculated by using FP32, and other layers are calculated by using FP16. The mixed precision is configured in the granularity of cell. The default format of a cell is FP32. The following is the procedure for implementing manual mixed precision: -1. Define the network. This step is similar to step 2 in the automatic mixed precision. + +1. Define the network. This step is similar to step 2 in the automatic mixed precision. 2. Configure the mixed precision. Use `net.to_float(mstype.float16)` to set all operators of the cell and its sub-cells to FP16. Then, configure the dense to FP32. @@ -183,7 +186,7 @@ import numpy as np import mindspore.nn as nn import mindspore.common.dtype as mstype from mindspore import Tensor, context -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.nn import WithLossCell, TrainOneStepCell from mindspore.nn import Momentum @@ -195,7 +198,7 @@ class Net(nn.Cell): def __init__(self, input_channel, out_channel): super(Net, self).__init__() self.dense = nn.Dense(input_channel, out_channel) - self.relu = P.ReLU() + self.relu = ops.ReLU() def construct(self, x): x = self.dense(x) diff --git a/tutorials/training/source_en/advanced_use/migrate_3rd_scripts_mindconverter.md b/tutorials/training/source_en/advanced_use/migrate_3rd_scripts_mindconverter.md index d80867dc18..193c6420cb 100644 --- a/tutorials/training/source_en/advanced_use/migrate_3rd_scripts_mindconverter.md +++ b/tutorials/training/source_en/advanced_use/migrate_3rd_scripts_mindconverter.md @@ -22,12 +22,10 @@ MindConverter is a migration tool to transform the model scripts from PyTorch to Mindspore. Users can migrate their PyTorch models to Mindspore rapidly with minor changes according to the conversion report. - ## Installation Mindconverter is a submodule in MindInsight. Please follow the [Guide](https://www.mindspore.cn/install/en) here to install MindInsight. 
- ## Usage MindConverter currently only provides command-line interface. Here is the manual page. @@ -71,7 +69,7 @@ optional arguments: > The AST mode will be enabled, if both `--in_file` and `--model_file` are specified. -For the Graph mode, `--shape` is mandatory. +For the Graph mode, `--shape` is mandatory. For the AST mode, `--shape` is ignored. @@ -83,8 +81,6 @@ Please note that your original PyTorch project is included in the module search > MindConverter needs the original PyTorch scripts because of the reverse serialization. - - ## Scenario MindConverter provides two modes for different migration demands. @@ -96,13 +92,12 @@ The AST mode is recommended for the first demand. It parses and analyzes PyTorch For the second demand, the Graph mode is recommended. As the computational graph is a standard descriptive language, it is not affected by user's coding style. This mode may have more operators converted as long as these operators are supported by MindConverter. -Some typical image classification networks such as ResNet and VGG have been tested for the Graph mode. Note that: +Some typical image classification networks such as ResNet and VGG have been tested for the Graph mode. Note that: > 1. Currently, the Graph mode does not support models with multiple inputs. Only models with a single input and single output are supported. > 2. The Dropout operator will be lost after conversion because the inference mode is used to load the PyTorch model. Manually re-implement is necessary. > 3. The Graph-based mode will be continuously developed and optimized with further updates. - ## Example ### AST-Based Conversion @@ -123,8 +118,8 @@ line : [UnConvert] 'operator' didn't convert. ... For non-transformed operators, the original code keeps. Please manually migrate them. [Click here](https://www.mindspore.cn/doc/note/en/r1.0/index.html#operator_api) for more information about operator mapping. 
- Here is an example of the conversion report: + ```text [Start Convert] [Insert] 'import mindspore.ops.operations as P' is inserted to the converted file. @@ -137,7 +132,6 @@ Here is an example of the conversion report: For non-transformed operators, suggestions are provided in the report. For instance, MindConverter suggests that replace `torch.nn.AdaptiveAvgPool2d` with `mindspore.ops.operations.ReduceMean`. - ### Graph-Based Conversion Assume the PyTorch model (.pth file) is located at `/home/user/model.pth`, with input shape (3, 224, 224) and the original PyTorch script is at `/home/user/project/model_training`. Output the transformed MindSpore script to `/home/user/output`, with the conversion report to `/home/user/output/report`. Use the following command: @@ -174,7 +168,7 @@ class Classifier(nn.Cell): It is convenient to replace the operators according to the `input_shape` and `output_shape` parameters. The replacement is like this: ```python -from mindspore.ops import operations as P +import mindspore.ops as ops ... class Classifier(nn.Cell): @@ -182,7 +176,7 @@ class Classifier(nn.Cell): def __init__(self): super(Classifier, self).__init__() ... - self.reshape = P.Reshape(input_shape=(1, 1280, 1, 1), + self.reshape = ops.Reshape(input_shape=(1, 1280, 1, 1), output_shape=(1, 1280)) ... @@ -194,12 +188,9 @@ class Classifier(nn.Cell): ``` -> `--output` and `--report` are optional. MindConverter creates an `output` folder under the current working directory, and outputs generated scripts and conversion reports to it. - +> `--output` and `--report` are optional. MindConverter creates an `output` folder under the current working directory, and outputs generated scripts and conversion reports to it. ## Caution 1. PyTorch is not an explicitly stated dependency library in MindInsight. The Graph conversion requires the consistent PyTorch version as the model is trained. (MindConverter recommends PyTorch 1.4.0 or 1.6.0) -2. 
This script conversion tool relies on operators which supported by MindConverter and MindSpore. Unsupported operators may not successfully mapped to MindSpore operators. You can manually edit, or implement the mapping based on MindConverter, and contribute to our MindInsight repository. We appreciate your support for the MindSpore community. - - +2. This script conversion tool relies on operators that are supported by MindConverter and MindSpore. Unsupported operators may not be successfully mapped to MindSpore operators. You can manually edit, or implement the mapping based on MindConverter, and contribute to our MindInsight repository. We appreciate your support for the MindSpore community. diff --git a/tutorials/training/source_en/advanced_use/save_load_model_hybrid_parallel.md b/tutorials/training/source_en/advanced_use/save_load_model_hybrid_parallel.md index 9cc2742b69..608515ffc1 100644 --- a/tutorials/training/source_en/advanced_use/save_load_model_hybrid_parallel.md +++ b/tutorials/training/source_en/advanced_use/save_load_model_hybrid_parallel.md @@ -90,11 +90,11 @@ Finally, save the updated parameter list to a file through the API provided by M Define the network, call the `load_checkpoint` and `load_param_into_net` APIs to import the checkpoint files to the network in rank id order, and then call `parameters_and_names` API to obtain all parameters in this network. -``` -net = Net() +```python +net = Net() opt = Momentum(learning_rate=0.01, momentum=0.9, params=net.get_parameters()) net = TrainOneStepCell(net, opt) -param_dicts = [] +param_dicts = [] for i in range(rank_size): file_name = os.path.join("./node"+str(i), "CKP_1-4_32.ckpt") # checkpoint file name of current node param_dict = load_checkpoint(file_name) @@ -115,7 +115,7 @@ In the preceding information: Call the `build_searched_strategy` API to obtain the slice strategy of model.
-``` +```python strategy = build_searched_strategy("./strategy_train.ckpt") ``` @@ -131,7 +131,7 @@ The parameter name is model\_parallel\_weight and the dividing strategy is to pe 1. Obtain the data value on all nodes for model parallel parameters. - ``` + ```python sliced_parameters = [] for i in range(4): parameter = param_dicts[i].get("model_parallel_weight") @@ -142,32 +142,32 @@ The parameter name is model\_parallel\_weight and the dividing strategy is to pe 2. Call the `merge_sliced_parameter` API to merge the sliced parameters. + ```python + merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) ``` - merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) - ``` - + > If there are multiple model parallel parameters, repeat steps 1 to 2 to process them one by one. ### Saving the Data and Generating a New Checkpoint File 1. Convert `param_dict` to `param_list`. - ``` + ```python param_list = [] for (key, value) in param_dict.items(): each_param = {} each_param["name"] = key if isinstance(value.data, Tensor): - param_data = value.data + param_data = value.data else: - param_data = Tensor(value.data) + param_data = Tensor(value.data) each_param["data"] = param_data - param_list.append(each_param) + param_list.append(each_param) ``` 2. Call the `save_checkpoint` API to write the parameter data to a file and generate a new checkpoint file. - ``` + ```python save_checkpoint(param_list, “./CKP-Integrated_1-4_32.ckpt”) ``` @@ -186,7 +186,7 @@ If you need to load the integrated and saved checkpoint file to multi-device tra Call the `load_checkpoint` API to load model parameter data from the checkpoint file. -``` +```python param_dict = load_checkpoint("./CKP-Integrated_1-4_32.ckpt") ``` @@ -205,7 +205,7 @@ The following uses a specific model parameter as an example. The parameter name In the following code example, data is divided into two slices in dimension 0. 
- ``` + ```python new_param = parameter_dict[“model_parallel_weight”] slice_list = np.split(new_param.data.asnumpy(), 2, axis=0) new_param_moments = parameter_dict[“moments.model_parallel_weight”] @@ -214,8 +214,10 @@ The following uses a specific model parameter as an example. The parameter name Data after dividing: - slice_list[0] --- [1, 2, 3, 4] Corresponding to device0 - slice_list[1] --- [5, 6, 7, 8] Corresponding to device1 + ```text + slice_list[0] --- [1, 2, 3, 4] Corresponding to device0 + slice_list[1] --- [5, 6, 7, 8] Corresponding to device1 + ``` Similar to slice\_list, slice\_moments\_list is divided into two tensors with the shape of \[1, 4]. @@ -223,7 +225,7 @@ The following uses a specific model parameter as an example. The parameter name Obtain rank\_id of the current node and load data based on rank\_id. - ``` + ```python rank = get_rank() tensor_slice = Tensor(slice_list[rank]) tensor_slice_moments = Tensor(slice_moments_list[rank]) @@ -233,7 +235,7 @@ The following uses a specific model parameter as an example. The parameter name 3. Modify values of model parameters. - ``` + ```python new_param.set_data(tensor_slice, True) new_param_moments.set_data(tensor_slice_moments, True) ``` @@ -244,8 +246,8 @@ The following uses a specific model parameter as an example. The parameter name Call the `load_param_into_net` API to load the model parameter data to the network. -``` -net = Net() +```python +net = Net() opt = Momentum(learning_rate=0.01, momentum=0.9, params=parallel_net.get_parameters()) load_param_into_net(net, param_dict) load_param_into_net(opt, param_dict) @@ -273,40 +275,38 @@ User process: 1. 
Run the following script to integrate the checkpoint files: - - - ``` + ```bash python ./integrate_checkpoint.py "Name of the checkpoint file to be integrated" "Path and name of the checkpoint file generated after integration" "Path and name of the strategy file" "Number of nodes" ``` integrate\_checkpoint.py: - ``` + ```python import numpy as np import os import mindspore.nn as nn from mindspore import Tensor, Parameter - from mindspore.ops import operations as P + import mindspore.ops as ops from mindspore.train.serialization import save_checkpoint, load_checkpoint, build_searched_strategy, merge_sliced_parameter - + class Net(nn.Cell): def __init__(self,weight_init): super(Net, self).__init__() self.weight = Parameter(Tensor(weight_init), "model_parallel_weight", layerwise_parallel=True) - self.fc = P.MatMul(transpose_b=True) - + self.fc = ops.MatMul(transpose_b=True) + def construct(self, x): x = self.fc(x, self.weight1) return x - + def integrate_ckpt_file(old_ckpt_file, new_ckpt_file, strategy_file, rank_size): weight = np.ones([2, 8]).astype(np.float32) net = Net(weight) opt = Momentum(learning_rate=0.01, momentum=0.9, params=net.get_parameters()) net = TrainOneStepCell(net, opt) - + # load CheckPoint into net in rank id order - param_dicts = [] + param_dicts = [] for i in range(rank_size): file_name = os.path.join("./node"+str(i), old_ckpt_file) param_dict = load_checkpoint(file_name) @@ -315,21 +315,21 @@ User process: for _, param in net.parameters_and_names(): param_dict[param.name] = param param_dicts.append(param_dict) - + strategy = build_searched_strategy(strategy_file) param_dict = {} - + for paramname in ["model_parallel_weight", "moments.model_parallel_weight"]: # get layer wise model parallel parameter sliced_parameters = [] for i in range(rank_size): parameter = param_dicts[i].get(paramname) sliced_parameters.append(parameter) - + # merge the parallel parameters of the model - merged_parameter = merge_sliced_parameter(sliced_parameters, 
strategy) + merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) param_dict[paramname] = merged_parameter - + # convert param_dict to list type data param_list = [] for (key, value) in param_dict.items(): @@ -339,14 +339,14 @@ User process: param_data = value.data else: param_data = Tensor(value.data) - each_param["data"] = param_data - param_list.append(each_param) - + each_param["data"] = param_data + param_list.append(each_param) + # call the API to generate a new CheckPoint file save_checkpoint(param_list, new_ckpt_file) - + return - + if __name__ == "__main__": try: old_ckpt_file = sys.argv[1] @@ -363,10 +363,10 @@ User process: Before the script is executed, the parameter values in the checkpoint files are as follows: - ``` + ```text device0: name is model_parallel_weight - value is + value is [[0.87537426 1.0448935 0.86736983 0.8836905 0.77354026 0.69588304 0.9183654 0.7792076] [0.87224025 0.8726848 0.771446 0.81967723 0.88974726 0.7988162 0.72919345 0.7677011]] name is learning_rate @@ -380,7 +380,7 @@ User process: device1: name is model_parallel_weight - value is + value is [[0.9210751 0.9050457 0.9827775 0.920396 0.9240526 0.9750359 1.0275179 1.0819869] [0.73605865 0.84631145 0.9746683 0.9386582 0.82902765 0.83565056 0.9702136 1.0514659]] name is learning_rate @@ -390,11 +390,11 @@ User process: name is moments.model_weight value is [[0.2417504 0.28193963 0.06713893 0.21510397 0.23380603 0.11424308 0.0218009 -0.11969765] - [0.45955992 0.22664294 0.01990281 0.0731914 0.27125207 0.27298513 -0.01716102 -0.15327111]] + [0.45955992 0.22664294 0.01990281 0.0731914 0.27125207 0.27298513 -0.01716102 -0.15327111]] device2: name is model_parallel_weight - value is + value is [[1.0108461 0.8689414 0.91719437 0.8805056 0.7994629 0.8999671 0.7585804 1.0287056 ] [0.90653455 0.60146594 0.7206475 0.8306303 0.8364681 0.89625114 0.7354735 0.8447268]] name is learning_rate @@ -402,7 +402,7 @@ User process: name is momentum value is [0.9] name is 
moments.model_weight - value is + value is [[0.03440702 0.41419312 0.24817684 0.30765256 0.48516113 0.24904746 0.57791173 0.00955463] [0.13458519 0.6690533 0.49259356 0.28319967 0.25951773 0.16777472 0.45696738 0.24933104]] @@ -416,16 +416,16 @@ User process: name is momentum value is [0.9] name is moments.model_parallel_weight - value is + value is [[0.14152306 0.5040985 0.24455397 0.10907605 0.11319532 0.19538902 0.01208619 0.40430856] [-0.7773164 -0.47611716 -0.6041424 -0.6144473 -0.2651842 -0.31909415 -0.4510405 -0.12860501]] ``` After the script is executed, the parameter values in the checkpoint files are as follows: - ``` + ```text name is model_parallel_weight - value is + value is [[1.1138763 1.0962057 1.3516843 1.0812817 1.1579804 1.1078343 1.0906502 1.3207073] [0.916671 1.0781671 1.0368758 0.9680898 1.1735439 1.0628364 0.9960786 1.0135143] [0.8828271 0.7963984 0.90675324 0.9830291 0.89010954 0.897052 0.7890109 0.89784735] @@ -439,7 +439,7 @@ User process: name is momentum value is [0.9] name is moments.model_parallel_weight - value is + value is [[0.2567724 -0.07485991 0.282002 0.2456022 0.454939 0.619168 0.18964815 0.45714882] [0.25946522 0.24344791 0.45677605 0.3611395 0.23378398 0.41439137 0.5312468 0.4696194 ] [0.2417504 0.28193963 0.06713893 0.21510397 0.23380603 0.11424308 0.0218009 -0.11969765] @@ -453,14 +453,14 @@ User process: 2. Execute stage 2 training and load the checkpoint file before training. The training code needs to be supplemented based on the site requirements. 
- ``` + ```python import numpy as np import os import mindspore.nn as nn from mindspore import context from mindspore.communication.management import init from mindspore import Tensor, Parameter - from mindspore.ops import operations as P + import mindspore.ops as ops from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.communication.management import init @@ -472,7 +472,7 @@ User process: def __init__(self,weight_init): super(Net, self).__init__() self.weight = Parameter(Tensor(weight_init), "model_parallel_weight", layerwise_parallel=True) - self.fc = P.MatMul(transpose_b=True) + self.fc = ops.MatMul(transpose_b=True) def construct(self, x): x = self.fc(x, self.weight1) @@ -497,7 +497,7 @@ User process: load_param_into_net(net, param_dict) opt = Momentum(learning_rate=0.01, momentum=0.9, params=parallel_net.get_parameters()) load_param_into_net(opt, param_dict) - # train code + # train code ... if __name__ == "__main__": @@ -506,7 +506,7 @@ User process: label = np.random.random((4, 4)).astype(np.float32) train_mindspore_impl_fc(input, label, weight1) ``` - + In the preceding information: - `mode=context.GRAPH_MODE`: sets the running mode to graph mode for distributed training. (The PyNative mode does not support parallel running.) 
@@ -515,10 +515,10 @@ User process: Parameter values after loading: - ``` + ```text device0: name is model_parallel_weight - value is + value is [[0.87537426 1.0448935 0.86736983 0.8836905 0.77354026 0.69588304 0.9183654 0.7792076] [0.87224025 0.8726848 0.771446 0.81967723 0.88974726 0.7988162 0.72919345 0.7677011] [0.8828271 0.7963984 0.90675324 0.9830291 0.89010954 0.897052 0.7890109 0.89784735] @@ -536,7 +536,7 @@ User process: device1: name is model_parallel_weight - value is + value is [[1.0053468 0.98402303 0.99762845 0.97587246 1.0259694 1.0055295 0.99420834 0.9496847] [1.0851002 1.0295962 1.0999886 1.0958165 0.9765328 1.146529 1.0970603 1.1388365] [0.7147005 0.9168278 0.80178416 0.6258351 0.8413766 0.5909515 0.696347 0.71359116] @@ -550,5 +550,5 @@ User process: [[0.03440702 0.41419312 0.24817684 0.30765256 0.48516113 0.24904746 0.57791173 0.00955463] [0.13458519 0.6690533 0.49259356 0.28319967 0.25951773 0.16777472 0.45696738 0.24933104] [0.14152306 0.5040985 0.24455397 0.10907605 0.11319532 0.19538902 0.01208619 0.40430856] - [-0.7773164 -0.47611716 -0.6041424 -0.6144473 -0.2651842 -0.31909415 -0.4510405 -0.12860501]] + [-0.7773164 -0.47611716 -0.6041424 -0.6144473 -0.2651842 -0.31909415 -0.4510405 -0.12860501]] ``` diff --git a/tutorials/training/source_en/advanced_use/summary_record.md b/tutorials/training/source_en/advanced_use/summary_record.md index b4e423dcfa..50d28d4590 100644 --- a/tutorials/training/source_en/advanced_use/summary_record.md +++ b/tutorials/training/source_en/advanced_use/summary_record.md @@ -41,6 +41,7 @@ The `Callback` mechanism in MindSpore provides a quick and easy way to collect c When you write a training script, you just instantiate the `SummaryCollector` and apply it to either `model.train` or `model.eval`. You can automatically collect some common summary data. `SummaryCollector` detailed usage can reference `API` document `mindspore.train.callback.SummaryCollector`. 
The sample code is as follows: + ```python import mindspore import mindspore.nn as nn @@ -48,7 +49,7 @@ from mindspore import context from mindspore import Tensor from mindspore.train import Model from mindspore.common.initializer import TruncatedNormal -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.train.callback import SummaryCollector from mindspore.nn.metrics import Accuracy @@ -77,7 +78,7 @@ class AlexNet(nn.Cell): self.conv4 = conv(384, 384, 3, pad_mode="same") self.conv5 = conv(384, 256, 3, pad_mode="same") self.relu = nn.ReLU() - self.max_pool2d = P.MaxPool(ksize=3, strides=2) + self.max_pool2d = ops.MaxPool(ksize=3, strides=2) self.flatten = nn.Flatten() self.fc1 = fc_with_initialize(6*6*256, 4096) self.fc2 = fc_with_initialize(4096, 4096) @@ -126,9 +127,10 @@ model.eval(ds_eval, callbacks=[summary_collector]) ### Method two: Custom collection of network data with summary operators and SummaryCollector -In addition to providing the `SummaryCollector` that automatically collects some summary data, MindSpore provides summary operators that enable custom collection other data on the network, such as the input of each convolutional layer, or the loss value in the loss function, etc. +In addition to providing the `SummaryCollector` that automatically collects some summary data, MindSpore provides summary operators that enable custom collection other data on the network, such as the input of each convolutional layer, or the loss value in the loss function, etc. Summary operators currently supported: + - [ScalarSummary](https://www.mindspore.cn/doc/api_python/en/r1.0/mindspore/mindspore.ops.html#mindspore.ops.ScalarSummary): Record a scalar data. - [TensorSummary](https://www.mindspore.cn/doc/api_python/en/r1.0/mindspore/mindspore.ops.html#mindspore.ops.TensorSummary): Record a tensor data. 
- [ImageSummary](https://www.mindspore.cn/doc/api_python/en/r1.0/mindspore/mindspore.ops.html#mindspore.ops.ImageSummary): Record a image data. @@ -147,8 +149,7 @@ The sample code is as follows: ```python from mindspore import context, Tensor, nn from mindspore.common import dtype as mstype -from mindspore.ops import operations as P -from mindspore.ops import functional as F +import mindspore.ops as ops from mindspore.nn import Optimizer @@ -156,17 +157,17 @@ class CrossEntropyLoss(nn.Cell): """Loss function definition.""" def __init__(self): super(CrossEntropyLoss, self).__init__() - self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() - self.mean = P.ReduceMean() - self.one_hot = P.OneHot() + self.cross_entropy = ops.SoftmaxCrossEntropyWithLogits() + self.mean = ops.ReduceMean() + self.one_hot = ops.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) # Init ScalarSummary - self.scalar_summary = P.ScalarSummary() + self.scalar_summary = ops.ScalarSummary() def construct(self, logits, label): - label = self.one_hot(label, F.shape(logits)[1], self.on_value, self.off_value) + label = self.one_hot(label, ops.shape(logits)[1], self.on_value, self.off_value) loss = self.cross_entropy(logits, label)[0] loss = self.mean(loss, (-1,)) @@ -180,8 +181,8 @@ class MyOptimizer(Optimizer): def __init__(self, learning_rate, params, ......): ...... # Initialize ScalarSummary - self.scalar_summary = P.ScalarSummary() - self.histogram_summary = P.HistogramSummary() + self.scalar_summary = ops.ScalarSummary() + self.histogram_summary = ops.HistogramSummary() self.weight_names = [param.name for param in self.parameters] def construct(self, grads): @@ -193,7 +194,7 @@ class MyOptimizer(Optimizer): self.histogram_summary(self.weight_names[0], self.paramters[0]) # Record gradient self.histogram_summary(self.weight_names[0] + ".gradient", grads[0]) - + ...... @@ -204,9 +205,9 @@ class Net(nn.Cell): ...... 
# Init ImageSummary - self.image_summary = P.ImageSummary() + self.image_summary = ops.ImageSummary() # Init TensorSummary - self.tensor_summary = P.TensorSummary() + self.tensor_summary = ops.TensorSummary() def construct(self, data): # Record image by Summary operator @@ -252,18 +253,18 @@ It is then recorded into the summary log file through the `SummaryRecord` module The sample code is as follows: -``` +```python from mindspore.train.callback import Callback from mindspore.train.summary import SummaryRecord class ConfusionMatrixCallback(Callback): def __init__(self, summary_dir): self._summary_dir = summary_dir - + def __enter__(self): # init you summary record in here, when the train script run, it will be inited before training self.summary_record = SummaryRecord(summary_dir) - + def __exit__(self, *exc_args): # Note: you must close the summary record, it will release the process pool resource # else your training script will not exit from training. @@ -274,7 +275,7 @@ class ConfusionMatrixCallback(Callback): cb_params = run_context.run_context.original_args() # create a confusion matric image, and record it to summary file - confusion_martrix = create_confusion_matrix(cb_params) + confusion_martrix = create_confusion_matrix(cb_params) self.summary_record.add_value('image', 'confusion_matrix', confusion_matric) self.summary_record.record(cb_params.cur_step) @@ -291,24 +292,28 @@ the `save_graphs` option of `context.set_context` in the training script is set In the saved files, `ms_output_after_hwopt.pb` is the computational graph after operator fusion, which can be viewed on the web page. ## Run MindInsight + After completing the data collection in the tutorial above, you can start MindInsight to visualize the collected data. When start MindInsight, you need to specify the summary log file directory with the `--summary-base-dir` parameter. 
The specified summary log file directory can be the output directory of a training or the parent directory of the output directory of multiple training. The output directory structure for a training is as follows -``` + +```text └─summary_dir events.out.events.summary.1596869898.hostname_MS events.out.events.summary.1596869898.hostname_lineage ``` Start command: + ```Bash mindinsight start --summary-base-dir ./summary_dir ``` The output directory structure of multiple training is as follows: -``` + +```text └─summary ├─summary_dir1 │ events.out.events.summary.1596869898.hostname_MS @@ -320,6 +325,7 @@ The output directory structure of multiple training is as follows: ``` Start command: + ```Bash mindinsight start --summary-base-dir ./summary ``` @@ -327,6 +333,7 @@ mindinsight start --summary-base-dir ./summary After successful startup, the visual page can be viewed by visiting the `http://127.0.0.1:8080` address through the browser. Stop MindInsight command: + ```Bash mindinsight stop ``` @@ -339,12 +346,13 @@ For more parameter Settings, see the [MindInsight related commands](https://www. 2. Multiple `SummaryRecord` instances can not be used at the same time. (`SummaryRecord` is used in `SummaryCollector`) - If you use two or more instances of `SummaryCollector` in the callback list of 'model.train' or 'model.eval', it is seen as using multiple `SummaryRecord` instances at the same time, and it will cause recoding data fail. + If you use two or more instances of `SummaryCollector` in the callback list of 'model.train' or 'model.eval', it is seen as using multiple `SummaryRecord` instances at the same time, and it will cause data recording to fail. If the custom callback use `SummaryRecord`, it can not be used with `SummaryCollector` at the same time. Right code: - ``` + + ```python ...
summary_collector = SummaryCollector('./summary_dir') model.train(2, train_dataset, callbacks=[summary_collector]) @@ -353,7 +361,8 @@ For more parameter Settings, see the [MindInsight related commands](https://www. ``` Wrong code: - ``` + + ```python ... summary_collector1 = SummaryCollector('./summary_dir1') summary_collector2 = SummaryCollector('./summary_dir2') @@ -361,7 +370,8 @@ For more parameter Settings, see the [MindInsight related commands](https://www. ``` Wrong code: - ``` + + ```python ... # Note: the 'ConfusionMatrixCallback' is user-defined, and it uses SummaryRecord to record data. confusion_callback = ConfusionMatrixCallback('./summary_dir1') @@ -371,4 +381,4 @@ For more parameter Settings, see the [MindInsight related commands](https://www. 3. In each Summary log file directory, only one training data should be placed. If a summary log directory contains summary data from multiple training, MindInsight will overlay the summary data from these training when visualizing the data, which may not be consistent with the expected visualizations. -4. Currently, `SummaryCollector` and `SummaryRecord` do not support scenarios with GPU multi-card running. \ No newline at end of file +4. Currently, `SummaryCollector` and `SummaryRecord` do not support scenarios with GPU multi-card running. diff --git a/tutorials/training/source_en/use/load_model_for_inference_and_transfer.md b/tutorials/training/source_en/use/load_model_for_inference_and_transfer.md index b7a75262b2..2b62d0dce2 100644 --- a/tutorials/training/source_en/use/load_model_for_inference_and_transfer.md +++ b/tutorials/training/source_en/use/load_model_for_inference_and_transfer.md @@ -1,4 +1,4 @@ -# Loading a Model for Inference and Transfer Learning +# Loading a Model for Inference and Transfer Learning `Linux` `Ascend` `GPU` `CPU` `Model Loading` `Beginner` `Intermediate` `Expert` @@ -50,6 +50,7 @@ The `eval` method validates the accuracy of the trained model. 
In the retraining and fine-tuning scenarios for task interruption, you can load network parameters and optimizer parameters to the model. The sample code is as follows: + ```python # return a parameter dict for model param_dict = load_checkpoint("resnet50-2_32.ckpt") @@ -105,11 +106,11 @@ The `load_checkpoint` method returns a parameter dictionary and then the `load_p ### For Transfer Training -When loading a model with `mindspore_hub.load` API, we can add an extra argument to load the feature extraction part of the model only. So we can easily add new layers to perform transfer learning. This feature can be found in the related model page when an extra argument (e.g., include_top) has been integrated into the model construction by the model developer. The value of `include_top` is True or False, indicating whether to keep the top layer in the fully-connected network. +When loading a model with `mindspore_hub.load` API, we can add an extra argument to load the feature extraction part of the model only. So we can easily add new layers to perform transfer learning. This feature can be found in the related model page when an extra argument (e.g., include_top) has been integrated into the model construction by the model developer. The value of `include_top` is True or False, indicating whether to keep the top layer in the fully-connected network. -We use GoogleNet as example to illustrate how to load a model trained on ImageNet dataset and then perform transfer learning (re-training) on specific sub-task dataset. The main steps are listed below: +We use GoogleNet as example to illustrate how to load a model trained on ImageNet dataset and then perform transfer learning (re-training) on specific sub-task dataset. The main steps are listed below: -1. Search the model of interest on [MindSpore Hub Website](https://www.mindspore.cn/resources/hub/) and get the related `url`. +1. 
Search the model of interest on [MindSpore Hub Website](https://www.mindspore.cn/resources/hub/) and get the related `url`. 2. Load the model from MindSpore Hub using the `url`. Note that the parameter `include_top` is provided by the model developer. @@ -118,7 +119,7 @@ We use GoogleNet as example to illustrate how to load a model trained on ImageNe from mindspore import nn, context, Tensor from mindpsore.train.serialization import save_checkpoint from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits - from mindspore.ops import operations as P + import mindspore.ops as ops from mindspore.nn import Momentum import math @@ -140,9 +141,9 @@ We use GoogleNet as example to illustrate how to load a model trained on ImageNe class ReduceMeanFlatten(nn.Cell): def __init__(self): super(ReduceMeanFlatten, self).__init__() - self.mean = P.ReduceMean(keep_dims=True) + self.mean = ops.ReduceMean(keep_dims=True) self.flatten = nn.Flatten() - + def construct(self, x): x = self.mean(x, (2, 3)) x = self.flatten(x) @@ -197,7 +198,7 @@ We use GoogleNet as example to illustrate how to load a model trained on ImageNe data, label = items data = mindspore.Tensor(data) label = mindspore.Tensor(label) - + loss = train_net(data, label) print(f"epoch: {epoch}/{epoch_size}, loss: {loss}") # Save the ckpt file for each epoch. @@ -218,7 +219,7 @@ We use GoogleNet as example to illustrate how to load a model trained on ImageNe classification_layer = nn.Dense(last_channel, num_classes) classification_layer.set_train(False) softmax = nn.Softmax() - network = nn.SequentialCell([network, reducemean_flatten, + network = nn.SequentialCell([network, reducemean_flatten, classification_layer, softmax]) # Load a pre-trained ckpt file. 
@@ -237,4 +238,4 @@ We use GoogleNet as example to illustrate how to load a model trained on ImageNe res = model.eval(eval_dataset) print("result:", res, "ckpt=", ckpt_path) - ``` \ No newline at end of file + ``` diff --git a/tutorials/training/source_zh_cn/advanced_use/apply_deep_probability_programming.md b/tutorials/training/source_zh_cn/advanced_use/apply_deep_probability_programming.md index ce3b1af214..a1cdea05f8 100644 --- a/tutorials/training/source_zh_cn/advanced_use/apply_deep_probability_programming.md +++ b/tutorials/training/source_zh_cn/advanced_use/apply_deep_probability_programming.md @@ -1,4 +1,5 @@ # 深度概率编程 + `Ascend` `GPU` `全流程` `初级` `中级` `高级` @@ -29,16 +30,20 @@ ## 概述 + 深度学习模型具有强大的拟合能力,而贝叶斯理论具有很好的可解释能力。MindSpore深度概率编程(MindSpore Deep Probabilistic Programming, MDP)将深度学习和贝叶斯学习结合,通过设置网络权重为分布、引入隐空间分布等,可以对分布进行采样前向传播,由此引入了不确定性,从而增强了模型的鲁棒性和可解释性。MDP不仅包含通用、专业的概率学习编程语言,适用于“专业”用户,而且支持使用开发深度学习模型的逻辑进行概率编程,让初学者轻松上手;此外,还提供深度概率学习的工具箱,拓展贝叶斯应用功能。 本章将详细介绍深度概率编程在MindSpore上的应用。在动手进行实践之前,确保,你已经正确安装了MindSpore 0.7.0-beta及其以上版本。本章的具体内容如下: + 1. 介绍如何使用[bnn_layers模块](https://gitee.com/mindspore/mindspore/tree/r1.0/mindspore/nn/probability/bnn_layers)实现贝叶斯神经网(Bayesian Neural Network, BNN); 2. 介绍如何使用[variational模块](https://gitee.com/mindspore/mindspore/tree/r1.0/mindspore/nn/probability/infer/variational)和[dpn模块](https://gitee.com/mindspore/mindspore/tree/r1.0/mindspore/nn/probability/dpn)实现变分自编码器(Variational AutoEncoder, VAE); 3. 介绍如何使用[transforms模块](https://gitee.com/mindspore/mindspore/tree/r1.0/mindspore/nn/probability/transforms)实现DNN(Deep Neural Network, DNN)一键转BNN; 4. 介绍如何使用[toolbox模块](https://gitee.com/mindspore/mindspore/blob/r1.0/mindspore/nn/probability/toolbox/uncertainty_evaluation.py)实现不确定性估计。 ## 使用贝叶斯神经网络 + 贝叶斯神经网络是由概率模型和神经网络组成的基本模型,它的权重不再是一个确定的值,而是一个分布。本例介绍了如何使用MDP中的bnn_layers模块实现贝叶斯神经网络,并利用贝叶斯神经网络实现一个简单的图片分类功能,整体流程如下: + 1. 处理MNIST数据集; 2. 定义贝叶斯LeNet网络; 3. 
定义损失函数和优化器; @@ -47,15 +52,17 @@ > 本例面向GPU或Ascend 910 AI处理器平台,你可以在这里下载完整的样例代码: ### 处理数据集 + 本例子使用的是MNIST数据集,数据处理过程与教程中的[实现一个图片分类应用](https://www.mindspore.cn/tutorial/training/zh-CN/r1.0/quick_start/quick_start.html)一致。 ### 定义贝叶斯神经网络 + 本例使用的是Bayesian LeNet。利用bnn_layers模块构建贝叶斯神经网络的方法与构建普通的神经网络相同。值得注意的是,`bnn_layers`和普通的神经网络层可以互相组合。 -``` +```python import mindspore.nn as nn from mindspore.nn.probability import bnn_layers -import mindspore.ops.operations as P +import mindspore.ops as ops class BNNLeNet5(nn.Cell): """ @@ -81,7 +88,7 @@ class BNNLeNet5(nn.Cell): self.relu = nn.ReLU() self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) self.flatten = nn.Flatten() - self.reshape = P.Reshape() + self.reshape = ops.Reshape() def construct(self, x): x = self.conv1(x) @@ -98,7 +105,9 @@ class BNNLeNet5(nn.Cell): x = self.fc3(x) return x ``` + ### 定义损失函数和优化器 + 接下来需要定义损失函数(Loss)和优化器(Optimizer)。损失函数是深度学习的训练目标,也叫目标函数,可以理解为神经网络的输出(Logits)和标签(Labels)之间的距离,是一个标量数据。 常见的损失函数包括均方误差、L2损失、Hinge损失、交叉熵等等。图像分类应用通常采用交叉熵损失(CrossEntropy)。 @@ -107,7 +116,7 @@ class BNNLeNet5(nn.Cell): MindSpore中定义损失函数和优化器的代码样例如下: -``` +```python # loss function definition criterion = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") @@ -116,9 +125,10 @@ optimizer = AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0 ``` ### 训练网络 + 贝叶斯神经网络的训练过程与DNN基本相同,唯一不同的是将`WithLossCell`替换为适用于BNN的`WithBNNLossCell`。除了`backbone`和`loss_fn`两个参数之外,`WithBNNLossCell`增加了`dnn_factor`和`bnn_factor`两个参数。`dnn_factor`是由损失函数计算得到的网络整体损失的系数,`bnn_factor`是每个贝叶斯层的KL散度的系数,这两个参数是用来平衡网络整体损失和贝叶斯层的KL散度的,防止KL散度的值过大掩盖了网络整体损失。 -``` +```python net_with_loss = bnn_layers.WithBNNLossCell(network, criterion, dnn_factor=60000, bnn_factor=0.000001) train_bnn_network = TrainOneStepCell(net_with_loss, optimizer) train_bnn_network.set_train() @@ -136,9 +146,10 @@ for i in range(epoch): print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tvalidation Accuracy: {:.4f}'. 
format(i, train_loss, train_acc, valid_acc)) ``` + 其中,`train_model`和`validate_model`在MindSpore中的代码样例如下: -``` +```python def train_model(train_net, net, dataset): accs = [] loss_sum = 0 @@ -147,7 +158,7 @@ def train_model(train_net, net, dataset): label = Tensor(data['label'].asnumpy().astype(np.int32)) loss = train_net(train_x, label) output = net(train_x) - log_output = P.LogSoftmax(axis=1)(output) + log_output = ops.LogSoftmax(axis=1)(output) acc = np.mean(log_output.asnumpy().argmax(axis=1) == label.asnumpy()) accs.append(acc) loss_sum += loss.asnumpy() @@ -163,7 +174,7 @@ def validate_model(net, dataset): train_x = Tensor(data['image'].asnumpy().astype(np.float32)) label = Tensor(data['label'].asnumpy().astype(np.int32)) output = net(train_x) - log_output = P.LogSoftmax(axis=1)(output) + log_output = ops.LogSoftmax(axis=1)(output) acc = np.mean(log_output.asnumpy().argmax(axis=1) == label.asnumpy()) accs.append(acc) @@ -172,17 +183,22 @@ def validate_model(net, dataset): ``` ## 使用变分自编码器 + 接下来介绍如何使用MDP中的variational模块和dpn模块实现变分自编码器。变分自编码器是经典的应用了变分推断的深度概率模型,用来学习潜在变量的表示,通过该模型,不仅可以压缩输入数据,还可以生成该类型的新图像。本例的整体流程如下: + 1. 定义变分自编码器; 2. 定义损失函数和优化器; 3. 处理数据; 4. 训练网络; 5. 
生成新样本或重构输入样本。 + > 本例面向GPU或Ascend 910 AI处理器平台,你可以在这里下载完整的样例代码: + ### 定义变分自编码器 + 使用dpn模块来构造变分自编码器尤为简单,你只需要自定义编码器和解码器(DNN模型),调用`VAE`接口即可。 -``` +```python class Encoder(nn.Cell): def __init__(self): super(Encoder, self).__init__() @@ -205,7 +221,7 @@ class Decoder(nn.Cell): super(Decoder, self).__init__() self.fc1 = nn.Dense(400, 1024) self.sigmoid = nn.Sigmoid() - self.reshape = P.Reshape() + self.reshape = ops.Reshape() def construct(self, z): z = self.fc1(z) @@ -218,11 +234,13 @@ encoder = Encoder() decoder = Decoder() vae = VAE(encoder, decoder, hidden_size=400, latent_size=20) ``` + ### 定义损失函数和优化器 + 接下来需要定义损失函数(Loss)和优化器(Optimizer)。本例使用的损失函数是`ELBO`,`ELBO`是变分推断专用的损失函数;本例使用的优化器是`Adam`。 MindSpore中定义损失函数和优化器的代码样例如下: -``` +```python # loss function definition net_loss = ELBO(latent_prior='Normal', output_prior='Normal') @@ -231,24 +249,30 @@ optimizer = nn.Adam(params=vae.trainable_params(), learning_rate=0.001) net_with_loss = nn.WithLossCell(vae, net_loss) ``` + ### 处理数据 + 本例使用的是MNIST数据集,数据处理过程与教程中的[实现一个图片分类应用](https://www.mindspore.cn/tutorial/training/zh-CN/r1.0/quick_start/quick_start.html)一致。 ### 训练网络 + 使用variational模块中的`SVI`接口对VAE网络进行训练。 -``` +```python from mindspore.nn.probability.infer import SVI vi = SVI(net_with_loss=net_with_loss, optimizer=optimizer) vae = vi.run(train_dataset=ds_train, epochs=10) trained_loss = vi.get_train_loss() ``` + 通过`vi.run`可以得到训练好的网络,使用`vi.get_train_loss`可以得到训练之后的损失。 + ### 生成新样本或重构输入样本 + 利用训练好的VAE网络,我们可以生成新的样本或重构输入样本。 -``` +```python IMAGE_SHAPE = (-1, 1, 32, 32) generated_sample = vae.generate_sample(64, IMAGE_SHAPE) for sample in ds_train.create_dict_iterator(): @@ -257,19 +281,24 @@ for sample in ds_train.create_dict_iterator(): ``` ## DNN一键转换成BNN + 对于不熟悉贝叶斯模型的DNN研究人员,MDP提供了高级API`TransformToBNN`,支持DNN模型一键转换成BNN模型。目前在LeNet,ResNet,MobileNet,VGG等模型上验证了API的通用性。本例将会介绍如何使用transforms模块中的`TransformToBNN`API实现DNN一键转换成BNN,整体流程如下: + 1. 定义DNN模型; 2. 定义损失函数和优化器; 3. 实现功能一:转换整个模型; 4. 
实现功能二:转换指定类型的层。 + > 本例面向GPU或Ascend 910 AI处理器平台,你可以在这里下载完整的样例代码: + ### 定义DNN模型 + 本例使用的DNN模型是LeNet。 -``` +```python from mindspore.common.initializer import TruncatedNormal import mindspore.nn as nn -import mindspore.ops.operations as P +import mindspore.ops as ops def conv(in_channels, out_channels, kernel_size, stride=1, padding=0): """weight initial for conv layer""" @@ -315,7 +344,7 @@ class LeNet5(nn.Cell): self.relu = nn.ReLU() self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) self.flatten = nn.Flatten() - self.reshape = P.Reshape() + self.reshape = ops.Reshape() def construct(self, x): x = self.conv1(x) @@ -332,9 +361,10 @@ class LeNet5(nn.Cell): x = self.fc3(x) return x ``` + LeNet的网络结构如下: -``` +```text LeNet5 (conv1) Conv2dinput_channels=1, output_channels=6, kernel_size=(5, 5),stride=(1, 1), pad_mode=valid, padding=0, dilation=(1, 1), group=1, has_bias=False (conv2) Conv2dinput_channels=6, output_channels=16, kernel_size=(5, 5),stride=(1, 1), pad_mode=valid, padding=0, dilation=(1, 1), group=1, has_bias=False @@ -347,9 +377,10 @@ LeNet5 ``` ### 定义损失函数和优化器 + 接下来需要定义损失函数(Loss)和优化器(Optimizer)。本例使用交叉熵损失作为损失函数,`Adam`作为优化器。 -``` +```python network = LeNet5() # loss function definition @@ -361,10 +392,12 @@ optimizer = AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0 net_with_loss = WithLossCell(network, criterion) train_network = TrainOneStepCell(net_with_loss, optimizer) ``` + ### 实例化TransformToBNN + `TransformToBNN`的`__init__`函数定义如下: -``` +```python class TransformToBNN: def __init__(self, trainable_dnn, dnn_factor=1, bnn_factor=1): net_with_loss = trainable_dnn.network @@ -375,18 +408,21 @@ class TransformToBNN: self.bnn_factor = bnn_factor self.bnn_loss_file = None ``` + 参数`trainable_bnn`是经过`TrainOneStepCell`包装的可训练DNN模型,`dnn_factor`和`bnn_factor`分别为由损失函数计算得到的网络整体损失的系数和每个贝叶斯层的KL散度的系数。 MindSpore中实例化`TransformToBNN`的代码如下: -``` +```python from mindspore.nn.probability import transforms bnn_transformer = 
transforms.TransformToBNN(train_network, 60000, 0.000001) ``` + ### 实现功能一:转换整个模型 + `transform_to_bnn_model`方法可以将整个DNN模型转换为BNN模型。其定义如下: -``` +```python def transform_to_bnn_model(self, get_dense_args=lambda dp: {"in_channels": dp.in_channels, "has_bias": dp.has_bias, "out_channels": dp.out_channels, "activation": dp.activation}, @@ -413,90 +449,94 @@ bnn_transformer = transforms.TransformToBNN(train_network, 60000, 0.000001) Cell, a trainable BNN model wrapped by TrainOneStepCell. """ ``` + 参数`get_dense_args`指定从DNN模型的全连接层中获取哪些参数,`get_conv_args`指定从DNN模型的卷积层中获取哪些参数,参数`add_dense_args`和`add_conv_args`分别指定了要为BNN层指定哪些新的参数值。需要注意的是,`add_dense_args`中的参数不能与`get_dense_args`重复,`add_conv_args`和`get_conv_args`也是如此。 在MindSpore中将整个DNN模型转换成BNN模型的代码如下: -``` +```python train_bnn_network = bnn_transformer.transform_to_bnn_model() ``` + 整个模型转换后的结构如下: -``` +```text LeNet5 (conv1) ConvReparam in_channels=1, out_channels=6, kernel_size=(5, 5), stride=(1, 1), pad_mode=valid, padding=0, dilation=(1, 1), group=1, weight_mean=Parameter (name=conv1.weight_posterior.mean), weight_std=Parameter (name=conv1.weight_posterior.untransformed_std), has_bias=False (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (conv2) ConvReparam in_channels=6, out_channels=16, kernel_size=(5, 5), stride=(1, 1), pad_mode=valid, padding=0, dilation=(1, 1), group=1, weight_mean=Parameter (name=conv2.weight_posterior.mean), weight_std=Parameter (name=conv2.weight_posterior.untransformed_std), has_bias=False (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (fc1) DenseReparam in_channels=400, out_channels=120, weight_mean=Parameter (name=fc1.weight_posterior.mean), weight_std=Parameter (name=fc1.weight_posterior.untransformed_std), has_bias=True, bias_mean=Parameter (name=fc1.bias_posterior.mean), 
bias_std=Parameter (name=fc1.bias_posterior.untransformed_std) (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - + (bias_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (bias_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (fc2) DenseReparam in_channels=120, out_channels=84, weight_mean=Parameter (name=fc2.weight_posterior.mean), weight_std=Parameter (name=fc2.weight_posterior.untransformed_std), has_bias=True, bias_mean=Parameter (name=fc2.bias_posterior.mean), bias_std=Parameter (name=fc2.bias_posterior.untransformed_std) (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - + (bias_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (bias_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (fc3) DenseReparam in_channels=84, out_channels=10, weight_mean=Parameter (name=fc3.weight_posterior.mean), weight_std=Parameter (name=fc3.weight_posterior.untransformed_std), has_bias=True, bias_mean=Parameter (name=fc3.bias_posterior.mean), bias_std=Parameter (name=fc3.bias_posterior.untransformed_std) (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - + (bias_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (bias_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (relu) ReLU (max_pool2d) MaxPool2dkernel_size=2, stride=2, pad_mode=VALID (flatten) Flatten ``` + 可以看到,整个LeNet网络中的卷积层和全连接层都转变成了相应的贝叶斯层。 ### 实现功能二:转换指定类型的层 + `transform_to_bnn_layer`方法可以将DNN模型中指定类型的层(nn.Dense或者nn.Conv2d)转换为对应的贝叶斯层。其定义如下: -``` +```python def transform_to_bnn_layer(self, dnn_layer, bnn_layer, get_args=None, add_args=None): r""" Transform a specific type of layers in DNN 
model to corresponding BNN layer. @@ -513,16 +553,18 @@ LeNet5 Cell, a trainable model wrapped by TrainOneStepCell, whose sprcific type of layer is transformed to the corresponding bayesian layer. """ ``` + 参数`dnn_layer`指定将哪个类型的DNN层转换成BNN层,`bnn_layer`指定DNN层将转换成哪个类型的BNN层,`get_args`和`add_args`分别指定从DNN层中获取哪些参数和要为BNN层的哪些参数重新赋值。 在MindSpore中将DNN模型中的Dense层转换成相应贝叶斯层`DenseReparam`的代码如下: -``` +```python train_bnn_network = bnn_transformer.transform_to_bnn_layer(nn.Dense, bnn_layers.DenseReparam) ``` + 转换后网络的结构如下: -``` +```text LeNet5 (conv1) Conv2dinput_channels=1, output_channels=6, kernel_size=(5, 5),stride=(1, 1), pad_mode=valid, padding=0, dilation=(1, 1), group=1, has_bias=False (conv2) Conv2dinput_channels=6, output_channels=16, kernel_size=(5, 5),stride=(1, 1), pad_mode=valid, padding=0, dilation=(1, 1), group=1, has_bias=False @@ -530,55 +572,58 @@ LeNet5 in_channels=400, out_channels=120, weight_mean=Parameter (name=fc1.weight_posterior.mean), weight_std=Parameter (name=fc1.weight_posterior.untransformed_std), has_bias=True, bias_mean=Parameter (name=fc1.bias_posterior.mean), bias_std=Parameter (name=fc1.bias_posterior.untransformed_std) (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - + (bias_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (bias_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (fc2) DenseReparam in_channels=120, out_channels=84, weight_mean=Parameter (name=fc2.weight_posterior.mean), weight_std=Parameter (name=fc2.weight_posterior.untransformed_std), has_bias=True, bias_mean=Parameter (name=fc2.bias_posterior.mean), bias_std=Parameter (name=fc2.bias_posterior.untransformed_std) (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - + (bias_prior) NormalPrior (normal) Normalmean = 0.0, standard 
deviation = 0.1 - + (bias_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (fc3) DenseReparam in_channels=84, out_channels=10, weight_mean=Parameter (name=fc3.weight_posterior.mean), weight_std=Parameter (name=fc3.weight_posterior.untransformed_std), has_bias=True, bias_mean=Parameter (name=fc3.bias_posterior.mean), bias_std=Parameter (name=fc3.bias_posterior.untransformed_std) (weight_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (weight_posterior) NormalPosterior (normal) Normalbatch_shape = None - + (bias_prior) NormalPrior (normal) Normalmean = 0.0, standard deviation = 0.1 - + (bias_posterior) NormalPosterior (normal) Normalbatch_shape = None - - + + (relu) ReLU (max_pool2d) MaxPool2dkernel_size=2, stride=2, pad_mode=VALID (flatten) Flatten ``` + 可以看到,LeNet网络中的卷积层保持不变,全连接层变成了对应的贝叶斯层`DenseReparam`。 ## 使用不确定性估计工具箱 + 贝叶斯神经网络的优势之一就是可以获取不确定性,MDP在上层提供了不确定性估计的工具箱,用户可以很方便地使用该工具箱计算不确定性。不确定性意味着深度学习模型对预测结果的不确定程度。目前,大多数深度学习算法只能给出预测结果,而不能判断预测结果的可靠性。不确定性主要有两种类型:偶然不确定性和认知不确定性。 + - 偶然不确定性(Aleatoric Uncertainty):描述数据中的内在噪声,即无法避免的误差,这个现象不能通过增加采样数据来削弱。 - 认知不确定性(Epistemic Uncertainty):模型自身对输入数据的估计可能因为训练不佳、训练数据不够等原因而不准确,可以通过增加训练数据等方式来缓解。 @@ -587,7 +632,7 @@ LeNet5 以分类任务为例,本例中使用的模型是LeNet,数据集为MNIST,数据处理过程与教程中的[实现一个图片分类应用](https://www.mindspore.cn/tutorial/training/zh-CN/r1.0/quick_start/quick_start.html)一致。为了评估测试示例的不确定性,使用工具箱的方法如下: -``` +```python from mindspore.nn.probability.toolbox.uncertainty_evaluation import UncertaintyEvaluation from mindspore.train.serialization import load_checkpoint, load_param_into_net @@ -610,4 +655,3 @@ for eval_data in ds_eval.create_dict_iterator(): epistemic_uncertainty = evaluation.eval_epistemic_uncertainty(eval_data) aleatoric_uncertainty = evaluation.eval_aleatoric_uncertainty(eval_data) ``` - diff --git a/tutorials/training/source_zh_cn/advanced_use/apply_gradient_accumulation.md b/tutorials/training/source_zh_cn/advanced_use/apply_gradient_accumulation.md index cec36bbd37..9f81867731 100644 --- 
a/tutorials/training/source_zh_cn/advanced_use/apply_gradient_accumulation.md +++ b/tutorials/training/source_zh_cn/advanced_use/apply_gradient_accumulation.md @@ -36,6 +36,7 @@ 以MNIST作为示范数据集,自定义简单模型实现梯度累积。 ### 导入需要的库文件 + 下列是我们所需要的公共模块及MindSpore的模块及库文件。 ```python @@ -47,9 +48,7 @@ import mindspore.nn as nn from mindspore import ParameterTuple from mindspore import context from mindspore.nn import Cell -from mindspore.ops import composite as C -from mindspore.ops import functional as F -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.train.dataset_helper import DatasetHelper from mindspore.train.serialization import save_checkpoint from model_zoo.official.cv.lenet.src.dataset import create_dataset @@ -65,20 +64,22 @@ from model_zoo.official.cv.lenet.src.lenet import LeNet5 这里以LeNet网络为例进行介绍,当然也可以使用其它的网络,如ResNet-50、BERT等, 此部分代码由`model_zoo`中`lenet`目录下的[lenet.py]()导入。 ### 定义训练模型 + 将训练流程拆分为正向反向训练、参数更新和累积梯度清理三个部分: + - `TrainForwardBackward`计算loss和梯度,利用grad_sum实现梯度累加。 - `TrainOptim`实现参数更新。 - `TrainClear`实现对梯度累加变量grad_sum清零。 ```python -_sum_op = C.MultitypeFuncGraph("grad_sum_op") -_clear_op = C.MultitypeFuncGraph("clear_op") +_sum_op = ops.MultitypeFuncGraph("grad_sum_op") +_clear_op = ops.MultitypeFuncGraph("clear_op") @_sum_op.register("Tensor", "Tensor") def _cumulative_gard(grad_sum, grad): """Apply gard sum to cumulative gradient.""" - add = P.AssignAdd() + add = ops.AssignAdd() return add(grad_sum, grad) @@ -86,7 +87,7 @@ def _cumulative_gard(grad_sum, grad): def _clear_grad_sum(grad_sum, zero): """Apply zero to clear grad_sum.""" success = True - success = F.depend(success, F.assign(grad_sum, zero)) + success = ops.depend(success, ops.assign(grad_sum, zero)) return success @@ -99,16 +100,16 @@ class TrainForwardBackward(Cell): self.weights = ParameterTuple(network.trainable_params()) self.optimizer = optimizer self.grad_sum = grad_sum - self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.grad = 
ops.GradOperation(get_by_list=True, sens_param=True) self.sens = sens - self.hyper_map = C.HyperMap() + self.hyper_map = ops.HyperMap() def construct(self, *inputs): weights = self.weights loss = self.network(*inputs) - sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) + sens = ops.Fill()(ops.DType()(loss), ops.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(*inputs, sens) - return F.depend(loss, self.hyper_map(F.partial(_sum_op), self.grad_sum, grads)) + return ops.depend(loss, self.hyper_map(ops.partial(_sum_op), self.grad_sum, grads)) class TrainOptim(Cell): @@ -126,14 +127,15 @@ class TrainClear(Cell): super(TrainClear, self).__init__(auto_prefix=False) self.grad_sum = grad_sum self.zeros = zeros - self.hyper_map = C.HyperMap() + self.hyper_map = ops.HyperMap() def construct(self): - success = self.hyper_map(F.partial(_clear_op), self.grad_sum, self.zeros) + success = self.hyper_map(ops.partial(_clear_op), self.grad_sum, self.zeros) return success ``` ### 定义训练过程 + 每个Mini-batch通过正反向训练计算loss和梯度,通过mini_steps控制每次更新参数前的累加次数。达到累加次数后进行参数更新和 累加梯度变量清零。 @@ -202,6 +204,7 @@ class GradientAccumulation: ``` ### 训练并保存模型 + 调用网络、优化器及损失函数,然后自定义`GradientAccumulation`的`train_process`接口,进行模型训练。 ```python @@ -226,13 +229,15 @@ if __name__ == "__main__": ``` ## 实验结果 + 在经历了10轮epoch之后,在测试集上的精度约为96.31%。 -**执行训练** +### 执行训练 + 1. 运行训练代码,查看运行结果。 ```shell - $ python train.py --data_path=./MNIST_Data + python train.py --data_path=./MNIST_Data ``` 输出如下,可以看到loss值随着训练逐步降低: @@ -247,17 +252,17 @@ if __name__ == "__main__": epoch: 10 step: 448 loss is 0.06443884 epoch: 10 step: 449 loss is 0.0067842817 ``` - + 2. 
查看保存的CheckPoint文件。 训练过程中保存了CheckPoint文件`gradient_accumulation.ckpt`,即模型文件。 -**验证模型** +### 验证模型 通过`model_zoo`中`lenet`目录下的[eval.py](),使用保存的CheckPoint文件,加载验证数据集,进行验证。 ```shell -$ python eval.py --data_path=./MNIST_Data --ckpt_path=./gradient_accumulation.ckpt --device_target=GPU +python eval.py --data_path=./MNIST_Data --ckpt_path=./gradient_accumulation.ckpt --device_target=GPU ``` 输出如下,可以看到使用验证的数据集,正确率在96.31%左右,与batch_size为32的验证结果一致。 diff --git a/tutorials/training/source_zh_cn/advanced_use/apply_host_device_training.md b/tutorials/training/source_zh_cn/advanced_use/apply_host_device_training.md index 82b9218891..799b4ff10e 100644 --- a/tutorials/training/source_zh_cn/advanced_use/apply_host_device_training.md +++ b/tutorials/training/source_zh_cn/advanced_use/apply_host_device_training.md @@ -47,16 +47,20 @@ ## 配置混合执行 1. 配置混合训练标识。在`src/config.py`文件中,设置`argparse_init`函数中的`host_device_mix`默认值为`1`,设置`WideDeepConfig`类的`__init__`函数中`self.host_device_mix`为`1`: + ```python self.host_device_mix = 1 ``` 2. 
检查必要算子和优化器的执行位置。在`src/wide_and_deep.py`的`WideDeepModel`类中,检查`EmbeddingLookup`为主机端执行: + ```python self.deep_embeddinglookup = nn.EmbeddingLookup() self.wide_embeddinglookup = nn.EmbeddingLookup() ``` + 在`src/wide_and_deep.py`文件的`class TrainStepWrap(nn.Cell)`中,检查两个优化器主机端执行的属性。 + ```python self.optimizer_w.sparse_opt.add_prim_attr("primitive_target", "CPU") self.optimizer_d.sparse_opt.add_prim_attr("primitive_target", "CPU") @@ -69,7 +73,7 @@ 运行日志保存在`device_0`目录下,其中`loss.log`保存一个epoch内中多个loss值,其值类似如下: -``` +```text epoch: 1 step: 1, wide_loss is 0.6873926, deep_loss is 0.8878349 epoch: 1 step: 2, wide_loss is 0.6442529, deep_loss is 0.8342661 epoch: 1 step: 3, wide_loss is 0.6227323, deep_loss is 0.80273706 @@ -84,7 +88,7 @@ epoch: 1 step: 10, wide_loss is 0.566089, deep_loss is 0.6884129 `test_deep0.log`保存pytest进程输出的详细的运行时日志(需要将日志级别设置为INFO,且在MindSpore编译时加上-p on选项),搜索关键字`EmbeddingLookup`,可找到如下信息: -``` +```text [INFO] DEVICE(109904,python3.7):2020-06-27-12:42:34.928.275 [mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc:324] Run] cpu kernel: Default/network-VirtualDatasetCellTriple/_backbone-NetWithLossClass/network-WideDeepModel/EmbeddingLookup-op297 costs 3066 us. [INFO] DEVICE(109904,python3.7):2020-06-27-12:42:34.943.896 [mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc:324] Run] cpu kernel: Default/network-VirtualDatasetCellTriple/_backbone-NetWithLossClass/network-WideDeepModel/EmbeddingLookup-op298 costs 15521 us. ``` @@ -92,7 +96,7 @@ epoch: 1 step: 10, wide_loss is 0.566089, deep_loss is 0.6884129 表示`EmbeddingLookup`在主机端的执行时间。 继续在`test_deep0.log`搜索关键字`FusedSparseFtrl`和`FusedSparseLazyAdam`,可找到如下信息: -``` +```text [INFO] DEVICE(109904,python3.7):2020-06-27-12:42:35.422.963 [mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc:324] Run] cpu kernel: Default/optimizer_w-FTRL/FusedSparseFtrl-op299 costs 54492 us. 
[INFO] DEVICE(109904,python3.7):2020-06-27-12:42:35.565.953 [mindspore/ccsrc/device/cpu/cpu_kernel_runtime.cc:324] Run] cpu kernel: Default/optimizer_d-LazyAdam/FusedSparseLazyAdam-op300 costs 142865 us. ``` diff --git a/tutorials/training/source_zh_cn/advanced_use/apply_parameter_server_training.md b/tutorials/training/source_zh_cn/advanced_use/apply_parameter_server_training.md index 9e932e4e30..269e464170 100644 --- a/tutorials/training/source_zh_cn/advanced_use/apply_parameter_server_training.md +++ b/tutorials/training/source_zh_cn/advanced_use/apply_parameter_server_training.md @@ -17,6 +17,7 @@ ## 概述 + Parameter Server(参数服务器)是分布式训练中一种广泛使用的架构,相较于同步的AllReduce训练方法,Parameter Server具有更好的灵活性、可扩展性以及节点容灾的能力。具体来讲,参数服务器既支持同步SGD,也支持异步SGD的训练算法;在扩展性上,将模型的计算与模型的更新分别部署在Worker和Server两类进程中,使得Worker和Server的资源可以独立地横向扩缩;另外,在大规模数据中心的环境下,计算设备、网络以及存储经常会出现各种故障而导致部分节点异常,而在参数服务器的架构下,能够较为容易地处理此类的故障而不会对训练中的任务产生影响。 在MindSpore的参数服务器实现中,采用了开源的[ps-lite](https://github.com/dmlc/ps-lite)作为基础架构,基于其提供的远程通信能力以及抽象的Push/Pull原语,实现了同步SGD的分布式训练算法,另外结合Ascend和GPU中的高性能集合通信库(HCCL和NCCL),MindSpore还提供了Parameter Server和AllReduce的混合训练模式,支持将部分权重通过参数服务器进行存储和更新,其余权重仍然通过AllReduce算法进行训练。 @@ -30,6 +31,7 @@ Parameter Server(参数服务器)是分布式训练中一种广泛使用的架 - Scheduler:用于建立Server和Worker的通信关系。 ## 准备工作 + 以LeNet在Ascend 910上使用Parameter Server训练为例: ### 训练脚本准备 @@ -50,6 +52,7 @@ Parameter Server(参数服务器)是分布式训练中一种广泛使用的架 - 通过`mindspore.common.Parameter.set_param_ps()`对此权重进行设置。 3. 
在[原训练脚本](https://gitee.com/mindspore/mindspore/blob/r1.0/model_zoo/official/cv/lenet/train.py)基础上,设置LeNet模型所有权重通过Parameter Server训练: + ```python context.set_ps_context(enable_ps=True) network = LeNet5(cfg.num_classes) @@ -60,7 +63,7 @@ Parameter Server(参数服务器)是分布式训练中一种广泛使用的架 MindSpore通过读取环境变量,控制Parameter Server训练,环境变量包括以下选项(其中`MS_SCHED_HOST`及`MS_SCHED_PORT`所有脚本需保持一致): -``` +```bash export PS_VERBOSE=1 # Print ps-lite log export MS_SERVER_NUM=1 # Server number export MS_WORKER_NUM=1 # Worker number @@ -89,6 +92,7 @@ export MS_ROLE=MS_SCHED # The role of this process: MS_SCHED repre ``` `Server.sh`: + ```bash #!/bin/bash export PS_VERBOSE=1 @@ -101,6 +105,7 @@ export MS_ROLE=MS_SCHED # The role of this process: MS_SCHED repre ``` `Worker.sh`: + ```bash #!/bin/bash export PS_VERBOSE=1 @@ -113,26 +118,31 @@ export MS_ROLE=MS_SCHED # The role of this process: MS_SCHED repre ``` 最后分别执行: + ```bash sh Scheduler.sh > scheduler.log 2>&1 & sh Server.sh > server.log 2>&1 & sh Worker.sh > worker.log 2>&1 & ``` + 启动训练 2. 
查看结果 查看`scheduler.log`中Server与Worker通信日志: - ``` + + ```text Bind to role=scheduler, id=1, ip=XXX.XXX.XXX.XXX, port=XXXX Assign rank=8 to node role=server, ip=XXX.XXX.XXX.XXX, port=XXXX Assign rank=9 to node role=worker, ip=XXX.XXX.XXX.XXX, port=XXXX the scheduler is connected to 1 workers and 1 servers ``` + 说明Server、Worker与Scheduler通信建立成功。 查看`worker.log`中训练结果: - ``` + + ```text epoch: 1 step: 1, loss is 2.302287 epoch: 1 step: 2, loss is 2.304071 epoch: 1 step: 3, loss is 2.308778 diff --git a/tutorials/training/source_zh_cn/advanced_use/apply_quantization_aware_training.md b/tutorials/training/source_zh_cn/advanced_use/apply_quantization_aware_training.md index fe7d840bd9..42cad6136b 100644 --- a/tutorials/training/source_zh_cn/advanced_use/apply_quantization_aware_training.md +++ b/tutorials/training/source_zh_cn/advanced_use/apply_quantization_aware_training.md @@ -39,6 +39,7 @@ ### 伪量化节点 伪量化节点,是指感知量化训练中插入的节点,用以寻找网络数据分布,并反馈损失精度,具体作用如下: + - 找到网络数据的分布,即找到待量化参数的最大值和最小值; - 模拟量化为低比特时的精度损失,把该损失作用到网络模型中,传递给损失函数,让优化器在训练过程中对该损失值进行优化。 @@ -59,12 +60,12 @@ MindSpore的感知量化训练是在训练基础上,使用低精度数据替 感知量化训练模型与一般训练步骤一致,在定义网络和最后生成模型阶段后,需要进行额外的操作,完整流程如下: -1. 数据处理加载数据集。 -2. 定义原始非量化网络。 -3. 定义融合网络。在完成定义原始非量化网络后,替换指定的算子,完成融合网络的定义。 -4. 定义优化器和损失函数。 -5. 转化量化网络。基于融合网络,使用转化接口在融合网络中插入伪量化节点,生成量化网络。 -6. 进行量化训练。基于量化网络训练,生成量化模型。 +1. 数据处理加载数据集。 +2. 定义原始非量化网络。 +3. 定义融合网络。在完成定义原始非量化网络后,替换指定的算子,完成融合网络的定义。 +4. 定义优化器和损失函数。 +5. 转化量化网络。基于融合网络,使用转化接口在融合网络中插入伪量化节点,生成量化网络。 +6. 
进行量化训练。基于量化网络训练,生成量化模型。 在上面流程中,第3、5、6步是感知量化训练区别普通训练需要额外进行的步骤。 @@ -99,7 +100,7 @@ class LeNet5(nn.Cell): Tensor, output tensor Examples: >>> LeNet(num_class=10, num_channel=1) - + """ def __init__(self, num_class=10, num_channel=1): super(LeNet5, self).__init__() @@ -129,10 +130,10 @@ class LeNet5(nn.Cell): def __init__(self, num_class=10): super(LeNet5, self).__init__() self.num_class = num_class - + self.conv1 = nn.Conv2dBnAct(1, 6, kernel_size=5, activation='relu') self.conv2 = nn.Conv2dBnAct(6, 16, kernel_size=5, activation='relu') - + self.fc1 = nn.DenseBnAct(16 * 5 * 5, 120, activation='relu') self.fc2 = nn.DenseBnAct(120, 84, activation='relu') self.fc3 = nn.DenseBnAct(84, self.num_class) @@ -164,13 +165,13 @@ net = quant.convert_quant_network(network, quant_delay=900, bn_fold=False, per_c 上面介绍了从零开始进行感知量化训练。更常见情况是已有一个模型文件,希望生成量化模型,这时已有正常网络模型训练得到的模型文件及训练脚本,进行感知量化训练。这里使用checkpoint文件重新训练的功能,详细步骤为: - 1. 数据处理加载数据集。 - 2. 定义原始非量化网络。 - 3. 训练原始网络生成非量化模型。 - 4. 定义融合网络。 - 5. 定义优化器和损失函数。 - 6. 基于融合网络转化生成量化网络。 - 7. 加载模型文件重训。加载已有非量化模型文件,基于量化网络重新训练生成量化模型。详细模型重载训练,请参见。 + 1. 数据处理加载数据集。 + 2. 定义原始非量化网络。 + 3. 训练原始网络生成非量化模型。 + 4. 定义融合网络。 + 5. 定义优化器和损失函数。 + 6. 基于融合网络转化生成量化网络。 + 7. 加载模型文件重训。加载已有非量化模型文件,基于量化网络重新训练生成量化模型。详细模型重载训练,请参见。 ### 进行推理 @@ -180,11 +181,11 @@ net = quant.convert_quant_network(network, quant_delay=900, bn_fold=False, per_c - 使用感知量化训练后得到的checkpoint文件进行推理: - 1. 加载量化模型。 - 2. 推理。 + 1. 加载量化模型。 + 2. 推理。 - 转化为ONNX等通用格式进行推理(暂不支持,开发完善后补充)。 - + ## 参考文献 [1] Jacob B, Kligys S, Chen B, et al. Quantization and training of neural networks for efficient integer-arithmetic-only inference[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018: 2704-2713. 
diff --git a/tutorials/training/source_zh_cn/advanced_use/convert_dataset.md b/tutorials/training/source_zh_cn/advanced_use/convert_dataset.md index 5c1c31b7f6..8619747cb9 100644 --- a/tutorials/training/source_zh_cn/advanced_use/convert_dataset.md +++ b/tutorials/training/source_zh_cn/advanced_use/convert_dataset.md @@ -16,9 +16,10 @@ ## 概述 -用户可以将非标准的数据集和常用的数据集转换为MindSpore数据格式,即MindRecord,从而方便地加载到MindSpore中进行训练。同时,MindSpore在部分场景做了性能优化,使用MindSpore数据格式可以获得更好的性能。 +用户可以将非标准的数据集和常用的数据集转换为MindSpore数据格式,即MindRecord,从而方便地加载到MindSpore中进行训练。同时,MindSpore在部分场景做了性能优化,使用MindSpore数据格式可以获得更好的性能。 MindSpore数据格式具备的特征如下: + 1. 实现多变的用户数据统一存储、访问,训练数据读取更简便; 2. 数据聚合存储,高效读取,且方便管理、移动; 3. 高效数据编解码操作,对用户透明、无感知; @@ -96,7 +97,7 @@ MindSpore数据格式的目标是归一化用户的数据集,并进一步通 5. 创建`FileWriter`对象,传入文件名及分片数量,然后添加Schema文件及索引,调用`write_raw_data`接口写入数据,最后调用`commit`接口生成本地数据文件。 - ```python + ```python writer = FileWriter(file_name="test.mindrecord", shard_num=4) writer.add_schema(cv_schema_json, "test_schema") writer.add_index(indexes) @@ -141,7 +142,7 @@ MindSpore数据格式的目标是归一化用户的数据集,并进一步通 输出结果如下: - ``` + ```text sample: {'data': array([175, 175, 85, 60, 184, 124, 54, 189, 125, 193, 153, 91, 234, 106, 43, 143, 132, 211, 204, 160, 44, 105, 187, 185, 45, 205, 122, 236, 112, 123, 84, 177, 219], dtype=uint8), 'file_name': array(b'3.jpg', dtype='|S5'), 'label': array(99, dtype=int32)} diff --git a/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md b/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md index 9e9fd4c673..20e69a5814 100644 --- a/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md +++ b/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md @@ -41,7 +41,7 @@ MindSpore提供`Callback`能力,支持用户在训练/推理的特定阶段, 使用方法:在`model.train`方法中传入`Callback`对象,它可以是一个`Callback`列表,例: ```python -ckpt_cb = ModelCheckpoint() +ckpt_cb = ModelCheckpoint() loss_cb = LossMonitor() summary_cb = SummaryCollector(summary_dir='./summary_dir') model.train(epoch, dataset, 
callbacks=[ckpt_cb, loss_cb, summary_cb]) @@ -60,7 +60,7 @@ model.train(epoch, dataset, callbacks=[ckpt_cb, loss_cb, summary_cb]) ```python class Callback(): - """Callback base class""" + """Callback base class""" def begin(self, run_context): """Called once before the network executing.""" pass @@ -70,11 +70,11 @@ class Callback(): pass def epoch_end(self, run_context): - """Called after each epoch finished.""" + """Called after each epoch finished.""" pass def step_begin(self, run_context): - """Called before each epoch beginning.""" + """Called before each step beginning.""" pass def step_end(self, run_context): @@ -131,7 +131,7 @@ class Callback(): 输出: - ``` + ```text epoch: 20 step: 32 loss: 2.298344373703003 ``` @@ -172,7 +172,6 @@ class Callback(): 具体实现逻辑为:定义一个`Callback`对象,初始化对象接收`model`对象和`ds_eval`(验证数据集)。在`step_end`阶段验证模型的精度,当精度为当前最高时,手动触发保存checkpoint方法,保存当前的参数。 - ## MindSpore metrics功能介绍 当训练结束后,可以使用metrics评估训练结果的好坏。 @@ -224,17 +223,21 @@ print('Accuracy is ', accuracy) ``` 输出: -``` + +```text Accuracy is 0.6667 ``` + ## Print算子功能介绍 + MindSpore的自研`Print`算子可以将用户输入的Tensor或字符串信息打印出来,支持多字符串输入,多Tensor输入和字符串与Tensor的混合输入,输入参数以逗号隔开。 `Print`算子使用方法与其他算子相同,在网络中的`__init__`声明算子并在`construct`进行调用,具体使用实例及输出结果如下: + ```python import numpy as np from mindspore import Tensor -from mindspore.ops import operations as P +import mindspore.ops as ops import mindspore.nn as nn import mindspore.context as context @@ -243,7 +246,7 @@ context.set_context(mode=context.GRAPH_MODE) class PrintDemo(nn.Cell): def __init__(self): super(PrintDemo, self).__init__() - self.print = P.Print() + self.print = ops.Print() def construct(self, x, y): self.print('print Tensor x and Tensor y:', x, y) @@ -254,8 +257,10 @@ y = Tensor(np.ones([2, 2]).astype(np.int32)) net = PrintDemo() output = net(x, y) ``` + 输出: -``` + +```text print Tensor x and Tensor y: Tensor shape:[[const vector][2, 1]]Int32 val:[[1] @@ -315,7 +320,7 @@ val:[[1 1] 3.
执行用例Dump数据。 可以在训练脚本中设置`context.set_context(reserve_class_name_in_scope=False)`,避免Dump文件名称过长导致Dump数据文件生成失败。 4. 解析Dump数据。 - + 通过`numpy.fromfile`读取Dump数据文件即可解析。 ### 异步Dump功能介绍 @@ -372,6 +377,7 @@ val:[[1 1] ``` ## 日志相关的环境变量和配置 + MindSpore采用glog来输出日志,常用的几个环境变量如下: - `GLOG_v` @@ -379,13 +385,13 @@ MindSpore采用glog来输出日志,常用的几个环境变量如下: 该环境变量控制日志的级别。 该环境变量默认值为2,即WARNING级别,对应关系如下:0-DEBUG、1-INFO、2-WARNING、3-ERROR。 -- `GLOG_logtostderr` +- `GLOG_logtostderr` 该环境变量控制日志的输出方式。 该环境变量的值设置为1时,日志输出到屏幕;值设置为0时,日志输出到文件。默认值为1。 -- `GLOG_log_dir` - +- `GLOG_log_dir` + 该环境变量指定日志输出的路径。 若`GLOG_logtostderr`的值为0,则必须设置此变量。 若指定了`GLOG_log_dir`且`GLOG_logtostderr`的值为1时,则日志输出到屏幕,不输出到文件。 @@ -428,6 +434,3 @@ MindSpore子模块按照目录划分如下: | mindspore/core/ | CORE | > glog不支持日志文件的绕接,如果需要控制日志文件对磁盘空间的占用,可选用操作系统提供的日志文件管理工具,例如:Linux的logrotate。 - - - diff --git a/tutorials/training/source_zh_cn/advanced_use/custom_operator_ascend.md b/tutorials/training/source_zh_cn/advanced_use/custom_operator_ascend.md index ba039bed01..8537e9b20c 100644 --- a/tutorials/training/source_zh_cn/advanced_use/custom_operator_ascend.md +++ b/tutorials/training/source_zh_cn/advanced_use/custom_operator_ascend.md @@ -25,6 +25,7 @@ 添加一个自定义算子,需要完成算子原语注册、算子实现、算子信息注册三部分工作。 其中: + - 算子原语:定义了算子在网络中的前端接口原型,也是组成网络模型的基础单元,主要包括算子的名称、属性(可选)、输入输出名称、输出shape推理方法、输出dtype推理方法等信息。 - 算子实现:通过TBE(Tensor Boost Engine)提供的特性语言接口,描述算子内部计算逻辑的实现。TBE提供了开发昇腾AI芯片自定义算子的能力。你可以在页面申请公测。 - 算子信息:描述TBE算子的基本信息,如算子名称、支持的输入输出类型等。它是后端做算子选择和映射时的依据。 @@ -38,6 +39,7 @@ 每个算子的原语是一个继承于`PrimitiveWithInfer`的子类,其类型名称即是算子名称。 自定义算子原语与内置算子原语的接口定义完全一致: + - 属性由构造函数`__init__`的入参定义。本用例的算子没有属性,因此`__init__`没有额外的入参。带属性的用例可参考MindSpore源码中的[custom add3](https://gitee.com/mindspore/mindspore/blob/r1.0/tests/st/ops/custom_ops_tbe/cus_add3.py)用例。 - 输入输出的名称通过`init_prim_io_names`函数定义。 - 输出Tensor的shape推理方法在`infer_shape`函数中定义,输出Tensor的dtype推理方法在`infer_dtype`函数中定义。 @@ -48,7 +50,7 @@ ```python from mindspore.ops import prim_attr_register, PrimitiveWithInfer -from mindspore.ops import operations as P +import 
mindspore.ops as ops # y = x^2 class CusSquare(PrimitiveWithInfer): """ @@ -75,10 +77,12 @@ class CusSquare(PrimitiveWithInfer): 算子的计算函数主要用来封装算子的计算逻辑供主函数调用,其内部通过调用TBE的API接口组合实现算子的计算逻辑。 算子的入口函数描述了编译算子的内部过程,一般分为如下几步: + 1. 准备输入的placeholder,placeholder是一个占位符,返回一个Tensor对象,表示一组输入数据。 2. 调用计算函数,计算函数使用TBE提供的API接口描述了算子内部的计算逻辑。 3. 调用Schedule调度模块,调度模块对算子中的数据按照调度模块的调度描述进行切分,同时指定好数据的搬运流程,确保在硬件上的执行达到最优。默认可以采用自动调度模块(`auto_schedule`)。 4. 调用`cce_build_code`编译生成算子二进制。 + > 入口函数的输入参数有特殊要求,需要依次为:算子每个输入的信息、算子每个输出的信息、算子属性(可选)和`kernel_name`(生成算子二进制的名称)。输入和输出的信息用字典封装传入,其中包含该算子在网络中被调用时传入的实际输入和输出的shape和dtype。 更多关于使用TBE开发算子的内容请参考[TBE文档](https://support.huaweicloud.com/odevg-A800_3000_3010/atlaste_10_0063.html),关于TBE算子的调试和性能优化请参考[MindStudio文档](https://support.huaweicloud.com/usermanual-mindstudioc73/atlasmindstudio_02_0043.html)。 @@ -92,7 +96,7 @@ class CusSquare(PrimitiveWithInfer): ### 示例 -下面以`Square`算子的TBE实现`square_impl.py`为例进行介绍。`square_compute`是算子实现的计算函数,通过调用`te.lang.cce`提供的API描述了`x * x`的计算逻辑。`cus_square_op_info `是算子信息,通过`TBERegOp`来定义。 +下面以`Square`算子的TBE实现`square_impl.py`为例进行介绍。`square_compute`是算子实现的计算函数,通过调用`te.lang.cce`提供的API描述了`x * x`的计算逻辑。`cus_square_op_info`是算子信息,通过`TBERegOp`来定义。 `TBERegOp`的设置需要注意以下几点: @@ -128,7 +132,7 @@ cus_square_op_info = TBERegOp("CusSquare") \ .output(0, "y", False, "required", "all") \ .dtype_format(DataType.F32_Default, DataType.F32_Default) \ .dtype_format(DataType.F16_Default, DataType.F16_Default) \ - .get_op_info() + .get_op_info() # Binding kernel info with the kernel implementation. @op_info_register(cus_square_op_info) @@ -185,17 +189,20 @@ def test_net(): ``` 执行用例: -``` + +```bash pytest -s tests/st/ops/custom_ops_tbe/test_square.py::test_net ``` 执行结果: -``` + +```text x: [1. 4. 9.] output: [1. 16. 81.] ``` ## 定义算子反向传播函数 + 如果算子要支持自动微分,需要在其原语中定义其反向传播函数(bprop)。你需要在bprop中描述利用正向输入、正向输出和输出梯度得到输入梯度的反向计算逻辑。反向计算逻辑可以使用内置算子或自定义反向算子构成。 定义算子反向传播函数时需注意以下几点: @@ -204,6 +211,7 @@ output: [1. 16. 81.] 
- bprop函数的返回值形式约定为输入梯度组成的元组,元组中元素的顺序与正向输入参数顺序一致。即使只有一个输入梯度,返回值也要求是元组的形式。 例如,增加bprop后的`CusSquare`原语为: + ```python class CusSquare(PrimitiveWithInfer): @prim_attr_register @@ -220,33 +228,36 @@ class CusSquare(PrimitiveWithInfer): def get_bprop(self): def bprop(data, out, dout): - twos_like = P.OnesLike()(data) * 2.0 - gradient = P.Mul()(data, twos_like) - dx = P.Mul()(gradient, dout) + twos_like = ops.OnesLike()(data) * 2.0 + gradient = ops.Mul()(data, twos_like) + dx = ops.Mul()(gradient, dout) return (dx,) return bprop ``` 在`test_square.py`文件中定义反向用例。 + ```python -from mindspore.ops import composite as C +import mindspore.ops as ops def test_grad_net(): x = np.array([1.0, 4.0, 9.0]).astype(np.float32) sens = np.array([1.0, 1.0, 1.0]).astype(np.float32) square = Net() - grad = C.GradOperation(sens_param=True) + grad = ops.GradOperation(sens_param=True) dx = grad(square)(Tensor(x), Tensor(sens)) print("x: ", x) print("dx: ", dx) ``` 执行用例: -``` + +```bash pytest -s tests/st/ops/custom_ops_tbe/test_square.py::test_grad_net ``` 执行结果: -``` + +```text x: [1. 4. 9.] dx: [2. 8. 18.] 
``` diff --git a/tutorials/training/source_zh_cn/advanced_use/debug_in_pynative_mode.md b/tutorials/training/source_zh_cn/advanced_use/debug_in_pynative_mode.md index 79890d821b..2dba735719 100644 --- a/tutorials/training/source_zh_cn/advanced_use/debug_in_pynative_mode.md +++ b/tutorials/training/source_zh_cn/advanced_use/debug_in_pynative_mode.md @@ -75,22 +75,22 @@ print(output.asnumpy()) [ 0.05016355 0.03958241 0.03958241 0.03958241 0.03443141]]]] ``` - ## 执行普通函数 将若干算子组合成一个函数,然后直接通过函数调用的方式执行这些算子,并打印相关结果,如下例所示。 -**示例代码** +**示例代码:** + ```python import numpy as np from mindspore import context, Tensor -from mindspore.ops import functional as F +import mindspore.ops as ops context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") def tensor_add_func(x, y): - z = F.tensor_add(x, y) - z = F.tensor_add(z, x) + z = ops.tensor_add(x, y) + z = ops.tensor_add(z, x) return z x = Tensor(np.ones([3, 3], dtype=np.float32)) @@ -99,7 +99,7 @@ output = tensor_add_func(x, y) print(output.asnumpy()) ``` -**输出** +**输出:** ```python [[3. 3. 3.] 
@@ -109,7 +109,6 @@ print(output.asnumpy()) > PyNative不支持并行执行和summary功能,图模式的并行和summary相关算子不能使用。 - ### 提升PyNative性能 为了提高PyNative模式下的前向计算任务执行速度,MindSpore提供了Staging功能,该功能可以在PyNative模式下将Python函数或者Python类的方法编译成计算图,通过图优化等技术提高运行速度,如下例所示。 @@ -118,7 +117,7 @@ print(output.asnumpy()) import numpy as np import mindspore.nn as nn from mindspore import context, Tensor -import mindspore.ops.operations as P +import mindspore.ops as ops from mindspore.common.api import ms_function context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") @@ -126,7 +125,7 @@ context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") class TensorAddNet(nn.Cell): def __init__(self): super(TensorAddNet, self).__init__() - self.add = P.TensorAdd() + self.add = ops.TensorAdd() @ms_function def construct(self, x, y): @@ -138,11 +137,12 @@ y = Tensor(np.ones([4, 4]).astype(np.float32)) net = TensorAddNet() z = net(x, y) # Staging mode -tensor_add = P.TensorAdd() +tensor_add = ops.TensorAdd() res = tensor_add(x, z) # PyNative mode print(res.asnumpy()) ``` -**输出** + +**输出:** ```python [[3. 3. 3. 3.] @@ -155,18 +155,18 @@ print(res.asnumpy()) 需要说明的是,加装了`ms_function`装饰器的函数中,如果包含不需要进行参数训练的算子(如`pooling`、`tensor_add`等算子),则这些算子可以在被装饰的函数中直接调用,如下例所示。 -**示例代码** +**示例代码:** ```python import numpy as np import mindspore.nn as nn from mindspore import context, Tensor -import mindspore.ops.operations as P +import mindspore.ops as ops from mindspore.common.api import ms_function context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") -tensor_add = P.TensorAdd() +tensor_add = ops.TensorAdd() @ms_function def tensor_add_fn(x, y): @@ -178,7 +178,8 @@ y = Tensor(np.ones([4, 4]).astype(np.float32)) z = tensor_add_fn(x, y) print(z.asnumpy()) ``` -**输出** + +**输出:** ```shell [[2. 2. 2. 2.] 
@@ -189,7 +190,7 @@ print(z.asnumpy()) 如果被装饰的函数中包含了需要进行参数训练的算子(如`Convolution`、`BatchNorm`等算子),则这些算子必须在被装饰等函数之外完成实例化操作,如下例所示。 -**示例代码** +**示例代码:** ```python import numpy as np @@ -211,7 +212,7 @@ z = conv_fn(Tensor(input_data)) print(z.asnumpy()) ``` -**输出** +**输出:** ```shell [[[[ 0.10377571 -0.0182163 -0.05221086] @@ -247,15 +248,14 @@ print(z.asnumpy()) [ 0.0377498 -0.06117418 0.00546303]]]] ``` - ## 调试网络训练模型 PyNative模式下,还可以支持单独求梯度的操作。如下例所示,可通过`GradOperation`求该函数或者网络所有的输入梯度。需要注意,输入类型仅支持Tensor。 -**示例代码** +**示例代码:** ```python -from mindspore.ops import composite as C +import mindspore.ops as ops import mindspore.context as context context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") @@ -264,12 +264,12 @@ def mul(x, y): return x * y def mainf(x, y): - return C.GradOperation(get_all=True)(mul)(x, y) + return ops.GradOperation(get_all=True)(mul)(x, y) print(mainf(Tensor(1, mstype.int32), Tensor(2, mstype.int32))) ``` -**输出** +**输出:** ```python (2, 1) @@ -277,13 +277,12 @@ print(mainf(Tensor(1, mstype.int32), Tensor(2, mstype.int32))) 在进行网络训练时,求得梯度然后调用优化器对参数进行优化(暂不支持在反向计算梯度的过程中设置断点),然后再利用前向计算loss,从而实现在PyNative模式下进行网络训练。 -**完整LeNet示例代码** +**完整LeNet示例代码:** ```python import numpy as np import mindspore.nn as nn -import mindspore.ops.operations as P -from mindspore.ops import composite as C +import mindspore.ops as ops from mindspore.common import dtype as mstype from mindspore import context, Tensor, ParameterTuple from mindspore.common.initializer import TruncatedNormal @@ -314,7 +313,7 @@ class LeNet5(nn.Cell): Lenet network Args: num_class (int): Num classes. Default: 10. 
- + Returns: Tensor, output tensor @@ -332,7 +331,7 @@ class LeNet5(nn.Cell): self.fc3 = fc_with_initialize(84, self.num_class) self.relu = nn.ReLU() self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - self.reshape = P.Reshape() + self.reshape = ops.Reshape() def construct(self, x): x = self.conv1(x) @@ -348,8 +347,8 @@ class LeNet5(nn.Cell): x = self.relu(x) x = self.fc3(x) return x - - + + class GradWrap(nn.Cell): """ GradWrap definition """ def __init__(self, network): @@ -359,7 +358,7 @@ class GradWrap(nn.Cell): def construct(self, x, label): weights = self.weights - return C.GradOperation(get_by_list=True)(self.network, weights)(x, label) + return ops.GradOperation(get_by_list=True)(self.network, weights)(x, label) net = LeNet5() optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9) @@ -378,7 +377,7 @@ loss = loss_output.asnumpy() print(loss) ``` -**输出** +**输出:** ```python 2.3050091 diff --git a/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md b/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md index 707554e535..02792306b4 100644 --- a/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md +++ b/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md @@ -81,11 +81,11 @@ - `device_ip`表示集成网卡的IP地址,可以在当前机器执行指令`cat /etc/hccn.conf`,`address_x`的键值就是网卡IP地址。 - `rank_id`表示卡逻辑序号,固定从0开始编号。 - ### 调用集合通信库 MindSpore分布式并行训练的通信使用了华为集合通信库`Huawei Collective Communication Library`(以下简称HCCL),可以在Ascend AI处理器配套的软件包中找到。同时`mindspore.communication.management`中封装了HCCL提供的集合通信接口,方便用户配置分布式信息。 > HCCL实现了基于Ascend AI处理器的多机多卡通信,有一些使用限制,我们列出使用分布式服务常见的,详细的可以查看HCCL对应的使用文档。 +> > - 单机场景下支持1、2、4、8卡设备集群,多机场景下支持8*n卡设备集群。 > - 每台机器的0-3卡和4-7卡各为1个组网,2卡和4卡训练时卡必须相连且不支持跨组网创建集群。 > - 服务器硬件架构及操作系统需要是SMP(Symmetrical Multi-Processing,对称多处理器)处理模式。 @@ -100,10 +100,11 @@ from mindspore.communication.management import init if __name__ == "__main__": 
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=int(os.environ["DEVICE_ID"])) init() - ... + ... ``` 其中, + - `mode=context.GRAPH_MODE`:使用分布式训练需要指定运行模式为图模式(PyNative模式不支持并行)。 - `device_id`:卡的物理序号,即卡所在机器中的实际序号。 - `init`:使能HCCL通信,并完成分布式训练初始化操作。 @@ -112,7 +113,6 @@ if __name__ == "__main__": 分布式训练时,数据是以数据并行的方式导入的。下面我们以CIFAR-10数据集为例,介绍以数据并行方式导入CIFAR-10数据集的方法,`data_path`是指数据集的路径,即`cifar-10-batches-bin`文件夹的路径。 - ```python import mindspore.common.dtype as mstype import mindspore.dataset as ds @@ -158,7 +158,9 @@ def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size= return data_set ``` + 其中,与单机不同的是,在数据集接口需要传入`num_shards`和`shard_id`参数,分别对应卡的数量和逻辑序号,建议通过HCCL接口获取: + - `get_rank`:获取当前设备在集群中的ID。 - `get_group_size`:获取集群数量。 @@ -175,29 +177,28 @@ def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size= 在Loss部分,我们采用`SoftmaxCrossEntropyWithLogits`的展开形式,即按照数学公式,将其展开为多个小算子进行实现,样例代码如下: ```python -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore import Tensor -import mindspore.ops.functional as F import mindspore.common.dtype as mstype import mindspore.nn as nn class SoftmaxCrossEntropyExpand(nn.Cell): def __init__(self, sparse=False): super(SoftmaxCrossEntropyExpand, self).__init__() - self.exp = P.Exp() - self.sum = P.ReduceSum(keep_dims=True) - self.onehot = P.OneHot() + self.exp = ops.Exp() + self.sum = ops.ReduceSum(keep_dims=True) + self.onehot = ops.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) - self.div = P.RealDiv() - self.log = P.Log() - self.sum_cross_entropy = P.ReduceSum(keep_dims=False) - self.mul = P.Mul() - self.mul2 = P.Mul() - self.mean = P.ReduceMean(keep_dims=False) + self.div = ops.RealDiv() + self.log = ops.Log() + self.sum_cross_entropy = ops.ReduceSum(keep_dims=False) + self.mul = ops.Mul() + self.mul2 = ops.Mul() + self.mean = ops.ReduceMean(keep_dims=False) self.sparse = sparse - self.max = 
P.ReduceMax(keep_dims=True) - self.sub = P.Sub() + self.max = ops.ReduceMax(keep_dims=True) + self.sub = ops.Sub() def construct(self, logit, label): logit_max = self.max(logit, -1) @@ -205,10 +206,10 @@ class SoftmaxCrossEntropyExpand(nn.Cell): exp_sum = self.sum(exp, -1) softmax_result = self.div(exp, exp_sum) if self.sparse: - label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) softmax_result_log = self.log(softmax_result) loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) - loss = self.mul2(F.scalar_to_array(-1.0), loss) + loss = self.mul2(ops.scalar_to_array(-1.0), loss) loss = self.mean(loss, -1) return loss @@ -255,7 +256,9 @@ def test_train_cifar(epoch_size=10): model = Model(net, loss_fn=loss, optimizer=opt) model.train(epoch_size, dataset, callbacks=[loss_cb], dataset_sink_mode=True) ``` + 其中, + - `dataset_sink_mode=True`:表示采用数据集的下沉模式,即训练的计算下沉到硬件平台中执行。 - `LossMonitor`:能够通过回调函数返回Loss值,用于监控损失函数。 @@ -322,6 +325,7 @@ cd ../ 脚本需要传入变量`DATA_PATH`和`RANK_SIZE`,分别表示数据集的路径和卡的数量。 其中必要的环境变量有, + - `RANK_TABLE_FILE`:组网信息文件的路径。 - `DEVICE_ID`:当前卡在机器上的实际序号。 - `RANK_ID`: 当前卡的逻辑序号。 @@ -331,7 +335,7 @@ cd ../ 日志文件保存`device`目录下,`env.log`中记录了环境变量的相关信息,关于Loss部分结果保存在`train.log`中,示例如下: -``` +```text epoch: 1 step: 156, loss is 2.0084016 epoch: 2 step: 156, loss is 1.6407638 epoch: 3 step: 156, loss is 1.6164391 @@ -391,7 +395,7 @@ from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMoni from mindspore.communication.management import get_rank from mindspore.common.parameter import Parameter from mindspore import Tensor -import mindspore.ops.operations as P +import mindspore.ops as ops import numpy as np # define network class DataParallelNet(Cell): @@ -400,7 +404,7 @@ class DataParallelNet(Cell): weight_np = np.full(test_size, 0.1, dtype=np.float32) self.weight = Parameter(Tensor(weight_np), name="fc_weight", 
layerwise_parallel=layerwise_parallel) self.relu = ReLU() - self.fc = P.MatMul(transpose_a=transpose_a, transpose_b=transpose_b) + self.fc = ops.MatMul(transpose_a=transpose_a, transpose_b=transpose_b) if strategy is not None: self.fc.shard(strategy) @@ -459,8 +463,8 @@ class SemiAutoParallelNet(Cell): equal_np = np.full(test_size, 0.1, dtype=np.float32) self.mul_weight = Parameter(Tensor(mul_np), name="mul_weight") self.equal_weight = Parameter(Tensor(equal_np), name="equal_weight") - self.mul = P.Mul() - self.equal = P.Equal() + self.mul = ops.Mul() + self.equal = ops.Equal() if strategy is not None: self.mul.shard(strategy) self.equal.shard(strategy2) @@ -485,7 +489,7 @@ context.reset_auto_parallel_context() # set parallel mode, data parallel mode is selected for training and model saving. If you want to choose auto parallel # mode, you can simply change the value of parallel_mode parameter to ParallelMode.AUTO_PARALLEL. context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, - strategy_ckpt_save_file='./rank_{}_ckpt/strategy.txt'.format(get_rank)) + strategy_ckpt_save_file='./rank_{}_ckpt/strategy.txt'.format(get_rank())) ``` 然后根据需要设置checkpoint保存策略,以及设置优化器和损失函数等,代码如下: @@ -514,12 +518,14 @@ context.reset_auto_parallel_context() 只需要改动设置checkpoint保存策略的代码,将`CheckpointConfig`中的`integrated_save`参数设置为Fasle,便可实现每张卡上只保存本卡的checkpoint文件,具体改动如下: 将checkpoint配置策略由 + ```python # config checkpoint ckpt_config = CheckpointConfig(keep_checkpoint_max=1) ``` 改为 + ```python # config checkpoint ckpt_config = CheckpointConfig(keep_checkpoint_max=1, integrated_save=False) diff --git a/tutorials/training/source_zh_cn/advanced_use/enable_graph_kernel_fusion.md b/tutorials/training/source_zh_cn/advanced_use/enable_graph_kernel_fusion.md index 5d38995b9a..d49f885040 100644 --- a/tutorials/training/source_zh_cn/advanced_use/enable_graph_kernel_fusion.md +++ b/tutorials/training/source_zh_cn/advanced_use/enable_graph_kernel_fusion.md @@ -41,7 +41,7 @@
context.set_context(enable_graph_kernel=True) import mindspore.context as context from mindspore import Tensor from mindspore.nn import Cell - from mindspore.ops import operations as P + import mindspore.ops as ops context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") # save graph ir files. @@ -53,8 +53,8 @@ context.set_context(enable_graph_kernel=True) class NetBasicFuse(Cell): def __init__(self): super(NetBasicFuse, self).__init__() - self.add = P.TensorAdd() - self.mul = P.Mul() + self.add = ops.TensorAdd() + self.mul = ops.Mul() def construct(self, x): mul_res = self.mul(x, 2.0) @@ -66,9 +66,9 @@ context.set_context(enable_graph_kernel=True) class NetCompositeFuse(Cell): def __init__(self): super(NetCompositeFuse, self).__init__() - self.add = P.TensorAdd() - self.mul = P.Mul() - self.pow = P.Pow() + self.add = ops.TensorAdd() + self.mul = ops.Mul() + self.pow = ops.Pow() def construct(self, x): mul_res = self.mul(x, 2.0) diff --git a/tutorials/training/source_zh_cn/advanced_use/enable_mixed_precision.md b/tutorials/training/source_zh_cn/advanced_use/enable_mixed_precision.md index 4ec536b3cf..08c0647575 100644 --- a/tutorials/training/source_zh_cn/advanced_use/enable_mixed_precision.md +++ b/tutorials/training/source_zh_cn/advanced_use/enable_mixed_precision.md @@ -42,6 +42,7 @@ MindSpore混合精度典型的计算流程如下图所示: 使用自动混合精度,需要调用相应的接口,将待训练网络和优化器作为输入传进去;该接口会将整张网络的算子转换成FP16算子(除`BatchNorm`算子和Loss涉及到的算子外)。可以使用`amp`接口和`Model`接口两种方式实现混合精度。 使用`amp`接口具体的实现步骤为: + 1. 引入MindSpore的混合精度的接口`amp`; 2. 
定义网络:该步骤和普通的网络定义没有区别(无需手动配置某个算子的精度); @@ -55,7 +56,7 @@ import numpy as np import mindspore.nn as nn from mindspore import Tensor, context -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.nn import Momentum # The interface of Auto_mixed precision from mindspore import amp @@ -68,7 +69,7 @@ class Net(nn.Cell): def __init__(self, input_channel, out_channel): super(Net, self).__init__() self.dense = nn.Dense(input_channel, out_channel) - self.relu = P.ReLU() + self.relu = ops.ReLU() def construct(self, x): x = self.dense(x) @@ -93,6 +94,7 @@ output = train_network(predict, label) ``` 使用`Model`接口具体的实现步骤为: + 1. 引入MindSpore的模型训练接口`Model`; 2. 定义网络:该步骤和普通的网络定义没有区别(无需手动配置某个算子的精度); @@ -169,6 +171,7 @@ model.train(epoch=10, train_dataset=ds_train) MindSpore还支持手动混合精度。假定在网络中只有一个Dense Layer要用FP32计算,其他Layer都用FP16计算。混合精度配置以Cell为粒度,Cell默认是FP32类型。 以下是一个手动混合精度的实现步骤: + 1. 定义网络: 该步骤与自动混合精度中的步骤2类似; 2. 配置混合精度: 通过`net.to_float(mstype.float16)`,把该Cell及其子Cell中所有的算子都配置成FP16;然后,将模型中的dense算子手动配置成FP32; @@ -183,7 +186,7 @@ import numpy as np import mindspore.nn as nn import mindspore.common.dtype as mstype from mindspore import Tensor, context -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.nn import WithLossCell, TrainOneStepCell from mindspore.nn import Momentum @@ -195,7 +198,7 @@ class Net(nn.Cell): def __init__(self, input_channel, out_channel): super(Net, self).__init__() self.dense = nn.Dense(input_channel, out_channel) - self.relu = P.ReLU() + self.relu = ops.ReLU() def construct(self, x): x = self.dense(x) @@ -220,4 +223,4 @@ train_network.set_train() # Run training output = train_network(predict, label) -``` \ No newline at end of file +``` diff --git a/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md b/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md index 95e92a70c8..4ad367d899 100644 --- 
a/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md +++ b/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md @@ -22,14 +22,10 @@ MindConverter是一款将PyTorch模型脚本转换至MindSpore的脚本迁移工具。结合转换报告的提示信息,用户对转换后脚本进行微小改动,即可快速将PyTorch模型脚本迁移至MindSpore。 - - ## 安装 此工具为MindInsight的子模块,安装MindInsight后,即可使用MindConverter,MindInsight安装请参考该[安装文档](https://www.mindspore.cn/install/)。 - - ## 用法 MindConverter提供命令行(Command-line interface, CLI)的使用方式,命令如下。 @@ -79,15 +75,13 @@ optional arguments: 另外,当使用基于图结构的脚本生成方案时,请确保原PyTorch项目已在Python包搜索路径中,可通过CLI进入Python交互式命令行,通过import的方式判断是否已满足;若未加入,可通过`--project_path`命令手动将项目路径传入,以确保MindConverter可引用到原PyTorch脚本。 - > 假设用户项目目录为`/home/user/project/model_training`,用户可通过如下命令手动项目添加至包搜索路径中:`export PYTHONPATH=/home/user/project/model_training:$PYTHONPATH` - > 此处MindConverter需要引用原PyTorch脚本,是因为PyTorch模型反向序列化过程中会引用原脚本。 - ## 使用场景 MindConverter提供两种技术方案,以应对不同脚本迁移场景: + 1. 用户希望迁移后脚本保持原有PyTorch脚本结构(包括变量、函数、类命名等与原脚本保持一致); 2. 用户希望迁移后脚本保持较高的转换率,尽量少的修改、甚至不需要修改,即可实现迁移后模型脚本的执行。 @@ -101,7 +95,6 @@ MindConverter提供两种技术方案,以应对不同脚本迁移场景: > 2. 基于图结构的脚本生成方案,由于要基于推理模式加载PyTorch模型,会导致转换后网络中Dropout算子丢失,需要用户手动补齐; > 3. 基于图结构的脚本生成方案持续优化中。 - ## 使用示例 ### 基于AST的脚本转换示例 @@ -121,6 +114,7 @@ line x:y: [UnConvert] 'operator' didn't convert. ... ``` 转换报告示例如下所示: + ```text [Start Convert] [Insert] 'import mindspore.ops.operations as P' is inserted to the converted file. @@ -133,7 +127,6 @@ line x:y: [UnConvert] 'operator' didn't convert. ... 
对于部分未成功转换的算子,报告中会提供修改建议,如`line 157:23`,MindConverter建议将`torch.nn.AdaptiveAvgPool2d`替换为`mindspore.ops.operations.ReduceMean`。 - ### 基于图结构的脚本生成示例 若用户已将PyTorch模型保存为.pth格式,假设模型绝对路径为`/home/user/model.pth`,该模型期望的输入样本shape为(3, 224, 224),原PyTorch脚本位于`/home/user/project/model_training`,希望将脚本输出至`/home/user/output`,转换报告输出至`/home/user/output/report`,则脚本生成命令为: @@ -147,10 +140,8 @@ mindconverter --model_file /home/user/model.pth --shape 3,224,224 \ 执行该命令,MindSpore代码文件、转换报告生成至相应目录。 - 基于图结构的脚本生成方案产生的转换报告格式与AST方案相同。然而,由于基于图结构方案属于生成式方法,转换过程中未参考原PyTorch脚本,因此生成的转换报告中涉及的代码行、列号均指生成后脚本。 - 另外对于未成功转换的算子,在代码中会相应的标识该节点输入、输出Tensor的shape(以`input_shape`, `output_shape`标识),便于用户手动修改。以Reshape算子为例(暂不支持Reshape),将生成如下代码: ```python @@ -174,7 +165,7 @@ class Classifier(nn.Cell): 通过`input_shape`、`output_shape`参数,用户可以十分便捷地完成算子替换,替换结果如下: ```python -from mindspore.ops import operations as P +import mindspore.ops as ops ... class Classifier(nn.Cell): @@ -182,7 +173,7 @@ class Classifier(nn.Cell): def __init__(self): super(Classifier, self).__init__() ... - self.reshape = P.Reshape(input_shape=(1, 1280, 1, 1), + self.reshape = ops.Reshape(input_shape=(1, 1280, 1, 1), output_shape=(1, 1280)) ... @@ -194,7 +185,6 @@ class Classifier(nn.Cell): ``` - > 其中`--output`与`--report`参数可省略,若省略,该命令将在当前工作目录(Working directory)下自动创建`output`目录,将生成的脚本、转换报告输出至该目录。 ## 注意事项 diff --git a/tutorials/training/source_zh_cn/advanced_use/save_load_model_hybrid_parallel.md b/tutorials/training/source_zh_cn/advanced_use/save_load_model_hybrid_parallel.md index f58251f5ba..ccc631c389 100644 --- a/tutorials/training/source_zh_cn/advanced_use/save_load_model_hybrid_parallel.md +++ b/tutorials/training/source_zh_cn/advanced_use/save_load_model_hybrid_parallel.md @@ -72,9 +72,6 @@ MindSpore模型并行场景下,每个实例进程只保存有本节点对应 4. 
执行阶段二训练。 - - - ## 对保存的CheckPoint文件做合并处理 ### 整体流程 @@ -85,18 +82,19 @@ MindSpore模型并行场景下,每个实例进程只保存有本节点对应 最后,将更新之后的参数列表,通过MindSpore提供的API保存到文件,生成新的CheckPoint文件。对应下图中的Step4。 -![img](./images/checkpoint_integration_process.jpg) +![img](./images/checkpoint_integration_process.jpg) ### 准备工作 #### 按逻辑顺序导入CheckPoint文件 定义网络,调用`load_checkpoint`、`load_param_into_net`接口,按逻辑顺序将CheckPoint文件导入网络,之后调用`parameters_and_names`接口获取网络里所有的参数数据。 -``` -net = Net() + +```python +net = Net() opt = Momentum(learning_rate=0.01, momentum=0.9, params=net.get_parameters()) net = TrainOneStepCell(net, opt) -param_dicts = [] +param_dicts = [] for i in range(rank_size): file_name = os.path.join("./node"+str(i), "CKP_1-4_32.ckpt") # checkpoint file name of current node param_dict = load_checkpoint(file_name) @@ -116,7 +114,8 @@ for i in range(rank_size): #### 获取模型参数切分策略 调用`build_searched_strategy`接口,得到模型各个参数的切分策略。 -``` + +```python strategy = build_searched_strategy("./strategy_train.cpkt") ``` @@ -130,45 +129,48 @@ strategy = build_searched_strategy("./strategy_train.cpkt") 参数名称为"model_parallel_weight",切分逻辑为4卡场景。 -1. 针对涉及模型并行的参数,获取所有节点上的参数数据。 +1. 针对涉及模型并行的参数,获取所有节点上的参数数据。 - ``` + ```python sliced_parameters = [] for i in range(4): parameter = param_dicts[i].get("model_parallel_weight") sliced_parameters.append(parameter) ``` + > 如果要保证参数更新速度不变,需要对优化器中保存的参数,如“moments.model_parallel_weight”,同样做合并处理。 2. 调用`merge_sliced_parameter`接口进行参数合并。 - ``` - merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) + ```python + merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) ``` > 如果存在多个模型并行的参数,则需要重复步骤1到步骤2循环逐个处理。 ### 保存数据生成新的CheckPoint文件 -1. 将`param_dict`转换为list类型数据。 +1. 
将`param_dict`转换为list类型数据。 - ``` + ```python param_list = [] for (key, value) in param_dict.items(): - each_param = {} - each_param["name"] = key - if isinstance(value.data, Tensor): - param_data = value.data - else: - param_data = Tensor(value.data) - each_param["data"] = param_data - param_list.append(each_param) + each_param = {} + each_param["name"] = key + if isinstance(value.data, Tensor): + param_data = value.data + else: + param_data = Tensor(value.data) + each_param["data"] = param_data + param_list.append(each_param) ``` 2. 调用`save_checkpoint`接口,将参数数据写入文件,生成新的CheckPoint文件。 - ``` + + ```python save_checkpoint(param_list, “./CKP-Integrated_1-4_32.ckpt”) ``` + 其中, - `save_checkpoint`: 通过该接口将网络模型参数信息存入文件。 - `CKP-Integrated_1-4_32.ckpt`: 新生成的CheckPoint模型参数文件名称。 @@ -185,7 +187,7 @@ strategy = build_searched_strategy("./strategy_train.cpkt") 调用`load_checkpoint`接口,从CheckPoint文件中加载模型参数数据。 -``` +```python param_dict = load_checkpoint("./CKP-Integrated_1-4_32.ckpt") ``` @@ -204,7 +206,8 @@ param_dict = load_checkpoint("./CKP-Integrated_1-4_32.ckpt") 1. 对模型参数数据做切分。 如下代码示例,在维度0上,将数据切分为两个切片。 - ``` + + ```python new_param = parameter_dict[“model_parallel_weight”] slice_list = np.split(new_param.data.asnumpy(), 2, axis=0) new_param_moments = parameter_dict[“moments.model_parallel_weight”] @@ -213,24 +216,28 @@ param_dict = load_checkpoint("./CKP-Integrated_1-4_32.ckpt") 切分后的数据情况: - slice_list[0] --- [1, 2, 3, 4] 对应device0 - slice_list[1] --- [5, 6, 7, 8] 对应device1 + ```text + slice_list[0] --- [1, 2, 3, 4] 对应device0 + slice_list[1] --- [5, 6, 7, 8] 对应device1 + ``` 与`slice_list`类似,`slice_moments_list` 也被切分为两个shape为[1, 4]的Tensor。 -2. 在每个节点分别加载对应的数据切片。 +2. 在每个节点分别加载对应的数据切片。 获取本节点的rank_id,根据rank_id加载数据。 - ``` + + ```python rank = get_rank() tensor_slice = Tensor(slice_list[rank]) tensor_slice_moments = Tensor(slice_moments_list[rank]) ``` - - `get_rank`:获取当前设备在集群中的ID。 -3. 修改模型参数数据值。 + - `get_rank`:获取当前设备在集群中的ID。 - ``` +3. 
修改模型参数数据值。 + + ```python new_param.set_data(tensor_slice, True) new_param_moments.set_data(tensor_slice_moments, True) ``` @@ -240,8 +247,9 @@ param_dict = load_checkpoint("./CKP-Integrated_1-4_32.ckpt") ### 步骤3:将修改后的参数数据加载到网络中 调用`load_param_into_net`接口,将模型参数数据加载到网络中。 -``` -net = Net() + +```python +net = Net() opt = Momentum(learning_rate=0.01, momentum=0.9, params=parallel_net.get_parameters()) load_param_into_net(net, param_dict) load_param_into_net(opt, param_dict) @@ -266,43 +274,44 @@ load_param_into_net(opt, param_dict) > > 本文档附上对CheckPoint文件做合并处理以及分布式训练前加载CheckPoint文件的示例代码,仅作为参考,实际请参考具体情况实现。 -### 示例代码 +### 示例代码 1. 执行脚本对CheckPoint文件做合并处理。 - 脚本执行命令: - ``` + 脚本执行命令: + + ```bash python ./integrate_checkpoint.py "待合并的CheckPoint文件名称" "合并生成的CheckPoint文件路径&名称" "策略文件路径&名称" "节点数" ``` integrate_checkpoint.py: - ``` + ```python import numpy as np import os import mindspore.nn as nn from mindspore import Tensor, Parameter - from mindspore.ops import operations as P + import mindspore.ops as ops from mindspore.train.serialization import save_checkpoint, load_checkpoint, build_searched_strategy, merge_sliced_parameter - + class Net(nn.Cell): def __init__(self,weight_init): super(Net, self).__init__() self.weight = Parameter(Tensor(weight_init), "model_parallel_weight", layerwise_parallel=True) - self.fc = P.MatMul(transpose_b=True) - + self.fc = ops.MatMul(transpose_b=True) + def construct(self, x): x = self.fc(x, self.weight1) return x - + def integrate_ckpt_file(old_ckpt_file, new_ckpt_file, strategy_file, rank_size): weight = np.ones([2, 8]).astype(np.float32) net = Net(weight) opt = Momentum(learning_rate=0.01, momentum=0.9, params=net.get_parameters()) net = TrainOneStepCell(net, opt) - + # load CheckPoint into net in rank id order - param_dicts = [] + param_dicts = [] for i in range(rank_size): file_name = os.path.join("./node"+str(i), old_ckpt_file) param_dict = load_checkpoint(file_name) @@ -311,21 +320,21 @@ load_param_into_net(opt, param_dict) for _, param in 
net.parameters_and_names(): param_dict[param.name] = param param_dicts.append(param_dict) - + strategy = build_searched_strategy(strategy_file) param_dict = {} - + for paramname in ["model_parallel_weight", "moments.model_parallel_weight"]: # get layer wise model parallel parameter sliced_parameters = [] for i in range(rank_size): parameter = param_dicts[i].get(paramname) sliced_parameters.append(parameter) - + # merge the parallel parameters of the model - merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) + merged_parameter = merge_sliced_parameter(sliced_parameters, strategy) param_dict[paramname] = merged_parameter - + # convert param_dict to list type data param_list = [] for (key, value) in param_dict.items(): @@ -335,14 +344,14 @@ load_param_into_net(opt, param_dict) param_data = value.data else: param_data = Tensor(value.data) - each_param["data"] = param_data - param_list.append(each_param) - + each_param["data"] = param_data + param_list.append(each_param) + # call the API to generate a new CheckPoint file save_checkpoint(param_list, new_ckpt_file) - + return - + if __name__ == "__main__": try: old_ckpt_file = sys.argv[1] @@ -354,14 +363,15 @@ load_param_into_net(opt, param_dict) print("Fail to integrate checkpoint file) sys.exit(-1) ``` - + 执行结果: 脚本执行前,CheckPoint文件中参数值: - ``` + + ```text device0: name is model_parallel_weight - value is + value is [[0.87537426 1.0448935 0.86736983 0.8836905 0.77354026 0.69588304 0.9183654 0.7792076] [0.87224025 0.8726848 0.771446 0.81967723 0.88974726 0.7988162 0.72919345 0.7677011]] name is learning_rate @@ -372,10 +382,10 @@ load_param_into_net(opt, param_dict) value is [[0.2567724 -0.07485991 0.282002 0.2456022 0.454939 0.619168 0.18964815 0.45714882] [0.25946522 0.24344791 0.45677605 0.3611395 0.23378398 0.41439137 0.5312468 0.4696194]] - + device1: name is model_parallel_weight - value is + value is [[0.9210751 0.9050457 0.9827775 0.920396 0.9240526 0.9750359 1.0275179 1.0819869] [0.73605865 
0.84631145 0.9746683 0.9386582 0.82902765 0.83565056 0.9702136 1.0514659]] name is learning_rate @@ -385,11 +395,11 @@ load_param_into_net(opt, param_dict) name is moments.model_weight value is [[0.2417504 0.28193963 0.06713893 0.21510397 0.23380603 0.11424308 0.0218009 -0.11969765] - [0.45955992 0.22664294 0.01990281 0.0731914 0.27125207 0.27298513 -0.01716102 -0.15327111]] - + [0.45955992 0.22664294 0.01990281 0.0731914 0.27125207 0.27298513 -0.01716102 -0.15327111]] + device2: name is model_parallel_weight - value is + value is [[1.0108461 0.8689414 0.91719437 0.8805056 0.7994629 0.8999671 0.7585804 1.0287056 ] [0.90653455 0.60146594 0.7206475 0.8306303 0.8364681 0.89625114 0.7354735 0.8447268]] name is learning_rate @@ -397,10 +407,10 @@ load_param_into_net(opt, param_dict) name is momentum value is [0.9] name is moments.model_weight - value is + value is [[0.03440702 0.41419312 0.24817684 0.30765256 0.48516113 0.24904746 0.57791173 0.00955463] [0.13458519 0.6690533 0.49259356 0.28319967 0.25951773 0.16777472 0.45696738 0.24933104]] - + device3: name is model_parallel_weight value is @@ -411,16 +421,16 @@ load_param_into_net(opt, param_dict) name is momentum value is [0.9] name is moments.model_parallel_weight - value is + value is [[0.14152306 0.5040985 0.24455397 0.10907605 0.11319532 0.19538902 0.01208619 0.40430856] [-0.7773164 -0.47611716 -0.6041424 -0.6144473 -0.2651842 -0.31909415 -0.4510405 -0.12860501]] ``` 脚本执行后,CheckPoint文件中参数值: - ``` + ```text name is model_parallel_weight - value is + value is [[1.1138763 1.0962057 1.3516843 1.0812817 1.1579804 1.1078343 1.0906502 1.3207073] [0.916671 1.0781671 1.0368758 0.9680898 1.1735439 1.0628364 0.9960786 1.0135143] [0.8828271 0.7963984 0.90675324 0.9830291 0.89010954 0.897052 0.7890109 0.89784735] @@ -434,7 +444,7 @@ load_param_into_net(opt, param_dict) name is momentum value is [0.9] name is moments.model_parallel_weight - value is + value is [[0.2567724 -0.07485991 0.282002 0.2456022 0.454939 0.619168 
0.18964815 0.45714882] [0.25946522 0.24344791 0.45677605 0.3611395 0.23378398 0.41439137 0.5312468 0.4696194 ] [0.2417504 0.28193963 0.06713893 0.21510397 0.23380603 0.11424308 0.0218009 -0.11969765] @@ -446,36 +456,35 @@ load_param_into_net(opt, param_dict) -0.12860501]] ``` - 2. 执行阶段2训练,训练前加载CheckPoint文件。其中训练代码部分,需要根据实际情况补充。 - ``` + ```python import numpy as np import os import mindspore.nn as nn from mindspore import context from mindspore.communication.management import init from mindspore import Tensor, Parameter - from mindspore.ops import operations as P + import mindspore.ops as ops from mindspore.train.serialization import load_checkpoint, load_param_into_net - + from mindspore.communication.management import init devid = int(os.getenv('DEVICE_ID')) context.set_context(mode=context.GRAPH_MODE,device_target='Ascend',save_graphs=True, device_id=devid) init() - + class Net(nn.Cell): def __init__(self,weight_init): super(Net, self).__init__() self.weight = Parameter(Tensor(weight_init), "model_parallel_weight", layerwise_parallel=True) - self.fc = P.MatMul(transpose_b=True) - + self.fc = ops.MatMul(transpose_b=True) + def construct(self, x): x = self.fc(x, self.weight1) return x def train_mindspore_impl_fc(input, label, ckpt_file): param_dict = load_checkpoint(ckpt_file) - + for paramname in ["model_parallel_weight", "moments.model_parallel_weight"]: # get layer wise model parallel parameter new_param = parameter_dict[paramname] @@ -486,23 +495,23 @@ load_param_into_net(opt, param_dict) tensor_slice = Tensor(slice_list[rank]) # modify model parameter data values new_param.set_data(tensor_slice, True) - + # load the modified parameter data into the network weight = np.ones([4, 8]).astype(np.float32) net = Net(weight) load_param_into_net(net, param_dict) opt = Momentum(learning_rate=0.01, momentum=0.9, params=parallel_net.get_parameters()) load_param_into_net(opt, param_dict) - # train code + # train code ... 
- + if __name__ == "__main__": input = np.random.random((4, 8)).astype(np.float32) print("mean = ", np.mean(input,axis=1, keepdims=True)) label = np.random.random((4, 4)).astype(np.float32) train_mindspore_impl_fc(input, label, weight1) ``` - + 其中, - `mode=context.GRAPH_MODE`:使用分布式训练需要指定运行模式为图模式(PyNative模式不支持并行)。 @@ -511,10 +520,10 @@ load_param_into_net(opt, param_dict) 加载后的参数值: - ``` + ```text device0: name is model_parallel_weight - value is + value is [[0.87537426 1.0448935 0.86736983 0.8836905 0.77354026 0.69588304 0.9183654 0.7792076] [0.87224025 0.8726848 0.771446 0.81967723 0.88974726 0.7988162 0.72919345 0.7677011] [0.8828271 0.7963984 0.90675324 0.9830291 0.89010954 0.897052 0.7890109 0.89784735] @@ -532,7 +541,7 @@ load_param_into_net(opt, param_dict) device1: name is model_parallel_weight - value is + value is [[1.0053468 0.98402303 0.99762845 0.97587246 1.0259694 1.0055295 0.99420834 0.9496847] [1.0851002 1.0295962 1.0999886 1.0958165 0.9765328 1.146529 1.0970603 1.1388365] [0.7147005 0.9168278 0.80178416 0.6258351 0.8413766 0.5909515 0.696347 0.71359116] @@ -546,5 +555,5 @@ load_param_into_net(opt, param_dict) [[0.03440702 0.41419312 0.24817684 0.30765256 0.48516113 0.24904746 0.57791173 0.00955463] [0.13458519 0.6690533 0.49259356 0.28319967 0.25951773 0.16777472 0.45696738 0.24933104] [0.14152306 0.5040985 0.24455397 0.10907605 0.11319532 0.19538902 0.01208619 0.40430856] - [-0.7773164 -0.47611716 -0.6041424 -0.6144473 -0.2651842 -0.31909415 -0.4510405 -0.12860501]] + [-0.7773164 -0.47611716 -0.6041424 -0.6144473 -0.2651842 -0.31909415 -0.4510405 -0.12860501]] ``` diff --git a/tutorials/training/source_zh_cn/advanced_use/summary_record.md b/tutorials/training/source_zh_cn/advanced_use/summary_record.md index 657b037b3d..35ff6eb87b 100644 --- a/tutorials/training/source_zh_cn/advanced_use/summary_record.md +++ b/tutorials/training/source_zh_cn/advanced_use/summary_record.md @@ -43,6 +43,7 @@ MindSpore目前支持三种方式将数据记录到summary日志文件中。 
即可自动收集一些常见信息。`SummaryCollector` 详细的用法可以参考 `API` 文档中 `mindspore.train.callback.SummaryCollector`。 样例代码如下: + ```python import mindspore import mindspore.nn as nn @@ -50,7 +51,7 @@ from mindspore import context from mindspore import Tensor from mindspore.train import Model from mindspore.common.initializer import TruncatedNormal -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.train.callback import SummaryCollector from mindspore.nn.metrics import Accuracy @@ -79,7 +80,7 @@ class AlexNet(nn.Cell): self.conv4 = conv(384, 384, 3, pad_mode="same") self.conv5 = conv(384, 256, 3, pad_mode="same") self.relu = nn.ReLU() - self.max_pool2d = P.MaxPool(ksize=3, strides=2) + self.max_pool2d = ops.MaxPool(ksize=3, strides=2) self.flatten = nn.Flatten() self.fc1 = fc_with_initialize(6*6*256, 4096) self.fc2 = fc_with_initialize(4096, 4096) @@ -131,6 +132,7 @@ model.eval(ds_eval, callbacks=[summary_collector]) MindSpore除了提供 `SummaryCollector` 能够自动收集一些常见数据,还提供了Summary算子,支持在网络中自定义收集其他的数据,比如每一个卷积层的输入,或在损失函数中的损失值等。 当前支持的Summary算子: + - [ScalarSummary](https://www.mindspore.cn/doc/api_python/zh-CN/r1.0/mindspore/mindspore.ops.html#mindspore.ops.ScalarSummary): 记录标量数据 - [TensorSummary](https://www.mindspore.cn/doc/api_python/zh-CN/r1.0/mindspore/mindspore.ops.html#mindspore.ops.TensorSummary): 记录张量数据 - [ImageSummary](https://www.mindspore.cn/doc/api_python/zh-CN/r1.0/mindspore/mindspore.ops.html#mindspore.ops.ImageSummary): 记录图片数据 @@ -149,8 +151,7 @@ MindSpore除了提供 `SummaryCollector` 能够自动收集一些常见数据, ```python from mindspore import context, Tensor, nn from mindspore.common import dtype as mstype -from mindspore.ops import operations as P -from mindspore.ops import functional as F +import mindspore.ops as ops from mindspore.nn import Optimizer @@ -158,17 +159,17 @@ class CrossEntropyLoss(nn.Cell): """Loss function definition.""" def __init__(self): super(CrossEntropyLoss, self).__init__() - self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() - self.mean 
= P.ReduceMean() - self.one_hot = P.OneHot() + self.cross_entropy = ops.SoftmaxCrossEntropyWithLogits() + self.mean = ops.ReduceMean() + self.one_hot = ops.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) # Init ScalarSummary - self.scalar_summary = P.ScalarSummary() + self.scalar_summary = ops.ScalarSummary() def construct(self, logits, label): - label = self.one_hot(label, F.shape(logits)[1], self.on_value, self.off_value) + label = self.one_hot(label, ops.shape(logits)[1], self.on_value, self.off_value) loss = self.cross_entropy(logits, label)[0] loss = self.mean(loss, (-1,)) @@ -182,8 +183,8 @@ class MyOptimizer(Optimizer): def __init__(self, learning_rate, params, ......): ...... # Initialize ScalarSummary - self.scalar_summary = P.ScalarSummary() - self.histogram_summary = P.HistogramSummary() + self.scalar_summary = ops.ScalarSummary() + self.histogram_summary = ops.HistogramSummary() self.weight_names = [param.name for param in self.parameters] def construct(self, grads): @@ -205,9 +206,9 @@ class Net(nn.Cell): ...... 
# Init ImageSummary - self.image_summary = P.ImageSummary() + self.image_summary = ops.ImageSummary() # Init TensorSummary - self.tensor_summary = P.TensorSummary() + self.tensor_summary = ops.TensorSummary() def construct(self, data): # Record image by Summary operator @@ -254,18 +255,18 @@ MindSpore支持自定义Callback, 并允许在自定义Callback中将数据记 样例代码如下: -``` +```python from mindspore.train.callback import Callback from mindspore.train.summary import SummaryRecord class ConfusionMatrixCallback(Callback): def __init__(self, summary_dir): self._summary_dir = summary_dir - + def __enter__(self): # init you summary record in here, when the train script run, it will be inited before training self.summary_record = SummaryRecord(summary_dir) - + def __exit__(self, *exc_args): # Note: you must close the summary record, it will release the process pool resource # else your training script will not exit from training. @@ -276,7 +277,7 @@ class ConfusionMatrixCallback(Callback): cb_params = run_context.run_context.original_args() # create a confusion matric image, and record it to summary file - confusion_martrix = create_confusion_matrix(cb_params) + confusion_martrix = create_confusion_matrix(cb_params) self.summary_record.add_value('image', 'confusion_matrix', confusion_matric) self.summary_record.record(cb_params.cur_step) @@ -288,31 +289,34 @@ model.train(cnn_network, train_dataset=train_ds, callbacks=[confusion_martrix]) ``` 上面的三种方式,支持记录计算图, 损失值等多种数据。除此以外,MindSpore还支持保存训练中其他阶段的计算图,通过 -将训练脚本中 `context.set_context` 的 `save_graphs` 选项设置为 `True`, 可以记录其他阶段的计算图,其中包括算子融合后的计算图。 +将训练脚本中 `context.set_context` 的 `save_graphs` 选项设置为 `True`, 可以记录其他阶段的计算图,其中包括算子融合后的计算图。 在保存的文件中,`ms_output_after_hwopt.pb` 即为算子融合后的计算图,可以使用可视化页面对其进行查看。 ## 运行MindInsight + 按照上面教程完成数据收集后,启动MindInsight,即可可视化收集到的数据。启动MindInsight时, 需要通过 `--summary-base-dir` 参数指定summary日志文件目录。 其中指定的summary日志文件目录可以是一次训练的输出目录,也可以是多次训练输出目录的父目录。 - 一次训练的输出目录结构如下: -``` + +```text └─summary_dir events.out.events.summary.1596869898.hostname_MS 
events.out.events.summary.1596869898.hostname_lineage ``` 启动命令: + ```Bash mindinsight start --summary-base-dir ./summary_dir ``` 多次训练的输出目录结构如下: -``` + +```text └─summary ├─summary_dir1 │ events.out.events.summary.1596869898.hostname_MS @@ -324,6 +328,7 @@ mindinsight start --summary-base-dir ./summary_dir ``` 启动命令: + ```Bash mindinsight start --summary-base-dir ./summary ``` @@ -331,13 +336,13 @@ mindinsight start --summary-base-dir ./summary 启动成功后,通过浏览器访问 `http://127.0.0.1:8080` 地址,即可查看可视化页面。 停止MindInsight命令: + ```Bash mindinsight stop ``` 更多参数设置,请点击查看[MindInsight相关命令](https://www.mindspore.cn/tutorial/training/zh-CN/r1.0/advanced_use/mindinsight_commands.html)页面。 - ## 注意事项 1. 为了控制列出summary文件目录的用时,MindInsight最多支持发现999个summary文件目录。 @@ -349,7 +354,8 @@ mindinsight stop 自定义callback中如果使用 `SummaryRecord`,则其不能和 `SummaryCollector` 同时使用。 正确代码: - ``` + + ```python ... summary_collector = SummaryCollector('./summary_dir') model.train(2, train_dataset, callbacks=[summary_collector]) @@ -359,7 +365,8 @@ mindinsight stop ``` 错误代码: - ``` + + ```python ... summary_collector1 = SummaryCollector('./summary_dir1') summary_collector2 = SummaryCollector('./summary_dir2') @@ -367,7 +374,8 @@ mindinsight stop ``` 错误代码: - ``` + + ```python ... # Note: the 'ConfusionMatrixCallback' is user-defined, and it uses SummaryRecord to record data. confusion_callback = ConfusionMatrixCallback('./summary_dir1') @@ -377,4 +385,4 @@ mindinsight stop 3. 每个summary日志文件目录中,应该只放置一次训练的数据。一个summary日志目录中如果存放了多次训练的summary数据,MindInsight在可视化数据时会将这些训练的summary数据进行叠加展示,可能会与预期可视化效果不相符。 -4. 当前 `SummaryCollector` 和 `SummaryRecord` 不支持GPU多卡运行的场景。 \ No newline at end of file +4. 
当前 `SummaryCollector` 和 `SummaryRecord` 不支持GPU多卡运行的场景。 diff --git a/tutorials/training/source_zh_cn/quick_start/linear_regression.md b/tutorials/training/source_zh_cn/quick_start/linear_regression.md index 1583263d98..06bf6152da 100644 --- a/tutorials/training/source_zh_cn/quick_start/linear_regression.md +++ b/tutorials/training/source_zh_cn/quick_start/linear_regression.md @@ -28,7 +28,6 @@    - ## 概述 回归问题算法通常是利用一系列属性来预测一个值,预测的值是连续的。例如给出一套房子的一些特征数据,如面积、卧室数等等来预测房价,利用最近一周的气温变化和卫星云图来预测未来的气温情况等。如果一套房子实际价格为500万元,通过回归分析的预测值为499万元,则认为这是一个比较好的回归分析。在机器学习问题中,常见的回归分析有线性回归、多项式回归、逻辑回归等。本例子介绍线性回归算法,并通过MindSpore进行线性回归AI训练体验。 @@ -47,7 +46,6 @@ 设置MindSpore运行配置 - ```python from mindspore import context @@ -66,7 +64,6 @@ context.set_context(mode=context.GRAPH_MODE, device_target="CPU") `get_data`用于生成训练数据集和测试数据集。由于拟合的是线性数据,假定要拟合的目标函数为:$f(x)=2x+3$,那么我们需要的训练数据集应随机分布于函数周边,这里采用了$f(x)=2x+3+noise$的方式生成,其中`noise`为遵循标准正态分布规律的随机数值。 - ```python import numpy as np @@ -80,7 +77,6 @@ def get_data(num, w=2.0, b=3.0): 使用`get_data`生成50组测试数据,可视化展示。 - ```python import matplotlib.pyplot as plt @@ -97,10 +93,8 @@ plt.show() 输出结果: - ![png](./images/linear_regression_eval_datasets.png) - 上图中绿色线条部分为目标函数,红点部分为验证数据`eval_data`。 ### 定义数据增强函数 @@ -111,7 +105,6 @@ plt.show() - `batch`:将`batch_size`个数据组合成一个batch。 - `repeat`:将数据集数量倍增。 - ```python from mindspore import dataset as ds @@ -124,13 +117,12 @@ def create_dataset(num_data, batch_size=16, repeat_size=1): 使用数据集增强函数生成训练数据,并查看训练数据的格式。 - ```python num_data = 1600 batch_size = 16 repeat_size = 1 -ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) +ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) print("The dataset size of ds_train:", ds_train.get_dataset_size()) dict_datasets = ds_train.create_dict_iterator().get_next() @@ -141,11 +133,12 @@ print("The y label value shape:", dict_datasets["label"].shape) 输出结果: - The dataset size of ds_train: 100 - dict_keys(['data', 'label']) - The x label 
value shape: (16, 1) - The y label value shape: (16, 1) - +```text +The dataset size of ds_train: 100 +dict_keys(['data', 'label']) +The x label value shape: (16, 1) +The y label value shape: (16, 1) +``` 通过定义的`create_dataset`将生成的1600个数据增强为了100组shape为16x1的数据集。 @@ -157,7 +150,6 @@ $$f(x)=wx+b\tag{1}$$ 并使用Normal算子随机初始化权重$w$和$b$。 - ```python from mindspore.common.initializer import Normal from mindspore import nn @@ -174,7 +166,6 @@ class LinearNet(nn.Cell): 调用网络查看初始化的模型参数。 - ```python net = LinearNet() model_params = net.trainable_params() @@ -183,18 +174,18 @@ print(model_params) 输出结果: - [Parameter (name=fc.weight, value=Tensor(shape=[1, 1], dtype=Float32, - [[-7.35660456e-003]])), Parameter (name=fc.bias, value=Tensor(shape=[1], dtype=Float32, [-7.35660456e-003]))] - +```text +[Parameter (name=fc.weight, value=Tensor(shape=[1, 1], dtype=Float32, +[[-7.35660456e-003]])), Parameter (name=fc.bias, value=Tensor(shape=[1], dtype=Float32, [-7.35660456e-003]))] +``` 初始化网络模型后,接下来将初始化的网络函数和训练数据集进行可视化,了解拟合前的模型函数情况。 - ```python from mindspore import Tensor x_model_label = np.array([-10, 10, 0.1]) -y_model_label = (x_model_label * Tensor(model_params[0]).asnumpy()[0][0] + +y_model_label = (x_model_label * Tensor(model_params[0]).asnumpy()[0][0] + Tensor(model_params[1]).asnumpy()[0]) plt.scatter(x_eval_label, y_eval_label, color="red", s=5) @@ -205,10 +196,8 @@ plt.show() 输出结果: - ![png](./images/model_net_and_eval_datasets.png) - 从上图中可以看出,蓝色线条的初始化模型函数与绿色线条的目标函数还是有较大的差别的。 ## 定义前向传播网络与反向传播网络并关联 @@ -236,7 +225,6 @@ $$J(w)=\frac{1}{2m}\sum_{i=1}^m(h(x_i)-y^{(i)})^2\tag{2}$$ 在MindSpore中使用如下方式实现。 - ```python net = LinearNet() net_loss = nn.loss.MSELoss() @@ -257,7 +245,6 @@ $$w_{t}=w_{t-1}-\alpha\frac{\partial{J(w_{t-1})}}{\partial{w}}\tag{3}$$ 函数中所有的权重值更新完成后,将值传入到模型函数中,这个过程就是反向传播过程,实现此过程需要使用MindSpore中的优化器函数,如下: - ```python opt = nn.Momentum(net.trainable_params(), learning_rate=0.005, momentum=0.9) ``` @@ -266,7 +253,6 @@ opt = nn.Momentum(net.trainable_params(), 
learning_rate=0.005, momentum=0.9) 定义完成前向传播和反向传播后,在MindSpore中需要调用`Model`函数,将前面定义的网络,损失函数,优化器函数关联起来,使之变成完整的计算网络。 - ```python from mindspore.train import Model @@ -279,7 +265,6 @@ model = Model(net, net_loss, opt) 为了使得整个训练过程更容易理解,需要将训练过程的测试数据、目标函数和模型网络进行可视化,这里定义了可视化函数,将在每个step训练结束后调用,展示模型网络的拟合过程。 - ```python import matplotlib.pyplot as plt import time @@ -292,7 +277,7 @@ def plot_model_and_datasets(net, eval_data): x1, y1 = zip(*eval_data) x_target = x y_target = x_target * 2 + 3 - + plt.axis([-11, 11, -20, 25]) plt.scatter(x1, y1, color="red", s=5) plt.plot(x, y, color="blue") @@ -305,7 +290,6 @@ def plot_model_and_datasets(net, eval_data): MindSpore提供的工具,可对模型训练过程进行自定义控制,这里在`step_end`中调用可视化函数,展示拟合过程。更多的使用可参考[官网说明]()。 - ```python from IPython import display from mindspore.train.callback import Callback @@ -314,7 +298,7 @@ class ImageShowCallback(Callback): def __init__(self, net, eval_data): self.net = net self.eval_data = eval_data - + def step_end(self, run_context): plot_model_and_datasets(self.net, self.eval_data) display.clear_output(wait=True) @@ -329,7 +313,6 @@ class ImageShowCallback(Callback): - `callbacks`:训练过程中需要调用的回调函数。 - `dataset_sink_mode`:数据集下沉模式,支持Ascend、GPU计算平台,本例为CPU计算平台设置为False。 - ```python from mindspore.train.callback import LossMonitor @@ -344,13 +327,12 @@ print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) 输出结果: - ![gif](./images/linear_regression.gif) - - Parameter (name=fc.weight, value=[[2.0065749]]) - Parameter (name=fc.bias, value=[3.0089042]) - +```text +Parameter (name=fc.weight, value=[[2.0065749]]) +Parameter (name=fc.bias, value=[3.0089042]) +``` 训练完成后打印出最终模型的权重参数,其中weight接近于2.0,bias接近于3.0,模型训练完成,符合预期。 diff --git a/tutorials/training/source_zh_cn/quick_start/quick_start.md b/tutorials/training/source_zh_cn/quick_start/quick_start.md index f00c5ca354..dbaead34a1 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_start.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_start.md @@ -36,6 +36,7 @@ 
下面我们通过一个实际样例,带领大家体验MindSpore基础的功能,对于一般的用户而言,完成整个样例实践会持续20~30分钟。 本例子会实现一个简单的图片分类的功能,整体流程如下: + 1. 处理需要的数据集,这里使用了MNIST数据集。 2. 定义一个网络,这里我们使用LeNet网络。 3. 定义损失函数和优化器。 @@ -45,7 +46,6 @@ > 你可以在这里找到完整可运行的样例代码: 。 - 这是简单、基础的应用流程,其他高级、复杂的应用可以基于这个基本流程进行扩展。 ## 准备环节 @@ -66,7 +66,7 @@ 目录结构如下: -``` +```text └─MNIST_Data ├─test │ t10k-images.idx3-ubyte @@ -76,6 +76,7 @@ train-images.idx3-ubyte train-labels.idx1-ubyte ``` + > 为了方便样例使用,我们在样例脚本中添加了自动下载数据集的功能。 ### 导入Python库&模块 @@ -83,8 +84,7 @@ 在使用前,需要导入需要的Python库。 目前使用到`os`库,为方便理解,其他需要的库,我们在具体使用到时再说明。 - - + ```python import os ``` @@ -161,7 +161,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, rescale_op = CV.Rescale(rescale, shift) # rescale images hwc2chw_op = CV.HWC2CHW() # change shape from (height, width, channel) to (channel, height, width) to fit network. type_cast_op = C.TypeCast(mstype.int32) # change data type of label to int32 to fit network - + # apply map operations on images mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) @@ -187,7 +187,6 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, > MindSpore支持进行多种数据处理和增强的操作,各种操作往往组合使用,具体可以参考[数据处理](https://www.mindspore.cn/doc/programming_guide/zh-CN/r1.0/pipeline.html)和与[数据增强](https://www.mindspore.cn/doc/programming_guide/zh-CN/r1.0/augmentation.html)章节。 - ## 定义网络 我们选择相对简单的LeNet网络。LeNet网络不包括输入层的情况下,共有7层:2个卷积层、2个下采样层(池化层)、3个全连接层。每层都包含不同数量的训练参数,如下图所示: @@ -196,7 +195,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, > 更多的LeNet网络的介绍不在此赘述,希望详细了解LeNet网络,可以查询。 -我们对全连接层以及卷积层采用`Normal`进行参数初始化。 +我们对全连接层以及卷积层采用`Normal`进行参数初始化。 MindSpore支持`TruncatedNormal`、`Normal`、`Uniform`等多种参数初始化方法,默认采用`Normal`。具体可以参考MindSpore API的`mindspore.common.initializer`模块说明。 @@ -242,7 +241,7 @@ class LeNet5(nn.Cell): 在进行定义之前,先简单介绍损失函数及优化器的概念。 - 
损失函数:又叫目标函数,用于衡量预测值与实际值差异的程度。深度学习通过不停地迭代来缩小损失函数的值。定义一个好的损失函数,可以有效提高模型的性能。 -- 优化器:用于最小化损失函数,从而在训练过程中改进模型。 +- 优化器:用于最小化损失函数,从而在训练过程中改进模型。 定义了损失函数后,可以得到损失函数关于权重的梯度。梯度用于指示优化器优化权重的方向,以提高模型性能。 @@ -296,9 +295,9 @@ from mindspore.train.callback import ModelCheckpoint, CheckpointConfig if __name__ == "__main__": ... # set parameters of check point - config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10) + config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10) # apply parameters of check point - ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) + ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck) ... ``` @@ -307,7 +306,6 @@ if __name__ == "__main__": 通过MindSpore提供的`model.train`接口可以方便地进行网络的训练。`LossMonitor`可以监控训练过程中`loss`值的变化。 这里把`epoch_size`设置为1,对数据集进行1个迭代的训练。 - ```python from mindspore.nn.metrics import Accuracy from mindspore.train.callback import LossMonitor @@ -324,23 +322,26 @@ def train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, sink if __name__ == "__main__": ... - - epoch_size = 1 + + epoch_size = 1 mnist_path = "./MNIST_Data" repeat_size = 1 model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()}) train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, dataset_sink_mode) ... 
``` + 其中, 在`train_net`方法中,我们加载了之前下载的训练数据集,`mnist_path`是MNIST数据集路径。 ## 运行并查看结果 使用以下命令运行脚本: -``` + +```bash python lenet.py --device_target=CPU ``` + 其中, `lenet.py`:为你根据教程编写的脚本文件。 `--device_target CPU`:指定运行硬件平台,参数为`CPU`、`GPU`或者`Ascend`,根据你的实际运行硬件平台来指定。 @@ -402,23 +403,24 @@ if __name__ == "__main__": test_net(network, model, mnist_path) ``` -其中, +其中, `load_checkpoint`:通过该接口加载CheckPoint模型参数文件,返回一个参数字典。 `checkpoint_lenet-1_1875.ckpt`:之前保存的CheckPoint模型文件名称。 `load_param_into_net`:通过该接口把参数加载到网络中。 - 使用运行命令,运行你的代码脚本。 + ```bash python lenet.py --device_target=CPU ``` + 其中, `lenet.py`:为你根据教程编写的脚本文件。 `--device_target CPU`:指定运行硬件平台,参数为`CPU`、`GPU`或者`Ascend`,根据你的实际运行硬件平台来指定。 运行结果示例如下: -``` +```text ... ============== Starting Testing ============== ============== Accuracy:{'Accuracy': 0.9663477564102564} ============== diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video.md b/tutorials/training/source_zh_cn/quick_start/quick_video.md index b807dd0228..15595566fa 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video.md @@ -108,7 +108,6 @@ - ## 体验MindSpore @@ -237,8 +236,6 @@ - - ## 使用可视化组件MindInsight @@ -343,10 +340,6 @@ - - - - ## 算子开发 @@ -455,4 +448,4 @@ - \ No newline at end of file + diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910.md b/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910.md index d48d9c76b9..72f8e01534 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910.md @@ -6,4 +6,4 @@ -**立即安装**: \ No newline at end of file +**立即安装**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910_operator_development.md b/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910_operator_development.md index 8b72f11c5e..1e7d45170d 100644 --- 
a/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910_operator_development.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/ascend910_operator_development.md @@ -4,4 +4,4 @@ \ No newline at end of file + diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/community.md b/tutorials/training/source_zh_cn/quick_start/quick_video/community.md index d67071537b..b75e7dcc7e 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/community.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/community.md @@ -6,4 +6,4 @@ -**更多内容**: \ No newline at end of file +**更多内容**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_ubuntu.md b/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_ubuntu.md index baff978d12..2ffcb311eb 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_ubuntu.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_ubuntu.md @@ -6,4 +6,4 @@ -**立即安装**: \ No newline at end of file +**立即安装**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_windows.md b/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_windows.md index badcdffb9d..3476461137 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_windows.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/cpu_windows.md @@ -6,4 +6,4 @@ -**立即安装**: \ No newline at end of file +**立即安装**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/customized_debugging.md b/tutorials/training/source_zh_cn/quick_start/quick_video/customized_debugging.md index a45204e464..a5d98a15b1 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/customized_debugging.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/customized_debugging.md @@ -6,4 +6,4 @@ -**查看完整教程**: \ No newline at end of file +**查看完整教程**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/gpu.md 
b/tutorials/training/source_zh_cn/quick_start/quick_video/gpu.md index 0e48014979..47452cebdf 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/gpu.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/gpu.md @@ -6,4 +6,4 @@ -**立即安装**: \ No newline at end of file +**立即安装**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/inference.md b/tutorials/training/source_zh_cn/quick_start/quick_video/inference.md index 8665d00fbb..c593f405d9 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/inference.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/inference.md @@ -6,4 +6,4 @@ -**更多内容**: \ No newline at end of file +**更多内容**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/loading_the_dataset_and_converting_data_format.md b/tutorials/training/source_zh_cn/quick_start/quick_video/loading_the_dataset_and_converting_data_format.md index 2885b5e640..bc2890e5b9 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/loading_the_dataset_and_converting_data_format.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/loading_the_dataset_and_converting_data_format.md @@ -10,4 +10,4 @@ - \ No newline at end of file + diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_dashboard.md b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_dashboard.md index ab9fe88fd9..bb6ea4f151 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_dashboard.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_dashboard.md @@ -8,4 +8,4 @@ **立即安装**: -**查看更多内容**: \ No newline at end of file +**查看更多内容**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_installation_and_common_commands.md b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_installation_and_common_commands.md index 23a27873f5..60088d7109 100644 --- 
a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_installation_and_common_commands.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_installation_and_common_commands.md @@ -8,4 +8,4 @@ **立即安装**: -**查看更多命令**: \ No newline at end of file +**查看更多命令**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_lineage_and_scalars_comparision.md b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_lineage_and_scalars_comparision.md index 174b09ed19..e4fcd1779b 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_lineage_and_scalars_comparision.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_lineage_and_scalars_comparision.md @@ -8,4 +8,4 @@ **立即安装**: -**查看更多内容**: \ No newline at end of file +**查看更多内容**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_performance_profiling.md b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_performance_profiling.md index 3db55ec68e..7a58398751 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_performance_profiling.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/mindInsight_performance_profiling.md @@ -6,9 +6,8 @@ - **查看更多内容**: - \ No newline at end of file + diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/quick_start_video.md b/tutorials/training/source_zh_cn/quick_start/quick_video/quick_start_video.md index 078b3cd10b..27ff80d959 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/quick_start_video.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/quick_start_video.md @@ -8,4 +8,4 @@ **查看代码**: -**查看完整教程**: \ No newline at end of file +**查看完整教程**: diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/saving_and_loading_model_parameters.md 
b/tutorials/training/source_zh_cn/quick_start/quick_video/saving_and_loading_model_parameters.md index ff725c7d47..2c97212c94 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video/saving_and_loading_model_parameters.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/saving_and_loading_model_parameters.md @@ -6,4 +6,4 @@ -**查看完整教程**: \ No newline at end of file +**查看完整教程**: diff --git a/tutorials/training/source_zh_cn/use/load_dataset_image.md b/tutorials/training/source_zh_cn/use/load_dataset_image.md index 1a92a90e9c..31f42f8e68 100644 --- a/tutorials/training/source_zh_cn/use/load_dataset_image.md +++ b/tutorials/training/source_zh_cn/use/load_dataset_image.md @@ -28,7 +28,7 @@ 1. 下载MNIST数据集的训练[图像](http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz)和[标签](http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz)并解压,存放在`./MNIST`路径中,目录结构如下。 - ``` + ```text └─MNIST ├─train-images.idx3-ubyte └─train-labels.idx1-ubyte diff --git a/tutorials/training/source_zh_cn/use/load_dataset_text.md b/tutorials/training/source_zh_cn/use/load_dataset_text.md index d605ca817d..aa9c22c6f1 100644 --- a/tutorials/training/source_zh_cn/use/load_dataset_text.md +++ b/tutorials/training/source_zh_cn/use/load_dataset_text.md @@ -27,7 +27,7 @@ MindSpore提供的`mindspore.dataset`模块可以帮助用户构建数据集对 1. 准备文本数据如下。 - ``` + ```text Welcome to Beijing! 北京欢迎您! 我喜欢English! @@ -35,7 +35,7 @@ MindSpore提供的`mindspore.dataset`模块可以帮助用户构建数据集对 2. 创建`tokenizer.txt`文件并复制文本数据到该文件中,将该文件存放在`./test`路径中,目录结构如下。 - ``` + ```text └─test └─tokenizer.txt ``` @@ -69,7 +69,7 @@ MindSpore目前支持加载文本领域常用的经典数据集和多种数据 获取到分词前的数据: - ``` + ```text Welcome to Beijing! 北京欢迎您! 我喜欢English! 
@@ -101,7 +101,7 @@ MindSpore目前支持的数据处理算子及其详细使用方法,可参考 输出结果如下: - ``` + ```text ['大', '家', '早', '上', '好'] ``` @@ -120,7 +120,7 @@ MindSpore目前支持的数据处理算子及其详细使用方法,可参考 输出结果如下: - ``` + ```text [['大', '家'], ['家', '早'], ['早', '上'], @@ -147,7 +147,7 @@ MindSpore目前支持的数据处理算子及其详细使用方法,可参考 输出结果如下: - ``` + ```text c a d @@ -182,7 +182,7 @@ MindSpore目前支持的数据分词算子及其详细使用方法,可参考 获取到分词后的数据: - ``` + ```text ['Welcome', 'to', 'Beijing!'] ['北京欢迎您!'] ['我喜欢English!'] diff --git a/tutorials/training/source_zh_cn/use/load_model_for_inference_and_transfer.md b/tutorials/training/source_zh_cn/use/load_model_for_inference_and_transfer.md index f9bc442667..43fbe8e780 100644 --- a/tutorials/training/source_zh_cn/use/load_model_for_inference_and_transfer.md +++ b/tutorials/training/source_zh_cn/use/load_model_for_inference_and_transfer.md @@ -48,6 +48,7 @@ acc = model.eval(dataset_eval) 针对任务中断再训练及微调(Fine Tune)场景,可以加载网络参数和优化器参数到模型中。 示例代码如下: + ```python # return a parameter dict for model param_dict = load_checkpoint("resnet50-2_32.ckpt") @@ -103,7 +104,7 @@ model.train(epoch, dataset) ### 用于迁移学习 -通过`mindspore_hub.load`完成模型加载后,可以增加一个额外的参数项只加载神经网络的特征提取部分,这样我们就能很容易地在之后增加一些新的层进行迁移学习。*当模型开发者将额外的参数(例如 `include_top`)添加到模型构造中时,可以在模型的详情页中找到这个功能。`include_top`取值为True或者False,表示是否保留顶层的全连接网络。* +通过`mindspore_hub.load`完成模型加载后,可以增加一个额外的参数项只加载神经网络的特征提取部分,这样我们就能很容易地在之后增加一些新的层进行迁移学习。*当模型开发者将额外的参数(例如 `include_top`)添加到模型构造中时,可以在模型的详情页中找到这个功能。`include_top`取值为True或者False,表示是否保留顶层的全连接网络。* 下面我们以GoogleNet为例,说明如何加载一个基于ImageNet的预训练模型,并在特定的子任务数据集上进行迁移学习(重训练)。主要的步骤如下: @@ -116,7 +117,7 @@ model.train(epoch, dataset) from mindspore import nn, context, Tensor from mindpsore.train.serialization import save_checkpoint from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits - from mindspore.ops import operations as P + import mindspore.ops as ops from mindspore.nn import Momentum import math @@ -138,9 +139,9 @@ model.train(epoch, dataset) class ReduceMeanFlatten(nn.Cell): def __init__(self): super(ReduceMeanFlatten, self).__init__() - 
self.mean = P.ReduceMean(keep_dims=True) + self.mean = ops.ReduceMean(keep_dims=True) self.flatten = nn.Flatten() - + def construct(self, x): x = self.mean(x, (2, 3)) x = self.flatten(x) @@ -180,10 +181,10 @@ model.train(epoch, dataset) optim = Momentum(filter(lambda x: x.requires_grad, loss_net.get_parameters()), Tensor(lr), 0.9, 4e-5) train_net = nn.TrainOneStepCell(loss_net, optim) ``` - + 5. 构建数据集,开始重训练。 - 如下所示,进行微调任务的数据集为垃圾分类数据集,存储位置为`/ssd/data/garbage/train`。 + 如下所示,进行微调任务的数据集为垃圾分类数据集,存储位置为`/ssd/data/garbage/train`。 ```python dataset = create_dataset("/ssd/data/garbage/train", @@ -197,7 +198,7 @@ model.train(epoch, dataset) data, label = items data = mindspore.Tensor(data) label = mindspore.Tensor(label) - + loss = train_net(data, label) print(f"epoch: {epoch}/{epoch_size}, loss: {loss}") # Save the ckpt file for each epoch. @@ -218,7 +219,7 @@ model.train(epoch, dataset) classification_layer = nn.Dense(last_channel, num_classes) classification_layer.set_train(False) softmax = nn.Softmax() - network = nn.SequentialCell([network, reducemean_flatten, + network = nn.SequentialCell([network, reducemean_flatten, classification_layer, softmax]) # Load a pre-trained ckpt file. @@ -237,4 +238,4 @@ model.train(epoch, dataset) res = model.eval(eval_dataset) print("result:", res, "ckpt=", ckpt_path) - ``` \ No newline at end of file + ``` diff --git a/tutorials/training/source_zh_cn/use/publish_model.md b/tutorials/training/source_zh_cn/use/publish_model.md index 73aa040666..d539b31dd5 100644 --- a/tutorials/training/source_zh_cn/use/publish_model.md +++ b/tutorials/training/source_zh_cn/use/publish_model.md @@ -24,7 +24,7 @@ 1. 将你的预训练模型托管在可以访问的存储位置。 -2. 参照[模板](https://gitee.com/mindspore/mindspore/blob/r1.0/model_zoo/official/cv/googlenet/mindspore_hub_conf.py),在你自己的代码仓中添加模型生成文件`mindspore_hub_conf.py`,文件放置的位置如下: +2. 
参照[模板](https://gitee.com/mindspore/mindspore/blob/r1.0/model_zoo/official/cv/googlenet/mindspore_hub_conf.py),在你自己的代码仓中添加模型生成文件`mindspore_hub_conf.py`,文件放置的位置如下: ```shell googlenet @@ -47,19 +47,20 @@ | ├── gpu | ├── 0.7 | ├── ascend - | ├── 0.7 + | ├── 0.7 | ├── googlenet_v1_cifar10.md │   ├── tools | ├── md_validator.py - | └── md_validator.py + | └── md_validator.py ``` + 注意,`{model_name}_{model_version}_{dataset}.md`文件中需要补充如下所示的`file-format`、`asset-link` 和 `asset-sha256`信息,它们分别表示模型文件格式、模型存储位置(步骤1所得)和模型哈希值。 ```shell file-format: ckpt asset-link: https://download.mindspore.cn/model_zoo/official/cv/googlenet/goolenet_ascend_0.2.0_cifar10_official_classification_20200713/googlenet.ckpt asset-sha256: 114e5acc31dad444fa8ed2aafa02ca34734419f602b9299f3b53013dfc71b0f7 - ``` + ``` 其中,MindSpore Hub支持的模型文件格式有: - [MindSpore CKPT](https://www.mindspore.cn/tutorial/training/zh-CN/r1.0/use/save_model.html#checkpoint) diff --git a/tutorials/training/source_zh_cn/use/save_model.md b/tutorials/training/source_zh_cn/use/save_model.md index bfe3eba2a6..e160f81f87 100644 --- a/tutorials/training/source_zh_cn/use/save_model.md +++ b/tutorials/training/source_zh_cn/use/save_model.md @@ -34,6 +34,7 @@ 通过`CheckpointConfig`对象可以设置CheckPoint的保存策略。保存的参数分为网络参数和优化器参数。 `ModelCheckpoint`提供默认配置策略,方便用户快速上手。具体用法如下: + ```python from mindspore.train.callback import ModelCheckpoint ckpoint_cb = ModelCheckpoint() @@ -60,7 +61,7 @@ model.train(epoch_num, dataset, callbacks=ckpoint_cb) 生成的CheckPoint文件如下: -``` +```text resnet50-graph.meta # 编译后的计算图 resnet50-1_32.ckpt # CheckPoint文件后缀名为'.ckpt' resnet50-2_32.ckpt # 文件的命名方式表示保存参数所在的epoch和step数 diff --git a/tutorials/tutorial_code/distributed_training/resnet.py b/tutorials/tutorial_code/distributed_training/resnet.py index d01555638b..9448f31187 100644 --- a/tutorials/tutorial_code/distributed_training/resnet.py +++ b/tutorials/tutorial_code/distributed_training/resnet.py @@ -18,7 +18,7 @@ The sample can be run on Ascend 910 AI processor. 
import numpy as np import mindspore.nn as nn from mindspore import Tensor -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.common.initializer import initializer from mindspore.common import dtype as mstype def weight_variable(shape): @@ -121,8 +121,8 @@ class ResidualBlock(nn.Cell): self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) self.bn3 = bn_with_initialize_last(out_channels) - self.relu = P.ReLU() - self.add = P.TensorAdd() + self.relu = ops.ReLU() + self.add = ops.TensorAdd() def construct(self, x): """construct""" @@ -167,12 +167,12 @@ class ResidualBlockWithDown(nn.Cell): self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) self.bn3 = bn_with_initialize_last(out_channels) - self.relu = P.ReLU() + self.relu = ops.ReLU() self.down_sample = down_sample self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) self.bn_down_sample = bn_with_initialize(out_channels) - self.add = P.TensorAdd() + self.add = ops.TensorAdd() def construct(self, x): """construct""" @@ -294,7 +294,7 @@ class ResNet(nn.Cell): self.conv1 = conv7x7(3, 64, stride=2, padding=0) self.bn1 = bn_with_initialize(64) - self.relu = P.ReLU() + self.relu = ops.ReLU() self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0(block, in_channels=64, out_channels=256, stride=1) @@ -302,8 +302,8 @@ class ResNet(nn.Cell): self.layer3 = MakeLayer2(block, in_channels=512, out_channels=1024, stride=2) self.layer4 = MakeLayer3(block, in_channels=1024, out_channels=2048, stride=2) - self.pool = P.ReduceMean(keep_dims=True) - self.squeeze = P.Squeeze(axis=(2, 3)) + self.pool = ops.ReduceMean(keep_dims=True) + self.squeeze = ops.Squeeze(axis=(2, 3)) self.fc = fc_with_initialize(512 * block.expansion, num_classes) def construct(self, x): diff --git a/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py 
b/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py index 99ca5892f0..ddf8e1321b 100644 --- a/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py +++ b/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py @@ -16,17 +16,14 @@ The sample can be run on Ascend 910 AI processor. """ import os -import random -import argparse import mindspore.nn as nn import mindspore.common.dtype as mstype -import mindspore.ops.functional as F +import mindspore.ops as ops import mindspore.dataset as ds import mindspore.dataset.vision.c_transforms as vision import mindspore.dataset.transforms.c_transforms as C from mindspore.communication.management import init, get_rank, get_group_size from mindspore import Tensor -from mindspore.ops import operations as P from mindspore.nn.optim.momentum import Momentum from mindspore.train.model import Model from mindspore.context import ParallelMode @@ -39,11 +36,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") context.set_context(device_id=device_id) # set device_id init() -rank_id = get_rank() -rank_size = get_group_size() - - -def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size=1): +def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size=1): # pylint: disable=missing-docstring resize_height = 224 resize_width = 224 rescale = 1.0 / 255.0 @@ -82,42 +75,42 @@ def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size= return data_set -class SoftmaxCrossEntropyExpand(nn.Cell): - def __init__(self, sparse=False): +class SoftmaxCrossEntropyExpand(nn.Cell): # pylint: disable=missing-docstring + def __init__(self, sparse=False): # pylint: disable=missing-docstring super(SoftmaxCrossEntropyExpand, self).__init__() - self.exp = P.Exp() - self.sum = P.ReduceSum(keep_dims=True) - self.onehot = P.OneHot() + self.exp = ops.Exp() + self.sum = ops.ReduceSum(keep_dims=True) + self.onehot = 
ops.OneHot() self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) - self.div = P.RealDiv() - self.log = P.Log() - self.sum_cross_entropy = P.ReduceSum(keep_dims=False) - self.mul = P.Mul() - self.mul2 = P.Mul() - self.mean = P.ReduceMean(keep_dims=False) + self.div = ops.RealDiv() + self.log = ops.Log() + self.sum_cross_entropy = ops.ReduceSum(keep_dims=False) + self.mul = ops.Mul() + self.mul2 = ops.Mul() + self.mean = ops.ReduceMean(keep_dims=False) self.sparse = sparse - self.max = P.ReduceMax(keep_dims=True) - self.sub = P.Sub() + self.max = ops.ReduceMax(keep_dims=True) + self.sub = ops.Sub() self.eps = Tensor(1e-24, mstype.float32) - def construct(self, logit, label): + def construct(self, logit, label): # pylint: disable=missing-docstring logit_max = self.max(logit, -1) exp = self.exp(self.sub(logit, logit_max)) exp_sum = self.sum(exp, -1) softmax_result = self.div(exp, exp_sum) if self.sparse: - label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value) + label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) softmax_result_log = self.log(softmax_result + self.eps) loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) - loss = self.mul2(F.scalar_to_array(-1.0), loss) + loss = self.mul2(ops.scalar_to_array(-1.0), loss) loss = self.mean(loss, -1) return loss -def test_train_cifar(epoch_size=10): +def test_train_cifar(epoch_size=10): # pylint: disable=missing-docstring context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, gradients_mean=True) loss_cb = LossMonitor() data_path = os.getenv('DATA_PATH') diff --git a/tutorials/tutorial_code/gradient_accumulation/train.py b/tutorials/tutorial_code/gradient_accumulation/train.py index a0de464e89..0ae0ef74a3 100644 --- a/tutorials/tutorial_code/gradient_accumulation/train.py +++ b/tutorials/tutorial_code/gradient_accumulation/train.py @@ -1,3 +1,6 @@ +"""train +""" + import argparse import os @@ 
-5,22 +8,20 @@ import mindspore.nn as nn from mindspore import ParameterTuple from mindspore import context from mindspore.nn import Cell -from mindspore.ops import composite as C -from mindspore.ops import functional as F -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.train.dataset_helper import DatasetHelper from mindspore.train.serialization import save_checkpoint from model_zoo.official.cv.lenet.src.dataset import create_dataset from model_zoo.official.cv.lenet.src.lenet import LeNet5 -_sum_op = C.MultitypeFuncGraph("grad_sum_op") -_clear_op = C.MultitypeFuncGraph("clear_op") +_sum_op = ops.MultitypeFuncGraph("grad_sum_op") +_clear_op = ops.MultitypeFuncGraph("clear_op") @_sum_op.register("Tensor", "Tensor") def _cumulative_gard(grad_sum, grad): """Apply gard sum to cumulative gradient.""" - add = P.AssignAdd() + add = ops.AssignAdd() return add(grad_sum, grad) @@ -28,12 +29,12 @@ def _cumulative_gard(grad_sum, grad): def _clear_grad_sum(grad_sum, zero): """Apply zero to clear grad_sum.""" success = True - success = F.depend(success, F.assign(grad_sum, zero)) + success = ops.depend(success, ops.assign(grad_sum, zero)) return success -class TrainForwardBackward(Cell): - def __init__(self, network, optimizer, grad_sum, sens=1.0): +class TrainForwardBackward(Cell): # pylint: disable=missing-docstring + def __init__(self, network, optimizer, grad_sum, sens=1.0): # pylint: disable=missing-docstring super(TrainForwardBackward, self).__init__(auto_prefix=False) self.network = network self.network.set_grad() @@ -41,42 +42,42 @@ class TrainForwardBackward(Cell): self.weights = ParameterTuple(network.trainable_params()) self.optimizer = optimizer self.grad_sum = grad_sum - self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.grad = ops.GradOperation(get_by_list=True, sens_param=True) self.sens = sens - self.hyper_map = C.HyperMap() + self.hyper_map = ops.HyperMap() - def construct(self, *inputs): + def 
construct(self, *inputs): # pylint: disable=missing-docstring weights = self.weights loss = self.network(*inputs) - sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) + sens = ops.Fill()(ops.DType()(loss), ops.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(*inputs, sens) - return F.depend(loss, self.hyper_map(F.partial(_sum_op), self.grad_sum, grads)) + return ops.depend(loss, self.hyper_map(ops.partial(_sum_op), self.grad_sum, grads)) -class TrainOptim(Cell): - def __init__(self, optimizer, grad_sum): +class TrainOptim(Cell): # pylint: disable=missing-docstring + def __init__(self, optimizer, grad_sum): # pylint: disable=missing-docstring super(TrainOptim, self).__init__(auto_prefix=False) self.optimizer = optimizer self.grad_sum = grad_sum - def construct(self): + def construct(self): # pylint: disable=missing-docstring return self.optimizer(self.grad_sum) -class TrainClear(Cell): - def __init__(self, grad_sum, zeros): +class TrainClear(Cell): # pylint: disable=missing-docstring + def __init__(self, grad_sum, zeros): # pylint: disable=missing-docstring super(TrainClear, self).__init__(auto_prefix=False) self.grad_sum = grad_sum self.zeros = zeros - self.hyper_map = C.HyperMap() + self.hyper_map = ops.HyperMap() - def construct(self): - seccess = self.hyper_map(F.partial(_clear_op), self.grad_sum, self.zeros) + def construct(self): # pylint: disable=missing-docstring + seccess = self.hyper_map(ops.partial(_clear_op), self.grad_sum, self.zeros) return seccess -class GradientAccumulation: - def __init__(self, network, loss_fn, optimizer): +class GradientAccumulation: # pylint: disable=missing-docstring + def __init__(self, network, loss_fn, optimizer): # pylint: disable=missing-docstring self._network = network self._loss_fn = loss_fn self._optimizer = optimizer @@ -88,8 +89,7 @@ class GradientAccumulation: self._train_optim = self._build_train_optim() self._train_clear = self._build_train_clear() - def 
_build_train_forward_backward_network(self): - """Build forward and backward network""" + def _build_train_forward_backward_network(self): # pylint: disable=missing-docstring network = self._network network = nn.WithLossCell(network, self._loss_fn) loss_scale = 1.0 @@ -124,8 +124,7 @@ class GradientAccumulation: train_dataset.reset() - save_checkpoint(self._train_forward_backward, "gradient_accumulation.ckpt", ) - + save_checkpoint(self._train_forward_backward, "gradient_accumulation.ckpt") if __name__ == "__main__": parser = argparse.ArgumentParser(description='MindSpore Gard Cumulative Example') diff --git a/tutorials/tutorial_code/resnet/cifar_resnet50.py b/tutorials/tutorial_code/resnet/cifar_resnet50.py index 1aa84ada4c..0610e051b7 100644 --- a/tutorials/tutorial_code/resnet/cifar_resnet50.py +++ b/tutorials/tutorial_code/resnet/cifar_resnet50.py @@ -18,16 +18,12 @@ The sample can be run on Ascend 910 AI processor. import os import random import argparse -import mindspore.nn as nn import mindspore.common.dtype as mstype -import mindspore.ops.functional as F import mindspore.dataset as ds import mindspore.dataset.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as C2 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.communication.management import init -from mindspore import Tensor -from mindspore.ops import operations as P from mindspore.nn.optim.momentum import Momentum from mindspore.train.model import Model from mindspore.context import ParallelMode diff --git a/tutorials/tutorial_code/resnet/resnet.py b/tutorials/tutorial_code/resnet/resnet.py index d01555638b..9448f31187 100644 --- a/tutorials/tutorial_code/resnet/resnet.py +++ b/tutorials/tutorial_code/resnet/resnet.py @@ -18,7 +18,7 @@ The sample can be run on Ascend 910 AI processor. 
import numpy as np import mindspore.nn as nn from mindspore import Tensor -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.common.initializer import initializer from mindspore.common import dtype as mstype def weight_variable(shape): @@ -121,8 +121,8 @@ class ResidualBlock(nn.Cell): self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) self.bn3 = bn_with_initialize_last(out_channels) - self.relu = P.ReLU() - self.add = P.TensorAdd() + self.relu = ops.ReLU() + self.add = ops.TensorAdd() def construct(self, x): """construct""" @@ -167,12 +167,12 @@ class ResidualBlockWithDown(nn.Cell): self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0) self.bn3 = bn_with_initialize_last(out_channels) - self.relu = P.ReLU() + self.relu = ops.ReLU() self.down_sample = down_sample self.conv_down_sample = conv1x1(in_channels, out_channels, stride=stride, padding=0) self.bn_down_sample = bn_with_initialize(out_channels) - self.add = P.TensorAdd() + self.add = ops.TensorAdd() def construct(self, x): """construct""" @@ -294,7 +294,7 @@ class ResNet(nn.Cell): self.conv1 = conv7x7(3, 64, stride=2, padding=0) self.bn1 = bn_with_initialize(64) - self.relu = P.ReLU() + self.relu = ops.ReLU() self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = MakeLayer0(block, in_channels=64, out_channels=256, stride=1) @@ -302,8 +302,8 @@ class ResNet(nn.Cell): self.layer3 = MakeLayer2(block, in_channels=512, out_channels=1024, stride=2) self.layer4 = MakeLayer3(block, in_channels=1024, out_channels=2048, stride=2) - self.pool = P.ReduceMean(keep_dims=True) - self.squeeze = P.Squeeze(axis=(2, 3)) + self.pool = ops.ReduceMean(keep_dims=True) + self.squeeze = ops.Squeeze(axis=(2, 3)) self.fc = fc_with_initialize(512 * block.expansion, num_classes) def construct(self, x): diff --git a/tutorials/tutorial_code/sample_for_cloud/resnet.py b/tutorials/tutorial_code/sample_for_cloud/resnet.py index 0e21222d21..8755f01f44 
100755 --- a/tutorials/tutorial_code/sample_for_cloud/resnet.py +++ b/tutorials/tutorial_code/sample_for_cloud/resnet.py @@ -15,7 +15,7 @@ """ResNet.""" import numpy as np import mindspore.nn as nn -from mindspore.ops import operations as P +import mindspore.ops as ops from mindspore.common.tensor import Tensor @@ -105,9 +105,9 @@ class ResidualBlock(nn.Cell): if self.down_sample: self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), _bn(out_channel)]) - self.add = P.TensorAdd() + self.add = ops.TensorAdd() - def construct(self, x): + def construct(self, x): # pylint: disable=missing-docstring identity = x out = self.conv1(x) @@ -167,7 +167,7 @@ class ResNet(nn.Cell): self.conv1 = _conv7x7(3, 64, stride=2) self.bn1 = _bn(64) - self.relu = P.ReLU() + self.relu = ops.ReLU() self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") self.layer1 = self._make_layer(block, @@ -191,7 +191,7 @@ class ResNet(nn.Cell): out_channel=out_channels[3], stride=strides[3]) - self.mean = P.ReduceMean(keep_dims=True) + self.mean = ops.ReduceMean(keep_dims=True) self.flatten = nn.Flatten() self.end_point = _fc(out_channels[3], num_classes) @@ -223,7 +223,7 @@ class ResNet(nn.Cell): return nn.SequentialCell(layers) - def construct(self, x): + def construct(self, x): # pylint: disable=missing-docstring x = self.conv1(x) x = self.bn1(x) x = self.relu(x) -- Gitee From c46ce607d04bb1807ab3213802412990c5a3c908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B5=B5=E9=9B=B7?= Date: Thu, 22 Oct 2020 17:07:49 +0800 Subject: [PATCH 3/3] unified operator of programming guide 3rd --- .../constraints_on_network_construction.md | 4 ++-- .../constraints_on_network_construction.md | 4 ++-- .../computer_vision_application.ipynb | 2 -- .../customized_debugging_information.ipynb | 1 - .../notebook/debugging_in_pynative_mode.ipynb | 5 ++--- ...nsight_image_histogram_scalar_tensor.ipynb | 22 +++++++++---------- ...sight_model_lineage_and_data_lineage.ipynb | 1 - 
tutorials/notebook/mixed_precision.ipynb | 15 ++++++------- tutorials/notebook/nlp_application.ipynb | 8 +++---- 9 files changed, 28 insertions(+), 34 deletions(-) diff --git a/docs/note/source_en/constraints_on_network_construction.md b/docs/note/source_en/constraints_on_network_construction.md index 1fa7869dc0..9086c60ed3 100644 --- a/docs/note/source_en/constraints_on_network_construction.md +++ b/docs/note/source_en/constraints_on_network_construction.md @@ -239,7 +239,7 @@ Currently, the following syntax is not supported in network constructors: class ExpandDimsTest(Cell): def __init__(self): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() def construct(self, input_x, input_axis): return self.expandDims(input_x, input_axis) @@ -254,7 +254,7 @@ Currently, the following syntax is not supported in network constructors: class ExpandDimsTest(Cell): def __init__(self, axis): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() self.axis = axis def construct(self, input_x): diff --git a/docs/note/source_zh_cn/constraints_on_network_construction.md b/docs/note/source_zh_cn/constraints_on_network_construction.md index a56bff5b1e..d7e2b8d58d 100644 --- a/docs/note/source_zh_cn/constraints_on_network_construction.md +++ b/docs/note/source_zh_cn/constraints_on_network_construction.md @@ -238,7 +238,7 @@ tuple也支持切片取值操作, 但不支持切片类型为Tensor类型,支 class ExpandDimsTest(Cell): def __init__(self): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() def construct(self, input_x, input_axis): return self.expandDims(input_x, input_axis) @@ -253,7 +253,7 @@ tuple也支持切片取值操作, 但不支持切片类型为Tensor类型,支 class ExpandDimsTest(Cell): def __init__(self, axis): super(ExpandDimsTest, self).__init__() - self.expandDims = P.ExpandDims() + self.expandDims = ops.ExpandDims() self.axis = axis def construct(self, input_x): diff --git 
a/tutorials/notebook/computer_vision_application.ipynb b/tutorials/notebook/computer_vision_application.ipynb index adfeec2c49..78ae171c00 100644 --- a/tutorials/notebook/computer_vision_application.ipynb +++ b/tutorials/notebook/computer_vision_application.ipynb @@ -144,7 +144,6 @@ "import random\n", "import argparse\n", "from mindspore import Tensor\n", - "from mindspore.ops import operations as P\n", "\n", "# Set Training Parameters \n", "random.seed(1)\n", @@ -212,7 +211,6 @@ "source": [ "import mindspore.nn as nn\n", "import mindspore.common.dtype as mstype\n", - "import mindspore.ops.functional as F\n", "import mindspore.dataset as ds\n", "import mindspore.dataset.vision.c_transforms as C\n", "import mindspore.dataset.transforms.c_transforms as C2\n", diff --git a/tutorials/notebook/customized_debugging_information.ipynb b/tutorials/notebook/customized_debugging_information.ipynb index 0ecf5695d2..dafdfc455f 100644 --- a/tutorials/notebook/customized_debugging_information.ipynb +++ b/tutorials/notebook/customized_debugging_information.ipynb @@ -153,7 +153,6 @@ "source": [ "from mindspore.common.initializer import TruncatedNormal\n", "import mindspore.nn as nn\n", - "from mindspore.ops import operations as P\n", "\n", "def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):\n", " \"\"\"Conv layer weight initial.\"\"\"\n", diff --git a/tutorials/notebook/debugging_in_pynative_mode.ipynb b/tutorials/notebook/debugging_in_pynative_mode.ipynb index c35d904989..649cb059a8 100644 --- a/tutorials/notebook/debugging_in_pynative_mode.ipynb +++ b/tutorials/notebook/debugging_in_pynative_mode.ipynb @@ -253,8 +253,7 @@ "outputs": [], "source": [ "import mindspore.nn as nn\n", - "import mindspore.ops.operations as P\n", - "from mindspore.ops import composite as C\n", + "import mindspore.ops as ops\n", "from mindspore.common import dtype as mstype\n", "from mindspore.common.initializer import TruncatedNormal\n", "from mindspore.nn import Dense\n", @@ -373,7 
+372,7 @@ "\n", " def construct(self, x, label):\n", " weights = self.weights\n", - " return C.GradOperation(get_by_list=True)(self.network, weights)(x, label)" + " return ops.GradOperation(get_by_list=True)(self.network, weights)(x, label)" ] }, { diff --git a/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb b/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb index 95934934b7..7f644a9642 100644 --- a/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb +++ b/tutorials/notebook/mindinsight/mindinsight_image_histogram_scalar_tensor.ipynb @@ -277,7 +277,7 @@ " \n", " ```python\n", " # Init ImageSummary\n", - " self.image_summary = P.ImageSummary()\n", + " self.image_summary = ops.ImageSummary()\n", " ```\n", " \n", " 2. 在 `construct` 方法中使用 `ImageSummary` 算子记录输入图像。其中 \"Image\" 为该数据的名称,MindInsight在展示时,会将该名称展示出来以方便识别是哪个数据。\n", @@ -293,7 +293,7 @@ " \n", " ```python\n", " # Init TensorSummary\n", - " self.tensor_summary = P.TensorSummary()\n", + " self.tensor_summary = ops.TensorSummary()\n", " ```\n", " \n", " 2. 
在`construct`方法中使用`TensorSummary`算子记录张量数据。其中\"Tensor\"为该数据的名称。\n", @@ -319,7 +319,7 @@ "source": [ "import mindspore.nn as nn\n", "from mindspore.common.initializer import TruncatedNormal\n", - "from mindspore.ops import operations as P\n", + "import mindspore.ops as ops\n", "\n", "def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode=\"valid\"):\n", " weight = weight_variable()\n", @@ -348,15 +348,15 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init TensorSummary\n", - " self.tensor_summary = P.TensorSummary()\n", + " self.tensor_summary = ops.TensorSummary()\n", " # Init ImageSummary\n", - " self.image_summary = P.ImageSummary()\n", + " self.image_summary = ops.ImageSummary()\n", "\n", " def construct(self, x):\n", " # Record image by Summary operator\n", @@ -747,7 +747,7 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = fc_with_initialize(4096, 4096)\n", @@ -983,13 +983,13 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = 
fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init TensorSummary\n", - " self.tensor_summary = P.TensorSummary()\n", + " self.tensor_summary = ops.TensorSummary()\n", "\n", " def construct(self, x):\n", " x = self.conv1(x)\n", @@ -1119,13 +1119,13 @@ " self.conv4 = conv(384, 384, 3, pad_mode=\"same\")\n", " self.conv5 = conv(384, 256, 3, pad_mode=\"same\")\n", " self.relu = nn.ReLU()\n", - " self.max_pool2d = P.MaxPool(ksize=3, strides=2)\n", + " self.max_pool2d = ops.MaxPool(ksize=3, strides=2)\n", " self.flatten = nn.Flatten()\n", " self.fc1 = fc_with_initialize(6*6*256, 4096)\n", " self.fc2 = fc_with_initialize(4096, 4096)\n", " self.fc3 = fc_with_initialize(4096, num_classes)\n", " # Init ImageSummary\n", - " self.image_summary = P.ImageSummary()\n", + " self.image_summary = ops.ImageSummary()\n", "\n", " def construct(self, x):\n", " # Record image by Summary operator\n", diff --git a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb index d7b558c859..432e48ebbb 100644 --- a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb +++ b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb @@ -229,7 +229,6 @@ "metadata": {}, "outputs": [], "source": [ - "from mindspore.ops import operations as P\n", "import mindspore.nn as nn\n", "from mindspore.common.initializer import TruncatedNormal\n", "\n", diff --git a/tutorials/notebook/mixed_precision.ipynb b/tutorials/notebook/mixed_precision.ipynb index d74e6e11f9..74c1f6c12c 100644 --- a/tutorials/notebook/mixed_precision.ipynb +++ b/tutorials/notebook/mixed_precision.ipynb @@ -404,22 +404,21 @@ "outputs": [], "source": [ "from mindspore.nn.loss.loss import _Loss\n", - "from mindspore.ops import operations as P\n", - "from mindspore.ops import functional as F\n", + "import mindspore.ops as ops\n", "from 
mindspore import Tensor\n", "import mindspore.nn as nn\n", "\n", "class CrossEntropy(_Loss):\n", " def __init__(self, smooth_factor=0., num_classes=1001):\n", " super(CrossEntropy, self).__init__()\n", - " self.onehot = P.OneHot()\n", + " self.onehot = ops.OneHot()\n", " self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)\n", " self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)\n", " self.ce = nn.SoftmaxCrossEntropyWithLogits()\n", - " self.mean = P.ReduceMean(False)\n", + " self.mean = ops.ReduceMean(False)\n", "\n", " def construct(self, logit, label):\n", - " one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)\n", + " one_hot_label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value)\n", " loss = self.ce(logit, one_hot_label)\n", " loss = self.mean(loss, 0)\n", " return loss" @@ -520,7 +519,7 @@ " if self.down_sample:\n", " self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),\n", " _bn(out_channel)])\n", - " self.add = P.TensorAdd()\n", + " self.add = ops.TensorAdd()\n", "\n", " def construct(self, x):\n", " identity = x\n", @@ -560,7 +559,7 @@ "\n", " self.conv1 = _conv7x7(3, 64, stride=2)\n", " self.bn1 = _bn(64)\n", - " self.relu = P.ReLU()\n", + " self.relu = ops.ReLU()\n", " self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode=\"same\")\n", "\n", " self.layer1 = self._make_layer(block,\n", @@ -584,7 +583,7 @@ " out_channel=out_channels[3],\n", " stride=strides[3])\n", "\n", - " self.mean = P.ReduceMean(keep_dims=True)\n", + " self.mean = ops.ReduceMean(keep_dims=True)\n", " self.flatten = nn.Flatten()\n", " self.end_point = _fc(out_channels[3], num_classes)\n", "\n", diff --git a/tutorials/notebook/nlp_application.ipynb b/tutorials/notebook/nlp_application.ipynb index d2df6a718d..1155b7351e 100644 --- a/tutorials/notebook/nlp_application.ipynb +++ b/tutorials/notebook/nlp_application.ipynb @@ -679,7 +679,7 @@ "\n", "from 
mindspore import Tensor, nn, context, Parameter, ParameterTuple\n", "from mindspore.common.initializer import initializer\n", - "from mindspore.ops import operations as P" + "import mindspore.ops as ops" ] }, { @@ -787,7 +787,7 @@ " super(StackLSTM, self).__init__()\n", " self.num_layers = num_layers\n", " self.batch_first = batch_first\n", - " self.transpose = P.Transpose()\n", + " self.transpose = ops.Transpose()\n", "\n", " # direction number\n", " num_directions = 2 if bidirectional else 1\n", @@ -883,7 +883,7 @@ " embed_size,\n", " embedding_table=weight)\n", " self.embedding.embedding_table.requires_grad = False\n", - " self.trans = P.Transpose()\n", + " self.trans = ops.Transpose()\n", " self.perm = (1, 0, 2)\n", "\n", " if context.get_context(\"device_target\") in STACK_LSTM_DEVICE:\n", @@ -905,7 +905,7 @@ " dropout=0.0)\n", " self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)\n", "\n", - " self.concat = P.Concat(1)\n", + " self.concat = ops.Concat(1)\n", " if bidirectional:\n", " self.decoder = nn.Dense(num_hiddens * 4, num_classes)\n", " else:\n", -- Gitee