From e986cd3188c9659ce3abdea6ccbec0074a431c0d Mon Sep 17 00:00:00 2001 From: huan <3174348550@qq.com> Date: Wed, 16 Jul 2025 17:14:44 +0800 Subject: [PATCH] modify file contents --- tutorials/source_en/custom_program/fusion_pass.md | 4 ++-- tutorials/source_en/custom_program/hook_program.md | 4 ++-- .../custom_program/operation/op_customopbuilder.md | 2 +- .../operation/op_customopbuilder_function.md | 4 ++-- tutorials/source_en/cv/fcn8s.md | 2 +- tutorials/source_en/cv/resnet50.md | 4 ++-- tutorials/source_en/cv/ssd.md | 10 +++++----- tutorials/source_en/cv/transfer_learning.md | 10 +++++----- tutorials/source_en/cv/vit.md | 8 ++++---- tutorials/source_zh_cn/custom_program/fusion_pass.md | 2 +- tutorials/source_zh_cn/cv/resnet50.ipynb | 2 +- tutorials/source_zh_cn/cv/ssd.ipynb | 8 ++++---- tutorials/source_zh_cn/cv/transfer_learning.ipynb | 8 ++++---- 13 files changed, 34 insertions(+), 34 deletions(-) diff --git a/tutorials/source_en/custom_program/fusion_pass.md b/tutorials/source_en/custom_program/fusion_pass.md index 2b11c0f5e1..a14735495f 100644 --- a/tutorials/source_en/custom_program/fusion_pass.md +++ b/tutorials/source_en/custom_program/fusion_pass.md @@ -31,12 +31,12 @@ The environment variable `MS_DEV_GRAPH_KERNEL_FLAGS` provides controlling the sw ### Specifying Optimization Level -- **opt_level**: Set the optimization level. Default: `2` . Graph kernel fusion can be enabled equivalently by setting opt_level greater than 0. Optional values include: +- **opt_level**: Set the optimization level. Default: `2` . Graph kernel fusion can be enabled equivalently by setting opt_level greater than 0. Optional values include: - 0: disables graph kernel fusion; - 1: enables the basic fusion of operators; - 2: includes all optimizations of level 1, and turns on more optimizations such as CSE, arithmetic simplification and so on; - - 3: includes all optimizations of level 2, and turns on more optimizations such as SitchingFusion, ParallelFusion and so on. Optimizations of this level are radical and unstable in some scenarios. Be caution when using this level. + - 3: includes all optimizations of level 2, and turns on more optimizations such as StitchingFusion, ParallelFusion and so on. Optimizations of this level are radical and unstable in some scenarios. Be cautious when using this level. ### Specifying Automatic Fusion Strategy diff --git a/tutorials/source_en/custom_program/hook_program.md b/tutorials/source_en/custom_program/hook_program.md index 9412c73641..a4101ed3d2 100644 --- a/tutorials/source_en/custom_program/hook_program.md +++ b/tutorials/source_en/custom_program/hook_program.md @@ -2,7 +2,7 @@ [![View Source On Gitee](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/r2.7.0rc1/resource/_static/logo_source_en.svg)](https://gitee.com/mindspore/docs/blob/r2.7.0rc1/tutorials/source_en/custom_program/hook_program.md) -Debugging deep learning networks is a big task for every practitioner in the field of deep learning. Since the deep learning network hides the input and output data as well as the inverse gradient of the intermediate layer operators, only the gradient of the network input data (feature quantity and weight) is provided, resulting in the inability to accurately sense the data changes of the intermediate layer operators, which reduces the debugging efficiency. In order to facilitate users to debug the deep learning network accurately and quickly, MindSpore designes Hook function in dynamic graph mode. 
**Using Hook function can capture the input and output data of intermediate layer operators as well as the reverse gradient**.
+Debugging deep learning networks is a major task for every practitioner in the field of deep learning. Since the deep learning network hides the input and output data as well as the reverse gradient of the intermediate layer operators, and provides only the gradient of the network input data (feature quantities and weights), users cannot accurately sense the data changes of the intermediate layer operators, which reduces debugging efficiency. To help users debug the deep learning network accurately and quickly, MindSpore provides the Hook function in dynamic graph mode. **Using the Hook function can capture the input and output data of intermediate layer operators as well as the reverse gradient**.

 Currently, five forms of Hook functions are provided in dynamic graph mode: [HookBackward](https://www.mindspore.cn/docs/en/r2.7.0rc1/api_python/ops/mindspore.ops.HookBackward.html) operator and register_forward_pre_hook, register_forward_hook, register_backward_pre_hook, register_backward_hook functions registered on Cell objects.

@@ -216,7 +216,7 @@ For more information about the `register_forward_hook` function of the Cell obje

 The user can use the `register_backward_pre_hook` function on the Cell object to register a custom Hook function that captures the gradient associated with the Cell object when the network is back propagated. This function does not work in graph mode or inside functions modified with `@jit`. The `register_backward_pre_hook` function takes the Hook function as an input and returns a `handle` object that corresponds to the Hook function. The user can remove the corresponding Hook function by calling the `remove()` function of the `handle` object. Each call to the `register_backward_pre_hook` function will return a different `handle` object.

-Unlike the custom Hook function used by the HookBackward operator, the inputs of the Hook function used by `register_backward_pre_hook` contains `cell`, which represents the information of the Cell object, the gradient passed to the Cell object in reverse of the Cell object.
+Unlike the custom Hook function used by the HookBackward operator, the inputs of the Hook function used by `register_backward_pre_hook` contain `cell`, which represents the information of the Cell object, and the gradient passed back to the Cell object during back propagation.

 The sample code is as follows:

diff --git a/tutorials/source_en/custom_program/operation/op_customopbuilder.md b/tutorials/source_en/custom_program/operation/op_customopbuilder.md
index 95b04c72c0..3a33845a8f 100644
--- a/tutorials/source_en/custom_program/operation/op_customopbuilder.md
+++ b/tutorials/source_en/custom_program/operation/op_customopbuilder.md
@@ -12,7 +12,7 @@ When developing operators using [C++ Interface](https://www.mindspore.cn/tutoria

 ## Introduction to Dynamic Graph Operator Execution Process

-MindSpore uses Python as the frontend and implements the backend with C++. Each operator execution needs to go through a series of operations such as Python interpretation and execution, operator inference, and operator memory allocation before being dispatched to the GPU/NPU. This results in relatively slower execution speeds. To address this performance issue, MindSpore’s dynamic graph adopts a multi-stage pipeline runtime, where various stages of operator dispatch are executed in parallel, thereby improving operator dispatch speed.
+MindSpore uses Python as the frontend and implements the backend with C++. Each operator execution needs to go through a series of operations such as Python interpretation and execution, operator inference, and operator memory allocation before being dispatched to the GPU/NPU. This results in relatively slower execution speeds. To address this performance issue, MindSpore's dynamic graph adopts a multi-stage pipeline runtime, where various stages of operator dispatch are executed in parallel, thereby improving operator dispatch speed.

 ![pyboost pipeline](../images/customopbuilder_pyboost.png)

diff --git a/tutorials/source_en/custom_program/operation/op_customopbuilder_function.md b/tutorials/source_en/custom_program/operation/op_customopbuilder_function.md
index e501698719..2a97933fb2 100644
--- a/tutorials/source_en/custom_program/operation/op_customopbuilder_function.md
+++ b/tutorials/source_en/custom_program/operation/op_customopbuilder_function.md
@@ -10,7 +10,7 @@ For dynamic graphs, MindSpore offers another method for customizing backward fun

 The following is an example illustrating how to use the `Function` interface:

-This guide demonstrates a multiplication operator implementation on Ascend platform. For related code and more examples, see [Repository Code](https://gitee.com/mindspore/mindspore/blob/v2.7.0-rc1/tests/st/pynative/grad/test_custom_cpp_function_grad.py).
+This guide demonstrates a multiplication operator implementation on the Ascend platform. For related code and more examples, see [Repository Code](https://gitee.com/mindspore/mindspore/blob/v2.7.0rc1/tests/st/pynative/grad/test_custom_cpp_function_grad.py).

 **Note:** The `BaseTensorPtr` referenced in this guide is an internal data structure of MindSpore. In future versions, these interfaces will be refactored into interfaces based on `ms::Tensor`.

@@ -217,7 +217,7 @@ print('grads[0]:', grads[0])
 print('grads[1]:', grads[1])
 ```

-Here, the user defines a custom operator module `self.my_ops = CustomOpBuilder(“my_ops”, ['. /custom_src/function_ops.cpp'], backend=“Ascend”).load()`. Here the meaning of `CustomOpBuilder` parameters are:
+Here, the user defines a custom operator module `self.my_ops = CustomOpBuilder("my_ops", ['./custom_src/function_ops.cpp'], backend="Ascend").load()`. The meanings of the `CustomOpBuilder` parameters are:

 - `"my_ops"`: Customize the operator module name.
 - `['./custom_src/function_ops.cpp']`: Customize the path to the operator C++ file. If there is more than one C++ file, you need to list them all in the list.
diff --git a/tutorials/source_en/cv/fcn8s.md b/tutorials/source_en/cv/fcn8s.md
index 7811f1bc73..e50014d9f0 100644
--- a/tutorials/source_en/cv/fcn8s.md
+++ b/tutorials/source_en/cv/fcn8s.md
@@ -337,7 +337,7 @@ class FCN8s(nn.Cell):

 ### Importing VGG-16 Partial Pre-trained Weights

-FCN uses VGG-16 as the backbone network for image encoding. Use the following code to import some pre-traind weights of the VGG-16 pre-trained model.
+FCN uses VGG-16 as the backbone network for image encoding. Use the following code to import some pre-trained weights of the VGG-16 pre-trained model.
```python from download import download diff --git a/tutorials/source_en/cv/resnet50.md b/tutorials/source_en/cv/resnet50.md index 2111b8698a..00d267ada9 100644 --- a/tutorials/source_en/cv/resnet50.md +++ b/tutorials/source_en/cv/resnet50.md @@ -375,7 +375,7 @@ class ResNet(nn.Cell): self.layer4 = make_layer(256 * block.expansion, block, 512, layer_nums[3], stride=2) # average pooling layer self.avg_pool = nn.AvgPool2d() - # flattern layer + # flatten layer self.flatten = nn.Flatten() # fully-connected layer self.fc = nn.Dense(in_channels=input_channel, out_channels=num_classes) @@ -487,7 +487,7 @@ import mindspore.ops as ops def train(data_loader, epoch): - """Model taining""" + """Model training""" losses = [] network.set_train(True) diff --git a/tutorials/source_en/cv/ssd.md b/tutorials/source_en/cv/ssd.md index 58a12fe518..529c04d0c0 100644 --- a/tutorials/source_en/cv/ssd.md +++ b/tutorials/source_en/cv/ssd.md @@ -70,7 +70,7 @@ Extracting zip file... Successfully downloaded / unzipped to ./ ``` -Then We define some inputs for data processing. +Then we define some inputs for data processing. ```python coco_root = "./datasets/" @@ -601,13 +601,13 @@ def class_loss(logits, label): label = ops.one_hot(label, ops.shape(logits)[-1], Tensor(1.0, ms.float32), Tensor(0.0, ms.float32)) weight = ops.ones_like(logits) pos_weight = ops.ones_like(logits) - sigmiod_cross_entropy = ops.binary_cross_entropy_with_logits(logits, label, weight.astype(ms.float32), pos_weight.astype(ms.float32)) + sigmoid_cross_entropy = ops.binary_cross_entropy_with_logits(logits, label, weight.astype(ms.float32), pos_weight.astype(ms.float32)) sigmoid = ops.sigmoid(logits) label = label.astype(ms.float32) p_t = label * sigmoid + (1 - label) * (1 - sigmoid) modulating_factor = ops.pow(1 - p_t, 2.0) alpha_weight_factor = label * 0.75 + (1 - label) * (1 - 0.75) - focal_loss = modulating_factor * alpha_weight_factor * sigmiod_cross_entropy + focal_loss = modulating_factor * alpha_weight_factor * sigmoid_cross_entropy return focal_loss ``` @@ -835,8 +835,8 @@ from mindspore import set_seed class GeneratDefaultBoxes(): """ - Generate Default boxes for SSD, follows the order of (W, H, archor_sizes). - `self.default_boxes` has a shape of [archor_sizes, H, W, 4], the last dimension is [y, x, h, w]. + Generate Default boxes for SSD, follows the order of (W, H, anchor_sizes). + `self.default_boxes` has a shape of [anchor_sizes, H, W, 4], the last dimension is [y, x, h, w]. `self.default_boxes_tlbr` has a shape as `self.default_boxes`, the last dimension is [y1, x1, y2, x2]. 
""" diff --git a/tutorials/source_en/cv/transfer_learning.md b/tutorials/source_en/cv/transfer_learning.md index 74c26b871e..ce1fb4eb68 100644 --- a/tutorials/source_en/cv/transfer_learning.md +++ b/tutorials/source_en/cv/transfer_learning.md @@ -250,7 +250,7 @@ class ResidualBlock(nn.Cell): def construct(self, x): - identity = x # shortscuts + identity = x # shortcuts out = self.conv1(x) # The first layer of main body: 1*1 convolutional layer out = self.norm1(out) @@ -318,7 +318,7 @@ class ResNet(nn.Cell): self.layer4 = make_layer(256 * block.expansion, block, 512, layer_nums[3], stride=2) # Average pooling layer self.avg_pool = nn.AvgPool2d() - # flattern layer + # flatten layer self.flatten = nn.Flatten() # Fully-connected layer self.fc = nn.Dense(in_channels=input_channel, out_channels=num_classes) @@ -343,14 +343,14 @@ class ResNet(nn.Cell): def _resnet(model_url: str, block: Type[Union[ResidualBlockBase, ResidualBlock]], - layers: List[int], num_classes: int, pretrained: bool, pretrianed_ckpt: str, + layers: List[int], num_classes: int, pretrained: bool, pretrained_ckpt: str, input_channel: int): model = ResNet(block, layers, num_classes, input_channel) if pretrained: # Load pre-trained models - download(url=model_url, path=pretrianed_ckpt, replace=True) - param_dict = load_checkpoint(pretrianed_ckpt) + download(url=model_url, path=pretrained_ckpt, replace=True) + param_dict = load_checkpoint(pretrained_ckpt) load_param_into_net(model, param_dict) return model diff --git a/tutorials/source_en/cv/vit.md b/tutorials/source_en/cv/vit.md index 7c746dee56..8a3ad9df66 100644 --- a/tutorials/source_en/cv/vit.md +++ b/tutorials/source_en/cv/vit.md @@ -22,7 +22,7 @@ ViT model is mainly applied in the field of image classification. Therefore, its 1. After the original image of the dataset is divided into multiple patches, the two-dimensional patches (without considering the channel) are converted into one-dimensional vectors. The one-dimensional vectors, the category vectors and the position vectors are added as model inputs. 2. The Block structure of the main body in the model is based on the Encoder structure of Transformer, but adjusts the position of Normalization, where the main structure is still the Multi-head Attention structure. -3. The model connects a fully connected layer after the Blocks stack, accepts the output of the category vector as input and uses it for classification. Typically, we refer to the final fully-onnected layer as Head and the Transformer Encoder part as backbone. +3. The model connects a fully connected layer after the Blocks stack, accepts the output of the category vector as input and uses it for classification. Typically, we refer to the final fully-connected layer as Head and the Transformer Encoder part as backbone. The following code example will explain in detail the implementation of ImageNet classification task based on ViT. @@ -102,17 +102,17 @@ Its main structure is composed of several Encoder and Decoder modules, where the ![encoder-decoder](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/r2.7.0rc1/tutorials/source_zh_cn/cv/images/encoder_decoder.png) -Encoder and Decoder consist of many structures, such as Multi-Head Attention layer, Feed Forward layer, Normaliztion layer, and even Residual Connection ("Add" in the figure). However, one of the most important structures is the Multi-Head Attention structure, which is based on the Self-Attention mechanism and is a parallel composition of multiple Self-Attentions. 
+Encoder and Decoder consist of many structures, such as Multi-Head Attention layer, Feed Forward layer, Normalization layer, and even Residual Connection ("Add" in the figure). However, one of the most important structures is the Multi-Head Attention structure, which is based on the Self-Attention mechanism and is a parallel composition of multiple Self-Attentions. Therefore, understanding Self-Attention means understanding the core of Transformer. #### Attention Module -The following is an explanation of Self-Attention, the core of which is to learn a weight for each word of the input vector. Given a task-related query vector Query vector, the similarity or relevance of Query and each Key is calculated to obtain the Attention distribution, i.e., the weight coefficient of each Key corresponding to Value is obtained, and then the Value is weighted and summed to obtain the final Attention value. +The following is an explanation of Self-Attention, the core of which is to learn a weight for each word of the input vector. Given a task-related Query vector, the similarity or relevance of Query and each Key is calculated to obtain the Attention distribution, i.e., the weight coefficient of each Key corresponding to Value is obtained, and then the Value is weighted and summed to obtain the final Attention value. In the Self-Attention: -1. The initial input vector is first mapped into three vectors Q(Query), K(Key), and V(Value) by the Embedding layer, and since it is a parallel operation, the code is mapped into a dim x 3 vector and partitioned. In other words, if your input vector is a sequence of vectors ($x_1$, $x_2$, $x_3$), where $ x_1$, $x_2$, $x_3$ are one-dimensional vectors, each one-dimensional vector is mapped to Q, K, and V by the Embedding layer, only the Embedding matrix is different and the matrix parameters are obtained through learning. **Here we can consider that the three matrices Q, K, V are a means to discover the correlation information between vectors, which needs to be obtained through learning. The reason for the number of vector is three is that two vectors can get the weights after point multiplication, and another vector is needed to carry the results of summing the weights, so at least three matrices are needed.** +1. The initial input vector is first mapped into three vectors Q(Query), K(Key), and V(Value) by the Embedding layer, and since it is a parallel operation, the code is mapped into a dim x 3 vector and partitioned. In other words, if your input vector is a sequence of vectors ($x_1$, $x_2$, $x_3$), where $ x_1$, $x_2$, $x_3$ are one-dimensional vectors, each one-dimensional vector is mapped to Q, K, and V by the Embedding layer, only the Embedding matrix is different and the matrix parameters are obtained through learning. **Here we can consider that the three matrices Q, K, V are a means to discover the correlation information between vectors, which needs to be obtained through learning. 
The reason three vectors are needed is that the dot product of two vectors yields the weights, and another vector is needed to carry the weighted sum of the results, so at least three matrices are needed.**

 $$
 \begin{cases}
diff --git a/tutorials/source_zh_cn/custom_program/fusion_pass.md b/tutorials/source_zh_cn/custom_program/fusion_pass.md
index 642d6bad9f..16ea76b1b5 100644
--- a/tutorials/source_zh_cn/custom_program/fusion_pass.md
+++ b/tutorials/source_zh_cn/custom_program/fusion_pass.md
@@ -35,7 +35,7 @@
     - 0:关闭图算融合。
     - 1:启动算子的基本融合。
     - 2:包括级别1的所有优化,并打开更多的优化,如CSE优化算法、算术简化等。
-    - 3:包括级别2的所有优化,并打开更多的优化,如SitchingFusion、ParallelFusion等。在某些场景下,该级别的优化激进且不稳定。使用此级别时要小心。
+    - 3:包括级别2的所有优化,并打开更多的优化,如StitchingFusion、ParallelFusion等。在某些场景下,该级别的优化激进且不稳定。使用此级别时要小心。

 ### 指定自动融合策略

diff --git a/tutorials/source_zh_cn/cv/resnet50.ipynb b/tutorials/source_zh_cn/cv/resnet50.ipynb
index e7dd1e1272..c2a87a5832 100644
--- a/tutorials/source_zh_cn/cv/resnet50.ipynb
+++ b/tutorials/source_zh_cn/cv/resnet50.ipynb
@@ -364,7 +364,7 @@
 "\n",
 "    def construct(self, x):\n",
 "\n",
-"        identity = x  # shortscuts分支\n",
+"        identity = x  # shortcuts分支\n",
 "\n",
 "        out = self.conv1(x)  # 主分支第一层:1*1卷积层\n",
 "        out = self.norm1(out)\n",
diff --git a/tutorials/source_zh_cn/cv/ssd.ipynb b/tutorials/source_zh_cn/cv/ssd.ipynb
index 2f434e2c64..11ce7110c4 100644
--- a/tutorials/source_zh_cn/cv/ssd.ipynb
+++ b/tutorials/source_zh_cn/cv/ssd.ipynb
@@ -788,13 +788,13 @@
 "    label = ops.one_hot(label, ops.shape(logits)[-1], Tensor(1.0, ms.float32), Tensor(0.0, ms.float32))\n",
 "    weight = ops.ones_like(logits)\n",
 "    pos_weight = ops.ones_like(logits)\n",
-"    sigmiod_cross_entropy = ops.binary_cross_entropy_with_logits(logits, label, weight.astype(ms.float32), pos_weight.astype(ms.float32))\n",
+"    sigmoid_cross_entropy = ops.binary_cross_entropy_with_logits(logits, label, weight.astype(ms.float32), pos_weight.astype(ms.float32))\n",
 "    sigmoid = ops.sigmoid(logits)\n",
 "    label = label.astype(ms.float32)\n",
 "    p_t = label * sigmoid + (1 - label) * (1 - sigmoid)\n",
 "    modulating_factor = ops.pow(1 - p_t, 2.0)\n",
 "    alpha_weight_factor = label * 0.75 + (1 - label) * (1 - 0.75)\n",
-"    focal_loss = modulating_factor * alpha_weight_factor * sigmiod_cross_entropy\n",
+"    focal_loss = modulating_factor * alpha_weight_factor * sigmoid_cross_entropy\n",
 "    return focal_loss"
 ]
 },
@@ -1062,8 +1062,8 @@
 "\n",
 "class GeneratDefaultBoxes():\n",
 "    \"\"\"\n",
-"    Generate Default boxes for SSD, follows the order of (W, H, archor_sizes).\n",
-"    `self.default_boxes` has a shape of [archor_sizes, H, W, 4], the last dimension is [y, x, h, w].\n",
+"    Generate Default boxes for SSD, follows the order of (W, H, anchor_sizes).\n",
+"    `self.default_boxes` has a shape of [anchor_sizes, H, W, 4], the last dimension is [y, x, h, w].\n",
 "    `self.default_boxes_tlbr` has a shape as `self.default_boxes`, the last dimension is [y1, x1, y2, x2].\n",
 "    \"\"\"\n",
 "\n",
diff --git a/tutorials/source_zh_cn/cv/transfer_learning.ipynb b/tutorials/source_zh_cn/cv/transfer_learning.ipynb
index 4dc3d4feb2..1f97ce1e5f 100644
--- a/tutorials/source_zh_cn/cv/transfer_learning.ipynb
+++ b/tutorials/source_zh_cn/cv/transfer_learning.ipynb
@@ -359,7 +359,7 @@
 "\n",
 "    def construct(self, x):\n",
 "\n",
-"        identity = x  # shortscuts分支\n",
+"        identity = x  # shortcuts分支\n",
 "\n",
 "        out = self.conv1(x)  # 主分支第一层:1*1卷积层\n",
 "        out = self.norm1(out)\n",
@@ -464,14 +464,14 @@
 "\n",
 "\n",
 "def _resnet(model_url: str, block: Type[Union[ResidualBlockBase, ResidualBlock]],\n",
-"            
layers: List[int], num_classes: int, pretrained: bool, pretrianed_ckpt: str,\n", + " layers: List[int], num_classes: int, pretrained: bool, pretrained_ckpt: str,\n", " input_channel: int):\n", " model = ResNet(block, layers, num_classes, input_channel)\n", "\n", " if pretrained:\n", " # 加载预训练模型\n", - " download(url=model_url, path=pretrianed_ckpt, replace=True)\n", - " param_dict = load_checkpoint(pretrianed_ckpt)\n", + " download(url=model_url, path=pretrained_ckpt, replace=True)\n", + " param_dict = load_checkpoint(pretrained_ckpt)\n", " load_param_into_net(model, param_dict)\n", "\n", " return model\n", -- Gitee
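
Postscript: the Self-Attention walkthrough in the vit.md hunk above describes how each input vector is mapped to Q, K and V by learned matrices, how the dot product of Q with each K yields the attention weights, and how V carries the weighted sum. A minimal NumPy sketch of that computation follows for reference; the shapes, the names `W_q`/`W_k`/`W_v`, and the scaling by the square root of the dimension are illustrative assumptions, not code from the patched tutorials.

```python
import numpy as np

rng = np.random.default_rng(0)
seq_len, dim = 3, 4                     # a sequence of three input vectors x1, x2, x3

X = rng.normal(size=(seq_len, dim))     # input sequence, one row per vector
W_q = rng.normal(size=(dim, dim))       # three learned mappings; in practice a single
W_k = rng.normal(size=(dim, dim))       # (dim, 3*dim) matrix is applied and the result
W_v = rng.normal(size=(dim, dim))       # is partitioned into Q, K and V

Q, K, V = X @ W_q, X @ W_k, X @ W_v     # map every input vector to Q, K and V

scores = Q @ K.T / np.sqrt(dim)         # dot products of Q and K give the raw weights
weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
weights /= weights.sum(axis=-1, keepdims=True)   # softmax -> attention distribution

out = weights @ V                       # weighted sum over V carries the result
print(out.shape)                        # (3, 4): one attended vector per input vector
```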