diff --git a/docs/api_cpp/source_en/conf.py b/docs/api_cpp/source_en/conf.py index d4d9a317a1072a1975703a430f1f2bf87499c3a6..0f8c878ce170688e628c4f34dbb0adb4674621dc 100644 --- a/docs/api_cpp/source_en/conf.py +++ b/docs/api_cpp/source_en/conf.py @@ -90,7 +90,7 @@ def specificationsForKind(kind): if kind == "class": return [ ":members:", - ":protected-members:", + # ":protected-members:", ":private-members:" ] else: diff --git a/docs/api_cpp/source_en/namespace/mindspore_dataset.rst b/docs/api_cpp/source_en/namespace/mindspore_dataset.rst index abecd5efffbf27b4fb00d241f31e5bf9d3f0953d..f5185a93bfc120126b74d21528817b58898715be 100644 --- a/docs/api_cpp/source_en/namespace/mindspore_dataset.rst +++ b/docs/api_cpp/source_en/namespace/mindspore_dataset.rst @@ -237,6 +237,8 @@ Constants Others ------ +This section contains some predefined classes related to Dataset operations, tool functions, and some Typedefs. + Classes ^^^^^^^ diff --git a/docs/api_cpp/source_en/session.md b/docs/api_cpp/source_en/session.md index 4b602201032c1049214985511503068a72bbb183..5ab4a1fd6302e392cbd86087e7fb555364c86abc 100644 --- a/docs/api_cpp/source_en/session.md +++ b/docs/api_cpp/source_en/session.md @@ -216,6 +216,167 @@ Static method to create a LiteSession pointer. The returned LiteSession pointer Pointer that points to MindSpore Lite LiteSession. +#### CreateTransferSession + +```cpp +static TrainSession *CreateTransferSession(const std::string &filename_backbone, const std::string &filename_head, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr); +``` + +Static method to create a TrainSession pointer for a transfer learning session. + +- Parameters + + - `filename_backbone`: File name of the backbone network. + - `filename_head`: File name of the head network. + - `context`: Pointer that points to the session context. + - `train_mode`: Whether to initialize the session in train mode. + - `cfg`: Config of the train session. + +- Returns + + Pointer that points to MindSpore Lite TrainSession. + +#### CreateTrainSession + +```cpp +static LiteSession *CreateTrainSession(const std::string &filename, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr); +``` + +Static method to create a TrainSession object. + +- Parameters + + - `filename`: Train model file name. + - `context`: Pointer that points to the session context. + - `train_mode`: Whether to initialize the session in train mode. + - `cfg`: Config of the train session. + +- Returns + + Pointer that points to MindSpore Lite TrainSession. + +#### Train + +```cpp +virtual int Train() = 0; +``` + +Set the model to train mode. + +- Returns + + STATUS as an error code of compiling the graph. STATUS is defined in [errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h). + +#### IsTrain + +```cpp +bool IsTrain() { return train_mode_ == true; } +``` + +Check whether the current model is in train mode. + +- Returns + + Boolean indicating whether the model is in train mode. + +#### Eval + +```cpp +virtual int Eval() = 0; +``` + +Set the model to eval mode. + +- Returns + + STATUS as an error code of compiling the graph. STATUS is defined in [errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h). + +#### IsEval + +```cpp +bool IsEval() { return train_mode_ == false; } +``` + +Check whether the current model is in eval mode. + +- Returns + + Boolean indicating whether the model is in eval mode.
+ +#### SetLearningRate + +```cpp +virtual int SetLearningRate(float learning_rate) = 0; +``` + +Set the learning rate for the current model. + +- Returns + + 0 represents success or -1 in case of error. + +#### GetLearningRate + +```cpp +virtual float GetLearningRate() = 0; +``` + +Get the learning rate of the current model. + +- Returns + + The learning rate of the current model, default is 0.0. + +#### SetupVirtualBatch + +```cpp +virtual int SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f) = 0; +``` + +Customize the virtual batch size to reduce memory consumption. + +- Parameters + + - `virtual_batch_multiplier`: Virtual batch multiplier. + - `lr`: Learning rate. + - `momentum`: Momentum. + +- Returns + + 0 represents success or -1 in case of error. + +#### GetPredictions + +```cpp +virtual std::vector<tensor::MSTensor *> GetPredictions() const = 0; +``` + +Get the prediction results of the trained model. + +- Returns + + The pointer vector of prediction results. + +#### Export + +```cpp +virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN, + lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType format = lite::FT_FLATBUFFERS) const = 0; +``` + +Save the trained model into a flatbuffer file. + +- Parameters + + - `file_name`: Name of the file to save the trained model to. + - `model_type`: Type of the saved model: train or inference. + - `quant_type`: Quantization type of the model. + - `format`: Format of the saved model. + +- Returns + + 0 represents success or -1 in case of error. + ## TrainLoop \#include <[train_loop.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/train/train_loop.h)> diff --git a/docs/api_cpp/source_zh_cn/session.md b/docs/api_cpp/source_zh_cn/session.md index 230f7a273f327cfd134698eff8bd5d6d11e3ee40..ccd2148c51451d390bfb81789502140876c75da8 100644 --- a/docs/api_cpp/source_zh_cn/session.md +++ b/docs/api_cpp/source_zh_cn/session.md @@ -214,6 +214,167 @@ static LiteSession *CreateSession(const char *model_buf, size_t size, const lite 指向MindSpore Lite LiteSession的指针。 +#### CreateTransferSession + +```cpp +static TrainSession *CreateTransferSession(const std::string &filename_backbone, const std::string &filename_head, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr); +``` + +创建迁移学习训练会话指针的静态方法。 + +- 参数 + + - `filename_backbone`: 主干网络的文件名。 + - `filename_head`: 顶层网络的文件名。 + - `context`: 指向会话上下文的指针。 + - `train_mode`: 是否开启训练模式。 + - `cfg`: 训练相关配置。 + +- 返回值 + + 指向训练会话的指针。 + +#### CreateTrainSession + +```cpp +static LiteSession *CreateTrainSession(const std::string &filename, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr); +``` + +创建训练会话指针的静态方法。 + +- 参数 + + - `filename`: 训练模型的文件名。 + - `context`: 指向会话上下文的指针。 + - `train_mode`: 是否开启训练模式。 + - `cfg`: 训练相关配置。 + +- 返回值 + + 指向训练会话的指针。 + +#### Train + +```cpp +virtual int Train() = 0; +``` + +设置为训练模式。 + +- 返回值 + + STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。 + +#### IsTrain + +```cpp +bool IsTrain() { return train_mode_ == true; } +``` + +检查当前模型是否为训练模式。 + +- 返回值 + + true 或 false,即当前模型是否为训练模式。 + +#### Eval + +```cpp +virtual int Eval() = 0; +``` + +设置为验证模式。 + +- 返回值 + + STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。 + +#### IsEval + +```cpp +bool IsEval() { return train_mode_ == false; } +``` + +检查当前模型是否为验证模式。 + +- 返回值 + + true 或
false,即当前模型是否为验证模式。 + +#### SetLearningRate + +```cpp +virtual int SetLearningRate(float learning_rate) = 0; +``` + +为当前模型设置学习率。 + +- 返回值 + + STATUS,即错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。 + +#### GetLearningRate + +```cpp +virtual float GetLearningRate() = 0; +``` + +获取当前模型的学习率。 + +- 返回值 + + 当前模型的学习率,如果未设置优化器则返回0.0。 + +#### SetupVirtualBatch + +```cpp +virtual int SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f) = 0; +``` + +用户自定义虚拟批次数,用于减少内存消耗。 + +- 参数 + + - `virtual_batch_multiplier`: 自定义虚拟批次数。 + - `lr`: 自定义学习率。 + - `momentum`: 自定义动量。 + +- 返回值 + + STATUS,即错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。 + +#### GetPredictions + +```cpp +virtual std::vector<tensor::MSTensor *> GetPredictions() const = 0; +``` + +获取训练模型的预测结果。 + +- 返回值 + + 预测结果张量指针数组。 + +#### Export + +```cpp +virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN, + lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType format = lite::FT_FLATBUFFERS) const = 0; +``` + +保存已训练模型。 + +- 参数 + + - `file_name`: 保存模型的文件名。 + - `model_type`: 保存模型的类型:训练或推理。 + - `quant_type`: 模型的量化类型。 + - `format`: 保存模型的格式。 + +- 返回值 + + STATUS,即错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。 + ## TrainLoop \#include <[train_loop.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/train/train_loop.h)> diff --git a/docs/api_java/source_en/lite_session.md b/docs/api_java/source_en/lite_session.md index da87e6d4f7291f42da4883d6fab769ce35e2ffa2..eccebdc582d146c4462211c981bccba3b6085f9b 100644 --- a/docs/api_java/source_en/lite_session.md +++ b/docs/api_java/source_en/lite_session.md @@ -22,8 +22,15 @@ LiteSession defines session in MindSpore Lite for compiling Model and forwarding | [Map getOutputMapByTensor()](#getoutputmapbytensor) | | [List getOutputTensorNames()](#getoutputtensornames) | | [MSTensor getOutputByTensorName(String tensorName)](#getoutputbytensorname) | -| [boolean resize(List inputs, int[][] dims](#resize) | +| [boolean resize(List inputs, int[][] dims)](#resize) | | [void free()](#free) | +| [boolean export(String modelFilename, int model_type, int quantization_type)](#export) | +| [boolean train()](#train) | +| [boolean eval()](#eval) | +| [boolean isTrain()](#istrain) | +| [boolean isEval()](#iseval) | +| [boolean setLearningRate(float learning_rate)](#setlearningrate) | +| [boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum)](#setupvirtualbatch) | ## init @@ -189,3 +196,87 @@ public void free() ``` Free LiteSession. + +## export + +```java +public boolean export(String modelFilename, int model_type, int quantization_type) +``` + +Export the model. + +- Parameters + + - `modelFilename`: Model file name. + - `model_type`: Train or Inference type. + - `quantization_type`: The quantization type. + +- Returns + + Whether the export is successful. + +## train + +```java +public boolean train() +``` + +Switch to the train mode. + +## eval + +```java +public boolean eval() +``` + +Switch to the eval mode. + +## isTrain + +```java +public boolean isTrain() +``` + +Check whether the current model is in train mode. + +## isEval + +```java +public boolean isEval() +``` + +Check whether the current model is in eval mode. + +## setLearningRate + +```java +public boolean setLearningRate(float learning_rate) +``` + +Set learning rate.
+ +- Parameters + + - `learning_rate`: Learning rate. + +- Returns + + Whether the learning rate is successfully set. + +## setupVirtualBatch + +```java +public boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum) +``` + +Set up the virtual batch. + +- Parameters + + - `virtualBatchMultiplier`: virtual batch multiplier. + - `learningRate`: learning rate. + - `momentum`: momentum. + +- Returns + + Whether the virtual batch is successfully set. diff --git a/docs/api_java/source_zh_cn/lite_session.md b/docs/api_java/source_zh_cn/lite_session.md index 6df2bf110160847495a3279ce9c1b4dd0adc9fea..c8ca339869f3aeca395b75d4039e5b5637c926be 100644 --- a/docs/api_java/source_zh_cn/lite_session.md +++ b/docs/api_java/source_zh_cn/lite_session.md @@ -22,8 +22,15 @@ LiteSession定义了MindSpore Lite中的会话,用于进行Model的编译和 | [Map getOutputMapByTensor()](#getoutputmapbytensor) | | [List getOutputTensorNames()](#getoutputtensornames) | | [MSTensor getOutputByTensorName(String tensorName)](#getoutputbytensorname) | -| [boolean resize(List inputs, int[][] dims](#resize) | +| [boolean resize(List inputs, int[][] dims)](#resize) | | [void free()](#free) | +| [boolean export(String modelFilename, int model_type, int quantization_type)](#export) | +| [boolean train()](#train) | +| [boolean eval()](#eval) | +| [boolean isTrain()](#istrain) | +| [boolean isEval()](#iseval) | +| [boolean setLearningRate(float learning_rate)](#setlearningrate) | +| [boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum)](#setupvirtualbatch) | ## init @@ -189,3 +196,87 @@ public void free() ``` 释放LiteSession。 + +## export + +```java +public boolean export(String modelFilename, int model_type, int quantization_type) +``` + +导出模型。 + +- 参数 + + - `modelFilename`: 模型文件名称。 + - `model_type`: 训练或者推理类型。 + - `quantization_type`: 量化类型。 + +- 返回值 + + 导出模型是否成功。 + +## train + +```java +public boolean train() +``` + +切换训练模式。 + +## eval + +```java +public boolean eval() +``` + +切换推理模式。 + +## isTrain + +```java +public boolean isTrain() +``` + +检查当前是否为训练模式。 + +## isEval + +```java +public boolean isEval() +``` + +检查当前是否为推理模式。 + +## setLearningRate + +```java +public boolean setLearningRate(float learning_rate) +``` + +设置学习率。 + +- 参数 + + - `learning_rate`: 学习率。 + +- 返回值 + + 学习率设置是否成功。 + +## setupVirtualBatch + +```java +public boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum) +``` + +设置虚批次系数。 + +- 参数 + + - `virtualBatchMultiplier`: 虚批次系数。 + - `learningRate`: 学习率。 + - `momentum`: 动量系数。 + +- 返回值 + + 虚批次系数设置是否成功。 diff --git a/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst b/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst index 6534637abb4d182b6ad93bede9408c81a73cda12..67d920ded7b2d941b7cd1f548f9552be4b9b7d95 100644 --- a/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst +++ b/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst @@ -36,9 +36,9 @@ mindspore.dataset.vision.c_transforms mindspore.dataset.vision.c_transforms.RandomHorizontalFlip mindspore.dataset.vision.c_transforms.RandomHorizontalFlipWithBBox mindspore.dataset.vision.c_transforms.RandomPosterize + mindspore.dataset.vision.c_transforms.RandomResize mindspore.dataset.vision.c_transforms.RandomResizedCrop mindspore.dataset.vision.c_transforms.RandomResizedCropWithBBox - mindspore.dataset.vision.c_transforms.RandomResize mindspore.dataset.vision.c_transforms.RandomResizeWithBBox mindspore.dataset.vision.c_transforms.RandomRotation
mindspore.dataset.vision.c_transforms.RandomSelectSubpolicy @@ -53,7 +53,7 @@ mindspore.dataset.vision.c_transforms mindspore.dataset.vision.c_transforms.SoftDvppDecodeRandomCropResizeJpeg mindspore.dataset.vision.c_transforms.SoftDvppDecodeResizeJpeg mindspore.dataset.vision.c_transforms.UniformAugment - mindspore.dataset.vision.c_transforms.HorizontalFlip + mindspore.dataset.vision.c_transforms.VerticalFlip mindspore.dataset.vision.py_transforms ------------------------------------------------- @@ -76,6 +76,7 @@ mindspore.dataset.vision.py_transforms mindspore.dataset.vision.py_transforms.LinearTransformation mindspore.dataset.vision.py_transforms.MixUp mindspore.dataset.vision.py_transforms.Normalize + mindspore.dataset.vision.py_transforms.NormalizePad mindspore.dataset.vision.py_transforms.Pad mindspore.dataset.vision.py_transforms.RandomAffine mindspore.dataset.vision.py_transforms.RandomColor diff --git a/docs/api_python/source_en/mindspore/mindspore.ops.rst b/docs/api_python/source_en/mindspore/mindspore.ops.rst index ed3374e90d74fc7105f560b6ad59d406e6d81b84..a0552f5d295932fc107c5c3a8c70c0e184f06514 100644 --- a/docs/api_python/source_en/mindspore/mindspore.ops.rst +++ b/docs/api_python/source_en/mindspore/mindspore.ops.rst @@ -47,195 +47,195 @@ The functional operators are the pre-instantiated Primitive operators, which can :header-rows: 1 * - functional - - operations + - Description * - mindspore.ops.add - - :class:`mindspore.ops.Add` + - Refer to :class:`mindspore.ops.Add`. * - mindspore.ops.addn - - :class:`mindspore.ops.AddN` + - Refer to :class:`mindspore.ops.AddN`. * - mindspore.ops.array_reduce - - :class:`mindspore.ops.Primitive` ('array_reduce') + - :class:`mindspore.ops.Primitive` ('array_reduce') * - mindspore.ops.array_to_scalar - - :class:`mindspore.ops.Primitive` ('array_to_scalar') + - :class:`mindspore.ops.Primitive` ('array_to_scalar') * - mindspore.ops.assign - - :class:`mindspore.ops.Assign` + - Refer to :class:`mindspore.ops.Assign`. * - mindspore.ops.assign_add - - :class:`mindspore.ops.AssignAdd` + - Refer to :class:`mindspore.ops.AssignAdd`. * - mindspore.ops.assign_sub - - :class:`mindspore.ops.AssignSub` + - Refer to :class:`mindspore.ops.AssignSub`. * - mindspore.ops.bool_and - - :class:`mindspore.ops.Primitive` ('bool_and') + - :class:`mindspore.ops.Primitive` ('bool_and') * - mindspore.ops.bool_eq - - :class:`mindspore.ops.Primitive` ('bool_eq') + - :class:`mindspore.ops.Primitive` ('bool_eq') * - mindspore.ops.bool_not - - :class:`mindspore.ops.Primitive` ('bool_not') + - :class:`mindspore.ops.Primitive` ('bool_not') * - mindspore.ops.bool_or - - :class:`mindspore.ops.Primitive` ('bool_or') + - :class:`mindspore.ops.Primitive` ('bool_or') * - mindspore.ops.cast - - :class:`mindspore.ops.Cast` + - Refer to :class:`mindspore.ops.Cast`. * - mindspore.ops.distribute - - :class:`mindspore.ops.Primitive` ('distribute') + - :class:`mindspore.ops.Primitive` ('distribute') * - mindspore.ops.dtype - - :class:`mindspore.ops.DType` + - Refer to :class:`mindspore.ops.DType`. * - mindspore.ops.equal - - :class:`mindspore.ops.Equal` + - Refer to :class:`mindspore.ops.Equal`. * - mindspore.ops.expand_dims - - :class:`mindspore.ops.ExpandDims` + - Refer to :class:`mindspore.ops.ExpandDims`. * - mindspore.ops.fill - - :class:`mindspore.ops.Fill` + - Refer to :class:`mindspore.ops.Fill`. * - mindspore.ops.gather - - :class:`mindspore.ops.Gather` + - Refer to :class:`mindspore.ops.Gather`.
* - mindspore.ops.gather_nd - - :class:`mindspore.ops.GatherNd` + - Refer to :class:`mindspore.ops.GatherNd`. * - mindspore.ops.hastype - - :class:`mindspore.ops.Primitive` ('hastype') + - :class:`mindspore.ops.Primitive` ('hastype') * - mindspore.ops.in_dict - - :class:`mindspore.ops.Primitive` ('in_dict') + - :class:`mindspore.ops.Primitive` ('in_dict') * - mindspore.ops.is_not - - :class:`mindspore.ops.Primitive` ('is_not') + - :class:`mindspore.ops.Primitive` ('is_not') * - mindspore.ops.is\_ - - :class:`mindspore.ops.Primitive` ('is\_') + - :class:`mindspore.ops.Primitive` ('is\_') * - mindspore.ops.isconstant - - :class:`mindspore.ops.Primitive` ('is_constant') + - :class:`mindspore.ops.Primitive` ('is_constant') * - mindspore.ops.isinstance\_ - - :class:`mindspore.ops.IsInstance` + - Refer to :class:`mindspore.ops.IsInstance`. * - mindspore.ops.issubclass\_ - - :class:`mindspore.ops.IsSubClass` + - Refer to :class:`mindspore.ops.IsSubClass`. * - mindspore.ops.logical_and - - :class:`mindspore.ops.LogicalAnd` + - Refer to :class:`mindspore.ops.LogicalAnd`. * - mindspore.ops.logical_not - - :class:`mindspore.ops.LogicalNot` + - Refer to :class:`mindspore.ops.LogicalNot`. * - mindspore.ops.logical_or - - :class:`mindspore.ops.LogicalOr` + - Refer to :class:`mindspore.ops.LogicalOr`. * - mindspore.ops.make_row_tensor - - :class:`mindspore.ops.Primitive` ('MakeRowTensor') + - Generate row tensor. * - mindspore.ops.make_sparse_tensor - - :class:`mindspore.ops.Primitive` ('MakeSparseTensor') + - Generate sparse tensor. * - mindspore.ops.mixed_precision_cast - :class:`mindspore.ops.Primitive` ('mixed_precision_cast') * - mindspore.ops.neg_tensor - - :class:`mindspore.ops.Neg` + - Refer to :class:`mindspore.ops.Neg`. * - mindspore.ops.not_equal - - :class:`mindspore.ops.NotEqual` + - Refer to :class:`mindspore.ops.NotEqual`. * - mindspore.ops.not_in_dict - - :class:`mindspore.ops.Primitive` ('not_in_dict') + - :class:`mindspore.ops.Primitive` ('not_in_dict') * - mindspore.ops.ones_like - - :class:`mindspore.ops.OnesLike` + - Refer to :class:`mindspore.ops.OnesLike`. * - mindspore.ops.print\_ - - :class:`mindspore.ops.Print` + - Refer to :class:`mindspore.ops.Print`. * - mindspore.ops.rank - - :class:`mindspore.ops.Rank` + - Refer to :class:`mindspore.ops.Rank`. * - mindspore.ops.reduced_shape - - :class:`mindspore.ops.Primitive` ('reduced_shape') + - Calculate the shape of the reduction operator. * - mindspore.ops.reshape - - :class:`mindspore.ops.Reshape` + - Refer to :class:`mindspore.ops.Reshape`. * - mindspore.ops.row_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('RowTensorGetDenseShape') + - Get corresponding dense shape of row tensor. * - mindspore.ops.row_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('RowTensorGetIndices') + - Get indices of row tensor. * - mindspore.ops.row_tensor_get_values - - :class:`mindspore.ops.Primitive` ('RowTensorGetValues') + - Get values of row tensor. * - mindspore.ops.same_type_shape - - :class:`mindspore.ops.SameTypeShape` + - Refer to :class:`mindspore.ops.SameTypeShape`. * - mindspore.ops.scalar_add - :class:`mindspore.ops.Primitive` ('scalar_add') * - mindspore.ops.scalar_cast - - :class:`mindspore.ops.ScalarCast` + - Refer to :class:`mindspore.ops.ScalarCast`. 
* - mindspore.ops.scalar_div - - :class:`mindspore.ops.Primitive` ('scalar_div') + - :class:`mindspore.ops.Primitive` ('scalar_div') * - mindspore.ops.scalar_eq - - :class:`mindspore.ops.Primitive` ('scalar_eq') + - :class:`mindspore.ops.Primitive` ('scalar_eq') * - mindspore.ops.scalar_floordiv - - :class:`mindspore.ops.Primitive` ('scalar_floordiv') + - :class:`mindspore.ops.Primitive` ('scalar_floordiv') * - mindspore.ops.scalar_ge - - :class:`mindspore.ops.Primitive` ('scalar_ge') + - :class:`mindspore.ops.Primitive` ('scalar_ge') * - mindspore.ops.scalar_gt - - :class:`mindspore.ops.Primitive` ('scalar_gt') + - :class:`mindspore.ops.Primitive` ('scalar_gt') * - mindspore.ops.scalar_le - - :class:`mindspore.ops.Primitive` ('scalar_le') + - :class:`mindspore.ops.Primitive` ('scalar_le') * - mindspore.ops.scalar_log - - :class:`mindspore.ops.Primitive` ('scalar_log') + - :class:`mindspore.ops.Primitive` ('scalar_log') * - mindspore.ops.scalar_lt - - :class:`mindspore.ops.Primitive` ('scalar_lt') + - :class:`mindspore.ops.Primitive` ('scalar_lt') * - mindspore.ops.scalar_mod - - :class:`mindspore.ops.Primitive` ('scalar_mod') + - :class:`mindspore.ops.Primitive` ('scalar_mod') * - mindspore.ops.scalar_mul - - :class:`mindspore.ops.Primitive` ('scalar_mul') + - :class:`mindspore.ops.Primitive` ('scalar_mul') * - mindspore.ops.scalar_ne - - :class:`mindspore.ops.Primitive` ('scalar_ne') + - :class:`mindspore.ops.Primitive` ('scalar_ne') * - mindspore.ops.scalar_pow - - :class:`mindspore.ops.Primitive` ('scalar_pow') + - :class:`mindspore.ops.Primitive` ('scalar_pow') * - mindspore.ops.scalar_sub - - :class:`mindspore.ops.Primitive` ('scalar_sub') + - :class:`mindspore.ops.Primitive` ('scalar_sub') * - mindspore.ops.scalar_to_array - - :class:`mindspore.ops.ScalarToArray` + - Refer to :class:`mindspore.ops.ScalarToArray`. * - mindspore.ops.scalar_to_tensor - - :class:`mindspore.ops.ScalarToTensor` + - Refer to :class:`mindspore.ops.ScalarToTensor`. * - mindspore.ops.scalar_uadd - - :class:`mindspore.ops.Primitive` ('scalar_uadd') + - :class:`mindspore.ops.Primitive` ('scalar_uadd') * - mindspore.ops.scalar_usub - - :class:`mindspore.ops.Primitive` ('scalar_usub') + - :class:`mindspore.ops.Primitive` ('scalar_usub') * - mindspore.ops.scatter_nd - - :class:`mindspore.ops.ScatterNd` + - Refer to :class:`mindspore.ops.ScatterNd`. * - mindspore.ops.scatter_nd_update - - :class:`mindspore.ops.ScatterNdUpdate` + - Refer to :class:`mindspore.ops.ScatterNdUpdate`. * - mindspore.ops.scatter_update - - :class:`mindspore.ops.ScatterUpdate` + - Refer to :class:`mindspore.ops.ScatterUpdate`. * - mindspore.ops.select - - :class:`mindspore.ops.Select` + - Refer to :class:`mindspore.ops.Select`. * - mindspore.ops.shape - - :class:`mindspore.ops.Shape` + - Refer to :class:`mindspore.ops.Shape`. * - mindspore.ops.shape_mul - - :class:`mindspore.ops.Primitive` ('shape_mul') + - Multiply all the elements of the input tuple(shape). * - mindspore.ops.size - - :class:`mindspore.ops.Size` + - Refer to :class:`mindspore.ops.Size`. * - mindspore.ops.sparse_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('SparseTensorGetDenseShape') + - Get corresponding dense shape of sparse tensor. * - mindspore.ops.sparse_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('SparseTensorGetIndices') + - Get indices of sparse tensor. * - mindspore.ops.sparse_tensor_get_values - - :class:`mindspore.ops.Primitive` ('SparseTensorGetValues') + - Get values of sparse tensor.
* - mindspore.ops.sqrt - - :class:`mindspore.ops.Sqrt` + - Refer to :class:`mindspore.ops.Sqrt`. * - mindspore.ops.square - - :class:`mindspore.ops.Square` + - Refer to :class:`mindspore.ops.Square`. * - mindspore.ops.stack - - :class:`mindspore.ops.Stack` + - Refer to :class:`mindspore.ops.Stack`. * - mindspore.ops.stop_gradient - - :class:`mindspore.ops.Primitive` ('stop_gradient') + - Disable update during back propagation. * - mindspore.ops.strided_slice - - :class:`mindspore.ops.StridedSlice` + - Refer to :class:`mindspore.ops.StridedSlice`. * - mindspore.ops.string_concat - - :class:`mindspore.ops.Primitive` ('string_concat') + - Concatenate two strings. * - mindspore.ops.string_eq - - :class:`mindspore.ops.Primitive` ('string_equal') + - Determine if two strings are equal. * - mindspore.ops.tensor_div - - :class:`mindspore.ops.RealDiv` + - Refer to :class:`mindspore.ops.RealDiv`. * - mindspore.ops.tensor_floordiv - - :class:`mindspore.ops.FloorDiv` + - Refer to :class:`mindspore.ops.FloorDiv`. * - mindspore.ops.tensor_ge - - :class:`mindspore.ops.GreaterEqual` + - Refer to :class:`mindspore.ops.GreaterEqual`. * - mindspore.ops.tensor_gt - - :class:`mindspore.ops.Greater` + - Refer to :class:`mindspore.ops.Greater`. * - mindspore.ops.tensor_le - - :class:`mindspore.ops.LessEqual` + - Refer to :class:`mindspore.ops.LessEqual`. * - mindspore.ops.tensor_lt - - :class:`mindspore.ops.Less` + - Refer to :class:`mindspore.ops.Less`. * - mindspore.ops.tensor_mod - - :class:`mindspore.ops.FloorMod` + - Refer to :class:`mindspore.ops.FloorMod`. * - mindspore.ops.tensor_mul - - :class:`mindspore.ops.Mul` + - Refer to :class:`mindspore.ops.Mul`. * - mindspore.ops.tensor_pow - - :class:`mindspore.ops.Pow` + - Refer to :class:`mindspore.ops.Pow`. * - mindspore.ops.tensor_sub - - :class:`mindspore.ops.Sub` + - Refer to :class:`mindspore.ops.Sub`. * - mindspore.ops.tile - - :class:`mindspore.ops.Tile` + - Refer to :class:`mindspore.ops.Tile`. * - mindspore.ops.tuple_to_array - - :class:`mindspore.ops.TupleToArray` + - Refer to :class:`mindspore.ops.TupleToArray`. * - mindspore.ops.typeof - - :class:`mindspore.ops.Primitive` ('typeof') + - Get type of object. * - mindspore.ops.zeros_like - - :class:`mindspore.ops.ZerosLike` + - Refer to :class:`mindspore.ops.ZerosLike`. 
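As the section intro above says, each functional operator is a pre-instantiated Primitive operator, so calling the functional form should behave the same as instantiating the corresponding Primitive class and then calling it. A minimal sketch of that equivalence (illustrative only, not part of the patch; assumes a standard MindSpore environment):

```python
import numpy as np
import mindspore.ops as ops
from mindspore import Tensor

x = Tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
y = Tensor(np.array([4.0, 5.0, 6.0], dtype=np.float32))

# Functional form: a pre-instantiated Primitive, called directly.
out_functional = ops.add(x, y)

# Equivalent explicit form: instantiate the Primitive, then call it.
out_primitive = ops.Add()(x, y)

print(out_functional)  # [5. 7. 9.]
print(out_primitive)   # [5. 7. 9.]
```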
primitive --------- diff --git a/docs/api_python/source_en/mindspore/operations.rst b/docs/api_python/source_en/mindspore/operations.rst index fe42da4e0035fa383194c446e30e65d2f846b4c0..a96a5912bee9b84e556e99c80c7cb93512aa3509 100644 --- a/docs/api_python/source_en/mindspore/operations.rst +++ b/docs/api_python/source_en/mindspore/operations.rst @@ -241,6 +241,7 @@ Array Operators mindspore.ops.IsFinite mindspore.ops.IsInstance mindspore.ops.IsSubClass + mindspore.ops.MaskedSelect mindspore.ops.Meshgrid mindspore.ops.Ones mindspore.ops.OnesLike diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst index 6534637abb4d182b6ad93bede9408c81a73cda12..67d920ded7b2d941b7cd1f548f9552be4b9b7d95 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst @@ -36,9 +36,9 @@ mindspore.dataset.vision.c_transforms mindspore.dataset.vision.c_transforms.RandomHorizontalFlip mindspore.dataset.vision.c_transforms.RandomHorizontalFlipWithBBox mindspore.dataset.vision.c_transforms.RandomPosterize + mindspore.dataset.vision.c_transforms.RandomResize mindspore.dataset.vision.c_transforms.RandomResizedCrop mindspore.dataset.vision.c_transforms.RandomResizedCropWithBBox - mindspore.dataset.vision.c_transforms.RandomResize mindspore.dataset.vision.c_transforms.RandomResizeWithBBox mindspore.dataset.vision.c_transforms.RandomRotation mindspore.dataset.vision.c_transforms.RandomSelectSubpolicy @@ -53,7 +53,7 @@ mindspore.dataset.vision.c_transforms mindspore.dataset.vision.c_transforms.SoftDvppDecodeRandomCropResizeJpeg mindspore.dataset.vision.c_transforms.SoftDvppDecodeResizeJpeg mindspore.dataset.vision.c_transforms.UniformAugment - mindspore.dataset.vision.c_transforms.HorizontalFlip + mindspore.dataset.vision.c_transforms.VerticalFlip mindspore.dataset.vision.py_transforms ------------------------------------------------- @@ -76,6 +76,7 @@ mindspore.dataset.vision.py_transforms mindspore.dataset.vision.py_transforms.LinearTransformation mindspore.dataset.vision.py_transforms.MixUp mindspore.dataset.vision.py_transforms.Normalize + mindspore.dataset.vision.py_transforms.NormalizePad mindspore.dataset.vision.py_transforms.Pad mindspore.dataset.vision.py_transforms.RandomAffine mindspore.dataset.vision.py_transforms.RandomColor diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst index 93dee4d2a2ead3cc7782678bc0c460b3b321bb03..0f410384731a65325b13eae2e7018b5158f6a356 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst @@ -47,195 +47,195 @@ The functional operators are the pre-instantiated Primitive operators, which can :header-rows: 1 * - functional - - operations + - Description * - mindspore.ops.add - - :class:`mindspore.ops.Add` + - Refer to :class:`mindspore.ops.Add`. * - mindspore.ops.addn - - :class:`mindspore.ops.AddN` + - Refer to :class:`mindspore.ops.AddN`. * - mindspore.ops.array_reduce - - :class:`mindspore.ops.Primitive` ('array_reduce') + - :class:`mindspore.ops.Primitive` ('array_reduce') * - mindspore.ops.array_to_scalar - - :class:`mindspore.ops.Primitive` ('array_to_scalar') + - :class:`mindspore.ops.Primitive` ('array_to_scalar') * - mindspore.ops.assign - - :class:`mindspore.ops.Assign` + - Refer to :class:`mindspore.ops.Assign`.
* - mindspore.ops.assign_add - - :class:`mindspore.ops.AssignAdd` + - Refer to :class:`mindspore.ops.AssignAdd`. * - mindspore.ops.assign_sub - - :class:`mindspore.ops.AssignSub` + - Refer to :class:`mindspore.ops.AssignSub`. * - mindspore.ops.bool_and - - :class:`mindspore.ops.Primitive` ('bool_and') + - :class:`mindspore.ops.Primitive` ('bool_and') * - mindspore.ops.bool_eq - - :class:`mindspore.ops.Primitive` ('bool_eq') + - :class:`mindspore.ops.Primitive` ('bool_eq') * - mindspore.ops.bool_not - - :class:`mindspore.ops.Primitive` ('bool_not') + - :class:`mindspore.ops.Primitive` ('bool_not') * - mindspore.ops.bool_or - - :class:`mindspore.ops.Primitive` ('bool_or') + - :class:`mindspore.ops.Primitive` ('bool_or') * - mindspore.ops.cast - - :class:`mindspore.ops.Cast` + - Refer to :class:`mindspore.ops.Cast`. * - mindspore.ops.distribute - - :class:`mindspore.ops.Primitive` ('distribute') + - :class:`mindspore.ops.Primitive` ('distribute') * - mindspore.ops.dtype - - :class:`mindspore.ops.DType` + - Refer to :class:`mindspore.ops.DType`. * - mindspore.ops.equal - - :class:`mindspore.ops.Equal` + - Refer to :class:`mindspore.ops.Equal`. * - mindspore.ops.expand_dims - - :class:`mindspore.ops.ExpandDims` + - Refer to :class:`mindspore.ops.ExpandDims`. * - mindspore.ops.fill - - :class:`mindspore.ops.Fill` + - Refer to :class:`mindspore.ops.Fill`. * - mindspore.ops.gather - - :class:`mindspore.ops.Gather` + - Refer to :class:`mindspore.ops.Gather`. * - mindspore.ops.gather_nd - - :class:`mindspore.ops.GatherNd` + - Refer to :class:`mindspore.ops.GatherNd`. * - mindspore.ops.hastype - - :class:`mindspore.ops.Primitive` ('hastype') + - :class:`mindspore.ops.Primitive` ('hastype') * - mindspore.ops.in_dict - - :class:`mindspore.ops.Primitive` ('in_dict') + - :class:`mindspore.ops.Primitive` ('in_dict') * - mindspore.ops.is_not - - :class:`mindspore.ops.Primitive` ('is_not') + - :class:`mindspore.ops.Primitive` ('is_not') * - mindspore.ops.is\_ - - :class:`mindspore.ops.Primitive` ('is\_') + - :class:`mindspore.ops.Primitive` ('is\_') * - mindspore.ops.isconstant - - :class:`mindspore.ops.Primitive` ('is_constant') + - :class:`mindspore.ops.Primitive` ('is_constant') * - mindspore.ops.isinstance\_ - - :class:`mindspore.ops.IsInstance` + - Refer to :class:`mindspore.ops.IsInstance`. * - mindspore.ops.issubclass\_ - - :class:`mindspore.ops.IsSubClass` + - Refer to :class:`mindspore.ops.IsSubClass`. * - mindspore.ops.logical_and - - :class:`mindspore.ops.LogicalAnd` + - Refer to :class:`mindspore.ops.LogicalAnd`. * - mindspore.ops.logical_not - - :class:`mindspore.ops.LogicalNot` + - Refer to :class:`mindspore.ops.LogicalNot`. * - mindspore.ops.logical_or - - :class:`mindspore.ops.LogicalOr` + - Refer to :class:`mindspore.ops.LogicalOr`. * - mindspore.ops.make_row_tensor - - :class:`mindspore.ops.Primitive` ('MakeRowTensor') + - Generate row tensor. * - mindspore.ops.make_sparse_tensor - - :class:`mindspore.ops.Primitive` ('MakeSparseTensor') + - Generate sparse tensor. * - mindspore.ops.mixed_precision_cast - - :class:`mindspore.ops.Primitive` ('mixed_precision_cast') + - :class:`mindspore.ops.Primitive` ('mixed_precision_cast') * - mindspore.ops.neg_tensor - - :class:`mindspore.ops.Neg` + - Refer to :class:`mindspore.ops.Neg`. * - mindspore.ops.not_equal - - :class:`mindspore.ops.NotEqual` + - Refer to :class:`mindspore.ops.NotEqual`.
* - mindspore.ops.not_in_dict - - :class:`mindspore.ops.Primitive` ('not_in_dict') + - :class:`mindspore.ops.Primitive` ('not_in_dict') * - mindspore.ops.ones_like - - :class:`mindspore.ops.OnesLike` + - Refer to :class:`mindspore.ops.OnesLike`. * - mindspore.ops.print\_ - - :class:`mindspore.ops.Print` + - Refer to :class:`mindspore.ops.Print`. * - mindspore.ops.rank - - :class:`mindspore.ops.Rank` + - Refer to :class:`mindspore.ops.Rank`. * - mindspore.ops.reduced_shape - - :class:`mindspore.ops.Primitive` ('reduced_shape') + - Calculate the shape of the reduction operator. * - mindspore.ops.reshape - - :class:`mindspore.ops.Reshape` + - Refer to :class:`mindspore.ops.Reshape`. * - mindspore.ops.row_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('RowTensorGetDenseShape') + - Get corresponding dense shape of row tensor. * - mindspore.ops.row_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('RowTensorGetIndices') + - Get indices of row tensor. * - mindspore.ops.row_tensor_get_values - - :class:`mindspore.ops.Primitive` ('RowTensorGetValues') + - Get values of row tensor. * - mindspore.ops.same_type_shape - - :class:`mindspore.ops.SameTypeShape` + - Refer to :class:`mindspore.ops.SameTypeShape`. * - mindspore.ops.scalar_add - - :class:`mindspore.ops.Primitive` ('scalar_add') + - :class:`mindspore.ops.Primitive` ('scalar_add') * - mindspore.ops.scalar_cast - - :class:`mindspore.ops.ScalarCast` + - Refer to :class:`mindspore.ops.ScalarCast`. * - mindspore.ops.scalar_div - - :class:`mindspore.ops.Primitive` ('scalar_div') + - :class:`mindspore.ops.Primitive` ('scalar_div') * - mindspore.ops.scalar_eq - - :class:`mindspore.ops.Primitive` ('scalar_eq') + - :class:`mindspore.ops.Primitive` ('scalar_eq') * - mindspore.ops.scalar_floordiv - - :class:`mindspore.ops.Primitive` ('scalar_floordiv') + - :class:`mindspore.ops.Primitive` ('scalar_floordiv') * - mindspore.ops.scalar_ge - - :class:`mindspore.ops.Primitive` ('scalar_ge') + - :class:`mindspore.ops.Primitive` ('scalar_ge') * - mindspore.ops.scalar_gt - - :class:`mindspore.ops.Primitive` ('scalar_gt') + - :class:`mindspore.ops.Primitive` ('scalar_gt') * - mindspore.ops.scalar_le - - :class:`mindspore.ops.Primitive` ('scalar_le') + - :class:`mindspore.ops.Primitive` ('scalar_le') * - mindspore.ops.scalar_log - - :class:`mindspore.ops.Primitive` ('scalar_log') + - :class:`mindspore.ops.Primitive` ('scalar_log') * - mindspore.ops.scalar_lt - - :class:`mindspore.ops.Primitive` ('scalar_lt') + - :class:`mindspore.ops.Primitive` ('scalar_lt') * - mindspore.ops.scalar_mod - - :class:`mindspore.ops.Primitive` ('scalar_mod') + - :class:`mindspore.ops.Primitive` ('scalar_mod') * - mindspore.ops.scalar_mul - - :class:`mindspore.ops.Primitive` ('scalar_mul') + - :class:`mindspore.ops.Primitive` ('scalar_mul') * - mindspore.ops.scalar_ne - - :class:`mindspore.ops.Primitive` ('scalar_ne') + - :class:`mindspore.ops.Primitive` ('scalar_ne') * - mindspore.ops.scalar_pow - - :class:`mindspore.ops.Primitive` ('scalar_pow') + - :class:`mindspore.ops.Primitive` ('scalar_pow') * - mindspore.ops.scalar_sub - - :class:`mindspore.ops.Primitive` ('scalar_sub') + - :class:`mindspore.ops.Primitive` ('scalar_sub') * - mindspore.ops.scalar_to_array - - :class:`mindspore.ops.ScalarToArray` + - Refer to :class:`mindspore.ops.ScalarToArray`. * - mindspore.ops.scalar_to_tensor - - :class:`mindspore.ops.ScalarToTensor` + - Refer to :class:`mindspore.ops.ScalarToTensor`. 
* - mindspore.ops.scalar_uadd - - :class:`mindspore.ops.Primitive` ('scalar_uadd') + - :class:`mindspore.ops.Primitive` ('scalar_uadd') * - mindspore.ops.scalar_usub - - :class:`mindspore.ops.Primitive` ('scalar_usub') + - :class:`mindspore.ops.Primitive` ('scalar_usub') * - mindspore.ops.scatter_nd - - :class:`mindspore.ops.ScatterNd` + - Refer to :class:`mindspore.ops.ScatterNd`. * - mindspore.ops.scatter_nd_update - - :class:`mindspore.ops.ScatterNdUpdate` + - Refer to :class:`mindspore.ops.ScatterNdUpdate`. * - mindspore.ops.scatter_update - - :class:`mindspore.ops.ScatterUpdate` + - Refer to :class:`mindspore.ops.ScatterUpdate`. * - mindspore.ops.select - - :class:`mindspore.ops.Select` + - Refer to :class:`mindspore.ops.Select`. * - mindspore.ops.shape - - :class:`mindspore.ops.Shape` + - Refer to :class:`mindspore.ops.Shape`. * - mindspore.ops.shape_mul - - :class:`mindspore.ops.Primitive` ('shape_mul') + - Multiply all the elements of the input tuple(shape). * - mindspore.ops.size - - :class:`mindspore.ops.Size` + - Refer to :class:`mindspore.ops.Size`. * - mindspore.ops.sparse_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('SparseTensorGetDenseShape') + - Get corresponding dense shape of sparse tensor. * - mindspore.ops.sparse_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('SparseTensorGetIndices') + - Get indices of sparse tensor. * - mindspore.ops.sparse_tensor_get_values - - :class:`mindspore.ops.Primitive` ('SparseTensorGetValues') + - Get values of sparse tensor. * - mindspore.ops.sqrt - - :class:`mindspore.ops.Sqrt` + - Refer to :class:`mindspore.ops.Sqrt`. * - mindspore.ops.square - - :class:`mindspore.ops.Square` + - Refer to :class:`mindspore.ops.Square`. * - mindspore.ops.stack - - :class:`mindspore.ops.Stack` + - Refer to :class:`mindspore.ops.Stack`. * - mindspore.ops.stop_gradient - - :class:`mindspore.ops.Primitive` ('stop_gradient') + - Disable update during back propagation. * - mindspore.ops.strided_slice - - :class:`mindspore.ops.StridedSlice` + - Refer to :class:`mindspore.ops.StridedSlice`. * - mindspore.ops.string_concat - - :class:`mindspore.ops.Primitive` ('string_concat') + - Concatenate two strings. * - mindspore.ops.string_eq - - :class:`mindspore.ops.Primitive` ('string_equal') + - Determine if two strings are equal. * - mindspore.ops.tensor_div - - :class:`mindspore.ops.RealDiv` + - Refer to :class:`mindspore.ops.RealDiv`. * - mindspore.ops.tensor_floordiv - - :class:`mindspore.ops.FloorDiv` + - Refer to :class:`mindspore.ops.FloorDiv`. * - mindspore.ops.tensor_ge - - :class:`mindspore.ops.GreaterEqual` + - Refer to :class:`mindspore.ops.GreaterEqual`. * - mindspore.ops.tensor_gt - - :class:`mindspore.ops.Greater` + - Refer to :class:`mindspore.ops.Greater`. * - mindspore.ops.tensor_le - - :class:`mindspore.ops.LessEqual` + - Refer to :class:`mindspore.ops.LessEqual`. * - mindspore.ops.tensor_lt - - :class:`mindspore.ops.Less` + - Refer to :class:`mindspore.ops.Less`. * - mindspore.ops.tensor_mod - - :class:`mindspore.ops.FloorMod` + - Refer to :class:`mindspore.ops.FloorMod`. * - mindspore.ops.tensor_mul - - :class:`mindspore.ops.Mul` + - Refer to :class:`mindspore.ops.Mul`. * - mindspore.ops.tensor_pow - - :class:`mindspore.ops.Pow` + - Refer to :class:`mindspore.ops.Pow`. * - mindspore.ops.tensor_sub - - :class:`mindspore.ops.Sub` + - Refer to :class:`mindspore.ops.Sub`. * - mindspore.ops.tile - - :class:`mindspore.ops.Tile` + - Refer to :class:`mindspore.ops.Tile`.
* - mindspore.ops.tuple_to_array - - :class:`mindspore.ops.TupleToArray` + - Refer to :class:`mindspore.ops.TupleToArray`. * - mindspore.ops.typeof - - :class:`mindspore.ops.Primitive` ('typeof') + - Get type of object. * - mindspore.ops.zeros_like - - :class:`mindspore.ops.ZerosLike` + - Refer to :class:`mindspore.ops.ZerosLike`. primitive --------- diff --git a/docs/api_python/source_zh_cn/mindspore/operations.rst b/docs/api_python/source_zh_cn/mindspore/operations.rst index fe42da4e0035fa383194c446e30e65d2f846b4c0..a96a5912bee9b84e556e99c80c7cb93512aa3509 100644 --- a/docs/api_python/source_zh_cn/mindspore/operations.rst +++ b/docs/api_python/source_zh_cn/mindspore/operations.rst @@ -241,6 +241,7 @@ Array Operators mindspore.ops.IsFinite mindspore.ops.IsInstance mindspore.ops.IsSubClass + mindspore.ops.MaskedSelect mindspore.ops.Meshgrid mindspore.ops.Ones mindspore.ops.OnesLike diff --git a/docs/faq/source_en/frontend_compile.md b/docs/faq/source_en/frontend_compile.md index ea009040677dbb33c547f9d21d9720b8c8089d26..6c89d61cfa2db80732662fb6d5df0704e650f0aa 100644 --- a/docs/faq/source_en/frontend_compile.md +++ b/docs/faq/source_en/frontend_compile.md @@ -6,13 +6,13 @@ **Q:What can I do if an error "Create python object \`\` failed, only support create Cell or Primitive object." is reported?** -A: Currently, tensors cannot be constructed on the network. That is, the syntax `x = Tensor(args...)` is not supported. +A: Currently in graph mode, the `construct` function (or the function decorated by the `@ms_function` decorator) only supports the construction of `Cell` and `Primitive object`. The construction of `Tensor` is not supported, that is, the syntax `x = Tensor(args...)` is not supported. If it is a constant tensor, please define it in the function `__init__`. If not, you can use the `@constexpr` decorator to modify the function and generate the `Tensor` in the function. Please see the usage of `@constexpr` in . -The constant `Tensor` used on the network can be used as a network attribute and defined in `init`, that is, `self.x = Tensor(args...)`. Then the constant can be used in `construct`. +The constant `Tensor` used on the network can be used as a network attribute and defined in `init`, that is, `self.x = Tensor(args...)`. Then the constant can be used in the `construct` function (or the function decorated by the `@ms_function` decorator). In the following example, `Tensor` of `shape = (3, 4), dtype = int64` is generated by `@constexpr`. @@ -47,4 +47,90 @@ A: For comparison statements, `MindSpore` supports at most one operator. Please A: When you call the instance of a network, the function `construct` will be executed. And the program will check the number of parameters required by the function `construct` and the number of parameters actually given. If they are not equal, the above exception will be thrown. Please check your code to make sure they are equal. -
\ No newline at end of file +<br/>
+ +**Q:What can I do if an error "Type Join Failed" or "Shape Join Failed" is reported?** + +A: In the inference stage of front-end compilation, the abstract types of nodes, including `type` and `shape`, will be inferred. Common abstract types include `AbstractScalar`, `AbstractTensor`, `AbstractFunction`, `AbstractTuple`, `AbstractList`, etc. In some scenarios, such as multi-branch scenarios, the abstract types of the return values of different branches will be joined to infer the abstract type of the returned result. If these abstract types do not match, or `type`/`shape` are inconsistent, the above exception will be thrown. + +When an error similar to "Type Join Failed: dtype1 = Float32, dtype2 = Float16" appears, it means that the data types are inconsistent, resulting in an exception when joining abstract. According to the provided data types and code line, the error can be quickly located. In addition, the specific abstract information and node information are provided in the error message. You can view the MindIR information through the `analyze_fail.dat` file to locate and solve the problem. For specific introduction of MindIR, please refer to [MindSpore IR (MindIR)](https://www.mindspore.cn/doc/note/en/master/design/mindspore/mindir.html). The code sample is as follows: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.cast = ops.Cast() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.cast(self.relu(x), ms.float16) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out_me = net(input_x, input_a, input_b) +``` + +The result is as follows: + +```python +TypeError: The return values of different branches do not match. Type Join Failed: dtype1 = Float32, dtype2 = Float16. The abstract type of the return value of the current branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float16, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. trace: +In file test_join.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_join.py(14) + if a > b: +``` + +When an error similar to "Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = ()" appears, it means that the shapes are inconsistent, resulting in an exception when joining abstract. 
The code sample is as follows: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.reducesum = ops.ReduceSum() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.reducesum(x) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out = net(input_x, input_a, input_b) +``` + +The result is as follows: + +```python +ValueError: The return values of different branches do not match. Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = (). The abstract type of the return value of the current branch is AbstractTensor(shape: (), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. trace: +In file test_join1.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_join1.py(14) + if a > b: +``` + +When an error similar to "Type Join Failed: abstract type AbstractTensor can not join with AbstractTuple" appears, it means that the two abstract types are mismatched. You need to review the code and modify it based on the provided code line and other error information. + +
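One way this tensor/tuple mismatch can arise is when one branch returns a single tensor while the other returns a tuple of tensors. A minimal sketch mirroring the examples above (illustrative only; this `Net` is a hypothetical reproduction, not code from the repository):

```python
import numpy as np
import mindspore as ms
import mindspore.ops as ops
from mindspore import nn, Tensor, context

context.set_context(mode=context.GRAPH_MODE)

class Net(nn.Cell):
    def __init__(self):
        super().__init__()
        self.relu = ops.ReLU()

    def construct(self, x, a, b):
        if a > b:
            # This branch returns a single tensor (AbstractTensor) ...
            return self.relu(x)
        # ... while this branch returns a tuple (AbstractTuple), so the
        # return values of the two branches cannot be joined.
        return self.relu(x), self.relu(x)

input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32))
input_a = Tensor(2, ms.float32)
input_b = Tensor(6, ms.float32)
net = Net()
# Raises: Type Join Failed: abstract type AbstractTensor can not join with AbstractTuple.
out = net(input_x, input_a, input_b)
```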
diff --git a/docs/faq/source_en/script_implement.md b/docs/faq/source_en/script_implement.md index e84fb079f6b065e165074d44eb71bc16c704e1af..4163a307620231fbbe9025eeabffd82f9472eabe 100644 --- a/docs/faq/source_en/script_implement.md +++ b/docs/faq/source_en/script_implement.md @@ -229,3 +229,9 @@ A: Sorry, this function is not available yet. You can find the optimal hyperpara **Q:What should I do when error `error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory` prompts during application running?** A:While installing Ascend 310 AI Processor software packages,the `CANN` package should install the full-featured `toolkit` version instead of the `nnrt` version. + +<br/>
+ +**Q:Why does context.set_ps_context(enable_ps=True) in model_zoo/official/cv/resnet/train.py in the MindSpore code have to be set before init?** + +A:In MindSpore Ascend mode, if init is called first, every process is allocated a card. However, in parameter server training mode the server does not need a card, so the worker and the server would end up using the same card, resulting in the error: Hccl dependent tsd is not open. \ No newline at end of file diff --git a/docs/faq/source_zh_cn/frontend_compile.md b/docs/faq/source_zh_cn/frontend_compile.md index 277531b517ed8198c124c4f5e0f2751f71bef1e2..6d484818b41650b63c55aca42c2c91a95644b872 100644 --- a/docs/faq/source_zh_cn/frontend_compile.md +++ b/docs/faq/source_zh_cn/frontend_compile.md @@ -6,13 +6,13 @@ **Q:运行时报错“Create python object \`\` failed, only support create Cell or Primitive object.”怎么办?** -A:当前图模式不支持在网络里构造`Tensor`,即不支持语法`x = Tensor(args...)`。 +A:当前在图模式下,`construct`函数(或`@ms_function`装饰器修饰的函数)仅支持构造`Cell`和`Primitive object`,不支持构造`Tensor`,即不支持语法`x = Tensor(args...)`。 如果是常量`Tensor`,请在`__init__`函数中定义。如果不是常量`Tensor`,可以通过`@constexpr`装饰器修饰函数,在函数里生成`Tensor`。 关于`@constexpr`的用法可参考:。 -对于网络中需要用到的常量`Tensor`,可以作为网络的属性,在`init`的时候定义,即`self.x = Tensor(args...)`,然后在`construct`里使用。 +对于网络中需要用到的常量`Tensor`,可以作为网络的属性,在`init`的时候定义,即`self.x = Tensor(args...)`,然后在`construct`函数(或`@ms_function`装饰器修饰的函数)里使用。 如下示例,通过`@constexpr`生成一个`shape = (3, 4), dtype = int64`的`Tensor`。 @@ -47,4 +47,90 @@ A:对于比较语句,`MindSpore`最多支持一个操作数。例如不支 A:网络的实例被调用时,会执行`construct`方法,然后会检查`construct`方法需要的参数个数和实际传入的参数个数,如果不一致则会抛出以上异常。 请检查脚本中调用网络实例时传入的参数个数,和定义的网络中`construct`函数需要的参数个数是否一致。 -<br/>
\ No newline at end of file +<br/>
+ +**Q:运行时报错“Type Join Failed”或“Shape Join Failed”怎么办?** + +A:在前端编译的推理阶段,会对节点的抽象类型(包含`type`、`shape`等)进行推导,常见抽象类型包括`AbstractScalar`、`AbstractTensor`、`AbstractFunction`、`AbstractTuple`、`AbstractList`等。在一些场景比如多分支场景,会对不同分支返回值的抽象类型进行`join`合并,推导出返回结果的抽象类型。如果抽象类型不匹配,或者`type`/`shape`不一致,则会抛出以上异常。 + +当出现类似“Type Join Failed: dtype1 = Float32, dtype2 = Float16”的报错时,说明数据类型不一致,导致抽象类型合并失败。根据提供的数据类型和代码行信息,可以快速定位出错范围。此外,报错信息中提供了具体的抽象类型信息、节点信息,可以通过`analyze_fail.dat`文件查看MindIR信息,定位解决问题。关于MindIR的具体介绍,可以参考[MindSpore IR(MindIR)](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/mindir.html)。代码样例如下: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.cast = ops.Cast() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.cast(self.relu(x), ms.float16) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out_me = net(input_x, input_a, input_b) +``` + +执行结果如下: + +```python +TypeError: The return values of different branches do not match. Type Join Failed: dtype1 = Float32, dtype2 = Float16. The abstract type of the return value of the current branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float16, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. trace: +In file test_type_join_failed.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_type_join_failed.py(14) + if a > b: +``` + +当出现类似“Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = ()”的报错时,说明`shape`不一致,导致抽象类型合并失败。代码样例如下: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.reducesum = ops.ReduceSum() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.reducesum(x) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out = net(input_x, input_a, input_b) +``` + +执行结果如下: + +```python +ValueError: The return values of different branches do not match. Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = (). The abstract type of the return value of the current branch is AbstractTensor(shape: (), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. 
trace: +In file test_shape_join_failed.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_shape_join_failed.py(14) + if a > b: +``` + +当出现如“Type Join Failed: abstract type AbstractTensor can not join with AbstractTuple”的报错时,说明这两种抽象类型无法匹配,需要根据提供的代码行等报错信息,重新检视代码并修改。 + +
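这类张量与元组不匹配的情况,常见于一个分支返回单个张量、另一个分支返回张量元组。下面给出一个与上文示例风格一致的最小示意代码(仅作示意,其中的`Net`为假设的示例网络,并非仓库中的代码):

```python
import numpy as np
import mindspore as ms
import mindspore.ops as ops
from mindspore import nn, Tensor, context

context.set_context(mode=context.GRAPH_MODE)

class Net(nn.Cell):
    def __init__(self):
        super().__init__()
        self.relu = ops.ReLU()

    def construct(self, x, a, b):
        if a > b:
            # 该分支返回单个张量(AbstractTensor)……
            return self.relu(x)
        # ……而该分支返回元组(AbstractTuple),两个分支的返回值无法合并。
        return self.relu(x), self.relu(x)

input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32))
input_a = Tensor(2, ms.float32)
input_b = Tensor(6, ms.float32)
net = Net()
# 报错:Type Join Failed: abstract type AbstractTensor can not join with AbstractTuple。
out = net(input_x, input_a, input_b)
```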
diff --git a/docs/faq/source_zh_cn/script_implement.md b/docs/faq/source_zh_cn/script_implement.md index 7461939e09ec9e2c2d950942ed2608050ba58768..ed2353a06f2334971b1c781b28de3acfb84d79e5 100644 --- a/docs/faq/source_zh_cn/script_implement.md +++ b/docs/faq/source_zh_cn/script_implement.md @@ -247,3 +247,9 @@ A:您好,很抱歉暂时还未有这样的功能。目前只能通过训练- **Q:运行应用时报错`error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory`怎么办?** A:安装MindSpore所依赖的Ascend 310 AI处理器配套软件包时,`CANN`包不能安装`nnrt`版本,而是需要安装功能完整的`toolkit`版本。 + +<br/>
+ +**Q:MindSpore代码里面的model_zoo/official/cv/resnet/train.py中context.set_ps_context(enable_ps=True)为什么一定要在init之前设置?** + +A:MindSpore Ascend模式下,如果先调用init,那么会为所有的进程都分配卡,但是parameter server训练模式下server是不需要分配卡的,那么worker和server就会去使用同一块卡,导致会报错:Hccl dependent tsd is not open。 diff --git a/docs/mindfl/api/Makefile b/docs/mindfl/api/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1eff8952707bdfa503c8d60c1e9a903053170ba2 --- /dev/null +++ b/docs/mindfl/api/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source_zh_cn +BUILDDIR = build_zh_cn + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/mindfl/api/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc b/docs/mindfl/api/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d29e9adbd432f769cae6d07608ca48367941a05 Binary files /dev/null and b/docs/mindfl/api/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc differ diff --git a/docs/mindfl/api/source_zh_cn/_ext/my_signature.py b/docs/mindfl/api/source_zh_cn/_ext/my_signature.py new file mode 100644 index 0000000000000000000000000000000000000000..7eb6cec6f4607af6b66dfe3d460b7e57d39e20d5 --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/_ext/my_signature.py @@ -0,0 +1,354 @@ +""" +Rewrote the Signature module to fix the default signature error in the autodoc module. +""" + +import inspect +import re +import types +import functools + + +def _sort_param(param_list, target_str): + """Sort param_list as default order.""" + ls = [] + for param_name in param_list: + ls.append((param_name, target_str.find(param_name))) + ls.sort(key=lambda x: x[1], reverse=False) + ls = [i[0] for i in ls] + return ls + + +def get_default_params(func): + """ Get the default signatures from function. """ + source_code = inspect.getsource(func) + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + karg_pos = func_code.co_kwonlyargcount + kwargs_num = arg_names.count("args") + arg_names.count("kwargs") + all_param_names = list(arg_names[:pos_count+karg_pos+kwargs_num]) + all_params = re.findall(r"def [\w_\d\-]+\(([\S\s]*?)\):", source_code)[0].replace("\n", "").replace("'", "\"") + + # sub null spaces from matched all param str. + re_space_sub = re.compile(r",\s+") + all_params = re_space_sub.sub(",", all_params) + + all_param_names = _sort_param(all_param_names, all_params) + + # sub the extra "=" from param.
+ re_equate_sub = re.compile("=") + + re_defaults_param = re.compile(r"(.*?)".join(all_param_names) + r"(.*)") + defaults_params = re_defaults_param.findall(all_params) + if defaults_params: + if isinstance(defaults_params[0], tuple): + defaults_params = list([i[:-2] if i[-2:] == "**" else i for i in defaults_params[0]]) + defaults_params_list = [] + for i in defaults_params: + if "=" in i and i: + i = re_equate_sub.sub("", i, count=1).strip(",") + if i[:6] == "lambda": + i = "<" + i + ">" + defaults_params_list.append(i) + defaults_params_tuple = tuple(defaults_params_list) + return defaults_params_tuple + return func.__defaults__ + + +def _my_signature_from_function(cls, func): + """Private helper: constructs Signature for the given python function.""" + + is_duck_function = False + if not inspect.isfunction(func): + if inspect._signature_is_functionlike(func): # pylint: disable=protected-access + is_duck_function = True + else: + # If it's not a pure Python function, and not a duck type + # of pure function: + raise TypeError('{!r} is not a Python function'.format(func)) + + Parameter = cls._parameter_cls # pylint: disable=protected-access + + # Parameter information._partialmethod + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + positional = tuple(arg_names[:pos_count]) + keyword_only_count = func_code.co_kwonlyargcount + keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)] + annotations = func.__annotations__ + defaults = get_default_params(func) + if keyword_only_count == len(defaults): + kwdefaults = dict() + for num, arg_name in enumerate(keyword_only): + kwdefaults[arg_name] = defaults[num] + else: + kwdefaults = func.__kwdefaults__ + pos_defaults = func.__defaults__ + + if pos_defaults: + pos_default_count = len(pos_defaults) + else: + pos_default_count = 0 + + parameters = [] + + # Non-keyword-only parameters w/o defaults. + non_default_count = pos_count - pos_default_count + for name in positional[:non_default_count]: + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD)) # pylint: disable=protected-access + + # ... w/ defaults. + for offset, name in enumerate(positional[non_default_count:]): + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD, # pylint: disable=protected-access + default=defaults[offset])) + + # *args + if func_code.co_flags & inspect.CO_VARARGS: + name = arg_names[pos_count + keyword_only_count] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_POSITIONAL)) # pylint: disable=protected-access + + # Keyword-only parameters. 
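+    # Defaults for keyword-only parameters come from `kwdefaults`, built above
+    # either from the re-parsed source defaults or from func.__kwdefaults__.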
+ for name in keyword_only: + default = inspect._empty # pylint: disable=protected-access + if kwdefaults is not None: + default = kwdefaults.get(name, inspect._empty) # pylint: disable=protected-access + + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._KEYWORD_ONLY, # pylint: disable=protected-access + default=default)) + # **kwargs + if func_code.co_flags & inspect.CO_VARKEYWORDS: + index = pos_count + keyword_only_count + if func_code.co_flags & inspect.CO_VARARGS: + index += 1 + + name = arg_names[index] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_KEYWORD)) # pylint: disable=protected-access + + # Is 'func' is a pure Python function - don't validate the + # parameters list (for correct order and defaults), it should be OK. + return cls(parameters, + return_annotation=annotations.get('return', inspect._empty), # pylint: disable=protected-access + __validate_parameters__=is_duck_function) + + +def _my_signature_from_callable(obj, *, + follow_wrapper_chains=True, + skip_bound_arg=True, + sigcls): + """Private helper function to get signature for arbitrary + callable objects. + """ + + if not callable(obj): + raise TypeError('{!r} is not a callable object'.format(obj)) + + if isinstance(obj, types.MethodType): + # In this case we skip the first parameter of the underlying + # function (usually `self` or `cls`). + sig = _my_signature_from_callable( + obj.__func__, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + # Was this function wrapped by a decorator? + if follow_wrapper_chains: + obj = inspect.unwrap(obj, stop=(lambda f: hasattr(f, "__signature__"))) + if isinstance(obj, types.MethodType): + # If the unwrapped object is a *method*, we might want to + # skip its first parameter (self). + # See test_signature_wrapped_bound_method for details. 
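+            # Re-enter this helper with the unwrapped bound method so the
+            # MethodType branch above strips its `self` parameter.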
+ return _my_signature_from_callable( + obj, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + try: + sig = obj.__signature__ + except AttributeError: + pass + else: + if sig is not None: + if not isinstance(sig, MySignature): + raise TypeError( + 'unexpected object {!r} in __signature__ ' + 'attribute'.format(sig)) + return sig + + try: + partialmethod = obj._partialmethod # pylint: disable=protected-access + except AttributeError: + pass + else: + if isinstance(partialmethod, functools.partialmethod): + # Unbound partialmethod (see functools.partialmethod) + # This means, that we need to calculate the signature + # as if it's a regular partial object, but taking into + # account that the first positional argument + # (usually `self`, or `cls`) will not be passed + # automatically (as for boundmethods) + + wrapped_sig = _my_signature_from_callable( + partialmethod.func, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + sig = inspect._signature_get_partial(wrapped_sig, partialmethod, (None,)) # pylint: disable=protected-access + first_wrapped_param = tuple(wrapped_sig.parameters.values())[0] + if first_wrapped_param.kind is Parameter.VAR_POSITIONAL: # pylint: disable=no-else-return + # First argument of the wrapped callable is `*args`, as in + # `partialmethod(lambda *args)`. + return sig + else: + sig_params = tuple(sig.parameters.values()) + assert (not sig_params or + first_wrapped_param is not sig_params[0]) + new_params = (first_wrapped_param,) + sig_params + return sig.replace(parameters=new_params) + + if inspect.isfunction(obj) or inspect._signature_is_functionlike(obj): # pylint: disable=protected-access + # If it's a pure Python function, or an object that is duck type + # of a Python function (Cython functions, for instance), then: + return _my_signature_from_function(sigcls, obj) + + if inspect._signature_is_builtin(obj): # pylint: disable=protected-access + return inspect._signature_from_builtin(sigcls, obj, # pylint: disable=protected-access + skip_bound_arg=skip_bound_arg) + + if isinstance(obj, functools.partial): + wrapped_sig = _my_signature_from_callable( + obj.func, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + return inspect._signature_get_partial(wrapped_sig, obj) # pylint: disable=protected-access + + sig = None + if isinstance(obj, type): + # obj is a class or a metaclass + + # First, let's see if it has an overloaded __call__ defined + # in its metaclass + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + else: + # Now we check if the 'obj' class has a '__new__' method + new = inspect._signature_get_user_defined_method(obj, '__new__') # pylint: disable=protected-access + if new is not None: + sig = _my_signature_from_callable( + new, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + else: + # Finally, we should have at least __init__ implemented + init = inspect._signature_get_user_defined_method(obj, '__init__') # pylint: disable=protected-access + if init is not None: + sig = _my_signature_from_callable( + init, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if sig is None: + # At this 
point we know, that `obj` is a class, with no user- + # defined '__init__', '__new__', or class-level '__call__' + + for base in obj.__mro__[:-1]: + # Since '__text_signature__' is implemented as a + # descriptor that extracts text signature from the + # class docstring, if 'obj' is derived from a builtin + # class, its own '__text_signature__' may be 'None'. + # Therefore, we go through the MRO (except the last + # class in there, which is 'object') to find the first + # class with non-empty text signature. + try: + text_sig = base.__text_signature__ + except AttributeError: + pass + else: + if text_sig: + # If 'obj' class has a __text_signature__ attribute: + # return a signature based on it + return inspect._signature_fromstr(sigcls, obj, text_sig) # pylint: disable=protected-access + + # No '__text_signature__' was found for the 'obj' class. + # Last option is to check if its '__init__' is + # object.__init__ or type.__init__. + if type not in obj.__mro__: + # We have a class (not metaclass), but no user-defined + # __init__ or __new__ for it + if (obj.__init__ is object.__init__ and # pylint: disable=no-else-return + obj.__new__ is object.__new__): + # Return a signature of 'object' builtin. + return sigcls.from_callable(object) + else: + raise ValueError( + 'no signature found for builtin type {!r}'.format(obj)) + + elif not isinstance(obj, inspect._NonUserDefinedCallables): # pylint: disable=protected-access + # An object with __call__ + # We also check that the 'obj' is not an instance of + # _WrapperDescriptor or _MethodWrapper to avoid + # infinite recursion (and even potential segfault) + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + try: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + except ValueError as ex: + msg = 'no signature found for {!r}'.format(obj) + raise ValueError(msg) from ex + + if sig is not None: + # For classes and objects we skip the first parameter of their + # __call__, __new__, or __init__ methods + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + if isinstance(obj, types.BuiltinFunctionType): + # Raise a nicer error message for builtins + msg = 'no signature found for builtin function {!r}'.format(obj) + raise ValueError(msg) + + raise ValueError('callable {!r} is not supported by signature'.format(obj)) + + +class MySignature(inspect.Signature): + + @classmethod + def from_callable(cls, obj, *, follow_wrapped=True): + """Constructs Signature for the given callable object.""" + return _my_signature_from_callable(obj, sigcls=cls, + follow_wrapper_chains=follow_wrapped) + + +def signature(obj, *, follow_wrapped=True): + """Get a signature object for the passed callable.""" + return MySignature.from_callable(obj, follow_wrapped=follow_wrapped) diff --git a/docs/mindfl/api/source_zh_cn/_static/logo_notebook.png b/docs/mindfl/api/source_zh_cn/_static/logo_notebook.png new file mode 100644 index 0000000000000000000000000000000000000000..18c2e29e4b73ee428f70253feffdd855fdf0c422 Binary files /dev/null and b/docs/mindfl/api/source_zh_cn/_static/logo_notebook.png differ diff --git a/docs/mindfl/api/source_zh_cn/_static/logo_source.png b/docs/mindfl/api/source_zh_cn/_static/logo_source.png new file mode 100644 index 0000000000000000000000000000000000000000..880f2bc87172daf487654c0ba4f1657c672bd2b8 Binary files /dev/null 
and b/docs/mindfl/api/source_zh_cn/_static/logo_source.png differ diff --git a/docs/mindfl/api/source_zh_cn/_templates/classtemplate.rst b/docs/mindfl/api/source_zh_cn/_templates/classtemplate.rst new file mode 100644 index 0000000000000000000000000000000000000000..fd88815f7b49e1cd25195fc8eceba498eafe780c --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/_templates/classtemplate.rst @@ -0,0 +1,24 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname in [] %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% elif objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/mindfl/api/source_zh_cn/_templates/classtemplate_inherited.rst b/docs/mindfl/api/source_zh_cn/_templates/classtemplate_inherited.rst new file mode 100644 index 0000000000000000000000000000000000000000..8f4a423dca6e678c191df73d142e4e52a862a3db --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/_templates/classtemplate_inherited.rst @@ -0,0 +1,26 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :inherited-members: + :members: + +{% elif fullname=="mindspore.numpy.ix_" %} + +mindspore.numpy.ix\_ +==================== + +.. autofunction:: mindspore.numpy.ix_ + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. autogenerated from _templates/classtemplate_inherited.rst \ No newline at end of file diff --git a/docs/mindfl/api/source_zh_cn/_templates/classtemplate_probability.rst b/docs/mindfl/api/source_zh_cn/_templates/classtemplate_probability.rst new file mode 100644 index 0000000000000000000000000000000000000000..6329880e1fc540de910b25d1724a2cfba8d501f2 --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/_templates/classtemplate_probability.rst @@ -0,0 +1,13 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/mindfl/api/source_zh_cn/conf.py b/docs/mindfl/api/source_zh_cn/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..170b75c94df1068d62afabf31443d034b296220e --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/conf.py @@ -0,0 +1,398 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
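+# The local `_ext` directory is appended below so that the custom
+# `my_signature` helper (used later in this file to patch autodoc) is importable.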
+# + +import os +import re +import sys +sys.path.append(os.path.abspath('./_ext')) +import sphinx.ext.autosummary.generate as g +from sphinx.ext import autodoc as sphinx_autodoc +from sphinx.util import inspect as sphinx_inspect +from sphinx.domains import python as sphinx_domain_python +from textwrap import dedent +# sys.path.insert(0, os.path.abspath('.')) + +import mindspore +# If you don't want to generate MindArmour APIs, comment this line. +# import mindarmour +# If you don't want to generate MindSpore_Hub APIs, comment this line. +# import mindspore_hub +# If you don't want to generate MindSpore_Serving APIs, comment this line. +# import mindspore_serving + +# -- Project information ----------------------------------------------------- + +project = 'MindSpore' +copyright = '2020, MindSpore' +author = 'MindSpore' + +# The full version, including alpha/beta/rc tags +release = 'master' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', + 'recommonmark', +] + +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +pygments_style = 'sphinx' + +autodoc_inherit_docstrings = False + +autosummary_generate = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +html_search_language = 'zh' + +html_search_options = {'dict': '../../resource/jieba.txt'} + +html_static_path = ['_static'] + +# -- Options for Texinfo output ------------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'python': ('https://docs.python.org/', '../python_objects.inv'), + 'numpy': ('https://docs.scipy.org/doc/numpy/', '../numpy_objects.inv'), +} + +from typing import List, Tuple +from docutils.nodes import Node + +from sphinx.locale import __ +from sphinx.ext.autosummary import Autosummary, posixpath, addnodes, logger, Matcher, autosummary_toc, get_import_prefixes_from_env +from sphinx.ext.autosummary import mock, StringList, ModuleType, get_documenter, ModuleAnalyzer, PycodeError, mangle_signature +from sphinx.ext.autosummary import import_by_name, extract_summary, autosummary_table, nodes, switch_source_input, rst +from sphinx.ext.autodoc.directive import DocumenterBridge, Options + +class MsAutosummary(Autosummary): + """ + Inherited from sphinx's autosummary, add titles and a column for the generated table. 
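+    The extra column's title and the docstring field it is filled from are
+    configured by subclasses via init().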
+ """ + + def init(self): + """ + init method + """ + self.find_doc_name = "" + self.third_title = "" + self.default_doc = "" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for i, piece in enumerate(doc): + if piece.startswith(self.find_doc_name): + env_sum = doc[i+1][4:] + return env_sum + + def run(self): + """ + run method + """ + self.init() + self.bridge = DocumenterBridge(self.env, self.state.document.reporter, + Options(), self.lineno, self.state) + + names = [x.strip().split()[0] for x in self.content + if x.strip() and re.search(r'^[~a-zA-Z_]', x.strip()[0])] + items = self.get_items(names) + teble_nodes = self.get_table(items) + + if 'toctree' in self.options: + dirname = posixpath.dirname(self.env.docname) + + tree_prefix = self.options['toctree'].strip() + docnames = [] + excluded = Matcher(self.config.exclude_patterns) + for item in items: + docname = posixpath.join(tree_prefix, item[3]) + docname = posixpath.normpath(posixpath.join(dirname, docname)) + if docname not in self.env.found_docs: + location = self.state_machine.get_source_and_line(self.lineno) + if excluded(self.env.doc2path(docname, None)): + msg = __('autosummary references excluded document %r. Ignored.') + else: + msg = __('autosummary: stub file not found %r. ' + 'Check your autosummary_generate setting.') + logger.warning(msg, item[3], location=location) + continue + docnames.append(docname) + + if docnames: + tocnode = addnodes.toctree() + tocnode['includefiles'] = docnames + tocnode['entries'] = [(None, docn) for docn in docnames] + tocnode['maxdepth'] = -1 + tocnode['glob'] = None + teble_nodes.append(autosummary_toc('', '', tocnode)) + return teble_nodes + + def get_items(self, names: List[str]) -> List[Tuple[str, str, str, str, str]]: + """Try to import the given names, and return a list of + ``[(name, signature, summary_string, real_name, env_summary), ...]``. + """ + prefixes = get_import_prefixes_from_env(self.env) + items = [] # type: List[Tuple[str, str, str, str, str]] + max_item_chars = 50 + + for name in names: + display_name = name + if name.startswith('~'): + name = name[1:] + display_name = name.split('.')[-1] + try: + with mock(self.config.autosummary_mock_imports): + real_name, obj, parent, modname = import_by_name(name, prefixes=prefixes) + except ImportError: + logger.warning(__('failed to import %s'), name) + items.append((name, '', '', name, '')) + continue + + self.bridge.result = StringList() # initialize for each documenter + full_name = real_name + if not isinstance(obj, ModuleType): + # give explicitly separated module name, so that members + # of inner classes can be documented + full_name = modname + '::' + full_name[len(modname) + 1:] + # NB. 
using full_name here is important, since Documenters + # handle module prefixes slightly differently + doccls = get_documenter(self.env.app, obj, parent) + documenter = doccls(self.bridge, full_name) + + if not documenter.parse_name(): + logger.warning(__('failed to parse name %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if not documenter.import_object(): + logger.warning(__('failed to import object %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if documenter.options.members and not documenter.check_module(): + continue + + # try to also get a source code analyzer for attribute docs + try: + documenter.analyzer = ModuleAnalyzer.for_module( + documenter.get_real_modname()) + # parse right now, to get PycodeErrors on parsing (results will + # be cached anyway) + documenter.analyzer.find_attr_docs() + except PycodeError as err: + logger.debug('[autodoc] module analyzer failed: %s', err) + # no source file -- e.g. for builtin and C modules + documenter.analyzer = None + + # -- Grab the signature + + try: + sig = documenter.format_signature(show_annotation=False) + except TypeError: + # the documenter does not support ``show_annotation`` option + sig = documenter.format_signature() + + if not sig: + sig = '' + else: + max_chars = max(10, max_item_chars - len(display_name)) + sig = mangle_signature(sig, max_chars=max_chars) + + # -- Grab the summary + + documenter.add_content(None) + summary = extract_summary(self.bridge.result.data[:], self.state.document) + env_sum = self.extract_env_summary(self.bridge.result.data[:]) + items.append((display_name, sig, summary, real_name, env_sum)) + + return items + + def get_table(self, items: List[Tuple[str, str, str, str, str]]) -> List[Node]: + """Generate a proper list of table nodes for autosummary:: directive. + + *items* is a list produced by :meth:`get_items`. + """ + table_spec = addnodes.tabular_col_spec() + table_spec['spec'] = r'\X{1}{2}\X{1}{2}' + + table = autosummary_table('') + real_table = nodes.table('', classes=['longtable']) + table.append(real_table) + group = nodes.tgroup('', cols=3) + real_table.append(group) + group.append(nodes.colspec('', colwidth=10)) + group.append(nodes.colspec('', colwidth=70)) + group.append(nodes.colspec('', colwidth=30)) + body = nodes.tbody('') + group.append(body) + + def append_row(*column_texts: str) -> None: + row = nodes.row('', color="red") + source, line = self.state_machine.get_source_and_line() + for text in column_texts: + node = nodes.paragraph('') + vl = StringList() + vl.append(text, '%s:%d:' % (source, line)) + with switch_source_input(self.state, vl): + self.state.nested_parse(vl, 0, node) + try: + if isinstance(node[0], nodes.paragraph): + node = node[0] + except IndexError: + pass + row.append(nodes.entry('', node)) + body.append(row) + + # add table's title + append_row("**API Name**", "**Description**", self.third_title) + for name, sig, summary, real_name, env_sum in items: + qualifier = 'obj' + if 'nosignatures' not in self.options: + col1 = ':%s:`%s <%s>`\\ %s' % (qualifier, name, real_name, rst.escape(sig)) + else: + col1 = ':%s:`%s <%s>`' % (qualifier, name, real_name) + col2 = summary + col3 = env_sum + append_row(col1, col2, col3) + + return [table_spec, table] + + +class MsNoteAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Note` to the table. + """ + + def init(self): + """ + init method + """ + self.find_doc_name = ".. 
note::" + self.third_title = "**Note**" + self.default_doc = "None" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for piece in doc: + if piece.startswith(self.find_doc_name): + env_sum = piece[10:] + return env_sum + + +class MsPlatformAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Supported Platforms` to the table. + """ + def init(self): + """ + init method + """ + self.find_doc_name = "Supported Platforms:" + self.third_title = "**{}**".format(self.find_doc_name[:-1]) + self.default_doc = "To Be Developed" + + +def setup(app): + app.add_directive('msplatformautosummary', MsPlatformAutoSummary) + app.add_directive('msnoteautosummary', MsNoteAutoSummary) + +# Modify regex for sphinx.ext.autosummary.generate.find_autosummary_in_lines. +gfile_abs_path = os.path.abspath(g.__file__) +autosummary_re_line_old = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+autosummary::\s*')" +autosummary_re_line_new = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+(ms[a-z]*)?autosummary::\s*')" +with open(gfile_abs_path, "r+", encoding="utf8") as f: + data = f.read() + data = data.replace(autosummary_re_line_old, autosummary_re_line_new) + f.seek(0) + f.write(data) + +# Modify default signatures for autodoc. +autodoc_source_path = os.path.abspath(sphinx_autodoc.__file__) +inspect_source_path = os.path.abspath(sphinx_inspect.__file__) +autodoc_source_re = re.compile(r"(\s+)args = self\.format_args\(\*\*kwargs\)") +inspect_source_code_str = """signature = inspect.signature(subject)""" +inspect_target_code_str = """signature = my_signature.signature(subject)""" +autodoc_source_code_str = """args = self.format_args(**kwargs)""" +is_autodoc_code_str = """args = args.replace("'", "")""" +with open(autodoc_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if is_autodoc_code_str not in code_str: + code_str_lines = code_str.split("\n") + autodoc_target_code_str = None + for line in code_str_lines: + re_matched_str = autodoc_source_re.search(line) + if re_matched_str: + space_num = re_matched_str.group(1) + autodoc_target_code_str = dedent("""\ + {0} + {1}if type(args) != type(None): + {1} {2}""".format(autodoc_source_code_str, space_num, is_autodoc_code_str)) + break + if autodoc_target_code_str: + code_str = code_str.replace(autodoc_source_code_str, autodoc_target_code_str) + f.seek(0) + f.truncate() + f.write(code_str) +with open(inspect_source_path, "r+", encoding="utf8") as g: + code_str = g.read() + if inspect_target_code_str not in code_str: + code_str = code_str.replace(inspect_source_code_str, inspect_target_code_str) + if "import my_signature" not in code_str: + code_str = code_str.replace("import sys", "import sys\nimport my_signature") + g.seek(0) + g.truncate() + g.write(code_str) + +# remove extra space for default params for autodoc. 
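+# The patch below changes how Sphinx splits a rendered argument list so that
+# default values are displayed with a single separating space instead of an
+# extra one.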
+sphinx_domain_python_source_path = os.path.abspath(sphinx_domain_python.__file__) +python_code_source = """for argument in arglist.split(','):""" +python_code_target = """for argument in [" " + i if num > 1 else i for num,i in enumerate(arglist.split(", "))]:""" +with open(sphinx_domain_python_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if python_code_target not in code_str: + code_str = code_str.replace(python_code_source, python_code_target) + f.seek(0) + f.truncate() + f.write(code_str) diff --git a/docs/mindfl/api/source_zh_cn/index.rst b/docs/mindfl/api/source_zh_cn/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..646f9e52c3baaf9f73903fb963c2631006623a13 --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/index.rst @@ -0,0 +1,13 @@ +.. MindSpore documentation master file, created by + sphinx-quickstart on Thu Mar 24 11:00:00 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +接口参考 +======== + +.. toctree:: + :maxdepth: 1 + + mindfl_client + mindfl_server diff --git a/docs/mindfl/api/source_zh_cn/interface_description_fl_client.md b/docs/mindfl/api/source_zh_cn/interface_description_fl_client.md new file mode 100644 index 0000000000000000000000000000000000000000..6fc3d1348cd51d4bf8b39586d82b58b0e675a53b --- /dev/null +++ b/docs/mindfl/api/source_zh_cn/interface_description_fl_client.md @@ -0,0 +1,188 @@ +# 使用示例 + +## 联邦学习启动接口flJobRun() + +调用flJobRun()接口前,需先实例化参数类FLParameter,进行相关参数设置, 相关参数如下: + +| 参数名称 | 参数类型 | 是否必须 | 描述信息 | 备注 | +| -------------- | -------- | -------- | -------------------------------------------------------- | ------------------------------------------------------------ | +| trainDataset | String | Y | 训练数据集路径 | 情感分类任务是训练数据txt文件格式;图片分类任务是训练data.bin文件与label.bin文件用逗号拼接 | +| vocabFile | String | Y | 数据预处理的词典文件路径 | 情感分类任务必须设置;图片分类任务不需要设置该参数,默认为null | +| idsFile | String | Y | 词典的映射id文件路径 | 情感分类任务必须设置;图片分类任务不需要设置该参数,默认为null | +| testDataset | String | N | 测试数据集路径 | 1. 图片分类任务不需要设置该参数,默认为null;情感分类任务不设置该参数代表训练过程中不进行验证
2.情感分类任务是测试数据txt文件格式;图片分类任务是测试data.bin文件与label.bin文件用逗号拼接 |
+| flName | String | Y | 联邦学习使用的模型名称 | 情感分类任务需设置为“adbert”;图片分类任务需设置为“lenet” |
+| trainModelPath | String | Y | 联邦学习使用的训练模型路径,为.ms文件的绝对路径 | |
+| inferModelPath | String | Y | 联邦学习使用的推理模型路径,为.ms文件的绝对路径 | 情感分类任务必须设置;图片分类任务可设置为与trainModelPath相同 |
+| flID | String | Y | 用于唯一标识客户端的ID | |
+| ip | String | Y | Server端所启动服务的ip地址,形如“http://10.113.216.106:” | 后期ip+port会改为域名 |
+| port | int | Y | Server端所启动服务的端口号 | 后期ip+port会改为域名 |
+| useSSL | boolean | N | 端云通信是否进行ssl证书认证,默认不进行 | |
+
+创建SyncFLJob对象,并通过SyncFLJob类的flJobRun()方法启动同步联邦学习任务。
+
+示例代码如下:
+
+1. 情感分类任务示例代码
+
+    ```java
+    // set parameters
+    String trainDataset = "SyncFLClient0604/data/adbert/client/0.txt"; //绝对路径
+    String vocabFile = "SyncFLClient0604/data/adbert/vocab.txt"; //绝对路径
+    String idsFile = "SyncFLClient0604/data/adbert/vocab_map_ids.txt"; //绝对路径
+    String testDataset = "SyncFLClient0604/data/adbert/eval/eval.txt"; //绝对路径,若不包含单独的测试集,可使用训练数据作为测试集,或不进行测试(不设置该参数)
+    String flName = "adbert";
+    String trainModelPath = "SyncFLClient0604/ms/adbert/albert_ad_train.mindir.ms"; //绝对路径
+    String inferModelPath = "SyncFLClient0604/ms/adbert/albert_ad_infer.mindir.ms"; //绝对路径
+    String flID = UUID.randomUUID().toString();
+    String ip = "http://10.113.216.106:";
+    int port = 6668;
+    boolean useSSL = false;
+
+    FLParameter flParameter = FLParameter.getInstance();
+    flParameter.setTrainDataset(trainDataset);
+    flParameter.setVocabFile(vocabFile);
+    flParameter.setIdsFile(idsFile);
+    flParameter.setTestDataset(testDataset);
+    flParameter.setFlName(flName);
+    flParameter.setTrainModelPath(trainModelPath);
+    flParameter.setInferModelPath(inferModelPath);
+    flParameter.setFlID(flID);
+    flParameter.setIp(ip);
+    flParameter.setPort(port);
+    flParameter.setUseSSL(useSSL);
+
+    // start FLJob
+    SyncFLJob syncFLJob = new SyncFLJob();
+    syncFLJob.flJobRun();
+    ```
+
+2. Lenet图片分类任务示例代码
+
+    ```java
+    // set parameters
+    String trainDataset = "SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_9_train_data.bin,SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_9_train_label.bin"; //绝对路径
+    String testDataset = "SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_1_test_data.bin,SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_1_test_label.bin"; //绝对路径,若不包含单独的测试集,可使用训练数据作为测试集,或不进行测试(不设置该参数)
+    String flName = "lenet";
+    String trainModelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+    String inferModelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+    String flID = UUID.randomUUID().toString();
+    String ip = "http://10.113.216.106:";
+    int port = 6668;
+    boolean useSSL = false;
+
+    FLParameter flParameter = FLParameter.getInstance();
+    flParameter.setTrainDataset(trainDataset);
+    flParameter.setTestDataset(testDataset);
+    flParameter.setFlName(flName);
+    flParameter.setTrainModelPath(trainModelPath);
+    flParameter.setInferModelPath(inferModelPath);
+    flParameter.setFlID(flID);
+    flParameter.setIp(ip);
+    flParameter.setPort(port);
+    flParameter.setUseSSL(useSSL);
+
+    // start FLJob
+    SyncFLJob syncFLJob = new SyncFLJob();
+    syncFLJob.flJobRun();
+    ```
+
+## 多条数据输入推理接口modelInference()
+
+### 输入参数列表
+
+| 参数名称 | 参数类型 | 是否必须 | 描述信息 | 适用API版本 |
+| --------- | -------- | -------- | ----------------------------------------- | --------------------------------------------------------- |
+| flName | String | Y | 联邦学习使用的模型名称 | 情感分类任务需设置为“adbert”;图片分类任务需设置为“lenet” |
+| dataPath | String | Y | 数据集路径 | 情感分类任务为txt文档格式;图片分类任务为bin文件格式 |
+| vocabFile | String | Y | 数据预处理的词典文件路径 | 情感分类任务必须设置;图片分类任务设置为null |
+| idsFile | String | Y | 词典的映射id文件路径 | 情感分类任务必须设置;图片分类任务设置为null |
+| modelPath | String | Y | 联邦学习推理模型路径,为.ms文件的绝对路径 | |
+
+创建SyncFLJob对象,并通过SyncFLJob类的modelInference()方法启动端侧推理任务,返回推理的标签数组。
+
+示例代码如下:
+
+1. 情感分类任务示例代码
+
+    ```java
+    // set parameters
+    String flName = "adbert";
+    String dataPath = "SyncFLClient0604/data/adbert/eval/eval.txt"; //绝对路径
+    String vocabFile = "SyncFLClient0604/data/adbert/vocab.txt"; //绝对路径
+    String idsFile = "SyncFLClient0604/data/adbert/vocab_map_ids.txt"; //绝对路径
+    String modelPath = "SyncFLClient0604/ms/adbert/albert_ad_infer.mindir.ms"; //绝对路径
+
+    // inference
+    SyncFLJob syncFLJob = new SyncFLJob();
+    int[] labels = syncFLJob.modelInference(flName, dataPath, vocabFile, idsFile, modelPath);
+    ```
+
+2. Lenet图片分类示例代码
+
+    ```java
+    // set parameters
+    String flName = "lenet";
+    String dataPath = "/SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_1_test_data.bin,/SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_1_test_label.bin"; //绝对路径
+    String vocabFile = null; //图片分类任务设置为null
+    String idsFile = null; //图片分类任务设置为null
+    String modelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+
+    // inference
+    SyncFLJob syncFLJob = new SyncFLJob();
+    int[] labels = syncFLJob.modelInference(flName, dataPath, vocabFile, idsFile, modelPath);
+    ```
+
+## 获取云侧最新模型接口getModel()
+
+### 输入参数列表
+
+| 参数名称 | 参数类型 | 是否必须 | 描述信息 | 适用API版本 |
+| -------------- | -------- | -------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| useElb | boolean | Y | 用于设置是否模拟弹性负载均衡,true代表客户端会将请求随机发给一定范围内的server地址,false代表客户端的请求会发给固定的server地址。 | |
+| serverNum | int | Y | 用于设置模拟弹性负载均衡时可发送请求的server数量,需与云侧启动server数量一致。 | |
+| ip | String | Y | Server端所启动服务的ip地址,形如“http://10.113.216.106:” | 后期ip+port会改为域名 |
+| port | int | Y | Server端所启动服务的端口号 | 后期ip+port会改为域名 |
+| flName | String | Y | 联邦学习使用的模型名称 | 情感分类任务需设置为“adbert”;图片分类任务需设置为“lenet” |
+| trainModelPath | String | Y | 联邦学习使用的训练模型路径,为.ms文件的绝对路径 | |
+| inferModelPath | String | Y | 联邦学习使用的推理模型路径,为.ms文件的绝对路径 | 情感分类任务必须设置;图片分类任务可设置为与trainModelPath相同 |
+| useSSL | boolean | Y | 端云通信是否进行ssl证书认证,默认不进行 | |
+
+创建SyncFLJob对象,并通过SyncFLJob类的getModel()方法启动获取云侧最新模型的任务,返回getModel请求状态码。
+
+示例代码如下:
+
+1. 情感分类任务版本
+
+    ```java
+    // set parameters
+    boolean useElb = false;
+    int serverNum = 1;
+    String ip = "http://10.113.216.106:";
+    int port = 6668;
+    String flName = "adbert"; //情感分类任务场景需设置为“adbert”,lenet图片分类任务场景需设置为“lenet”
+    String trainModelPath = "SyncFLClient0604/ms/adbert/albert_ad_train.mindir.ms"; //绝对路径
+    String inferModelPath = "SyncFLClient0604/ms/adbert/albert_ad_infer.mindir.ms"; //绝对路径
+    boolean useSSL = false;
+
+    // getModel
+    SyncFLJob syncFLJob = new SyncFLJob();
+    syncFLJob.getModel(useElb, serverNum, ip, port, flName, trainModelPath, inferModelPath, useSSL);
+    ```
+
+2. Lenet图片分类任务版本
+
+    ```java
+    // set parameters
+    boolean useElb = false;
+    int serverNum = 1;
+    String ip = "http://10.113.216.106:";
+    int port = 6668;
+    String flName = "lenet"; //情感分类任务场景需设置为“adbert”,lenet图片分类任务场景需设置为“lenet”
+    String trainModelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+    String inferModelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+    boolean useSSL = false;
+
+    // getModel
+    SyncFLJob syncFLJob = new SyncFLJob();
+    syncFLJob.getModel(useElb, serverNum, ip, port, flName, trainModelPath, inferModelPath, useSSL);
+    ```
diff --git a/docs/mindfl/api/source_zh_cn/java_api_flparameter.md b/docs/mindfl/api/source_zh_cn/java_api_flparameter.md
new file mode 100644
index 0000000000000000000000000000000000000000..253d172cea5213f7a7596a6a9ab866ecc8821efe
--- /dev/null
+++ b/docs/mindfl/api/source_zh_cn/java_api_flparameter.md
@@ -0,0 +1,478 @@
+# FLParameter
+
+```java
+import com.huawei.flclient.FLParameter
+```
+
+FLParameter定义联邦学习相关参数,供用户进行设置。
+
+## 公有成员函数
+
+| **function** |
+| ---------------------------------------------------- |
+| public static synchronized FLParameter getInstance() |
+| public String getHostName() |
+| public void setHostName(String hostName) |
+| public String getCertPath() |
+| public void setCertPath(String certPath) |
+| public String getTrainDataset() |
+| public void setTrainDataset(String trainDataset) |
+| public String getVocabFile() |
+| public void setVocabFile(String vocabFile) |
+| public String getIdsFile() |
+| public void setIdsFile(String idsFile) |
+| public String getTestDataset() |
+| public void setTestDataset(String testDataset) |
+| public String getFlName() |
+| public void setFlName(String flName) |
+| public String getTrainModelPath() |
+| public void setTrainModelPath(String trainModelPath) |
+| public String getInferModelPath() |
+| public void setInferModelPath(String inferModelPath) |
+| public String getIp() |
+| public void setIp(String ip) |
+| public boolean isUseSSL() |
+| public void setUseSSL(boolean useSSL) |
+| public int getPort() |
+| public void setPort(int port) |
+| public int getTimeOut() |
+| public void setTimeOut(int timeOut) |
+| public int getSleepTime() |
+| public void setSleepTime(int sleepTime) |
+| public boolean isUseElb() |
+| public void setUseElb(boolean useElb) |
+| public int getServerNum() |
+| public void setServerNum(int serverNum) |
+| public boolean isPkiVerify() |
+| public void setPkiVerify(boolean pkiVerify) |
+| public String getClientID() |
+| public void setClientID(String clientID) |
+
+## getInstance
+
+```java
+public static synchronized FLParameter getInstance()
+```
+
+获取FLParameter单例。
+
+- 返回值
+
+  FLParameter类型的单例对象。
+
+## getHostName
+
+```java
+public String getHostName()
+```
+
+获取用户设置的域名hostName。
+
+- 返回值
+
+  String类型的域名。
+
+## setHostName
+
+```java
+public void setHostName(String hostName)
+```
+
+用于设置域名hostName。
+
+- 参数
+
+    - `hostName`: 域名。
+
+## getCertPath
+
+```java
+public String getCertPath()
+```
+
+获取用户设置的证书路径certPath。
+
+- 返回值
+
+  String类型的证书路径certPath。
+
+## setCertPath
+
+```java
+public void setCertPath(String certPath)
+```
+
+用于设置证书路径certPath。
+
+- 参数
+    - `certPath`: 证书路径。
+
+## getTrainDataset
+
+```java
+public String getTrainDataset()
+```
+
+获取用户设置的训练数据集路径trainDataset。
+
+- 返回值
+
+  String类型的训练数据集路径trainDataset。
+
+## setTrainDataset
+
+```java
+public void setTrainDataset(String trainDataset)
+```
+
+用于设置训练数据集路径trainDataset。
+
+- 参数
+    - `trainDataset`: 训练数据集路径。
+
+## getVocabFile
+
+```java
+public String getVocabFile()
+```
+
+用于获取用户设置的数据预处理的词典文件路径vocabFile。
+
+- 返回值
+
+  String类型的数据预处理的词典文件路径vocabFile。
+
+## setVocabFile
+
+```java
+public void setVocabFile(String vocabFile)
+```
+
+设置数据预处理的词典文件路径vocabFile。
+
+- 参数
+    - `vocabFile`: 数据预处理的词典文件路径。
+
+## getIdsFile
+
+```java
+public String getIdsFile()
+```
+
+用于获取用户设置的词典的映射id文件路径idsFile。
+
+- 返回值
+
+  String类型的词典的映射id文件路径idsFile。
+
+## setIdsFile
+
+```java
+public void setIdsFile(String idsFile)
+```
+
+设置词典的映射id文件路径idsFile。
+
+- 参数
+
+    - `idsFile`: 词典的映射id文件路径。
+
+## getTestDataset
+
+```java
+public String getTestDataset()
+```
+
+用于获取用户设置的测试数据集路径testDataset。
+
+- 返回值
+
+  String类型的测试数据集路径testDataset。
+
+## setTestDataset
+
+```java
+public void setTestDataset(String testDataset)
+```
+
+设置测试数据集路径testDataset。
+
+- 参数
+    - `testDataset`: 测试数据集路径。
+
+## getFlName
+
+```java
+public String getFlName()
+```
+
+用于获取用户设置的模型名称flName。
+
+- 返回值
+
+  String类型的模型名称flName。
+
+## setFlName
+
+```java
+public void setFlName(String flName)
+```
+
+设置模型名称flName。
+
+- 参数
+    - `flName`: 模型名称。
+
+## getTrainModelPath
+
+```java
+public String getTrainModelPath()
+```
+
+用于获取用户设置的训练模型路径trainModelPath。
+
+- 返回值
+
+  String类型的训练模型路径trainModelPath。
+
+## setTrainModelPath
+
+```java
+public void setTrainModelPath(String trainModelPath)
+```
+
+设置训练模型路径trainModelPath。
+
+- 参数
+    - `trainModelPath`: 训练模型路径。
+
+## getInferModelPath
+
+```java
+public String getInferModelPath()
+```
+
+用于获取用户设置的推理模型路径inferModelPath。
+
+- 返回值
+
+  String类型的推理模型路径inferModelPath。
+
+## setInferModelPath
+
+```java
+public void setInferModelPath(String inferModelPath)
+```
+
+设置推理模型路径inferModelPath。
+
+- 参数
+    - `inferModelPath`: 推理模型路径。
+
+## getIp
+
+```java
+public String getIp()
+```
+
+用于获取用户设置的端云通信的ip地址。
+
+- 返回值
+
+  String类型的ip地址。
+
+## setIp
+
+```java
+public void setIp(String ip)
+```
+
+设置端云通信的ip地址。
+
+- 参数
+    - `ip`: 端云通信的ip地址。
+
+## isUseSSL
+
+```java
+public boolean isUseSSL()
+```
+
+端云通信是否进行ssl证书认证。
+
+- 返回值
+
+  boolean类型,true代表进行ssl证书认证,false代表不进行ssl证书认证。
+
+## setUseSSL
+
+```java
+public void setUseSSL(boolean useSSL)
+```
+
+用于设置端云通信是否进行ssl证书认证。
+
+- 参数
+    - `useSSL`: 端云通信是否进行ssl证书认证。
+
+## getPort
+
+```java
+public int getPort()
+```
+
+用于获取用户设置的端云通信的端口号port。
+
+- 返回值
+
+  int类型的端云通信的端口号port。
+
+## setPort
+
+```java
+public void setPort(int port)
+```
+
+用于设置端云通信的端口号port。
+
+- 参数
+    - `port`: 端云通信的端口号。
+
+## getTimeOut
+
+```java
+public int getTimeOut()
+```
+
+用于获取用户设置的端侧通信的超时时间timeOut。
+
+- 返回值
+
+  int类型的端侧通信的超时时间timeOut。
+
+## setTimeOut
+
+```java
+public void setTimeOut(int timeOut)
+```
+
+用于设置端侧通信的超时时间timeOut。
+
+- 参数
+    - `timeOut`: 端侧通信的超时时间。
+
+## getSleepTime
+
+```java
+public int getSleepTime()
+```
+
+用于获取用户设置的重复请求的等待时间sleepTime。
+
+- 返回值
+
+  int类型的重复请求的等待时间sleepTime。
+
+## setSleepTime
+
+```java
+public void setSleepTime(int sleepTime)
+```
+
+用于设置重复请求的等待时间sleepTime。
+
+- 参数
+    - `sleepTime`: 重复请求的等待时间。
+
+## isUseElb
+
+```java
+public boolean isUseElb()
+```
+
+是否模拟弹性负载均衡,即客户端将请求随机发给一定范围内的server地址。
+
+- 返回值
+
+  boolean类型,true代表客户端会将请求随机发给一定范围内的server地址,false代表客户端的请求会发给固定的server地址。
+
+## setUseElb
+
+```java
+public void setUseElb(boolean useElb)
+```
+
+用于设置是否模拟弹性负载均衡,即客户端将请求随机发给一定范围内的server地址。
+
+- 参数
+    - `useElb`: 是否模拟弹性负载均衡,默认为false。
+
+## getServerNum
+
+```java
+public int getServerNum()
+```
+
+用于获取用户设置的模拟弹性负载均衡时可发送请求的server数量。
+
+- 返回值
+
+  int类型的模拟弹性负载均衡时可发送请求的server数量。
+
+## setServerNum
+
+```java
+public void setServerNum(int serverNum)
+```
+
+用于设置模拟弹性负载均衡时可发送请求的server数量。
+
+- 参数
+    - `serverNum`: 模拟弹性负载均衡时可发送请求的server数量,默认为1。
+
+## isPkiVerify
+
+```java
+public boolean isPkiVerify()
+```
+
+是否进行端云认证。
+
+- 返回值
+
+  boolean类型,true代表进行端云认证,false代表不进行端云认证。
+
+## setPkiVerify
+
+```java
+public void setPkiVerify(boolean pkiVerify)
+```
+
+用于设置是否进行端云认证。
+
+- 参数
+
+    - `pkiVerify`: 是否进行端云认证。
+
+## getClientID
+
+```java
+public String getClientID()
+```
+
+用于获取用户设置的唯一标识客户端的ID。
+
+- 返回值
+
+  String类型的唯一标识客户端的ID。
+
+## setClientID
+
+```java
+public void setClientID(String clientID)
+```
+
+用于设置唯一标识客户端的ID。
+
+- 参数
+    - `clientID`: 唯一标识客户端的ID。
\ No newline at end of file
diff --git a/docs/mindfl/api/source_zh_cn/java_api_syncfljob.md b/docs/mindfl/api/source_zh_cn/java_api_syncfljob.md
new file mode 100644
index 0000000000000000000000000000000000000000..6e5f10d681d40b38dd292745bfdd9c550fc1ad78
--- /dev/null
+++ b/docs/mindfl/api/source_zh_cn/java_api_syncfljob.md
@@ -0,0 +1,64 @@
+# SyncFLJob
+
+```java
+import com.huawei.flclient.SyncFLJob
+```
+
+SyncFLJob定义了端侧联邦学习启动接口flJobRun()、端侧推理接口modelInference()、获取云侧最新模型的接口getModel()。
+
+## 公有成员函数
+
+| **function** |
+| ------------------------------------------------------------ |
+| public void flJobRun() |
+| public int[] modelInference(String flName, String dataPath, String vocabFile, String idsFile, String modelPath) |
+| public FLClientStatus getModel(boolean useElb, int serverNum, String ip, int port, String flName, String trainModelPath, String inferModelPath, boolean useSSL) |
+
+## flJobRun
+
+```java
+public void flJobRun()
+```
+
+启动端侧联邦学习任务。
+
+## modelInference
+
+```java
+public int[] modelInference(String flName, String dataPath, String vocabFile, String idsFile, String modelPath)
+```
+
+启动端侧推理任务。
+
+- 参数
+
+    - `flName`: 联邦学习使用的模型名称,情感分类任务需设置为“adbert”;图片分类任务需设置为“lenet”。
+    - `dataPath`: 数据集路径,情感分类任务为txt文档格式;图片分类任务为bin文件格式。
+    - `vocabFile`: 数据预处理的词典文件路径,情感分类任务必须设置;图片分类任务设置为null。
+    - `idsFile`: 词典的映射id文件路径,情感分类任务必须设置;图片分类任务设置为null。
+    - `modelPath`: 联邦学习推理模型路径,为.ms文件的绝对路径。
+
+- 返回值
+
+  根据输入推理出的标签组成的int[]。
+
+## getModel
+
+```java
+public FLClientStatus getModel(boolean useElb, int serverNum, String ip, int port, String flName, String trainModelPath, String inferModelPath, boolean useSSL)
+```
+
+获取云侧最新模型。
+
+- 参数
+    - `useElb`: 用于设置是否模拟弹性负载均衡,true代表客户端会将请求随机发给一定范围内的server地址,false代表客户端的请求会发给固定的server地址,默认为false。
+    - `serverNum`: 用于设置模拟弹性负载均衡时可发送请求的server数量,默认为1。
+    - `ip`: Server端所启动服务的ip地址,形如“http://10.113.216.106:”。
+    - `port`: Server端所启动服务的端口号。
+    - `flName`: 联邦学习使用的模型名称,情感分类任务需设置为“adbert”;图片分类任务需设置为“lenet”。
+    - `trainModelPath`: 联邦学习使用的训练模型路径,为.ms文件的绝对路径。
+    - `inferModelPath`: 联邦学习使用的推理模型路径,为.ms文件的绝对路径。
+    - `useSSL`: 端云通信是否进行ssl证书认证,默认不进行。
+- 返回值
+
+  返回getModel请求状态码。
\ No newline at end of file
diff --git a/docs/mindfl/api/source_zh_cn/mindfl_client.rst b/docs/mindfl/api/source_zh_cn/mindfl_client.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c0c903d32b5698579f2743d01df3243277e03443
--- /dev/null
+++ b/docs/mindfl/api/source_zh_cn/mindfl_client.rst
@@ -0,0 +1,8 @@
+.. toctree::
+   :maxdepth: 1
+
+   java_api_flparameter
+   java_api_syncfljob
+   interface_description_fl_client
+
+
diff --git a/docs/mindfl/api/source_zh_cn/mindfl_server.rst b/docs/mindfl/api/source_zh_cn/mindfl_server.rst
new file mode 100644
index 0000000000000000000000000000000000000000..064876f3649a940d1b9720ec909b4328f3115f78
--- /dev/null
+++ b/docs/mindfl/api/source_zh_cn/mindfl_server.rst
@@ -0,0 +1,12 @@
+mindspore.context.set_fl_context
+--------------------------------
+
+.. autofunction:: mindspore.context.set_fl_context
+
+mindspore.context.get_fl_context
+--------------------------------
+
+.. 
autofunction:: mindspore.context.get_fl_context + + + diff --git a/docs/mindfl/docs/Makefile b/docs/mindfl/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1eff8952707bdfa503c8d60c1e9a903053170ba2 --- /dev/null +++ b/docs/mindfl/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source_zh_cn +BUILDDIR = build_zh_cn + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/mindfl/docs/requirements.txt b/docs/mindfl/docs/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea17a9e73613ddd99cc31690ddcf283d9a721450 --- /dev/null +++ b/docs/mindfl/docs/requirements.txt @@ -0,0 +1,5 @@ +sphinx >= 2.2.1, <= 2.4.4 +recommonmark +sphinx-markdown-tables +sphinx_rtd_theme +jieba \ No newline at end of file diff --git a/docs/mindfl/docs/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc b/docs/mindfl/docs/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..516af30c9a197f89a41eb556f5f38d22e72d6668 Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc differ diff --git a/docs/mindfl/docs/source_zh_cn/_ext/my_signature.py b/docs/mindfl/docs/source_zh_cn/_ext/my_signature.py new file mode 100644 index 0000000000000000000000000000000000000000..7eb6cec6f4607af6b66dfe3d460b7e57d39e20d5 --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/_ext/my_signature.py @@ -0,0 +1,354 @@ +""" +Rewrote the Signature module that fix default signature error for autodoc module. +""" + +import inspect +import re +import types +import functools + + +def _sort_param(param_list, target_str): + """Sort param_list as default order.""" + ls = [] + for param_name in param_list: + ls.append((param_name, target_str.find(param_name))) + ls.sort(key=lambda x: x[1], reverse=False) + ls = [i[0] for i in ls] + return ls + + +def get_default_params(func): + """ Get the default signatures from function. """ + source_code = inspect.getsource(func) + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + karg_pos = func_code.co_kwonlyargcount + kwargs_num = arg_names.count("args") + arg_names.count("kwargs") + all_param_names = list(arg_names[:pos_count+karg_pos+kwargs_num]) + all_params = re.findall(r"def [\w_\d\-]+\(([\S\s]*?)\):", source_code)[0].replace("\n", "").replace("'", "\"") + + # sub null spaces from matched all param str. + re_space_sub = re.compile(r",\s+") + all_params = re_space_sub.sub(",", all_params) + + all_param_names = _sort_param(all_param_names, all_params) + + # sub the extra "=" from param. 
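+    # The first "=" in each matched fragment separates a parameter name from
+    # its default value; it is stripped below so only the default text remains.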
+ re_equate_sub = re.compile("=") + + re_defaults_param = re.compile(r"(.*?)".join(all_param_names) + r"(.*)") + defaults_params = re_defaults_param.findall(all_params) + if defaults_params: + if isinstance(defaults_params[0], tuple): + defaults_params = list([i[:-2] if i[-2:] == "**" else i for i in defaults_params[0]]) + defaults_params_list = [] + for i in defaults_params: + if "=" in i and i: + i = re_equate_sub.sub("", i, count=1).strip(",") + if i[:6] == "lambda": + i = "<" + i + ">" + defaults_params_list.append(i) + defaults_params_tuple = tuple(defaults_params_list) + return defaults_params_tuple + return func.__defaults__ + + +def _my_signature_from_function(cls, func): + """Private helper: constructs Signature for the given python function.""" + + is_duck_function = False + if not inspect.isfunction(func): + if inspect._signature_is_functionlike(func): # pylint: disable=protected-access + is_duck_function = True + else: + # If it's not a pure Python function, and not a duck type + # of pure function: + raise TypeError('{!r} is not a Python function'.format(func)) + + Parameter = cls._parameter_cls # pylint: disable=protected-access + + # Parameter information._partialmethod + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + positional = tuple(arg_names[:pos_count]) + keyword_only_count = func_code.co_kwonlyargcount + keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)] + annotations = func.__annotations__ + defaults = get_default_params(func) + if keyword_only_count == len(defaults): + kwdefaults = dict() + for num, arg_name in enumerate(keyword_only): + kwdefaults[arg_name] = defaults[num] + else: + kwdefaults = func.__kwdefaults__ + pos_defaults = func.__defaults__ + + if pos_defaults: + pos_default_count = len(pos_defaults) + else: + pos_default_count = 0 + + parameters = [] + + # Non-keyword-only parameters w/o defaults. + non_default_count = pos_count - pos_default_count + for name in positional[:non_default_count]: + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD)) # pylint: disable=protected-access + + # ... w/ defaults. + for offset, name in enumerate(positional[non_default_count:]): + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD, # pylint: disable=protected-access + default=defaults[offset])) + + # *args + if func_code.co_flags & inspect.CO_VARARGS: + name = arg_names[pos_count + keyword_only_count] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_POSITIONAL)) # pylint: disable=protected-access + + # Keyword-only parameters. 
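+    # Defaults for keyword-only parameters come from `kwdefaults`, built above
+    # either from the re-parsed source defaults or from func.__kwdefaults__.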
+ for name in keyword_only: + default = inspect._empty # pylint: disable=protected-access + if kwdefaults is not None: + default = kwdefaults.get(name, inspect._empty) # pylint: disable=protected-access + + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._KEYWORD_ONLY, # pylint: disable=protected-access + default=default)) + # **kwargs + if func_code.co_flags & inspect.CO_VARKEYWORDS: + index = pos_count + keyword_only_count + if func_code.co_flags & inspect.CO_VARARGS: + index += 1 + + name = arg_names[index] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_KEYWORD)) # pylint: disable=protected-access + + # Is 'func' is a pure Python function - don't validate the + # parameters list (for correct order and defaults), it should be OK. + return cls(parameters, + return_annotation=annotations.get('return', inspect._empty), # pylint: disable=protected-access + __validate_parameters__=is_duck_function) + + +def _my_signature_from_callable(obj, *, + follow_wrapper_chains=True, + skip_bound_arg=True, + sigcls): + """Private helper function to get signature for arbitrary + callable objects. + """ + + if not callable(obj): + raise TypeError('{!r} is not a callable object'.format(obj)) + + if isinstance(obj, types.MethodType): + # In this case we skip the first parameter of the underlying + # function (usually `self` or `cls`). + sig = _my_signature_from_callable( + obj.__func__, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + # Was this function wrapped by a decorator? + if follow_wrapper_chains: + obj = inspect.unwrap(obj, stop=(lambda f: hasattr(f, "__signature__"))) + if isinstance(obj, types.MethodType): + # If the unwrapped object is a *method*, we might want to + # skip its first parameter (self). + # See test_signature_wrapped_bound_method for details. 
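+            # Re-enter this helper with the unwrapped bound method so the
+            # MethodType branch above strips its `self` parameter.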
+ return _my_signature_from_callable( + obj, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + try: + sig = obj.__signature__ + except AttributeError: + pass + else: + if sig is not None: + if not isinstance(sig, MySignature): + raise TypeError( + 'unexpected object {!r} in __signature__ ' + 'attribute'.format(sig)) + return sig + + try: + partialmethod = obj._partialmethod # pylint: disable=protected-access + except AttributeError: + pass + else: + if isinstance(partialmethod, functools.partialmethod): + # Unbound partialmethod (see functools.partialmethod) + # This means, that we need to calculate the signature + # as if it's a regular partial object, but taking into + # account that the first positional argument + # (usually `self`, or `cls`) will not be passed + # automatically (as for boundmethods) + + wrapped_sig = _my_signature_from_callable( + partialmethod.func, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + sig = inspect._signature_get_partial(wrapped_sig, partialmethod, (None,)) # pylint: disable=protected-access + first_wrapped_param = tuple(wrapped_sig.parameters.values())[0] + if first_wrapped_param.kind is Parameter.VAR_POSITIONAL: # pylint: disable=no-else-return + # First argument of the wrapped callable is `*args`, as in + # `partialmethod(lambda *args)`. + return sig + else: + sig_params = tuple(sig.parameters.values()) + assert (not sig_params or + first_wrapped_param is not sig_params[0]) + new_params = (first_wrapped_param,) + sig_params + return sig.replace(parameters=new_params) + + if inspect.isfunction(obj) or inspect._signature_is_functionlike(obj): # pylint: disable=protected-access + # If it's a pure Python function, or an object that is duck type + # of a Python function (Cython functions, for instance), then: + return _my_signature_from_function(sigcls, obj) + + if inspect._signature_is_builtin(obj): # pylint: disable=protected-access + return inspect._signature_from_builtin(sigcls, obj, # pylint: disable=protected-access + skip_bound_arg=skip_bound_arg) + + if isinstance(obj, functools.partial): + wrapped_sig = _my_signature_from_callable( + obj.func, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + return inspect._signature_get_partial(wrapped_sig, obj) # pylint: disable=protected-access + + sig = None + if isinstance(obj, type): + # obj is a class or a metaclass + + # First, let's see if it has an overloaded __call__ defined + # in its metaclass + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + else: + # Now we check if the 'obj' class has a '__new__' method + new = inspect._signature_get_user_defined_method(obj, '__new__') # pylint: disable=protected-access + if new is not None: + sig = _my_signature_from_callable( + new, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + else: + # Finally, we should have at least __init__ implemented + init = inspect._signature_get_user_defined_method(obj, '__init__') # pylint: disable=protected-access + if init is not None: + sig = _my_signature_from_callable( + init, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if sig is None: + # At this 
point we know, that `obj` is a class, with no user- + # defined '__init__', '__new__', or class-level '__call__' + + for base in obj.__mro__[:-1]: + # Since '__text_signature__' is implemented as a + # descriptor that extracts text signature from the + # class docstring, if 'obj' is derived from a builtin + # class, its own '__text_signature__' may be 'None'. + # Therefore, we go through the MRO (except the last + # class in there, which is 'object') to find the first + # class with non-empty text signature. + try: + text_sig = base.__text_signature__ + except AttributeError: + pass + else: + if text_sig: + # If 'obj' class has a __text_signature__ attribute: + # return a signature based on it + return inspect._signature_fromstr(sigcls, obj, text_sig) # pylint: disable=protected-access + + # No '__text_signature__' was found for the 'obj' class. + # Last option is to check if its '__init__' is + # object.__init__ or type.__init__. + if type not in obj.__mro__: + # We have a class (not metaclass), but no user-defined + # __init__ or __new__ for it + if (obj.__init__ is object.__init__ and # pylint: disable=no-else-return + obj.__new__ is object.__new__): + # Return a signature of 'object' builtin. + return sigcls.from_callable(object) + else: + raise ValueError( + 'no signature found for builtin type {!r}'.format(obj)) + + elif not isinstance(obj, inspect._NonUserDefinedCallables): # pylint: disable=protected-access + # An object with __call__ + # We also check that the 'obj' is not an instance of + # _WrapperDescriptor or _MethodWrapper to avoid + # infinite recursion (and even potential segfault) + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + try: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + except ValueError as ex: + msg = 'no signature found for {!r}'.format(obj) + raise ValueError(msg) from ex + + if sig is not None: + # For classes and objects we skip the first parameter of their + # __call__, __new__, or __init__ methods + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + if isinstance(obj, types.BuiltinFunctionType): + # Raise a nicer error message for builtins + msg = 'no signature found for builtin function {!r}'.format(obj) + raise ValueError(msg) + + raise ValueError('callable {!r} is not supported by signature'.format(obj)) + + +class MySignature(inspect.Signature): + + @classmethod + def from_callable(cls, obj, *, follow_wrapped=True): + """Constructs Signature for the given callable object.""" + return _my_signature_from_callable(obj, sigcls=cls, + follow_wrapper_chains=follow_wrapped) + + +def signature(obj, *, follow_wrapped=True): + """Get a signature object for the passed callable.""" + return MySignature.from_callable(obj, follow_wrapped=follow_wrapped) diff --git a/docs/mindfl/docs/source_zh_cn/_static/logo_notebook.png b/docs/mindfl/docs/source_zh_cn/_static/logo_notebook.png new file mode 100644 index 0000000000000000000000000000000000000000..18c2e29e4b73ee428f70253feffdd855fdf0c422 Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/_static/logo_notebook.png differ diff --git a/docs/mindfl/docs/source_zh_cn/_static/logo_source.png b/docs/mindfl/docs/source_zh_cn/_static/logo_source.png new file mode 100644 index 0000000000000000000000000000000000000000..880f2bc87172daf487654c0ba4f1657c672bd2b8 Binary files 
/dev/null and b/docs/mindfl/docs/source_zh_cn/_static/logo_source.png differ diff --git a/docs/mindfl/docs/source_zh_cn/_templates/classtemplate.rst b/docs/mindfl/docs/source_zh_cn/_templates/classtemplate.rst new file mode 100644 index 0000000000000000000000000000000000000000..fd88815f7b49e1cd25195fc8eceba498eafe780c --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/_templates/classtemplate.rst @@ -0,0 +1,24 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname in [] %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% elif objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/mindfl/docs/source_zh_cn/_templates/classtemplate_inherited.rst b/docs/mindfl/docs/source_zh_cn/_templates/classtemplate_inherited.rst new file mode 100644 index 0000000000000000000000000000000000000000..8f4a423dca6e678c191df73d142e4e52a862a3db --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/_templates/classtemplate_inherited.rst @@ -0,0 +1,26 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :inherited-members: + :members: + +{% elif fullname=="mindspore.numpy.ix_" %} + +mindspore.numpy.ix\_ +==================== + +.. autofunction:: mindspore.numpy.ix_ + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. autogenerated from _templates/classtemplate_inherited.rst \ No newline at end of file diff --git a/docs/mindfl/docs/source_zh_cn/_templates/classtemplate_probability.rst b/docs/mindfl/docs/source_zh_cn/_templates/classtemplate_probability.rst new file mode 100644 index 0000000000000000000000000000000000000000..6329880e1fc540de910b25d1724a2cfba8d501f2 --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/_templates/classtemplate_probability.rst @@ -0,0 +1,13 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/mindfl/docs/source_zh_cn/conf.py b/docs/mindfl/docs/source_zh_cn/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..170b75c94df1068d62afabf31443d034b296220e --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/conf.py @@ -0,0 +1,398 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# + +import os +import re +import sys +sys.path.append(os.path.abspath('./_ext')) +import sphinx.ext.autosummary.generate as g +from sphinx.ext import autodoc as sphinx_autodoc +from sphinx.util import inspect as sphinx_inspect +from sphinx.domains import python as sphinx_domain_python +from textwrap import dedent +# sys.path.insert(0, os.path.abspath('.')) + +import mindspore +# If you don't want to generate MindArmour APIs, comment this line. +# import mindarmour +# If you don't want to generate MindSpore_Hub APIs, comment this line. +# import mindspore_hub +# If you don't want to generate MindSpore_Serving APIs, comment this line. +# import mindspore_serving + +# -- Project information ----------------------------------------------------- + +project = 'MindSpore' +copyright = '2020, MindSpore' +author = 'MindSpore' + +# The full version, including alpha/beta/rc tags +release = 'master' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', + 'recommonmark', +] + +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +pygments_style = 'sphinx' + +autodoc_inherit_docstrings = False + +autosummary_generate = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +html_search_language = 'zh' + +html_search_options = {'dict': '../../resource/jieba.txt'} + +html_static_path = ['_static'] + +# -- Options for Texinfo output ------------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'python': ('https://docs.python.org/', '../python_objects.inv'), + 'numpy': ('https://docs.scipy.org/doc/numpy/', '../numpy_objects.inv'), +} + +from typing import List, Tuple +from docutils.nodes import Node + +from sphinx.locale import __ +from sphinx.ext.autosummary import Autosummary, posixpath, addnodes, logger, Matcher, autosummary_toc, get_import_prefixes_from_env +from sphinx.ext.autosummary import mock, StringList, ModuleType, get_documenter, ModuleAnalyzer, PycodeError, mangle_signature +from sphinx.ext.autosummary import import_by_name, extract_summary, autosummary_table, nodes, switch_source_input, rst +from sphinx.ext.autodoc.directive import DocumenterBridge, Options + +class MsAutosummary(Autosummary): + """ + Inherited from sphinx's autosummary, add titles and a column for the generated table. 
+ """ + + def init(self): + """ + init method + """ + self.find_doc_name = "" + self.third_title = "" + self.default_doc = "" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for i, piece in enumerate(doc): + if piece.startswith(self.find_doc_name): + env_sum = doc[i+1][4:] + return env_sum + + def run(self): + """ + run method + """ + self.init() + self.bridge = DocumenterBridge(self.env, self.state.document.reporter, + Options(), self.lineno, self.state) + + names = [x.strip().split()[0] for x in self.content + if x.strip() and re.search(r'^[~a-zA-Z_]', x.strip()[0])] + items = self.get_items(names) + teble_nodes = self.get_table(items) + + if 'toctree' in self.options: + dirname = posixpath.dirname(self.env.docname) + + tree_prefix = self.options['toctree'].strip() + docnames = [] + excluded = Matcher(self.config.exclude_patterns) + for item in items: + docname = posixpath.join(tree_prefix, item[3]) + docname = posixpath.normpath(posixpath.join(dirname, docname)) + if docname not in self.env.found_docs: + location = self.state_machine.get_source_and_line(self.lineno) + if excluded(self.env.doc2path(docname, None)): + msg = __('autosummary references excluded document %r. Ignored.') + else: + msg = __('autosummary: stub file not found %r. ' + 'Check your autosummary_generate setting.') + logger.warning(msg, item[3], location=location) + continue + docnames.append(docname) + + if docnames: + tocnode = addnodes.toctree() + tocnode['includefiles'] = docnames + tocnode['entries'] = [(None, docn) for docn in docnames] + tocnode['maxdepth'] = -1 + tocnode['glob'] = None + teble_nodes.append(autosummary_toc('', '', tocnode)) + return teble_nodes + + def get_items(self, names: List[str]) -> List[Tuple[str, str, str, str, str]]: + """Try to import the given names, and return a list of + ``[(name, signature, summary_string, real_name, env_summary), ...]``. + """ + prefixes = get_import_prefixes_from_env(self.env) + items = [] # type: List[Tuple[str, str, str, str, str]] + max_item_chars = 50 + + for name in names: + display_name = name + if name.startswith('~'): + name = name[1:] + display_name = name.split('.')[-1] + try: + with mock(self.config.autosummary_mock_imports): + real_name, obj, parent, modname = import_by_name(name, prefixes=prefixes) + except ImportError: + logger.warning(__('failed to import %s'), name) + items.append((name, '', '', name, '')) + continue + + self.bridge.result = StringList() # initialize for each documenter + full_name = real_name + if not isinstance(obj, ModuleType): + # give explicitly separated module name, so that members + # of inner classes can be documented + full_name = modname + '::' + full_name[len(modname) + 1:] + # NB. 
using full_name here is important, since Documenters + # handle module prefixes slightly differently + doccls = get_documenter(self.env.app, obj, parent) + documenter = doccls(self.bridge, full_name) + + if not documenter.parse_name(): + logger.warning(__('failed to parse name %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if not documenter.import_object(): + logger.warning(__('failed to import object %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if documenter.options.members and not documenter.check_module(): + continue + + # try to also get a source code analyzer for attribute docs + try: + documenter.analyzer = ModuleAnalyzer.for_module( + documenter.get_real_modname()) + # parse right now, to get PycodeErrors on parsing (results will + # be cached anyway) + documenter.analyzer.find_attr_docs() + except PycodeError as err: + logger.debug('[autodoc] module analyzer failed: %s', err) + # no source file -- e.g. for builtin and C modules + documenter.analyzer = None + + # -- Grab the signature + + try: + sig = documenter.format_signature(show_annotation=False) + except TypeError: + # the documenter does not support ``show_annotation`` option + sig = documenter.format_signature() + + if not sig: + sig = '' + else: + max_chars = max(10, max_item_chars - len(display_name)) + sig = mangle_signature(sig, max_chars=max_chars) + + # -- Grab the summary + + documenter.add_content(None) + summary = extract_summary(self.bridge.result.data[:], self.state.document) + env_sum = self.extract_env_summary(self.bridge.result.data[:]) + items.append((display_name, sig, summary, real_name, env_sum)) + + return items + + def get_table(self, items: List[Tuple[str, str, str, str, str]]) -> List[Node]: + """Generate a proper list of table nodes for autosummary:: directive. + + *items* is a list produced by :meth:`get_items`. + """ + table_spec = addnodes.tabular_col_spec() + table_spec['spec'] = r'\X{1}{2}\X{1}{2}' + + table = autosummary_table('') + real_table = nodes.table('', classes=['longtable']) + table.append(real_table) + group = nodes.tgroup('', cols=3) + real_table.append(group) + group.append(nodes.colspec('', colwidth=10)) + group.append(nodes.colspec('', colwidth=70)) + group.append(nodes.colspec('', colwidth=30)) + body = nodes.tbody('') + group.append(body) + + def append_row(*column_texts: str) -> None: + row = nodes.row('', color="red") + source, line = self.state_machine.get_source_and_line() + for text in column_texts: + node = nodes.paragraph('') + vl = StringList() + vl.append(text, '%s:%d:' % (source, line)) + with switch_source_input(self.state, vl): + self.state.nested_parse(vl, 0, node) + try: + if isinstance(node[0], nodes.paragraph): + node = node[0] + except IndexError: + pass + row.append(nodes.entry('', node)) + body.append(row) + + # add table's title + append_row("**API Name**", "**Description**", self.third_title) + for name, sig, summary, real_name, env_sum in items: + qualifier = 'obj' + if 'nosignatures' not in self.options: + col1 = ':%s:`%s <%s>`\\ %s' % (qualifier, name, real_name, rst.escape(sig)) + else: + col1 = ':%s:`%s <%s>`' % (qualifier, name, real_name) + col2 = summary + col3 = env_sum + append_row(col1, col2, col3) + + return [table_spec, table] + + +class MsNoteAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Note` to the table. + """ + + def init(self): + """ + init method + """ + self.find_doc_name = ".. 
note::" + self.third_title = "**Note**" + self.default_doc = "None" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for piece in doc: + if piece.startswith(self.find_doc_name): + env_sum = piece[10:] + return env_sum + + +class MsPlatformAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Supported Platforms` to the table. + """ + def init(self): + """ + init method + """ + self.find_doc_name = "Supported Platforms:" + self.third_title = "**{}**".format(self.find_doc_name[:-1]) + self.default_doc = "To Be Developed" + + +def setup(app): + app.add_directive('msplatformautosummary', MsPlatformAutoSummary) + app.add_directive('msnoteautosummary', MsNoteAutoSummary) + +# Modify regex for sphinx.ext.autosummary.generate.find_autosummary_in_lines. +gfile_abs_path = os.path.abspath(g.__file__) +autosummary_re_line_old = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+autosummary::\s*')" +autosummary_re_line_new = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+(ms[a-z]*)?autosummary::\s*')" +with open(gfile_abs_path, "r+", encoding="utf8") as f: + data = f.read() + data = data.replace(autosummary_re_line_old, autosummary_re_line_new) + f.seek(0) + f.write(data) + +# Modify default signatures for autodoc. +autodoc_source_path = os.path.abspath(sphinx_autodoc.__file__) +inspect_source_path = os.path.abspath(sphinx_inspect.__file__) +autodoc_source_re = re.compile(r"(\s+)args = self\.format_args\(\*\*kwargs\)") +inspect_source_code_str = """signature = inspect.signature(subject)""" +inspect_target_code_str = """signature = my_signature.signature(subject)""" +autodoc_source_code_str = """args = self.format_args(**kwargs)""" +is_autodoc_code_str = """args = args.replace("'", "")""" +with open(autodoc_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if is_autodoc_code_str not in code_str: + code_str_lines = code_str.split("\n") + autodoc_target_code_str = None + for line in code_str_lines: + re_matched_str = autodoc_source_re.search(line) + if re_matched_str: + space_num = re_matched_str.group(1) + autodoc_target_code_str = dedent("""\ + {0} + {1}if type(args) != type(None): + {1} {2}""".format(autodoc_source_code_str, space_num, is_autodoc_code_str)) + break + if autodoc_target_code_str: + code_str = code_str.replace(autodoc_source_code_str, autodoc_target_code_str) + f.seek(0) + f.truncate() + f.write(code_str) +with open(inspect_source_path, "r+", encoding="utf8") as g: + code_str = g.read() + if inspect_target_code_str not in code_str: + code_str = code_str.replace(inspect_source_code_str, inspect_target_code_str) + if "import my_signature" not in code_str: + code_str = code_str.replace("import sys", "import sys\nimport my_signature") + g.seek(0) + g.truncate() + g.write(code_str) + +# remove extra space for default params for autodoc. 
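+# The patch below rewrites the installed sphinx.domains.python source file in
+# place, so that each argument after the first in a signature's arglist keeps
+# exactly one leading space; the `if python_code_target not in code_str`
+# guard keeps the rewrite idempotent across repeated builds.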
+sphinx_domain_python_source_path = os.path.abspath(sphinx_domain_python.__file__) +python_code_source = """for argument in arglist.split(','):""" +python_code_target = """for argument in [" " + i if num > 1 else i for num,i in enumerate(arglist.split(", "))]:""" +with open(sphinx_domain_python_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if python_code_target not in code_str: + code_str = code_str.replace(python_code_source, python_code_target) + f.seek(0) + f.truncate() + f.write(code_str) diff --git a/docs/mindfl/docs/source_zh_cn/deploy_fl_client.md b/docs/mindfl/docs/source_zh_cn/deploy_fl_client.md new file mode 100644 index 0000000000000000000000000000000000000000..53bc71e1a6bd6ee2eb1d2ba492089d2dcf085284 --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/deploy_fl_client.md @@ -0,0 +1,149 @@ +# 端侧部署 + +下面分别介绍如何在Android环境和x86环境部署FL-Client: + +## Android环境 + +### 编译出包 + +- 配置编译环境 + + 目前只支持Linux环境编译,Linux编译环境配置可参考[这里](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux) + +- 在mindspore主目录进行编译,编译包含aarch64和aarch32的AAR包 + + ```sh + bash build.sh -A on -j32 + ``` + +- 生成的Android AAR包路径: + + ```sh + mindspore-lite-maven-{version}.zip + ``` + +### 运行依赖 + +- [Android Studio](https://developer.android.google.cn/studio) >= 3.2 (推荐4.0以上版本) +- [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) >= 26 (Android Studio默认安装) +- [OpenJDK](https://openjdk.java.net/install/) >= 1.8 (Android Studio默认安装) + +### 构建依赖环境 + +将文件`mindspore-lite-maven-{version}.zip`解压后所得到的目录结构如下所示: + +```sh +mindspore-lite-maven-{version} +└── mindspore + └── mindspore-lite + └── {version} + └── mindspore-lite-{version}.aar # MindSpore Lite训练框架AAR包 +``` + +由此可知联邦学习相关的AAR包路径是: + +```sh +mindspore/output/mindspore/mindspore-lite/{version}/mindspore-lite-{version}.aar +``` + +其中AAR包里面与联邦学习相关的目录结构如下: + +```sh +mindspore-lite-{version} +├── jni +│ ├── arm64-v8a +│ │ ├── libjpeg.so # 图像处理动态库文件 +│ │ ├── libminddata-lite.so # 图像处理动态库文件 +│ │ ├── libmindspore-lite-jni.so # MindSpore Lite训练框架依赖的动态库 +│ │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架依赖的动态库 +│ │ └── libturbojpeg.so # 图像处理动态库文件 +│ └── armeabi-v7a +│ ├── libjpeg.so # 图像处理动态库文件 +│ ├── libminddata-lite.so # 图像处理动态库文件 +│ ├── libmindspore-lite-jni.so # MindSpore Lite训练框架依赖的动态库 +│ ├── libmindspore-lite-train.so # MindSpore Lite训练框架依赖的动态库 +│ └── libturbojpeg.so # 图像处理动态库文件 +├── libs +│ ├── mindspore-lite-java-common.jar # MindSpore Lite训练框架jar包 +│ └── mindspore-lite-java-flclient.jar # 联邦学习框架jar包 +└── classes.jar # MindSpore Lite训练框架jar包 +``` + +在Android工程中只需依赖此 AAR包即可调用联邦学习提供的相关接口,接口的具体调用和运行方式可参考联邦学习接口介绍部分。 + +## x86环境 + +### 编译出包 + +- 配置编译环境 + + 目前只支持Linux环境编译,Linux编译环境配置可参考[这里](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux) + +- 在mindspore主目录进行编译,编译x86相关架构包 + + ```sh + bash build.sh -I x86_64 -j32 + ``` + +- 生成的x86相关架构包路径: + + ```sh + mindspore/output/mindspore-lite-{version}-linux-x64.tar.gz + ``` + +### 运行依赖 + +- [Python](https://www.python.org/downloads/)>=3.7.5 +- [OpenJDK](https://openjdk.java.net/install/) >= 1.8 + +### 构建依赖环境 + +将文件`mindspore/output/mindspore-lite-{version}-linux-x64.tar.gz`解压后所得到的目录结构如下所示: + +```sh +mindspore-lite-{version}-linux-x64 +├── tools +│ ├── benchmark_train # 训练模型性能与精度调测工具 +│ ├── converter # 模型转换工具 +│ └── cropper # 库裁剪工具 +│ ├── cropper # 库裁剪工具可执行文件 +│ └── cropper_mapping_cpu.cfg # 裁剪cpu库所需的配置文件 +└── runtime + ├── include # 训练框架头文件 + │ └── registry # 自定义算子注册头文件 + ├── lib # 训练框架库 + │ ├── libminddata-lite.so # 图像处理动态库文件 + │ ├── libmindspore-lite-jni.so 
# MindSpore Lite训练框架的jni动态库 + │ ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 + │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 + │ ├── mindspore-lite-java.jar # MindSpore Lite训练框架jar包 + │ └── mindspore-lite-java-flclient.jar # 联邦学习框架jar包 + └── third_party + └── libjpeg-turbo + └── lib + ├── libjpeg.so.62 # 图像处理动态库文件 + └── libturbojpeg.so # 图像处理动态库文件 +``` + +联邦学习所需的相关x86包名如下: + +```sh +libjpeg.so.62 # 图像处理动态库文件 +libminddata-lite.so # 图像处理动态库文件 +libmindspore-lite-jni.so # MindSpore Lite训练框架依赖的动态库 +libmindspore-lite-train.so # MindSpore Lite训练框架依赖的动态库 +libturbojpeg.so # 图像处理动态库文件 +mindspore-lite-java-flclient.jar # 联邦学习框架jar包 +``` + +其中可将路径`mindspore/output/mindspore-lite-{version}-linux-x64/runtime/lib/`以及`mindspore/output/mindspore-lite-{version}-linux-x64/runtime/third_party/libjpeg-turbo/lib`中联邦学习所依赖的so文件(共5个)放入一个文件夹,比如`/resource/x86libs/`。 + +然后在x86中设置环境变量(下面需给绝对路径): + +```sh +export LD_LIBRARY_PATH=/resource/x86libs/:$LD_LIBRARY_PATH +``` + +设置好依赖环境之后,可参考[这里](https://gitee.com/mindspore/docs/blob/master/docs/mindfl/docs/source_zh_cn/image_classification_application.md)教程在x86环境中模拟启动多个客户端进行联邦学习。 + + diff --git a/docs/mindfl/docs/source_zh_cn/deploy_mind_fl_cluster.md b/docs/mindfl/docs/source_zh_cn/deploy_mind_fl_cluster.md new file mode 100644 index 0000000000000000000000000000000000000000..f08c3e054a330470575762ca1c07f183b97c9fbb --- /dev/null +++ b/docs/mindfl/docs/source_zh_cn/deploy_mind_fl_cluster.md @@ -0,0 +1,299 @@ +# 云侧部署 + +`Linux` `模型训练` `中级` `高级` + + + +- [云侧部署](#云侧部署) + - [概述](#概述) + - [准备环节](#准备环节) + - [安装MindSpore](#安装mindspore) + - [定义模型](#定义模型) + - [参数配置](#参数配置) + - [启动集群](#启动集群) + - [弹性伸缩](#弹性伸缩) + - [容灾](#容灾) + + + +## 概述 + +本文档以LeNet网络为例,讲解如何使用MindSpore来部署联邦学习集群。 + +> 可以在[这里](https://gitee.com/mindspore/mindspore/tree/master/tests/st/fl/mobile)下载本文档中的完整Demo。 + +MindSpore Federated Learning Server集群物理架构如图所示: + + + +如上图所示,在联邦学习云侧集群中,有两种角色的MindSpore进程:`Federated Learning Scheduler`和`Federated Learning Server`: + +- Federated Learning Scheduler + + `Scheduler`的作用主要有两点: + + 1. 协助集群组网:在集群初始化阶段,由Scheduler负责收集Server信息,并达成集群一致性。 + 2. 开放管理面:支持用户通过`RESTful`接口对集群进行管理。 + + 在一个联邦学习任务中,只有一个Scheduler,与Server通过TCP私有协议通信。 + +- Federated Learning Server + + `Server`为执行联邦学习任务的主体,用于接收和解析来自端侧设备的数据,具有执行安全聚合、限时通信、模型存储等能力。在一个联邦学习任务中,`Server`可以有多个(用户可配置),`Server`间通过TCP私有协议通信,对外开放HTTP端口用于端侧设备连接。 + + > 在MindSpore联邦学习框架中,`Server`还支持弹性伸缩以及容灾,能够在训练任务不中断的情况下,动态调配硬件资源。 + +## 准备环节 + +### 安装MindSpore + +MindSpore联邦学习云侧集群对硬件设备无依赖,因此安装`CPU`版本的MindSpore即可。执行[官网提供的命令](https://www.mindspore.cn/install)安装MindSpore最新`CPU`版本。 + +## 定义模型 + +为了便于部署,MindSpore联邦学习的`Scheduler`和`Server`进程能够复用训练脚本,通过[参数配置](#参数配置)选择不同的启动方式。 + +本教程选择LeNet网络作为示例,具体网络结构,损失函数和优化器定义请参考[LeNet网络样例脚本](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/lenet/lenet.py)。 + +## 参数配置 + +MindSpore联邦学习任务进程复用了训练脚本,用户只需要使用相同的脚本,通过Python接口`set_fl_context`传递不同的参数,启动不同角色的MindSpore进程。参数配置说明请参考[API文档](https://mindspore.cn/doc/api_python/zh-CN/master/mindspore/mindspore.context.html#mindspore.context.set_fl_context)。 + +在确定参数配置后,用户需要在执行训练前调用`set_fl_context`接口,调用方式如下: + +```python +import mindspore.context as context +... 
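+# 注意:以下各项取值仅为示例,Scheduler的IP、各端口号等需按实际组网情况修改,
+# 各参数含义可参考上文引用的set_fl_context接口文档。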
+ +enable_fl = True +server_mode = "FEDERATED_LEARNING" +ms_role = "MS_SERVER" +server_num = 4 +scheduler_ip = "192.168.216.124" +scheduler_port = 6667 +fl_server_port = 6668 +fl_name = "LeNet" +scheduler_manage_port = 11202 + +fl_ctx = { + "enable_fl": enable_fl, + "server_mode": server_mode, + "ms_role": ms_role, + "server_num": server_num, + "scheduler_ip": scheduler_ip, + "scheduler_port": scheduler_port, + "fl_server_port": fl_server_port, + "fl_name": fl_name, + "scheduler_manage_port": scheduler_manage_port +} +context.set_fl_context(**fl_ctx) +... + +Model.train() +``` + +本示例设置了本次训练任务的模式为`联邦学习`,此训练进程角色为`Server`,本次任务需要启动`4`个`Server`才能完成集群组网,集群`Scheduler`的IP地址为`192.168.216.124`,集群`Scheduler`端口为`6667`,联邦学习`HTTP服务端口`为`6668`(由端侧设备连接),任务名为`LeNet`,集群`Scheduler`管理端口为`11202`。 + +> 部分参数只在Scheduler用到,如scheduler_manage_port,部分参数只在Server用到,如fl_server_port,为了方便部署,可将这些参数配置统一传入,MindSpore会根据进程角色,读取不同的参数配置。 +> 建议将参数配置通过Python `argparse`模块传入: + +```python +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--server_mode", type=str, default="FEDERATED_LEARNING") +parser.add_argument("--ms_role", type=str, default="MS_SERVER") +parser.add_argument("--server_num", type=int, default=4) +parser.add_argument("--scheduler_ip", type=str, default="192.168.216.124") +parser.add_argument("--scheduler_port", type=int, default=6667) +parser.add_argument("--fl_server_port", type=int, default=6668) +parser.add_argument("--fl_name", type=str, default="LeNet") +parser.add_argument("--scheduler_manage_port", type=int, default=11202) + +args, t = parser.parse_known_args() +server_mode = args.server_mode +ms_role = args.ms_role +server_num = args.server_num +scheduler_ip = args.scheduler_ip +scheduler_port = args.scheduler_port +fl_server_port = args.fl_server_port +fl_name = args.fl_name +scheduler_manage_port = args.scheduler_manage_port +``` + +> 每个Python脚本对应一个进程,若要在不同主机部署多个`Server`角色,则需要分别拉起多个进程,可以通过shell指令配合Python的方式快速启动多`Server`。可参考[示例](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile)。 + +## 启动集群 + +参考[示例](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile),启动集群。参考示例目录结构如下: + +```text +mobile/ +├── finish_mobile.py +├── run_mobile_sched.py +├── run_mobile_server.py +├── src +│   └── model.py +└── test_mobile_lenet.py +``` + +1. 启动Scheduler + + `run_mobile_sched.py`是为用户启动`Scheduler`而提供的Python脚本,并支持通过`argparse`传参修改配置。执行指令如下,代表启动本次联邦学习任务的`Scheduler`,其TCP端口为`6667`,联邦学习HTTP服务端口为`6668`,`Server`数量为`4`个,集群`Scheduler`管理端口为`11202`: + + ```sh + python run_mobile_sched.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --scheduler_manage_port=11202 + ``` + +2. 
启动Server
+
+    `run_mobile_server.py`是为用户启动若干`Server`而提供的Python脚本,支持通过`argparse`传参修改配置。执行如下指令,代表启动本次联邦学习任务的`Server`,其TCP端口为`6667`,联邦学习HTTP服务起始端口为`6668`,`Server`数量为`4`个,联邦学习任务正常进行所需的端侧设备数量为`8`个:
+
+    ```sh
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --start_fl_job_threshold=8
+    ```
+
+    以上指令等价于启动了4个`Server`进程,每个`Server`的联邦学习服务端口分别为`6668`、`6669`、`6670`和`6671`,具体实现详见[脚本run_mobile_server.py](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile/run_mobile_server.py)。
+
+    > 若只想在单机部署`Scheduler`以及`Server`,只需将`scheduler_ip`配置项修改为`127.0.0.1`即可。
+
+    若想让`Server`分布式部署在不同物理节点,可以使用`local_server_num`参数,其代表在**本节点**需要执行的`Server`进程数量:
+
+    ```sh
+    #在节点1启动3个Server进程
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --start_fl_job_threshold=8 --local_server_num=3
+    ```
+
+    ```sh
+    #在节点2启动1个Server进程
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --start_fl_job_threshold=8 --local_server_num=1
+    ```
+
+    若看到如下日志打印:
+
+    ```sh
+    Server started successfully.
+    ```
+
+    则说明启动成功。
+
+    > 以上分布式部署的指令中,`server_num`都为4,这是因为此参数代表集群全局的`Server`数量,不应随着物理节点的数量而改变。对于不同节点上的`Server`来说,它们无需感知各自的IP地址,集群的一致性和节点发现都由`Scheduler`进行调度。
+
+3. 停止联邦学习
+
+    目前采用`finish_mobile.py`脚本停止联邦学习服务器。执行如下指令来停止联邦学习集群,其中`scheduler_port`传参和启动服务器时的传参保持一致:
+
+    ```sh
+    python finish_mobile.py --scheduler_port=6667
+    ```
+
+    可看到结果:
+
+    ```sh
+    killed $PID1
+    killed $PID2
+    killed $PID3
+    killed $PID4
+    killed $PID5
+    killed $PID6
+    killed $PID7
+    killed $PID8
+    ```
+
+    说明停止服务成功。
+
+## 弹性伸缩
+
+MindSpore联邦学习框架支持`Server`的弹性伸缩,对外通过`Scheduler`管理端口提供`RESTful`服务,使得用户在不中断训练任务的情况下,对硬件资源进行动态调度。目前MindSpore的弹性伸缩仅支持水平伸缩(Scale Out/In),暂不支持垂直伸缩(Scale Up/Down)。在弹性伸缩场景下,必然会有Server进程的增加或减少。
+
+下面详细描述用户如何通过RESTful原生接口,对集群扩容/缩容进行控制。
+
+1. 扩容
+
+    在集群启动后,向`Scheduler`发起扩容请求,这里使用`curl`指令构造`RESTful`扩容请求,代表集群需要扩容2个`Server`节点:
+
+    ```sh
+    curl -i -X POST \
+    -H "Content-Type:application/json" \
+    -d \
+    '{
+      "worker_num":0,
+      "server_num":2
+    }' \
+    'http://192.168.216.124:11202/scaleout'
+    ```
+
+    这里需要拉起`2`个新的`Server`进程,并将`server_num`参数累加扩容的个数,从而保证全局组网信息的正确性。扩容后,`server_num`的数量应为`6`,执行指令:
+
+    ```sh
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6672 --server_num=6 --start_fl_job_threshold=8 --local_server_num=2
+    ```
+
+    此指令代表启动两个`Server`节点,联邦学习服务端口分别为`6672`和`6673`,总`Server`数量为`6`。
+2. 缩容
+
+    在集群启动后,向`Scheduler`发起缩容请求。由于缩容需要对具体节点进行操作,因此需要先获取节点信息:
+
+    ```sh
+    curl -i -X GET \
+    'http://192.168.216.124:11202/nodes'
+    ```
+
+    返回`json`格式的结果:
+
+    ```json
+    {
+      "message": "Get nodes info successful.",
+      "node_ids": [
+        {
+          "node_id": "40d56ffe-f8d1-4960-85fa-fdf88820402a",
+          "rank_id": "3",
+          "role": "SERVER"
+        },
+        {
+          "node_id": "1ba06348-f2e2-4ad2-be83-0d41fcb53228",
+          "rank_id": "2",
+          "role": "SERVER"
+        },
+        {
+          "node_id": "997967bb-c1ab-4916-8697-dcfaaf0354e5",
+          "rank_id": "1",
+          "role": "SERVER"
+        },
+        {
+          "node_id": "4b8d5bdf-eafd-4f5c-8cae-79008f19298a",
+          "rank_id": "0",
+          "role": "SERVER"
+        }
+      ]
+    }
+    ```
+
+    选择`Rank3`和`Rank2`进行缩容:
+
+    ```sh
+    curl -i -X POST \
+    -H "Content-Type:application/json" \
+    -d \
+    '{
+      "node_ids": ["40d56ffe-f8d1-4960-85fa-fdf88820402a", "1ba06348-f2e2-4ad2-be83-0d41fcb53228"]
+    }' \
+    'http://192.168.216.124:11202/scalein'
+    ```
+
+> - 在集群扩容/缩容成功后,训练任务会自动恢复,不需要用户进行额外干预。
+>
+> - 可以通过集群管理工具(如Kubernetes)创建或者释放`Server`资源。
+
+## 容灾
+
+在MindSpore联邦学习集群中某节点下线后,可以保持集群在线而不退出训练任务;在该节点重新被启动后,可以恢复训练任务。目前MindSpore暂时支持Server节点的容灾(Server 0除外)。
+
+节点重新启动的指令类似扩容指令,在节点被手动下线之后,执行指令:
+
+```sh
+python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6673 --server_num=6 --start_fl_job_threshold=8 --local_server_num=1
+```
+
+此指令代表重新启动了`Server`,其联邦学习服务端口为`6673`。
diff --git a/docs/mindfl/docs/source_zh_cn/fl_install.md b/docs/mindfl/docs/source_zh_cn/fl_install.md
new file mode 100644
index 0000000000000000000000000000000000000000..1bc43430be5f3ec6337dc6e6ca161d5360000373
--- /dev/null
+++ b/docs/mindfl/docs/source_zh_cn/fl_install.md
@@ -0,0 +1,19 @@
+# 获取MindFL
+
+`MindFL` `安装`
+
+## 安装概述
+
+目前MindFL框架代码已经分别集成到云侧MindSpore和端侧Lite框架中,因此需要分别获取MindSpore whl包和MindSpore Lite java安装包。其中MindSpore whl包负责云侧集群聚合训练以及与Lite的通信。MindSpore Lite java包包括两部分:一部分是MindSpore Lite训练安装包,负责模型的底层训练;另一部分是MindFL FL-Client安装包,负责模型的下发、加密以及与云侧MindSpore服务的交互。
+
+### 获取MindSpore whl包
+
+包括源码和下载发布版两种方式,支持CPU、GPU等硬件平台,任选其一安装即可。安装流程可参考MindSpore安装指南[安装章节](https://www.mindspore.cn/install)。
+
+### 获取MindSpore Lite java包
+
+包括源码和下载发布版两种方式。目前只支持x86和Android平台,只支持CPU硬件架构。安装流程可参考MindSpore Lite教程[下载章节](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)和[编译章节](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)。详见下文部署FL-Client章节。
+
+### Linux环境编译要求
+
+目前源码编译只支持Linux环境,环境要求可参考[MindSpore源码编译](https://www.mindspore.cn/install)和[MindSpore Lite源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)。
diff --git a/docs/mindfl/docs/source_zh_cn/image_classification_application.md b/docs/mindfl/docs/source_zh_cn/image_classification_application.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e918864d592277ff6f78e3202047382b1198a49
--- /dev/null
+++ b/docs/mindfl/docs/source_zh_cn/image_classification_application.md
@@ -0,0 +1,787 @@
+# 实现一个图像分类应用(x86)
+
+## 准备环节
+
+在动手进行实践之前,请确保你已经正确安装了MindSpore。如果没有,可以通过[MindSpore安装页面](https://www.mindspore.cn/install)将MindSpore安装到你的电脑上。
+
+## 下载数据集
+
+可参考[leaf数据集官方指导](https://github.com/TalwalkarLab/leaf)。
+
+本示例采用`leaf`数据集中的联邦学习数据集`FEMNIST`,该数据集包含62个不同类别的手写数字和字母(数字0~9、26个小写字母、26个大写字母),图像大小为28 x 28像素。数据集包含3500个用户的手写数字和字母(最多可模拟3500个客户端参与联邦学习),总数据量为805263,平均每个用户包含数据量为226.83,所有用户数据量的方差为88.94。
+1. 下载数据集前的环境要求
+
+    ```sh
+    numpy==1.16.4
+    scipy                # conda install scipy
+    tensorflow==1.13.1   # pip install tensorflow
+    Pillow               # pip install Pillow
+    matplotlib           # pip install matplotlib
+    jupyter              # conda install jupyter notebook==5.7.8 tornado==4.5.3
+    pandas               # pip install pandas
+    ```
+
+2. 使用git下载官方数据集生成脚本
+
+    ```sh
+    git clone https://github.com/TalwalkarLab/leaf.git
+    ```
+
+    下载项目后,目录结构如下:
+
+    ```sh
+    leaf-master/data/femnist
+    ├── data            # 用来存放指令生成的数据集
+    ├── preprocess      # 存放数据预处理的相关代码
+    ├── preprocess.sh   # femnist数据集生成shell脚本
+    └── README.md       # 官方数据集下载指导文档
+    ```
+
+3. 以`femnist`数据集为例,运行以下指令进入指定路径:
+
+    ```sh
+    cd leaf-master/data/femnist
+    ```
+
+4. 按照`README.md`文件中的说明,在终端输入指令即可下载对应数据集
+
+    运行`./preprocess.sh`时可选择以下参数:
+
+    - `-s`:='iid'以iid方式采样,或'niid'以非iid方式采样;有关“iid”和“非iid”的更多信息,请参见“说明”部分
+    - `--iu`:=用户数(如果进行iid采样);表示为用户总数的分数;默认值为0.01
+    - `--sf`:=要采样的数据部分,用十进制表示;默认值为0.1
+    - `-k`:=每个用户的最小样本数
+    - `-t`:='user'将用户划分为训练测试组,或'sample'将每个用户的样本划分为训练测试组
+    - `--tf`:=训练集中的数据部分,用小数表示;默认值为0.9
+    - `--smplseed`:=随机抽样数据之前要使用的种子
+    - `--spltseed`:=随机分割数据之前要使用的种子
+
+    例如:
+
+    - `./preprocess.sh -s niid --sf 1.0 -k 0 -t sample` (下载完整数据集)
+    - `./preprocess.sh -s niid --sf 0.05 -k 0 -t sample` (下载小型数据集)
+
+    在重新运行`preprocess.sh`之前,请确保删除数据目录中的`rem_user_data`、`sampled_data`、`test`和`train`子文件夹。
+
+5. 用指令`./preprocess.sh -s niid --sf 1.0 -k 0 -t sample`生成的数据集包含3500个用户,且按照9:1对每个用户的数据划分训练集和测试集。
+
+    运行之后目录结构如下:
+
+    ```sh
+    leaf-master/data/femnist/35_client_sf1_data/
+    ├── all_data   # 所有数据集混合在一起,不区分训练测试集,共包含35个json文件,每个json文件包含100个用户的数据
+    ├── test       # 按照9:1对每个用户的数据划分训练和测试集后的测试集,共包含35个json文件,每个json文件包含100个用户的数据
+    ├── train      # 按照9:1对每个用户的数据划分训练和测试集后的训练集,共包含35个json文件,每个json文件包含100个用户的数据
+    └── ...        # 其他文件,不需要用到,不作介绍
+    ```
+
+    其中每个json文件包含以下三个部分:
+
+    - `users`: 用户列表
+
+    - `num_samples`: 每个用户的样本数量列表
+
+    - `user_data`: 一个以用户名为key、以它们各自的数据为value的字典对象;对于每个用户,数据表示为图像列表,每张图像表示为大小为784的整数列表(将28 x 28图像数组展平所得)
+
+6. 将35个json文件划分为3500个json文件(每个json文件代表一个用户)
+
+    参考代码如下:
+
+    ```python
+    import os
+    import json
+
+    def partition_json(root_path, new_root_path):
+        """
+        partition 35 json files to 3500 json files
+
+        Each raw .json file is an object with 3 keys:
+        1. 'users', a list of users
+        2. 'num_samples', a list of the number of samples for each user
+        3. 'user_data', an object with user names as keys and their respective data as values; for each user, data is represented as a list of images, with each image represented as a size-784 integer list (flattened from 28 by 28)
+
+        Each new .json file is an object with 3 keys:
+        1. 'user_name', the name of the user
+        2. 'num_samples', the number of samples for the user
+        3. 'user_data', a dict object with 'x' as key and the samples as value, and 'y' as key and the corresponding labels as value
+
+        Args:
+            root_path (str): raw root path of 35 json files
+            new_root_path (str): new root path of 3500 json files
+        """
+        paths = os.listdir(root_path)
+        count = 0
+        file_num = 0
+        for i in paths:
+            file_num += 1
+            file_path = os.path.join(root_path, i)
+            print('======== process ' + str(file_num) + ' file: ' + str(file_path) + '======================')
+            with open(file_path, 'r') as load_f:
+                load_dict = json.load(load_f)
+                users = load_dict['users']
+                num_users = len(users)
+                num_samples = load_dict['num_samples']
+                for j in range(num_users):
+                    count += 1
+                    print('---processing user: ' + str(count) + '---')
+                    cur_out = {'user_name': None, 'num_samples': None, 'user_data': {}}
+                    cur_user_id = users[j]
+                    cur_data_num = num_samples[j]  # 按用户下标j取该用户的样本数
+                    cur_user_path = os.path.join(new_root_path, cur_user_id + '.json')
+                    cur_out['user_name'] = cur_user_id
+                    cur_out['num_samples'] = cur_data_num
+                    cur_out['user_data'].update(load_dict['user_data'][cur_user_id])
+                    with open(cur_user_path, 'w') as f:
+                        json.dump(cur_out, f)
+        f = os.listdir(new_root_path)
+        print(len(f), ' users have been processed!')
+
+    # partition train json files
+    partition_json("leaf-master/data/femnist/35_client_sf1_data/train", "leaf-master/data/femnist/3500_client_json/train")
+    # partition test json files
+    partition_json("leaf-master/data/femnist/35_client_sf1_data/test", "leaf-master/data/femnist/3500_client_json/test")
+    ```
+
+    其中`root_path`为`leaf-master/data/femnist/35_client_sf1_data/{train,test}`,`new_root_path`自行设置,用于存放生成的3500个用户json文件,需分别对训练和测试文件夹进行处理。
+
+    新生成的3500个用户json文件,每个文件均包含以下三个部分:
+
+    - `user_name`: 用户名
+    - `num_samples`: 用户的样本数
+    - `user_data`: 一个以'x'为key、以用户数据为value,并以'y'为key、以用户数据对应的标签为value的字典对象
+7. 将json文件转换为图片文件
+
+    可参考如下代码(代码中`name_list`为按标签索引的类别名称列表,其定义从略):
+
+    ```python
+    import os
+    import json
+
+    import numpy as np
+    from PIL import Image
+
+    def mkdir(path):
+        # 目录不存在时逐级创建
+        if not os.path.exists(path):
+            os.makedirs(path)
+
+    def json_2_numpy(img_size, file_path):
+        """
+        read json file to numpy
+        Args:
+            img_size (list): contain three elements: the height, width, channel of image
+            file_path (str): root path of 3500 json files
+        return:
+            image_numpy (numpy)
+            label_numpy (numpy)
+        """
+        # open json file
+        with open(file_path, 'r') as load_f_train:
+            load_dict = json.load(load_f_train)
+            num_samples = load_dict['num_samples']
+            x = load_dict['user_data']['x']
+            y = load_dict['user_data']['y']
+            size = (num_samples, img_size[0], img_size[1], img_size[2])
+            image_numpy = np.array(x, dtype=np.float32).reshape(size)  # mindspore doesn't support float64 and int64
+            label_numpy = np.array(y, dtype=np.int32)
+        return image_numpy, label_numpy
+
+    def json_2_img(json_path, save_path):
+        """
+        transform single json file to images
+
+        Args:
+            json_path (str): the path of json file
+            save_path (str): the root path to save images
+
+        """
+        data, label = json_2_numpy([28, 28, 1], json_path)  # 原始图像为28 x 28的单通道灰度图,每张图展平后为784个值
+        for i in range(data.shape[0]):
+            img = data[i] * 255  # PIL doesn't support the 0/1 image, need to convert to 0~255 image
+            im = Image.fromarray(np.squeeze(img))
+            im = im.convert('L')
+            img_name = str(label[i]) + '_' + name_list[label[i]] + '_' + str(i) + '.png'
+            path1 = os.path.join(save_path, str(label[i]))
+            mkdir(path1)
+            img_path = os.path.join(path1, img_name)
+            im.save(img_path)
+            print('-----', i, '-----')
+
+    def all_json_2_img(root_path, save_root_path):
+        """
+        transform json files to images
+        Args:
+            root_path (str): the root path of 3500 json files
+            save_root_path (str): the root path to save images
+        """
+        usage = ['train', 'test']
+        for i in range(2):
+            x = usage[i]
+            files_path = os.path.join(root_path, x)
+            files = os.listdir(files_path)
+
+            for name in files:
+                user_name = name.split('.')[0]
+                json_path = os.path.join(files_path, name)
+                save_path1 = os.path.join(save_root_path, user_name)
+                mkdir(save_path1)
+                save_path = os.path.join(save_path1, x)
+                mkdir(save_path)
+                print('=============================' + name + '=======================')
+                json_2_img(json_path, save_path)
+
+    all_json_2_img("leaf-master/data/femnist/3500_client_json/", "leaf-master/data/femnist/3500_client_img/")
+    ```
+
+8.
将图片数据集转换为联邦学习框架可用的bin文件格式 + + 可参考下面代码 + + ```python + def img2bin(root_path, root_save): + + """ + transform images to bin files + + Args: + root_path: the root path of 3500 images files + root_save: the root path to save bin files + + """ + + use_list=[] + train_batch_num=[] + test_batch_num=[] + mkdir(root_save) + users = os.listdir(root_path) + for user in users: + use_list.append(user) + user_path = os.path.join(root_path, user) + train_test = os.listdir(user_path) + for tag in train_test: + data_path = os.path.join(user_path, tag) + dataset = create_dataset_from_folder(data_path, (32, 32, 1), 32) + batch_num = 0 + img_list = [] + label_list = [] + for data in dataset.create_dict_iterator(): + batch_x_tensor = data['image'] + batch_y_tensor = data['label'] + trans_img = np.transpose(batch_x_tensor.asnumpy(), [0, 2, 3, 1]) + img_list.append(trans_img) + label_list.append(batch_y_tensor.asnumpy()) + batch_num += 1 + + if tag == "train": + train_batch_num.append(batch_num) + elif tag == "test": + test_batch_num.append(batch_num) + + imgs = np.array(img_list) # (batch_num, 32,3,32,32) + labels = np.array(label_list) + path1 = os.path.join(root_save, user) + mkdir(path1) + image_path = os.path.join(path1, user + "_" + "bn_" + str(batch_num) + "_" + tag + "_data.bin") + label_path = os.path.join(path1, user + "_" +"bn_" + str(batch_num) + "_" + tag + "_label.bin") + + imgs.tofile(image_path) + labels.tofile(label_path) + print("use: "+user+ " "+ tag +"_batch_num: " + str(batch_num)) + print(len(use_list), len(train_batch_num), len(test_batch_num)) + for k1 in range(len(use_list)): + print("\""+use_list[k1]+"\"",end=", ") + print() + for k2 in range(len(train_batch_num)): + print(str(train_batch_num[k2]),end=", ") + print() + for k3 in range(len(test_batch_num)): + print(str(test_batch_num[k3]), end=", ") + print() + root_path = "leaf-master/data/femnist/3500_client_img/" + root_save = "leaf-master/data/femnist/3500_clients_bin" + img2bin(root_path, root_save) + ``` + +9. 生成3500_clients_bin文件夹内共包含3500个用户文件夹,其目录结构如下: + + ```sh + leaf-master/data/femnist/3500_clients_bin + ├── f0000_14 # 用户编号 + │ ├── f0000_14_bn_10_train_data.bin # 用户f0000_14的训练数据 (bn_后面的数字10代表batch number) + │ ├── f0000_14_bn_10_train_label.bin # 用户f0000_14的训练标签 + │ ├── f0000_14_bn_1_test_data.bin # 用户f0000_14的测试数据 (bn_后面的数字1代表batch number) + │ └── f0000_14_bn_1_test_label.bin # 用户f0000_14的测试标签 + ├── f0001_41 # 用户编号 + │ ├── f0001_41_bn_11_train_data.bin # 用户f0001_41的训练数据 (bn_后面的数字11代表batch number) + │ ├── f0001_41_bn_11_train_label.bin # 用户f0001_41的训练标签 + │ ├── f0001_41_bn_1_test_data.bin # 用户f0001_41的测试数据 (bn_后面的数字1代表batch number) + │ └── f0001_41_bn_1_test_label.bin # 用户f0001_41的测试标签 + │ + │ + │ ... 
+ │ + │ + └── f4099_10 # 用户编号 + ├── f4099_10_bn_4_train_data.bin # 用户f4099_10的训练数据 (bn_后面的数字4代表batch number) + ├── f4099_10_bn_4_train_label.bin # 用户f4099_10的训练标签 + ├── f4099_10_bn_1_test_data.bin # 用户f4099_10的测试数据 (bn_后面的数字1代表batch number) + └── f4099_10_bn_1_test_label.bin # 用户f4099_10的测试标签 + ``` + +## 定义网络 + +我们选择相对简单的LeNet网络。LeNet网络不包括输入层的情况下,共有7层:2个卷积层、2个下采样层(池化层)、3个全连接层。每层都包含不同数量的训练参数,如下图所示: + +![LeNet5](images/LeNet_5.jpg) + +> 更多的LeNet网络的介绍不在此赘述,希望详细了解LeNet网络,可以查询http://yann.lecun.com/exdb/lenet/。 + +网络定义流程[可参考model.py文件]( https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile/src/model.py)。 + +具体网络定义流程可参考[MindSpore官方图片分类任务文档]( https://www.mindspore.cn/tutorial/training/zh-CN/master/quick_start/quick_start.html#%E5%AE%9A%E4%B9%89%E7%BD%91%E7%BB%9C )。 + +## 定义训练过程 + +可参考如下代码: + +```python +import argparse +import numpy as np + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.nn import TrainOneStepCell, WithLossCell +from src.model import LeNet5 +from src.adam import AdamWeightDecayOp + +parser = argparse.ArgumentParser(description="test_fl_lenet") +parser.add_argument("--device_target", type=str, default="CPU") +parser.add_argument("--server_mode", type=str, default="FEDERATED_LEARNING") +parser.add_argument("--ms_role", type=str, default="MS_WORKER") +parser.add_argument("--worker_num", type=int, default=0) +parser.add_argument("--server_num", type=int, default=1) +parser.add_argument("--scheduler_ip", type=str, default="127.0.0.1") +parser.add_argument("--scheduler_port", type=int, default=8113) +parser.add_argument("--fl_server_port", type=int, default=6666) +parser.add_argument("--start_fl_job_threshold", type=int, default=1) +parser.add_argument("--start_fl_job_time_window", type=int, default=3000) +parser.add_argument("--update_model_ratio", type=float, default=1.0) +parser.add_argument("--update_model_time_window", type=int, default=3000) +parser.add_argument("--fl_name", type=str, default="Lenet") +parser.add_argument("--fl_iteration_num", type=int, default=25) +parser.add_argument("--client_epoch_num", type=int, default=20) +parser.add_argument("--client_batch_size", type=int, default=32) +parser.add_argument("--client_learning_rate", type=float, default=0.1) +parser.add_argument("--scheduler_manage_port", type=int, default=11202) + +args, _ = parser.parse_known_args() +device_target = args.device_target +server_mode = args.server_mode +ms_role = args.ms_role +worker_num = args.worker_num +server_num = args.server_num +scheduler_ip = args.scheduler_ip +scheduler_port = args.scheduler_port +fl_server_port = args.fl_server_port +start_fl_job_threshold = args.start_fl_job_threshold +start_fl_job_time_window = args.start_fl_job_time_window +update_model_ratio = args.update_model_ratio +update_model_time_window = args.update_model_time_window +fl_name = args.fl_name +fl_iteration_num = args.fl_iteration_num +client_epoch_num = args.client_epoch_num +client_batch_size = args.client_batch_size +client_learning_rate = args.client_learning_rate +scheduler_manage_port = args.scheduler_manage_port + +ctx = { + "enable_fl": True, + "server_mode": server_mode, + "ms_role": ms_role, + "worker_num": worker_num, + "server_num": server_num, + "scheduler_ip": scheduler_ip, + "scheduler_port": scheduler_port, + "fl_server_port": fl_server_port, + "start_fl_job_threshold": start_fl_job_threshold, + "start_fl_job_time_window": start_fl_job_time_window, + "update_model_ratio": update_model_ratio, + 
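+    # 以下参数控制联邦学习迭代轮数、客户端本地训练行为以及Scheduler管理端口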
"update_model_time_window": update_model_time_window, + "fl_name": fl_name, + "fl_iteration_num": fl_iteration_num, + "client_epoch_num": client_epoch_num, + "client_batch_size": client_batch_size, + "client_learning_rate": client_learning_rate, + "scheduler_manage_port": scheduler_manage_port +} + +context.set_context(mode=context.GRAPH_MODE, device_target=device_target, save_graphs=False) +context.set_fl_context(**ctx) # 设置联邦学习训练流程相关参数 + +if __name__ == "__main__": + epoch = 5 + np.random.seed(0) + network = LeNet5(62) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") + net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) + net_adam_opt = AdamWeightDecayOp(network.trainable_params(), weight_decay=0.1) + net_with_criterion = WithLossCell(network, criterion) + train_network = TrainOneStepCell(net_with_criterion, net_opt) + train_network.set_train() + losses = [] + + for _ in range(epoch): + data = Tensor(np.random.rand(32, 3, 32, 32).astype(np.float32)) + label = Tensor(np.random.randint(0, 61, (32)).astype(np.int32)) + loss = train_network(data, label).asnumpy() + losses.append(loss) + print(losses) +``` + +其中字典`ctx`中参数`enable_fl`用于设置是否启动联邦学习训练流程,为`true`代表启动联邦学习流程,为`false`代表启动普通训练流程,其中其他参数可以根据实际情况进行设置。由于只需要生成可用的模型文件即可,上面脚步中`data`和`label`均采用的模拟数据。 + +具体优化器损失函数定义可参考[MindSpore官方文档](https://www.mindspore.cn/tutorial/training/zh-CN/master/quick_start/quick_start.html#%E5%AE%9A%E4%B9%89%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0%E5%8F%8A%E4%BC%98%E5%8C%96%E5%99%A8)。 + +## 生成端侧模型文件 + +1. **将模型导出为MindIR格式文件** + + 可在训练流程代码中添加`export`语句获取MindIR格式模型文件, 示例代码如下: + + ```python + from mindspore import export + ... + ... + ... + for _ in range(epoch): + data = Tensor(np.random.rand(32, 3, 32, 32).astype(np.float32)) + label = Tensor(np.random.randint(0, 61, (32)).astype(np.int32)) + loss = train_network(data, label).asnumpy() + losses.append(loss) + mindir_name = "lenet_train.mindir" + export(train_network, data, label, file_name= mindir_name, file_format='MINDIR') # 添加export语句获取MindIR格式模型文件 + print(losses) + ``` + + 具体可参考[这里](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/save_model.html?highlight=mindir#mindir )。 + +2. **将MindIR文件转化为联邦学习端侧框架可用的ms文件** + + 具体模型转换教程可参考[训练模型转换教程](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_train.html )。 + + 模型转换示例如下: + + 假设待转换的模型文件为`lenet_train.mindir`,执行如下转换命令: + + ```sh + ./converter_lite --fmk=MINDIR --trainModel=true --modelFile=lenet_train.mindir --outputFile=lenet_train + ``` + + 转换成功输出如下: + + ```sh + CONVERTER RESULT SUCCESS:0 + ``` + + 这表明 MindSpore 模型成功转换为 MindSpore 端侧模型,并生成了新文件`lenet_train.ms`。如果转换失败输出如下: + + ```sh + CONVERT RESULT FAILED: + ``` + + 将生成的`.ms`格式的模型文件放在某个路径上,在调用联邦学习接口时可设置FLParameter.trainModelPath为该模型文件的路径。 + +## 模拟启动多客户端参与联邦学习 + +之后可编写一个Python脚本,调用联邦学习框架jar包 (x86环境联邦学习jar包获取可参考[FL-Client部署教程中编译出包流程](https://gitee.com/mindspore/docs/blob/master/docs/mindfl/docs/source_zh_cn/deploy_fl_client.md)) 来模拟启动多客户端联邦学习任务。 + +1. 
**以Lenet网络为例,参考脚本`run.py`如下:**
+
+    ```python
+    import os
+    import argparse
+    import subprocess
+    import random
+
+    parser = argparse.ArgumentParser(description="Run TestClient.java case")
+    parser.add_argument("--jarPath", type=str, default="mindspore-lite-java-flclient.jar")  # must be absolute path
+    parser.add_argument("--train_dataset", type=str, default="leaf-master/data/femnist/3500_clients_bin/")  # must be absolute path
+    parser.add_argument("--test_dataset", type=str, default="null")  # must be absolute path
+    parser.add_argument("--vocal_file", type=str, default="null")  # must be absolute path
+    parser.add_argument("--ids_file", type=str, default="null")  # must be absolute path
+    parser.add_argument("--flName", type=str, default="lenet")
+    parser.add_argument("--train_model_path", type=str, default="ms/lenet/")  # must be absolute path of .ms files
+    parser.add_argument("--infer_model_path", type=str, default="ms/lenet/")  # must be absolute path of .ms files
+    parser.add_argument("--ip", type=str, default="http://10.113.216.106:")
+    parser.add_argument("--ssl", type=str, default="false")
+    parser.add_argument("--port", type=int, default=6668)
+    parser.add_argument("--server_num", type=int, default=0)
+    parser.add_argument("--worker_num", type=int, default=0)
+    parser.add_argument("--time_window", type=int, default=6000)
+    parser.add_argument("--use_elb", type=str, default="false")
+    parser.add_argument("--task", type=str, default="train")
+
+    args, _ = parser.parse_known_args()
+    jarPath = args.jarPath
+    train_dataset = args.train_dataset
+    test_dataset = args.test_dataset
+    vocal_file = args.vocal_file
+    ids_file = args.ids_file
+    flName = args.flName
+    train_model_path = args.train_model_path
+    infer_model_path = args.infer_model_path
+    ip = args.ip
+    ssl = args.ssl
+    port = args.port
+    server_num = args.server_num
+    worker_num = args.worker_num
+    use_elb = args.use_elb
+    task = args.task
+
+    users = os.listdir(train_dataset)
+
+    def get_client_data_path(data_root_path, user):
+        use_path = os.path.join(data_root_path, user)
+        bin_file_paths = os.listdir(use_path)
+
+        train_data_path = ""
+        train_label_path = ""
+        train_batch_num = ""
+
+        test_data_path = ""
+        test_label_path = ""
+        test_batch_num = ""
+
+        for file in bin_file_paths:
+            info = file.split(".")[0].split("_")
+            if info[4] == "train" and info[5] == "data":
+                train_data_path = os.path.join(use_path, file)
+                train_batch_num = info[3]
+            elif info[4] == "train" and info[5] == "label":
+                train_label_path = os.path.join(use_path, file)
+            elif info[4] == "test" and info[5] == "data":
+                test_data_path = os.path.join(use_path, file)
+                test_batch_num = info[3]
+            elif info[4] == "test" and info[5] == "label":
+                test_label_path = os.path.join(use_path, file)
+        train_path = train_data_path + "," + train_label_path
+        test_path = test_data_path + "," + test_label_path
+        return train_path, test_path, train_batch_num, test_batch_num
+
+    for i in range(worker_num):
+        flId = "f" + str(i)
+        user = users[i]
+        train_path, test_path, _, _ = get_client_data_path(train_dataset, user)  # 数据集根目录即上面解析出的train_dataset
+        print("===========================")
+        print("fl id: ", flId)
+        print("train path: ", train_path)
+        print("test path: ", test_path)
+
+        cmd_client = "execute_path=$(pwd) && self_path=$(dirname \"${script_self}\") && "
+        cmd_client += "rm -rf ${execute_path}/client_" + str(i) + "/ &&"
+        cmd_client += "mkdir ${execute_path}/client_" + str(i) + "/ &&"
+        cmd_client += "cd ${execute_path}/client_" + str(i) + "/ || exit &&"
+        cmd_client += 
"java -jar " + cmd_client += jarPath + " " + cmd_client += train_path + " " + cmd_client += vocal_file + " " + cmd_client += ids_file + " " + cmd_client += test_path + " " + cmd_client += flName + " " + cmd_client += train_model_path + "lenet_train" + str(i) + ".ms" + " " + print("model path: ", train_model_path + "lenet_train" + str(i) + ".ms" + " ") + cmd_client += infer_model_path + "lenet_train" + str(i) + ".ms" + " " + print("model path: ", infer_model_path + "lenet_train" + str(i) + ".ms" + " ") + cmd_client += flId + " " + cmd_client += ip + " " + cmd_client += ssl + " " + cmd_client += str(port) + " " + cmd_client += use_elb + " " + cmd_client += str(server_num) + " " + cmd_client += task + " " + cmd_client += " > client" + ".log 2>&1 &" + subprocess.call(['bash', '-c', cmd_client]) + + ``` + + run.py脚本中入参含义如下,可根据实际情况进行设置: + + 以下涉及路径的必须给出绝对路径 + + - **`--jarPath`** + + 设置联邦学习jar包路径,x86环境联邦学习jar包获取可参考[FL-Client部署教程中编译出包流程](https://gitee.com/mindspore/docs/blob/master/docs/mindfl/docs/source_zh_cn/deploy_fl_client.md)。 + + - **`--train_dataset`** + + 训练数据集root路径, 情感分类任务在该root路径中存放的是每个客户端的训练数据(txt文件格式);lenet图片分类任务在该root路径中存放的是每个客户端的训练data.bin文件与label.bin文件,例如`leaf-master/data/femnist/3500_clients_bin/`。 + + - **`--test_dataset`** + + 测试数据集路径 , lenet图片分类任务不需要设置该参数,默认为null;情感分类任务不设置该参数代表训练过程中不进行验证。 + + - **`--vocal_file`** + + 设置数据预处理的词典文件路径,lenet网络设置为null,情感分类任务给出实际路径,必须为绝对路径。 + + - **`--ids_file`** + + 设置词典的映射id文件路径,lenet网络设置为null,情感分类任务给出实际路径,必须为绝对路径。 + + - **`--flName`** + + 设置联邦学习模型名称,目前只支持`lenet`(采用lenet网络进行图片分类任务)和`adbert`(采用albert网络进行情感分类任务)。 + + - **`--train_model_path`** + + 设置联邦学习使用的训练模型路径,为上面教程中拷贝的多份.ms文件所存放的目录,比如`ms/lenet`,必须为绝对路径。 + + - **`--infer_model_path`** + + 联邦学习使用的推理模型路径,为.ms格式的模型文件的绝对路径, 情感分类任务必须设置;lenet图片分类任务可设置为与train_model_path相同。 + + - **`--ip`** + + 设置ip地址 , 即启动server端的服务器地址, 格式为:http://10.113.216.106:。 + + - **`--ssl`** + + 设置端云通信是否进行ssl证书认证,默认不进行。 + + - **`--port`** + + 设置端口号,与启动server端时的`fl_server_port`参数保持一致, 格式为: 6668。 + + - **`--server_num`** + + 设置server数量,与启动server端时的`server_num`参数保持一致,用于模拟客户端随机选择不同的server发送信息,真实场景不需要此参数。 + + - **`--worker_num`** + + 设置client数量, 与启动server端时的`start_fl_job_cnt`保持一致,真实场景不需要此参数。 + + - **`--use_elb`** + + 用于多server场景,为true代表客户端每个round的请求都采用指定范围内的随机端口,false则采用固定端口。默认为false,当启动server端的`server_num`大于1时,该参数需设置成true。用于模拟客户端随机选择不同的server发送信息,真实场景不需要此参数。 + + - **`--task`** + + 用于设置本此启动的任务类型,为`train`代表启动训练任务,为`inference`代表启动多条数据推理任务, 为`getModel`代表启动获取云侧模型的任务, 设置其他字符串代表启动单条数据推理任务。默认为`train`。由于初始的模型文件(.ms文件)是未训练过的,建议先启动训练任务,待训练完成之后,再启动推理任务(注意两次启动的`worker_num`保持一致,以保证`inference`使用的模型文件与`train`保持一致)。 + +2. 
**为客户端准备好模型文件**
+
+    由于真实场景中一个客户端对应一个.ms格式的模型文件,在模拟场景中,需要拷贝多份.ms文件,并按照`lenet_train{i}.ms`格式进行命名。其中i代表客户端编号,按照`run.py`脚本中的设置,需要设置为`0, 1, 2, 3, 4, 5 .....`等数字,每个客户端各使用一份.ms文件。
+
+    可参考下面脚本,对原始.ms文件进行拷贝和命名:
+
+    ```python
+    import shutil
+    import os
+
+    def copy_file(raw_path, new_path, copy_num):
+        # Copy the specified number of files from the raw path to the new path
+        for i in range(copy_num):
+            file_name = "lenet_train" + str(i) + ".ms"
+            new_file_path = os.path.join(new_path, file_name)
+            shutil.copy(raw_path, new_file_path)
+            print('====== copying ', i, ' file ======')
+        print("the number of copied .ms files: ", len(os.listdir(new_path)))
+
+    if __name__ == "__main__":
+        raw_path = "lenet_train.ms"
+        new_path = "ms/lenet"
+        num = 5
+        copy_file(raw_path, new_path, num)
+    ```
+
+    其中`raw_path`设置原始.ms文件路径,`new_path`设置拷贝出的.ms文件需要放置的路径,`num`设置拷贝的份数,一般需与模拟启动的客户端数量一致。
+
+    比如按以上脚本中的设置,在路径`ms/lenet`中生成了供5个客户端使用的.ms文件,其目录结构如下:
+
+    ```sh
+    ms/lenet
+    ├── lenet_train0.ms  # 客户端0使用的.ms文件
+    ├── lenet_train1.ms  # 客户端1使用的.ms文件
+    ├── lenet_train2.ms  # 客户端2使用的.ms文件
+    ├── lenet_train3.ms  # 客户端3使用的.ms文件
+    └── lenet_train4.ms  # 客户端4使用的.ms文件
+    ```
+
+3. **启动客户端**
+
+    运行`run.py`,指令如下:
+
+    ```sh
+    python run.py --ip=http://10.113.216.106: --port=6668 --server_num=8 --worker_num=5 --task=train
+    ```
+
+    该指令代表启动5个客户端参与联邦学习。若启动成功,会在当前文件夹生成5个客户端对应的日志文件,查看日志文件内容可了解每个客户端的运行情况:
+
+    ```sh
+    ./
+    ├── client_0
+    │   └── client.log  # 客户端0的日志文件
+    ├── client_1
+    │   └── client.log  # 客户端1的日志文件
+    ├── client_2
+    │   └── client.log  # 客户端2的日志文件
+    ├── client_3
+    │   └── client.log  # 客户端3的日志文件
+    └── client_4
+        └── client.log  # 客户端4的日志文件
+    ```
+
+4. **关闭客户端进程**
+
+    可参考`finish.py`脚本,具体如下:
+
+    ```python
+    import os
+    import argparse
+    import subprocess
+
+    parser = argparse.ArgumentParser(description="Finish test_mobile_lenet.py case")
+    parser.add_argument("--kill_tag", type=str, default="mindspore-lite-java-flclient")
+
+    args, _ = parser.parse_known_args()
+    kill_tag = args.kill_tag
+
+    cmd = "pid=`ps -ef|grep " + kill_tag
+    cmd += " |grep -v \"grep\" | grep -v \"finish\" |awk '{print $2}'` && "
+    cmd += "for id in $pid; do kill -9 $id && echo \"killed $id\"; done"
+
+    subprocess.call(['bash', '-c', cmd])
+    ```
+
+    关闭客户端指令如下:
+
+    ```sh
+    python finish.py --kill_tag=mindspore-lite-java-flclient
+    ```
+
+    其中参数`--kill_tag`用于搜索关键字并kill匹配到的客户端进程,设置为`--jarPath`中的特殊关键字即可。默认为`mindspore-lite-java-flclient`,即联邦学习jar包名。
+
+    假设启动了5个客户端,每个客户端包含一个Python进程和一个java进程,关闭成功会有以下打印:
+
+    ```sh
+    killed 56427
+    killed 56432
+    killed 56435
+    killed 56444
+    killed 56449
+    killed 56451
+    killed 56452
+    killed 56461
+    killed 56465
+    killed 56474
+    ```
+
+    即有10个进程成功被kill。
+
+5.
5. **实验结果**

    目前**`3500_clients_bin`**文件夹中包含3500个客户端的数据,本脚本最多可模拟3500个客户端参与联邦学习。

    下图给出了50个客户端(设置`server_num`为16)进行联邦学习的测试集精度:

    ![lenet_50_clients_acc](images/lenet_50_clients_acc.png)

    其中联邦学习总迭代数为100,客户端本地训练epoch数为20,batchSize设置为32。

    图中测试精度指:对于每个联邦学习迭代,各客户端测试集在云侧聚合后的模型上的精度。

    AVG:对于每个联邦学习迭代,50个客户端测试集精度的平均值。

    TOP5:对于每个联邦学习迭代,测试集精度最高的5个客户端的精度平均值。

    LOW5:对于每个联邦学习迭代,测试集精度最低的5个客户端的精度平均值。
\ No newline at end of file
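上述AVG、TOP5、LOW5三个指标的计算方式,可以用下面的Python草图直观说明(其中精度矩阵`acc`为随机生成的示意数据,并非真实实验结果):

```python
import numpy as np

# 假设 acc[r][c] 为第 r 个联邦学习迭代中,客户端 c 的测试集精度(此处用随机数示意)
rounds, clients = 100, 50
acc = np.random.rand(rounds, clients)

sorted_acc = np.sort(acc, axis=1)         # 每个迭代内按精度从低到高排序
avg = acc.mean(axis=1)                    # AVG:50个客户端精度的平均值
top5 = sorted_acc[:, -5:].mean(axis=1)    # TOP5:精度最高的5个客户端的精度平均值
low5 = sorted_acc[:, :5].mean(axis=1)     # LOW5:精度最低的5个客户端的精度平均值
print(avg[-1], top5[-1], low5[-1])        # 打印最后一个迭代的三项指标
```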
diff --git a/docs/mindfl/docs/source_zh_cn/images/LeNet_5.jpg b/docs/mindfl/docs/source_zh_cn/images/LeNet_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7894b0e181d965c5e9cbba91fe240c1890d37bda
Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/images/LeNet_5.jpg differ
diff --git a/docs/mindfl/docs/source_zh_cn/images/MindFL-Networking.png b/docs/mindfl/docs/source_zh_cn/images/MindFL-Networking.png
new file mode 100644
index 0000000000000000000000000000000000000000..cb43d016e916600287e545adb3f8355167831533
Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/images/MindFL-Networking.png differ
diff --git a/docs/mindfl/docs/source_zh_cn/images/MindFL-architecture.png b/docs/mindfl/docs/source_zh_cn/images/MindFL-architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..afab5c0156a860bc392a72248b43045226f376fc
Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/images/MindFL-architecture.png differ
diff --git a/docs/mindfl/docs/source_zh_cn/images/create_android_project.png b/docs/mindfl/docs/source_zh_cn/images/create_android_project.png
new file mode 100644
index 0000000000000000000000000000000000000000..3c0e1ef0c00d27ca0abf1de363e60202c5fdc872
Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/images/create_android_project.png differ
diff --git a/docs/mindfl/docs/source_zh_cn/images/lenet_50_clients_acc.png b/docs/mindfl/docs/source_zh_cn/images/lenet_50_clients_acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..c1282811f7161d77ec2ea563d96983ef293dbf43
Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/images/lenet_50_clients_acc.png differ
diff --git a/docs/mindfl/docs/source_zh_cn/images/start_android_project.png b/docs/mindfl/docs/source_zh_cn/images/start_android_project.png
new file mode 100644
index 0000000000000000000000000000000000000000..4c12def630d28ececea9d63fb9fa0042cc26bea9
Binary files /dev/null and b/docs/mindfl/docs/source_zh_cn/images/start_android_project.png differ
diff --git a/docs/mindfl/docs/source_zh_cn/index.rst b/docs/mindfl/docs/source_zh_cn/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f76bbfcc2498bb7a7b77b5fa415c8a4870fff2c4
--- /dev/null
+++ b/docs/mindfl/docs/source_zh_cn/index.rst
@@ -0,0 +1,28 @@
.. MindSpore documentation master file, created by
   sphinx-quickstart on Thu Mar 24 11:00:00 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

MindSpore Federated Learning
==================================

.. toctree::
   :maxdepth: 1
   :caption: 安装部署

   fl_install
   deploy_mind_fl_cluster
   deploy_fl_client

.. toctree::
   :maxdepth: 1
   :caption: 应用实践

   image_classification_application
   sentiment_classification_application

.. toctree::
   :maxdepth: 1
   :caption: 模型安全和隐私

   security_and_privacy_protection
diff --git a/docs/mindfl/docs/source_zh_cn/security_and_privacy_protection.md b/docs/mindfl/docs/source_zh_cn/security_and_privacy_protection.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e1c6c3e71c71e61015f27ccd672752bb20f7930
--- /dev/null
+++ b/docs/mindfl/docs/source_zh_cn/security_and_privacy_protection.md
@@ -0,0 +1,76 @@
# 模型安全与隐私

联邦学习过程中,用户数据仅用于本地设备训练,不需要上传至中心服务器,可以避免用户个人数据的直接泄露。
然而传统联邦学习框架中,模型以明文形式上云,仍然存在间接泄露用户隐私的风险。
敌手获取到用户上传的明文模型后,可以通过重构、模型逆向等攻击恢复用户的个人训练数据,导致用户隐私泄露。
MindSpore FL(以下简称MindFL)联邦学习框架,提供了基于本地差分隐私(LDP)和基于多方安全计算(MPC)的安全聚合算法,
在本地模型上云前对其进行加噪或加扰。在保证模型可用性的前提下,解决横向联邦学习中的隐私泄露问题。

## 基于LDP的安全聚合

### 原理概述

差分隐私(differential privacy)是一种保护用户数据隐私的机制。**差分隐私定义**为:
$$
Pr[\mathcal{K}(D)\in S] \le e^{\epsilon} Pr[\mathcal{K}(D') \in S]+\delta
$$
对于两个差别只有一条记录的数据集$D, D'$,通过随机算法$\mathcal{K}$,输出结果为集合$S$子集的概率满足上面公式。$\epsilon$为差分隐私预算,$\delta$为松弛项。$\epsilon$和$\delta$越小,说明$\mathcal{K}$在$D$和$D'$上输出的数据分布越接近。

在横向联邦学习中,假设客户端本地训练之后的模型权重矩阵是$W$,由于模型在训练过程中会“记住”训练集的特征,所以敌手可以借助$W$还原出用户的训练数据集[1]。MindFL提供基于本地差分隐私的安全聚合算法,防止本地模型上云时泄露隐私数据。MindFL客户端会生成一个与本地模型$W$相同维度的差分噪声矩阵$G$,然后将二者相加,得到一个满足差分隐私定义的权重$W_p$:
$$
W_p=W+G
$$
MindFL客户端将加噪后的模型$W_p$上传至云侧服务器进行联邦聚合。噪声矩阵$G$相当于给原模型加上了一层掩码,在降低模型泄露敏感数据风险的同时,也会影响模型训练的收敛性。如何在模型隐私性和可用性之间取得更好的平衡,仍然是一个值得研究的问题。实验表明,当参与方的数量$n$足够大时(一般指1000以上),大部分噪声能够相互抵消,本地差分机制对聚合模型的精度和收敛性没有明显影响。

### 使用方式

开启差分隐私训练的方式很简单,只需要在启动云侧服务时,
使用`context.set_fl_context()`设置`encrypt_type='DP_ENCRYPT'`即可。
此外,为了控制隐私保护的效果,我们还提供了3个参数:`dp_eps`、`dp_delta`以及`dp_norm_clip`,
它们也通过`context.set_fl_context()`设置。`dp_eps`和`dp_norm_clip`的合法取值范围是大于0,
`dp_delta`的合法取值范围是0<`dp_delta`<1。一般来说,`dp_eps`和`dp_delta`越小,隐私保护效果越好,
但是对模型收敛性的影响越大。建议`dp_delta`取客户端数量的倒数,`dp_eps`大于50。
`dp_norm_clip`是差分隐私机制对模型权重加噪前对权重大小的调整系数,会影响模型的收敛性,一般建议取0.5~2。

## 基于MPC的安全聚合

### 原理概述

尽管差分隐私技术可以适当保护用户数据隐私,但是当参与客户端数量比较少或者高斯噪声幅值较大时,
模型精度会受较大影响。为了同时满足模型保护和模型收敛这两个要求,我们提供了基于MPC的安全聚合方案。
在这种训练模式下,假设参与的客户端集合为$U$,对于任意FL-Client $u$和$v$,
它们会两两协商出一对随机扰动$p_{uv}$、$p_{vu}$,满足
$$
p_{uv}=\begin{cases} -p_{vu}, &u{\neq}v\\\\ 0, &u=v \end{cases}
$$
于是每个FL-Client $u$ 在上传模型至Server前,会在原模型权重$x_u$上加上它与其它用户协商的扰动:
$$
x_{encrypt}=x_u+\sum\limits_{v{\in}U}p_{uv}
$$
从而FL-Server的聚合结果$\overline{x}$为:
$$
\begin{align}
\overline{x}&=\sum\limits_{u{\in}U}(x_{u}+\sum\limits_{v{\in}U}p_{uv})\\\\
&=\sum\limits_{u{\in}U}x_{u}+\sum\limits_{u{\in}U}\sum\limits_{v{\in}U}p_{uv}\\\\
&=\sum\limits_{u{\in}U}x_{u}
\end{align}
$$
上面的过程只介绍了聚合算法的主要思想。基于MPC的聚合方案是精度无损的,代价是通讯轮次的增加。
如果您对算法的具体步骤感兴趣,可以参考原论文[2]。

### 使用方式

与开启差分隐私训练相似,我们只需要在`context.set_fl_context()`中设置`encrypt_type='PW_ENCRYPT'`即可。
此外,与安全聚合训练相关的云侧环境参数还有`cipher_client_num`、`share_client_ratio`和`cipher_t`。
`cipher_client_num`指代参与安全聚合的用户数量(即参与公钥交换的客户端数量),取值需要小于等于参与联邦学习的客户端数量;
`share_client_ratio`指代参与密钥碎片分享的客户端数量与`cipher_client_num`的比例,取值需要小于等于1;
`cipher_t`指代参与密钥碎片恢复的客户端数量,取值需要小于参与密钥碎片分享的客户端数量。
通常为了保证系统安全,在不考虑Server和Client合谋的情况下,`cipher_t`需要大于联邦学习客户端数量的一半;
在考虑Server和Client合谋的情况下,`cipher_t`需要大于联邦学习客户端数量的2/3。

### 参考文献

[1] Ligeng Zhu, Zhijian Liu, and Song Han. [Deep Leakage from Gradients](http://arxiv.org/pdf/1906.08935.pdf). NeurIPS, 2019.

[2] Keith Bonawitz, Vladimir Ivanov, Ben Kreuter, et al. [Practical Secure Aggregation for Privacy-Preserving Machine Learning](https://dl.acm.org/doi/pdf/10.1145/3133956.3133982). ACM CCS, 2017.
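为了直观理解上述两两扰动在聚合时相互抵消的性质,下面给出一个纯Python数值验证草图(仅为示意,与MindFL的实际实现无关;为简化,每个客户端的扰动取标量,真实方案中为与模型同维度的随机量):

```python
import numpy as np

np.random.seed(0)
n = 5                                  # 参与聚合的客户端数量(示意)
x = np.random.rand(n, 4)               # 每个客户端的本地模型权重(示意)

# 两两协商随机扰动,满足 p[u][v] = -p[v][u],对角线为0
p = np.triu(np.random.rand(n, n), k=1)
p = p - p.T

# 每个客户端上传加扰后的权重 x_u + sum_v p[u][v]
x_encrypt = x + p.sum(axis=1, keepdims=True)

# 服务端求和聚合:扰动相互抵消,结果与明文聚合一致
print(np.allclose(x_encrypt.sum(axis=0), x.sum(axis=0)))  # True
```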
diff --git a/docs/mindfl/docs/source_zh_cn/sentiment_classification_application.md b/docs/mindfl/docs/source_zh_cn/sentiment_classification_application.md
new file mode 100644
index 0000000000000000000000000000000000000000..4a8378c73f054839421151796af4071d4bcd953a
--- /dev/null
+++ b/docs/mindfl/docs/source_zh_cn/sentiment_classification_application.md
@@ -0,0 +1,424 @@
# 实现一个情感分类应用(Android)

## 概述

在隐私合规场景下,通过端云协同的联邦学习建模方式,可以充分发挥端侧数据的优势,避免用户敏感数据直接上报云侧。在联邦学习应用场景的探索中,输入法场景引起了我们的注意:用户在使用输入法时十分看重自己的文字隐私,同时又非常需要输入法提供智慧功能,因此联邦学习天然适用于输入法场景。MindFL将联邦语言模型应用到了输入法的表情图片预测功能中:联邦语言模型会根据聊天文本数据推荐出适合当前语境的表情图片。在使用联邦学习建模时,每一张表情图片会被定义为一个情感标签类别,而每个聊天短语会对应一个表情图片。MindFL将表情图片预测任务定义为联邦情感分类任务。

## 准备环节

### 环境

参考:[服务端环境配置](./deploy_mind_fl_cluster.md)和[客户端环境配置](./deploy_fl_client.md)。

### 数据

[用于训练的数据](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/train.tar.gz)包含100个用户聊天文件,其目录结构如下:

```text
mobile/datasets/train/
├── 0.tsv  # 用户0的训练数据
├── 1.tsv  # 用户1的训练数据
│
│  ......
│
└── 99.tsv  # 用户99的训练数据
```

[用于验证的数据](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/eval.tar.gz)包含1个聊天文件,其目录结构如下:

```text
mobile/datasets/eval/
└── 0.tsv  # 验证数据
```

[标签对应的表情图片数据](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/memo.tar.gz)包含107张图片,其目录结构如下:

```text
mobile/datasets/memo/
├── 0.gif  # 第0个标签对应的表情图片
├── 1.gif  # 第1个标签对应的表情图片
│
│  ......
│
└── 106.gif  # 第106个标签对应的表情图片
```

### 模型相关文件

生成模型需要的起始[checkpoint](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/models/albert_init.ckpt)和[词典](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/vocab.txt)的目录结构如下:

```text
mobile/models/
├── albert_init.ckpt  # 起始的checkpoint
└── vocab.txt  # 词典
```

## 定义网络

联邦学习中的语言模型使用ALBERT模型[1]。客户端上的ALBERT模型包括:embedding层、encoder层和classifier层。

具体网络定义请参考[源码](https://gitee.com/mindspore/mindspore/tree/master/tests/st/fl/mobile/src/model.py)。

### 生成端侧模型文件

#### 将模型导出为MindIR格式文件

代码如下:

```python
import numpy as np
from mindspore import export, Tensor
from src.config import train_cfg, client_net_cfg
from src.cell_wrapper import NetworkTrainCell

# 构建模型
client_network_train_cell = NetworkTrainCell(client_net_cfg)

# 构建输入数据
input_ids = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.seq_length), dtype=np.int32))
attention_mask = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.seq_length), dtype=np.int32))
token_type_ids = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.seq_length), dtype=np.int32))
label_ids = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.num_labels), dtype=np.int32))

# 导出模型
export(client_network_train_cell, input_ids, attention_mask, token_type_ids, label_ids, file_name='albert_train.mindir', file_format='MINDIR')
```

#### 将MindIR文件转化为联邦学习端侧框架可用的ms文件

转换方法可参考[实现一个图像分类应用](./image_classification_application.md)中生成端侧模型文件部分。
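下面给出一个调用MindSpore Lite转换工具完成该步骤的脚本草图(其中converter_lite的路径与`--trainModel`等参数取值均为假设示例,实际命令请以上述图像分类教程中的转换说明为准):

```python
import subprocess

# 假设converter_lite位于当前目录,且支持如下参数(示意)
cmd = ("./converter_lite "
       "--fmk=MINDIR "                     # 输入模型为MindIR格式
       "--trainModel=true "                # 假设:转换可训练模型时需开启的选项
       "--modelFile=albert_train.mindir "  # 上一步导出的MindIR文件
       "--outputFile=albert_ad_train")     # 预期生成albert_ad_train.ms
subprocess.run(['bash', '-c', cmd], check=True)
```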
## 启动联邦学习流程

首先在服务端启动脚本,可参考[云端部署方式](./deploy_mind_fl_cluster.md)。

以ALBERT模型的训练与推理任务为基础,整体流程为:

1. Android新建工程

2. 编译MindSpore Lite AAR包

3. Android实例程序结构说明

4. 编写代码

5. Android工程配置依赖项

6. Android构建与运行

### Android新建工程

在Android Studio中新建项目工程,并安装相应的SDK(指定SDK版本后,由Android Studio自动安装)。

![新建工程](./images/create_android_project.png)

### 编译MindSpore Lite AAR包

- 请参考[联邦学习部署](./deploy_fl_client.md)。

- 生成的Android AAR包名称:

  ```sh
  mindspore-lite-{version}.aar
  ```

- 把AAR包放置到安卓工程的`app/libs/`目录下。

### Android实例程序结构说明

```text
app
├── libs                                 # Android库项目的二进制归档文件
│   └── mindspore-lite-{version}.aar     # MindSpore Lite针对Android版本的归档文件
├── src/main
│   ├── assets                           # 资源目录
│   │   ├── model                        # 模型目录
│   │   │   ├── albert_ad_train.mindir.ms    # 存放的预训练模型文件
│   │   │   └── albert_ad_infer.mindir.ms    # 存放的推理模型文件
│   │   └── data                         # 数据目录
│   │       ├── 140.txt                  # 模型数据文件
│   │       ├── vocab.txt                # 词典文件
│   │       ├── vocab_map_ids.txt        # 词典ID映射文件
│   │       ├── eval.txt                 # 训练结果评估文件
│   │       └── eval_no_label.txt        # 推理数据文件
│   ├── java                             # java层应用代码
│   │   └── ...                          # 存放Android代码文件,相关目录可以自定义
│   ├── res                              # 存放Android相关的资源文件
│   └── AndroidManifest.xml              # Android配置文件
├── build.gradle                         # Android工程构建配置文件
├── download.gradle                      # 工程依赖文件下载
└── ...
```

### 编写代码

1. AssetCopyer.java:该代码文件的作用是把Android工程的`app/src/main/assets`目录下的资源文件存放到Android系统的磁盘中,以便在模型训练与推理时,联邦学习框架的接口能够根据绝对路径读取到资源文件。

    ```java
    import android.content.Context;

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.InputStream;
    import java.util.logging.Logger;

    public class AssetCopyer {
        private static final Logger LOGGER = Logger.getLogger(AssetCopyer.class.toString());

        public static void copyAllAssets(Context context, String destination) {
            LOGGER.info("destination: " + destination);
            copyAssetsToDst(context, "", destination);
        }

        // 把assets目录下面的资源文件拷贝到Android系统的磁盘中,具体路径可打印destination查看
        private static void copyAssetsToDst(Context context, String srcPath, String dstPath) {
            try {
                // 递归获取assets目录下所有的文件名
                String[] fileNames = context.getAssets().list(srcPath);
                if (fileNames.length > 0) {
                    // 构建目标file对象
                    File file = new File(dstPath);
                    // 创建目标目录
                    file.mkdirs();
                    for (String fileName : fileNames) {
                        // 拷贝文件到指定的磁盘路径
                        if (!srcPath.equals("")) {
                            copyAssetsToDst(context, srcPath + "/" + fileName, dstPath + "/" + fileName);
                        } else {
                            copyAssetsToDst(context, fileName, dstPath + "/" + fileName);
                        }
                    }
                } else {
                    // 构建源文件的输入流
                    InputStream is = context.getAssets().open(srcPath);
                    // 构建目标文件的输出流
                    FileOutputStream fos = new FileOutputStream(new File(dstPath));
                    // 定义1024大小的缓冲数组
                    byte[] buffer = new byte[1024];
                    int byteCount = 0;
                    // 将源文件写到目标文件
                    while ((byteCount = is.read(buffer)) != -1) {
                        fos.write(buffer, 0, byteCount);
                    }
                    // 刷新输出流
                    fos.flush();
                    // 关闭输入流
                    is.close();
                    // 关闭输出流
                    fos.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    ```
2. FlJob.java:该代码文件的作用是定义训练与推理任务的内容,具体的联邦学习接口含义请参考[联邦学习接口介绍](./interface_description_fl_client.md)。

    ```java
    import android.annotation.SuppressLint;
    import android.os.Build;

    import androidx.annotation.RequiresApi;

    import com.huawei.flAndroid.utils.AssetCopyer;
    import com.huawei.flclient.FLParameter;
    import com.huawei.flclient.SyncFLJob;

    import java.util.Arrays;
    import java.util.UUID;
    import java.util.logging.Logger;

    public class FlJob {
        private static final Logger LOGGER = Logger.getLogger(AssetCopyer.class.toString());
        private final String parentPath;

        public FlJob(String parentPath) {
            this.parentPath = parentPath;
        }

        // Android的联邦学习训练任务
        @SuppressLint("NewApi")
        @RequiresApi(api = Build.VERSION_CODES.M)
        public void syncJobTrain() {
            String trainDataset = parentPath + "/data/140.txt";
            String vocal_file = parentPath + "/data/vocab.txt";
            String idsFile = parentPath + "/data/vocab_map_ids.txt";
            String testDataset = parentPath + "/data/eval.txt";
            String trainModelPath = parentPath + "/model/albert_ad_train.mindir.ms";
            String inferModelPath = parentPath + "/model/albert_ad_infer.mindir.ms";
            String flName = "adbert";
            // server ip address,请保证Android能够访问到server,否则会出现connection failed
            String ip = "http://127.0.0.1:";
            int port = 6668;
            String clientID = UUID.randomUUID().toString();
            boolean useSSL = false;

            FLParameter flParameter = FLParameter.getInstance();
            flParameter.setTrainDataset(trainDataset);
            flParameter.setVocabFile(vocal_file);
            flParameter.setIdsFile(idsFile);
            flParameter.setTestDataset(testDataset);
            flParameter.setFlName(flName);
            flParameter.setTrainModelPath(trainModelPath);
            flParameter.setInferModelPath(inferModelPath);
            flParameter.setClientID(clientID);
            flParameter.setIp(ip);
            flParameter.setPort(port);
            flParameter.setUseSSL(useSSL);

            SyncFLJob syncFLJob = new SyncFLJob();
            syncFLJob.flJobRun();
        }

        // Android的联邦学习推理任务
        public void syncJobPredict() {
            String flName = "adbert";
            String dataPath = parentPath + "/data/eval_no_label.txt";
            String vocal_file = parentPath + "/data/vocab.txt";
            String idsFile = parentPath + "/data/vocab_map_ids.txt";
            String modelPath = parentPath + "/model/albert_ad_infer.mindir.ms";
            SyncFLJob syncFLJob = new SyncFLJob();
            int[] labels = syncFLJob.modelInference(flName, dataPath, vocal_file, idsFile, modelPath);
            LOGGER.info("labels = " + Arrays.toString(labels));
        }
    }
    ```

3. MainActivity.java:该代码文件的作用是启动联邦学习训练与推理任务。

    ```java
    import android.os.Build;
    import android.os.Bundle;

    import androidx.annotation.RequiresApi;
    import androidx.appcompat.app.AppCompatActivity;

    import com.huawei.flAndroid.job.FlJob;
    import com.huawei.flAndroid.utils.AssetCopyer;

    @RequiresApi(api = Build.VERSION_CODES.P)
    public class MainActivity extends AppCompatActivity {
        private String parentPath;

        @Override
        protected void onCreate(Bundle savedInstanceState) {
            super.onCreate(savedInstanceState);
            // 获取该应用程序在Android系统中的磁盘路径
            this.parentPath = this.getExternalFilesDir(null).getAbsolutePath();
            // 将assets目录下的资源文件拷贝到Android系统的磁盘中
            AssetCopyer.copyAllAssets(this.getApplicationContext(), parentPath);
            // 新建一个线程,启动联邦学习训练与推理任务
            new Thread(() -> {
                FlJob flJob = new FlJob(parentPath);
                flJob.syncJobTrain();
                flJob.syncJobPredict();
            }).start();
        }
    }
    ```

### Android工程配置依赖项

1. AndroidManifest.xml

    ```xml
    <?xml version="1.0" encoding="utf-8"?>
    <!-- 原文此处的配置内容缺失,以下为按本例代码重构的最小示例(草图):
         需声明INTERNET权限以访问FL-Server,并注册MainActivity为启动入口 -->
    <manifest xmlns:android="http://schemas.android.com/apk/res/android"
        package="com.huawei.flAndroid">

        <uses-permission android:name="android.permission.INTERNET" />

        <application
            android:allowBackup="true"
            android:label="flAndroid"
            android:supportsRtl="true">
            <activity android:name="com.huawei.flAndroid.MainActivity">
                <intent-filter>
                    <action android:name="android.intent.action.MAIN" />
                    <category android:name="android.intent.category.LAUNCHER" />
                </intent-filter>
            </activity>
        </application>
    </manifest>
    ```
2. app/build.gradle

    ```text
    plugins {
        id 'com.android.application'
    }
    android {
        // Android SDK的编译版本,建议大于27
        compileSdkVersion 30
        buildToolsVersion "30.0.3"
        defaultConfig {
            applicationId "com.huawei.flAndroid"
            minSdkVersion 27
            targetSdkVersion 30
            versionCode 1
            versionName "1.0"
            multiDexEnabled true
            testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
            ndk {
                // 不同的手机型号对应的ndk不相同,例如Mate 20手机对应'armeabi-v7a'
                abiFilters 'armeabi-v7a'
            }
        }
        // 指定ndk版本
        ndkVersion '21.3.6528147'
        sourceSets {
            main {
                // 指定jni目录
                jniLibs.srcDirs = ['libs']
                jni.srcDirs = []
            }
        }
        compileOptions {
            sourceCompatibility JavaVersion.VERSION_1_8
            targetCompatibility JavaVersion.VERSION_1_8
        }
    }
    dependencies {
        // 指定扫描libs目录下的AAR包
        implementation fileTree(dir: 'libs', include: ['*.aar'])
        implementation 'androidx.appcompat:appcompat:1.1.0'
        implementation 'com.google.android.material:material:1.1.0'
        implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
        androidTestImplementation 'androidx.test.ext:junit:1.1.1'
        androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
        implementation 'com.android.support:multidex:1.0.3'
    }
    ```

### Android构建与运行

1. 连接Android设备,运行联邦学习训练与推理应用程序。通过USB连接Android设备调试,点击`Run 'app'`即可在你的设备上运行联邦学习任务。

    ![run_app](./images/start_android_project.png)

2. Android Studio连接设备的调试操作可参考Android Studio相关文档。手机需开启“USB调试模式”,Android Studio才能识别到手机。华为手机一般在`设置->系统和更新->开发人员选项->USB调试`中打开“USB调试模式”。

3. 在Android设备上,点击“继续安装”,安装完成后即可在APP启动之后执行ALBERT模型的联邦学习训练与推理任务。

4. 程序运行结果如下:

    ```text
    I/SyncFLJob: [model inference] inference finish
    I/SyncFLJob: labels = [2, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4]
    ```
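    由于每个标签id对应`memo`目录中的一张同名表情图片,可以用如下草图把上述推理输出的labels映射为要展示的表情图片文件(纯示意代码):

    ```python
    import os

    # 推理输出的标签id列表(取自上面的运行日志)
    labels = [2, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4]
    memo_dir = "mobile/datasets/memo"  # 表情图片目录(假设路径)

    # 第i个标签对应表情图片 {i}.gif
    gif_files = [os.path.join(memo_dir, "{}.gif".format(label)) for label in labels]
    print(gif_files[:3])  # ['mobile/datasets/memo/2.gif', 'mobile/datasets/memo/0.gif', 'mobile/datasets/memo/0.gif']
    ```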
## 实验结果

联邦学习总迭代数为5,客户端本地训练epoch数为10,batchSize设置为16。

|        | Top1精度 | Top5精度 |
| ------ | -------- | -------- |
| ALBERT | 24%      | 70%      |

## 参考文献

[1] Lan Z, Chen M, Goodman S, et al. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations[J]. 2019.
diff --git a/docs/mindfl/requirements.txt b/docs/mindfl/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ea17a9e73613ddd99cc31690ddcf283d9a721450
--- /dev/null
+++ b/docs/mindfl/requirements.txt
@@ -0,0 +1,5 @@
sphinx >= 2.2.1, <= 2.4.4
recommonmark
sphinx-markdown-tables
sphinx_rtd_theme
jieba
\ No newline at end of file
diff --git a/docs/mindfl/summarize_fl.md b/docs/mindfl/summarize_fl.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1f6daaf561c2bfdc3c38fa0af0f8c95d7e2f65e
--- /dev/null
+++ b/docs/mindfl/summarize_fl.md
@@ -0,0 +1,95 @@
# 概述

`Linux` `Windows` `联邦学习` `分布式应用` `中级` `高级` `贡献者`

MindSpore FL是一款开源联邦学习框架,支持千万级无状态终端设备商用化部署,在用户数据留存在本地的情况下,使能全场景智能应用。

联邦学习是一种加密的分布式机器学习技术,参与联邦学习的各用户在不共享本地数据的前提下共建AI模型。MindFL优先专注于大规模参与方的横向联邦应用场景。

## MindFL 框架优势

- 隐私安全

  MindFL框架数据留存本地进行训练,不交换数据本身,而是用加密方式交换更新的模型参数。

  支持基于多方安全计算(MPC)的精度无损的安全聚合方案,防止模型窃取。

  支持基于本地差分隐私的性能无损的加密方案,防止模型泄漏隐私数据。

- 分布式联邦聚合

  云侧采用松耦合集群化处理方式,支持千万级大规模异构终端部署场景,实现高性能、高可用的分布式联邦聚合计算,可应对网络不稳定、负载突变等问题。

- 联邦效率提升

  支持同步和异步的联邦模式,支持多种模型压缩算法,提高联邦学习效率,节省带宽资源。

  支持多种联邦聚合策略,提高联邦收敛的平滑度,兼顾全局和局部的精度最优化。

- 灵活易用

  仅一行代码即可切换单机训练与联邦学习模式。

  网络模型可编程,聚合算法可编程,安全算法可编程,安全等级可定制。

## MindFL 组网架构

MindFL采用松耦合组网模式,应对大规模、无状态、不可靠的异构设备的联邦学习任务。

![](./docs/source_zh_cn/images/MindFL-Networking.png)

FL-Scheduler:联邦学习调度器,与FL-Server保持TCP长链接,通过心跳完成FL-Server node的组网,并负责管理面任务的下发。

FL-Server:联邦学习服务器。FL-Server集群对外暴露唯一地址,内部根据负载均衡策略将FL-Client请求路由到各FL-Server node,实现联邦学习服务化,解决大规模不稳定FL-Client的接入。集群内部保证集群事务弱一致性,并完成联邦聚合算法的分布式计算,使得FL-Client在任何时刻访问任意FL-Server,都能获得训练所需的全量数据。

FL-Client:联邦学习客户端,负责本地数据训练,并作为https的客户端与FL-Server交互。

## MindFL 总体架构

MindSpore FL 分为客户端模块和服务器模块两个部分,其框架的总体架构如下所示:

![architecture](./docs/source_zh_cn/images/MindFL-architecture.png)

- MindFL-Server模块:

  - **Federated Job Pipeline:** 联邦学习任务执行、弹性扩缩、容错容灾的主控流程。

  - **Worker Manager:** 设备管理相关逻辑。

  - **Aggregator、Optimizer:** 联邦学习在中心侧的聚合和优化逻辑单元,包括多server node间的分布式聚合处理。

  - **Metrics Manager:** 训练效果评估模块,用于判断训练效果和模型收敛。

  - **Armour:** 安全处理模块,包括多方安全计算等模型加解密策略。

  - **Protocol:** 联邦学习中的端云交互协议。

  - **Communication:** 用于联邦学习任务的通信组件。

  - **Compute Resources:** 用于联邦学习中心侧的硬件计算资源。

- MindFL-Client模块:

  - **Federated Job Pipeline:** 端侧联邦学习任务执行的主控逻辑,包括学习策略、同步/异步端云交互。

  - **Training & Inference:** 轻量化的端侧训练和推理能力,包括runtime和高性能算子库。

  - **Armour:** 用于端侧的安全处理模块,包括多方安全计算、本地差分隐私等模型加解密策略。

  - **Communication:** 端侧用于联邦学习任务的通信组件。

  - **Compute Resources:** 用于联邦学习端侧的硬件计算资源。

## 使用MindFL的工作流程

- 场景识别、选择模型:识别出适合使用联邦学习的场景,进行模型原型的选择或开发。
- 客户端设置、模型分发:在端侧为联邦任务积累本地数据,并使用工具生成方便部署的端侧模型。
- 应用部署:将FL-Client部署到端侧应用中,并在云侧设置FL-Plan和部署脚本。

## 场景体验

- 图像分类

- 文本分类
diff --git a/docs/note/source_en/operator_list_parallel.md b/docs/note/source_en/operator_list_parallel.md
index 5e185e632080b01fffe5ac86b12da912536c28d6..4a9d5166d49327e8b7ffc11973c387ec99f23201 100644
--- a/docs/note/source_en/operator_list_parallel.md
+++ b/docs/note/source_en/operator_list_parallel.md
@@ -42,7 +42,7 @@
| [mindspore.ops.Cosh](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Cosh.html) | None |
| [mindspore.ops.Div](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Div.html) | None |
| [mindspore.ops.DivNoNan](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.DivNoNan.html) | None |
-| 
[mindspore.ops.Dropout](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Dropout.html) | None | | [mindspore.ops.DropoutDoMask](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.DropoutDoMask.html) | Need to be used in conjunction with `DropoutGenMask` | | [mindspore.ops.DropoutGenMask](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.DropoutGenMask.html) | Need to be used in conjunction with `DropoutDoMask`, configuring shard strategy is not supported. | | [mindspore.ops.Elu](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Elu.html) | None | @@ -57,6 +57,7 @@ | [mindspore.ops.FloorDiv](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.FloorDiv.html) | None | | [mindspore.ops.FloorMod](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.FloorMod.html) | None | | [mindspore.ops.Gather](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Gather.html) | Only support 1-dim and 2-dim parameters and the last dimension of the input_params should be 32-byte aligned; Scalar input_indices is not supported; Repeated calculation is not supported when the parameters are split in the dimension of the axis; Split input_indices and input_params at the same time is not supported. | +| [mindspore.ops.GatherNd](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.GatherNd.html) | The first input can't be split, and the last dimension of the second input can't be split; In auto_parallel mode, the strategy's searching algorithm can not use "recursive_programming". | | [mindspore.ops.GeLU](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.GeLU.html) | None | | [mindspore.ops.Greater](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Greater.html) | None | | [mindspore.ops.GreaterEqual](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.GreaterEqual.html) | None | @@ -93,6 +94,8 @@ | [mindspore.ops.Reshape](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Reshape.html) | Configuring shard strategy is not supported. In auto parallel mode, if multiple operators are followed by the reshape operator, different shard strategys are not allowed to be configured for these operators. | | [mindspore.ops.Round](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Round.html) | None | | [mindspore.ops.Rsqrt](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Rsqrt.html) | None | +| [mindspore.ops.ScatterUpdate](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.ScatterUpdate.html) | The first dimension of first input can not be split, the second input can not be split, and the first n dimensions (n is the dimension size of the second input) of the third input can not be split; In auto_parallel mode, the strategy's searching algorithm can not use "recursive_programming". | +| [mindspore.ops.Select](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Select.html) | In auto_parallel mode, the strategy's searching algorithm can not use "recursive_programming". 
| 
| [mindspore.ops.Sigmoid](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Sigmoid.html) | None |
| [mindspore.ops.SigmoidCrossEntropyWithLogits](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.SigmoidCrossEntropyWithLogits.html) | None |
| [mindspore.ops.Sign](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Sign.html) | None |
@@ -123,4 +126,3 @@
| [mindspore.ops.ZerosLike](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.ZerosLike.html) | None |

> Repeated calculation means that the device is not fully used. For example, the cluster has 8 devices to run distributed training, but the splitting strategy only cuts the input into 4 copies. In this case, double counting will occur.
->
diff --git a/docs/note/source_en/static_graph_syntax_support.md b/docs/note/source_en/static_graph_syntax_support.md
index 19d7e373c0699ea164d8a590e5c2e63be2664f4e..1ca5963f3279546cacb77eb7dae21a663bb41945 100644
--- a/docs/note/source_en/static_graph_syntax_support.md
+++ b/docs/note/source_en/static_graph_syntax_support.md
@@ -355,7 +355,7 @@ For details about the rules, click

> In semi_auto_parallel mode, if a parameter is used by multiple operators, please ensure that the parameter layout in each operator is consistent, otherwise an error will be reported during compilation. In the following example, mul1 and mul2 share the weight, but mul1 splits the weight into 8 slices by row, while mul2 splits the weight into 8 slices by column. The layout of the weight in the two operators is inconsistent, so compilation will fail.

```python
import numpy as np
import mindspore as ms
import mindspore.ops as ops
from mindspore import Tensor, Parameter
from mindspore.nn import Cell

class Net(Cell):
    """Net definition"""
    def __init__(self):
        super(Net, self).__init__()
        self.mul1 = ops.Mul().shard(((8, 1), (8, 1)))
        self.mul2 = ops.Mul().shard(((1, 8), (1, 8)))
        self.weight = Parameter(Tensor(np.ones([16, 32]), dtype=ms.float32), "weight1")

    def construct(self, x):
        out = self.mul1(x, self.weight)
        out = self.mul2(out, self.weight)
        return out
```

#### all_reduce_fusion_config

`all_reduce_fusion_config` allows users to customize the AllReduce segmentation policy by gradient aggregation. To reduce resource consumption and operator execution gaps, the framework fuses all the reverse gradient aggregation AllReduce operators into one by default. However, when the model is large, the iteration smearing time increases. You can set this parameter based on the actual network to manually tune and find the optimal segmentation policy by gradient aggregation.

@@ -374,7 +397,7 @@ In `HYBRID_PARALLEL` mode, you need to manually split the model. You need to man

The following is a code example:

```python
-imoprt numpy as np
+import numpy as np

from mindspore import Parameter, Tensor

x = Parameter(Tensor(np.ones([2, 2])), layerwise_parallel=True)
diff --git a/docs/programming_guide/source_en/cache.md b/docs/programming_guide/source_en/cache.md
index addfbcddc8d77659d19fdab35efdf6a7804e2fa6..fc3a898fb3eb145b6e3cdce1b10e4c66cd5ab36a 100644
--- a/docs/programming_guide/source_en/cache.md
+++ b/docs/programming_guide/source_en/cache.md
@@ -106,7 +106,7 @@ Currently, the cache service supports only single-node cache. That is, the clien

Where, the table of Cache Server Configuration lists five detailed configuration items. Active sessions shows the list of active session IDs in the current server, if any.
- Cache server generates log files with filename "cache_server.\<hostname\>.\<username\>.log.\<severity level\>.\<date-time\>.\<process id\>".
+ Cache server generates log files with filename "cache_server.\<hostname\>.\<username\>.log.\<severity level\>.\<date-time\>.\<process id\>". Note that there might be a large number of DEBUG logs printed to the screen when `GLOG_v=0` is set.

> - To enable data spilling, you need to use `-s` to set spilling path when starting cache server. Otherwise, this feature is disabled by default and it will bring up a memory-only cache server.
diff --git a/docs/programming_guide/source_zh_cn/auto_parallel.md b/docs/programming_guide/source_zh_cn/auto_parallel.md
index 9f5d4d0dd93c80eda06f9ffa43220f773e837d01..adb4c558f1cd3bf604794159c4bf26274156bec1 100644
--- a/docs/programming_guide/source_zh_cn/auto_parallel.md
+++ b/docs/programming_guide/source_zh_cn/auto_parallel.md
@@ -117,6 +117,29 @@ mul = ops.Mul().shard(((2, 1), (2, 1)))
context.get_auto_parallel_context("parallel_mode")
```

+> 在semi_auto_parallel模式下,如果一个Parameter被多个算子共享,则需要保证该Parameter在每个算子中的排布都一致,否则构图将会失败。比如下面这个例子中,mul1和mul2共享权重weight,但mul1对weight按行切8份,而mul2对weight按列切8份,weight在两个算子中的排布不一致,构图将会失败:
+
+```python
+import numpy as np
+import mindspore as ms
+import mindspore.ops as ops
+from mindspore import Tensor, Parameter
+from mindspore.nn import Cell
+
+class Net(Cell):
+    """Net definition"""
+    def __init__(self):
+        super(Net, self).__init__()
+        self.mul1 = ops.Mul().shard(((8, 1), (8, 1)))
+        self.mul2 = ops.Mul().shard(((1, 8), (1, 8)))
+        self.weight = Parameter(Tensor(np.ones([16, 32]), dtype=ms.float32), "weight1")
+
+    def construct(self, x):
+        out = self.mul1(x, self.weight)
+        out = self.mul2(out, self.weight)
+        return out
+```
+
#### all_reduce_fusion_config

`all_reduce_fusion_config`可以让用户自定义梯度AllReduce融合切分策略。出于减少资源消耗及算子执行间隙的目的,框架默认将所有反向梯度聚合的AllReduce融合成一个算子运算,但当模型较大时,这会造成迭代拖尾耗时增加。用户可结合具体网络,通过设置该参数,手动调优找到性能最好的融合切分策略。

@@ -375,7 +398,7 @@ net = Net().set_comm_fusion(2)
代码样例如下:

```python
-imoprt numpy as np
+import numpy as np

from mindspore import Parameter, Tensor

x = Parameter(Tensor(np.ones([2, 2])), layerwise_parallel=True)
diff --git a/docs/programming_guide/source_zh_cn/cache.ipynb b/docs/programming_guide/source_zh_cn/cache.ipynb
index 480632c0a86e2a5c97aecc6e777a21b2fd00257e..42b1cab62142e6b24446168f484443c59298c858 100644
--- a/docs/programming_guide/source_zh_cn/cache.ipynb
+++ b/docs/programming_guide/source_zh_cn/cache.ipynb
@@ -157,6 +157,7 @@
   "metadata": {},
   "source": [
    "`cache_admin`支持以下命令和参数: \n",
+   "\n",
    "- `--start`:启动缓存服务器,支持通过以下参数进行配置:\n",
    "    - `--workers`或`-w`:设置缓存服务器的工作线程数量,默认情况下工作线程数量为机器CPU个数的一半。该参数需要根据NUMA架构来设置,若设置值不是机器中NUMA结点数的整数倍,则缓存服务器会对其进行自动调整。\n",
    "    - `--spilldir`或`-s`:设置若缓存数据的大小超过内存空间,则溢出至磁盘的数据文件路径,默认为空(表示不启用数据溢出功能)。\n",
@@ -213,7 +214,7 @@
   "source": [
    "其中,Cache Server Configuration表格分别列出了当前服务器的IP地址、端口号、工作线程数、日志等级、溢出路径等详细配置信息。Active sessions模块展示了当前服务器中已启用的session ID列表。\n",
    "\n",
-   "缓存服务器日志文件的命名格式为 \"cache_server.\<主机名\>.\<用户名\>.log.\<日志等级\>.\<日期-时间\>.\<进程号\>\"。\n",
+   "缓存服务器日志文件的命名格式为 \"cache_server.\<主机名\>.\<用户名\>.log.\<日志等级\>.\<日期-时间\>.\<进程号\>\"。当`GLOG_v=0`时,可能会在屏幕上显示大量DEBUG日志。\n",
    "\n",
    "> - 若要启用数据溢出功能,则用户在启动缓存服务器时必须使用`-s`参数对溢出路径进行设置,否则该功能默认关闭。"
   ]
@@ -289,6 +290,7 @@
   "metadata": {},
   "source": [
    "输出参数说明:\n",
+   "\n",
    "- `Session`: 缓存会话id。\n",
    "- `Cache Id`: 当前缓存会话中的cache实例id,`n/a`表示当前尚未创建缓存实例。\n",
    "- `Mem cached`: 缓存在内存中的数据量。\n",
@@ -325,13 +327,14 @@
   "metadata": {},
   "source": [
    "`DatasetCache`支持以下参数:\n",
+   "\n",
    "- `session_id`:缓存会话的id,通过`cache_admin -g`命令来创建并获取。\n",
    "- `size`:缓存最大内存空间占用,该参数以MB为单位,例如512GB的缓存空间应设置`size=524288`,默认为0。\n",
    "- 
`spilling`:当内存空间超出所设置的最大内存空间占用时,是否允许将剩余的数据溢出至磁盘,默认为False。\n", "- `hostname`:连接至缓存服务器的ip地址,默认为127.0.0.1。\n", "- `port`:连接至缓存服务器的端口号,默认为50052。\n", "- `num_connections`:建立的TCP/IP连接数,默认为12。\n", - "- `prefetch_size`:每次访问获取的行数,默认为20。\n", + "- `prefetch_size`:每次预取的数据行数,默认为20。\n", "\n", "> - 在实际使用中,通常应当首先使用`cache_admin -g`命令从缓存服务器处获得一个缓存会话id并作为`session_id`的参数,防止发生缓存会话不存在而报错的情况。\n", "> - 设置`size=0`代表不限制缓存所使用的内存空间,缓存服务器会根据系统的内存资源状况,自动控制缓存服务器的内存空间占用,使其不超过系统总内存的80%。\n", diff --git a/install/mindspore_ascend_install_docker.md b/install/mindspore_ascend_install_docker.md index c1d9e058d20bb7cd93c5f89fbac1e939b30bbe6d..d608466be9af674a1a078721cc790830745bd28e 100644 --- a/install/mindspore_ascend_install_docker.md +++ b/install/mindspore_ascend_install_docker.md @@ -156,9 +156,7 @@ print(ops.add(x, y)) 验证MindInsight安装: -1. 执行命令:```export PATH=/usr/local/python-3.7.5/bin:$PATH```。 - -2. 输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 +输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 ## 升级MindSpore版本 diff --git a/install/mindspore_ascend_install_docker_en.md b/install/mindspore_ascend_install_docker_en.md index d67434eda59f9f5ce86afba418a8ecb46ae9e1e1..cb6f5daf093a3eab708fd2e54a2f58feb2d0119b 100644 --- a/install/mindspore_ascend_install_docker_en.md +++ b/install/mindspore_ascend_install_docker_en.md @@ -156,9 +156,7 @@ It means MindSpore has been installed by docker successfully. If you need to verify the MindInsight installation: -1. Execute the command: ```export PATH=/usr/local/python-3.7.5/bin:$PATH```. - -2. Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. +Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. 
## Version Update diff --git a/install/mindspore_gpu_install_conda.md b/install/mindspore_gpu_install_conda.md index e36e4bcccd43b19acef5b35754b7488cca22aa72..b773ae0989c351773eb82a9eea6d4c576feede01 100644 --- a/install/mindspore_gpu_install_conda.md +++ b/install/mindspore_gpu_install_conda.md @@ -24,9 +24,8 @@ - 确认安装Ubuntu 18.04是64位操作系统。 - 确认安装[GCC 7.3.0版本](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz)。 -- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)。 +- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)配套[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive) 或者 [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive)配套[cuDNN 8.0.X版本](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111)。 - CUDA安装后,若CUDA没有安装在默认位置,需要设置环境变量PATH(如:`export PATH=/usr/local/cuda-${version}/bin:$PATH`)和`LD_LIBRARY_PATH`(如:`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`),详细安装后的设置可参考[CUDA安装手册](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)。 -- 确认安装[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive)。 - 确认安装[OpenMPI 4.0.3版本](https://www.open-mpi.org/faq/?category=building#easy-build)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[NCCL 2.7.6-1版本](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[TensorRT-7.2.2.3](https://developer.nvidia.com/nvidia-tensorrt-download)(可选,Serving推理需要)。 @@ -60,10 +59,18 @@ conda activate mindspore 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 +CUDA 10.1 版本: + ```bash pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` +CUDA 11.1 版本: + +```bash +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/ubuntu_x86/cuda-11.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + 其中: - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 diff --git a/install/mindspore_gpu_install_docker.md b/install/mindspore_gpu_install_docker.md index a4baad62121f1d0642caff07122ab6f02abe6ca0..49b3844f624c8c2537bbbdca373aaf8c0cdd844e 100644 --- a/install/mindspore_gpu_install_docker.md +++ b/install/mindspore_gpu_install_docker.md @@ -159,9 +159,7 @@ print(ops.add(x, y)) - 验证MindInsight安装: - 1. 执行命令:```export PATH=/usr/local/python-3.7.5/bin:$PATH```。 - - 2. 输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 + 输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 - 如果你安装的是`runtime`标签的容器,需要自行安装MindSpore。 diff --git a/install/mindspore_gpu_install_docker_en.md b/install/mindspore_gpu_install_docker_en.md index 6155251132b52d8e96fb15c9a0b4273e91423788..89cbcb31ab8fa825ab69770abec246245eaed9e3 100644 --- a/install/mindspore_gpu_install_docker_en.md +++ b/install/mindspore_gpu_install_docker_en.md @@ -159,9 +159,7 @@ It means MindSpore has been installed by docker successfully. - If you need to verify the MindInsight installation: - 1. Execute the command: ```export PATH=/usr/local/python-3.7.5/bin:$PATH```. - - 2. 
Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. + Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. - If you install a container with the label of `runtime`, you need to install MindSpore yourself. diff --git a/install/mindspore_gpu_install_pip.md b/install/mindspore_gpu_install_pip.md index 9597f998af04d758e46b04674e8ffa8f922eb196..0a5a9f99e08ec294f66a4347f9ee54d2a64776ca 100644 --- a/install/mindspore_gpu_install_pip.md +++ b/install/mindspore_gpu_install_pip.md @@ -23,9 +23,8 @@ - 确认安装64位操作系统,[glibc](https://www.gnu.org/software/libc/)>=2.17,其中Ubuntu 18.04是经过验证的。 - 确认安装[GCC 7.3.0版本](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz)。 -- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)。 +- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)配套[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive) 或者 [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive)配套[cuDNN 8.0.X版本](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111)。 - CUDA安装后,若CUDA没有安装在默认位置,需要设置环境变量PATH(如:`export PATH=/usr/local/cuda-${version}/bin:$PATH`)和`LD_LIBRARY_PATH`(如:`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`),详细安装后的设置可参考[CUDA安装手册](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)。 -- 确认安装[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive)。 - 确认安装[OpenMPI 4.0.3版本](https://www.open-mpi.org/faq/?category=building#easy-build)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[NCCL 2.7.6-1版本](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[TensorRT-7.2.2.3](https://developer.nvidia.com/nvidia-tensorrt-download)(可选,Serving推理需要)。 @@ -37,10 +36,18 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 +CUDA 10.1 版本: + ```bash pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/linux_x86/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` +CUDA 11.1 版本: + +```bash +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/ubuntu_x86/cuda-11.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + 其中: - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 diff --git a/install/mindspore_gpu_install_pip_en.md b/install/mindspore_gpu_install_pip_en.md index 7046c605c71f92eb66aff29a05097e4f7a77bacd..3f9f5ff3f181873af68854da2c5be6836086de4a 100644 --- a/install/mindspore_gpu_install_pip_en.md +++ b/install/mindspore_gpu_install_pip_en.md @@ -21,9 +21,8 @@ This document describes how to quickly install MindSpore by pip in a Linux syste - Confirm that the 64-bit operating system is installed and the [glibc](https://www.gnu.org/software/libc/)>=2.17, where Ubuntu 18.04 is verified. - Confirm that [GCC 7.3.0](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz) is installed. -- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) is installed. 
+- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) with [cuDNN 7.6.X](https://developer.nvidia.com/rdp/cudnn-archive) or [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive) with [cuDNN 8.0.X](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111) is installed. - If CUDA is installed in a non-default path, after installing CUDA, environment variable `PATH`(e.g. `export PATH=/usr/local/cuda-${version}/bin:$PATH`) and `LD_LIBRARY_PATH`(e.g. `export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`) need to be set. Please refer to [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions) for detailed post installation actions. -- Confirm that [cuDNN 7.6.X](https://developer.nvidia.com/rdp/cudnn-archive) is installed. - Confirm that [OpenMPI 4.0.3](https://www.open-mpi.org/faq/?category=building#easy-build) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training) - Confirm that [NCCL 2.7.6-1](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training) - Confirm that [TensorRT-7.2.2.3](https://developer.nvidia.com/nvidia-tensorrt-download) is installed. (optional,required for Serving inference). @@ -35,10 +34,18 @@ This document describes how to quickly install MindSpore by pip in a Linux syste It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. +For CUDA 10.1: + ```bash pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/linux_x86/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` +For CUDA 11.1: + +```bash +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/ubuntu_x86/cuda-11.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + Of which, - When the network is connected, dependency items are automatically downloaded during .whl package installation. (For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)). In other cases, you need to manually install dependency items. 
diff --git a/install/mindspore_gpu_install_source.md b/install/mindspore_gpu_install_source.md index 03f18021a32958a43c527d03bcc663f304932718..bb2c158998bb7f9af996163c8c7db8d6e1844531 100644 --- a/install/mindspore_gpu_install_source.md +++ b/install/mindspore_gpu_install_source.md @@ -34,10 +34,9 @@ - 确认安装[Autoconf 2.69及以上版本](https://www.gnu.org/software/autoconf)(可使用系统自带版本)。 - 确认安装[Libtool 2.4.6-29.fc30及以上版本](https://www.gnu.org/software/libtool)(可使用系统自带版本)。 - 确认安装[Automake 1.15.1及以上版本](https://www.gnu.org/software/automake)(可使用系统自带版本)。 -- 确认安装[cuDNN 7.6及以上版本](https://developer.nvidia.com/rdp/cudnn-archive)。 - 确认安装[Flex 2.5.35及以上版本](https://github.com/westes/flex/)。 - 确认安装[wheel 0.32.0及以上版本](https://pypi.org/project/wheel/)。 -- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)按默认配置安装。 +- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)配套[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive) 或者 [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive)配套[cuDNN 8.0.X版本](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111)。 - CUDA安装后,若CUDA没有安装在默认位置,需要设置环境变量PATH(如:`export PATH=/usr/local/cuda-${version}/bin:$PATH`)和`LD_LIBRARY_PATH`(如:`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`),详细安装后的设置可参考[CUDA安装手册](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)。 - 确认安装[OpenMPI 4.0.3版本](https://www.open-mpi.org/faq/?category=building#easy-build)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[NCCL 2.7.6-1版本](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian)(可选,单机多卡/多机多卡训练需要)。 diff --git a/install/mindspore_gpu_install_source_en.md b/install/mindspore_gpu_install_source_en.md index ce006e44c177fae412a6bc5b75a3f9dc6e88f274..a650483800c29a9c1ea1d19eb7050f78ea9dd1c9 100644 --- a/install/mindspore_gpu_install_source_en.md +++ b/install/mindspore_gpu_install_source_en.md @@ -31,10 +31,9 @@ This document describes how to quickly install MindSpore by source code in a Lin - Confirm that [Autoconf 2.69 or later](https://www.gnu.org/software/autoconf) is installed. (Default versions of these tools built in their systems are supported.) - Confirm that [Libtool 2.4.6-29.fc30 or later](https://www.gnu.org/software/libtool) is installed. (Default versions of these tools built in their systems are supported.) - Confirm that [Automake 1.15.1 or later](https://www.gnu.org/software/automake) is installed.(Default versions of these tools built in their systems are supported.) -- Confirm that [cuDNN 7.6 or later](https://developer.nvidia.com/rdp/cudnn-archive) is installed. - Confirm that [Flex 2.5.35 or later](https://github.com/westes/flex/) is installed. - Confirm that [wheel 0.32.0 or later](https://pypi.org/project/wheel/) is installed. -- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) is installed as default configuration. +- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) with [cuDNN 7.6.X](https://developer.nvidia.com/rdp/cudnn-archive) or [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive) with [cuDNN 8.0.X](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111) is installed as default configuration. - If CUDA is installed in a non-default path, after installing CUDA, environment variable `PATH`(e.g. `export PATH=/usr/local/cuda-${version}/bin:$PATH`) and `LD_LIBRARY_PATH`(e.g. 
`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`) need to be set. Please refer to [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions) for detailed post installation actions. - Confirm that [OpenMPI 4.0.3](https://www.open-mpi.org/faq/?category=building#easy-build) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training) - Confirm that [NCCL 2.7.6-1](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training) diff --git a/resource/api_updates/nn_api_updates.md b/resource/api_updates/nn_api_updates.md index 2c9ea8e89de30dde3f777790c830f0b0726a7f03..efb0ce7b08f6917a6de3b918163581b0f451c062 100644 --- a/resource/api_updates/nn_api_updates.md +++ b/resource/api_updates/nn_api_updates.md @@ -6,6 +6,7 @@ Compared with the previous version, the added, deleted and supported platforms c |:----|:----|:----|:---- |[mindspore.nn.ForwardValueAndGrad](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.ForwardValueAndGrad.html#mindspore.nn.ForwardValueAndGrad)|New|r1.2: Ascend/GPU/CPU|Wrapper Functions |[mindspore.nn.TimeDistributed](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.TimeDistributed.html#mindspore.nn.TimeDistributed)|New|r1.2: Ascend/GPU/CPU|Wrapper Functions +|[mindspore.nn.SparseToDense](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.SparseToDense.html#mindspore.nn.SparseToDense)|New|r1.2: CPU|Utilities |[mindspore.nn.BatchNorm3d](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.BatchNorm3d.html#mindspore.nn.BatchNorm3d)|New|r1.2: Ascend/GPU/CPU|Normalization Layers |[mindspore.nn.InstanceNorm2d](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.InstanceNorm2d.html#mindspore.nn.InstanceNorm2d)|New|r1.2: GPU|Normalization Layers |[mindspore.nn.SyncBatchNorm](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.SyncBatchNorm.html#mindspore.nn.SyncBatchNorm)|New|r1.2: Ascend|Normalization Layers @@ -33,9 +34,11 @@ Compared with the previous version, the added, deleted and supported platforms c |[mindspore.nn.RMSProp](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.RMSProp.html#mindspore.nn.RMSProp)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Optimizer Functions |[mindspore.nn.GroupNorm](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.GroupNorm.html#mindspore.nn.GroupNorm)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Normalization Layers |[mindspore.nn.BatchNorm1d](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.BatchNorm1d.html#mindspore.nn.BatchNorm1d)|Changed|r1.1: Ascend/GPU => r1.2: Ascend|Normalization Layers +|[mindspore.nn.LayerNorm](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.LayerNorm.html#mindspore.nn.LayerNorm)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Normalization Layers |[mindspore.nn.HSigmoid](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.HSigmoid.html#mindspore.nn.HSigmoid)|Changed|r1.1: GPU => r1.2: GPU/CPU|Non-linear Activations |[mindspore.nn.HSwish](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.HSwish.html#mindspore.nn.HSwish)|Changed|r1.1: GPU => r1.2: GPU/CPU|Non-linear 
Activations |[mindspore.nn.LeakyReLU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.LeakyReLU.html#mindspore.nn.LeakyReLU)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Non-linear Activations +|[mindspore.nn.GELU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.GELU.html#mindspore.nn.GELU)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Non-linear Activations |[mindspore.nn.ELU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.ELU.html#mindspore.nn.ELU)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Non-linear Activations |[mindspore.nn.get_activation](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.get_activation.html#mindspore.nn.get_activation)|Changed|r1.1: To Be Developed => r1.2: Ascend/GPU/CPU|Non-linear Activations |[mindspore.nn.Moments](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.Moments.html#mindspore.nn.Moments)|Changed|r1.1: Ascend => r1.2: Ascend/GPU|Math Functions diff --git a/resource/api_updates/ops_api_updates.md b/resource/api_updates/ops_api_updates.md index 7186ce20bab922ca979a5f63155e5582787578dc..3b2364f47d32312fa4af8701b060ab828870fcb2 100644 --- a/resource/api_updates/ops_api_updates.md +++ b/resource/api_updates/ops_api_updates.md @@ -33,6 +33,7 @@ Compared with the previous version, the added, deleted and supported platforms c |[mindspore.ops.Dihedral14LJCFForceWithAtomEnergy](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Dihedral14LJCFForceWithAtomEnergy.html#mindspore.ops.Dihedral14LJCFForceWithAtomEnergy)|New|r1.2: GPU|operations--Sponge Operators |[mindspore.ops.AngleAtomEnergy](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.AngleAtomEnergy.html#mindspore.ops.AngleAtomEnergy)|New|r1.2: GPU|operations--Sponge Operators |[mindspore.ops.NoRepeatNGram](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.NoRepeatNGram.html#mindspore.ops.NoRepeatNGram)|New|r1.2: Ascend|operations--Other Operators +|[mindspore.ops.Dropout](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Dropout.html#mindspore.ops.Dropout)|New|r1.2: Ascend/GPU/CPU|operations--Neural Network Operators |[mindspore.ops.Conv3DTranspose](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Conv3DTranspose.html#mindspore.ops.Conv3DTranspose)|New|r1.2: Ascend|operations--Neural Network Operators |[mindspore.ops.SeLU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.SeLU.html#mindspore.ops.SeLU)|New|r1.2: Ascend|operations--Neural Network Operators |[mindspore.ops.Dropout2D](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Dropout2D.html#mindspore.ops.Dropout2D)|New|r1.2: Ascend|operations--Neural Network Operators diff --git a/tutorials/inference/source_en/serving_example.md b/tutorials/inference/source_en/serving_example.md index c406644ed55a6318e93b77f679fdbdf7192463fc..b45657b5a3a114c6c5df85e1bebbdacdbabb0094 100644 --- a/tutorials/inference/source_en/serving_example.md +++ b/tutorials/inference/source_en/serving_example.md @@ -26,15 +26,15 @@ The following uses a simple `Add` network as an example to describe how to use M ### Preparing the Environment -Before running the sample network, ensure that MindSpore Serving has been properly installed. 
To install MindSpore Serving on your PC, go to the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README.md#installing-serving) and configure environment variables on the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/serving/blob/master/README.md#configuring-environment-variables). +Before running the sample network, ensure that MindSpore Serving has been properly installed. To install MindSpore Serving on your PC, go to the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README.md#installing-serving) and configure environment variables on the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/docs/blob/master/install/mindspore_ascend_install_source_en.md#configuring-environment-variables). ### Downloading the Example -Please download the [add example](https://gitee.com/mindspore/serving/blob/master/example/add/) first. +Please download the [add example](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/) first. ### Exporting the Model -In the directory `export_model`, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/add/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model. +In the directory `export_model`, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model. ```python import os @@ -99,7 +99,7 @@ Execute the `add_model.py` script to generate the `tensor_add.mindir` file. The Start Serving with the following files: ```text -test_dir +tensor_add ├── add/ │ └── servable_config.py │ └── 1/ @@ -110,7 +110,7 @@ test_dir - `serving_server.py`: Script file for starting the service. - `add`: Model folder, which is named after the model name. - `tensor_add.mindir`: Model file generated by the network in the previous step, which is stored in folder 1 (the number indicates the version number). Different versions are stored in different folders. The version number must be a string of digits. By default, the latest model file is started. -- [servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/add/add/servable_config.py): [Model configuration file](https://www.mindspore.cn/tutorial/inference/en/master/serving_model.html), which defines the model processing functions, including the `add_common` and `add_cast` methods. `add_common` defines an addition operation whose input is two pieces of float32 data, and `add_cast` defines an addition operation whose input is data with its type converted to float32. +- [servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/add/servable_config.py): [Model configuration file](https://www.mindspore.cn/tutorial/inference/en/master/serving_model.html), which defines the model processing functions, including the `add_common` and `add_cast` methods. `add_common` defines an addition operation whose input is two pieces of float32 data, and `add_cast` defines an addition operation whose input is data with its type converted to float32. Content of the configuration file: @@ -150,7 +150,7 @@ def add_cast(x1, x2): #### Starting the Service The server calls a Python API to start the inference process shared by both master and worker nodes. The client directly connects to the inference service and delivers an inference task. 
-Run the [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_server.py) script to deploy lightweight service:
+Run the [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py) script to deploy the lightweight service:

```python
import os
@@ -175,12 +175,12 @@ if __name__ == "__main__":

The above startup script will load and run two inference copies of `add` on devices 0 and 1, and the inference requests from the client will be split between the two copies.

-If the server prints the `Serving gRPC start success, listening on 0.0.0.0:5500` log, the Serving has loaded the inference model.
+If the server prints the `Serving RESTful server start success, listening on 127.0.0.1:1500` log, the Serving RESTful service has started successfully and the inference model has been loaded.

### Inference Execution

The client can access the inference service through either [gRPC](https://www.mindspore.cn/tutorial/inference/en/master/serving_grpc.html) or [RESTful](https://www.mindspore.cn/tutorial/inference/en/master/serving_restful.html). The following uses gRPC as an example.

-Execute [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_client.py) to start the Python client.
+Execute [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_client.py) to start the Python client.

```python
import numpy as np
diff --git a/tutorials/inference/source_en/serving_grpc.md b/tutorials/inference/source_en/serving_grpc.md
index bc16c1f761c1e3c7e49576246bd2e7df6bb8241a..13415e23d15d4939d20ba20d7b6a2c21b2bac8c6 100644
--- a/tutorials/inference/source_en/serving_grpc.md
+++ b/tutorials/inference/source_en/serving_grpc.md
@@ -176,11 +176,11 @@ from mindspore_serving import server

def start():
    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
-    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="resnet50",
                                                  device_ids=(0, 1))
    server.start_servables(servable_configs=servable_config)

-    server.start_grpc_server(address="unix:add_test_temp_file")
+    server.start_grpc_server(address="unix:/tmp/resnet50_test_temp_file")


if __name__ == "__main__":
@@ -195,7 +195,7 @@ from mindspore_serving.client import Client


def run_classify_top1():
-    client = Client("unix:add_test_temp_file", "resnet50", "classify_top1")
+    client = Client("unix:/tmp/resnet50_test_temp_file", "resnet50", "classify_top1")
    instances = []
    for path, _, file_list in os.walk("./test_image/"):
        for file_name in file_list:
diff --git a/tutorials/inference/source_en/serving_restful.md b/tutorials/inference/source_en/serving_restful.md
index f81a56094a05a05935d773b3474f250aee00f127..cca2ad67c68075239a8a1314aaeddac3702ac5c9 100644
--- a/tutorials/inference/source_en/serving_restful.md
+++ b/tutorials/inference/source_en/serving_restful.md
@@ -24,9 +24,7 @@ MindSpore Serving supports both `gRPC` and `RESTful` request modes. The followin

For details about how to deploy `Serving`, see [MindSpore Serving-based Inference Service Deployment](https://www.mindspore.cn/tutorial/inference/en/master/serving_example.html).

-Use the `master.start_restful_server` API to start the `RESTful` service. Alternatively, you can use `master.start_grpc_server` to start the `gRPC` service.
-
-> `RESTful` clients do not depend on specific hardware platforms. Currently, the Serving server supports `Ascend 310`, `Ascend 910` and `Nvidia GPU` hardware environments.
+We can use the `mindspore_serving.server.start_restful_server` API to start the `RESTful` service.

## Request Method

@@ -281,7 +279,7 @@ We can use the `curl` command line or the `requests` library to access `SSL/TLS` enabled
curl -X POST -d '${REQ_JSON_MESSAGE}' --cacert '${PATH_TO_CA_CERT_FILE}' https://${HOST}:${PORT}/model/${MODEL_NAME}/version/${VERSION}:${METHOD_NAME}
```

-The example of accessing the method `add_common` of the `add` model is following:
+The example of accessing the `add_common` method of the `add` model is as follows:

```text
curl -X POST -d '{"instances":[{"x1":[[1.0, 2.0], [3.0, 4.0]], "x2":[[1.0, 2.0], [3.0, 4.0]]}]}' --cacert ca.crt https://localhost:5500/model/add/version/1:add_common
diff --git a/tutorials/inference/source_zh_cn/serving_example.md b/tutorials/inference/source_zh_cn/serving_example.md
index 4c4df19e74793812518b2f7fa222d89961472d83..bf4b7ae47b5a2abb9afe4709ef816070128d1bf3 100644
--- a/tutorials/inference/source_zh_cn/serving_example.md
+++ b/tutorials/inference/source_zh_cn/serving_example.md
@@ -26,15 +26,15 @@ MindSpore Serving is a lightweight, high-performance service module that aims to help MindSpore

### Environment Preparation

-Before running the example, make sure MindSpore Serving has been properly installed. If it has not, install MindSpore Serving on your PC through the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85), and complete the environment variable configuration through the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F).
+Before running the example, make sure MindSpore Serving has been properly installed. If it has not, install MindSpore Serving on your PC through the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85), and complete the environment variable configuration through the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/docs/blob/master/install/mindspore_ascend_install_pip.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F).

### Downloading the Example

-Please [download the example](https://gitee.com/mindspore/serving/blob/master/example/add/) first.
+Please [download the example](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/) first.

### Exporting the Model

-In the `export_model` directory, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/add/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model.
+In the `export_model` directory, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model.

```python
import os
@@ -99,18 +99,18 @@ if __name__ == "__main__":

To start the Serving service with the Add example, the following files are required:

```text
-test_dir
+tensor_add
├── add/
│   ├── servable_config.py
│   └── 1/
│       └── tensor_add.mindir
-└── master_with_worker.py
+└── serving_server.py
```

-- `master_with_worker.py` is the script file for starting the service.
+- `serving_server.py` is the script file for starting the service.
- `add` is the model folder, and the folder name is the model name.
- `tensor_add.mindir` is the model file generated by the network in the previous step. It is stored under folder 1, where 1 is the version number; different versions are stored in different folders. The version number must be a string of digits, and by default the model file with the largest version number is loaded.
-- [servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/add/add/servable_config.py) is the [model configuration file](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_model.html), which defines the model processing functions, including the `add_common` and `add_cast` methods. `add_common` defines an addition operation whose inputs are two pieces of ordinary float32 data, and `add_cast` defines an addition operation whose inputs are of other types and are first converted to float32.
+- [servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/add/servable_config.py) is the [model configuration file](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_model.html), which defines the model processing functions, including the `add_common` and `add_cast` methods. `add_common` defines an addition operation whose inputs are two pieces of ordinary float32 data, and `add_cast` defines an addition operation whose inputs are of other types and are first converted to float32.

The content of the model configuration file is as follows:

@@ -149,7 +149,7 @@ def add_cast(x1, x2):
#### Starting the Service

-Execute [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_server.py) to start the service:
+Execute [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py) to start the service:

```python
import os
@@ -174,12 +174,12 @@ if __name__ == "__main__":

The above startup script will load and run two inference copies of `add` on devices 0 and 1, and inference requests from the client will be split between the two copies.

-When the server prints the log `Serving gRPC start success, listening on 127.0.0.1:5500`, the Serving service has finished loading the inference model.
+When the server prints the log `Serving RESTful server start success, listening on 127.0.0.1:1500`, the Serving RESTful service has started successfully and the inference model has been loaded.

### Inference Execution

The client can access the inference service in two ways: through [gRPC](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_grpc.html) or through [RESTful](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_restful.html). The following uses gRPC as an example.

-Use [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_client.py) to start the Python client.
+Use [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_client.py) to start the Python client.

```python
import numpy as np
diff --git a/tutorials/inference/source_zh_cn/serving_grpc.md b/tutorials/inference/source_zh_cn/serving_grpc.md
index 64695a345b672f1205df79077eda0d24f941df1f..907b72fdab980ba085bce0acd6984987d97cb1aa 100644
--- a/tutorials/inference/source_zh_cn/serving_grpc.md
+++ b/tutorials/inference/source_zh_cn/serving_grpc.md
@@ -176,11 +176,11 @@ from mindspore_serving import server

def start():
    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
-    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="resnet50",
                                                  device_ids=(0, 1))
    server.start_servables(servable_configs=servable_config)

-    server.start_grpc_server(address="unix:/tmp/serving_add_test_temp_file")
+    server.start_grpc_server(address="unix:/tmp/serving_resnet50_test_temp_file")


if __name__ == "__main__":
@@ -195,7 +195,7 @@ from mindspore_serving.client import Client


def run_classify_top1():
-    client = Client("unix:/tmp/serving_add_test_temp_file", "resnet50", "classify_top1")
+    client = Client("unix:/tmp/serving_resnet50_test_temp_file", "resnet50", "classify_top1")
    instances = []
    for path, _, file_list in os.walk("./test_image/"):
        for file_name in file_list:
diff --git a/tutorials/inference/source_zh_cn/serving_restful.md b/tutorials/inference/source_zh_cn/serving_restful.md
index 8029baecc1ac6127401eb72a7829bb3c3d4396e7..172548f1d4b84b31b06c5a00ced052d5f3802e42 100644
--- a/tutorials/inference/source_zh_cn/serving_restful.md
+++ b/tutorials/inference/source_zh_cn/serving_restful.md
@@ -24,9 +24,7 @@ MindSpore Serving supports both `gRPC` and `RESTful` request modes. This section describes the `RE

For details about deploying `Serving`, see the [Quick Start](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_example.html) chapter.

-Use the `master.start_restful_server` API to start the `RESTful` service; alternatively, you can use `master.start_grpc_server` to start the `gRPC` service.
-
-> `RESTful` clients do not depend on a specific hardware platform. The Serving server currently supports `Ascend310`, `Ascend910` and `Nvidia GPU` hardware environments.
+We can use the `mindspore_serving.server.start_restful_server` API to start the `RESTful` service.

## Request Method

diff --git a/tutorials/lite/source_en/quick_start/image_segmentation.md b/tutorials/lite/source_en/quick_start/image_segmentation.md
index 1b16ba26f00f979394aaed88e02848d5aea972ab..101677be8f37a13d6e03d2ef5c7f749774568102 100644
--- a/tutorials/lite/source_en/quick_start/image_segmentation.md
+++ b/tutorials/lite/source_en/quick_start/image_segmentation.md
@@ -119,13 +119,13 @@ app

### Configuring MindSpore Lite Dependencies

-Related
library files are required for Android to call MindSpore Android AAR. You can use MindSpore Lite [source code](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html) to generate the `mindspore-lite-{version}-inference-android.tar.gz` library file package (including the `mindspore-lite-{version}.aar` library file) and decompress it. +Related library files are required for Android to call MindSpore Android AAR. You can use MindSpore Lite [source code](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html) to generate the `mindspore-lite-{version}-android.tar.gz` library file package (including the `mindspore-lite-{version}.aar` library file) and decompress it. > version: version number in the output file, which is the same as the version number of the built branch code. In this example, the MindSpore Lite version file is automatically downloaded using the `app/download.gradle` file during the build process and stored in the `app/libs` directory. -Note: If the automatic download fails, manually download the related library file [mindspore-lite-{version}-inference-android.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html), decompress it, and save it to the corresponding directory. +Note: If the automatic download fails, manually download the related library file [mindspore-lite-{version}-android.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html), decompress it, and save it to the corresponding directory. ### Downloading and Deploying the Model File diff --git a/tutorials/lite/source_en/quick_start/quick_start.md b/tutorials/lite/source_en/quick_start/quick_start.md index 3b69b70e631b9becee52ab6fce4ede4cb762d971..ec414a8e63d6f8692f2755aa9e74dec4b33276cb 100644 --- a/tutorials/lite/source_en/quick_start/quick_start.md +++ b/tutorials/lite/source_en/quick_start/quick_start.md @@ -167,17 +167,17 @@ Create a link to the `.so` library file in the `app/CMakeLists.txt` file: ```text # ============== Set MindSpore Dependencies. ============= include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/include) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/include) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/include) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/dataset/include) add_library(mindspore-lite SHARED IMPORTED) add_library(minddata-lite SHARED IMPORTED) set_target_properties(mindspore-lite PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/lib/libmindspore-lite.so) + ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/lib/libmindspore-lite.so) set_target_properties(minddata-lite PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/lib/libminddata-lite.so) + ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/dataset/lib/libminddata-lite.so) # --------------- MindSpore Lite set End. -------------------- # Link target library. 
diff --git a/tutorials/lite/source_en/quick_start/quick_start_cpp.md b/tutorials/lite/source_en/quick_start/quick_start_cpp.md index e8c97de686d22fbbaab063f88211831910e66d95..a18476b35723a0c45f2931d46c27031bf881ea9b 100644 --- a/tutorials/lite/source_en/quick_start/quick_start_cpp.md +++ b/tutorials/lite/source_en/quick_start/quick_start_cpp.md @@ -57,7 +57,7 @@ The MindSpore Lite inference steps are as follows: bash build.sh ``` - > If the MindSpore Lite inference framework fails to be downloaded by using this build script, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64, and copy the `libmindspore-lite.a` file in the decompressed lib directory to the `mindspore/lite/examples/quick_start_cpp/lib` directory. Also copy the files from `inference/include` to the `mindspore/lite/examples/quick_start_cpp/include` directory. + > If the MindSpore Lite inference framework fails to be downloaded by using this build script, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64, and copy the `libmindspore-lite.a` file in the decompressed lib directory to the `mindspore/lite/examples/quick_start_cpp/lib` directory. Also copy the files from `runtime/include` to the `mindspore/lite/examples/quick_start_cpp/include` directory. > > If the MobileNetV2 model fails to be downloaded, manually download the model file [mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms) and copy it to the `mindspore/lite/examples/quick_start_cpp/model` directory. > @@ -89,7 +89,7 @@ The MindSpore Lite inference steps are as follows: - Build - - Download the library: Manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Windows-x64. Copy the `libmindspore-lite.a` file in the decompressed `inference/lib` directory to the `mindspore/lite/examples/quick_start_cpp/lib` project directory, and change the include directory to the `mindspore/lite/examples/quick_start_cpp/include` project directory. (Note: The `lib` and `include` directories under the project need to be created manually) + - Download the library: Manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Windows-x64. Copy the `libmindspore-lite.a` file in the decompressed `runtime/lib` directory to the `mindspore/lite/examples/quick_start_cpp/lib` project directory, and change the include directory to the `mindspore/lite/examples/quick_start_cpp/include` project directory. (Note: The `lib` and `include` directories under the project need to be created manually) - Download the model: Manually download the model file [mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms) and copy it to the `mindspore/lite/examples/quick_start_cpp/model` directory. 
diff --git a/tutorials/lite/source_en/quick_start/quick_start_java.md b/tutorials/lite/source_en/quick_start/quick_start_java.md index f84e5283c34511e69dc9a7e53451e9f737e017f9..590201ee0c41cf825e95cc2dd5acc69a0a6ec6e0 100644 --- a/tutorials/lite/source_en/quick_start/quick_start_java.md +++ b/tutorials/lite/source_en/quick_start/quick_start_java.md @@ -52,7 +52,7 @@ The MindSpore Lite inference steps are as follows: bash build.sh ``` - > If the MindSpore Lite inference framework fails to be downloaded, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64. Decompress the package and obtain the `libmindspore-lite.so` file in the `lib\jar` directory. Copy `libmindspore-lite-jni.so` and `libmindspore-lite-java.jar` to the `mindspore/lite/examples/quick_start_java/lib` directory. + > If the MindSpore Lite inference framework fails to be downloaded, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64. Decompress the package and obtain the `libmindspore-lite.so` file in the `runtime/lib` directory. Copy `libmindspore-lite-jni.so` and `libmindspore-lite-java.jar` to the `mindspore/lite/examples/quick_start_java/lib` directory. > > If the MobileNetV2 model fails to be downloaded, manually download the model file [mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms) and copy it to the `mindspore/lite/examples/quick_start_java/model/` directory. > diff --git a/tutorials/lite/source_en/quick_start/train_lenet.md b/tutorials/lite/source_en/quick_start/train_lenet.md index 32ca197db690d0859fa6f19e528edff24691d6a8..3bf213eeba109e50b9ac2c96a78383484971e395 100644 --- a/tutorials/lite/source_en/quick_start/train_lenet.md +++ b/tutorials/lite/source_en/quick_start/train_lenet.md @@ -77,17 +77,17 @@ cd ./mindspore The `mindspore/lite/examples/train_lenet` directory relative to the MindSpore Lite source code contains this demo's source code. -Go to the [MindSpore Lite Download Page](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html) to download the mindspore-lite-{version}-linux-x64.tar.gz and mindspore-lite-{version}-android-aarch64.tar.gz. The mindspore-lite-{version}-linux-x64.tar.gz is the MindSpore Lite install package for x86 platform, it contains the converter tool `converter_lite`, this demo uses it to converte `MIDIR` model to `.ms` which is supported by MindSpore Lite; The mindspore-lite-{version}-android-aarch64.tar.gz is the MindSpore Lite install package for Android, it contains training runtime library `libmindspore-lite.so`, this demo uses it to train model. After download these two files, you need rename the mindspore-lite-{version}-linux-x64.tar.gz to mindspore-lite-{version}-train-linux-x64.tar.gz and rename the mindspore-lite-{version}-android-aarch64.tar.gz to mindspore-lite-{version}-train-android-aarch64.tar.gz. Then put the renamed files to the `output` directory relative to MindSpore Lite source code(if there is no `output` directory,you should create it). 
+Go to the [MindSpore Lite Download Page](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html) to download mindspore-lite-{version}-linux-x64.tar.gz and mindspore-lite-{version}-android-aarch64.tar.gz. The mindspore-lite-{version}-linux-x64.tar.gz package is the MindSpore Lite install package for the x86 platform; it contains the converter tool `converter_lite`, which this demo uses to convert the `MINDIR` model to the `.ms` format supported by MindSpore Lite. The mindspore-lite-{version}-android-aarch64.tar.gz package is the MindSpore Lite install package for Android; it contains the training runtime library `libmindspore-lite.so`, which this demo uses to train the model. After downloading these two files, put them into the `output` directory relative to the MindSpore Lite source code (if there is no `output` directory, you should create it).

Suppose these packages are downloaded to the `/Downloads` directory; the `Linux` commands for the operations above are as follows:

```bash
mkdir output
-cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/mindspore-lite-{version}-train-linux-x64.tar.gz
-cp /Downloads/mindspore-lite-{version}0-android-aarch64.tar.gz output/mindspore-lite-{version}-train-android-aarch64.tar.gz
+cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/mindspore-lite-{version}-linux-x64.tar.gz
+cp /Downloads/mindspore-lite-{version}-android-aarch64.tar.gz output/mindspore-lite-{version}-android-aarch64.tar.gz
```

-You can also [compile from source](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html) to generate the training package for x86 platform mindspore-lite-{version}-train-linux-x64.tar.gz and for Andorid platform mindspore-lite-{version}-train-android-aarch64.tar.gz. These packages will directly generated in `output` directory and you should make sure that in the `output` directory both the two packages exist.
+You can also [compile from source](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html) to generate the package mindspore-lite-{version}-linux-x64.tar.gz for the x86 platform and mindspore-lite-{version}-android-aarch64.tar.gz for the Android platform. These packages are generated directly in the `output` directory, and you should make sure that both packages exist there.

### Connect Android Device

diff --git a/tutorials/lite/source_en/use/benchmark_tool.md b/tutorials/lite/source_en/use/benchmark_tool.md
index bc3f630c0ab014cc30bb5a736f7caa36f811e838..472897b86bc68092cfe5e55eda6ff50f3c39ffe8 100644
--- a/tutorials/lite/source_en/use/benchmark_tool.md
+++ b/tutorials/lite/source_en/use/benchmark_tool.md
@@ -41,7 +41,7 @@ To use the Benchmark tool, you need to prepare the environment as follows:

- Add the path of the dynamic library required by the inference code to the environment variable LD_LIBRARY_PATH.

  ```bash
-  export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/inference/lib:${LD_LIBRARY_PATH}
+  export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/runtime/lib:${LD_LIBRARY_PATH}
  ```

${PACKAGE_ROOT_PATH} is the compiled inference package path after decompressing.
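The same environment setup can be scripted when the benchmark tool is driven from a test harness rather than an interactive shell. A minimal Python sketch under stated assumptions (the concrete package path, the model path, and the `--modelFile` flag are illustrative, not taken from this page):

```python
import os
import subprocess

# Hypothetical decompressed package location, i.e. ${PACKAGE_ROOT_PATH} above.
package_root = "/path/mindspore-lite-1.3.0-linux-x64"

env = dict(os.environ)
# Prepend the runtime library directory, mirroring the `export LD_LIBRARY_PATH=...` above.
env["LD_LIBRARY_PATH"] = package_root + "/runtime/lib:" + env.get("LD_LIBRARY_PATH", "")

# Invoke the benchmark executable shipped under tools/benchmark in the package;
# the flag name is an assumption for illustration.
subprocess.run(
    [package_root + "/tools/benchmark/benchmark", "--modelFile=/model/mobilenetv2.ms"],
    env=env,
    check=True,
)
```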
diff --git a/tutorials/lite/source_en/use/benchmark_train_tool.md b/tutorials/lite/source_en/use/benchmark_train_tool.md
index ce96e98a378ad8caaac1621519bb93d5f4138d8a..5a358814c851ce733239b5fed5931421cb36884d 100644
--- a/tutorials/lite/source_en/use/benchmark_train_tool.md
+++ b/tutorials/lite/source_en/use/benchmark_train_tool.md
@@ -29,15 +29,15 @@ To use the `benchmark_train` tool, you need to prepare the environment as follow

- Compilation: Install build dependencies and build the MindSpore Lite training framework. The code of the `benchmark_train` tool is stored in the `mindspore/lite/tools/benchmark_train` directory of the MindSpore source code. For details about the build operations, see the [Environment Requirements](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#environment-requirements) and [Compilation Example](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#compilation-example) in the build document.

-- Configure environment variables: For details, see [Output Description](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#training-output-description) in the build document. Suppose the absolute path of MindSpore Lite training package you build is `/path/mindspore-lite-{version}-train-{os}-{arch}.tar.gz`, the commands to extract the package and configure the LD_LIBRARY_PATH variable are as follows:
+- Configure environment variables: For details, see [Output Description](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#training-output-description) in the build document. Suppose the absolute path of the MindSpore Lite training package you built is `/path/mindspore-lite-{version}-{os}-{arch}.tar.gz`; the commands to extract the package and configure the LD_LIBRARY_PATH variable are as follows:

  ```bash
  cd /path
-  tar xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz
-  export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-train-{os}-{arch}/train/lib:/path/mindspore-lite-{version}-train-{os}-{arch}/train/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH}
+  tar xvf mindspore-lite-{version}-{os}-{arch}.tar.gz
+  export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-{os}-{arch}/runtime/lib:/path/mindspore-lite-{version}-{os}-{arch}/runtime/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH}
  ```

-The absolute path of the benchmark_train tool is `/path/mindspore-lite-{version}-train-{os}-{arch}/tools/benchmark_train/benchmark_train`.
+The absolute path of the benchmark_train tool is `/path/mindspore-lite-{version}-{os}-{arch}/tools/benchmark_train/benchmark_train`.
### Parameter Description diff --git a/tutorials/lite/source_en/use/build.md b/tutorials/lite/source_en/use/build.md index d350b084616bcc072cbb3bb04e2d9085363e461e..0b34df6ef85bafc0e5a6c17aea329e7c774b625f 100644 --- a/tutorials/lite/source_en/use/build.md +++ b/tutorials/lite/source_en/use/build.md @@ -1,6 +1,6 @@ # Building MindSpore Lite -`Windows` `Linux` `Android` `Environment Preparation` `Intermediate` `Expert` +`Windows` `macOS` `Linux` `iOS` `Android` `Environment Preparation` `Intermediate` `Expert` @@ -9,26 +9,22 @@ - [Environment Requirements](#environment-requirements) - [Compilation Options](#compilation-options) - [Compilation Example](#compilation-example) - - [Inference Output Description](#inference-output-description) - - [Description of Converter's Directory Structure](#description-of-converters-directory-structure) - - [Description of Obfuscator's Directory Structure](#description-of-obfuscators-directory-structure) - - [Description of Runtime and Other tools' Directory Structure](#description-of-runtime-and-other-tools-directory-structure) - - [Training Output Description](#training-output-description) - - [Description of Training Runtime and Related Tools' Directory Structure](#description-of-training-runtime-and-related-tools-directory-structure) + - [Directory Structure](#directory-structure) - [Windows Environment Compilation](#windows-environment-compilation) - [Environment Requirements](#environment-requirements-1) - [Compilation Options](#compilation-options-1) - [Compilation Example](#compilation-example-1) - - [Output Description](#output-description) - - [Description of Runtime and Related Tools' Directory Structure](#description-of-runtime-and-related-tools-directory-structure) - - [Docker Environment Compilation](#docker-environment-compilation) - - [Environmental Preparation](#environmental-preparation) - - [Download the docker image](#download-the-docker-image) - - [Create a container](#create-a-container) - - [Enter the container](#enter-the-container) + - [Directory Structure](#directory-structure-1) + - [macOS Environment Compilation](#macOS-environment-compilation) + - [Environment Requirements](#environment-requirements) - [Compilation Options](#compilation-options-2) - [Compilation Example](#compilation-example-2) - - [Output Description](#output-description-1) + - [Directory Structure](#directory-structure-2) + - [Docker Environment Compilation](#docker-environment-compilation) + - [Environmental Preparation](#environmental-preparation) + - [Compilation Options](#compilation-options-3) + - [Compilation Example](#compilation-example-3) + - [Directory Structure](#directory-structure-3) @@ -36,70 +32,47 @@ This chapter introduces how to quickly compile MindSpore Lite, which includes the following modules: -Modules in inference version: +Modules in MindSpore Lite: | Module | Support Platform | Description | | --- | ---- | ---- | -| converter | Linux, Windows | Model Conversion Tool | -| runtime(cpp, java) | Linux, Windows, Android | Model Inference Framework(Windows platform does not support java version runtime) | -| benchmark | Linux, Windows, Android | Benchmarking Tool | -| cropper | Linux | Static library crop tool for libmindspore-lite.a | -| minddata | Linux, Android | Image Processing Library | - -Modules in training version: - -| Module | Support Platform | Description | -| --------------- | ---------------- | ------------------------------------------------ | -| converter | Linux | Model Conversion Tool | -| runtime(cpp) | Linux, Android | 
Model Train Framework(java is not support) | -| cropper | Linux | Static library crop tool for libmindspore-lite.a | -| minddata | Linux, Android | Image Processing Library | -| benchmark_train | Linux, Android | Performance and Accuracy Validation | -| obfuscator | Linux | Model Obfuscation Tool | +| converter | Linux, Windows | Model Conversion Tool | +| runtime(cpp, java) | Linux, Windows, Android, iOS | Model Inference Framework(Windows platform does not support java version runtime) | +| benchmark | Linux, Windows, Android | Benchmarking Tool | +| benchmark_train | Linux, Android | Performance and Accuracy Validation | +| cropper | Linux | Static library crop tool for libmindspore-lite.a | +| minddata | Linux, Android | Image Processing Library | +| codegen | Linux | Model inference code generation tool | +| obfuscator | Linux | Model Obfuscation Tool | ## Linux Environment Compilation ### Environment Requirements - The compilation environment supports Linux x86_64 only. Ubuntu 18.04.02 LTS is recommended. - -- Compilation dependencies of runtime(cpp): - - [CMake](https://cmake.org/download/) >= 3.18.3 +- Compilation dependencies of cpp: - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - - [Git](https://git-scm.com/downloads) >= 2.28.0 -- Compilation dependencies of converter: - [CMake](https://cmake.org/download/) >= 3.18.3 - - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - [Git](https://git-scm.com/downloads) >= 2.28.0 - - [Autoconf](http://ftp.gnu.org/gnu/autoconf/) >= 2.69 - - [Libtool](https://www.gnu.org/software/libtool/) >= 2.4.6 - - [LibreSSL](http://www.libressl.org/) >= 3.1.3 - - [Automake](https://www.gnu.org/software/automake/) >= 1.11.6 - - [Libevent](https://libevent.org) >= 2.0 - - [OpenSSL](https://www.openssl.org/) >= 1.1.1 - -- Compilation dependencies of runtime(java) - - [CMake](https://cmake.org/download/) >= 3.18.3 - - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - - [Git](https://git-scm.com/downloads) >= 2.28.0 - - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) + - Configure environment variables: `export ANDROID_NDK=NDK path`. + - [DDK](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180) = V500.010 + - Configure environment variables: `export HWHIAI_DDK=DDK path`. +- Additional compilation dependencies of Java: - [Gradle](https://gradle.org/releases/) >= 6.6.1 + - Configure environment variables: `export GRADLE_HOME=GRADLE path`. + - Add the bin directory to the PATH: `export PATH=${GRADLE_HOME}/bin:$PATH`. - [OpenJDK](https://openjdk.java.net/install/) >= 1.8 - -> - To install and use `Android_NDK`, you need to configure environment variables. The command example is `export ANDROID_NDK=${NDK_PATH}/android-ndk-r20b`. -> - After Gradle is installed, you need to add its installation path to the PATH: `export PATH=${GRADLE_PATH}/bin:$PATH`. 
-> - To install the Android SDK via `Android command line tools`, you need to create a new directory first and configure its path to the environment variable in `${ANDROID_SDK_ROOT}`, then create SDK via `sdkmanager`: `./sdkmanager --sdk_root=$ {ANDROID_SDK_ROOT} "cmdline-tools;latest"`, and finally accept the license through `sdkmanager` under the `${ANDROID_SDK_ROOT}` directory: `yes | ./sdkmanager --licenses`. -> - Compiling AAR relies on Android SDK Build-Tools, Android SDK Platform-Tools and other Android SDK related components. If the Android SDK in the environment does not have related components, the required dependencies will be automatically downloaded during compilation. -> - When compiling the NPU operator, you need to download [DDK V500.010](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180), the directory where the compressed package is decompressed Set to the environment variable `${HWHIAI_DDK}`. + - Configure environment variables: `export JAVA_HOME=JDK path`. + - Add the bin directory to the PATH: `export PATH=${JAVA_HOME}/bin:$PATH`. + - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) + - Create a new directory, configure environment variables`export ANDROID_SDK_ROOT=new directory`. + - Download `SDK Tools`, create SDK through `sdkmanager`: `./sdkmanager --sdk_root=${ANDROID_SDK_ROOT} "cmdline-tools;latest"`. + - Accept the license through `sdkmanager` under the `${ANDROID_SDK_ROOT}` directory: `yes | ./sdkmanager --licenses`. ### Compilation Options -MindSpore Lite provides a compilation script `build.sh` for one-click compilation, located in the root directory of MindSpore. This script can be used to compile the code of training and inference. - -The following describes the compilation parameter of `build.sh` and the options of `mindspore/lite/CMakeLists.txt`. +The script `build.sh` in the root directory of MindSpore can be used to compile MindSpore Lite. #### The compilation parameter of `build.sh` @@ -110,7 +83,7 @@ The following describes the compilation parameter of `build.sh` and the options | -d | If this parameter is set, the debug version is compiled. Otherwise, the release version is compiled. | None | None | | -i | If this parameter is set, incremental compilation is performed. Otherwise, full compilation is performed. | None | None | | -j[n] | Sets the number of threads used during compilation. Otherwise, the number of threads is set to 8 by default. | Integer | 8 | -| -a | Whether to enable AddressSanitizer | on、off | off | +| -a | Whether to enable AddressSanitizer | on, off | off | > - When compiling the x86_64 version, if the JAVA_HOME environment variable is configured and Gradle is installed, the JAR package will be compiled at the same time. > - When the `-I` parameter changes, such as `-I x86_64` is converted to `-I arm64`, adding `-i` for parameter compilation does not take effect. 
@@ -120,17 +93,16 @@ The following describes the compilation parameter of `build.sh` and the options

| Option  |  Parameter Description  | Value Range | Defaults |
| -------- | ----- | ---- | ---- |
-| MSLITE_GPU_BACKEND | Set the GPU backend, only valid when `-I arm64` | opencl, vulkan, cuda, off | opencl |
-| MSLITE_ENABLE_NPU | Whether to compile NPU operator, only valid when `-I arm64` or `-I arm32` | on、off | on |
-| MSLITE_ENABLE_TRAIN | Whether to compile the training version | on、off | off |
-| MSLITE_ENABLE_SSE | Whether to enable SSE instruction set, only valid when `-I x86_64` | on、off | off |
-| MSLITE_ENABLE_AVX | Whether to enable AVX instruction set, only valid when `-I x86_64` | on、off | off |
-| MSLITE_ENABLE_CONVERTER | Whether to compile the model conversion tool, only valid when `-I x86_64` | on、off | on |
-| MSLITE_ENABLE_TOOLS | Whether to compile supporting tools | on、off | on |
-| MSLITE_ENABLE_TESTCASES | Whether to compile test cases | on、off | off |
+| MSLITE_GPU_BACKEND | Set the GPU backend, only valid when `-I arm64` | opencl, off | opencl |
+| MSLITE_ENABLE_NPU | Whether to compile NPU operator, only valid when `-I arm64` or `-I arm32` | on, off | on |
+| MSLITE_ENABLE_TRAIN | Whether to compile the training version | on, off | on |
+| MSLITE_ENABLE_SSE | Whether to enable SSE instruction set, only valid when `-I x86_64` | on, off | off |
+| MSLITE_ENABLE_AVX | Whether to enable AVX instruction set, only valid when `-I x86_64` | on, off | off |
+| MSLITE_ENABLE_CONVERTER | Whether to compile the model conversion tool, only valid when `-I x86_64` | on, off | on |
+| MSLITE_ENABLE_TOOLS | Whether to compile supporting tools | on, off | on |
+| MSLITE_ENABLE_TESTCASES | Whether to compile test cases | on, off | off |

> - The above options can be modified by setting the environment variable with the same name or the file `mindspore/lite/CMakeLists.txt`.
-> - Enabling MSLITE_ENABLE_TRAIN only generates the training version.
> - After modifying the Option, adding the `-i` parameter for incremental compilation will not take effect.

### Compilation Example

@@ -168,142 +140,66 @@ Then, run the following commands in the root directory of the source code to com
bash build.sh -A on -j32
```

-### Inference Output Description
+Finally, the following files will be generated in the `output/` directory:

-After the compilation is complete, go to the `mindspore/output` directory of the source code to view the file generated after compilation. The file is divided into the following parts.
+- `mindspore-lite-{version}-{os}-{arch}.tar.gz`: Contains the runtime and related tools.

-- `mindspore-lite-{version}-inference-{os}-{arch}.tar.gz`: Contains model inference framework runtime (cpp), and related tools.
-- `mindspore-lite-maven-{version}.zip`: Contains model reasoning framework AAR package.
+- `mindspore-lite-maven-{version}.zip`: The AAR package, which contains the runtime (java).

> - version: Version of the output, consistent with that of the MindSpore.
> - os: Operating system on which the output will be deployed.
> - arch: System architecture on which the output will be deployed.
-Execute the decompression command to obtain the compiled output: - -```bash -tar -xvf mindspore-lite-{version}-inference-{os}-{arch}.tar.gz -unzip mindspore-lite-maven-{version}.zip -``` - -#### Description of Converter's Directory Structure - -The conversion tool is only available under the `-I x86_64` compilation option, and the content includes the following parts: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── converter - ├── include - │ └── registry # Header files of customized op, parser and pass registration - ├── converter # Model conversion tool - │ └── converter_lite # Executable program - └── lib # The dynamic link library that converter depends - ├── libglog.so.0 # Dynamic library of Glog - └── libmslite_converter_plugin.so # Dynamic library of plugin registry -``` - -#### Description of CodeGen's Directory Structure - -The codegen executable program is only available under the `-I x86_64` compilation option, and only the operator library required by the inference code generated by codegen is generated under the `-I arm64` and `-I arm32` compilation options. - -- When the compilation option is `-I x86_64`: - - ```text - mindspore-lite-{version}-inference-linux-x64 - └── tools - └── codegen # Code generation tool - ├── codegen # Executable program - ├── include # Header files of inference framework - │ ├── nnacl # nnacl operator header file - │ └── wrapper - ├── lib - │ └── libwrapper.a # MindSpore Lite CodeGen generates code dependent operator static library - └── third_party - ├── include - │ └── CMSIS # ARM CMSIS NN operator header files - └── lib - └── libcmsis_nn.a # ARM CMSIS NN operator static library - ``` - -- When the compilation option is `-I arm64` or `-I arm32`: - - ```text - mindspore-lite-{version}-inference-android-{arch} - └── tools - └── codegen # Code generation tool - └── operator_library # Operator library - ├── include # Header files of inference framework - │ ├── nnacl # nnacl operator header file - │ └── wrapper - └── lib # Inference framework library - └── libwrapper.a # MindSpore Lite CodeGen generates code dependent static library - ``` - -#### Description of Obfuscator's Directory Structure - -The obfuscation tool is only available under the `-I x86_64` compilation option and the `ENABLE_MODEL_OBF` compilation option in `mindspore/mindspore/lite/CMakeLists.txt` is turned on, the content includes the following parts: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── obfuscator # Model obfuscation tool - └── msobfuscator # Executable program -``` - -#### Description of Runtime and Other tools' Directory Structure - -The inference framework can be obtained under `-I x86_64`, `-I arm64` and `-I arm32` compilation options, and the content includes the following parts: +### Directory Structure - When the compilation option is `-I x86_64`: ```text - mindspore-lite-{version}-inference-linux-x64 - ├── inference - │ ├── include # Header files of inference framework - │ │ └── registry # Header files of customized op registration - │ └── lib # Inference framework library - │ ├── libminddata-lite.so # The files of image processing dynamic library - │ ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite - │ ├── libmindspore-lite-jni.so # Dynamic library of inference framework jni in MindSpore Lite - │ ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite - │ ├── libmsdeobfuscator-lite.so # The files of obfuscated model loading dynamic library, need to open 
the `ENABLE_MODEL_OBF` option. - │ └── mindspore-lite-java.jar # Jar of inference framework in MindSpore Lite + mindspore-lite-{version}-linux-x64 + ├── runtime + │ ├── include + │ ├── lib + │ │ ├── libminddata-lite.a # Static library of image processing + │ │ ├── libminddata-lite.so # Dynamic library of image processing + │ │ ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite + │ │ ├── libmindspore-lite-jni.so # Dynamic library of inference framework jni in MindSpore Lite + │ │ ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite + │ │ ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite + │ │ ├── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite + │ │ ├── libmsdeobfuscator-lite.so # The files of obfuscated model loading dynamic library, need to open the `ENABLE_MODEL_OBF` option. + │ │ └── mindspore-lite-java.jar # Jar of inference framework in MindSpore Lite + │ └── third_party + │ └── libjpeg-turbo └── tools - ├── benchmark # Benchmarking tool - │ └── benchmark # Executable program - ├── codegen # Code generation tool - │ ├── codegen # Executable program - │ ├── include # operator header file - │ ├── lib # operator static library - │ └── third_party # ARM CMSIS NN static library - ├── converter # Model conversion tool - ├── obfuscator # Model obfuscation tool - └── cropper # Static library crop tool - ├── cropper # Executable file of static library crop tool - └── cropper_mapping_cpu.cfg # Crop cpu library related configuration files + ├── benchmark # Benchmarking tool + ├── benchmark_train # Training model benchmark tool + ├── codegen # Code generation tool + ├── converter # Model conversion tool + ├── obfuscator # Model obfuscation tool + └── cropper # Static library crop tool ``` - When the compilation option is `-I arm64` or `-I arm32`: ```text - mindspore-lite-{version}-inference-android-{arch} - ├── inference - │ ├── include # Header files of inference framework - │ │ └── registry # Header files of customized op registration - │ ├── lib # Inference framework library - │ │ ├── libminddata-lite.so # The files of image processing dynamic library - │ │ ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite - │ │ ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite - │ │ └── libmsdeobfuscator-lite.so # The files of obfuscated model loading dynamic library, need to open the `ENABLE_MODEL_OBF` option. + mindspore-lite-{version}-android-{arch} + ├── runtime + │ ├── include + │ ├── lib + │ │ ├── libminddata-lite.a # Static library of image processing + │ │ ├── libminddata-lite.so # Dynamic library of image processing + │ │ ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite + │ │ ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite + │ │ ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite + │ │ └── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite + │ │ └── libmsdeobfuscator-lite.so # The files of obfuscated model loading dynamic library, need to open the `ENABLE_MODEL_OBF` option. 
│ └── third_party - │ └── hiai_ddk # NPU library, only exists in arm64 package + │ ├── hiai_ddk + │ └── libjpeg-turbo └── tools - ├── benchmark # Benchmarking tool - │ └── benchmark - └── codegen # Code generation tool - ├── include # operator header file - └── lib # operator static library + ├── benchmark # Benchmarking tool + ├── benchmark_train # Training model benchmark tool + └── codegen # Code generation tool ``` - When the compilation option is `-A on`: @@ -316,68 +212,6 @@ The inference framework can be obtained under `-I x86_64`, `-I arm64` and `-I ar └── mindspore-lite-{version}.aar # MindSpore Lite runtime aar ``` -### Training Output Description - -If the MSLITE_ENABLE_TRAIN option is turned on, the training Runtime and related tools will be generated, as follows: - -- `mindspore-lite-{version}-train-{os}-{arch}.tar.gz`: Contains model training framework and related tool. - -> - version: Version of the output, consistent with that of the MindSpore. -> - os: Operating system on which the output will be deployed. -> - arch: System architecture on which the output will be deployed. - -Execute the decompression command to obtain the compiled output: - -```bash -tar -xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz -``` - -#### Description of Training Runtime and Related Tools' Directory Structure - -The MindSpore Lite training framework can be obtained under `-I x86_64`, `-I arm64` and `-I arm32` compilation options, and the content includes the following parts: - -- When the compilation option is `-I x86_64`: - - ```text - mindspore-lite-{version}-train-linux-x64 - ├── tools - │ ├── benchmark_train # Training model benchmark tool - │ ├── converter # Model conversion tool - │ └── cropper # Static library crop tool - │ ├── cropper # Executable file of static library crop tool - │ └── cropper_mapping_cpu.cfg # Crop cpu library related configuration files - └── train - ├── include # Header files of training framework - │ └── registry # Header files of customized op registration - ├── lib # Inference framework library - │ ├── libminddata-lite.so # The files of image processing dynamic library - │ ├── libmindspore-lite-jni.so # Dynamic library of training framework jni in MindSpore Lite - │ ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite - │ ├── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite - │ └── mindspore-lite-java.jar # Jar of inference framework in MindSpore Lite - └── third_party - └── libjpeg-turbo - ``` - -- When the compilation option is `-I arm64` or `-I arm32`: - - ```text - mindspore-lite-{version}-train-android-{arch} - ├── tools - │ ├── benchmark # Benchmarking tool - │ ├── benchmark_train # Training model benchmark tool - └── train - ├── include # Header files of training framework - │ └── registry # Header files of customized op registration - ├── lib # Training framework library - │ ├── libminddata-lite.so # The files of image processing dynamic library - │ ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite - │ └── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite - └── third_party - ├── hiai_ddk # NPU library, only exists in arm64 package - └── libjpeg-turbo - ``` - ## Windows Environment Compilation ### Environment Requirements @@ -393,13 +227,27 @@ The MindSpore Lite training framework can be obtained under `-I x86_64`, `-I arm ### Compilation Options -The compilation options of MindSpore Lite are as follows: +The 
script `build.bat` in the root directory of MindSpore can be used to compile MindSpore Lite.
+
+#### The compilation parameter of `build.bat`

| Parameter  |  Parameter Description  | Mandatory or Not |
| -------- | ----- | ---- |
| lite | Set this parameter to compile the MindSpore Lite project. | Yes |
| [n] | Set the number of threads used during compilation, otherwise the default is set to 6 threads.  | No |

+#### The options of `mindspore/lite/CMakeLists.txt`
+
+| Option  |  Parameter Description  | Value Range | Defaults |
+| -------- | ----- | ---- | ---- |
+| MSLITE_ENABLE_SSE | Whether to enable SSE instruction set | on, off | off |
+| MSLITE_ENABLE_AVX | Whether to enable AVX instruction set | on, off | off |
+| MSLITE_ENABLE_CONVERTER | Whether to compile the model conversion tool | on, off | on |
+| MSLITE_ENABLE_TOOLS | Whether to compile supporting tools | on, off | on |
+| MSLITE_ENABLE_TESTCASES | Whether to compile test cases | on, off | off |
+
+> - The above options can be modified by setting the environment variable with the same name or the file `mindspore/lite/CMakeLists.txt`.
+
### Compilation Example

First, use the git tool to download the source code from the MindSpore code repository.

@@ -422,29 +270,18 @@ call build.bat lite
call build.bat lite 8
```

-### Output Description
-
-After the compilation is complete, go to the `mindspore/output` directory of the source code to view the file generated after compilation. The file is divided into the following parts.
+Finally, the following files will be generated in the `output/` directory:

-- `mindspore-lite-{version}-inference-win-x64.zip`: Contains model inference framework and related tool.
+- `mindspore-lite-{version}-win-x64.zip`: Contains the model inference framework and related tools.

> version: Version of the output, consistent with that of the MindSpore.

-Execute the decompression command to obtain the compiled output:
-
-```bat
-unzip mindspore-lite-{version}-inference-win-x64.zip
-```
-
-#### Description of Runtime and Related Tools' Directory Structure
-
-The content includes the following parts:
+### Directory Structure

```text
-mindspore-lite-{version}-inference-win-x64
-├── inference
-│   ├── include # Header files of inference framework
-│   │   └── registry # Header files of customized op registration
+mindspore-lite-{version}-win-x64
+├── runtime
+│   ├── include
│   └── lib
│       ├── libgcc_s_seh-1.dll # Dynamic library of MinGW
│       ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite
@@ -455,50 +292,103 @@ mindspore-lite-{version}-inference-win-x64
│       └── libwinpthread-1.dll # Dynamic library of MinGW
└── tools
    ├── benchmark # Benchmarking tool
-    │   └── benchmark.exe # Executable program
    └── converter # Model conversion tool
-        ├── include
-        │   └── registry # Header files of customized op, parser and pass registration
-        ├── converter
-        │   └── converter_lite.exe # Executable program
-        └── lib
-            ├── libgcc_s_seh-1.dll # Dynamic library of MinGW
-            ├── libglog.dll # Dynamic library of Glog
-            ├── libmslite_converter_plugin.dll # Dynamic library of plugin registry
-            ├── libmslite_converter_plugin.dll.a # Link file of Dynamic library of plugin registry
-            ├── libssp-0.dll # Dynamic library of MinGW
-            ├── libstdc++-6.dll # Dynamic library of MinGW
-            └── libwinpthread-1.dll # Dynamic library of MinGW
```

> Currently, MindSpore Lite Train is not supported on Windows.
-## Docker Environment Compilation
+## macOS Environment Compilation

-### Environmental Preparation
+### Environment Requirements

-#### Download the docker image
+- System environment: macOS 10.15.4 and above; 64-bit.

-```bash
-docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
-```
+- Compilation dependencies are:
+  - [CMake](https://cmake.org/download/) >= 3.18.3
+  - [Xcode](https://developer.apple.com/xcode/download/cn) == 11.4.1
+  - [Git](https://git-scm.com/downloads) >= 2.28.0
+
+> - The compilation script will execute `git clone` to obtain the code of the third-party dependent libraries.
+
+### Compilation Options
+
+The script `build.sh` in the root directory of MindSpore can be used to compile MindSpore Lite.

-> - Before downloading the image, please make sure docker has been installed.
-> - Docker image does not currently support Windows version compilation.
-> - Third-party libraries that compile dependencies have been installed in the image and environment variables have been configured.
+#### The compilation parameter of `build.sh`

-#### Create a container
+| Parameter  |  Parameter Description  | Value Range | Defaults |
+| -------- | ----- | ---- | ---- |
+| -I | Selects an applicable architecture. | arm64, arm32 | None |
+| -j[n] | Sets the number of threads used during compilation. Otherwise, the number of threads is set to 8 by default. | Integer | 8 |
+
+### Compilation Example
+
+First, use the git tool to download the source code from the MindSpore code repository.

```bash
-docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
+git clone https://gitee.com/mindspore/mindspore.git
```

-#### Enter the container
+Then, run the following commands in the root directory of the source code to compile MindSpore Lite.

-```bash
-docker exec -ti -u 0 docker01 bash
+- Compile the ARM64 architecture version
+
+  ```bash
+  bash build.sh -I arm64 -j8
+  ```
+
+- Compile the ARM32 architecture version
+
+  ```bash
+  bash build.sh -I arm32 -j8
+  ```
+
+Finally, the following files will be generated in the `output/` directory:
+
+- `mindspore-lite-{version}-{os}-{arch}.tar.gz`: Contains the model inference framework.
+
+> - version: Version of the output, consistent with that of the MindSpore.
> - os: Operating system on which the output will be deployed.
> - arch: System architecture on which the output will be deployed.
+
+### Directory Structure
+
+```text
+mindspore-lite.framework
+└── runtime
+    ├── Headers        # Header files of the inference framework
+    ├── Info.plist     # Configuration file
+    └── mindspore-lite # Static library
+```
+
+> Currently, MindSpore Lite Train and converter are not supported on macOS.
+
+## Docker Environment Compilation
+
+### Environmental Preparation
+
+- Download the docker image
+
+  ```bash
+  docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
+  ```
+
+  > - Before downloading the image, please make sure docker has been installed.
+  > - Docker image does not currently support Windows version compilation.
+  > - Third-party libraries that compile dependencies have been installed in the image and environment variables have been configured.
+ +- Create a container + + ```bash + docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 + ``` + +- Enter the container + + ```bash + docker exec -ti -u 0 docker01 bash + ``` + ### Compilation Options Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation) @@ -507,6 +397,6 @@ Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/ Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation) -### Output Description +### Directory Structure Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation) diff --git a/tutorials/lite/source_en/use/converter_tool.md b/tutorials/lite/source_en/use/converter_tool.md index 1acc5e2a2698e6b39997cc94f39845efd03f936b..aec48ae6e3ed859699d1124271f5a8235e554030 100644 --- a/tutorials/lite/source_en/use/converter_tool.md +++ b/tutorials/lite/source_en/use/converter_tool.md @@ -8,10 +8,12 @@ - [Overview](#overview) - [Linux Environment Instructions](#linux-environment-instructions) - [Environment Preparation](#environment-preparation) + - [Directory Structure](#directory-structure) - [Parameter Description](#parameter-description) - [Example](#example) - [Windows Environment Instructions](#windows-environment-instructions) - [Environment Preparation](#environment-preparation-1) + - [Directory Structure](#directory-structure-1) - [Parameter Description](#parameter-description-1) - [Example](#example-1) @@ -43,6 +45,21 @@ To use the MindSpore Lite model conversion tool, you need to prepare the environ ${PACKAGE_ROOT_PATH} is the decompressed package path obtained by compiling or downloading. +### Directory Structure + +```text +mindspore-lite-{version}-linux-x64 +└── tools + └── converter + ├── include + │ └── registry # Header files of customized op, parser and pass registration + ├── converter # Model conversion tool + │ └── converter_lite # Executable program + └── lib # The dynamic link library that converter depends + ├── libglog.so.0 # Dynamic library of Glog + └── libmslite_converter_plugin.so # Dynamic library of plugin registry +``` + ### Parameter Description MindSpore Lite model conversion tool provides multiple parameters. @@ -68,6 +85,8 @@ The following describes the parameters in detail. > - In order to ensure the accuracy of weight quantization, the "--bitNum" parameter should better be set to a range from 8bit to 16bit. > - PostTraining method currently only supports activation quantization and weight quantization in 8 bit. +### Example + The following describes how to use the conversion command by using several common examples. - Take the Caffe model LeNet as an example. Run the following conversion command: @@ -138,6 +157,26 @@ To use the MindSpore Lite model conversion tool, the following environment prepa %PACKAGE_ROOT_PATH% is the decompressed package path obtained by compiling or downloading. 
+### Directory Structure
+
+```text
+mindspore-lite-{version}-win-x64
+└── tools
+    └── converter # Model conversion tool
+        ├── include
+        │   └── registry                         # Header files of customized op, parser and pass registration
+        ├── converter
+        │   └── converter_lite.exe               # Executable program
+        └── lib
+            ├── libgcc_s_seh-1.dll               # Dynamic library of MinGW
+            ├── libglog.dll                      # Dynamic library of Glog
+            ├── libmslite_converter_plugin.dll   # Dynamic library of plugin registry
+            ├── libmslite_converter_plugin.dll.a # Link file of the plugin registry dynamic library
+            ├── libssp-0.dll                     # Dynamic library of MinGW
+            ├── libstdc++-6.dll                  # Dynamic library of MinGW
+            └── libwinpthread-1.dll              # Dynamic library of MinGW
+```
+
### Parameter Description

Refer to the Linux environment model conversion tool [parameter description](https://www.mindspore.cn/tutorial/lite/en/master/use/converter_tool.html#parameter-description).
diff --git a/tutorials/lite/source_en/use/converter_train.md b/tutorials/lite/source_en/use/converter_train.md
index 1ffe6d36a050f01946a1e5d3b18c82159f4c26dc..416f7857e7a189dc19f1d6ecde788ec42da0761b 100644
--- a/tutorials/lite/source_en/use/converter_train.md
+++ b/tutorials/lite/source_en/use/converter_train.md
@@ -49,6 +49,10 @@ The table below shows the parameters used in the MindSpore Lite model training t
| `--modelFile=` | yes | Path of the input model. | - | - |
| `--outputFile=` | yes | Path of the output model. The suffix `.ms` can be automatically generated. | - | - |
| `--trainModel=true` | yes | Training on Device or not | true, false | false |
+| `--quantType=` | No | Sets the quantization type of the model. | WeightQuant: weight quantization, the only type supported for training | - |
+| `--bitNum=` | No | Sets the quantization bit number when quantType is set as WeightQuant; 1 bit to 16 bit quantization is now supported. | \[1, 16] | 8 |
+| `--quantWeightSize=` | No | Sets a size threshold of the convolution filter when quantType is set as WeightQuant. If the filter size is bigger than this value, the weight will be quantized. | \[0, +∞) | 0 |
+| `--quantWeightChannel=` | No | Sets a channel number threshold of the convolution filter when quantType is set as WeightQuant. If the channel number is bigger than this value, the weight will be quantized. | \[0, +∞) | 16 |

> The parameter name and parameter value are separated by an equal sign (=) and no space is allowed between them.
diff --git a/tutorials/lite/source_en/use/cropper_tool.md b/tutorials/lite/source_en/use/cropper_tool.md
index 42e894e8b47fd6d0a687f664fcc3d693384f14a8..749dce795b1be2c4c40aadd497fb5610ae1c8ae0 100644
--- a/tutorials/lite/source_en/use/cropper_tool.md
+++ b/tutorials/lite/source_en/use/cropper_tool.md
@@ -58,7 +58,7 @@ The Cropper tool obtains the operator list by parsing the `ms` model, and crop t

- Pass in the `ms` model through the folder, and pass the folder path where the model file is located to the `modelFolderPath` parameter to crop the `libmindspore-lite.a` static library of arm64-cpu.
```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` This example will read all the `ms` models contained in the `/model` folder, crop the `libmindspore-lite.a` static library of arm64-cpu, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory. @@ -66,7 +66,7 @@ This example will read all the `ms` models contained in the `/model` folder, cro - Pass in the `ms` model by file, pass the path where the model file is located to the `modelFile` parameter, and crop the `libmindspore-lite.a` static library of arm64-cpu. ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` In this example, the `libmindspore-lite.a` static library of arm64-cpu will be cropped according to the `ms` model passed by `modelFile`, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory. @@ -74,7 +74,7 @@ In this example, the `libmindspore-lite.a` static library of arm64-cpu will be c - Pass in the `ms` model through the folder, and pass the folder path where the model file is located to the `modelFolderPath` parameter to crop the `libmindspore-lite.a` static library of arm64-gpu. ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` This example will read all the `ms` models contained in the `/model` folder, crop the `libmindspore-lite.a` static library of arm64-gpu, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory. @@ -82,7 +82,7 @@ This example will read all the `ms` models contained in the `/model` folder, cro - Pass in the `ms` model by file, pass the path where the model file is located to the `modelFile` parameter, and crop the `libmindspore-lite.a` static library of arm64-gpu. 
```bash
-./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
+./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
```

In this example, the `libmindspore-lite.a` static library of arm64-gpu will be cropped according to the `ms` model passed by `modelFile`, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory.
\ No newline at end of file
diff --git a/tutorials/lite/source_en/use/micro.md b/tutorials/lite/source_en/use/micro.md
index 455da3529ea5b0d331edccb778f6a0a535f6d691..8a01bce501d3f99601727aa7136f5073665988c0 100644
--- a/tutorials/lite/source_en/use/micro.md
+++ b/tutorials/lite/source_en/use/micro.md
@@ -7,6 +7,7 @@

- [Perform Inference on the Microcontroller](#perform-inference-on-the-microcontroller)
    - [Overview](#overview)
    - [Obtaining CodeGen](#obtaining-codeGen)
+    - [Directory Structure](#directory-structure)
    - [Parameter Description](#parameter-description)
    - [Instructions](#instructions)
    - [Using CodeGen to Perform inference on STM Boards](#perform-inference-on-the-stm-microcontroller)
@@ -37,6 +38,25 @@ You can obtain CodeGen by any of the following ways:

> Currently the code generator is only available on Linux x86_64.

+## Directory Structure
+
+```text
+mindspore-lite-{version}-linux-x64
+└── tools
+    └── codegen # Code generation tool
+        ├── codegen          # Executable program
+        ├── include          # Header files of inference framework
+        │   ├── nnacl        # nnacl operator header file
+        │   └── wrapper
+        ├── lib
+        │   └── libwrapper.a # Static library of operators that the code generated by MindSpore Lite CodeGen depends on
+        └── third_party
+            ├── include
+            │   └── CMSIS    # ARM CMSIS NN operator header files
+            └── lib
+                └── libcmsis_nn.a # ARM CMSIS NN operator static library
+```
+
## Parameter Description

Here is the detailed description of parameters:
diff --git a/tutorials/lite/source_en/use/npu_info.md b/tutorials/lite/source_en/use/npu_info.md
index e77c095625306e8e65ceb726225e087d7f04cfd2..f691caaa38c44d44b92cf0200b12b8f7b2d1f5b3 100644
--- a/tutorials/lite/source_en/use/npu_info.md
+++ b/tutorials/lite/source_en/use/npu_info.md
@@ -51,7 +51,7 @@ For more information about compilation, see [Linux Environment Compilation](http

    For example,

    ```bash
-    export LD_LIBRARY_PATH=mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH
    ```

    For more information about compilation, please refer to [Compilation Output](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#description-of-runtime-and-other-tools-directory-structure)
@@ -80,8 +80,8 @@ on the phone, as shown in the example below:

For more information about the use of Benchmark, see [Benchmark Use](https://www.mindspore.cn/tutorial/lite/en/master/use/benchmark_tool.html).
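+
+For instance, a minimal NPU inference test with Benchmark could look like the following. This is a sketch only: the model path is illustrative, and `--device=NPU` assumes the package and NPU libraries above have been pushed to the device.
+
+```bash
+./benchmark --modelFile=/data/local/tmp/model.ms --device=NPU
+```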
For environment variable settings, you need to set the directory where the libmindspore-lite.so
-(under the directory `mindspore-lite-{version}-inference-android-{arch}/inference/lib`) and NPU libraries
-(under the directory `mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/`) are located, to `${LD_LIBRARY_PATH}`.
+(under the directory `mindspore-lite-{version}-android-{arch}/runtime/lib`) and NPU libraries
+(under the directory `mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/`) are located, to `${LD_LIBRARY_PATH}`.
The directory is specified in [Compilation Output](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#description-of-runtime-and-other-tools-directory-structure)
with compilation option `-I arm64` or `-I arm32`.
diff --git a/tutorials/lite/source_en/use/runtime_cpp.md b/tutorials/lite/source_en/use/runtime_cpp.md
index edf787311e76c4614e083574dd223694a28a8268..216309dbc5e5f2ce4fba6c79d1118091f8b72ed3 100644
--- a/tutorials/lite/source_en/use/runtime_cpp.md
+++ b/tutorials/lite/source_en/use/runtime_cpp.md
@@ -1,6 +1,6 @@
# Using C++ Interface to Perform Inference

-`Windows` `Linux` `Android` `C++` `Inference Application` `Model Loading` `Data Preparation` `Intermediate` `Expert`
+`Windows` `macOS` `Linux` `iOS` `Android` `C++` `Inference Application` `Model Loading` `Data Preparation` `Intermediate` `Expert`

@@ -85,6 +85,8 @@ The context saves some basic configuration parameters required by the session to

- [device_list_](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#device-list): MindSpore Lite supports heterogeneous inference. The backend configuration information for inference is specified by `device_list_` in [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2). By default, the [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext) of the CPU is stored. During graph build, operator selection and scheduling are performed based on the backend configuration information in `device_list_`. Currently, only CPU and GPU heterogeneity or CPU and NPU heterogeneity is supported. When the GPU's [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext) is configured, GPU-based inference is preferentially used. When the NPU's [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext) is configured, NPU-based inference is preferentially used.

> `device_list_[0]` must be `DeviceContext` of the CPU, and `device_list_[1]` must be `DeviceContext` of the GPU or `DeviceContext` of the NPU. Currently, the CPU, GPU, and NPU cannot be set at a time.
+>
+> For the iOS platform, `device_list_[0]` must be `DeviceContext` of the CPU.

### Configuring the Number of Threads

@@ -119,6 +121,8 @@ cpu_device_info.enable_float16_ = true;
```

> Float16 takes effect only when the CPU is of the ARM v8.2 architecture. Other models and x86 platforms that are not supported are automatically rolled back to Float32.
+>
+> For the iOS platform, only the CPU backend is supported, and Float16 is not supported on it yet.

### Configuring the GPU Backend

@@ -580,6 +584,8 @@ If an exception occurs during inference, you can view logs to locate the fault.
logcat -s "MS_LITE"
```

+> Viewing logs is not supported on the iOS platform for now.
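+
+When the device is attached to a host with USB debugging enabled, the same log filter can be applied from the host side through `adb` (a usage sketch):
+
+```bash
+adb logcat -s "MS_LITE"
+```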
+ ### Obtaining the Version Number MindSpore Lite provides the [Version](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#version) method to obtain the version number, which is included in the `include/version.h` header file. You can call this method to obtain the version number of MindSpore Lite. diff --git a/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md b/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md index b23c1e0cc4ef9719cfa6ff0618987e6675a96120..89d53905917aa8bb5fe38065c7ec9b3b4ae0e379 100644 --- a/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md +++ b/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md @@ -119,7 +119,7 @@ app ### 配置MindSpore Lite依赖项 -Android调用MindSpore Android AAR时,需要相关库文件支持。可通过MindSpore Lite[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)生成`mindspore-lite-{version}-inference-android.tar.gz`库文件包并解压缩(包含`mindspore-lite-{version}.aar`库文件)。 +Android调用MindSpore Android AAR时,需要相关库文件支持。可通过MindSpore Lite[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)生成`mindspore-lite-{version}-android.tar.gz`库文件包并解压缩(包含`mindspore-lite-{version}.aar`库文件)。 > version:输出件版本号,与所编译的分支代码对应的版本一致。 diff --git a/tutorials/lite/source_zh_cn/quick_start/quick_start.md b/tutorials/lite/source_zh_cn/quick_start/quick_start.md index ee936fbf521fb27eaeabb6ee9583372e2ee0402e..330b1bebec14112d58d02ee30e1eb1c6d5e0b804 100644 --- a/tutorials/lite/source_zh_cn/quick_start/quick_start.md +++ b/tutorials/lite/source_zh_cn/quick_start/quick_start.md @@ -171,17 +171,17 @@ android{ ```text # ============== Set MindSpore Dependencies. ============= include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/include) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/include) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/include) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/minddata/dataset) add_library(mindspore-lite SHARED IMPORTED) add_library(minddata-lite SHARED IMPORTED) set_target_properties(mindspore-lite PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/lib/libmindspore-lite.so) + ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/lib/libmindspore-lite.so) set_target_properties(minddata-lite PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/lib/libminddata-lite.so) + ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/minddata/lib/libminddata-lite.so) # --------------- MindSpore Lite set End. -------------------- # Link target library. 
diff --git a/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md b/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md index 1c824e0a3e9ff1cf4b460b2b2521abf805c79d07..6a746cfbe61de23b537d9e62ad60ad458e57f389 100644 --- a/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md +++ b/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md @@ -54,7 +54,7 @@ bash build.sh ``` - > 若使用该build脚本下载MindSpore Lite推理框架失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 模型推理框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`inference/lib`目录下的`libmindspore-lite.a`文件拷贝到`mindspore/lite/examples/quick_start_cpp/lib`目录、`inference/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`目录下。 + > 若使用该build脚本下载MindSpore Lite推理框架失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 模型推理框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`runtime/lib`目录下的`libmindspore-lite.a`文件拷贝到`mindspore/lite/examples/quick_start_cpp/lib`目录、`runtime/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`目录下。 > > 若MobileNetV2模型下载失败,请手动下载相关模型文件[mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms),并将其拷贝到`mindspore/lite/examples/quick_start_cpp/model`目录。 > @@ -86,7 +86,7 @@ - 编译构建 - - 库下载:请手动下载硬件平台为CPU、操作系统为Windows-x64的MindSpore Lite模型推理框架[mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`inference/lib`目录下的`libmindspore-lite.a`拷贝到`mindspore/lite/examples/quick_start_cpp/lib`工程目录、`inference/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`工程目录下。(注意:工程项目下的`lib`、`include`目录需手工创建) + - 库下载:请手动下载硬件平台为CPU、操作系统为Windows-x64的MindSpore Lite模型推理框架[mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`runtime/lib`目录下的`libmindspore-lite.a`拷贝到`mindspore/lite/examples/quick_start_cpp/lib`工程目录、`runtime/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`工程目录下。(注意:工程项目下的`lib`、`include`目录需手工创建) - 模型下载:请手动下载相关模型文件[mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms),并将其拷贝到`mindspore/lite/examples/quick_start_cpp/model`目录。 - 编译:在`mindspore/lite/examples/quick_start_cpp`目录下执行[build脚本](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/examples/quick_start_cpp/build.bat),将能够自动下载相关文件并编译Demo。 diff --git a/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md b/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md index 9c52e48ff10e086cc9eaf60697cc8b30bf793cc3..d259511c6b4978270b6bb95796f1920ca746ee54 100644 --- a/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md +++ b/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md @@ -52,7 +52,7 @@ bash build.sh ``` - > 若MindSpore Lite推理框架下载失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),解压后将`inference/lib/jar`目录下的`libmindspore-lite.so`、`libmindspore-lite-jni.so`以及`libmindspore-lite-java.jar`拷贝到`mindspore/lite/examples/quick_start_java/lib`目录。 + > 若MindSpore Lite推理框架下载失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 
框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),解压后将`runtime/lib`目录下的`libmindspore-lite.so`、`libmindspore-lite-jni.so`以及`libmindspore-lite-java.jar`拷贝到`mindspore/lite/examples/quick_start_java/lib`目录。
    >
    > 若MobileNetV2模型下载失败,请手动下载相关模型文件[mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms),并将其拷贝到`mindspore/lite/examples/quick_start_java/model/`目录。
    >
diff --git a/tutorials/lite/source_zh_cn/quick_start/train_lenet.md b/tutorials/lite/source_zh_cn/quick_start/train_lenet.md
index aecdf3e0565aa75ec7f1f96eba65d26ed8ef0cce..5482473158baf7940f7b49175d8180840b226d64 100644
--- a/tutorials/lite/source_zh_cn/quick_start/train_lenet.md
+++ b/tutorials/lite/source_zh_cn/quick_start/train_lenet.md
@@ -73,17 +73,17 @@ cd ./mindspore

源码路径下的`mindspore/lite/examples/train_lenet`目录包含了本示例程序的源码。

-请到[MindSpore Lite下载页面](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)下载mindspore-lite-{version}-linux-x64.tar.gz以及mindspore-lite-{version}-android-aarch64.tar.gz。其中,mindspore-lite-{version}-linux-x64.tar.gz是MindSpore Lite在x86平台的安装包,里面包含模型转换工具converter_lite,本示例用它来将MINDIR模型转换成MindSpore Lite支持的`.ms`格式;mindspore-lite-{version}-android-aarch64.tar.gz是MindSpore Lite在Android平台的安装包,里面包含训练运行时库libmindspore-lite.so,本示例用它所提供的接口在Android上训练模型。下载完成后,需要将mindspore-lite-{version}-linux-x64.tar.gz重命名为mindspore-lite-{version}-train-linux-x64.tar.gz,将mindspore-lite-{version}-android-aarch64.tar.gz重命名为mindspore-lite-{version}-train-android-aarch64.tar.gz,最后将重命名后的文件放到MindSpore源码下的`output`目录(如果没有`output`目录,请创建它)。
+请到[MindSpore Lite下载页面](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)下载mindspore-lite-{version}-linux-x64.tar.gz以及mindspore-lite-{version}-android-aarch64.tar.gz。其中,mindspore-lite-{version}-linux-x64.tar.gz是MindSpore Lite在x86平台的安装包,里面包含模型转换工具converter_lite,本示例用它来将MINDIR模型转换成MindSpore Lite支持的`.ms`格式;mindspore-lite-{version}-android-aarch64.tar.gz是MindSpore Lite在Android平台的安装包,里面包含训练运行时库libmindspore-lite.so,本示例用它所提供的接口在Android上训练模型。下载完成后,将这两个安装包放到MindSpore源码下的`output`目录(如果没有`output`目录,请创建它)。

假设下载的安装包存放在`/Downloads`目录,上述操作对应的`Linux`指令如下:

```bash
mkdir output
-cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/mindspore-lite-{version}-train-linux-x64.tar.gz
-cp /Downloads/mindspore-lite-{version}-android-aarch64.tar.gz output/mindspore-lite-{version}-train-android-aarch64.tar.gz
+cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/mindspore-lite-{version}-linux-x64.tar.gz
+cp /Downloads/mindspore-lite-{version}-android-aarch64.tar.gz output/mindspore-lite-{version}-android-aarch64.tar.gz
```

-您也可以通过[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)直接生成端侧训练框架对应的x86平台安装包mindspore-lite-{version}-train-linux-x64.tar.gz以及Android平台安装包mindspore-lite-{version}-train-android-aarch64.tar.gz,源码编译的安装包会自动生成在`output`目录下,请确保`output`目录下同时存在这两个安装包。
+您也可以通过[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)直接生成端侧训练框架对应的x86平台安装包mindspore-lite-{version}-linux-x64.tar.gz以及Android平台安装包mindspore-lite-{version}-android-aarch64.tar.gz,源码编译的安装包会自动生成在`output`目录下,请确保`output`目录下同时存在这两个安装包。

### 连接安卓设备
diff --git a/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md b/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md
index f142f35f07469d7cb9d0e6e874eb1d55b7776865..ac74ccba1d6d66e7a3d988590137a430d8bdfca9 100644
--- a/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md
+++ b/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md
@@ -80,7 +80,7 @@ MNIST_Data/

```bash
cd /codes/mindspore/output
-tar xzf mindspore-lite-${version}-train-linux-x64-jar.tar.gz
+tar xzf mindspore-lite-${version}-linux-x64-jar.tar.gz
mkdir ../mindspore/lite/examples/train_lenet_java/lib
-cp mindspore-lite-${version}-train-linux-x64-jar/jar/* ../mindspore/lite/examples/train_lenet_java/lib/
+cp mindspore-lite-${version}-linux-x64-jar/jar/* ../mindspore/lite/examples/train_lenet_java/lib/
```
diff --git a/tutorials/lite/source_zh_cn/use/benchmark_tool.md b/tutorials/lite/source_zh_cn/use/benchmark_tool.md
index fdebd8c10b4892465f0696cdd5b62a2ec6903c5d..14ce48285a6fefb6e16a0547cec08ae873556580 100644
--- a/tutorials/lite/source_zh_cn/use/benchmark_tool.md
+++ b/tutorials/lite/source_zh_cn/use/benchmark_tool.md
@@ -41,7 +41,7 @@

- 将推理需要的动态链接库加入环境变量LD_LIBRARY_PATH。

    ```bash
-    export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/inference/lib:${LD_LIBRARY_PATH}
+    export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/runtime/lib:${LD_LIBRARY_PATH}
    ```

    其中${PACKAGE_ROOT_PATH}是编译得到的包解压后的根目录。
@@ -224,7 +224,7 @@ Model = model.ms, NumThreads = 1, MinRunTime = 0.104000 ms, MaxRunTime = 0.17900

- 将推理需要的动态链接库加入环境变量PATH。

    ```bash
-    set PATH=%PACKAGE_ROOT_PATH%\inference\lib;%PATH%
+    set PATH=%PACKAGE_ROOT_PATH%\runtime\lib;%PATH%
    ```

    其中%PACKAGE_ROOT_PATH%是编译得到的包解压后的根目录。
diff --git a/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md b/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md
index 38a820392cd068e958b0d63302ae7bce728138e1..d57392ccdf5679a1fa7049a2567096c4d2558da6 100644
--- a/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md
+++ b/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md
@@ -29,15 +29,15 @@

- 编译:`benchmark_train`工具代码在MindSpore源码的`mindspore/lite/tools/benchmark_train`目录中,参考构建文档中的[环境要求](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id1)和[编译示例](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id3)编译端侧训练框架。

-- 配置环境变量:参考构建文档中的[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id5),获得`benchmark_train`工具,并配置环境变量。假设您编译出的端侧训练框架压缩包所在完整路径为`/path/mindspore-lite-{version}-train-{os}-{arch}.tar.gz`,解压并配置环境变量的命令如下:
+- 配置环境变量:参考构建文档中的[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id5),获得`benchmark_train`工具,并配置环境变量。假设您编译出的端侧训练框架压缩包所在完整路径为`/path/mindspore-lite-{version}-{os}-{arch}.tar.gz`,解压并配置环境变量的命令如下:

    ```bash
    cd /path
-    tar xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz
-    export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-train-{os}-{arch}/train/lib:/path/mindspore-lite-{version}-train-{os}-{arch}/train/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH}
+    tar xvf mindspore-lite-{version}-{os}-{arch}.tar.gz
+    export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-{os}-{arch}/runtime/lib:/path/mindspore-lite-{version}-{os}-{arch}/runtime/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH}
    ```

-benchmark_train工具所在完整路径为`/path/mindspore-lite-{version}-train-{os}-{arch}/tools/benchmark_train/benchmark_train`。
+benchmark_train工具所在完整路径为`/path/mindspore-lite-{version}-{os}-{arch}/tools/benchmark_train/benchmark_train`。

### 参数说明
diff --git a/tutorials/lite/source_zh_cn/use/build.md b/tutorials/lite/source_zh_cn/use/build.md
index 5469b4695c4de4f8c9857bf570f92678a8a256bc..f97b6e5c95681becc4d03661d10272aa0417b676 100644
--- a/tutorials/lite/source_zh_cn/use/build.md
+++ b/tutorials/lite/source_zh_cn/use/build.md
@@ -1,6 +1,6 @@
# 编译MindSpore Lite
-`Windows` `Linux` `Android` `环境准备` `中级` `高级` +`Windows` `macOS` `Linux` `iOS` `Android` `环境准备` `中级` `高级` @@ -9,27 +9,22 @@ - [环境要求](#环境要求) - [编译选项](#编译选项) - [编译示例](#编译示例) - - [端侧推理框架编译输出](#端侧推理框架编译输出) - - [模型转换工具converter目录结构说明](#推理模型转换工具converter目录结构说明) - - [代码生成工具codegen目录结构说明](#代码生成工具codegen目录结构说明) - - [模型混淆工具obfuscator目录结构说明](#模型混淆工具obfuscator目录结构说明) - - [Runtime及其他工具目录结构说明](#推理Runtime及其他工具目录结构说明) - - [端侧训练框架编译输出](#端侧训练框架编译输出) - - [训练Runtime及配套工具目录结构说明](#训练Runtime及配套工具目录结构说明) + - [目录结构](#目录结构) - [Windows环境编译](#windows环境编译) - [环境要求](#环境要求-1) - [编译选项](#编译选项-1) - [编译示例](#编译示例-1) - - [端侧推理框架编译输出](#端侧推理框架编译输出) - - [Runtime及配套工具目录结构说明](#Runtime及配套工具目录结构说明-1) - - [Docker环境编译](#docker环境编译) - - [环境准备](#环境准备) - - [下载镜像](#下载镜像) - - [创建容器](#创建容器) - - [进入容器](#进入容器) + - [目录结构](#目录结构-1) + - [macOS环境编译](#macOS环境编译) + - [环境要求](#环境要求-2) - [编译选项](#编译选项-2) - [编译示例](#编译示例-2) - - [编译输出](#编译输出) + - [目录结构](#目录结构-2) + - [Docker环境编译](#docker环境编译) + - [环境准备](#环境准备) + - [编译选项](#编译选项-3) + - [编译示例](#编译示例-3) + - [目录结构](#目录结构-3) @@ -37,69 +32,47 @@ 本章节介绍如何快速编译出MindSpore Lite。 -推理版本包含模块: +MindSpore Lite包含模块: | 模块 | 支持平台 | 说明 | | ------------------ | ----------------------- | --------------------------------- | | converter | Linux, Windows | 模型转换工具 | -| runtime(cpp、java) | Linux, Windows, Android | 模型推理框架(Windows平台不支持java版runtime) | +| runtime(cpp、java) | Linux, Windows, Android, iOS | 模型推理框架(Windows平台不支持java版runtime) | | benchmark | Linux, Windows, Android | 基准测试工具 | +| benchmark_train | Linux, Android | 性能测试和精度校验工具 | | cropper | Linux | libmindspore-lite.a静态库裁剪工具 | | minddata | Linux, Android | 图像处理库 | | codegen | Linux | 模型推理代码生成工具 | | obfuscator | Linux | 模型混淆工具 | -训练版本包含模块: - -| 模块 | 支持平台 | 说明 | -| --------------- | -------------- | --------------------------------- | -| converter | Linux | 模型转换工具 | -| runtime(cpp) | Linux, Android | 模型训练框架(暂不支持java) | -| cropper | Linux | libmindspore-lite.a静态库裁剪工具 | -| minddata | Linux, Android | 图像处理库 | -| benchmark_train | Linux, Android | 性能测试和精度校验工具 | - ## Linux环境编译 ### 环境要求 - 系统环境:Linux x86_64,推荐使用Ubuntu 18.04.02LTS -- runtime(cpp)编译依赖 - - [CMake](https://cmake.org/download/) >= 3.18.3 +- C++编译依赖 - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - - [Git](https://git-scm.com/downloads) >= 2.28.0 -- converter编译依赖 - [CMake](https://cmake.org/download/) >= 3.18.3 - - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - [Git](https://git-scm.com/downloads) >= 2.28.0 - - [Autoconf](http://ftp.gnu.org/gnu/autoconf/) >= 2.69 - - [Libtool](https://www.gnu.org/software/libtool/) >= 2.4.6 - - [LibreSSL](http://www.libressl.org/) >= 3.1.3 - - [Automake](https://www.gnu.org/software/automake/) >= 1.11.6 - - [Libevent](https://libevent.org) >= 2.0 - - [OpenSSL](https://www.openssl.org/) >= 1.1.1 -- runtime(java)编译依赖 - - [CMake](https://cmake.org/download/) >= 3.18.3 - - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - - [Git](https://git-scm.com/downloads) >= 2.28.0 - - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) + - 配置环境变量:`export ANDROID_NDK=NDK路径` + - [DDK](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180) = V500.010 + - 配置环境变量:`export HWHIAI_DDK=DDK路径` +- Java编译需要的额外依赖 - 
[Gradle](https://gradle.org/releases/) >= 6.6.1 + - 配置环境变量:`export GRADLE_HOME=GRADLE路径` + - 将bin目录添加到PATH中:`export PATH=${GRADLE_HOME}/bin:$PATH` - [OpenJDK](https://openjdk.java.net/install/) >= 1.8 - -> - 当安装完依赖项`Android_NDK`后,需配置环境变量:`export ANDROID_NDK=${NDK_PATH}/android-ndk-r20b`。 -> - 当安装完依赖项Gradle后,需将其安装路径增加到PATH当中:`export PATH=${GRADLE_PATH}/bin:$PATH`。 -> - 通过`Android command line tools`安装Android SDK,首先需要创建一个新目录,并将其路径配置到环境变量`${ANDROID_SDK_ROOT}`中,然后通过`sdkmanager`创建SDK:`./sdkmanager --sdk_root=${ANDROID_SDK_ROOT} "cmdline-tools;latest"`,最后通过`${ANDROID_SDK_ROOT}`目录下的`sdkmanager`接受许可证:`yes | ./sdkmanager --licenses`。 -> - 编译AAR需要依赖Android SDK Build-Tools、Android SDK Platform-Tools等Android SDK相关组件,如果环境中的Android SDK不存在相关组件,编译时会自动下载所需依赖。 -> - 编译NPU算子的时候需要下载[DDK V500.010](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180),并将压缩包解压后的目录设置为环境变量`${HWHIAI_DDK}`。 + - 配置环境变量:`export JAVA_HOME=JDK路径` + - 将bin目录添加到PATH中:`export PATH=${JAVA_HOME}/bin:$PATH` + - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) + - 创建一个新目录,配置环境变量`export ANDROID_SDK_ROOT=新建的目录` + - 下载`SDK Tools`,通过`sdkmanager`创建SDK:`./sdkmanager --sdk_root=${ANDROID_SDK_ROOT} "cmdline-tools;latest"` + - 通过`${ANDROID_SDK_ROOT}`目录下的`sdkmanager`接受许可证:`yes | ./sdkmanager --licenses` ### 编译选项 -MindSpore Lite提供编译脚本`build.sh`用于一键式编译,位于MindSpore根目录下,该脚本可用于MindSpore训练及推理的编译。 - -下面对`build.sh`的编译参数和`mindspore/lite/CMakeLists.txt`的选项进行说明。 +MindSpore根目录下的`build.sh`脚本可用于MindSpore Lite的编译。 #### `build.sh`的编译参数 @@ -120,9 +93,9 @@ MindSpore Lite提供编译脚本`build.sh`用于一键式编译,位于MindSpor | 选项 | 参数说明 | 取值范围 | 默认值 | | -------- | ----- | ---- | ---- | -| MSLITE_GPU_BACKEND | 设置GPU后端,仅在`-I arm64`时有效 | opencl、vulkan、cuda、off | opencl | +| MSLITE_GPU_BACKEND | 设置GPU后端,仅在`-I arm64`时有效 | opencl、off | opencl | | MSLITE_ENABLE_NPU | 是否编译NPU算子,仅在`-I arm64`或`-I arm32`时有效 | on、off | on | -| MSLITE_ENABLE_TRAIN | 是否编译训练版本 | on、off | off | +| MSLITE_ENABLE_TRAIN | 是否编译训练版本 | on、off | on | | MSLITE_ENABLE_SSE | 是否启用SSE指令集,仅在`-I x86_64`时有效 | on、off | off | | MSLITE_ENABLE_AVX | 是否启用AVX指令集,仅在`-I x86_64`时有效 | on、off | off | | MSLITE_ENABLE_CONVERTER | 是否编译模型转换工具,仅在`-I x86_64`时有效 | on、off | on | @@ -130,7 +103,6 @@ MindSpore Lite提供编译脚本`build.sh`用于一键式编译,位于MindSpor | MSLITE_ENABLE_TESTCASES | 是否编译测试用例 | on、off | off | > - 以上选项可通过设置同名环境变量或者`mindspore/lite/CMakeLists.txt`文件修改。 -> - 开启MSLITE_ENABLE_TRAIN只生成训练版本。 > - 修改选项后,添加`-i`参数进行增量编译不生效。 ### 编译示例 @@ -168,142 +140,66 @@ git clone https://gitee.com/mindspore/mindspore.git bash build.sh -A on -j32 ``` -### 端侧推理框架编译输出 - -执行编译指令后,会在`mindspore/output/`目录中生成如下文件: +最后,会在`output/`目录中生成如下文件: -- `mindspore-lite-{version}-inference-{os}-{arch}.tar.gz`:包含模型推理框架runtime(cpp)和配套工具。 +- `mindspore-lite-{version}-{os}-{arch}.tar.gz`:包含runtime和配套工具。 -- `mindspore-lite-maven-{version}.zip`:包含模型推理框架runtime(java)的AAR。 +- `mindspore-lite-maven-{version}.zip`:包含runtime(java)的AAR包。 > - version: 输出件版本号,与所编译的分支代码对应的版本一致。 > - os: 输出件应部署的操作系统。 > - arch: 输出件应部署的系统架构。 -执行解压缩命令,获取编译后的输出件: - -```bash -tar -xvf mindspore-lite-{version}-inference-{os}-{arch}.tar.gz -unzip mindspore-lite-maven-{version}.zip -``` - -#### 模型转换工具converter目录结构说明 - -仅在`-I x86_64`编译选项下获得(推理和训练的目录结构相同)内容如下: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── converter - ├── include - │ └── registry # 自定义算子、模型解析、转换优化注册头文件 - ├── converter # 模型转换工具 - │ └── converter_lite # 可执行程序 - └── lib # 转换工具依赖的动态库 - ├── libglog.so.0 # Glog的动态库 - └── libmslite_converter_plugin.so # 注册插件的动态库 -``` - -#### 
代码生成工具CodeGen目录结构说明 - -仅在`-I x86_64`编译选项下获得codegen可执行程序,在`-I arm64`和`-I arm32`编译选项下只生成codegen生成的推理代码所需要的算子库。 - -- `-I x86_64`编译选项下获得codegen,内容如下: - - ```text - mindspore-lite-{version}-inference-linux-x64 - └── tools - └── codegen # 代码生成工具 - ├── codegen # 可执行程序 - ├── include # 推理框架头文件 - │ ├── nnacl # nnacl 算子头文件 - │ └── wrapper - ├── lib - │ └── libwrapper.a # MindSpore Lite CodeGen生成代码依赖的部分算子静态库 - └── third_party - ├── include - │ └── CMSIS # ARM CMSIS NN 算子头文件 - └── lib - └── libcmsis_nn.a # ARM CMSIS NN 算子静态库 - ``` - -- `-I arm64`或`-I arm32`编译选项下获得codegen,内容如下: - - ```text - mindspore-lite-{version}-inference-android-{arch} - └── tools - └── codegen # 代码生成工具 - ├── include # 推理框架头文件 - │ ├── nnacl # nnacl 算子头文件 - │ └── wrapper - └── lib # 推理框架库 - └── libwrapper.a # MindSpore Lite CodeGen生成代码依赖的部分算子静态库 - ``` - -#### 模型混淆工具obfuscator目录结构说明 - -仅在`-I x86_64`编译选项下且`mindspore/mindspore/lite/CMakeLists.txt`中的`ENABLE_MODEL_OBF`选项开启时,获得msobfuscator可执行程序,内容如下: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── obfuscator # 模型混淆工具 - └── msobfuscator # 可执行程序 -``` - -#### Runtime及其他工具目录结构说明 - -推理框架可在`-I x86_64`、`-I arm64`、`-I arm32`和`-A java`编译选项下获得,内容如下: +### 目录结构 - 当编译选项为`-I x86_64`时: ```text - mindspore-lite-{version}-inference-linux-x64 - ├── inference - │ ├── include # 推理框架头文件 - │ │ └── registry # 自定义算子注册头文件 - │ └── lib # 推理框架库 - │ ├── libminddata-lite.so # 图像处理动态库文件 - │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 - │ ├── libmindspore-lite-jni.so # MindSpore Lite推理框架的jni动态库 - │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 - │ ├── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 - │ └── mindspore-lite-java.jar # MindSpore Lite推理框架jar包 + mindspore-lite-{version}-linux-x64 + ├── runtime + │ ├── include + │ ├── lib + │ │ ├── libminddata-lite.a # 图像处理静态库 + │ │ ├── libminddata-lite.so # 图像处理动态库 + │ │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 + │ │ ├── libmindspore-lite-jni.so # MindSpore Lite推理框架的jni动态库 + │ │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 + │ │ ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 + │ │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 + │ │ ├── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 + │ │ └── mindspore-lite-java.jar # MindSpore Lite推理框架jar包 + │ └── third_party + │ └── libjpeg-turbo └── tools - ├── benchmark # 基准测试工具 - │ └── benchmark # 可执行程序 - ├── codegen # 代码生成工具 - │ ├── codegen # 可执行程序 - │ ├── include # 算子头文件 - │ ├── lib # 算子静态库 - │ └── third_party # ARM CMSIS NN算子库 - ├── converter # 模型转换工具 - ├── obfuscator # 模型混淆工具 - └── cropper # 库裁剪工具 - ├── cropper # 库裁剪工具可执行文件 - └── cropper_mapping_cpu.cfg # 裁剪cpu库所需的配置文件 + ├── benchmark # 基准测试工具 + ├── benchmark_train # 训练模型性能与精度调测工具 + ├── codegen # 代码生成工具 + ├── converter # 模型转换工具 + ├── obfuscator # 模型混淆工具 + └── cropper # 库裁剪工具 ``` - 当编译选项为`-I arm64`或`-I arm32`时: ```text - mindspore-lite-{version}-inference-android-{arch} - ├── inference - │ ├── include # 推理框架头文件 - │ │ └── registry # 自定义算子注册头文件 - │ ├── lib # 推理框架库 - │ │ ├── libminddata-lite.so # 图像处理动态库文件 - │ │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 - │ │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 - │ │ └── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 + mindspore-lite-{version}-android-{arch} + ├── runtime + │ ├── include + │ ├── lib + │ │ ├── libminddata-lite.a # 图像处理静态库 + │ │ ├── libminddata-lite.so # 图像处理动态库 + │ │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 + │ │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 + │ │ ├── 
libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 + │ │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 + │ │ └── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 │ └── third_party - │ └── hiai_ddk # NPU库,只存在于arm64包 + │ ├── hiai_ddk + │ └── libjpeg-turbo └── tools - ├── benchmark # 基准测试工具 - │ └── benchmark - └── codegen # 代码生成工具 - ├── include # 算子头文件 - └── lib # 算子静态库 + ├── benchmark # 基准测试工具 + ├── benchmark_train # 训练模型性能与精度调测工具 + └── codegen # 代码生成工具 ``` - 当编译选项为`-A on`时: @@ -316,67 +212,6 @@ mindspore-lite-{version}-inference-linux-x64 └── mindspore-lite-{version}.aar # MindSpore Lite推理框架aar包 ``` -### 端侧训练框架编译输出 - -如果开启了MSLITE_ENABLE_TRAIN选项,会生成训练runtime和配套工具,如下: - -`mindspore-lite-{version}-train-{os}-{arch}.tar.gz`:模型训练框架runtime和配套工具。 - -> - version: 输出件版本号,与所编译的分支代码对应的版本一致。 -> - os: 输出件应部署的操作系统。 -> - arch: 输出件应部署的系统架构。 - -执行解压缩命令,获取编译后的输出件: - -```bash -tar -xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz -``` - -#### 训练Runtime及配套工具目录结构说明 - -训练框架可在`-I x86_64`、`-I arm64`、`-I arm32`编译选项下获得对应不同硬件平台的版本,内容如下: - -- 当编译选项为`-I x86_64`时: - - ```text - mindspore-lite-{version}-train-linux-x64 - ├── tools - │   ├── benchmark_train # 训练模型性能与精度调测工具 - │   ├── converter # 模型转换工具 - │   └── cropper # 库裁剪工具 - │   ├── cropper # 库裁剪工具可执行文件 - │   └── cropper_mapping_cpu.cfg # 裁剪cpu库所需的配置文件 - └── train - ├── include # 训练框架头文件 - │ └── registry # 自定义算子注册头文件 - ├── lib # 训练框架库 - │   ├── libminddata-lite.so # 图像处理动态库文件 - │   ├── libmindspore-lite-jni.so # MindSpore Lite训练框架的jni动态库 - │   ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 - │   ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 - │   └── mindspore-lite-java.jar # MindSpore Lite训练框架jar包 - └── third_party - └── libjpeg-turbo - ``` - -- 当编译选项为`-I arm64`或`-I arm32`时: - - ```text - mindspore-lite-{version}-train-android-{arch} - ├── tools - │   └── benchmark_train # 训练模型性能与精度调测工具 - └── train - ├── include # 训练框架头文件 - │ └── registry # 自定义算子注册头文件 - ├── lib # 训练框架库 - │   ├── libminddata-lite.so # 图像处理动态库文件 - │   ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 - │   └── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 - └── third_party - ├── hiai_ddk # NPU库,只存在于arm64包 - └── libjpeg-turbo - ``` - ## Windows环境编译 ### 环境要求 @@ -392,13 +227,27 @@ tar -xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz ### 编译选项 -MindSpore Lite提供编译脚本build.bat用于一键式编译,位于MindSpore根目录下,该脚本可用于MindSpore训练及推理的编译。下面对MindSpore Lite的编译选项进行说明。 +MindSpore根目录下的`build.bat`脚本可用于MindSpore Lite的编译。 + +#### `build.bat`的编译参数 | 参数 | 参数说明 | 是否必选 | | -------- | ----- | ---- | | lite | 设置该参数,则对MindSpore Lite工程进行编译 | 是 | | [n] | 设定编译时所用的线程数,否则默认设定为6线程 | 否 | +#### `mindspore/lite/CMakeLists.txt`的选项 + +| 选项 | 参数说明 | 取值范围 | 默认值 | +| -------- | ----- | ---- | ---- | +| MSLITE_ENABLE_SSE | 是否启用SSE指令集 | on、off | off | +| MSLITE_ENABLE_AVX | 是否启用AVX指令集 | on、off | off | +| MSLITE_ENABLE_CONVERTER | 是否编译模型转换工具 | on、off | on | +| MSLITE_ENABLE_TOOLS | 是否编译配套工具 | on、off | on | +| MSLITE_ENABLE_TESTCASES | 是否编译测试用例 | on、off | off | + +> - 以上选项可通过设置同名环境变量或者`mindspore/lite/CMakeLists.txt`文件修改。 + ### 编译示例 首先,使用git工具,从MindSpore代码仓下载源码。 @@ -421,83 +270,125 @@ call build.bat lite call build.bat lite 8 ``` -### 端侧推理框架编译输出 +最后,会在`output/`目录中生成如下文件: -编译完成后,进入`mindspore/output/`目录,可查看编译后生成的文件。文件分为以下几种: - -- `mindspore-lite-{version}-inference-win-x64.zip`:包含模型推理框架runtime和配套工具。 +- `mindspore-lite-{version}-win-x64.zip`:包含模型推理框架runtime和配套工具。 > version:输出件版本号,与所编译的分支代码对应的版本一致。 -执行解压缩命令,获取编译后的输出件: - -```bat -unzip mindspore-lite-{version}-inference-win-x64.zip -``` - -#### 
Runtime及配套工具目录结构说明 - -Runtime及配套工具包括以下几部分: +### 目录结构 ```text -mindspore-lite-{version}-inference-win-x64 -├── inference -│   ├── include # 推理框架头文件 -│   │ └── registry # 自定义算子注册头文件 -│   └── lib -│   ├── libgcc_s_seh-1.dll # MinGW动态库 -│   ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 -│   ├── libmindspore-lite.dll # MindSpore Lite推理框架的动态库 -│   ├── libmindspore-lite.dll.a # MindSpore Lite推理框架的动态库的链接文件 -│   ├── libssp-0.dll # MinGW动态库 -│   ├── libstdc++-6.dll # MinGW动态库 -│   └── libwinpthread-1.dll # MinGW动态库 +mindspore-lite-{version}-win-x64 +├── runtime +│ ├── include +│ └── lib +│ ├── libgcc_s_seh-1.dll # MinGW动态库 +│ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 +│ ├── libmindspore-lite.dll # MindSpore Lite推理框架的动态库 +│ ├── libmindspore-lite.dll.a # MindSpore Lite推理框架的动态库的链接文件 +│ ├── libssp-0.dll # MinGW动态库 +│ ├── libstdc++-6.dll # MinGW动态库 +│ └── libwinpthread-1.dll # MinGW动态库 └── tools ├── benchmark # 基准测试工具 - │   └── benchmark.exe # 可执行程序 └── converter # 模型转换工具 - ├── include - │ └── registry # 自定义算子、模型解析、转换优化注册头文件 - ├── converter - │   └── converter_lite.exe # 可执行程序 - └── lib - ├── libgcc_s_seh-1.dll # MinGW动态库 - ├── libglog.dll # Glog的动态库 - ├── libmslite_converter_plugin.dll # 注册插件的动态库 - ├── libmslite_converter_plugin.dll.a # 注册插件的动态库的链接文件 - ├── libssp-0.dll # MinGW动态库 - ├── libstdc++-6.dll # MinGW动态库 - └── libwinpthread-1.dll # MinGW动态库 ``` > 暂不支持在Windows进行端侧训练。 -## Docker环境编译 +## macOS环境编译 -### 环境准备 +### 环境要求 -#### 下载镜像 +- 系统环境:macOS 10.15.4及以上;64位。 -```bash -docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 -``` +- 编译依赖 + - [CMake](https://cmake.org/download/) >= 3.18.3 + - [Xcode](https://developer.apple.com/xcode/download/cn) == 11.4.1 + - [Git](https://git-scm.com/downloads) >= 2.28.0 + +> - 编译脚本中会执行`git clone`获取第三方依赖库的代码。 + +### 编译选项 -> - 下载镜像前,请确保已经安装docker。 -> - docker镜像暂不支持Windows版本编译。 -> - 镜像里已安装好编译依赖的第三方库并且配置好环境变量。 +MindSpore根目录下的`build.sh`脚本可用于MindSpore Lite的编译。 -#### 创建容器 +#### `build.sh`的编译参数 + +| 参数 | 参数说明 | 取值范围 | 默认值 | +| -------- | ----- | ---- | ---- | +| -I | 选择目标架构 | arm64、arm32 | 无 | +| -j[n] | 设定编译时所用的线程数,否则默认设定为8线程 | Integer | 8 | + +### 编译示例 + +首先,在进行编译之前,需从MindSpore代码仓下载源码。 ```bash -docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 +git clone https://gitee.com/mindspore/mindspore.git ``` -#### 进入容器 +然后,在源码根目录下执行如下命令即可编译MindSpore Lite。 -```bash -docker exec -ti -u 0 docker01 bash +- 编译ARM64架构版本。 + + ```bash + bash build.sh -I arm64 -j8 + ``` + +- 编译ARM32架构版本。 + + ```bash + bash build.sh -I arm32 -j8 + ``` + +最后,会在`output/`目录中生成如下文件: + +- `mindspore-lite-{version}-{os}-{arch}.tar.gz`:包含模型推理框架runtime。 + +> - version: 输出件版本号,与所编译的分支代码对应的版本一致。 +> - os: 输出件应部署的操作系统。 +> - arch: 输出件应部署的系统架构。 + +### 目录结构 + +```text +mindspore-lite.framework +└── runtime + ├── Headers # 推理框架头文件 + ├── Info.plist # 配置文件 + └── mindspore-lite # 静态库 ``` +> 暂不支持在macOS进行端侧训练与转换工具。 + +## Docker环境编译 + +### 环境准备 + +- 下载镜像 + + ```bash + docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 + ``` + + > - 下载镜像前,请确保已经安装docker。 + > - docker镜像暂不支持Windows版本编译。 + > - 镜像里已安装好编译依赖的第三方库并且配置好环境变量。 + +- 创建容器 + + ```bash + docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 + ``` + +- 进入容器 + + ```bash + docker exec -ti -u 0 docker01 bash + ``` + ### 编译选项 参考[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux) @@ -506,6 +397,6 
@@ docker exec -ti -u 0 docker01 bash 参考[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux) -### 编译输出 +### 目录结构 参考[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux) diff --git a/tutorials/lite/source_zh_cn/use/converter_tool.md b/tutorials/lite/source_zh_cn/use/converter_tool.md index 24fcd43b498d49f770d77d7cab143a1f1a0f1e38..37c8f21c3c4f719cd81edb1da5f991b429e22778 100644 --- a/tutorials/lite/source_zh_cn/use/converter_tool.md +++ b/tutorials/lite/source_zh_cn/use/converter_tool.md @@ -8,10 +8,12 @@ - [概述](#概述) - [Linux环境使用说明](#linux环境使用说明) - [环境准备](#环境准备) + - [目录结构](#目录结构) - [参数说明](#参数说明) - [使用示例](#使用示例) - [Windows环境使用说明](#windows环境使用说明) - [环境准备](#环境准备-1) + - [目录结构](#目录结构-1) - [参数说明](#参数说明-1) - [使用示例](#使用示例-1) @@ -42,6 +44,21 @@ MindSpore Lite提供离线转换模型功能的工具,支持多种类型的模 ${PACKAGE_ROOT_PATH}是编译或下载得到的包解压后的路径。 +### 目录结构 + +```text +mindspore-lite-{version}-linux-x64 +└── tools + └── converter + ├── include + │ └── registry # 自定义算子、模型解析、转换优化注册头文件 + ├── converter # 模型转换工具 + │ └── converter_lite # 可执行程序 + └── lib # 转换工具依赖的动态库 + ├── libglog.so.0 # Glog的动态库 + └── libmslite_converter_plugin.so # 注册插件的动态库 +``` + ### 参数说明 MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需要来选择使用。此外,用户可输入`./converter_lite --help`获取实时帮助。 @@ -66,6 +83,8 @@ MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需 > - 为保证权重量化的精度,建议`--bitNum`参数设定范围为8bit~16bit。 > - 全量化目前仅支持激活值8bit、权重8bit的量化方式。 +### 使用示例 + 下面选取了几个常用示例,说明转换命令的使用方法。 - 以Caffe模型LeNet为例,执行转换命令。 @@ -137,6 +156,26 @@ MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需 ${PACKAGE_ROOT_PATH}是编译或下载得到的包解压后的路径。 +### 目录结构 + +```text +mindspore-lite-{version}-win-x64 +└── tools + └── converter # 模型转换工具 + ├── include + │ └── registry # 自定义算子、模型解析、转换优化注册头文件 + ├── converter + │ └── converter_lite.exe # 可执行程序 + └── lib + ├── libgcc_s_seh-1.dll # MinGW动态库 + ├── libglog.dll # Glog的动态库 + ├── libmslite_converter_plugin.dll # 注册插件的动态库 + ├── libmslite_converter_plugin.dll.a # 注册插件的动态库的链接文件 + ├── libssp-0.dll # MinGW动态库 + ├── libstdc++-6.dll # MinGW动态库 + └── libwinpthread-1.dll # MinGW动态库 +``` + ### 参数说明 参考Linux环境模型转换工具的[参数说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html#id3)。 diff --git a/tutorials/lite/source_zh_cn/use/converter_train.md b/tutorials/lite/source_zh_cn/use/converter_train.md index b205078cf399a4be82c1085ee7b22cbf4c4c23e6..ca562611067154c6e2182cf2797b004cedcaac8d 100644 --- a/tutorials/lite/source_zh_cn/use/converter_train.md +++ b/tutorials/lite/source_zh_cn/use/converter_train.md @@ -48,6 +48,10 @@ MindSpore Lite 模型转换工具提供了多个参数,目前工具仅支持Li | `--modelFile=` | 是 | MINDIR模型文件名(包括路径) | - | - | | `--outputFile=` | 是 | 输出模型文件名(包括路径)自动生成`.ms`后缀 | - | - | | `--trainModel=true` | 是 | 是否是训练模式;如果要训练模型,必须为true | true, false | false | +| `--quantType=` | 否 | 设置模型的量化类型。 | WeightQuant:权重量化(训练只支持此类型) | - | +| `--bitNum=` | 否 | 设定训练后量化(权重量化)的比特数,目前支持1bit~16bit量化 | \[1,16] | 8 | +| `--quantWeightSize=` | 否 | 设定参与训练后量化(权重量化)的卷积核尺寸阈值,若卷积核尺寸大于该值,则对此权重进行量化 | \[0,+∞) | 0 | +| `--quantWeightChannel=` | 否 | 设定参与训练后量化(权重量化)的卷积通道数阈值,若卷积通道数大于该值,则对此权重进行量化 | \[0,+∞) | 16 | > 参数名称和数值之间使用等号连接且不能有空格。 diff --git a/tutorials/lite/source_zh_cn/use/cropper_tool.md b/tutorials/lite/source_zh_cn/use/cropper_tool.md index 40afbdabaff0d7c83b08f997f3a24b0e711513bc..652085c588afbfb1085cbcd165fa6dabee3198bf 100644 --- a/tutorials/lite/source_zh_cn/use/cropper_tool.md +++ b/tutorials/lite/source_zh_cn/use/cropper_tool.md @@ -58,7 +58,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具, - 
通过文件夹的方式传入`ms`模型,将模型文件所在的文件夹路径传递给`modelFolderPath`参数,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪。 ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` 本例将读取`/model`文件夹中包含的所有`ms`模型,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。 @@ -66,7 +66,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具, - 通过文件的方式传入`ms`模型,将模型文件所在的路径传递给`modelFile`参数,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪。 ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` 本例将根据`modelFile`传入的`ms`模型,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。 @@ -74,7 +74,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具, - 通过文件夹的方式传入`ms`模型,将模型文件所在的文件夹路径传递给`modelFolderPath`参数,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪。 ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` 本例将读取`/model`文件夹中包含的所有`ms`模型,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。 @@ -82,7 +82,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具, - 通过文件的方式传入`ms`模型,将模型文件所在的路径传递给`modelFile`参数,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪。 ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` 本例将根据`modelFile`传入的`ms`模型,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。 \ No newline at end of file diff --git a/tutorials/lite/source_zh_cn/use/micro.md b/tutorials/lite/source_zh_cn/use/micro.md index 18f906e4a0bed29b7104f5ccfd0f9b38e1812a95..422df22afd1c1d0d74ee21d2fc43ffbfa627afec 100644 --- a/tutorials/lite/source_zh_cn/use/micro.md +++ b/tutorials/lite/source_zh_cn/use/micro.md @@ -7,6 +7,7 @@ - [在微控制器上执行推理](#在微控制器上执行推理) - [概述](#概述) - [获取codegen](#获取codegen) + - [目录结构](#目录结构) - [参数说明](#参数说明) - [使用步骤](#使用步骤) - [使用CodeGen在STM开发板上执行推理](#使用CodeGen在STM开发板上执行推理) @@ -37,6 +38,25 @@ > 目前模型生成工具仅支持在Linux x86_64架构下运行。 +## 目录结构 + +```text 
+mindspore-lite-{version}-linux-x64
+└── tools
+    └── codegen # 代码生成工具
+        ├── codegen          # 可执行程序
+        ├── include          # 推理框架头文件
+        │   ├── nnacl        # nnacl 算子头文件
+        │   └── wrapper
+        ├── lib
+        │   └── libwrapper.a # MindSpore Lite CodeGen生成代码依赖的部分算子静态库
+        └── third_party
+            ├── include
+            │   └── CMSIS    # ARM CMSIS NN 算子头文件
+            └── lib
+                └── libcmsis_nn.a # ARM CMSIS NN 算子静态库
+```
+
## 参数说明

详细参数说明如下:
diff --git a/tutorials/lite/source_zh_cn/use/nnie.md b/tutorials/lite/source_zh_cn/use/nnie.md
index d5ca59853d0f4a3229bec7ec3e0c99195f363282..de194d78646eefa5889450704eb0b1f88aa713c3 100644
--- a/tutorials/lite/source_zh_cn/use/nnie.md
+++ b/tutorials/lite/source_zh_cn/use/nnie.md
@@ -22,7 +22,7 @@

### 模型转换工具converter目录结构说明

```text
-mindspore-lite-{version}-inference-linux-x64
+mindspore-lite-{version}-linux-x64
└── tools
    └── converter
        └── providers
@@ -58,7 +58,7 @@ mindspore-lite-{version}-linux-aarch32

1. 进入**版本发布件根路径**。

    ```text
-    cd mindspore-lite-{version}-inference-linux-x64
+    cd mindspore-lite-{version}-linux-x64
    ```

    若用户未进入**版本发件件根路径**,后续配置用户需按实际情况进行等价设置。
@@ -93,7 +93,7 @@ mindspore-lite-{version}-linux-aarch32

    运行于x86_64系统上的benchmark是用来生成校正集的,以供nnie学习量化参数。用户需设置以下环境变量:

    ```shell
-    export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./inference/lib
+    export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./runtime/lib
    export BENCHMARK_PATH=./tools/benchmark
    ```

@@ -104,7 +104,7 @@
    ```

    参数modelFile、weightFile、configFile、outputFile用户按实际情况进行设置。
-    当用户在mindspore-lite-{version}-inference-linux-x64/tools/converter/converter目录下时,环境变量NNIE_MAPPER_PATH、NNIE_DATA_PROCESS_PATH、BENCHMARK_PATH可不设置。
+    当用户在mindspore-lite-{version}-linux-x64/tools/converter/converter目录下时,环境变量NNIE_MAPPER_PATH、NNIE_DATA_PROCESS_PATH、BENCHMARK_PATH可不设置。

### 推理工具runtime
diff --git a/tutorials/lite/source_zh_cn/use/npu_info.md b/tutorials/lite/source_zh_cn/use/npu_info.md
index 2c9816ecee730465bb0dd89a366cd4215467fd2b..096f7c919df977cb3595357774fb477ad457a8ea 100644
--- a/tutorials/lite/source_zh_cn/use/npu_info.md
+++ b/tutorials/lite/source_zh_cn/use/npu_info.md
@@ -47,7 +47,7 @@ bash build.sh -I arm64 -e npu

    配置好环境变量,将会动态加载libhiai.so, libhiai_ir.so, libhiai_ir_build.so。例如:

    ```bash
-    export LD_LIBRARY_PATH=mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH
    ```

- Benchmark测试NPU推理
@@ -72,8 +72,8 @@ bash build.sh -I arm64 -e npu

有关Benchmark使用详情,见[Benchmark使用](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/benchmark_tool.html)。

有关环境变量设置,需要根据[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)中编译选项为`-I arm64`或`-I arm32`时的目录结构,
-将libmindspore-lite.so(目录为`mindspore-lite-{version}-inference-android-{arch}/inference/lib`)和
-NPU库(目录为`mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/`)所在的目录加入`${LD_LIBRARY_PATH}`。
+将libmindspore-lite.so(目录为`mindspore-lite-{version}-android-{arch}/runtime/lib`)和
+NPU库(目录为`mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/`)所在的目录加入`${LD_LIBRARY_PATH}`。

## 芯片支持
diff --git a/tutorials/lite/source_zh_cn/use/obfuscator_tool.md b/tutorials/lite/source_zh_cn/use/obfuscator_tool.md
index bf5dd76e6bd95c8e4712a929a0e3868122301c9b..ff30e7e61cbd5916ea1576679690e3e5aeed1d27 100644
--- a/tutorials/lite/source_zh_cn/use/obfuscator_tool.md
+++ b/tutorials/lite/source_zh_cn/use/obfuscator_tool.md
@@ -8,6 +8,7 @@

- [概述](#概述)
- [Linux环境使用说明](#linux环境使用说明)
    - [环境准备](#环境准备)
+ - [目录结构](#目录结构) - [参数说明](#参数说明) - [使用示例](#使用示例) @@ -27,6 +28,15 @@ MindSpore Lite提供一个轻量级的离线模型混淆工具,可用于保护 - 参考构建文档中的[环境要求](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id1)和[编译示例](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id3)编译x86_64版本。 +### 目录结构 + +```text +mindspore-lite-{version}-linux-x64 +└── tools + └── obfuscator # 模型混淆工具 + └── msobfuscator # 可执行程序 +``` + ### 参数说明 MindSpore Lite模型混淆工具提供了多种参数设置,用户可根据需要来选择使用。此外,用户可输入`./msobfuscator --help`获取实时帮助。 diff --git a/tutorials/lite/source_zh_cn/use/runtime_cpp.md b/tutorials/lite/source_zh_cn/use/runtime_cpp.md index a397b5d7611c01610aff84e39b04387c507d70e1..ae02c5040bb816eef1fb3331fc417c2cbdb868e2 100644 --- a/tutorials/lite/source_zh_cn/use/runtime_cpp.md +++ b/tutorials/lite/source_zh_cn/use/runtime_cpp.md @@ -1,6 +1,6 @@ # 使用C++接口执行推理 -`Windows` `Linux` `Android` `C++` `推理应用` `模型加载` `数据准备` `中级` `高级` +`Windows` `macOS` `Linux` `iOS` `Android` `C++` `推理应用` `模型加载` `数据准备` `中级` `高级` @@ -85,6 +85,8 @@ if (model == nullptr) { - [device_list_](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#device-list):MindSpore Lite支持异构推理,推理时的后端配置信息由[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)中的`device_list_`指定,默认存放CPU的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext)。在进行图编译时,会根据`device_list_`中不同的后端配置信息进行算子选型调度。目前仅支持两种异构,CPU和GPU异构或者CPU和NPU异构。当配置GPU的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext)时,优先使用GPU推理;当配置NPU的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext)时,优先使用NPU推理。 > `device_list_[0]`必须是CPU的`DeviceContext`, `device_list_[1]`是GPU的`DeviceContext`或者NPU的`DeviceContext`。暂时不支持同时设置CPU, GPU和NPU三个`DeviceContext`。 +> +> 对于iOS设备,暂时只支持`device_list_[0]`为CPU的`DeviceContext`。 ### 配置线程数 @@ -119,6 +121,8 @@ cpu_device_info.enable_float16_ = true; ``` > Float16需要CPU为ARM v8.2架构的机型才能生效,其他不支持的机型和x86平台会自动回退到Float32执行。 +> +> 对于iOS设备, 只支持CPU后端运行, 且暂时不支持CPU后端的Float16的执行。 ### 配置使用GPU后端 @@ -580,6 +584,8 @@ if (session == nullptr) { logcat -s "MS_LITE" ``` +> 对iOS设备暂不支持日志查看。 + ### 获取版本号 MindSpore Lite提供了[Version](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#version)方法可以获取版本号,包含在`include/version.h`头文件中,调用该方法可以得到当前MindSpore Lite的版本号。 diff --git a/tutorials/source_zh_cn/computer_vision.rst b/tutorials/source_zh_cn/computer_vision.rst new file mode 100644 index 0000000000000000000000000000000000000000..677f531fe694ee19757e45714d9eb147325661b7 --- /dev/null +++ b/tutorials/source_zh_cn/computer_vision.rst @@ -0,0 +1,8 @@ +机器视觉 +=================== + +.. toctree:: + :maxdepth: 1 + + middleclass/image_and_video/fine_tune + middleclass/image_and_video/adversative_nets_mark diff --git a/tutorials/source_zh_cn/index.rst b/tutorials/source_zh_cn/index.rst index 124a2f94379bb7aa5dea7f8c2eb934ebcb17591b..239627a06e9a9125391e7c49c8016eed749e6ee6 100644 --- a/tutorials/source_zh_cn/index.rst +++ b/tutorials/source_zh_cn/index.rst @@ -9,7 +9,7 @@ MindSpore教程 .. 
toctree:: :glob: :maxdepth: 1 - :caption: 快速入门 + :caption: 入门教程 introduction quick_start @@ -26,4 +26,5 @@ MindSpore教程 :maxdepth: 1 :caption: 进阶教程 - custom \ No newline at end of file + custom + computer_vision \ No newline at end of file diff --git a/tutorials/source_zh_cn/middleclass/custom/metric.md b/tutorials/source_zh_cn/middleclass/custom/metric.md index 33c8dbeed039930014d60ee5bc9413c5b20bee85..b57d8337a4bc5adaa24dbb93a3b28f47d3555f1e 100644 --- a/tutorials/source_zh_cn/middleclass/custom/metric.md +++ b/tutorials/source_zh_cn/middleclass/custom/metric.md @@ -12,7 +12,7 @@ - + ## 概述 diff --git a/tutorials/source_zh_cn/middleclass/custom/operator.md b/tutorials/source_zh_cn/middleclass/custom/operator.md index 49f9c4d7ef86bf8fe9f8ad233fc0524bd5bb5234..8313901b1f88a19839a5d5a0208c233ce32cef5a 100644 --- a/tutorials/source_zh_cn/middleclass/custom/operator.md +++ b/tutorials/source_zh_cn/middleclass/custom/operator.md @@ -19,7 +19,7 @@ - + ## 自定义算子开发 diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/adversative_nets_mark.ipynb b/tutorials/source_zh_cn/middleclass/image_and_video/adversative_nets_mark.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..bf31078ac86346a1da2160b4c046e359e14bf8ee --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/image_and_video/adversative_nets_mark.ipynb @@ -0,0 +1,558 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# 对抗示例生成\n", + "\n", + "[![](https://gitee.com/mindspore/docs/raw/master/resource/_static/logo_source.png)](https://gitee.com/mindspore/docs/blob/master/tutorials/source_zh_cn/middleclass/image_and_video/adversative_nets_mark.ipynb)\n", + "\n", + "以往机器学习模型的研究方向多数集中在高效、准确等方面,模型的安全性和鲁棒性则经常被忽略,特别是在面对攻击时,模型易被欺骗的现象表现得尤为明显。\n", + "\n", + "本教程将讨论机器学习模型的安全漏洞,并深入了解对抗性机器学习这一热门话题。我们会通过图像分类的示例,展示在图像上添加无法察觉的扰动之后,模型表现会完全不同。扰动方式将使用梯度攻击来完成。\n", + "\n", + "## 威胁模型\n", + "\n", + "从背景上说,对抗性攻击有多种类型,每种攻击的目标和对攻击者知识的假设都不同。但总的来说,总体目标都是向输入数据添加最少的扰动,以引起所需的错误分类。对攻击者知识的假设有几种,其中两种是:**白盒**和**黑盒**。*白盒*攻击假定攻击者具有完全的知识并可以访问模型,包括架构、输入、输出和权重。*黑盒*攻击假定攻击者只能访问模型的输入和输出,并且对底层架构或权重一无所知。攻击目标也有几种类型,包括**错误分类**和**源/目标错误分类**。*错误分类*意味着对手只希望输出分类错误,而不在乎新分类是什么。*源/目标错误分类*意味着对手想要更改最初属于特定源类别的图像,以便将其分类为特定目标类别。\n", + "\n", + "在这种情况下,本实验采用白盒方式攻击,其实验目标是*错误分类*。有了这些背景信息,我们现在可以详细讨论攻击了。\n", + "\n", + "## 快速梯度符号攻击\n", + "\n", + "迄今为止,最早也是最流行的对抗性攻击之一被称为*快速梯度符号攻击(Fast Gradient Sign Method,FGSM)*,由[《解释和利用对抗性示例》](https://arxiv.org/abs/1412.6572) (Goodfellow 等)描述。这种攻击非常强大,而且直观。它旨在利用神经网络学习所依赖的*梯度*来攻击神经网络。这个想法很简单:不是基于反向传播得到的梯度来调整权重以使损失最小化,而是基于同样由反向传播得到的梯度来调整输入数据,以使损失最大化。换句话说,攻击使用损失相对于输入数据的梯度,然后沿该方向调整输入数据,使损失最大化。\n", + "\n", + "在执行代码之前,让我们看一下著名的 [FGSM](https://arxiv.org/abs/1412.6572) 熊猫(panda)示例,并介绍一些参数。\n", + "\n", + "![fgsm_panda_image](images/panda.png)\n", + "\n", + " - $x$:正确分类为“熊猫”的原始输入图像\n", + " - $y$:`x`对应的真实标签\n", + " - $\\theta$:模型参数\n", + " - $J(\\theta, x, y)$:训练网络的损失\n", + " \n", + " 攻击会将梯度反向传播回输入数据,以计算`∇x J(θ, x, y)`。然后,它会沿使损失最大化的方向(即`sign(∇x J(θ, x, y))`)将输入数据调整一小步(图中的`ε`,取值0.007)。最后,扰动后的图像在人眼看来仍明显是“熊猫”,目标网络却将其误分类为“长臂猿”。\n", + "\n", + "现在开始准备实现梯度攻击:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# 导入模型训练需要的库\n", + "import os\n", + "import mindspore\n", + "import numpy as np\n", + "from mindspore import Tensor\n", + "import mindspore.nn as nn\n", + "import mindspore.ops as ops\n", + "import matplotlib.pyplot as plt\n", + "from mindspore.common.initializer import Normal\n", + "import argparse\n", + "from mindspore import context\n", + "from mindspore.nn import 
Accuracy\n", + "from mindspore.train.callback import LossMonitor\n", + "import mindspore.dataset as ds\n", + "import mindspore.dataset.transforms.c_transforms as C\n", + "import mindspore.dataset.vision.c_transforms as CV\n", + "from mindspore.dataset.vision import Inter\n", + "from mindspore import dtype as mstype\n", + "from mindspore import Model\n", + "from mindspore.nn import TrainOneStepCell\n", + "from mindspore.ops import composite as com\n", + "from mindspore import load_checkpoint, load_param_into_net\n", + "from mindspore.train.callback import ModelCheckpoint, CheckpointConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 实现\n", + "\n", + "在本节中,我们将讨论本教程的输入参数,定义受到攻击的模型,然后编写攻击代码并运行一些测试。\n", + "\n", + "### 输入\n", + "\n", + "本教程的输入参数主要为`epsilons`:\n", + "\n", + "* `epsilons`- 指用于运行的`ε`值列表。`ε`为0时表示原始测试集上的模型表现。`ε`值越大,扰动越明显,对模型准确率的攻击也越有效。实验中设定取值范围为0~1.3。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "epsilons = [0, 0.2, 0.5, 0.7, 1.0, 1.3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "下面配置运行所需要的信息" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "parser = argparse.ArgumentParser(description='MindSpore LeNet Example')\n", + "parser.add_argument('--device_target', type=str, default=\"CPU\", choices=['Ascend', 'GPU', 'CPU'])\n", + "args = parser.parse_known_args()[0]\n", + "context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 受到攻击的模型\n", + "\n", + "实验中使用LeNet作为演示模型,采用MINIST数据集。这里先对模型进行训练,后对符合精度要求的模型采去攻击操作。\n", + "\n", + "定义Lenet网络:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class LeNet5(nn.Cell):\n", + "\n", + " def __init__(self, num_class=10, num_channel=1):\n", + " super(LeNet5, self).__init__()\n", + " self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')\n", + " self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')\n", + " self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02))\n", + " self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))\n", + " self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))\n", + " self.relu = nn.ReLU()\n", + " self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)\n", + " self.flatten = nn.Flatten()\n", + "\n", + " def construct(self, x):\n", + " x = self.conv1(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.conv2(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.flatten(x)\n", + " x = self.fc1(x)\n", + " x = self.relu(x)\n", + " x = self.fc2(x)\n", + " x = self.relu(x)\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "\n", + "net = LeNet5()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "下载MINIST数据集:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "./datasets/MNIST_Data\n", + "├── test\n", + "│   ├── t10k-images-idx3-ubyte\n", + "│   └── t10k-labels-idx1-ubyte\n", + "└── train\n", + " ├── train-images-idx3-ubyte\n", + " └── train-labels-idx1-ubyte\n", + "\n", + "2 directories, 4 files\n" + ] + } + ], + "source": [ + "!mkdir -p ./datasets/MNIST_Data/train ./datasets/MNIST_Data/test\n", + "!wget -NP ./datasets/MNIST_Data/train 
https://mindspore-website.obs.myhuaweicloud.com/notebook/datasets/mnist/train-labels-idx1-ubyte\n", + "!wget -NP ./datasets/MNIST_Data/train https://mindspore-website.obs.myhuaweicloud.com/notebook/datasets/mnist/train-images-idx3-ubyte\n", + "!wget -NP ./datasets/MNIST_Data/test https://mindspore-website.obs.myhuaweicloud.com/notebook/datasets/mnist/t10k-labels-idx1-ubyte\n", + "!wget -NP ./datasets/MNIST_Data/test https://mindspore-website.obs.myhuaweicloud.com/notebook/datasets/mnist/t10k-images-idx3-ubyte\n", + "!tree ./datasets/MNIST_Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "进行数据处理:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def create_dataset(data_path, batch_size=1, repeat_size=1,\n", + " num_parallel_workers=1):\n", + " mnist_ds = ds.MnistDataset(data_path)\n", + " resize_height, resize_width = 32, 32\n", + " rescale = 1.0 / 255.0\n", + " shift = 0.0\n", + " rescale_nml = 1 / 0.3081\n", + " shift_nml = -1 * 0.1307 / 0.3081\n", + "\n", + " resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)\n", + " rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)\n", + " rescale_op = CV.Rescale(rescale, shift)\n", + " hwc2chw_op = CV.HWC2CHW()\n", + " type_cast_op = C.TypeCast(mstype.int32)\n", + "\n", + " mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns=\"label\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=resize_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=rescale_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + "\n", + " buffer_size = 10000\n", + " mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)\n", + " mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n", + "\n", + " return mnist_ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "定义优化器与损失函数:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')\n", + "net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "设置并应用模型保存参数:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)\n", + "ckpoint = ModelCheckpoint(prefix=\"checkpoint_lenet\", config=config_ck)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "定义测试和训练网络:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def test_net(model, data_path):\n", + " ds_eval = create_dataset(os.path.join(data_path, \"test\"))\n", + " acc = model.eval(ds_eval, dataset_sink_mode=False)\n", + " print(\"{}\".format(acc))\n", + " \n", + "def train_net(model, epoch_size, data_path, repeat_size, ckpoint_cb, sink_mode):\n", + " ds_train = create_dataset(os.path.join(data_path, \"train\"), 32, repeat_size)\n", + " model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor(125)], 
dataset_sink_mode=sink_mode)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "进行预模型训练并加载到网络中:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "epoch: 1 step: 125, loss is 2.2934856\n", + "epoch: 1 step: 250, loss is 2.2901366\n", + "epoch: 1 step: 375, loss is 2.2859986\n", + "epoch: 1 step: 500, loss is 2.2872832\n", + "epoch: 1 step: 625, loss is 2.299801\n", + "epoch: 1 step: 750, loss is 2.2765894\n", + "epoch: 1 step: 875, loss is 0.9849755\n", + "epoch: 1 step: 1000, loss is 0.29427448\n", + "epoch: 1 step: 1125, loss is 0.8494657\n", + "epoch: 1 step: 1250, loss is 0.30428752\n", + "epoch: 1 step: 1375, loss is 0.20079668\n", + "epoch: 1 step: 1500, loss is 0.14930087\n", + "epoch: 1 step: 1625, loss is 0.0818441\n", + "epoch: 1 step: 1750, loss is 0.1273436\n", + "epoch: 1 step: 1875, loss is 0.096194\n", + "{'Accuracy': 0.9345}\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "train_epoch = 1\n", + "mnist_path = \"./datasets/MNIST_Data/\"\n", + "dataset_size = 1\n", + "model = Model(net, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + "train_net(model, train_epoch, mnist_path, dataset_size, ckpoint, False)\n", + "test_net(model, mnist_path)\n", + "param_dict = load_checkpoint(\"checkpoint_lenet-1_1875.ckpt\")\n", + "load_param_into_net(net, param_dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 梯度攻击\n", + "\n", + "现在,我们干扰原始输入并定义攻击函数`attack`。`attack`函数接受三个输入,`image`是原始的干净图像(`x`),`epsilon`是像素级扰动量`ε`,`data_grad`是输入图像损失的梯度(`ᐁ[x] J(θ, x, y)`)。该函数然后创建扰动图像为\n", + "\n", + " $$\n", + " perturbed\\\\_image = image + epsilon \\times sign(data\\\\_grad) = x + \\epsilon \\times sign(\\nabla_x J(\\theta ,x ,y))\n", + " $$\n", + "\n", + "最后,为了维持数据的原始范围,将被扰动的图像裁剪到范围`[0,1]`。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "定义攻击方法" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def attack(image, epsilon, data_grad):\n", + " sign = ops.Sign()\n", + " sign_data_grad = sign(data_grad)\n", + " perturbed_image = image + epsilon * sign_data_grad\n", + " perturbed_image = mindspore.ops.clip_by_value(perturbed_image, 0, 1)\n", + " return perturbed_image\n", + "net_with_criterion = nn.WithLossCell(net, net_loss)\n", + "fin_train_net = TrainOneStepCell(net_with_criterion, net_opt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 测试函数\n", + "\n", + "本教程的主要结果来自`test`函数。每次调用此测试函数都会在 MNIST 测试集上执行完整的测试步骤,并报告最终精度。`test`函数中还需要`epsilon`输入来控制攻击的强弱变化。\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def test(net, ds_test, epsilon):\n", + " net_with_criterion = nn.WithLossCell(net, net_loss)\n", + " # 使用Model接口包装模型\n", + " pre_model = Model(net, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})\n", + " # 计算过程变量初始化\n", + " correct = 0\n", + " incorrect = 0\n", + " num = 0\n", + " for data in ds_test:\n", + " image = data[\"image\"].asnumpy()\n", + " label = data[\"label\"].asnumpy()\n", + " image_tensor = Tensor(data['image'])\n", + " label_tensor = Tensor(data['label'])\n", + " pre_output = pre_model.predict(image_tensor)\n", + " predicted = np.argmax(pre_output.asnumpy(), axis=1)\n", + " if predicted[0] != label[0]:\n", 
+ " num += 1\n", + " continue \n", + " num += 1\n", + " pre_loss = net_with_criterion(image_tensor, Tensor(data['label']))\n", + " grad = com.GradOperation()\n", + " grads = grad(net_with_criterion)(image_tensor, Tensor(data['label']))\n", + " perturbed_data = attack(Tensor(data['image']), epsilon, grads)\n", + " final = pre_model.predict(perturbed_data)\n", + " fin_predicted = np.argmax(final.asnumpy(), axis=1)\n", + " if fin_predicted[0] == label[0]:\n", + " correct += 1\n", + " if fin_predicted[0] != label[0]:\n", + " incorrect += 1\n", + " final_acc = correct / (correct + incorrect)\n", + " print (\"Accuracy = \", final_acc)\n", + " return final_acc " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 运行攻击\n", + "\n", + "实验的最后一部分是实际运行攻击。在这里,我们为`epsilon`输入中的每个`ε`值运行完整的测试步骤。`ε = 0`表示没有受到攻击时的原始测试准确率,并且随着`ε`的增大,精度在逐渐下降。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy = 0.9602996254681648\n", + "eps = 0 Accuracy = 0.9602996254681648\n", + "Accuracy = 0.9250936329588015\n", + "eps = 0.2 Accuracy = 0.9250936329588015\n", + "Accuracy = 0.8088817549491707\n", + "eps = 0.5 Accuracy = 0.8088817549491707\n", + "Accuracy = 0.5854467629748529\n", + "eps = 0.7 Accuracy = 0.5854467629748529\n", + "Accuracy = 0.23542001070090957\n", + "eps = 1.0 Accuracy = 0.23542001070090957\n", + "Accuracy = 0.09898341359015517\n", + "eps = 1.3 Accuracy = 0.09898341359015517\n" + ] + } + ], + "source": [ + "accuracies = []\n", + "# 定义测试数据集,batch_size=1表示每次取出一张图片\n", + "ds_test = create_dataset(os.path.join(mnist_path, \"test\"), batch_size=1).create_dict_iterator()\n", + "for eps in epsilons:\n", + " final_acc = test(net, ds_test, eps)\n", + " accuracies.append(final_acc)\n", + " print (\"eps = \", eps, \"Accuracy = \", final_acc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 结果分析\n", + "\n", + "### 准确率与`ε`\n", + "\n", + "第一个结果是精度与`ε`曲线的关系。如前所述,随着`ε`的增加,我们预计测试精度会降低。这是因为更大的ε意味着我们朝着将损失最大化的方向迈出了更大的一步。请注意,即使`ε`值是线性间隔的,曲线中的趋势也不是线性的。可以看出,Eps越大,Accuracy的下降幅度就越大。在Eps为0时,测试集拥有98%左右的精确度,但当Es增大为1以上时,模型几乎无法识别图像。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n \n \n \n \n 2021-06-16T17:36:59.904608\n image/svg+xml\n \n \n Matplotlib v3.4.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUoAAAFNCAYAAABmLCa9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtVElEQVR4nO3dd5hU9d3+8fdnG0uvS++9i7oBK4iiggUsUcEW86jExBKjSSSxIybGPGpiifUXk8eGXbEiNkSlLUpbKS5tKSJLX8BdWPbz+2MGHXF3ZymzZ2b2fl3XXO6Zc2bmltWbc+ac7/eYuyMiIuVLCTqAiEi8U1GKiEShohQRiUJFKSIShYpSRCQKFaWISBQqShGRKFSUUilm9rGZbTKzGkFniWdmttzMvjOzbRGPB4POJQdGRSlRmVl74FjAgeFV/NlpVfl5B8np7l4n4nFV0IHkwKgopTIuBqYB/wF+EbnCzNqY2StmVmBmGyL3nszscjNbYGaFZvaVmR0Wft7NrHPEdv8xs3Hhn48zs1VmdoOZrQWeNLOGZvZm+DM2hX9uHfH6Rmb2pJmtCa9/Lfz8fDM7PWK7dDNbb2aH7v0vGM55WsRyWvjzDjOzTDN7Ovzvt9nMZppZs339QzSzS8zsMzN70My2mNlCMzthr/VLw39ey8zsgn39DIkNFaVUxsXAM+HHyXtKwsxSgTeBFUB7oBUwPrzuHOC28GvrEdoT3VDJz2sONALaAaMJ/Xf6ZHi5LfAdEHk4+xRQC+gFNAXuCz//f8CFEdudAnzj7l+W8ZnPAaMilk8G1rv7F4T+cqgPtAEaA1eEM+yPAcASoAlwK/BKuOhrA/cDw9y9LnAUMHs/P0MONnfXQ49yH8AxwC6gSXh5IfC78M9HAgVAWhmvmwj8tpz3dKBzxPJ/gHHhn48DdgKZFWTqB2wK/9wCKAUalrFdS6AQqBdefgn4Yznv2Tm8ba3w8jPALeGf/wf4HOhbiT+v5cA2YHPE4/LwukuANYBFbD8DuAioHd72bKBm0L93PX780B6lRPML4D13Xx9efpYfDr/bACvcvaSM17UhtOe0PwrcvWjPgpnVMrNHzWyFmW0FPgEahPdo2wAb3X3T3m/i7muAz4CzzawBMIxQAf6Eu+cBC4DTzawWoT3gZ8OrnyJU/OPDh/d3m1l6BfnPcPcGEY/HI9at9nBDhq0AWrr7duA8Qnur35jZW2bWvYLPkCqkopRymVlN4FxgkJmtDX9n+DvgEDM7BFgJtC3nhMtKoFM5b72D0KHyHs33Wr/3lFbXA92AAe5eDxi4J2L4cxqFi7As/yV0+H0OMNXdV5ezHfxw+D0C+Cpcnrj7Lne/3d17EjokPo3QVwr7o5WZWcRyW0J7mbj7RHc/kdBe8kLg8TJeLwFQUUpFzgB2Az0JHe72A3oAUwgVxQzgG+AuM6sdPulxdPi1TwC/N7PDLaSzmbULr5sNnG9mqWY2FBgUJUddQt8JbjazRoS+2wPA3b8B3gH+FT7pk25mAyNe+xpwGPBbQt9ZVmQ8cBLwa37Ym8TMBptZn/Ae7FZCX0WURnmv8jQFrgnnPIfQn+fbZtbMzEaEv6ssJnT4vr+fIQeZilIq8gvgSXfPd/e1ex6ETqRcQGiP7nRC3+/lA6sIHT7i7i8CdxIqnEJChdUo/L6/Db9uc/h9XouS4x9ATWA9obPv7+61/iJC5bUQWAdcu2eFu38HvAx0AF6p6EPCpTuV0F7j8xGrmhP6fnMrocPzyYQOx8vzxl7XUb4asW460CX873In8HN330Do/8XrCO1dbiT0l8evK8orVcd+/HWJSPIxs1uAru5+YdSNY5vjEuAydz8myByy7xLxYl6RSgsfql9KaK9TZL/o0FuSlpldTuhkzzvu/knQeSRx6dBbRCQK7VGKiEShohQRiSLhTuY0adLE27dvH3QMEUkys2bNWu/uWWWtS7iibN++PTk5OUHHEJEkY2YrylunQ28RkShUlCIiUagoRUSiUFGKiEShohQRiUJFKSIShYpSRCSKpC/KdVuLOPfRqawrLIq+sYhIGZK+KP/y9gJmLt/I/e9/HXQUEUlQCTcyp7K63fQOxSU/zKT/9PR8np6eT3qqsfCOYaSmWAWvFhH5QdLuUU7542BO69uC9NRQIe6pxV27nX63v8cvn5zBI5OX8EX+Jnbt1q1JRKR8SbtH2bReJvVrplNS6tRIS2Hn7lLO7NeS47o1ZfqyjUxfuoGPFhUAUDM9lcPbNWRAh0b079CIQ9o0IDM9NeB/AxGJF0lblADrtxVzwYB2nN+/Lc/OyKegsIgR/Voxol8rAAoKi5m5PFSa05dt5J5JiwHISEvh0DYNGNChEQM6NubQtg2olZHU
f1QiUoGEm+E8OzvbYzV70OYdO5mxbCMzlm1k+rKN5K7ZQqlDWorRt3V9+ndozICOjchu15C6mekxySAiwTCzWe6eXeY6FWX5Cot2kbNiU6g4l25g7qotlJQ6KQa9Wtb//lC9f4dGNKiVUSWZRCQ2VJQHyY6dJXyZv/n7Q/UvV25mZ/jMevfmdb8/VP9Z+0Zk1a0RSEYR2T8qyhgp2rWbOSs3f3+oPmvFJr7btRuATlm16d+hMUd0DO1xtqhf8yevX7e1iKue+5IHzz+UpnUzqzq+iERQUVaRXbtLmbd6C9OXbmTGsg3kLN9EYXEJAG0b1aJ/h0YM6NCIIzo2pnXDmtz82nyemZHPBf3bMu7MPgGnF6neVJQB2V3qLPhmK9PCh+ozl29k845d5W5fIy2FReOGVWFCEdmjoqLUNS8xlJpi9G5Vn96t6nPZsR0pLXUWryvkgwXf8sy0fNZsCY0/TzEY2CWLu8/pG3BiESlL0o7MiUcpKUb35vW4cnAXBndvilno0qNSh48XFzDuzQXkb9gRdEwR2YuKMiB7LoafcNUxnJfdmo5NavHeV2s54d6PuW1CLuu3FQcdUUTCYvodpZkNBf4JpAJPuPtde61vB/wbyAI2Ahe6+6qK3jORvqPcV99uLeKfH3zN8zNXkpmWwuUDO3LZsR2pU0PfkIjEWiAnc8wsFVgMnAisAmYCo9z9q4htXgTedPf/mtnxwC/d/aKK3jeZi3KPJQXb+N+Ji3hn/lqa1Mng6uO7MKp/WzLSdAAgEisVFWUs/8/rD+S5+1J33wmMB0bstU1P4MPwzx+Vsb5a6pRVh4cvPJxXf3MUnbLqcOuEXIbcO5kJc9ZQWppYVymIJINYFmUrYGXE8qrwc5HmAGeFfz4TqGtmjWOYKaEc2rYh40cfwZO//Bm1MlK55rkvOf3BT5nydUHQ0USqlaCP5X4PDDKzL4FBwGpg994bmdloM8sxs5yCgupVEmbG4G5NefuaY7nvvEPYvGMXF/2/GVz4xHTmrdoSdDyRaiGW31EeCdzm7ieHl/8E4O5/LWf7OsBCd29d0ftWh+8oK1Jcspunp+Xz4Idfs2nHLk7r24Lfn9SN9k1qBx1NJKEF9R3lTKCLmXUwswxgJDBhr2BNzGxPhj8ROgMuFaiRlsqlx3Rg8h8Hc/XxnflgwTqG3DuZm1+bT0GhLikSiYWYFaW7lwBXAROBBcAL7p5rZmPNbHh4s+OARWa2GGgG3BmrPMmmXmY615/Ujcl/OI6R/dvw7Ix8Bv39I+6dtJjCovKHSYrIvtNY7ySxtGAb97y3mLfmfUOj2hlcfXxnzh/QlhppuqWFSGUEdegtVahjVh0euuAwXr/yaLo1q8vtb3zFkHsn8/rs1bqkSOQAqSiTzCFtGvDs5QP47//0p26NdH47fjanPfApkxcXkGhHDyLxQkWZhMyMQV2zePPqY/jnyH4UFu/iF/+ewQVPTGfOys1BxxNJOCrKJJaSYozo14oPrjuO207vycK1hYx46DOufOYLlq3fHnQ8kYShkznVSGHRLh6fsownpiyluKSUkT9rw29P6ELTeroNhYhmOJcfKSgs5oEPv+bZ6fmkp6Zw2bEdGD2wo27BK9WailLKtHz9du6ZtJg35qyhYa10rjq+CxceoUuKpHrS5UFSpvZNavPAqEN546pj6NWyPne8+RXH/+9kXvliFbt1SZHI91SUQp/W9Xn6sgE8dWl/GtZO57oX5nDq/VP4aNE6XVIkgopSIhzbJYsJVx7D/aMOZcfO3fzyyZmMfGwaX+ZvCjqaSKBUlPIjKSnG8ENa8v51gxg7ohdLCrZx5r8+59dPz2JJwbag44kEQidzpELbikt4YspSHv9kKUUlpZyb3YZrh3ShmS4pkiSjs95ywNZvK+bBD/N4ZvoKUlOMS4/pwK8GdaKeLimSJKGilIMmf8MO7pm0iNdnr6FBrXSuGtyZC49oR2a6LimSxKbLg+Sgadu4Fv8ceShvXn0MfVrVZ9xbCzjhnsm8PEuXFEnyUlHKfundqj5PXTqAZy4bQKPaGVz/4hxO+ecUPlz4rS4pkqSjopQDcnTnJrx+5dE8eP6hFJfs5n/+k8N5j03jC11SJElERSkHLCXFOK1vSyZdN4g7zujN0oLtnPWvz/nVUznkrdvGuq1FnPvoVNYVFgUdVWS/xLQozWyomS0yszwzG1PG+rZm9pGZfWlmc83slFjmkdhKT03hoiPaMfkPx3H9iV35LG8DJ903mfOfmMbM5Ru5//2vg44osl9iebvaVGAxcCKwitBdGUe5+1cR2zwGfOnuD5tZT+Btd29f0fvqrHfi6HrTO+wsKf3J8zXSUlg0blgAiUTKF9RZ7/5AnrsvdfedwHhgxF7bOFAv/HN9YE0M80gV+/SPgxneryU10kL/maUYjOjXkik3DA44mci+SYvhe7cCVkYsrwIG7LXNbcB7ZnY1UBsYEsM8UsWa1sukbo00du4uJS3FKCl11m0tomldjeqRxBL0yZxRwH/cvTVwCvCUmf0kk5mNNrMcM8spKCio8pCy/9ZvK+aCAe14/cqjyaqTwczlmygoLA46lsg+iWVRrgbaRCy3Dj8X6VLgBQB3nwpkAk32fiN3f8zds909OysrK0ZxJRYevSibcWf0pler+jw3+khSzLhtQm7QsUT2SSyLcibQxcw6mFkGMBKYsNc2+cAJAGbWg1BRapcxSXVuWodrTujMW/O+4b3ctUHHEam0mBWlu5cAVwETgQXAC+6ea2ZjzWx4eLPrgcvNbA7wHHCJa1hHUvvVoE50b16Xm1+fz9aiXUHHEakUTYohVW7uqs2c8dBnnPeztvz1rD5BxxEBNCmGxJm+rRtw6TEdeG5GPtOWbgg6jkhUKkoJxHUndqNto1qMeXkuRbt2Bx1HpEIqSglEzYxU/npWH5Zv2ME/NLRR4pyKUgJzdOcmnJvdmsenLGX+6i1BxxEpl4pSAnXjKT1pVDuDG16eS8nun44LF4kHKkoJVP1a6Ywd3ovcNVt5fMqyoOOIlElFKYEb1qcFJ/dqxj/eX8yy9duDjiPyEypKiQtjR/QmIy2FMS/PpVT33pE4o6KUuNCsXiY3ntKD6cs2Mn7myugvEKlCKkqJG+f9rA1HdmzMX99ewNotum2ExA8VpcQNM+OvZ/Vh5+5Sbn59vu7mKHFDRSlxpX2T2lx3YlcmffUtb8/TDEMSH1SUEncuPaYDfVrV59YJ89m0fWfQcURUlBJ/0lJTuOvsPmzasYtxby0IOo6IilLiU6+W9fnVwI68/MUqPlmsuZwlWCpKiVvXnNCFjk1q8+dX57G9uCToOFKNqSglbmWmp3LX2X1Ztek77nlvcdBxpBpTUUpc69+hERce0ZYnP1/Gl/mbgo4j1VRMi9LMhprZIjPLM7MxZay/z8xmhx+LzWxzLPNIYrphaHea18tkzMvz2FmiGYak6sWsKM0sFXgIGAb0BEaZWc/Ibdz9d+7ez937AQ8Ar8QqjySuupnpjDujN4u+LeThj5cEHUeqoVjuUfYH8tx9qbvvBMYDIyrYfhShOzGK/MQ
JPZpx+iEtefCjr/n628Kg40g1E8uibAVEzm6wKvzcT5hZO6AD8GEM80iCu/X0ntSukcYNL89lt2YYkioULydzRgIvuXuZd5kys9FmlmNmOQUFuqauumpSpwa3nNaTL/I389TU5UHHkWoklkW5GmgTsdw6/FxZRlLBYbe7P+bu2e6enZWVdRAjSqI589BWDOyaxd0TF7Fq046g40g1EcuinAl0MbMOZpZBqAwn7L2RmXUHGgJTY5hFkoSZ8ZczewNw46uaYUiqRsyK0t1LgKuAicAC4AV3zzWzsWY2PGLTkcB413/xUkmtG9biDyd3Y/LiAl6bXd5BisjBY4nWT9nZ2Z6TkxN0DAnY7lLn5498zvL123n/ukE0rlMj6EiS4Mxslrtnl7UuXk7miOyT1BTjb2f3ZVtxCbe/8VXQcSTJqSglYXVtVpcrB3dmwpw1fLjw26DjSBJTUUpC+81xnenarA43vjqfwqJdQceRJKWilISWkZbCXWf3Ze3WIu5+d1HQcSRJqSgl4R3WtiGXHNWep6atYObyjUHHkSSkopSk8PuTutGqQU1ueHkuRbvKHOAlst9UlJIUatdI469n9WFpwXYe/DAv6DiSZFSUkjQGds3irMNa8cjkJSz4ZmvQcSSJqCglqdx8ak/q10znhpfnUrJbk/zKwaGilKTSsHYGtw3vxdxVW3jys+VBx5EkoaKUpHNa3xYM6dGUeyYtYsWG7UHHkSSgopSkY2bccUZv0lJS+NMr8zTDkBwwFaUkpRb1azJmWHc+X7KBF3JWRn+BSAVUlJK0zu/flv4dGjHurQWs21oUdBxJYCpKSVopKcZdZ/WhuKSUW17PDTqOJDAVpSS1jll1uHZIF97NXcu7878JOo4kKBWlJL3Lj+1Izxb1uPn1XLbs0AxDsu9UlJL00lNTuPvnfdm4fSd/eXtB0HEkAcW0KM1sqJktMrM8MxtTzjbnmtlXZpZrZs/GMo9UX71b1eeyYzvwfM5KPs9bH3QcSTAxK0ozSwUeAoYBPYFRZtZzr226AH8Cjnb3XsC1scoj8rshXWnfuBZjXpnHdzs1w5BUXiz3KPsDee6+1N13AuOBEXttcznwkLtvAnD3dTHMI9VcZnoqfz2rL/kbd3Df+4uDjiMJJJZF2QqIvNJ3Vfi5SF2Brmb2mZlNM7OhMcwjwpGdGjOqfxuemLKUuas2Bx1HEkTQJ3PSgC7AccAo4HEza7D3RmY22sxyzCynoKCgahNK0hkzrAdN6tTgjy/NZZdmGJJKiGVRrgbaRCy3Dj8XaRUwwd13ufsyYDGh4vwRd3/M3bPdPTsrKytmgaV6qF8znTvO6M3CtYXcO2kx5z46lXWFGrkj5YtlUc4EuphZBzPLAEYCE/ba5jVCe5OYWRNCh+JLY5hJBICTezXnlD7NeXTyEmYu28j9738ddCSJY2mxemN3LzGzq4CJQCrwb3fPNbOxQI67TwivO8nMvgJ2A39w9w2xyiSyR7eb3qG45IfD7qen5/P09HxqpKWwaNywAJNJPLJEm4IqOzvbc3Jygo4hCW7d1iLGvb2Ad+Z9w67dTnqqcUqfFtx4ag+a1s0MOp4EwMxmuXt2WeuCPpkjEoim9TKpWyONklLHjO/LUiUpZYlalGZ2upmpUCXprN9WzAUD2vHwBYdhwOd5+tZHylaZAjwP+NrM7jaz7rEOJFJVHr0om3Fn9GZo7xb8+rhOrNlSxNQlKkv5qahF6e4XAocCS4D/mNnU8HWNdWOeTqSKXH18F1o3rMlNr81jZ4murZQfq9QhtbtvBV4iNAyxBXAm8IWZXR3DbCJVpmZGKneM6M2Sgu08PkVXqMmPVeY7yuFm9irwMZAO9Hf3YcAhwPWxjSdSdQZ3b8qw3s25/4Ovyd+wI+g4Ekcqs0d5NnCfu/dx97/vmbjC3XcAl8Y0nUgVu+X0nqSlGDe/Pl93b5TvVaYobwNm7Fkws5pm1h7A3T+ITSyRYLSoX5PrTurG5MUFvDN/bdBxJE5UpihfBCK/3d4dfk4kKf3iyHb0bFGP29/IpbBIt46QyhVlWng+SQDCP2fELpJIsNJSU/jLWX1YV1jMvZM0b6VUrigLzGz4ngUzGwFoLn1Jav3aNODCAe347+fLmb96S9BxJGCVKcorgD+bWb6ZrQRuAH4V21giwfv9yd1oVLsGN746j92lOrFTnVXmgvMl7n4Eofve9HD3o9w9L/bRRIJVv2Y6N5/WgzmrtvDs9BVBx5EAVWqaNTM7FegFZJoZAO4+Noa5ROLC8ENa8mLOKu5+dxEn926uSTOqqcpccP4IofHeVwMGnAO0i3EukbhgZtxxRm+Kd5cy7k3dE7y6qsx3lEe5+8XAJne/HTiS0EzkItVChya1+c1xnZgwZw1TvtY9m6qjyhTlnpuJ7DCzlsAuQuO9RaqNKwZ1okOT2tz82nyKdume4NVNZYryjfCdEf8OfAEsB56NYSaRuJOZHpo0Y/mGHTz88ZKg40gVq7AowxP2fuDum939ZULfTXZ391uqJJ1IHDmmSxNG9GvJwx8vYWnBtqDjSBWqsCjdvRR4KGK52N0rffWtmQ01s0VmlmdmY8pYf4mZFZjZ7PDjsn1KL1LFbjy1BzXSUzRpRjVTmUPvD8zsbNtzXVAlmVkqoZIdRugazFFm1rOMTZ93937hxxP78hkiVa1p3Uz+OLQ7n+VtYMKcNUHHkSpSmaL8FaFJMIrNbKuZFZrZ1kq8rj+Q5+5Lw+PDxwMjDiCrSFw4v39bDmnTgDve/IotOzRpRnVQmZE5dd09xd0z3L1eeLleJd67FbAyYnlV+Lm9nW1mc83sJTNrU9YbhW89kWNmOQUFujxDgpWaYtx5Rm82bt/J3RMXBh1HqkBlLjgfWNbjIH3+G0B7d+8LTAL+W9ZG7v6Yu2e7e3ZWVtZB+miR/de7VX0uOaoDz87I54v8TUHHkRirzKH3HyIeNxMqt9sq8brVQOQeYuvwc99z9w3uXhxefAI4vBLvKxIXrjupK83qZnLjq/Mp2a0bkiWzyhx6nx7xOBHoDVTmr9CZQBcz62BmGcBIYELkBmYWeeH6cEBjxCRh1KmRxq2n92TBN1v5z+fLg44jMVSpuzDuZRXQI9pG7l4CXAVMJFSAL7h7rpmNjZjf8hozyzWzOcA1wCX7kUckMEN7N2dwtyzunbSYNZu/CzqOxIhFuxbMzB4A9myUAvQDlofv913lsrOzPScnJ4iPFinTyo07GHLvZAZ3a8ojF+nbo0RlZrPcPbusdZXZo8wBZoUfU4EbgipJkXjUplEtrjmhC+/mruWDBd8GHUdioDLzUb4EFLn7bghdSG5mtcK3qxUR4PJjO/Lql6u55fVcjurUhJoZqUFHkoOoUiNzgJoRyzWB92MTRyQxZaSlcOcZvVm9+Tvu//DroOPIQVaZosx09+9nAAj/XCt2kUQS04COjfn54a15/JOlLP62MOg4chBVpii3m9lhexbM7HBAp/dEyvCnYd2pk5nGja/Oo1Q3JEsalSnKa4EXzWyKmX0KPE/osh8R2UvjOjX407DuzFy+iZe+WBV0HD
lIop7McfeZZtYd6BZ+apG7ayYAkXKcc3gbXsxZxV/fXsCQHs1oVDsj6EhygCoz1vtKoLa7z3f3+UAdM/tN7KOJJKaUFGPcmb0pLCrhrnc02CwZVObQ+3J337xnwd03AZfHLJFIEujevB6XHtuBF3JWMWPZxqDjyAGqTFGmRk7aG56QV8cSIlH89oQutGpQk5tem8fOEk2akcgqU5TvAs+b2QlmdgLwHPBObGOJJL5aGWncPrwXi7/dxv/7dFnQceQAVKYobwA+BK4IP+bx4wvQRaQcQ3o246SezfjnB4tZuVGD2RJVZaZZKwWmE7pNbX/geDQdmkil3Tq8Fylm3DohVzckS1DlFqWZdTWzW81sIfAAkA/g7oPd/cGqCiiS6Fo1qMnvhnTlw4XrmJirSTMSUUV7lAsJ7T2e5u7HuPsDwO6qiSWSXC45uj3dm9fl9jdy2VZcEnQc2UcVFeVZwDfAR2b2ePhEzj7dslZEQtJTU7jzzD58s6WIf0xaHHQc2UflFqW7v+buI4HuwEeEhjI2NbOHzeykKsonkjQOb9eQUf3b8uTny8ldsyXoOLIPKnMyZ7u7P+vupxO6QdiXhM6Ei8g+umFoNxrUTOfGV+dr0owEsk/3zHH3TeFbx55Qme3NbKiZLTKzPDMbU8F2Z5uZm1mZ07CLJIsGtTK46bQezF65medm5gcdRyppf24uVinhETwPAcOAnsAoM+tZxnZ1gd8SugRJJOmd0a8VR3ZszN/eWUhBYXH0F0jgYlaUhK65zHP3pe6+ExgPjChjuzuAvwFFMcwiEjfMQpNmFO0q5S9v65LkRBDLomwFrIxYXhV+7nvhCYHbuPtbFb2RmY02sxwzyykoKDj4SUWqWKesOlwxKHSfnc/z1gcdR6KIZVFWyMxSgHuB66NtG/5eNNvds7OysmIfTqQK/GZwZ9o1rsVNr82nuESXKMezWBblaqBNxHLr8HN71AV6Ax+b2XLgCGCCTuhIdZGZnsrYEb1Zun47j3y8NOg4UoFYFuVMoIuZdTCzDGAkMGHPSnff4u5N3L29u7cHpgHD3T0nhplE4sqgrlmc1rcFD32cx7L124OOI+WIWVG6ewmhe+tMJDSJxgvunmtmY81seKw+VyTR3HxaT2qkpnDL6/M1aUaciul3lO7+trt3dfdO7n5n+Llb3H1CGdsep71JqY6a1cvk9yd3Y8rX63lj7jdBx5EyBHYyR0R+cOER7ejTqj53vPkVW77TvfvijYpSJA6kphh/ObMPG7YVc897i4KOI3tRUYrEiT6t63Pxke15atoK5qzcHHQciaCiFIkj153Ulaw6Nfjzq/Mo2a0bksULFaVIHKmXmc4tp/ckd81Wnpq2Iug4EqaiFIkzp/ZpwcCuWdzz3mLWbtEUCPFARSkSZ8yMO0b0YufuUu5486ug4wgqSpG41K5xba4e3Jm35n3DR4vWBR2n2lNRisSp0YM60jGrNre8Pp+iXZo0I0gqSpE4VSMtlXFn9Gblxu948MO8oONUaypKkTh2VKcmnHVoKx79ZAl56wqDjlNtqShF4tyfT+1BzfRUbnxVk2YERUUpEuea1KnBmGE9mL5sI698sTr6C+SgU1GKJICRP2vDoW0bcOfbC9i8Y2fQcaodFaVIAkhJMe48ow9bvtvF395dGHScakdFKZIgerasx/8c3Z7nZqxk1oqNQcepVlSUIgnk2iFdaVE/kxtfnc8uTZpRZVSUIgmkdo00bhvei4VrC3nys2VBx6k2YlqUZjbUzBaZWZ6ZjSlj/RVmNs/MZpvZp2bWM5Z5RJLBST2bMaRHU+6b9DWrN38XdJxqIWZFaWapwEPAMKAnMKqMInzW3fu4ez/gbkL3+RaRCpgZtw3vBcBtE3IDTlM9xHKPsj+Q5+5L3X0nMB4YEbmBu2+NWKwN6GpakUpo3bAWvx3ShUlffct7uWuDjpP0YlmUrYCVEcurws/9iJldaWZLCO1RXhPDPCJJ5dJjOtC1WR1um5DL9uKSoOMktcBP5rj7Q+7eCbgBuKmsbcxstJnlmFlOQUFB1QYUiVPpqSnceWYf1mwp4v4Pvg46TlKLZVGuBtpELLcOP1ee8cAZZa1w98fcPdvds7Oysg5eQpEE97P2jTgvuw1PfLqMhWu3Rn+B7JdYFuVMoIuZdTCzDGAkMCFyAzPrErF4KqC/FkX20Zhh3amXmcafX5lHaam+5o+FmBWlu5cAVwETgQXAC+6ea2ZjzWx4eLOrzCzXzGYD1wG/iFUekWTVsHYGfz6lB1/kb+b5nJXRXyD7zBJt2qbs7GzPyckJOoZIXHF3zntsGovWFvLB9YNoUqdG0JESjpnNcvfsstYFfjJHRA6cmXHnGb3ZsbOEv7y9IOg4SUdFKZIkujSry+iBHXnli9V8vmR90HGSiopSJIlcNbgLbRrV5KbX5lNcohuSHSwqSpEkUjMjlbHDe7O0YDuPf7I06DhJQ0UpkmQGd2/KKX2a88CHeazYsD3oOElBRSmShG45rRdpKcYtr+fqhmQHgYpSJAk1r5/J9Sd1Y/LiAt6ep0kzDpSKUiRJXXxkO3q1rMftb+RSWLQr6DgJTUUpkqTSwpNmFGwr5p73FgcdJ6GpKEWSWL82DbhwQDv+b+pyJi8q4NxHp7KusCjoWAlHRSmS5H5/cjca1a7B716YzczlG7n/fc09s6/Sgg4gIrHV/873KS754Y6NT0/P5+np+dRIS2HRuGEBJksc2qMUSXJT/jiY4Ye0JNVCywYM6prFlBsGB5orkagoRZJc03qZ1M1MoxRITzUcmLy4gH99tIRtuoVEpagoRaqB9duKuWBAO16/8hjOy25N24Y1+e/U5Zx472QmffVt0PHinuajFKmmZq3YxJ9fmceibwsZ1rs5tw3vRbN6mUHHCozmoxSRnzi8XUPevOYY/nByNz5YuI4h90zmqWkrdDuJMqgoRaqx9NQUrhzcmYnXDqRP6/rc/Np8znl0Kou/LQw6WlyJaVGa2VAzW2RmeWY2poz115nZV2Y218w+MLN2scwjImXr0KQ2z1w2gHvOOYSlBds49f4p3PPeIop2aU5LiGFRmlkq8BAwDOgJjDKznntt9iWQ7e59gZeAu2OVR0QqZmacfXhrPrj+OE4/pCUPfJjHsH9O0WzpxHaPsj+Q5+5L3X0noft2j4jcwN0/cvcd4cVphO79LSIBalQ7g3vP7cfTlw6g1J3zH5/O71+cw6btO4OOFphYFmUrIPLemavCz5XnUuCdGOYRkX1wTJcmTLx2IL85rhOvfbmaE+6dzKtfrqqW81vGxckcM7sQyAb+Xs760WaWY2Y5BQUFVRtOpBrLTE/lj0O788bVx9C2US1+9/wcLv73DPI37Ij+4iQSy6JcDbSJWG4dfu5HzGwIcCMw3N2Ly3ojd3/M3bPdPTsrKysmYUWkfD1a1OPlXx/F7cN78WX+Zk76x2QembyEXbtLo784CcSyKGcCXcysg5llACOBCZEbmNmhwKOESnJdDLOIyAFKTTF+cVR7Jl03kIFdsrjrnYWc/sCnzF65OehoMRezonT3EuAqYCKwA
HjB3XPNbKyZDQ9v9negDvCimc02swnlvJ2IxIkW9Wvy2MXZPHLh4WzasZMz//UZt03ITepx4xrCKCL7rbBoF3+fuIinpq2geb1Mxo7ozYk9mwUda79oCKOIxETdzHTGjujNy78+inqZ6Vz+fzlc8dQsvt2aXLOoqyhF5IAd1vaHceMfLUq+ceMqShE5KCLHjfdt88O48UVrE3/cuIpSRA6q9k1q8/SlPx43/r8TE3vcuIpSRA66yHHjw/u15MGPEnvcuIpSRGImWcaNqyhFJOYSfdy4ilJEqsSeceNvXvPjceMrNmwPOlpUKkoRqVLdm4fGjY8dER43ft8nPPxxfI8bV1GKSJVLTTEuPrI97183iOO6ZfG3d+N73LiKUkQC07x+Jo9elM2jFx3O5h274nbcuIpSRAJ3cq/mTLpuIBcf0S4u7zeuohSRuFA3M53byxg3vnZL8OPGVZQiElf2jBv/49DQuPET753MU1OXBzpuXEUpInEnPTWF3xzXmfd+N5BD2jTg5tdz+fkjnwc2blxFKSJxq13j2jx1aX/uPfcQlq3fzqn3T+HvExdW+bhxFaWIxDUz46zDQuPGR/RrxUMfLWHoPz7h87yqGzeuohSRhNCodgb3nHsIz1w2AIDzn5jO9S/MYWMVjBuPaVGa2VAzW2RmeWY2poz1A83sCzMrMbOfxzKLiCSHozs34d1rB3Ll4E68Pns1Q6pg3HjMitLMUoGHgGFAT2CUmfXca7N84BLg2VjlEJHkk5meyh9ODo0bb9f4x+PG120t4txHp7Ku8OBdVhTLPcr+QJ67L3X3ncB4YETkBu6+3N3nAvE7yFNE4lb35vV46YqjuCNi3Pjop2Yxc/lG7n//64P2ObEsylbAyojlVeHnREQOmtQU46Ij27OzpJTiklJmr9yMOzw9PZ/2Y96i203vHPBnJMTJHDMbbWY5ZpZTUFAQdBwRiUOf3jCY4f1akpFqAGSmpTCiX0um3DD4gN87lkW5GmgTsdw6/Nw+c/fH3D3b3bOzsrIOSjgRSS5N62VSt0Yau0qdGmkpFO8upW6NNJrWzTzg9047CPnKMxPoYmYdCBXkSOD8GH6eiFRz67cVc8GAdpzfvy3Pzsin4CCd0LFYnlI3s1OAfwCpwL/d/U4zGwvkuPsEM/sZ8CrQECgC1rp7r4reMzs723NycmKWWUSqJzOb5e7ZZa2L5R4l7v428PZez90S8fNMQofkIiJxKyFO5oiIBElFKSIShYpSRCQKFaWISBQqShGRKFSUIiJRqChFRKJQUYqIRKGiFBGJQkUpIhKFilJEJAoVpYhIFCpKEZEoVJQiIlGoKEVEolBRiohEoaIUEYlCRSkiEkVMi9LMhprZIjPLM7MxZayvYWbPh9dPN7P2scwjIrI/YlaUZpYKPAQMA3oCo8ys516bXQpscvfOwH3A32KVR0Rkf8Vyj7I/kOfuS919JzAeGLHXNiOA/4Z/fgk4wcwshplERPZZLIuyFbAyYnlV+Lkyt3H3EmAL0DiGmURE9llMb1d7sJjZaGB0eHGbmS3ax7doAqw/uKlkH+l3EB/0eyhfu/JWxLIoVwNtIpZbh58ra5tVZpYG1Ac27P1G7v4Y8Nj+BjGznPJubC5VQ7+D+KDfw/6J5aH3TKCLmXUwswxgJDBhr20mAL8I//xz4EN39xhmEhHZZzHbo3T3EjO7CpgIpAL/dvdcMxsL5Lj7BOD/AU+ZWR6wkVCZiojEFasOO3BmNjp8+C4B0e8gPuj3sH+qRVGKiBwIDWEUEYkiqYpSQyaDV4nfwSVmVmBms8OPy4LImczM7N9mts7M5pez3szs/vDvaK6ZHVbVGRNN0hSlhkwGr5K/A4Dn3b1f+PFElYasHv4DDK1g/TCgS/gxGni4CjIltKQpSjRkMh5U5ncgMebunxC6iqQ8I4D/85BpQAMza1E16RJTMhWlhkwGrzK/A4Czw4d8L5lZmzLWS2xV9vckYclUlJIY3gDau3tfYBI/7OGLxK1kKsp9GTJJRUMmZb9F/R24+wZ3Lw4vPgEcXkXZ5AeV+X9FIiRTUWrIZPCi/g72+i5sOLCgCvNJyATg4vDZ7yOALe7+TdCh4llCzB5UGRoyGbxK/g6uMbPhQAmh38ElgQVOUmb2HHAc0MTMVgG3AukA7v4I8DZwCpAH7AB+GUzSxKGROSIiUSTTobeISEyoKEVEolBRiohEoaIUEYlCRSkiEkXSXB4k1ZOZ7QbmRTw13t3vCiqPJCddHiQJzcy2uXudoHNIctOhtyQlM1tuZneb2Twzm2FmncPPn2Nm881sjpl9EnROSQwqSkl0NSMmAZ5tZudFrNvi7n2AB4F/hJ+7BTjZ3Q8hNIRSJCodektCK+/Q28yWA8e7+1IzSwfWuntjM3sE6AS8ALzi7poURaLSHqUkM9/7Z3e/AriJ0Ow5s8xM85FKVCpKSWbnRfxzKoCZdXL36e5+C1DAj6cbEymTLg+SRFfTzGZHLL/r7ntuatbQzOYCxcCo8HN/N7MugAEfAHOqLKkkLH1HKUkp/B1ltruvDzqLJD4deouIRKE9ShGRKLRHKSIShYpSRCQKFaWISBQqShGRKFSUIiJRqChFRKL4/5AN7Ph45RJMAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "plt.figure(figsize=(5,5))\n", + "plt.plot(epsilons, accuracies, \"*-\")\n", + "plt.yticks(np.arange(0, 1.0, step=0.1))\n", + "plt.xticks(np.arange(0, 1.5, step=0.5))\n", + "plt.title(\"Accuracy vs Eps\")\n", + "plt.xlabel(\"Eps\")\n", + "plt.ylabel(\"Accuracy\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "攻击前后数据集图片效果如下图,左侧为原始样本,右侧为梯度攻击后生成的对抗样本。从视觉角度而言,右侧图片与左侧图片几乎没有明显变化,但是均成功误导了模型,使模型将其误分类为其他非正确类别。\n", + "\n", + "![attack_result](images/attack_result.png)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "MindSpore1.2", + "language": "python", + "name": "mindspore1.2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.7.5-final" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/fine_tune.md b/tutorials/source_zh_cn/middleclass/image_and_video/fine_tune.md new file mode 100644 index 0000000000000000000000000000000000000000..6cfb5caa2667eac391591d0735382c9621f02c51 --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/image_and_video/fine_tune.md @@ -0,0 +1,340 @@ +# 对象检测微调 + +在本教程中,我们将使用COCO数据集当中book分类下的部分图片,对MaskRCNN模型进行微调。数据集包含180张图片,我们将用它来说明如何在MindSpore中训练实例细分模型。本教程通过终端运行,点击下载教程[代码与数据集](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/source-codes/MaskRCNNFineTune.zip)。 + +## 定义数据集 + +案例中采用自定义数据集的方式加载COCO数据集的子集,完整数据处理代码可参考`src/dataset.py`。数据集由180张包含书籍的图片组成,图像与标注均来自于COCO。在数据处理过程中,`COCOSubDataset`继承`mindspore.dataset`类,并实现`__len__`和`__getitem__`。 + +在`__getitem__`中,应该返回: + +- img:图像 +- annos:图像的bbox、分类和iscrowd信息 +- mask:二进制的图像分割蒙版 +- mask_shape:图像分割蒙版shape + +在`__getitem__`中,应该返回: + +- img:图像 +- annos:图像的bbox、分类和iscrowd信息 +- mask:二进制的图像分割蒙版 +- mask_shape:图像分割蒙版shape + +> 运行本案例需要在MindSpore1.2及以上版本的基础上安装以下依赖项: +> +> Cython +> +> pycocotools +> +> mmcv==0.2.14 + +可通过运行代码文件夹中的`requirements.txt`完成。 + +下面是一个图像分割的示例: + +![bookmask](./images/bookmask.png) + +### 编写自定义数据集 + +自定义的COCO数据集处理代码如下: + +```python +class COCOSubDataset(): + def __init__(self, coco_root, is_training=True, config=None): + + data_type = config.train_data_type + + # 调用COCO数据集的接口读取数据 + anno_json = os.path.join(coco_root, config.instance_set.format(data_type)) + coco = COCO(anno_json) + + self.image_files = [] + self.image_anno_dict = {} + self.masks = {} + self.masks_shape = {} + + # 读取分类的类别信息 + train_cls = config.coco_classes + train_cls_dict = {} + for i, cls in enumerate(train_cls): + train_cls_dict[cls] = i + + classs_dict = {} + cat_ids = coco.loadCats(coco.getCatIds()) + for cat in cat_ids: + classs_dict[cat["id"]] = cat["name"] + + # 读取数据集中的图像、mask、mask_shape + image_ids = coco.getImgIds() + images_num = len(image_ids) + for ind, img_id in enumerate(image_ids): + image_info = coco.loadImgs(img_id) + file_name = image_info[0]["file_name"] + anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = coco.loadAnns(anno_ids) + image_path = os.path.join(coco_root, data_type, file_name) + + annos = [] + instance_masks = [] + image_height = coco.imgs[img_id]["height"] + image_width = coco.imgs[img_id]["width"] + + # 进一步处理,获取图像的mask、box、iscrowd和分类信息 + for label in anno: + bbox = label["bbox"] + class_name = classs_dict[label["category_id"]] + if class_name in train_cls: + # 获取二进制的coco mask + m = annToMask(label, image_height, image_width) + if m.max() < 1: + print("all black mask!!!!") + continue + # 根据对象是单个还是一组处理mask数据 + if label['iscrowd'] and (m.shape[0] != image_height or m.shape[1] != image_width): + m = np.ones([image_height, image_width], dtype=np.bool) + instance_masks.append(m) + + # 获取coco box + x1, x2 = bbox[0], bbox[0] + bbox[2] + y1, y2 = bbox[1], bbox[1] + bbox[3] + annos.append([x1, y1, x2, y2] + [train_cls_dict[class_name]] + [int(label["iscrowd"])]) + else: + print("not in classes: ", class_name) + + # 获取图像、标注信息、mask与mask shape + self.image_files.append(image_path) + self.image_anno_dict[image_path] = np.array(annos) + instance_masks = np.stack(instance_masks, axis=0) + self.masks[image_path] = np.array(instance_masks) + self.masks_shape[image_path] = np.array(instance_masks.shape, dtype=np.int32) + + + def __getitem__(self, index): + """ + 
返回单个PIL图像,和与图像关联的分割信息 + """ + image_name = self.image_files[index] + img = np.fromfile(image_name, dtype=np.uint8) + annos = np.array(self.image_anno_dict[image_name], dtype=np.int32) + mask = self.masks[image_name] + mask_shape = self.masks_shape[image_name] + + return img, annos, mask, mask_shape + + def __len__(self): + return len(self.image_files) +``` + +在数据集加载过程中,通过`GeneratorDataset`接口调用自定义的数据集,并完成`map`操作处理数据。 + +```python +def create_new_dataset(image_dir, batch_size=config.batch_size, is_training=True, num_parallel_workers=8): + """创建用于训练的数据集""" + print("=============================================") + cv2.setNumThreads(0) + de.config.set_prefetch_size(8) + # 读取自定义数据集 + subcoco_dataset = COCOSubDataset(coco_root=image_dir, is_training=is_training, config=config) + + dataset_column_names = ["image", "annotation", "mask", "mask_shape"] + # 完成自定义数据集加载 + ds = de.GeneratorDataset(subcoco_dataset, column_names=dataset_column_names, + num_shards=None, shard_id=None, + num_parallel_workers=4, shuffle=is_training, num_samples=10) + # 解码图像 + decode = C.Decode() + ds = ds.map(operations=decode, input_columns=["image"]) + + # 通过map完成数据的过程处理,进一步获取图像的label、box、valid_num信息 + compose_map_func = (lambda image, annotation, mask, mask_shape: + preprocess_fn(image, annotation, mask, mask_shape, is_training)) + + ds = ds.map(operations=compose_map_func, + input_columns=["image", "annotation", "mask", "mask_shape"], + output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], + column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], + python_multiprocessing=False, + num_parallel_workers=num_parallel_workers) + ds = ds.batch(batch_size, drop_remainder=True, pad_info={"mask": ([config.max_instance_count, None, None], 0)}) + + + return ds +``` + +## 定义模型 + +本教程中我们将基于ResNet50使用Mask R-CNN模型。模型的定义位于`src/maskrcnn/mask_rcnn_r50.py`文件中。MaskRCNN是一个两级目标检测网络,作为FasterRCNN的扩展模型,在现有的边框识别分支的基础上增加了一个预测目标掩码的分支。该网络采用区域候选网络(RPN),可与检测网络共享整个图像的卷积特征,无需任何代价就可轻松计算候选区域。整个网络通过共享卷积特征,将RPN和掩码分支合并为一个网络。 + +可参考[MaskRCNN论文](http://cn.arxiv.org/pdf/1703.06870v3)获取更多模型信息。 + +![maskrcnn](./images/maskrcnn.PNG) + +### 将预训练模型用于微调 + +下载已经预训练好的[ResNet50模型](https://download.mindspore.cn/model_zoo/r1.2/resnet50_ascend_v120_imagenet2012_official_cv_bs256_acc76/resnet50_ascend_v120_imagenet2012_official_cv_bs256_acc76.ckpt),针对书籍分类进行微调。 + +首先需要将下载好的模型放置在`MaskRCnnFineTune/`文件夹路径下,并重命名为`resnet50.ckpt`,然后运行`convert_checkpoint.py`。在该脚本中提取了ResNet的主干,作为backbone用于后面的训练: + +```python + +def load_weights(model_path, use_fp16_weight): + + ms_ckpt = load_checkpoint(model_path) + weights = {} + for msname in ms_ckpt: + # 提取主干网络 + if msname.startswith("layer") or msname.startswith("conv1") or msname.startswith("bn"): + param_name = "backbone." 
+ msname + else: + param_name = msname + # 重命名下采样层参数,使其与新网络中的层名对应 + if "down_sample_layer.0" in param_name: + param_name = param_name.replace("down_sample_layer.0", "conv_down_sample") + if "down_sample_layer.1" in param_name: + param_name = param_name.replace("down_sample_layer.1", "bn_down_sample") + weights[param_name] = ms_ckpt[msname].data.asnumpy() + # 设置数据类型 + if use_fp16_weight: + dtype = mstype.float16 + else: + dtype = mstype.float32 + parameter_dict = {} + # 将权重转换为Parameter + for name in weights: + parameter_dict[name] = Parameter(Tensor(weights[name], dtype), name=name) + param_list = [] + for key, value in parameter_dict.items(): + param_list.append({"name": key, "data": value}) + return param_list + +if __name__ == "__main__": + parameter_list = load_weights(args_opt.ckpt_file, use_fp16_weight=False) + save_checkpoint(parameter_list, "resnet50_backbone.ckpt") +``` + +运行成功后在原目录下会得到用于重训的`resnet50_backbone.ckpt`。 + +## 执行训练 + +现在,我们执行`MaskRCnnFineTune/train.py`文件,利用之前获取的backbone和数据集,完成训练过程。 + +```python +# 配置运行需要的信息 +parser = argparse.ArgumentParser(description="MaskRcnn training") + +parser.add_argument("--do_train", type=ast.literal_eval, default=True, help="Do train or not, default is true.") +parser.add_argument("--pre_trained", type=str, default="mask_rcnn-12_7393.ckpt", + help="Pretrain file path.") +parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") + +# 解析命令行参数 +args_opt = parser.parse_args() + +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=0) + +if __name__ == '__main__': + print("Start train for maskrcnn!") + rank = 0 + device_num = 1 + print("Start create dataset!") + # 调用接口进行数据处理 + dataset = create_new_dataset(image_dir=config.coco_root, batch_size=config.batch_size, is_training=True, num_parallel_workers=8) + dataset_size = dataset.get_dataset_size() + print("total images num: ", dataset_size) + print("Create dataset done!") + + # 实例化网络 + net = Mask_Rcnn_Resnet50(config=config) + net = net.set_train() + + # 加载预训练模型 + load_path = args_opt.pre_trained + if load_path != "": + param_dict = load_checkpoint(load_path) + if config.pretrain_epoch_size == 0: + for item in list(param_dict.keys()): + if not (item.startswith('backbone') or item.startswith('rcnn_mask')): + param_dict.pop(item) + load_param_into_net(net, param_dict) + + # 设定损失函数、学习率、优化器 + loss = LossNet() + lr = 0.0001 + opt = Momentum(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum, + weight_decay=config.weight_decay, loss_scale=config.loss_scale) + # 包装损失函数 + net_with_loss = WithLossCell(net, loss) + # 通过TrainOneStepCell自定义训练过程 + net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale) + # 监控训练过程 + time_cb = TimeMonitor(data_size=dataset_size) + loss_cb = LossCallBack(rank_id=rank) + cb = [time_cb, loss_cb] + + # 保存训练后的模型 + if config.save_checkpoint: + # 设置模型保存参数 + ckptconfig = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * dataset_size, + keep_checkpoint_max=config.keep_checkpoint_max) + save_checkpoint_path = os.path.join(config.save_checkpoint_path, 'ckpt_' + str(rank) + '/') + # 应用模型保存参数 + ckpoint_cb = ModelCheckpoint(prefix='mask_rcnn', directory=save_checkpoint_path, config=ckptconfig) + cb += [ckpoint_cb] + + # 进行训练 + model = Model(net) + model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False) +``` + +在7个epoch的训练过程中,可以获得如下输出: + +```text +total loss is: 1.3203125 +total loss is: 0.88623046875 +total loss is: 0.794921875 +total loss is: 0.7216796875 +total loss is: 0.67138671875 +epoch time: 133629.138 ms, 
per step time: 26725.828 ms +total loss is: 0.65625 +total loss is: 0.646484375 +total loss is: 0.5712890625 +total loss is: 0.56982421875 +total loss is: 0.5732421875 +epoch time: 4547.359 ms, per step time: 909.472 ms +total loss is: 0.5595703125 +total loss is: 0.5166015625 +total loss is: 0.8193359375 +total loss is: 0.389892578125 +total loss is: 0.44970703125 +epoch time: 4639.649 ms, per step time: 927.930 ms +total loss is: 0.360107421875 +total loss is: 0.25830078125 +total loss is: 0.30224609375 +total loss is: 0.2236328125 +total loss is: 0.1971435546875 +epoch time: 4851.436 ms, per step time: 970.287 ms +total loss is: 0.2021484375 +total loss is: 0.36376953125 +total loss is: 0.1787109375 +total loss is: 0.56884765625 +total loss is: 0.1864013671875 +epoch time: 4904.663 ms, per step time: 980.933 ms +total loss is: 0.184326171875 +total loss is: 0.1395263671875 +total loss is: 0.301025390625 +total loss is: 0.1458740234375 +total loss is: 0.36376953125 +epoch time: 4838.346 ms, per step time: 967.669 ms +total loss is: 0.1260986328125 +total loss is: 0.08843994140625 +total loss is: 0.10125732421875 +total loss is: 0.09942626953125 +total loss is: 0.162109375 +epoch time: 5143.703 ms, per step time: 1028.741 ms +``` + +经过训练的模型可以分割图片中的书本: + +原图: +![bookmask](./images/maskori.png) + +分割: + +![bookmask](./images/maskres.png) diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/attack_result.png b/tutorials/source_zh_cn/middleclass/image_and_video/images/attack_result.png new file mode 100644 index 0000000000000000000000000000000000000000..3169f95a7821e1a936a23d7c0cb54e28c8d0b858 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/attack_result.png differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/bookmask.png b/tutorials/source_zh_cn/middleclass/image_and_video/images/bookmask.png new file mode 100644 index 0000000000000000000000000000000000000000..93aca87c89b44ab2dba6de8dcb266fa05d8a5237 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/bookmask.png differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/dcgan.png b/tutorials/source_zh_cn/middleclass/image_and_video/images/dcgan.png new file mode 100644 index 0000000000000000000000000000000000000000..5bf9e630be39fb74b9d0eff8624cbcc33eb21cef Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/dcgan.png differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/maskori.png b/tutorials/source_zh_cn/middleclass/image_and_video/images/maskori.png new file mode 100644 index 0000000000000000000000000000000000000000..f7dc9eecf39f539a95e8b387ef22d2078d126049 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/maskori.png differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/maskrcnn.PNG b/tutorials/source_zh_cn/middleclass/image_and_video/images/maskrcnn.PNG new file mode 100644 index 0000000000000000000000000000000000000000..112e673af1f1d1845ac1dd0d31200e33979f2a1a Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/maskrcnn.PNG differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/maskres.png b/tutorials/source_zh_cn/middleclass/image_and_video/images/maskres.png new file mode 100644 index 0000000000000000000000000000000000000000..eb140b8a42dcb165f5b2682a16dc21349897aedb Binary files /dev/null and 
b/tutorials/source_zh_cn/middleclass/image_and_video/images/maskres.png differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/panda.png b/tutorials/source_zh_cn/middleclass/image_and_video/images/panda.png new file mode 100644 index 0000000000000000000000000000000000000000..507d7f4cefd9ec6c3ced91ca452be1ca051f9a02 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/panda.png differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/tex21-1.gif b/tutorials/source_zh_cn/middleclass/image_and_video/images/tex21-1.gif new file mode 100644 index 0000000000000000000000000000000000000000..7d658baa7af9a4aa578e700191bde4e8c5279542 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/tex21-1.gif differ diff --git a/tutorials/source_zh_cn/middleclass/image_and_video/images/tex22-1.gif b/tutorials/source_zh_cn/middleclass/image_and_video/images/tex22-1.gif new file mode 100644 index 0000000000000000000000000000000000000000..ad6c9f5b70c41ab23ad0d7c52a449b37820db471 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/image_and_video/images/tex22-1.gif differ diff --git a/tutorials/source_zh_cn/middleclass/text/images/pair.png b/tutorials/source_zh_cn/middleclass/text/images/pair.png new file mode 100644 index 0000000000000000000000000000000000000000..5b7b315ae47d5cf69dd1bcb5c2365ec0aaea7a33 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/text/images/pair.png differ diff --git a/tutorials/training/source_en/advanced_use/custom_operator_gpu.md b/tutorials/training/source_en/advanced_use/custom_operator_gpu.md index a76aadaae3d9273403a1d5cb350e13e702f09f66..cbd7ebe4996042d33bc7fc803c2a6755cd4f998a 100644 --- a/tutorials/training/source_en/advanced_use/custom_operator_gpu.md +++ b/tutorials/training/source_en/advanced_use/custom_operator_gpu.md @@ -246,17 +246,78 @@ context.set_context(device_target='GPU') @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard -def test_TensroAdd(): +def test_TensorAdd(): x1 = Tensor(np.ones((3, 4), np.float32)) x2 = Tensor(np.ones((3, 4), np.float32)) y = ops.TensorAddV2()(x1, x2) print('result: ', y) ``` -When the command `pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py` executes, you can see the results meeting expectations: +When the command `pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_TensorAdd` executes, you can see the results meeting expectations: ```text result: [[2. 2. 2. 2.] [2. 2. 2. 2.] [2. 2. 2. 2.]] ``` + +## Defining Operators' BProp Functions + +If an operator needs to support automatic differentiation, its back-propagation function (bprop) needs to be defined in its primitive. In the bprop, you describe the reverse computing logic that uses the forward input, the forward output, and the output gradient to obtain the input gradient. The reverse computation logic can be composed of built-in operators or custom reverse operators. + +Pay attention to the following points when defining an operator's bprop function: + +- The input parameters of the bprop function are, in order, the forward input, the forward output, and the output gradient. If the operator is a multi-output operator, the forward output and the output gradient are provided in the form of tuples. +- The return value of the bprop function is a tuple composed of the input gradients, whose elements are in the same order as the forward input parameters. Even if there is only one input gradient, the return value must be in the form of a tuple. + +For example, the bprop primitive of `TensorAddV2` is: + +```python +import mindspore.ops as ops +@bprop_getters.register(ops.TensorAddV2) +def get_bprop_tensoraddv2(self): + """Generate bprop for TensorAddV2""" + + def bprop(x, y, out, dout): + return dout, dout + + return bprop +``` + +Define the bprop test case in the file `test_tensoraddv2_op.py`. + +```python +import mindspore.nn as nn +import mindspore.ops as ops +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = ops.GradOperation(sens_param=True) + self.network = network + + def construct(self, x1, x2, sens): + gout = self.grad(self.network)(x1, x2, sens) + return gout + +def test_grad_net(): + x1 = Tensor(np.ones((3, 4), np.float32)) + x2 = Tensor(np.ones((3, 4), np.float32)) + sens = Tensor(np.arange(3 * 4).reshape(3, 4).astype(np.float32)) + grad = Grad(Net()) + dx = grad(x1, x2, sens) + print("dx[0]: ", dx[0].asnumpy()) +``` + +Run the case: + +```bash +pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_grad_net +``` + +Running results: + +```text +dx[0]: [[0. 1. 2. 3.] + [4. 5. 6. 7.] + [8. 9. 10. 11.]] +``` + diff --git a/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md b/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md index 3059a4b4fe668ab7dc33a412d6e460d3b69581b2..b6879ef8dfe720bdc73d17825723bc008260dddb 100644 --- a/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md +++ b/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md @@ -283,6 +283,12 @@ Model.train() profiler.analyse() ``` +To use MindInsight to visualize communication performance data, you need to install the communication performance data parsing WHL package provided by the supporting software package of the Ascend 910 AI processor. The WHL package is released with the supporting software package. Refer to the following command to complete the installation. + +```bash +pip install /usr/local/Ascend/tools/hccl_parser-{version}-py3-none-any.whl +``` + ## Resource Utilization ### Cluster Memory Analysis diff --git a/tutorials/training/source_zh_cn/advanced_use/concept_drift_time_series.md b/tutorials/training/source_zh_cn/advanced_use/concept_drift_time_series.md new file mode 100644 index 0000000000000000000000000000000000000000..f92c69745201333e4fe93ae4618a331273244fa9 --- /dev/null +++ b/tutorials/training/source_zh_cn/advanced_use/concept_drift_time_series.md @@ -0,0 +1,136 @@ +# 实现时序数据概念漂移检测应用 + + + +- [实现时序数据概念漂移检测应用](#实现时序数据概念漂移检测应用) + - [概述](#概述) + - [准备环节](#准备环节) + - [下载数据集](#下载数据集) + - [导入Python库&模块](#导入python库模块) + - [数据处理](#数据处理) + - [初始化概念漂移检测模块](#初始化概念漂移检测模块) + - [启动概念漂移检测](#启动概念漂移检测) + - [查看结果](#查看结果) + + + + + +## 概述 + +概念漂移(Concept Drift)是AI学习领域的一种重要数据现象,表现为在线推理数据(实时分布) +与训练阶段(历史分布)不一致。概念漂移检测能够及时发现数据分布变化,提前预测模型失效征兆,对AI模型的及时调整具有重要意义。 + +概念漂移检测本质上是检测数据分布变化,本示例提出一种检测数据变化的方法,对比新窗口数据的特征是否足够偏离历史窗口特征,如若偏离程度大于某一阈值,则数据发生概念漂移。 + +本例会实现一个简单的时序数据概念漂移检测的功能,整体流程如下: + +1. 下载公开数据集或构造数据。 +2. 定义概念漂移类参数。 +3. 调用概念漂移检测函数。 +4. 
查看结果。 + +> 你可以在这里找到完整可运行的样例代码: 。 + +## 准备环节 + +确保已经正确安装了MindSpore。如果没有,可以通过[MindSpore安装页面](https://www.mindspore.cn/install)进行安装。 + +### 下载数据集 + +示例中用到金融领域公开数据集:标普500指数(S&P 500)成分股的美国股市历史记录。 +> 数据集下载页面:。 + +将数据集下载并解压到本地路径下,目录结构如下: + +```bash +├── archive + └── individual_stocks_5yr + └── individual_stocks_5yr +``` + +数据路径:archive/individual_stocks_5yr/individual_stocks_5yr。文件夹内每一个csv文件为一组数据用例。 + +### 导入Python库&模块 + +在使用前,需要导入需要的Python库。 + +```python +import numpy +import matplotlib +import itertools +import mindarmour +``` + +## 数据处理 + +从数据路径archive/individual_stocks_5yr/individual_stocks_5yr中打开一个数据用例。 + +```python +import numpy as np +DATA_FILE = r'archive/individual_stocks_5yr/individual_stocks_5yr/AEE_data.csv' +data = np.loadtxt(DATA_FILE, str, delimiter=",") +``` + +`data`数据包含了`date`,`open`,`high`,`low`,`close`,`volume`,`Name`列,其中`open`,`high`,`low`,`close`,`volume`为数值列,可以选择数值列中的某一列或某几列进行概念漂移检测。 + +```python +data = data[1:, 2].astype('float64') # 选择下标为2的列 +``` + +或 + +```python +data = data[1:, 2: 4].astype('float64') # 选择下标为2、3两列 +``` + +为了方便样例使用,可以通过构造的方式获得数据,如下方代码所示。 + +```python +import numpy as np +data = 5*np.random.rand(1000) +data[200: 800] = 50*np.random.rand(600) +``` + +## 初始化概念漂移检测模块 + +导入概念漂移检测模块,并进行初始化,示例代码如下: + +```python +from mindarmour import ConceptDriftCheckTimeSeries + +concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10, step=10, threshold_index=1.5, need_label=False) +``` + +初始化参数含义: + +- `window_size(int)`:概念窗口。数值不小于10,如果给定输入数据`data`的长度,`window_size`范围在[10, 1/3*len(`data`)]之间。一般来说,如果时序数据为周期性函数,`window_size`的大小可以选择2-5倍的周期长度。举例,`data`的长度为1000,周期为30,那么`window_size`的范围可以在[10, 333],考虑到数据周期性,`window_size`可以取值90。 +- `rolling_window(int)`:平滑窗口。数值范围在[1, `window_size`]之间。默认值:10。 +- `step(int)`:窗口滑动步长。数值范围在[1, `window_size`]之间。默认值:10。 +- `threshold_index(float)`:阈值系数。阈值系数越高,阈值越大。默认值:1.5。 +- `need_label(bool)`:标签需求。False或True。如果为True,表明需要概念漂移标签;如果为False,则不需要概念漂移标签。默认值:False。 + +## 启动概念漂移检测 + +完成模块初始化后,调用概念漂移检测函数`concept_check`。 + +```python +drift_score, threshold, concept_drift_location = concept.concept_check(data) +``` + +返回值 + +- `drift_score(numpy.ndarray)`:概念漂移分数。针对输入`data`,获得其发生概念漂移的置信分数。分数越高,概念漂移的可能性越大。 +- `threshold(float)`:概念漂移阈值。根据`threshold_index(float)`计算获得的阈值大小。 +- `concept_drift_location(list)`:概念漂移发生位置。返回概念漂移发生的x轴对应位置,通常为某个x轴区域。 + +## 查看结果 + +当执行完`concept.concept_check(data)`后,会将执行结果保存为PDF文件,命名为"concept_drift_check.pdf"。 + +如下图所示: + +![概念漂移](./images/concept_drift_timeseries.JPG) + +**子图1**:用户输入的数据`data`。数据中发生概念漂移的位置用蓝色五星标出,红色虚线(竖直方向)表示概念漂移发生最明显的位置。 +**子图2**:概念漂移置信分数`drift_score`(针对子图1中的数据),分数越高,概念漂移的可能性越大。红色虚线表示判断概念漂移的阈值`threshold`,虚线之上的`drift_score`所对应的横轴位置,判定为发生概念漂移。`threshold`的大小可根据`threshold_index`进行调节。 diff --git a/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md b/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md index bd159ebb6b5596d616fd8f0e401012eb52c21029..1a67d6842c03ff5661d3161f70fbdd4d9557bd10 100644 --- a/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md +++ b/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md @@ -244,17 +244,77 @@ context.set_context(device_target='GPU') @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard -def test_TensroAdd(): +def test_TensorAdd(): x1 = Tensor(np.ones((3, 4), np.float32)) x2 = Tensor(np.ones((3, 4), np.float32)) y = ops.TensorAddV2()(x1, x2) print('result: ', y) ``` -通过`pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py`命令执行后,可以看到结果符合预期: +通过`pytest -s 
+
+## 定义算子反向传播函数
+
+如果算子要支持自动微分,需要在其原语中定义反向传播函数(bprop)。你需要在bprop中描述利用正向输入、正向输出和输出梯度得到输入梯度的反向计算逻辑。反向计算逻辑可以使用内置算子或自定义反向算子构成。
+
+定义算子反向传播函数时需注意以下几点:
+
+- bprop函数的入参顺序约定为正向的输入、正向的输出、输出梯度。若算子为多输出算子,正向输出和输出梯度将以元组的形式提供。
+- bprop函数的返回值形式约定为输入梯度组成的元组,元组中元素的顺序与正向输入参数顺序一致。即使只有一个输入梯度,返回值也要求是元组的形式。
+
+例如,`TensorAddV2`的反向传播函数注册如下:
+
+```python
+import mindspore.ops as ops
+# bprop_getters为MindSpore内部的反向函数注册器;
+# 下面的导入路径为假设值,不同MindSpore版本可能有所不同。
+from mindspore.ops._grad.grad_base import bprop_getters
+
+@bprop_getters.register(ops.TensorAddV2)
+def get_bprop_tensoraddv2(self):
+    """Generate bprop for TensorAddV2"""
+
+    def bprop(x, y, out, dout):
+        # 加法的梯度原样回传给两个输入
+        return dout, dout
+
+    return bprop
+```
+
+在`test_tensoraddv2_op.py`文件中定义反向用例。
+
+```python
+import numpy as np
+import mindspore.nn as nn
+import mindspore.ops as ops
+from mindspore import Tensor
+
+class Grad(nn.Cell):
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.grad = ops.GradOperation(sens_param=True)
+        self.network = network
+
+    def construct(self, x1, x2, sens):
+        gout = self.grad(self.network)(x1, x2, sens)
+        return gout
+
+def test_grad_net():
+    x1 = Tensor(np.ones((3, 4), np.float32))
+    x2 = Tensor(np.ones((3, 4), np.float32))
+    sens = Tensor(np.arange(3 * 4).reshape(3, 4).astype(np.float32))
+    # Net为封装TensorAddV2算子的网络,定义见上文示意
+    grad = Grad(Net())
+    dx = grad(x1, x2, sens)
+    print("dx[0]: ", dx[0].asnumpy())
+```
+
+执行用例:
+
+```bash
+pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_grad_net
+```
+
+执行结果:
+
+```text
+dx[0]: [[0. 1. 2. 3.]
+ [4. 5. 6. 7.]
+ [8. 9. 10. 11.]]
+```
diff --git a/tutorials/training/source_zh_cn/advanced_use/debugger.md b/tutorials/training/source_zh_cn/advanced_use/debugger.md
index e0136e570ce29bfa133132a4f0957b321ae017d5..6d51232f8f503e580c869b4d53d468dce55db343 100755
--- a/tutorials/training/source_zh_cn/advanced_use/debugger.md
+++ b/tutorials/training/source_zh_cn/advanced_use/debugger.md
@@ -33,6 +33,7 @@ MindSpore调试器是为图模式训练提供的调试工具,可以用来查
 - 在MindInsight调试器界面结合计算图,查看图节点的输出结果;
 - 设置监测点,监测训练异常情况(比如检查张量溢出),在异常发生时追踪错误原因;
 - 查看权重等参数的变化情况。
+- 查看图节点和源代码的对应关系。
 
 ## 操作流程
 
@@ -73,7 +74,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
 
 ## 调试器页面介绍
 
-训练连接成功后,可以在MindInsight调试器界面查看计算图等训练元信息,调试器页面布局由计算图、节点列表、节点信息、监测点列表、监测点命中列表等部分组成。
+训练连接成功后,可以在MindInsight调试器界面查看计算图等训练元信息,调试器页面布局由计算图、节点列表、节点信息、监测点列表、监测点命中列表、堆栈列表、堆栈信息等部分组成。
 
 ![debugger_init_page](images/debugger_init_page.png)
 
@@ -98,7 +99,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
 
 ### 节点信息
 
-点击计算图上的节点后,可以在UI下方查看该节点的详细信息,如图2所示。该部分展示了节点的输出和输入,训练的`轮次`数目,`张量`的`类型`、`形状`和`数值`等信息。
+点击计算图上的节点后,可以在UI下方查看该节点的详细信息,如图2所示。该部分展示了节点的输出和输入,训练的`轮次`数目,`张量`的`类型`、`形状`和`数值`等信息。点击`数值`信息里的`下载`,可以将该张量数据下载为.npy文件,默认在download文件夹下。
 
 在GPU环境下,选中图上的某个可执行节点后,单击鼠标右键,可选择`运行到该节点`,代表将训练脚本运行到被选中节点(不超过一个`轮次`)。
 
@@ -158,6 +159,28 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
 图5展示了监测点触发后的展示页面,该页面和`节点列表`所在位置相同。触发的节点以及监控条件会按照节点的执行序排列,触发的监控条件上会显示该条件的设置值以及触发该条件的实际值。 另外,用户点击某一行,会在计算图中跳转到对应节点,可以进一步查看节点信息分析异常结果出现的原因。点击`查看`进入张量检查视图可以查看触发的监测点信息以及调优向导,如图6所示。
 
+### 堆栈列表
+
+通过调试器页面左上方的切换按钮,可以从`节点列表`或`命中的监测点`切换到`堆栈列表`。
+
+在调试器的堆栈列表页,可以看到所有的堆栈信息;在搜索框中输入关键字,可以显示匹配的堆栈信息。列表分页显示,点击底部的页码,可以快速跳转到对应页。
+
+点击列表中的某一项,可以自动跳转到节点列表,在节点列表可以看到与这一行代码相关的节点。
+
+![debugger_stack_list](images/debugger_stack_list.png)
+
+图6: 堆栈列表
+
+### 堆栈信息
+
+在图上定位到某一个节点时,点击计算图下方的`堆栈信息`标签,会看到该节点对应的堆栈信息。
+
+在堆栈信息标签下,点击某一行的`搜索`,可以搜索与这一行相关的所有节点,搜索结果会自动展示在节点列表中。
+
+![debugger_stack_info](images/debugger_stack_info.png)
+
+图7: 堆栈信息
+
 ### 重新检查
 
 为了更详细地对节点进行监测分析,用户可以在修改监测点监控的节点、添加或删除监测点后,对当前轮次重新检查。`重新检查`按钮位于监测点列表右上角,如图3所示。
 
@@ -175,23 +198,25 @@ mindinsight
start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
 
 ![debugger_tensor_view](images/debugger_tensor_view.png)
 
-图6: 查看`张量`值
+图8: 查看张量值
 
 一些`张量`的维度过多,无法直接在主页进行展示。用户可以点击对应的`查看`按钮,在弹出的张量检查视图中查看`张量`值的详细信息。
 
-如图6所示,张量检查视图将`张量`值展示在UI的中上位置,用户可以进行`维度选择`,点击`显示当前step`,`显示上一step`和`显示对比结果`对张量进行显示和对比(当前仅支持参数节点与上一轮次对比)。此外,用户可以设置切片进行`维度选择`来显示相应维度的`张量`。
+如图8所示,张量检查视图将`张量`值展示在UI的中上位置,用户可以进行`维度选择`,点击`显示当前step`,`显示上一step`和`显示对比结果`对张量进行显示和对比(当前仅支持参数节点与上一轮次对比)。此外,用户可以设置切片进行`维度选择`来显示相应维度的`张量`。
 
 视图的最上方展示了`节点信息`、`当前轮次`以及`统计信息`;视图的左侧展示了调优向导,当监测点命中时,将显示命中信息和相关的调优建议;视图的下方展示了张量关系图以及详细的`节点信息`。
 
 通过张量关系图,可以分析当前张量是通过哪些张量计算出来的,还可以分析当前张量影响到了哪些张量。张量图中标注了命中监测点的条件的缩写,方便用户快速识别张量问题的传播路径。每个条件的缩写可以在“设置监测点”一节中查到。
 
+张量检查视图也提供了下载功能,用户可以将需要的张量下载,进行深入的处理分析。
+
 ## 使用调试器进行调试
 
 1. 在调试器环境准备完成后,打开调试器界面,如下图所示:
 
     ![debugger_waiting](images/debugger_waiting.png)
 
-    图7: 调试器等待训练连接
+    图9: 调试器等待训练连接
 
     此时,调试器处于等待训练启动和连接的状态。
 
@@ -201,7 +226,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
 
    ![debugger_ask_recommend](images/debugger_ask_recommend.png)
 
-   图8: 等待用户选择是否使用推荐监测点
+   图10: 等待用户选择是否使用推荐监测点
 
4. 稍后可以看到计算图显示在调试器界面,见图1。
 
@@ -212,7 +237,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
 
6. 监测点触发,见图5。
 
-    监测点触发后,用户查看对应的节点信息,通过张量检查视图找出异常原因,修改脚本,修复问题。
+    监测点触发后,用户查看对应的节点信息和堆栈信息,通过张量检查视图找出异常原因,或者下载张量后通过离线分析找出异常原因,然后修改脚本,修复问题。
 
 ## 注意事项
diff --git a/tutorials/training/source_zh_cn/advanced_use/debugger_offline.md b/tutorials/training/source_zh_cn/advanced_use/debugger_offline.md
new file mode 100755
index 0000000000000000000000000000000000000000..c501958f016d79bf99a900e850d1a25d2f319ebf
--- /dev/null
+++ b/tutorials/training/source_zh_cn/advanced_use/debugger_offline.md
@@ -0,0 +1,98 @@
+# 使用离线调试器
+
+`Linux` `Ascend` `GPU` `模型调优` `中级` `高级`
+
+
+
+- [使用离线调试器](#使用离线调试器)
+    - [概述](#概述)
+    - [操作流程](#操作流程)
+    - [离线调试器环境准备](#离线调试器环境准备)
+    - [离线调试器页面介绍](#离线调试器页面介绍)
+    - [使用离线调试器进行调试](#使用离线调试器进行调试)
+    - [注意事项](#注意事项)
+
+
+
+## 概述
+
+MindSpore离线调试器是基于训练的Dump数据进行可视化调试的工具,可以用来查看并分析计算图节点的中间结果。
+
+离线调试器支持对接离线Dump数据,进行可视化分析,从而解决了在线调试器只能在不开启内存复用的情况下使用的限制。
+
+## 操作流程
+
+1. 准备Dump数据。Dump的使用方式详见[使用Dump功能在Graph模式调试](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/dump_in_graph_mode.html);
+2. 启动MindInsight,指定summary-base-dir为dump配置中的{path}路径的上一层或上两层;
+3. 从训练列表中找到离线调试器入口,点击“离线调试器”,进入调试器页面,开始进行调试分析。
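+
+上述流程可参考如下命令示意(其中各路径均为示例值,需替换为实际环境中的路径):
+
+```bash
+# 1. 训练前通过环境变量指定Dump配置文件,并执行训练脚本生成Dump数据
+export MINDSPORE_DUMP_CONFIG=/home/workspace/data/data_dump.json
+python train.py
+
+# 2. 启动MindInsight,summary-base-dir指向Dump配置中{path}(如/home/workspace/data/dump_dir)的上一层
+mindinsight start --port 8080 --summary-base-dir /home/workspace/data
+```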
+
+## 离线调试器环境准备
+
+使用MindSpore的Dump功能准备离线数据。Dump的使用方式详见[使用Dump功能在Graph模式调试](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/dump_in_graph_mode.html)。
+
+然后,启动MindInsight,指定summary-base-dir为dump配置中的{path}路径的上一层或上两层,即可在UI页面中查询到离线调试器的入口。
+
+MindInsight启动命令:
+
+```text
+mindinsight start --port {PORT} --summary-base-dir /path/to/father/directory/of/dump_dir
+```
+
+或者:
+
+```text
+mindinsight start --port {PORT} --summary-base-dir /path/to/grandfather/directory/of/dump_dir
+```
+
+参数含义如下:
+
+|参数名|属性|功能描述|参数类型|默认值|取值范围|
+|---|---|---|---|---|---|
+|`--port {PORT}`|可选|指定Web可视化服务端口。|Integer|8080|1~65535|
+|`--summary-base-dir /path/to`|必选|Dump配置中的{path}路径的上一层或上两层。例如,Dump配置文件中的path为“/home/workspace/data/dump_dir”,summary-base-dir可以设置为“/home/workspace/data”或“/home/workspace”。|String|./|-|
+
+更多启动参数请参考[MindInsight相关命令](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/mindinsight_commands.html)。
+
+然后,打开MindInsight页面,从离线调试器入口进入调试器界面。
+
+![debugger_offline_entry](images/debugger_offline_entry.png)
+
+图1: 离线调试器入口
+
+## 离线调试器页面介绍
+
+离线调试器界面与在线调试器相同,页面介绍详见[调试器页面介绍](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/debugger.html#id5)。
+
+## 使用离线调试器进行调试
+
+1. 在调试器环境准备完成后,打开调试器界面,如下图所示:
+
+    ![debugger_waiting](images/debugger_waiting.png)
+
+    图2: 调试器等待训练连接
+
+    此时,调试器处于加载离线数据的状态。
+
+2. 稍等片刻,在MindInsight UI上可以看到弹窗,提示选择是否使用推荐监测点,使用步骤与在线调试相同,详见[使用调试器进行调试](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/debugger.html#id14)。
+
+3. 与在线调试器相比,离线调试器可以重置训练轮次。点击右边的编辑图标,输入需要重置的轮次,点击对钩符号即可,如下图所示。
+
+    ![debugger_offline_reset](images/debugger_offline_reset.png)
+
+    图3: 重置训练轮次
+
+## 注意事项
+
+- 场景支持:
+    - 调试器暂不支持CPU场景。
+    - 离线调试器支持单机多卡场景。若要分析多机多卡的场景,需要自行把多机数据汇总到一起。
+
+- GPU场景:
+    - 与在线调试器不同,离线调试器不支持逐节点执行。
+    - 由于GPU上一个轮次是一个子图(而非完整的图),GPU上多图做重新检查时,只能重新检查当前的子图。
+
+- 重新检查只检查当前有张量值的监测点。
+- 调试器展示的图是优化后的最终执行图。调用的算子可能已经与其它算子融合,或者在优化后改变了名称。
+- 如果使用Ascend场景下的异步Dump数据,可以使用MindInsight的数据解析工具DumpParser的`convert_all_data_to_host`接口将异步Dump数据转换为`.npy`文件,从而提高数据分析效率。DumpParser的使用方式详见[DumpParser介绍](https://gitee.com/mindspore/mindinsight/tree/master/mindinsight/parser)。
\ No newline at end of file
diff --git a/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md b/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md
index a4f284f97022ea613898fbcf0bb0accc7d13c25b..7883e8457a71d79ff5b3da7e467d5cb3afdb4893 100644
--- a/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md
+++ b/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md
@@ -71,11 +71,10 @@ MindSpore提供了同步Dump与异步Dump两种模式:
 
 不同模式所需要的配置文件和dump出来的数据格式不同:
 
-- 同步模式较异步模式会占用更多内存,但易用性更好。
-- 一般对于中小型网络(如ResNet)等,推荐优先使用同步Dump模式。在网络占用内存不大的情况下,请优先使用同步Dump。若开启同步Dump后,因为模型过大导致需要的内存超过系统限制,再使用异步Dump。
 - 在Ascend上开启同步Dump的时候,待Dump的算子会自动关闭内存复用。
 - 同步Dump目前支持Ascend、GPU和CPU上的图模式,暂不支持PyNative模式。
 - 异步Dump仅支持Ascend上的图模式,不支持PyNative模式。开启异步Dump的时候不会关闭内存复用。
+- 默认使用异步Dump模式,如果要使用同步Dump模式,需要在配置文件中设置"e2e_dump_settings"。
 
 ## 同步Dump
 
@@ -89,7 +88,7 @@ MindSpore提供了同步Dump与异步Dump两种模式:
         "dump_mode": 0,
         "path": "/absolute_path",
         "net_name": "ResNet50",
-        "iteration": 0,
+        "iteration": "0|5-8|100-120",
         "input_output": 0,
         "kernels": ["Default/Conv-op12"],
         "support_device": [0,1,2,3,4,5,6,7]
@@ -104,11 +103,11 @@ MindSpore提供了同步Dump与异步Dump两种模式:
     - `dump_mode`:设置成0,表示Dump出该网络中的所有算子;设置成1,表示Dump`"kernels"`里面指定的算子。
     - `path`:Dump保存数据的绝对路径。
     - `net_name`:自定义的网络名称,例如:"ResNet50"。
-    - `iteration`:指定需要Dump的迭代,若设置成0,表示Dump所有的迭代。
-    - `input_output`:设置成0,表示Dump出算子的输入和算子的输出;设置成1,表示Dump出算子的输入;设置成2,表示Dump出算子的输出。该配置参数仅支持Ascend和CPU,GPU只能Dump算子的输出。
+    - `iteration`:指定需要Dump数据的迭代。类型为str,用“|”分离要保存的不同区间的step的数据。如"0|5-8|100-120"表示Dump第1个、第6个到第9个以及第101个到第121个step的数据。指定“all”,表示Dump所有迭代的数据。
+    - `input_output`:设置成0,表示Dump出算子的输入和算子的输出;设置成1,表示Dump出算子的输入;设置成2,表示Dump出算子的输出。
     - `kernels`:算子的名称列表。开启IR保存开关`context.set_context(save_graphs=True)`并执行用例,从生成的IR文件`trace_code_graph_{graph_id}`中获取算子名称。详细说明可以参照教程:[如何保存IR](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/mindir.html#ir)。
     - `support_device`:支持的设备,默认设置成0到7即可;在分布式训练场景下,需要dump个别设备上的数据,可以只在`support_device`中指定需要Dump的设备Id。该配置参数在CPU上无效,因为CPU下没有device这个概念。
-    - `enable`:开启E2E Dump,如果同时开启同步Dump和异步Dump,那么只有同步Dump会生效。
+    - `enable`:开启E2E Dump。
     - `trans_flag`:开启格式转换。将设备上的数据格式转换成NCHW格式。若为`True`,则数据会以Host侧的4D格式(NCHW)格式保存;若为`False`,则保留Device侧的数据格式。该配置参数在CPU上无效,因为CPU上没有format转换。
 
 2. 设置Dump环境变量,指定Dump的json配置文件。
 
@@ -144,43 +143,43 @@ MindSpore提供了同步Dump与异步Dump两种模式:
 
 ```text
 {path}/
-    |-- {net_name}/
-        |-- {device_id}/
-            |-- iteration_{iteration}/
-                -- {op_name}_{input_output_index}_{shape}_{data_type}_{format}.bin
-                …
-        |-- graphs/
-            ms_output_trace_code_graph_{graph_id}.pb
-            ms_output_trace_code_graph_{graph_id}.ir
-        |-- execution_order/
-            ms_execution_order_graph_{graph_id}.csv
-
-    |-- .metadata/
-        data_dump.json
+    - rank_{rank_id}/
+        - .dump_metadata/
+        - {net_name}/
+            - {graph_id}/
+                - {iteration_id}/
+                    {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{input_output_index}.{slot}.{format}.npy
+                    ...
+        - graphs/
+            ms_output_trace_code_graph_{graph_id}.pb
+            ms_output_trace_code_graph_{graph_id}.ir
+        - execution_order/
+            ms_execution_order_graph_{graph_id}.csv
 ```
 
 - `path`:`data_dump.json`配置文件中设置的绝对路径。
-- `net_name`:`data_dump.json`配置文件中设置的网络名称。
-- `device_id`:训练的卡号。
+- `rank_id`: 逻辑卡号。
+- `net_name`:`data_dump.json`配置文件中设置的网络名称。
 - `graph_id`:训练的图标号。
-- `iteration`:训练的轮次。
-- `operator_name`:算子名称。
-- `input_output_index` :输入或输出标号,例如`output_0`表示该文件是该算子的第1个输出Tensor的数据。
-- `shape`: 张量维度信息。
-- `data_type`: 数据类型。
+- `iteration_id`:训练的轮次。
+- `op_type`:算子类型。
+- `op_name`:算子名称。
+- `task_id`:任务标号。
+- `stream_id`:流标号。
+- `timestamp`:时间戳。
+- `input_output_index`:输入或输出标号,例如`output.0`表示该文件是该算子的第1个输出Tensor的数据。
+- `slot`:slot标号。
 - `format`: 数据格式。
 
-在CPU上进行数据dump时,没有`device_id`这个目录层级,因为CPU上没有device这个概念,也没有`graphs`、`execution_order`和`.metadata`目录。
-
 ### 同步Dump数据文件介绍
 
-同步Dump生成的数据文件是后缀名为`.bin`的二进制文件,文件命名格式为:
+同步Dump生成的数据文件是后缀名为`.npy`的文件,文件命名格式为:
 
 ```text
-{operator_name}_{input_output_index}_{shape}_{data_type}_{format}.bin
+{op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{input_output_index}.{slot}.{format}.npy
 ```
 
-根据文件名提供的`Tensor`信息,可以用`numpy.fromfile`读取数据,并还原原始数据的`data_type`和`shape`。
+可以用Numpy的`numpy.load`接口读取数据。
 
 同步Dump生成的最终执行图文件后缀名分别为`.pb`和`.ir`,文件命名格式为:
 
```text
ms_output_trace_code_graph_{graph_id}.pb
ms_output_trace_code_graph_{graph_id}.ir
```
 
 同步Dump生成的节点执行序文件后缀名为`.csv`,文件命名格式为:
 
```text
ms_execution_order_graph_{graph_id}.csv
```
 
-`.metadata`记录了训练的原信息,其中`data_dump.json`保存了用户设置的dump配置。
+`.dump_metadata`记录了训练的元信息,其中`data_dump.json`保存了用户设置的dump配置。
 
 ### 同步Dump数据分析样例
 
@@ -322,35 +321,23 @@ IsFeatureMapOutput: true, IsFeatureMapInputList: (0), pri_format: NC1HWC0}
 
 通过算子名称和输入输出信息,可以查找到唯一对应的Tensor数据文件。比如,若要查看Conv2D-op107算子的第1个输出数据对应的Dump文件,可获取以下信息:
 
-- `operator_name`:`Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107`。基于图中序号2声明的算子名称,将其中的`/`替换为`--`可得。
-
-- `input_output_index` :`output_0`表示该文件是该算子的第1个输出Tensor的数据。
-
-在Dump保存的数据对象文件目录下搜索到相应的文件名:
-`Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107_output_0_shape_32_12_13_13_16_Float16_NC1HWC0.bin`。
-从文件名中可以得知以下信息:
-
-- `shape`: 张量维度是`32_12_13_13_16`。
+- `operator_name`:`Conv2D-op107`。
-- `data_type`: 数据类型为`Float16`。
+- `input_output_index`:`output.0`表示该文件是该算子的第1个输出Tensor的数据。
-- `format`: 数据格式为`NC1HWC0`(可通过Dump配置文件修改要保存的数据格式)。
+- `slot`:0,该算子的输出只有一个slot。
-还原数据的时候,首先通过执行:
-
-```python
-import numpy
-numpy.fromfile("Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107_output_0_shape_32_12_13_13_16_Float16_NC1HWC0.bin", numpy.float16)
-```
+在Dump保存的数据对象文件目录下搜索到相应的文件名:
+`Conv2D.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy`。
-生成一维array数据,再通过执行:
+还原数据的时候,通过执行:
 
 ```python
 import numpy
-numpy.reshape(array, (32,12,13,13,16))
+numpy.load("Conv2D.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy")
 ```
 
-还原到原始shape数据。
+生成numpy.array数据。
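+
+在实际定位精度问题时,往往需要批量检查Dump出的`.npy`文件。下面给出一个批量检查数值异常的示意脚本(目录路径为示例值,需按上文介绍的目录结构替换):
+
+```python
+import os
+import glob
+import numpy as np
+
+# 遍历某一迭代目录下的所有.npy文件,检查是否存在NaN或Inf
+dump_dir = "/absolute_path/rank_0/ResNet50/0/0"
+for file_path in glob.glob(os.path.join(dump_dir, "*.npy")):
+    data = np.load(file_path)
+    if np.isnan(data).any() or np.isinf(data).any():
+        print("发现异常数值:", os.path.basename(file_path))
+```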
 
 ## 异步Dump
 
@@ -368,13 +355,10 @@ numpy.reshape(array, (32,12,13,13,16))
         "dump_mode": 0,
         "path": "/absolute_path",
         "net_name": "ResNet50",
-        "iteration": 0,
+        "iteration": "0|5-8|100-120",
         "input_output": 0,
         "kernels": ["Default/Conv-op12"],
-        "support_device": [0,1,2,3,4,5,6,7]
-      },
-      "async_dump_settings": {
-        "enable": true,
+        "support_device": [0,1,2,3,4,5,6,7],
         "op_debug_mode": 0
     }
 }
 ```
 
    - `dump_mode`:设置成0,表示Dump出该网络中的所有算子;设置成1,表示Dump`"kernels"`里面指定的算子。
    - `path`:Dump保存数据的绝对路径。
    - `net_name`:自定义的网络名称,例如:"ResNet50"。
-   - `iteration`:指定需要Dump的迭代。非数据下沉模式下,`iteration`需要设置成0,并且会Dump出每个迭代的数据。
+   - `iteration`:指定需要Dump的迭代。类型为str,用“|”分离要保存的不同区间的step的数据。如"0|5-8|100-120"表示Dump第1个、第6个到第9个以及第101个到第121个step的数据。指定“all”,表示Dump所有迭代的数据。
    - `input_output`:设置成0,表示Dump出算子的输入和算子的输出;设置成1,表示Dump出算子的输入;设置成2,表示Dump出算子的输出。
    - `kernels`:算子的名称列表。开启IR保存开关`context.set_context(save_graphs=True)`并执行用例,从生成的`trace_code_graph_{graph_id}`IR文件中获取算子名称。`kernels`仅支持TBE算子、AiCPU算子、通信算子,若设置成通信算子的名称,将会Dump出通信算子的输入算子的数据。详细说明可以参照教程:[如何保存IR](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/mindir.html#ir)。
    - `support_device`:支持的设备,默认设置成0到7即可;在分布式训练场景下,需要dump个别设备上的数据,可以只在`support_device`中指定需要Dump的设备Id。
-   - `enable`:开启异步Dump,如果同时开启同步Dump和异步Dump,那么只有同步Dump会生效。
    - `op_debug_mode`:该属性用于算子溢出调试,设置成0,表示不开启溢出检测;设置成1,表示开启AiCore溢出检测;设置成2,表示开启Atomic溢出检测;设置成3,表示开启全部溢出检测功能。在Dump数据的时候请设置成0,若设置成其他值,则只会Dump溢出算子的数据。
 
 2. 设置数据Dump的环境变量。
 
@@ -408,8 +391,6 @@ numpy.reshape(array, (32,12,13,13,16))
 注意:
 
 - 若需要dump全量或部分算子,则可以修改json配置文件中的`dump_mode`选项为0或1。
-- 若开启数据下沉功能(设置`model.train`或`DatasetHelper`中的`dataset_sink_mode`参数为`True`),只能dump出配置文件里指定的一个step的数据(此时`iteration 0`表示第0个step),并保存到指定目录下。
-- 若不开启数据下沉功能(设置`model.train`或`DatasetHelper`中的`dataset_sink_mode`参数为`False`),配置文档里`iteration`必须指定为0,所有step的数据都保存在一个目录中,无法支持多step的数据管理。此时建议只执行一次step的数据Dump(可以通过修改脚本只训练一个step)。
 - 使用Dump功能将自动生成最终执行图的IR文件。
 
 ### 异步Dump数据对象目录
 
@@ -418,30 +399,29 @@ numpy.reshape(array, (32,12,13,13,16))
 
 ```text
 {path}/
-    |-- {device_id}/
-        |-- {new_name}_graph_{graph_id}/
-            |-- {graph_id}/
-                |-- {iteration}/
-                    |-- {op_type}.{op_name}.{task_id}.{timestamp}
-                    …
-        |-- graphs/
+    - rank_{rank_id}/
+        - .dump_metadata/
+        - {net_name}/
+            - {graph_id}/
+                - {iteration_id}/
+                    {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}
+                    ...
+        - graphs/
             ms_output_trace_code_graph_{graph_id}.pb
             ms_output_trace_code_graph_{graph_id}.ir
-        |-- execution_order/
+        - execution_order/
             ms_execution_order_graph_{graph_id}.csv
-
-    |-- .metadata/
-        data_dump.json
 ```
 
-- `path`:`data_dump.json`文件中设置的绝对路径。
-- `net_name`:`data_dump.json`文件中设置的网络名称。
-- `device_id`:训练的卡号。
+- `path`:`data_dump.json`配置文件中设置的绝对路径。
+- `rank_id`: 逻辑卡号。
+- `net_name`:`data_dump.json`配置文件中设置的网络名称。
 - `graph_id`:训练的图标号。
-- `iteration`:训练的轮次。
+- `iteration_id`:训练的轮次。
 - `op_type`:算子类型。
 - `op_name`:算子名称。
-- `taskid`:任务标号。
+- `task_id`:任务标号。
+- `stream_id`:流标号。
 - `timestamp`:时间戳。
 
 ### 异步Dump数据文件介绍
 
@@ -459,6 +439,8 @@ numpy.reshape(array, (32,12,13,13,16))
 
 如果`op_type`和`op_name`中出现了“.”、“/”、“\”、空格时,会转换为下划线表示。
 
+Dump生成的原始数据文件也可以使用MindInsight的数据解析工具DumpParser解析,DumpParser的使用方式详见[DumpParser介绍](https://gitee.com/mindspore/mindinsight/tree/master/mindinsight/parser)。MindInsight解析出来的数据格式与同步dump的数据格式完全相同。
+
 异步Dump生成的最终执行图文件和节点执行序文件命名规则与同步Dump相同,可以参考[同步Dump数据文件介绍](#id7)。
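+
+按照上述命名规则,可以用简单的脚本从文件名中解析出各字段,便于批量整理Dump文件。下面给出一个示意(文件名为假设的示例值):
+
+```python
+import re
+
+# 异步Dump文件名格式:{op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}
+# op_type和op_name中的“.”、“/”、“\”和空格已被转换为下划线,因此字段内部不会再出现“.”
+pattern = re.compile(r"(?P<op_type>[^.]+)\.(?P<op_name>[^.]+)\."
+                     r"(?P<task_id>\d+)\.(?P<stream_id>\d+)\.(?P<timestamp>\d+)")
+match = pattern.match("Conv2D.Default_network_conv1_Conv2D-op107.2.2.1623124369613540")
+if match:
+    print(match.groupdict())
+```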
 
 ### 异步Dump数据分析样例
 
diff --git a/tutorials/training/source_zh_cn/advanced_use/images/concept_drift_timeseries.JPG b/tutorials/training/source_zh_cn/advanced_use/images/concept_drift_timeseries.JPG
new file mode 100644
index 0000000000000000000000000000000000000000..3b6cb11c8990228adb01b519f6b88be47ed9424e
Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/concept_drift_timeseries.JPG differ
diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png
old mode 100755
new mode 100644
index 95af3226a83d5def01c3d0cb8d17ba1e3315e638..dedae2d5e8389cff5864fa9b60e15491d4293e42
Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png differ
diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png
old mode 100755
new mode 100644
index dc724b0c58e5e0ea9c3b775f83ea76a2349de9d1..2df2e8aaef6caedfc864d50f50dfc6dc288b0ede
Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png differ
diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_entry.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_entry.png
new file mode 100644
index 0000000000000000000000000000000000000000..6b2a4b903e04df6e9e9f1e9d18d87edc7d98c235
Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_entry.png differ
diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_reset.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_reset.png
new file mode 100644
index 0000000000000000000000000000000000000000..8f80e2cf892840f69331d29bfaead7a265d9c728
Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_reset.png differ
diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png
old mode 100755
new mode 100644
index 8e23df5ae9eaea10baea89f54360b18eed1aeb0d..1d0789310928faf5062284e648231d5ae5d1a458
Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png and 
b/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png old mode 100755 new mode 100644 index ab135b12017e1b6918460a52502398849b803fa4..1b4a9c49f5e568a58c006f0c5e5606e333574064 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_info.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_info.png new file mode 100644 index 0000000000000000000000000000000000000000..9da7c2af41f87fb33a3d4230f57240e90cf1be77 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_info.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_list.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_list.png new file mode 100644 index 0000000000000000000000000000000000000000..02dd27420b7b8041f1ad0921c588196c31b61283 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_list.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png old mode 100755 new mode 100644 index d7bc1fcbbc1e767be07c374d3d298e43160a637d..c209a0971bbc256d8f51d8fb50ea97bb0109e967 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png old mode 100755 new mode 100644 index 63bb4d6066fb81eb1629ba7ac545f69114296ff4..6e83e34d9ddbcf4231e4a8eb19d853ae0928eae0 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png old mode 100755 new mode 100644 index 87dfeb02082e7a518bd6cba31224b73d5d7bdd6e..ed62727543880b0b8f3252d83359d9000ab6db2d Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/nlp.rst b/tutorials/training/source_zh_cn/advanced_use/nlp.rst index 37d9606b68dc72b1259f10a5ee6c3afc872842cc..8d92025b769fbc88c575780e0fbb102eccce57b6 100644 --- a/tutorials/training/source_zh_cn/advanced_use/nlp.rst +++ b/tutorials/training/source_zh_cn/advanced_use/nlp.rst @@ -6,3 +6,4 @@ nlp_sentimentnet nlp_bert_poetry + concept_drift_time_series diff --git a/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md b/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md index 61d3aa71d03c01031e14946f5755cb5979cefc09..491710ca88b6c0062e8812c64580b7911c5c53dc 100644 --- 
a/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md +++ b/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md @@ -286,6 +286,12 @@ Model.train() profiler.analyse() ``` +使用MindInsight可视化通信性能数据需要安装Ascend 910 AI处理器配套软件包提供的通信性能数据解析whl包,whl包随配套软件包发布,参考如下命令完成安装。 + +```bash +pip install /usr/local/Ascend/tools/hccl_parser-{version}-py3-none-any.whl +``` + ## 资源利用 ### 集群内存使用情况分析 diff --git a/tutorials/training/source_zh_cn/advanced_use/visualization_tutorials.rst b/tutorials/training/source_zh_cn/advanced_use/visualization_tutorials.rst index 1480d6709e5a363bb910cf52a33748e2ece3aada..c0b2927316d8c4ec9ef3fba259c0b1475cecbea0 100644 --- a/tutorials/training/source_zh_cn/advanced_use/visualization_tutorials.rst +++ b/tutorials/training/source_zh_cn/advanced_use/visualization_tutorials.rst @@ -10,5 +10,6 @@ hyper_parameters_auto_tuning performance_profiling debugger + debugger_offline model_explanation mindinsight_commands diff --git a/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb b/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb index c6f4e5f8e6e6ddd8453bd441f2088da7d664807e..938b7b2262fa659a96d51dbe97ea39b3ca2058e8 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb +++ b/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb @@ -51,9 +51,9 @@ "\n", "我们示例中用到的`MNIST`数据集是由10类$28*28$的灰度图片组成,训练数据集包含60000张图片,测试数据集包含10000张图片。\n", "\n", - "在Jupyter Notebook中执行如下命令下载MNIST数据集。\n", + "> MNIST数据集下载页面:。页面提供4个数据集下载链接,其中前2个文件是训练数据需要,后2个文件是测试结果需要。\n", "\n", - "> MNIST数据集下载页面:。页面提供4个数据集下载链接,其中前2个文件是训练数据需要,后2个文件是测试结果需要。" + "在Jupyter Notebook中执行如下命令下载MNIST数据集。" ] }, { diff --git a/tutorials/tutorial_code/debugging_info/src/dataset.py b/tutorials/tutorial_code/debugging_info/src/dataset.py index eac47f1b9f5942481b26986265340e5a3608e5f3..cc7ec9bb71305cf4ad330e5d6ce859e766802484 100644 --- a/tutorials/tutorial_code/debugging_info/src/dataset.py +++ b/tutorials/tutorial_code/debugging_info/src/dataset.py @@ -44,6 +44,10 @@ class CustomDataSet: self.repeat_count = 1 self.batch_data_size = (self.batch_size,) + image_size + def get_batch_size(self): + """get batch size""" + return self.batch_size + def get_dataset_size(self): """get dataset size""" return int(self.size / self.batch_size)