diff --git a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/README.md b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/README.md index 4f7bd23ed5cda451322719a23fb2261919c3f75b..1d2a630f9d5022fb8654b188af5a5e21821dee27 100644 --- a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/README.md +++ b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/README.md @@ -125,42 +125,57 @@ pip3 install requirements.txt 2. 单卡训练 - 2.1 设置单卡训练参数(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh),示例如下。 - - - ``` - # 训练epoch - train_epochs=90 - # 训练batch_size - batch_size=90 - ``` + 2.1 设置单卡训练参数(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh),示例如下: + + ``` + # 训练epoch + train_epochs=90 + # 训练batch_size + batch_size=90 + ``` + + 2.2 单卡精度训练指令(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh) + 于终端中运行export ASCEND_DEVICE_ID=0 (0~7)以指定单卡训练时使用的卡,然后使用如下命令启动训练: + bash train_full_1p.sh --data_path= - 2.2 单卡训练指令(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh) + 注意:如果是Ascend910B及之后的平台,请指定更高精度的fp32卷积计算启动训练,命令如下: + bash train_full_1p.sh --data_path= --switch_config=/switch_config_fp32.txt + + 2.3 单卡性能训练指令(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_performance_1p.sh) + + 于终端中运行export ASCEND_DEVICE_ID=0 (0~7)以指定单卡训练时使用的卡,然后使用如下命令启动训练: + bash train_performance_1p.sh --data_path= - ``` - 于终端中运行export ASCEND_DEVICE_ID=0 (0~7)以指定单卡训练时使用的卡 - bash train_full_1p.sh --data_path=xx + 注意:如果是Ascend910B及之后的平台,请指定更高精度的fp32卷积计算启动训练,命令如下: + bash train_performance_1p.sh --data_path= --switch_config=/switch_config_fp32.txt 3. 
8卡训练 3.1 设置8卡训练参数(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh),示例如下。 - - - ``` - # 训练epoch - train_epochs=90 - # 训练batch_size - batch_size=90 - ``` + ``` + # 训练epoch + train_epochs=90 + # 训练batch_size + batch_size=90 + ``` + + 3.2 8卡精度训练指令(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh) + + 于终端中运行export ASCEND_DEVICE_ID=0,1,2,3,4,5,6,7指定训练中使用的卡,然后使用如下命令启动训练: + bash train_full_8p.sh --data_path= + + 注意:如果是Ascend910B及之后的平台,请指定更高精度的fp32卷积计算启动训练,命令如下: + bash train_full_8p.sh --data_path= --switch_config=/switch_config_fp32.txt - 3.2 8卡训练指令(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh) + 3.3 8卡性能训练指令(脚本位于./Facenet_ID0122_for_TensorFlow/test/train_performance_8p.sh) - ``` - 于终端中运行export ASCEND_DEVICE_ID=0,1,2,3,4,5,6,7指定训练中使用的卡 - bash train_full_8p.sh --data_path=xx + 于终端中运行export ASCEND_DEVICE_ID=0,1,2,3,4,5,6,7指定训练中使用的卡,然后使用如下命令启动训练: + bash train_performance_8p.sh --data_path= + 注意:如果是Ascend910B及之后的平台,请指定更高精度的fp32卷积计算启动训练,命令如下: + bash train_performance_8p.sh --data_path= --switch_config=/switch_config_fp32.txt ## 迁移学习指导 @@ -181,10 +196,10 @@ pip3 install requirements.txt ├── README.md //说明文档 ├── requirements.txt //依赖 ├── test - | |—— train_full_1p.sh //单卡训练脚本 - | |—— train_performance_1p.sh //单卡训练脚本 - | |—— train_full_8p.sh //8卡训练脚本 - | |—— train_performance_8p.sh //8卡训练脚本 + | |—— train_full_1p.sh //单卡精度训练脚本 + | |—— train_performance_1p.sh //单卡性能训练脚本 + | |—— train_full_8p.sh //8卡精度训练脚本 + | |—— train_performance_8p.sh //8卡性能训练脚本 ├── src //训练网络脚本实现 diff --git a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/src/train_softmax.py b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/src/train_softmax.py index 4042f8871b83028187c08816f00557ba6ba81154..c1507da815d384481fe12dbbea47b22eadc34eb4 100644 --- a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/src/train_softmax.py +++ 
b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/src/train_softmax.py @@ -237,7 +237,7 @@ def main(args): custom_op.parameter_map["mix_compile_mode"].b = True custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_fp32_to_fp16") # custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision") - custom_op.parameter_map["customize_dtypes"].s = tf.compat.as_bytes("./switch_config.txt") + custom_op.parameter_map["customize_dtypes"].s = tf.compat.as_bytes(args.switch_config) config.graph_options.rewrite_options.remapping = RewriterConfig.OFF sess = tf.Session(config=config) else: @@ -644,6 +644,11 @@ def parse_arguments(argv): help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true') parser.add_argument('--lfw_subtract_mean', help='Subtract feature mean before calculating distance.', action='store_true') + # -----npu modified start----- + if use_NPU: + parser.add_argument('--switch_config', type=str, + help='File containing matmul and cube op dtype.', default='./switch_config.txt') + # -----npu modified end----- return parser.parse_args(argv) diff --git a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/switch_config_fp32.txt b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/switch_config_fp32.txt new file mode 100644 index 0000000000000000000000000000000000000000..2bd563c6275ddedbb1467680ba5680c82ed0a0a8 --- /dev/null +++ b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/switch_config_fp32.txt @@ -0,0 +1,24 @@ +OpType::MatMul:InputDtype:float32,float32,OutputDtype:float32 +OpType::MatMulV2:InputDtype:float32,float32,OutputDtype:float32 +OpType::BatchMatMul:InputDtype:float32,float32,OutputDtype:float32 +OpType::BatchMatMulV2:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv2D:InputDtype:float32,float32,OutputDtype:float32 
+OpType::Conv2DBackpropInputD:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv2DBackpropInput:InputDtype:int32,float32,float32,OutputDtype:float32 +OpType::Conv2DTransposeD:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv2DTranspose:InputDtype:int32,float32,float32,OutputDtype:float32 +OpType::Conv2DBackpropFilterD:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv2DBackpropFilter:InputDtype:float32,int32,float32,OutputDtype:float32 +OpType::DepthwiseConv2D:InputDtype:float32,float32,OutputDtype:float32 +OpType::DepthwiseConv2DBackpropInputD:InputDtype:float32,float32,OutputDtype:float32 +OpType::DepthwiseConv2DBackpropInput:InputDtype:int32,float32,float32,OutputDtype:float32 +OpType::DepthwiseConv2DBackpropFilterD:InputDtype:float32,float32,OutputDtype:float32 +OpType::DepthwiseConv2DBackpropFilter:InputDtype:float32,int32,float32,OutputDtype:float32 +OpType::Deconvolution:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv3D:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv3DBackpropInputD:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv3DBackpropInput:InputDtype:int32,float32,float32,OutputDtype:float32 +OpType::Conv3DTransposeD:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv3DTranspose:InputDtype:int32,float32,float32,OutputDtype:float32 +OpType::Conv3DBackpropFilterD:InputDtype:float32,float32,OutputDtype:float32 +OpType::Conv3DBackpropFilter:InputDtype:float32,int32,float32,OutputDtype:float32 diff --git a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh index efcdb19ca4f74126c0aa3306efffb27dc0c94f93..012d534c7d0abe17e53fd6ad271ddf4244ade2df 100644 --- a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh +++ 
b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_1p.sh @@ -18,6 +18,7 @@ RANK_SIZE=1 #参数配置 data_path="" +switch_config="${cur_path}/switch_config.txt" if [[ $1 == --help || $1 == --h ]];then echo "usage:./train_full_1p.sh " @@ -28,6 +29,8 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --switch_config* ]];then + switch_config=`echo ${para#*=}` fi done @@ -68,6 +71,7 @@ nohup python3 ${cur_path}/src/train_softmax.py \ --lfw_subtract_mean \ --use_fixed_image_standardization \ --learning_rate_schedule_file ${cur_path}/data/learning_rate_schedule_classifier_casia.txt \ + --switch_config ${switch_config} \ --weight_decay 5e-4 \ --embedding_size 512 \ --validation_set_split_ratio 0.05 \ diff --git a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh index dd3094069743042a54ecf120e94a38f0551a9300..fd98f04eabdae3dfeaf9e62de2a9a47ee66646a6 100644 --- a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_full_8p.sh @@ -22,6 +22,7 @@ RANK_SIZE=8 #参数配置 data_path="" +switch_config="${cur_path}/switch_config.txt" if [[ $1 == --help || $1 == --h ]];then echo "usage:./train_full_8p.sh " @@ -32,6 +33,8 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --switch_config* ]];then + switch_config=`echo ${para#*=}` fi done @@ -74,6 +77,7 @@ do --lfw_subtract_mean \ --use_fixed_image_standardization \ --learning_rate_schedule_file ${cur_path}/data/learning_rate_schedule_classifier_casia_8p.txt \ + --switch_config ${switch_config} \ --weight_decay 5e-4 \ --embedding_size 512 \ --validation_set_split_ratio 0.05 \ diff --git 
a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_1p.sh index a3fab0377ee15e13a3d3600a66c33c736146e4d5..6da62369e24097c0e251b7c087a059df12eeff8f 100644 --- a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_1p.sh @@ -16,6 +16,7 @@ RANK_SIZE=1 #参数配置 data_path="" +switch_config="${cur_path}/switch_config.txt" if [[ $1 == --help || $1 == --h ]];then echo "usage:./train_performance_1p.sh " @@ -26,6 +27,8 @@ for para in $* do if [[ $para == --data_path* ]];then data_path=`echo ${para#*=}` + elif [[ $para == --switch_config* ]];then + switch_config=`echo ${para#*=}` fi done @@ -64,6 +67,7 @@ nohup python3 ${cur_path}/src/train_softmax.py \ --lfw_subtract_mean \ --use_fixed_image_standardization \ --learning_rate_schedule_file ${cur_path}/data/learning_rate_schedule_classifier_casia.txt \ + --switch_config ${switch_config} \ --weight_decay 5e-4 \ --embedding_size 512 \ --validation_set_split_ratio 0.05 \ diff --git a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_8p.sh index 48b1c9f45cea7c0e058b66cc1d2c64a345941f3b..3673d4f41be6db4d13bda562f6931b7b18b490f8 100644 --- a/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/Facenet_ID0122_for_TensorFlow/test/train_performance_8p.sh @@ -22,6 +22,7 @@ RANK_SIZE=8 #参数配置 data_path="" +switch_config="${cur_path}/switch_config.txt" if [[ $1 == --help || $1 == --h ]];then echo "usage:./train_performance_8p.sh " @@ -32,6 +33,8 @@ for para in $* do if [[ $para == --data_path* 
]];then data_path=`echo ${para#*=}` + elif [[ $para == --switch_config* ]];then + switch_config=`echo ${para#*=}` fi done @@ -72,6 +75,7 @@ do --lfw_subtract_mean \ --use_fixed_image_standardization \ --learning_rate_schedule_file ${cur_path}/data/learning_rate_schedule_classifier_casia_8p.txt \ + --switch_config ${switch_config} \ --weight_decay 5e-4 \ --embedding_size 512 \ --validation_set_split_ratio 0.05 \