From 64ead781d76d0214360883fa69850692f9773594 Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Sat, 13 May 2023 15:55:36 +0800 Subject: [PATCH 1/6] update --- .../src/mains/res50.py | 6 ++++- .../test/train_full_1p.sh | 22 +++++++++++++++++-- .../test/train_full_8p.sh | 20 ++++++++++++++++- .../test/train_performance_bs256_1p.sh | 21 +++++++++++++++--- .../test/train_performance_bs256_8p.sh | 20 ++++++++++++++--- .../test/train_performance_bs256_hw192_16p.sh | 22 ++++++++++++++----- .../test/train_performance_bs256_hw192_1p.sh | 21 +++++++++++++++--- .../test/train_performance_bs256_hw192_32p.sh | 22 ++++++++++++++++--- .../test/train_performance_bs256_hw192_64p.sh | 21 +++++++++++++++++- .../test/train_performance_bs256_hw192_8p.sh | 21 ++++++++++++++++-- .../test/train_performance_bs32_1p.sh | 22 +++++++++++++++---- .../test/train_performance_bs32_8p.sh | 22 ++++++++++++++++--- .../test/train_performance_distribute.sh | 20 +++++++++++++++-- .../run_squad.py | 5 +++++ .../test/train_full_1p.sh | 21 +++++++++++++++++- .../test/train_performance_1p.sh | 21 +++++++++++++++++- .../test/train_performance_8p.sh | 22 ++++++++++++++++++- 17 files changed, 292 insertions(+), 37 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index 35367a710..51f87537b 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -76,6 +76,7 @@ def main(): help="""config file used.""") cmdline.add_argument('--model_dir', default="./model_dir", help="""config file used.""") + cmdline.add_argument('--precision_mode', default='allow_mix_precision', type=str, help='precision_mode') # modify for npu overflow start # enable overflow @@ -95,7 +96,10 @@ def main(): configs = 'configs' cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file) #------------------------------------------------------------------ - + if FLAGS.precision_mode == "allow_mix_precision": + option = {} + option["ACL_PRECISION_MODE"] = "allow_mix_precision" + torch.npu.set_option(option) config = cfg.res50_config() config['iterations_per_loop'] = int(FLAGS.iterations_per_loop) config['max_train_steps'] = int(FLAGS.max_train_steps) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh index df5003759..a26c9139c 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh @@ -30,7 +30,9 @@ max_train_steps=1000 iterations_per_loop=100 debug=True eval=True - +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,不需要修改 over_dump=False @@ -71,6 +73,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -89,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -136,6 +146,7 @@ do --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ --over_dump=${over_dump} \ --over_dump_path=${over_dump_path} \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -170,7 +181,14 @@ echo "E2E training Duration sec: $e2e_time" #训练用例信息,不需要修改 BatchSize=256 DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi + ##获取性能数据,不需要修改 #吞吐量 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh index 254cad2ad..6a0245f55 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh @@ -28,6 +28,9 @@ iterations_per_loop=100 debug=True eval=True batch_size=256 +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,不需要修改 over_dump=False @@ -68,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -89,6 +96,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -144,6 +155,7 @@ do --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ --over_dump=${over_dump} \ --over_dump_path=${over_dump_path} \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -174,7 +186,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh index 2107632c3..86dc946c0 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh @@ -29,7 +29,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +70,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +92,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -124,6 +133,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -153,8 +163,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh index 6ca0c20b9..97d0d3675 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh @@ -31,7 +31,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -71,6 +72,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -94,6 +99,9 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi #8p训练必须参数(本机IP) one_node_ip=$one_node_ip #新增适配集群环境变量 @@ -159,6 +167,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -188,8 +197,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh index a03a8fd2f..05cc275e4 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh @@ -30,6 +30,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -98,11 +104,8 @@ do fi done -if [[ $conf_path == "" ]];then - fix_node_ip=$fix_node_ip - one_node_ip=$one_node_ip -else - one_node_ip=`find $conf_path -name "server_*_0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 fi #新增适配集群环境变量 @@ -180,6 +183,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -206,7 +210,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh index 9c2c2c93e..391a49b32 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh @@ -29,7 +29,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +70,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +92,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -124,6 +133,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -153,8 +163,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh index c4f5e022f..35514b6a3 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh @@ -30,6 +30,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -98,6 +104,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + if [[ $conf_path == "" ]];then fix_node_ip=$fix_node_ip one_node_ip=$one_node_ip @@ -179,7 +189,8 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ - --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --precision_mode ${precision_mode} \ + --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -205,8 +216,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh index 2feec0fff..9f786a11e 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh @@ -30,6 +30,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -101,6 +107,11 @@ do one_node_ip=`echo ${para#*=}` fi done + +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + linux_num=$servers_num if [[ $conf_path == "" ]];then fix_node_ip=$fix_node_ip @@ -184,6 +195,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -210,7 +222,14 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi + ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh index 172348baa..836cdaba0 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh @@ -31,7 +31,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -71,6 +72,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -94,6 +99,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #8p训练必须参数(本机IP) one_node_ip=$one_node_ip #新增适配集群环境变量 @@ -159,6 +168,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -188,7 +198,14 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi + ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh index 6c08021b4..d634867ff 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh @@ -29,14 +29,14 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False data_dump_step="10" profiling=False autotune=False - # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_1p.sh " @@ -69,6 +69,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +91,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -124,6 +132,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -153,8 +162,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh index ed52acfb6..8c86f46b6 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh @@ -31,7 +31,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -71,6 +72,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -94,6 +99,11 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + + #8p训练必须参数(本机IP) one_node_ip=$one_node_ip #新增适配集群环境变量 @@ -159,6 +169,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -188,8 +199,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh index db6e36144..47b887ef3 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh @@ -27,7 +27,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -67,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -88,6 +93,9 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -139,6 +147,7 @@ nohup ${bind_core} python3.7 ${cur_path}/../src/mains/res50.py --config_file=res --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait @@ -167,7 +176,14 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf' +fi + ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py index 0a6ff2c38..5b88ce459 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py @@ -77,6 +77,7 @@ flags.DEFINE_string( flags.DEFINE_string( "init_checkpoint", None, "Initial checkpoint (usually from a pre-trained BERT model).") +flags.DEFINE_string("precision_mode", "must_keep_origin_dtype", 'precision_mode') flags.DEFINE_bool( "do_lower_case", True, @@ -1151,6 +1152,10 @@ def validate_flags_or_throw(bert_config): def main(_): + if FLAGS.precision_mode == "allow_mix_precision": + option = {} + option["ACL_PRECISION_MODE"] = "allow_mix_precision" + torch.npu.set_option(option) tf.logging.set_verbosity(tf.logging.INFO) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh index 1d3f7fdfc..9bdde07c6 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh @@ -32,6 +32,10 @@ data_dump_step="10" profiling=False autotune=False +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" + # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_8p.sh " @@ -64,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -145,6 +157,7 @@ do --max_seq_length=384 \ --doc_stride=128 \ --output_dir=./output \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -172,7 +185,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi #获取性能数据 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'` diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh index 6508764ec..8fa84763e 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh @@ -32,6 +32,10 @@ data_dump_step="10" profiling=False autotune=False +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" + # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_8p.sh " @@ -64,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -144,6 +156,7 @@ do --max_seq_length=384 \ --doc_stride=128 \ --output_dir=./output \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -166,7 +179,13 @@ echo "E2E training Duration sec: $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi #获取性能数据 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'` diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh index cc7d8bf9a..b0c9ebddc 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh @@ -32,6 +32,10 @@ data_dump_step="10" profiling=False autotune=False +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" + # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_8p.sh " @@ -64,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -146,6 +158,7 @@ do --max_seq_length=384 \ --doc_stride=128 \ --output_dir=./output \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -168,7 +181,14 @@ echo "E2E training Duration sec: $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi + #获取性能数据 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'` temp0=`echo "scale=2;${fps} * ${batch_size}"|bc` -- Gitee From dd365cdd4580d25fb09fb6c6c083a613292683d7 Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Sat, 13 May 2023 16:03:27 +0800 Subject: [PATCH 2/6] update --- .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index 51f87537b..d7cbc8e2c 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -96,10 +96,11 @@ def main(): configs = 'configs' cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file) #------------------------------------------------------------------ + print ("FLAGS.precision_mode==================",FLAGS.precision_mode) if FLAGS.precision_mode == "allow_mix_precision": - option = {} - option["ACL_PRECISION_MODE"] = "allow_mix_precision" - torch.npu.set_option(option) + option = {} + option["ACL_PRECISION_MODE"] = "allow_mix_precision" + torch.npu.set_option(option) config = cfg.res50_config() config['iterations_per_loop'] = int(FLAGS.iterations_per_loop) config['max_train_steps'] = int(FLAGS.max_train_steps) -- Gitee From 95b71d3cc9eaa1f1803c4493317f3dee4988e79e Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Sat, 13 May 2023 16:07:19 +0800 Subject: [PATCH 3/6] update --- .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index d7cbc8e2c..0b7d5b66d 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -57,7 +57,7 @@ from trainers import gpu_base_trainer as tr # from configs import res50_config as cfg from hyper_param import hyper_param as hp from layers import layers as ly - +import torch import argparse def main(): -- Gitee From 78da5d22864cf22c21c718be2dd445d18e892a3f Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Sat, 13 May 2023 16:13:04 +0800 Subject: [PATCH 4/6] update --- .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index 0b7d5b66d..97e868704 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -100,7 +100,7 @@ def main(): if FLAGS.precision_mode == "allow_mix_precision": option = {} option["ACL_PRECISION_MODE"] = "allow_mix_precision" - torch.npu.set_option(option) + torch_npu.npu.set_option(option) config = cfg.res50_config() config['iterations_per_loop'] = int(FLAGS.iterations_per_loop) config['max_train_steps'] = int(FLAGS.max_train_steps) -- Gitee From 2800a6311d74273e807be2e41c38a2bbea0fb496 Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Sat, 13 May 2023 16:35:44 +0800 Subject: [PATCH 5/6] update --- .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index 97e868704..c3751dd6a 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -57,7 +57,7 @@ from trainers import gpu_base_trainer as tr # from configs import res50_config as cfg from hyper_param import hyper_param as hp from layers import layers as ly -import torch +import torch_npu import argparse def main(): -- Gitee From 235c30d4bd09da9c4bf972edb05f4131b8430e08 Mon Sep 17 00:00:00 2001 From: limingxing517 Date: Sat, 13 May 2023 17:17:39 +0800 Subject: [PATCH 6/6] update --- .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py | 1 - .../built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index c3751dd6a..d131f4afc 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -96,7 +96,6 @@ def main(): configs = 'configs' cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file) #------------------------------------------------------------------ - print ("FLAGS.precision_mode==================",FLAGS.precision_mode) if FLAGS.precision_mode == "allow_mix_precision": option = {} option["ACL_PRECISION_MODE"] = "allow_mix_precision" diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py index 5b88ce459..e398c2c71 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py @@ -45,6 +45,7 @@ import tensorflow as tf from npu_bridge.estimator.npu.npu_config import NPURunConfig from npu_bridge.estimator import npu_ops from npu_bridge.estimator.npu.npu_estimator import NPUEstimator,NPUEstimatorSpec +import torch_npu flags = tf.flags @@ -1155,7 +1156,7 @@ def main(_): if FLAGS.precision_mode == "allow_mix_precision": option = {} option["ACL_PRECISION_MODE"] = "allow_mix_precision" - torch.npu.set_option(option) + torch_npu.npu.set_option(option) tf.logging.set_verbosity(tf.logging.INFO) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) -- Gitee