From 64ead781d76d0214360883fa69850692f9773594 Mon Sep 17 00:00:00 2001
From: limingxing517 <limingxing517@163.com>
Date: Sat, 13 May 2023 15:55:36 +0800
Subject: [PATCH 1/6] update

---
 .../src/mains/res50.py                        |  6 ++++-
 .../test/train_full_1p.sh                     | 22 +++++++++++++++++--
 .../test/train_full_8p.sh                     | 20 ++++++++++++++++-
 .../test/train_performance_bs256_1p.sh        | 21 +++++++++++++++---
 .../test/train_performance_bs256_8p.sh        | 20 ++++++++++++++---
 .../test/train_performance_bs256_hw192_16p.sh | 22 ++++++++++++++-----
 .../test/train_performance_bs256_hw192_1p.sh  | 21 +++++++++++++++---
 .../test/train_performance_bs256_hw192_32p.sh | 22 ++++++++++++++++---
 .../test/train_performance_bs256_hw192_64p.sh | 21 +++++++++++++++++-
 .../test/train_performance_bs256_hw192_8p.sh  | 21 ++++++++++++++++--
 .../test/train_performance_bs32_1p.sh         | 22 +++++++++++++++----
 .../test/train_performance_bs32_8p.sh         | 22 ++++++++++++++++---
 .../test/train_performance_distribute.sh      | 20 +++++++++++++++--
 .../run_squad.py                              |  5 +++++
 .../test/train_full_1p.sh                     | 21 +++++++++++++++++-
 .../test/train_performance_1p.sh              | 21 +++++++++++++++++-
 .../test/train_performance_8p.sh              | 22 ++++++++++++++++++-
 17 files changed, 292 insertions(+), 37 deletions(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
index 35367a710..51f87537b 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
@@ -76,6 +76,7 @@ def main():
                          help="""config file used.""")
     cmdline.add_argument('--model_dir', default="./model_dir",
                          help="""config file used.""")
+    cmdline.add_argument('--precision_mode', default='allow_mix_precision', type=str, help='precision_mode') 
     
     # modify for npu overflow start
     # enable overflow
@@ -95,7 +96,10 @@ def main():
     configs = 'configs'
     cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file)
     #------------------------------------------------------------------
-
+    if FLAGS.precision_mode == "allow_mix_precision":
+        # NOTE(review): torch.npu is a PyTorch (torch_npu) API; confirm torch is imported in this TensorFlow script.
+        option = {"ACL_PRECISION_MODE": "allow_mix_precision"}
+        torch.npu.set_option(option)
     config = cfg.res50_config()
     config['iterations_per_loop'] = int(FLAGS.iterations_per_loop)
     config['max_train_steps'] = int(FLAGS.max_train_steps)
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh
index df5003759..a26c9139c 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh
@@ -30,7 +30,9 @@ max_train_steps=1000
 iterations_per_loop=100
 debug=True
 eval=True
-
+#维测参数，precision_mode需要模型审视修改
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 
 #维持参数，不需要修改
 over_dump=False
@@ -71,6 +73,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -89,6 +95,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -136,6 +146,7 @@ do
 	    --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \
 		--over_dump=${over_dump} \
 		--over_dump_path=${over_dump_path} \
+		--precision_mode ${precision_mode} \
 		> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
         #--precision_mode=${precision_mode} \
         #--data_dump_flag=${data_dump_flag} \
@@ -170,7 +181,14 @@ echo "E2E training Duration sec: $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=256
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+fi
+
 
 ##获取性能数据，不需要修改
 #吞吐量
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh
index 254cad2ad..6a0245f55 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh
@@ -28,6 +28,9 @@ iterations_per_loop=100
 debug=True
 eval=True
 batch_size=256
+#维测参数，precision_mode需要模型审视修改
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 
 #维持参数，不需要修改
 over_dump=False
@@ -68,6 +71,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -89,6 +96,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -144,6 +155,7 @@ do
 	    --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \
 		--over_dump=${over_dump} \
 		--over_dump_path=${over_dump_path} \
+		--precision_mode ${precision_mode} \
 		> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
         #--precision_mode=${precision_mode} \
         #--data_dump_flag=${data_dump_flag} \
@@ -174,7 +186,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+fi
 
 ##获取性能数据
 #吞吐量，不需要修改
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
index 2107632c3..86dc946c0 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh
@@ -29,7 +29,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -69,6 +70,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -87,6 +92,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -124,6 +133,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -153,8 +163,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
-
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+fi
 ##获取性能数据
 #吞吐量，不需要修改
 ActualFPS=${FPS}
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh
index 6ca0c20b9..97d0d3675 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh
@@ -31,7 +31,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -71,6 +72,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -94,6 +99,9 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
 #8p训练必须参数（本机IP）
 one_node_ip=$one_node_ip
 #新增适配集群环境变量
@@ -159,6 +167,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -188,8 +197,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf'
-
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf'
+fi
 ##获取性能数据
 #吞吐量，不需要修改
 ActualFPS=${FPS}
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh
index a03a8fd2f..05cc275e4 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh
@@ -30,6 +30,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -69,6 +71,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -98,11 +104,15 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 if [[ $conf_path == "" ]];then
     fix_node_ip=$fix_node_ip
     one_node_ip=$one_node_ip
 else
     one_node_ip=`find $conf_path -name "server_*_0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'`
 fi
 
 #新增适配集群环境变量
@@ -180,6 +190,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -206,7 +217,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+fi
 
 ##获取性能数据
 #吞吐量，不需要修改
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh
index 9c2c2c93e..391a49b32 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh
@@ -29,7 +29,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -69,6 +70,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -87,6 +92,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -124,6 +133,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -153,8 +163,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf'
-
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'fp32'_'perf'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf'
+fi
 ##获取性能数据
 #吞吐量，不需要修改
 ActualFPS=${FPS}
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh
index c4f5e022f..35514b6a3 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh
@@ -30,6 +30,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -69,6 +71,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -98,6 +104,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 if [[ $conf_path == "" ]];then
     fix_node_ip=$fix_node_ip
     one_node_ip=$one_node_ip
@@ -179,7 +189,8 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
-    --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+    --precision_mode ${precision_mode} \
+    --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
 
@@ -205,8 +216,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
-
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+fi
 ##获取性能数据
 #吞吐量，不需要修改
 ActualFPS=${FPS}
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh
index 2feec0fff..9f786a11e 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh
@@ -30,6 +30,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -69,6 +71,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -101,6 +107,11 @@ do
         one_node_ip=`echo ${para#*=}`
     fi
 done
+
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 linux_num=$servers_num
 if [[ $conf_path == "" ]];then
     fix_node_ip=$fix_node_ip
@@ -184,6 +195,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -210,7 +222,14 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+fi
+
 
 ##获取性能数据
 #吞吐量，不需要修改
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh
index 172348baa..836cdaba0 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh
@@ -31,7 +31,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -71,6 +72,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -94,6 +99,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #8p训练必须参数（本机IP）
 one_node_ip=$one_node_ip
 #新增适配集群环境变量
@@ -159,6 +168,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -188,7 +198,14 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf'
+fi
+
 
 ##获取性能数据
 #吞吐量，不需要修改
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh
index 6c08021b4..d634867ff 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh
@@ -29,14 +29,14 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
 data_dump_step="10"
 profiling=False
 autotune=False
-
 # 帮助信息，不需要修改
 if [[ $1 == --help || $1 == -h ]];then
     echo"usage:./train_full_1p.sh <args>"
@@ -69,6 +69,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -87,6 +91,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -124,6 +132,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -153,8 +162,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
-
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+fi
 ##获取性能数据
 #吞吐量，不需要修改
 ActualFPS=${FPS}
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh
index ed52acfb6..8c86f46b6 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh
@@ -31,7 +31,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -71,6 +72,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -94,6 +99,11 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
+
+
 #8p训练必须参数（本机IP）
 one_node_ip=$one_node_ip
 #新增适配集群环境变量
@@ -159,6 +169,7 @@ do
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
@@ -188,8 +199,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf'
-
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf'
+fi
 ##获取性能数据
 #吞吐量，不需要修改
 ActualFPS=${FPS}
diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh
index db6e36144..47b887ef3 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh
@@ -27,7 +27,8 @@ train_steps=2000
 learning_rate=
 
 #维测参数，precision_mode需要模型审视修改
-#precision_mode="allow_mix_precision"
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
 #维持参数，以下不需要修改
 over_dump=False
 data_dump_flag=False
@@ -67,6 +68,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -88,6 +93,9 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then
+  export ENABLE_HF32_EXECUTION=1
+fi
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -139,6 +147,7 @@ nohup ${bind_core} python3.7 ${cur_path}/../src/mains/res50.py --config_file=res
     --iterations_per_loop=100 \
     --debug=True \
     --eval=False \
+    --precision_mode ${precision_mode} \
     --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 wait
 
@@ -167,7 +176,14 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf'
+if [[ ${hf32} == "--hf32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'fp32'_'perf'
+else
+  CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf'
+fi
+
 
 ##获取性能数据
 #吞吐量，不需要修改
diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py
index 0a6ff2c38..5b88ce459 100644
--- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py
+++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py
@@ -77,6 +77,7 @@ flags.DEFINE_string(
 flags.DEFINE_string(
     "init_checkpoint", None,
     "Initial checkpoint (usually from a pre-trained BERT model).")
+flags.DEFINE_string("precision_mode", "must_keep_origin_dtype", 'precision_mode')
 
 flags.DEFINE_bool(
     "do_lower_case", True,
@@ -1151,6 +1152,10 @@ def validate_flags_or_throw(bert_config):
 
 
 def main(_):
+  if FLAGS.precision_mode == "allow_mix_precision":
+    # NOTE(review): torch.npu is a PyTorch (torch_npu) API; confirm torch is imported in this TensorFlow script.
+    option = {"ACL_PRECISION_MODE": "allow_mix_precision"}
+    torch.npu.set_option(option)
   tf.logging.set_verbosity(tf.logging.INFO)
 
   bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh
index 1d3f7fdfc..9bdde07c6 100644
--- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh
+++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh
@@ -32,6 +32,10 @@ data_dump_step="10"
 profiling=False
 autotune=False
 
+#维测参数，precision_mode需要模型审视修改
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
+
 # 帮助信息，不需要修改
 if [[ $1 == --help || $1 == -h ]];then
     echo"usage:./train_full_8p.sh <args>"
@@ -64,6 +68,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -87,6 +95,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then  # hf32 is set by the argument loop only when --hf32 is passed
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -145,6 +157,7 @@ do
         --max_seq_length=384 \
         --doc_stride=128 \
         --output_dir=./output \
+	--precision_mode ${precision_mode} \
 		> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
         #--precision_mode=${precision_mode} \
         #--data_dump_flag=${data_dump_flag} \
@@ -172,7 +185,13 @@ echo "E2E Training Duration sec : $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+if [[ ${hf32} == "--hf32" ]];then  # explicit --hf32 must win: fp32 defaults to "--fp32"
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+fi
 
 #获取性能数据
 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh
index 6508764ec..8fa84763e 100644
--- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh
@@ -32,6 +32,10 @@ data_dump_step="10"
 profiling=False
 autotune=False
 
+# Maintenance/debug parameters; precision_mode should be reviewed and adjusted per model
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
+
 # 帮助信息，不需要修改
 if [[ $1 == --help || $1 == -h ]];then
     echo"usage:./train_full_8p.sh <args>"
@@ -64,6 +68,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -87,6 +95,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then  # hf32 is set by the argument loop only when --hf32 is passed
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -144,6 +156,7 @@ do
         --max_seq_length=384 \
         --doc_stride=128 \
         --output_dir=./output \
+	--precision_mode ${precision_mode} \
 		> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
         #--precision_mode=${precision_mode} \
         #--data_dump_flag=${data_dump_flag} \
@@ -166,7 +179,13 @@ echo "E2E training Duration sec: $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+if [[ ${hf32} == "--hf32" ]];then  # explicit --hf32 must win: fp32 defaults to "--fp32"
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+fi
 
 #获取性能数据
 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh
index cc7d8bf9a..b0c9ebddc 100644
--- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh
+++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh
@@ -32,6 +32,10 @@ data_dump_step="10"
 profiling=False
 autotune=False
 
+# Maintenance/debug parameters; precision_mode should be reviewed and adjusted per model
+precision_mode="must_keep_origin_dtype"
+fp32="--fp32"
+
 # 帮助信息，不需要修改
 if [[ $1 == --help || $1 == -h ]];then
     echo"usage:./train_full_8p.sh <args>"
@@ -64,6 +68,10 @@ do
         mkdir -p ${data_dump_path}
     elif [[ $para == --data_dump_step* ]];then
         data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --hf32 ]];then
+        hf32=`echo ${para#*=}`
+    elif [[ $para == --fp32 ]];then
+        fp32=`echo ${para#*=}`
     elif [[ $para == --profiling* ]];then
         profiling=`echo ${para#*=}`
         profiling_dump_path=${cur_path}/output/profiling
@@ -87,6 +95,10 @@ do
     fi
 done
 
+if [[ ${hf32} == "--hf32" ]];then  # hf32 is set by the argument loop only when --hf32 is passed
+  export ENABLE_HF32_EXECUTION=1
+fi
+
 #校验是否传入data_path,不需要修改
 if [[ $data_path == "" ]];then
     echo "[Error] para \"data_path\" must be confing"
@@ -146,6 +158,7 @@ do
         --max_seq_length=384 \
         --doc_stride=128 \
         --output_dir=./output \
+	--precision_mode ${precision_mode} \
 		> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
         #--precision_mode=${precision_mode} \
         #--data_dump_flag=${data_dump_flag} \
@@ -168,7 +181,14 @@ echo "E2E training Duration sec: $e2e_time"
 #训练用例信息，不需要修改
 BatchSize=${batch_size}
 DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+if [[ ${hf32} == "--hf32" ]];then  # explicit --hf32 must win: fp32 defaults to "--fp32"
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf'
+elif [[ ${fp32} == "--fp32" ]];then
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf'
+else
+  CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+fi
+
 #获取性能数据
 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'`
 temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`
-- 
Gitee


From dd365cdd4580d25fb09fb6c6c083a613292683d7 Mon Sep 17 00:00:00 2001
From: limingxing517 <limingxing517@163.com>
Date: Sat, 13 May 2023 16:03:27 +0800
Subject: [PATCH 2/6] res50.py: re-indent precision_mode block with spaces, add debug print

---
 .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py      | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
index 51f87537b..d7cbc8e2c 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
@@ -96,10 +96,11 @@ def main():
     configs = 'configs'
     cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file)
     #------------------------------------------------------------------
+    print ("FLAGS.precision_mode==================",FLAGS.precision_mode)
     if FLAGS.precision_mode == "allow_mix_precision":
-	option = {}
-	option["ACL_PRECISION_MODE"] = "allow_mix_precision"
-	torch.npu.set_option(option)
+        option = {}
+        option["ACL_PRECISION_MODE"] = "allow_mix_precision"
+        torch.npu.set_option(option)
     config = cfg.res50_config()
     config['iterations_per_loop'] = int(FLAGS.iterations_per_loop)
     config['max_train_steps'] = int(FLAGS.max_train_steps)
-- 
Gitee


From 95b71d3cc9eaa1f1803c4493317f3dee4988e79e Mon Sep 17 00:00:00 2001
From: limingxing517 <limingxing517@163.com>
Date: Sat, 13 May 2023 16:07:19 +0800
Subject: [PATCH 3/6] res50.py: import torch

---
 .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
index d7cbc8e2c..0b7d5b66d 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
@@ -57,7 +57,7 @@ from trainers import gpu_base_trainer as tr
 # from configs import res50_config as cfg
 from hyper_param import hyper_param as hp
 from layers import layers as ly
-
+import torch
 import argparse
 
 def main():
-- 
Gitee


From 78da5d22864cf22c21c718be2dd445d18e892a3f Mon Sep 17 00:00:00 2001
From: limingxing517 <limingxing517@163.com>
Date: Sat, 13 May 2023 16:13:04 +0800
Subject: [PATCH 4/6] res50.py: call torch_npu.npu.set_option instead of torch.npu.set_option

---
 .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
index 0b7d5b66d..97e868704 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
@@ -100,7 +100,7 @@ def main():
     if FLAGS.precision_mode == "allow_mix_precision":
         option = {}
         option["ACL_PRECISION_MODE"] = "allow_mix_precision"
-        torch.npu.set_option(option)
+        torch_npu.npu.set_option(option)
     config = cfg.res50_config()
     config['iterations_per_loop'] = int(FLAGS.iterations_per_loop)
     config['max_train_steps'] = int(FLAGS.max_train_steps)
-- 
Gitee


From 2800a6311d74273e807be2e41c38a2bbea0fb496 Mon Sep 17 00:00:00 2001
From: limingxing517 <limingxing517@163.com>
Date: Sat, 13 May 2023 16:35:44 +0800
Subject: [PATCH 5/6] res50.py: import torch_npu instead of torch

---
 .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
index 97e868704..c3751dd6a 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
@@ -57,7 +57,7 @@ from trainers import gpu_base_trainer as tr
 # from configs import res50_config as cfg
 from hyper_param import hyper_param as hp
 from layers import layers as ly
-import torch
+import torch_npu
 import argparse
 
 def main():
-- 
Gitee


From 235c30d4bd09da9c4bf972edb05f4131b8430e08 Mon Sep 17 00:00:00 2001
From: limingxing517 <limingxing517@163.com>
Date: Sat, 13 May 2023 17:17:39 +0800
Subject: [PATCH 6/6] Remove debug print from res50.py; use torch_npu in run_squad.py

---
 .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py          | 1 -
 .../built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py  | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
index c3751dd6a..d131f4afc 100644
--- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py
@@ -96,7 +96,6 @@ def main():
     configs = 'configs'
     cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file)
     #------------------------------------------------------------------
-    print ("FLAGS.precision_mode==================",FLAGS.precision_mode)
     if FLAGS.precision_mode == "allow_mix_precision":
         option = {}
         option["ACL_PRECISION_MODE"] = "allow_mix_precision"
diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py
index 5b88ce459..e398c2c71 100644
--- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py
+++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py
@@ -45,6 +45,7 @@ import tensorflow as tf
 from npu_bridge.estimator.npu.npu_config import NPURunConfig
 from npu_bridge.estimator import npu_ops
 from npu_bridge.estimator.npu.npu_estimator import NPUEstimator,NPUEstimatorSpec
+import torch_npu
 
 flags = tf.flags
 
@@ -1155,7 +1156,7 @@ def main(_):
   if FLAGS.precision_mode == "allow_mix_precision":
     option = {}
     option["ACL_PRECISION_MODE"] = "allow_mix_precision"
-    torch.npu.set_option(option)
+    torch_npu.npu.set_option(option)
   tf.logging.set_verbosity(tf.logging.INFO)
 
   bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
-- 
Gitee