diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index 35367a710a3b4b397b805ae02b423b7f29c3497c..d131f4afcec946c0112fe1dcb7d23080f4caf199 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -57,7 +57,7 @@ from trainers import gpu_base_trainer as tr # from configs import res50_config as cfg from hyper_param import hyper_param as hp from layers import layers as ly - +import torch_npu import argparse def main(): @@ -76,6 +76,7 @@ def main(): help="""config file used.""") cmdline.add_argument('--model_dir', default="./model_dir", help="""config file used.""") + cmdline.add_argument('--precision_mode', default='allow_mix_precision', type=str, help='precision_mode') # modify for npu overflow start # enable overflow @@ -95,7 +96,10 @@ def main(): configs = 'configs' cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file) #------------------------------------------------------------------ - + if FLAGS.precision_mode == "allow_mix_precision": + option = {} + option["ACL_PRECISION_MODE"] = "allow_mix_precision" + torch_npu.npu.set_option(option) config = cfg.res50_config() config['iterations_per_loop'] = int(FLAGS.iterations_per_loop) config['max_train_steps'] = int(FLAGS.max_train_steps) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh index df50037595eb14b92c32e3030d94baa9ca0c8dd3..a26c9139c1daddf8d14f6a52db8fc8d7b7d81ce4 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_1p.sh @@ -30,7 +30,9 @@ max_train_steps=1000 iterations_per_loop=100 debug=True eval=True - +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,不需要修改 over_dump=False @@ -71,6 +73,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -89,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -136,6 +146,7 @@ do --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ --over_dump=${over_dump} \ --over_dump_path=${over_dump_path} \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -170,7 +181,14 @@ echo "E2E training Duration sec: $e2e_time" #训练用例信息,不需要修改 BatchSize=256 DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi + ##获取性能数据,不需要修改 #吞吐量 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh index 254cad2ad44821779738f3a202600f7f8c65f3ee..6a0245f559d55a6a6c25af467e2cfaca51376da6 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_full_8p.sh @@ -28,6 +28,9 @@ iterations_per_loop=100 debug=True eval=True batch_size=256 +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,不需要修改 over_dump=False @@ -68,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -89,6 +96,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -144,6 +155,7 @@ do --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \ --over_dump=${over_dump} \ --over_dump_path=${over_dump_path} \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -174,7 +186,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh index 2107632c35bd6d37b43ef24bc298451c6a97249c..86dc946c0262eacf53e8c4500d10d45a3a3eb9f0 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_1p.sh @@ -29,7 +29,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +70,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +92,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -124,6 +133,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -153,8 +163,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh index 6ca0c20b9bfeab794f4a2ed13ca227ec3afe0d7c..97d0d367573d0f3b95e861ac7be3e7b4ff580a9d 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_8p.sh @@ -31,7 +31,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -71,6 +72,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -94,6 +99,9 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi #8p训练必须参数(本机IP) one_node_ip=$one_node_ip #新增适配集群环境变量 @@ -159,6 +167,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -188,8 +197,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh index a03a8fd2f0c1055be5822ac69ae90a9f4d168ea2..05cc275e4bd20ddaeb10cbc6850a9b106b5d3474 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_16p.sh @@ -30,6 +30,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -98,11 +104,8 @@ do fi done -if [[ $conf_path == "" ]];then - fix_node_ip=$fix_node_ip - one_node_ip=$one_node_ip -else - one_node_ip=`find $conf_path -name "server_*_0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'` +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 fi #新增适配集群环境变量 @@ -180,6 +183,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -206,7 +210,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh index 9c2c2c93e845d58ae3231e4c5e29912d3105650f..391a49b325f06c75520d10d59c851e2b28d1add1 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_1p.sh @@ -29,7 +29,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +70,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +92,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -124,6 +133,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -153,8 +163,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh index c4f5e022fecfac7b8410c277063a97a119232e33..35514b6a3f31a51089dca4399968d5c1c8630191 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_32p.sh @@ -30,6 +30,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -98,6 +104,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + if [[ $conf_path == "" ]];then fix_node_ip=$fix_node_ip one_node_ip=$one_node_ip @@ -179,7 +189,8 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ - --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --precision_mode ${precision_mode} \ + --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -205,8 +216,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh index 2feec0fffa6bec2cdca5a1cc572d271a29409611..9f786a11e8a4f8235a04b76c808da39b961d71f7 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_64p.sh @@ -30,6 +30,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -69,6 +71,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -101,6 +107,11 @@ do one_node_ip=`echo ${para#*=}` fi done + +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + linux_num=$servers_num if [[ $conf_path == "" ]];then fix_node_ip=$fix_node_ip @@ -184,6 +195,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -210,7 +222,14 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi + ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh index 172348baafb4b47af7c0ecfa7b3746b38ce97848..836cdaba05e432934116c76af6ffec3381617829 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs256_hw192_8p.sh @@ -31,7 +31,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -71,6 +72,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -94,6 +99,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #8p训练必须参数(本机IP) one_node_ip=$one_node_ip #新增适配集群环境变量 @@ -159,6 +168,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -188,7 +198,14 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p_hw192'_'perf' +fi + ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh index 6c08021b499cdd6fcc6dbfefd9397ce13e0833a1..d634867ff922b812ee4a6623116f0050d96b1044 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh @@ -29,14 +29,14 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False data_dump_step="10" profiling=False autotune=False - # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_1p.sh " @@ -69,6 +69,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +91,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -124,6 +132,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -153,8 +162,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh index ed52acfb696c0bd2e8fdcf51c8dfb7d863b5954f..8c86f46b6b6182602fe9234b9b79cf8afb8fa1a7 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_8p.sh @@ -31,7 +31,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -71,6 +72,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -94,6 +99,11 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + + #8p训练必须参数(本机IP) one_node_ip=$one_node_ip #新增适配集群环境变量 @@ -159,6 +169,7 @@ do --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait @@ -188,8 +199,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZES}'p'_'perf' - +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi ##获取性能数据 #吞吐量,不需要修改 ActualFPS=${FPS} diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh index db6e36144e648744a19ddd5986813d7a543350ce..47b887ef391929908ce600bac0496e0953b71dbb 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_distribute.sh @@ -27,7 +27,8 @@ train_steps=2000 learning_rate= #维测参数,precision_mode需要模型审视修改 -#precision_mode="allow_mix_precision" +precision_mode="must_keep_origin_dtype" +fp32="--fp32" #维持参数,以下不需要修改 over_dump=False data_dump_flag=False @@ -67,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -88,6 +93,9 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -139,6 +147,7 @@ nohup ${bind_core} python3.7 ${cur_path}/../src/mains/res50.py --config_file=res --iterations_per_loop=100 \ --debug=True \ --eval=False \ + --precision_mode ${precision_mode} \ --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} >> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait @@ -167,7 +176,14 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'hf32'_'perf' +else + CaseName=${Network}${name_bind}_bs${BatchSize}_${RANK_SIZE}'p_hw192'_'perf' +fi + ##获取性能数据 #吞吐量,不需要修改 diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py index 0a6ff2c38163c4e6a7a4d2b0941e3597f15dfc11..e398c2c71f3c9ae8f7867811bca550dea04b1e0f 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/run_squad.py @@ -45,6 +45,7 @@ import tensorflow as tf from npu_bridge.estimator.npu.npu_config import NPURunConfig from npu_bridge.estimator import npu_ops from npu_bridge.estimator.npu.npu_estimator import NPUEstimator,NPUEstimatorSpec +import torch_npu flags = tf.flags @@ -77,6 +78,7 @@ flags.DEFINE_string( flags.DEFINE_string( "init_checkpoint", None, "Initial checkpoint (usually from a pre-trained BERT model).") +flags.DEFINE_string("precision_mode", "must_keep_origin_dtype", 'precision_mode') flags.DEFINE_bool( "do_lower_case", True, @@ -1151,6 +1153,10 @@ def validate_flags_or_throw(bert_config): def main(_): + if FLAGS.precision_mode == "allow_mix_precision": + option = {} + option["ACL_PRECISION_MODE"] = "allow_mix_precision" + torch_npu.npu.set_option(option) tf.logging.set_verbosity(tf.logging.INFO) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh index 1d3f7fdfcb0e2df6d5da4fb7fcab4cfd591e4e91..9bdde07c698faa76164a6d5b2c907d2394df36a2 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_full_1p.sh @@ -32,6 +32,10 @@ data_dump_step="10" profiling=False autotune=False +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" + # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_8p.sh " @@ -64,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -145,6 +157,7 @@ do --max_seq_length=384 \ --doc_stride=128 \ --output_dir=./output \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -172,7 +185,13 @@ echo "E2E Training Duration sec : $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'acc' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'acc' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' +fi #获取性能数据 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'` diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh index 6508764ec9ef516ffd90f5f1dd194033678a2ab4..8fa84763e4230463fa02e07538e8f92665c3660e 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh @@ -32,6 +32,10 @@ data_dump_step="10" profiling=False autotune=False +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" + # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_8p.sh " @@ -64,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -144,6 +156,7 @@ do --max_seq_length=384 \ --doc_stride=128 \ --output_dir=./output \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -166,7 +179,13 @@ echo "E2E training Duration sec: $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi #获取性能数据 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'` diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh index cc7d8bf9a1ee2b2070dfc69151adf382a1b1e220..b0c9ebddc8816692ec837b1773beaf801154e111 100644 --- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh +++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_8p.sh @@ -32,6 +32,10 @@ data_dump_step="10" profiling=False autotune=False +#维测参数,precision_mode需要模型审视修改 +precision_mode="must_keep_origin_dtype" +fp32="--fp32" + # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then echo"usage:./train_full_8p.sh " @@ -64,6 +68,10 @@ do mkdir -p ${data_dump_path} elif [[ $para == --data_dump_step* ]];then data_dump_step=`echo ${para#*=}` + elif [[ $para == --hf32 ]];then + hf32=`echo ${para#*=}` + elif [[ $para == --fp32 ]];then + fp32=`echo ${para#*=}` elif [[ $para == --profiling* ]];then profiling=`echo ${para#*=}` profiling_dump_path=${cur_path}/output/profiling @@ -87,6 +95,10 @@ do fi done +if [[ ${fp32} == "--hf32" ]];then + export ENABLE_HF32_EXECUTION=1 +fi + #校验是否传入data_path,不需要修改 if [[ $data_path == "" ]];then echo "[Error] para \"data_path\" must be confing" @@ -146,6 +158,7 @@ do --max_seq_length=384 \ --doc_stride=128 \ --output_dir=./output \ + --precision_mode ${precision_mode} \ > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & #--precision_mode=${precision_mode} \ #--data_dump_flag=${data_dump_flag} \ @@ -168,7 +181,14 @@ echo "E2E training Duration sec: $e2e_time" #训练用例信息,不需要修改 BatchSize=${batch_size} DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +if [[ ${fp32} == "--fp32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'fp32'_'perf' +elif [[ ${hf32} == "--hf32" ]];then + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'hf32'_'perf' +else + CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' +fi + #获取性能数据 fps=`grep "global_step/sec:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'global_step/sec:' '{print $2}'|awk 'END {print $1}'` temp0=`echo "scale=2;${fps} * ${batch_size}"|bc`