From 33a656a749b04e450046bb08c16fd1a63affbd36 Mon Sep 17 00:00:00 2001 From: Ryan Date: Wed, 6 Apr 2022 08:06:09 +0000 Subject: [PATCH 01/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../run_squad.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index a5203e848d..ea57841c7a 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -894,6 +894,10 @@ def main(): default=None, type=str, help="addr used for distributed training") + # 图模式 + parser.add_argument('--graph_mode', + action='store_true', + help='whether to enable graph mode.') args = parser.parse_args() args.fp16 = args.fp16 or args.amp @@ -1015,13 +1019,13 @@ def main(): # except ImportError: # raise ImportError( # "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - # optimizer = NpuFusedAdam(optimizer_grouped_parameters, - # lr=args.learning_rate) + optimizer = NpuFusedAdam(optimizer_grouped_parameters, + lr=args.learning_rate) - optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, - lr=args.learning_rate, - warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) + # optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, + # lr=args.learning_rate, + # warmup=args.warmup_proportion, + # t_total=num_train_optimization_steps) if args.loss_scale == 0: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, @@ -1108,6 +1112,10 @@ def main(): train_iter = train_dataloader step_start_time = time.time() for step, batch in enumerate(train_iter): + # 图模式 + if args.graph_mode: + print("graph mode on") + torch.npu.enable_graph_mode() # Terminate early for benchmarking data_time = time.time() - step_start_time if args.max_steps > 0 and global_step > args.max_steps: @@ -1141,7 +1149,7 @@ def main(): else: loss.backward() - + if (step + 1) % args.gradient_accumulation_steps == 0: if args.fp16 : # modify learning rate with special warm up for BERT which FusedAdam doesn't do @@ -1149,8 +1157,14 @@ def main(): optimizer.step() optimizer.zero_grad() global_step += 1 - - final_loss = loss.item() + # 图模式 + if args.graph_mode: + print("graph mode launch") + torch.npu.launch_graph() + if step == max_steps: + print("graph mode synchronize") + torch.npu.synchronize() + final_loss = 0.0 #loss.item() step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, @@ -1162,7 +1176,9 @@ def main(): "step_loss": round(final_loss, 4), "learning_rate": round(optimizer.param_groups[0]['lr'], 10)}) step_start_time = time.time() - + # 图模式 + if args.graph_mode: + torch.npu.disable_graph_mode() time_to_train = time.time() - train_start if args.do_train and is_main_process() and not args.skip_checkpoint: @@ -1276,6 +1292,7 @@ if __name__ == "__main__": option = {} option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" + option["MM_BMM_ND_ENABLE"] = "enable" torch.npu.set_option(option) main() dllogger.flush() -- Gitee From e4c0ab75322685b7d7c760137936dd3b9acde492 Mon Sep 17 00:00:00 2001 From: "rrrr.cao@hotmail.com" Date: Wed, 6 Apr 2022 16:43:30 +0800 Subject: [PATCH 02/35] add Bert-Squad graph mode --- .../train_ID3078_Bert-Squad_performance_1p.sh | 188 ++++++++++++++++++ .../train_ID3078_Bert-Squad_performance_8p.sh | 188 ++++++++++++++++++ .../test/train_performance_1p.sh | 1 - .../test/train_performance_8p.sh | 1 - 4 files changed, 376 insertions(+), 2 deletions(-) create mode 100644 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh create mode 100644 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh new file mode 100644 index 0000000000..7c87c61096 --- /dev/null +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh @@ -0,0 +1,188 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export BMMV2_ENABLE=1 +export RANK_SIZE=1 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" +ckpt_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Bert-Squad_ID3078_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=32 +#训练step +train_steps= +#学习率 +learning_rate=6e-5 + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3.7 run_squad.py \ + --init_checkpoint ${ckpt_path}/bert_large_pretrained_amp.pt \ + --bert_model bert-large-uncased \ + --do_train \ + --train_file ${data_path}/train-v1.1-min.json \ + --train_batch_size ${batch_size} \ + --do_predict \ + --predict_batch_size ${batch_size} \ + --predict_file ${data_path}/dev-v1.1.json \ + --learning_rate ${learning_rate} \ + --num_train_epochs ${train_epochs} \ + --seed 1 \ + --fp16 \ + --max_steps 100 \ + --use_npu \ + --loss_scale 4096 \ + --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \ + --do_eval \ + --eval_script ${data_path}/evaluate-v1.1.py \ + --npu_id ${ASCEND_DEVICE_ID} \ + --do_lower_case \ + --output_dir ${cur_path}/../results \ + --config_file bert_config.json \ + --graph_mode \ + --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` + +FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep 'F1 : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $10}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf ${data_path}/train-v1.1-min.json_bert-large-uncased_384_128_64 +export BMMV2_ENABLE=0 \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh new file mode 100644 index 0000000000..2b52006c63 --- /dev/null +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh @@ -0,0 +1,188 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +export BMMV2_ENABLE=1 +export RANK_SIZE=8 +export JOB_ID=10087 +RANK_ID_START=0 + + +# 数据集路径,保持为空,不需要修改 +data_path="" +ckpt_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Bert-Squad_ID3078_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=32 +#训练step +train_steps= +#学习率 +learning_rate=2e-4 + + +#维测参数,precision_mode需要模型审视修改 +precision_mode="allow_fp32_to_fp16" +#维持参数,以下不需要修改 +over_dump=False +data_dump_flag=False +data_dump_step="10" +profiling=False + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + export ASCEND_DEVICE_ID=$RANK_ID + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3.7 run_squad.py \ + --init_checkpoint ${ckpt_path}/bert_large_pretrained_amp.pt \ + --bert_model bert-large-uncased \ + --do_train \ + --train_file ${data_path}/train-v1.1-min.json \ + --train_batch_size ${batch_size} \ + --do_predict \ + --predict_batch_size ${batch_size} \ + --predict_file ${data_path}/dev-v1.1.json \ + --learning_rate ${learning_rate} \ + --num_train_epochs ${train_epochs} \ + --seed 1 \ + --fp16 \ + --max_steps 100 \ + --use_npu \ + --loss_scale 4096 \ + --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \ + --do_eval \ + --eval_script ${data_path}/evaluate-v1.1.py \ + --npu_id ${ASCEND_DEVICE_ID} \ + --do_lower_case \ + --output_dir ${cur_path}/../results \ + --config_file bert_config.json \ + --num_npu 8 \ + --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` + +FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'` + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +train_accuracy=`grep 'F1 : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $10}'` +#打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf ${data_path}/train-v1.1-min.json_bert-large-uncased_384_128_64 +export BMMV2_ENABLE=0 \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh index 1ab48782e3..04e874cc97 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh @@ -83,7 +83,6 @@ if [[ $data_path == "" ]];then exit 1 fi -cp run_squad.py $cur_path/../ #训练开始时间,不需要修改 start_time=$(date +%s) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh index 5315e72b6a..37195fb612 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh @@ -83,7 +83,6 @@ if [[ $data_path == "" ]];then exit 1 fi -cp run_squad.py $cur_path/../ #训练开始时间,不需要修改 start_time=$(date +%s) -- Gitee From 00be2a3f0d84d7a06cd822a942f2e1208fd85da9 Mon Sep 17 00:00:00 2001 From: Ryan Date: Wed, 6 Apr 2022 08:45:05 +0000 Subject: [PATCH 03/35] update train_ID3078_Bert-Squad_performance_8p.sh. --- .../test/train_ID3078_Bert-Squad_performance_8p.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh index 2b52006c63..7bf5ef040c 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh @@ -132,6 +132,7 @@ do --output_dir ${cur_path}/../results \ --config_file bert_config.json \ --num_npu 8 \ + --graph_mode \ --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait -- Gitee From 025160ffaa4d37c0242a404bc92ccb22503c5759 Mon Sep 17 00:00:00 2001 From: Ryan Date: Wed, 6 Apr 2022 09:07:08 +0000 Subject: [PATCH 04/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../run_squad.py | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index ea57841c7a..b72933c2a3 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1014,18 +1014,20 @@ def main(): ] if args.do_train: if args.fp16: - # try: - # from apex.optimizers import NpuFusedAdam - # except ImportError: - # raise ImportError( - # "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - optimizer = NpuFusedAdam(optimizer_grouped_parameters, - lr=args.learning_rate) - - # optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, - # lr=args.learning_rate, - # warmup=args.warmup_proportion, - # t_total=num_train_optimization_steps) + try: + from apex.optimizers import NpuFusedAdam + except ImportError: + raise ImportError( + "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") + # 图模式 + if args.graph_mode: + optimizer = NpuFusedAdam(optimizer_grouped_parameters, + lr=args.learning_rate) + else: + optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, + lr=args.learning_rate, + warmup=args.warmup_proportion, + t_total=num_train_optimization_steps) if args.loss_scale == 0: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, @@ -1164,7 +1166,11 @@ def main(): if step == max_steps: print("graph mode synchronize") torch.npu.synchronize() - final_loss = 0.0 #loss.item() + # 图模式 + if args.graph_mode: + final_loss = 0.0 + else: + loss.item() step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, @@ -1178,6 +1184,7 @@ def main(): step_start_time = time.time() # 图模式 if args.graph_mode: + print("graph mode off") torch.npu.disable_graph_mode() time_to_train = time.time() - train_start -- Gitee From 6af3d17b4ac1b28e74894f1173f1e0a9cea3d060 Mon Sep 17 00:00:00 2001 From: Ryan Date: Wed, 6 Apr 2022 09:19:49 +0000 Subject: [PATCH 05/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index b72933c2a3..21e87064d9 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1163,14 +1163,14 @@ def main(): if args.graph_mode: print("graph mode launch") torch.npu.launch_graph() - if step == max_steps: + if step == args.max_steps: print("graph mode synchronize") torch.npu.synchronize() # 图模式 if args.graph_mode: final_loss = 0.0 else: - loss.item() + final_loss = loss.item() step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, -- Gitee From 37c97a2783b6347164dadddf3856ce4ef5820934 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 01:33:55 +0000 Subject: [PATCH 06/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../run_squad.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 21e87064d9..19ebd9eabf 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1014,20 +1014,20 @@ def main(): ] if args.do_train: if args.fp16: - try: - from apex.optimizers import NpuFusedAdam - except ImportError: - raise ImportError( - "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - # 图模式 - if args.graph_mode: - optimizer = NpuFusedAdam(optimizer_grouped_parameters, - lr=args.learning_rate) - else: - optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, - lr=args.learning_rate, - warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) + # try: + # from apex.optimizers import NpuFusedAdam + # except ImportError: + # raise ImportError( + # "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") + # # 图模式 + # if args.graph_mode: + # optimizer = NpuFusedAdam(optimizer_grouped_parameters, + # lr=args.learning_rate) + # else: + optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, + lr=args.learning_rate, + warmup=args.warmup_proportion, + t_total=num_train_optimization_steps) if args.loss_scale == 0: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, -- Gitee From 48b4c7c84495f300ea3822f659fc8ce3f54a6722 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 01:51:23 +0000 Subject: [PATCH 07/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 19ebd9eabf..d513ceaadb 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1031,8 +1031,8 @@ def main(): if args.loss_scale == 0: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, - loss_scale="dynamic", combine_grad=True, - combine_ddp=True if args.local_rank != -1 else False) + loss_scale="dynamic", combine_grad=True)#, + # combine_ddp=True if args.local_rank != -1 else False) else: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, loss_scale=args.loss_scale, combine_grad=True, -- Gitee From 87f4e6756ac16e4dc60f499e8abd3d17de3e0a20 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 02:14:47 +0000 Subject: [PATCH 08/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index d513ceaadb..6834fde9dc 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1163,9 +1163,9 @@ def main(): if args.graph_mode: print("graph mode launch") torch.npu.launch_graph() - if step == args.max_steps: - print("graph mode synchronize") - torch.npu.synchronize() + # if step == args.max_steps: + # print("graph mode synchronize") + # torch.npu.synchronize() # 图模式 if args.graph_mode: final_loss = 0.0 -- Gitee From 773ac2e100d3c41af75628e789be8a8ad58f65b4 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 03:49:19 +0000 Subject: [PATCH 09/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../run_squad.py | 148 +++++++++--------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 6834fde9dc..3c8f3ca17a 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1014,25 +1014,25 @@ def main(): ] if args.do_train: if args.fp16: - # try: - # from apex.optimizers import NpuFusedAdam - # except ImportError: - # raise ImportError( - # "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") - # # 图模式 - # if args.graph_mode: - # optimizer = NpuFusedAdam(optimizer_grouped_parameters, - # lr=args.learning_rate) - # else: - optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, - lr=args.learning_rate, - warmup=args.warmup_proportion, - t_total=num_train_optimization_steps) + try: + from apex.optimizers import NpuFusedAdam + except ImportError: + raise ImportError( + "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") + # 图模式 + if args.graph_mode: + optimizer = NpuFusedAdam(optimizer_grouped_parameters, + lr=args.learning_rate) + else: + optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters, + lr=args.learning_rate, + warmup=args.warmup_proportion, + t_total=num_train_optimization_steps) if args.loss_scale == 0: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, - loss_scale="dynamic", combine_grad=True)#, - # combine_ddp=True if args.local_rank != -1 else False) + loss_scale="dynamic", combine_grad=True, + combine_ddp=True if args.local_rank != -1 else False) else: model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False, loss_scale=args.loss_scale, combine_grad=True, @@ -1114,63 +1114,65 @@ def main(): train_iter = train_dataloader step_start_time = time.time() for step, batch in enumerate(train_iter): - # 图模式 - if args.graph_mode: - print("graph mode on") - torch.npu.enable_graph_mode() - # Terminate early for benchmarking - data_time = time.time() - step_start_time - if args.max_steps > 0 and global_step > args.max_steps: - break - - if n_npu == 1: - batch = tuple(t.to(device, non_blocking=True) for t in batch) # multi-gpu does scattering it-self - input_ids, input_mask, segment_ids, start_positions, end_positions = batch - start_logits, end_logits = model(input_ids, segment_ids, input_mask) - # If we are on multi-GPU, split add a dimension - if len(start_positions.size()) > 1: - start_positions = start_positions.squeeze(-1) - if len(end_positions.size()) > 1: - end_positions = end_positions.squeeze(-1) - # sometimes the start/end positions are outside our model inputs, we ignore these terms - ignored_index = start_logits.size(1) - start_positions.clamp_(0, ignored_index) - end_positions.clamp_(0, ignored_index) - - loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index) - start_loss = loss_fct(start_logits, start_positions) - end_loss = loss_fct(end_logits, end_positions) - loss = (start_loss + end_loss) / 2 - if n_npu > 1: - loss = loss.mean() # mean() to average on multi-gpu. - if args.gradient_accumulation_steps > 1: - loss = loss / args.gradient_accumulation_steps - if args.fp16: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - - - if (step + 1) % args.gradient_accumulation_steps == 0: - if args.fp16 : - # modify learning rate with special warm up for BERT which FusedAdam doesn't do - scheduler.step() - optimizer.step() - optimizer.zero_grad() - global_step += 1 - # 图模式 - if args.graph_mode: - print("graph mode launch") - torch.npu.launch_graph() - # if step == args.max_steps: - # print("graph mode synchronize") - # torch.npu.synchronize() - # 图模式 - if args.graph_mode: - final_loss = 0.0 - else: - final_loss = loss.item() + with torch.autograd.profiler.profile(use_npu=False) as prof: + # 图模式 + if args.graph_mode: + print("graph mode on") + torch.npu.enable_graph_mode() + # Terminate early for benchmarking + data_time = time.time() - step_start_time + if args.max_steps > 0 and global_step > args.max_steps: + break + + if n_npu == 1: + batch = tuple(t.to(device, non_blocking=True) for t in batch) # multi-gpu does scattering it-self + input_ids, input_mask, segment_ids, start_positions, end_positions = batch + start_logits, end_logits = model(input_ids, segment_ids, input_mask) + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + ignored_index = start_logits.size(1) + start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + loss = (start_loss + end_loss) / 2 + if n_npu > 1: + loss = loss.mean() # mean() to average on multi-gpu. + if args.gradient_accumulation_steps > 1: + loss = loss / args.gradient_accumulation_steps + if args.fp16: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + + + if (step + 1) % args.gradient_accumulation_steps == 0: + if args.fp16 : + # modify learning rate with special warm up for BERT which FusedAdam doesn't do + scheduler.step() + optimizer.step() + optimizer.zero_grad() + global_step += 1 + # 图模式 + if args.graph_mode: + print("graph mode launch") + torch.npu.launch_graph() + if step == args.max_steps: + print("graph mode synchronize") + torch.npu.synchronize() + # 图模式 + if args.graph_mode: + final_loss = 0.0 + else: + final_loss = loss.item() + prof.export_chrome_trace("./profiler_npu_"%d".json",step) step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, -- Gitee From 8eabcdb9e9f826f011974434e7efeaac6cf683fa Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 06:01:23 +0000 Subject: [PATCH 10/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 3c8f3ca17a..46d54012a5 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1172,7 +1172,7 @@ def main(): final_loss = 0.0 else: final_loss = loss.item() - prof.export_chrome_trace("./profiler_npu_"%d".json",step) + prof.export_chrome_trace("./profiler_npu_%d.json"%step) step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, -- Gitee From 709e5ac02f653c4002eb65c28d5ed9aebe55ee5c Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 06:04:00 +0000 Subject: [PATCH 11/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../pytorch_resnet50_apex.py | 184 ++++++++++++++++-- 1 file changed, 165 insertions(+), 19 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index 2932965ffd..e7aa4c7acf 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -364,6 +364,7 @@ def main_worker(gpu, ngpus_per_node, args): else: train_sampler = None + ## 原始loader,下面的优化后loader具有更好的性能,无论单算子还是图模式 train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) @@ -377,6 +378,16 @@ def main_worker(gpu, ngpus_per_node, args): ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) + train_loader_len = len(train_loader) + + # 图模式 + if args.graph_mode: + train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data, + args.batch_size, + workers=args.workers, + fp16=True) + val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False) + if args.evaluate: validate(val_loader, model, criterion, args) @@ -387,7 +398,7 @@ def main_worker(gpu, ngpus_per_node, args): train_sampler.set_epoch(epoch) lr_policy(optimizer, 0, epoch) # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args) + train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args) # evaluate on validation set acc1 = validate(val_loader, model, criterion, args) @@ -405,7 +416,7 @@ def main_worker(gpu, ngpus_per_node, args): }, is_best, args, file_name) modeltmp.to(CALCULATE_DEVICE) -def train(train_loader, model, criterion, optimizer, epoch, args): +def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args): if args.optimizer_batch_size < 0: batch_size_multiplier = 1 else: @@ -423,7 +434,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args): top1 = AverageMeter('Acc@1', ':6.2f') top5 = AverageMeter('Acc@5', ':6.2f') progress = ProgressMeter( - len(train_loader), + train_loader_len, [batch_time, data_time, losses, top1, top5], prefix="Epoch: [{}]".format(epoch)) @@ -446,26 +457,26 @@ def train(train_loader, model, criterion, optimizer, epoch, args): images = images.cuda(args.gpu, non_blocking=True) images = images.to(CALCULATE_DEVICE, non_blocking=True) - if args.label_smoothing == 0.0: + # 图模式 - if args.graph_mode: - print("args.graph_mode") - target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32) - else: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + if args.graph_mode: + print("args.graph_mode") + target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32) + else : + if args.label_smoothing == 0.0: + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + # compute output output = model(images) loss = criterion(output, target) - if args.label_smoothing > 0.0: # 图模式 - if args.graph_mode: - print("args.graph_mode") - target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32) - else: + if args.graph_mode: + pass + else: + if args.label_smoothing > 0.0: target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - # measure accuracy and record loss # 图模式 @@ -533,15 +544,26 @@ def validate(val_loader, model, criterion, args): if args.gpu is not None: images = images.cuda(args.gpu, non_blocking=True) images = images.to(CALCULATE_DEVICE, non_blocking=True) - if args.label_smoothing == 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + + + # 图模式 + if args.graph_mode: + print("args.graph_mode") + target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32) + else : + if args.label_smoothing == 0.0: + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) # compute output output = model(images) loss = criterion(output, target) - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) + # 图模式 + if args.graph_mode: + pass + else: + if args.label_smoothing > 0.0: + target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) # measure accuracy and record loss acc1, acc5 = accuracy(output, target, topk=(1, 5)) @@ -692,5 +714,129 @@ def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None): return lr_policy(_lr_fn, logger=logger) + +def fast_collate(batch): + imgs = [img[0] for img in batch] + targets = torch.tensor([target[1] for target in batch], dtype=torch.int64) + w = imgs[0].size[0] + h = imgs[0].size[1] + tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8) + for i, img in enumerate(imgs): + nump_array = np.asarray(img, dtype=np.uint8) + if (nump_array.ndim < 3): + nump_array = np.expand_dims(nump_array, axis=-1) + nump_array = np.rollaxis(nump_array, 2) + + tensor[i] += torch.from_numpy(nump_array) + + return tensor, targets + + +class PrefetchedWrapper(object): + def prefetched_loader(loader, fp16): + mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1) + std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1) + if fp16: + mean = mean.half() + std = std.half() + + stream = torch.npu.Stream() + first = True + + a = torch.empty([2,3]) + for next_input, next_target in loader: + with torch.npu.stream(stream): + b = a*2 + next_input = next_input.npu(non_blocking=True) + next_target = next_target.npu(non_blocking=True) + b = a/2 + if fp16: + next_input = next_input.half() + + else: + next_input = next_input.float() + + next_input = next_input.sub_(mean).div_(std) + + + if not first: + yield input, target + else: + first = False + + b = a*3 + torch.npu.current_stream().wait_stream(stream) + input = next_input + target = next_target + b = a/2 + yield input, target + + def __init__(self, dataloader, fp16): + self.dataloader = dataloader + self.fp16 = fp16 + self.epoch = 0 + + def __iter__(self): + if (self.dataloader.sampler is not None and + isinstance(self.dataloader.sampler, + torch.utils.data.distributed.DistributedSampler)): + self.dataloader.sampler.set_epoch(self.epoch) + self.epoch += 1 + + start = time.time() + ret = PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) + end = time.time() + print("prefetch time{}".format(end - start)) + + return ret + # return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) + + +def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): + traindir = os.path.join(data_path, 'train') + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + ])) + + if torch.distributed.is_initialized(): + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), + num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, + collate_fn=fast_collate, drop_last=True) + + return PrefetchedWrapper(train_loader, fp16), len(train_loader), train_sampler + + +def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False): + valdir = os.path.join(data_path, 'val') + val_dataset = datasets.ImageFolder( + valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + ])) + + if distributed: + val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) + else: + val_sampler = None + + dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader + val_loader = dataloader_fn( + val_dataset, + sampler=val_sampler, + batch_size=batch_size, shuffle=(val_sampler is None), + num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, collate_fn=fast_collate) + + return val_loader + + + if __name__ == '__main__': main() -- Gitee From 2c6fe4d31cf77147e1edc0dd79ee11260436a9e2 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 06:13:59 +0000 Subject: [PATCH 12/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index e7aa4c7acf..a593c3ed1f 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -36,6 +36,7 @@ import torchvision.datasets as datasets import torchvision.models as models import torch.npu import DistributedResnet50.image_classification.resnet as nvmodels +from DistributedResnet50.image_classification.multi_epochs_dataloader import MultiEpochsDataLoader from apex import amp BATCH_SIZE = 512 -- Gitee From 93df5045d5ccada580ac0e349272e76fa6459e03 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 07:17:10 +0000 Subject: [PATCH 13/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh. --- .../ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh index f2f584cd46..ac7ac7724d 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh @@ -88,7 +88,7 @@ python3.7 ./pytorch_resnet50_apex.py \ -b ${batch_size} \ --lr 0.2 \ --warmup 5 \ - --label-smoothing=0.0 \ + --label-smoothing=0.1 \ --epochs ${train_epochs} \ --graph_mode \ --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & -- Gitee From a21a9225d184f71c86d7b0243ea732cb4ff0bfa7 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 07:27:01 +0000 Subject: [PATCH 14/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index a593c3ed1f..7b7ac70c4f 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -508,6 +508,7 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar torch.npu.synchronize() # measure elapsed time + print("++++++++++++++++",time.time() - end) batch_time.update(time.time() - end) end = time.time() -- Gitee From bd2f7279f80c7729b14b5d7a0340377eda5c0ced Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 07:36:13 +0000 Subject: [PATCH 15/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index 7b7ac70c4f..bafada2920 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -502,8 +502,9 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar # 图模式 if args.graph_mode: - print("args.graph_mode") + print("torch.npu.launch_graph()") torch.npu.launch_graph() + print("launch end") if i == 100: torch.npu.synchronize() -- Gitee From 720c2531946247006f7e2af56e14c02dfa98263c Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 08:18:34 +0000 Subject: [PATCH 16/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index bafada2920..4590f6cb9c 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -472,9 +472,7 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar loss = criterion(output, target) # 图模式 - if args.graph_mode: - pass - else: + if not args.graph_mode: if args.label_smoothing > 0.0: target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) -- Gitee From 3da84997c55b2d65ca39788097e027333d702653 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 08:35:29 +0000 Subject: [PATCH 17/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index 4590f6cb9c..2c778f8fe7 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -560,9 +560,7 @@ def validate(val_loader, model, criterion, args): loss = criterion(output, target) # 图模式 - if args.graph_mode: - pass - else: + if not args.graph_mode: if args.label_smoothing > 0.0: target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) -- Gitee From c45a57fcfd73520c68fde92d46c0f902a059dccc Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 08:36:06 +0000 Subject: [PATCH 18/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index 2c778f8fe7..a3df61263a 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -500,14 +500,11 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar # 图模式 if args.graph_mode: - print("torch.npu.launch_graph()") torch.npu.launch_graph() - print("launch end") if i == 100: torch.npu.synchronize() # measure elapsed time - print("++++++++++++++++",time.time() - end) batch_time.update(time.time() - end) end = time.time() -- Gitee From a8632d07b490a38284d78d18e39f20bdf7d6bc2a Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 08:37:03 +0000 Subject: [PATCH 19/35] update main_apex_d76_npu.py. --- .../DistributedResnet50/main_apex_d76_npu.py | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py index 26edd676ce..20e5970023 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py @@ -588,9 +588,16 @@ def main_worker(gpu, ngpus_per_node, args): cudnn.benchmark = True # Data loading code - train_loader, train_loader_len, sampler = get_pytorch_train_loader(args.data, args.batch_size, - workers=args.workers, distributed=args.distributed) - + # 图模式 + if args.graph_mode: + train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data, + args.batch_size, + workers=args.workers, + fp16=True) + else: + train_loader, train_loader_len, sampler = get_pytorch_train_loader(args.data, args.batch_size, + workers=args.workers, distributed=args.distributed) + val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False) if args.evaluate: @@ -916,6 +923,27 @@ def fast_collate(batch): tensor[i] += torch.from_numpy(nump_array) return tensor, targets + +def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): + traindir = os.path.join(data_path, 'train') + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + ])) + + if torch.distributed.is_initialized(): + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), + num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, + collate_fn=fast_collate, drop_last=True) + + return PrefetchedWrapper(train_loader, fp16), len(train_loader), train_sampler def get_pytorch_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False): traindir = os.path.join(data_path, 'train') -- Gitee From b6ba196d54984f702aa9411fedba19abb606c829 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 08:58:26 +0000 Subject: [PATCH 20/35] update PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py. --- .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py index a3df61263a..6e9f8fb66a 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py @@ -739,13 +739,10 @@ class PrefetchedWrapper(object): stream = torch.npu.Stream() first = True - a = torch.empty([2,3]) for next_input, next_target in loader: with torch.npu.stream(stream): - b = a*2 next_input = next_input.npu(non_blocking=True) next_target = next_target.npu(non_blocking=True) - b = a/2 if fp16: next_input = next_input.half() @@ -760,11 +757,9 @@ class PrefetchedWrapper(object): else: first = False - b = a*3 torch.npu.current_stream().wait_stream(stream) input = next_input target = next_target - b = a/2 yield input, target def __init__(self, dataloader, fp16): @@ -778,14 +773,8 @@ class PrefetchedWrapper(object): torch.utils.data.distributed.DistributedSampler)): self.dataloader.sampler.set_epoch(self.epoch) self.epoch += 1 - - start = time.time() ret = PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) - end = time.time() - print("prefetch time{}".format(end - start)) - - return ret - # return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) + return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): -- Gitee From 8fd47e4d0078bb51feef802aa16471d1b1ec1a74 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 09:08:59 +0000 Subject: [PATCH 21/35] update train_ID3071_performance_1p.sh. --- .../ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh index ac7ac7724d..2492708ca6 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="ResNet50_ID3071_for_PyTorch" # 训练batch_size -batch_size=512 +batch_size=256 # 训练使用的npu卡数 export RANK_SIZE=1 # 数据集路径,保持为空,不需要修改 @@ -91,7 +91,7 @@ python3.7 ./pytorch_resnet50_apex.py \ --label-smoothing=0.1 \ --epochs ${train_epochs} \ --graph_mode \ - --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & + --optimizer-batch-size 256 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & wait -- Gitee From 1bae1f8bc5ad1ab31c643b8f7366f1f5ef02006a Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 09:11:05 +0000 Subject: [PATCH 22/35] update main_apex_d76_npu.py. --- .../DistributedResnet50/main_apex_d76_npu.py | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py index 20e5970023..1c982afe0d 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py @@ -923,7 +923,50 @@ def fast_collate(batch): tensor[i] += torch.from_numpy(nump_array) return tensor, targets - +class PrefetchedWrapper(object): + def prefetched_loader(loader, fp16): + mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1) + std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1) + if fp16: + mean = mean.half() + std = std.half() + + stream = torch.npu.Stream() + first = True + + for next_input, next_target in loader: + with torch.npu.stream(stream): + next_input = next_input.npu(non_blocking=True) + next_target = next_target.npu(non_blocking=True) + if fp16: + next_input = next_input.half() + else: + next_input = next_input.float( + + next_input = next_input.sub_(mean).div_(std) + if not first: + yield input, target + else: + first = False + + torch.npu.current_stream().wait_stream(stream) + input = next_input + target = next_target + yield input, target + + def __init__(self, dataloader, fp16): + self.dataloader = dataloader + self.fp16 = fp16 + self.epoch = 0 + + def __iter__(self): + if (self.dataloader.sampler is not None and + isinstance(self.dataloader.sampler, + torch.utils.data.distributed.DistributedSampler)): + self.dataloader.sampler.set_epoch(self.epoch) + self.epoch += 1 + return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) + def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): traindir = os.path.join(data_path, 'train') train_dataset = datasets.ImageFolder( -- Gitee From 1d5ede0eaef8ac2e4adb742ea8f3339dd3ba7e08 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 10:34:22 +0000 Subject: [PATCH 23/35] update main_apex_d76_npu.py. --- .../DistributedResnet50/main_apex_d76_npu.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py index 1c982afe0d..615548e3b5 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py @@ -923,6 +923,7 @@ def fast_collate(batch): tensor[i] += torch.from_numpy(nump_array) return tensor, targets + class PrefetchedWrapper(object): def prefetched_loader(loader, fp16): mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1) @@ -940,10 +941,13 @@ class PrefetchedWrapper(object): next_target = next_target.npu(non_blocking=True) if fp16: next_input = next_input.half() + else: - next_input = next_input.float( + next_input = next_input.float() next_input = next_input.sub_(mean).div_(std) + + if not first: yield input, target else: @@ -954,19 +958,6 @@ class PrefetchedWrapper(object): target = next_target yield input, target - def __init__(self, dataloader, fp16): - self.dataloader = dataloader - self.fp16 = fp16 - self.epoch = 0 - - def __iter__(self): - if (self.dataloader.sampler is not None and - isinstance(self.dataloader.sampler, - torch.utils.data.distributed.DistributedSampler)): - self.dataloader.sampler.set_epoch(self.epoch) - self.epoch += 1 - return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) - def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): traindir = os.path.join(data_path, 'train') train_dataset = datasets.ImageFolder( -- Gitee From 4d6afa1bbf3b557d5c94af944e559b3e97356b4f Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 10:36:43 +0000 Subject: [PATCH 24/35] update train_ID3078_Bert-Squad_performance_1p.sh. --- .../test/train_ID3078_Bert-Squad_performance_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh index 7c87c61096..4609c834db 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh @@ -143,7 +143,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'` -- Gitee From 67ca41d993eda6d490aa21838241d002f6573224 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 10:37:27 +0000 Subject: [PATCH 25/35] update train_ID3078_Bert-Squad_performance_8p.sh. --- .../test/train_ID3078_Bert-Squad_performance_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh index 7bf5ef040c..5efa082ada 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh @@ -144,7 +144,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'` -- Gitee From 7125dd1b464569f4618a53281d34df63d9825064 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 10:43:28 +0000 Subject: [PATCH 26/35] update main_apex_d76_npu.py. --- .../DistributedResnet50/main_apex_d76_npu.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py index 615548e3b5..3b98e7f94a 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py @@ -958,6 +958,19 @@ class PrefetchedWrapper(object): target = next_target yield input, target + def __init__(self, dataloader, fp16): + self.dataloader = dataloader + self.fp16 = fp16 + self.epoch = 0 + + def __iter__(self): + if (self.dataloader.sampler is not None and + isinstance(self.dataloader.sampler, + torch.utils.data.distributed.DistributedSampler)): + self.dataloader.sampler.set_epoch(self.epoch) + self.epoch += 1 + return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) + def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): traindir = os.path.join(data_path, 'train') train_dataset = datasets.ImageFolder( -- Gitee From 3d0088a7e2fd5c8db9de9096e04b32bff476bf5f Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 11:06:34 +0000 Subject: [PATCH 27/35] update train_ID3078_Bert-Squad_performance_8p.sh. --- .../test/train_ID3078_Bert-Squad_performance_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh index 5efa082ada..6cd8336833 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh @@ -144,7 +144,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk '{print$13}' | tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'` -- Gitee From 919a1f2802d254112ff97f8c9ee5db7124c51657 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 11:07:15 +0000 Subject: [PATCH 28/35] update train_ID3078_Bert-Squad_performance_1p.sh. --- .../test/train_ID3078_Bert-Squad_performance_1p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh index 4609c834db..68079e4635 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh @@ -143,7 +143,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk '{print$13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'` -- Gitee From 5c9cc071f8d058898ee5a351855ec180b791edff Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 11:10:29 +0000 Subject: [PATCH 29/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../run_squad.py | 116 +++++++++--------- 1 file changed, 57 insertions(+), 59 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 46d54012a5..21e87064d9 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -1114,65 +1114,63 @@ def main(): train_iter = train_dataloader step_start_time = time.time() for step, batch in enumerate(train_iter): - with torch.autograd.profiler.profile(use_npu=False) as prof: - # 图模式 - if args.graph_mode: - print("graph mode on") - torch.npu.enable_graph_mode() - # Terminate early for benchmarking - data_time = time.time() - step_start_time - if args.max_steps > 0 and global_step > args.max_steps: - break - - if n_npu == 1: - batch = tuple(t.to(device, non_blocking=True) for t in batch) # multi-gpu does scattering it-self - input_ids, input_mask, segment_ids, start_positions, end_positions = batch - start_logits, end_logits = model(input_ids, segment_ids, input_mask) - # If we are on multi-GPU, split add a dimension - if len(start_positions.size()) > 1: - start_positions = start_positions.squeeze(-1) - if len(end_positions.size()) > 1: - end_positions = end_positions.squeeze(-1) - # sometimes the start/end positions are outside our model inputs, we ignore these terms - ignored_index = start_logits.size(1) - start_positions.clamp_(0, ignored_index) - end_positions.clamp_(0, ignored_index) - - loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index) - start_loss = loss_fct(start_logits, start_positions) - end_loss = loss_fct(end_logits, end_positions) - loss = (start_loss + end_loss) / 2 - if n_npu > 1: - loss = loss.mean() # mean() to average on multi-gpu. - if args.gradient_accumulation_steps > 1: - loss = loss / args.gradient_accumulation_steps - if args.fp16: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - - - if (step + 1) % args.gradient_accumulation_steps == 0: - if args.fp16 : - # modify learning rate with special warm up for BERT which FusedAdam doesn't do - scheduler.step() - optimizer.step() - optimizer.zero_grad() - global_step += 1 - # 图模式 - if args.graph_mode: - print("graph mode launch") - torch.npu.launch_graph() - if step == args.max_steps: - print("graph mode synchronize") - torch.npu.synchronize() - # 图模式 - if args.graph_mode: - final_loss = 0.0 - else: - final_loss = loss.item() - prof.export_chrome_trace("./profiler_npu_%d.json"%step) + # 图模式 + if args.graph_mode: + print("graph mode on") + torch.npu.enable_graph_mode() + # Terminate early for benchmarking + data_time = time.time() - step_start_time + if args.max_steps > 0 and global_step > args.max_steps: + break + + if n_npu == 1: + batch = tuple(t.to(device, non_blocking=True) for t in batch) # multi-gpu does scattering it-self + input_ids, input_mask, segment_ids, start_positions, end_positions = batch + start_logits, end_logits = model(input_ids, segment_ids, input_mask) + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + ignored_index = start_logits.size(1) + start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + loss = (start_loss + end_loss) / 2 + if n_npu > 1: + loss = loss.mean() # mean() to average on multi-gpu. + if args.gradient_accumulation_steps > 1: + loss = loss / args.gradient_accumulation_steps + if args.fp16: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + + + if (step + 1) % args.gradient_accumulation_steps == 0: + if args.fp16 : + # modify learning rate with special warm up for BERT which FusedAdam doesn't do + scheduler.step() + optimizer.step() + optimizer.zero_grad() + global_step += 1 + # 图模式 + if args.graph_mode: + print("graph mode launch") + torch.npu.launch_graph() + if step == args.max_steps: + print("graph mode synchronize") + torch.npu.synchronize() + # 图模式 + if args.graph_mode: + final_loss = 0.0 + else: + final_loss = loss.item() step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, -- Gitee From 0603c4bcf0a04d10e71f3a67ac39e3d6f83df25f Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 11:12:53 +0000 Subject: [PATCH 30/35] update main_apex_d76_npu.py. --- .../DistributedResnet50/main_apex_d76_npu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py index 3b98e7f94a..ef66dc77c8 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py @@ -590,7 +590,7 @@ def main_worker(gpu, ngpus_per_node, args): # Data loading code # 图模式 if args.graph_mode: - train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data, + train_loader, train_loader_len, sampler = get_pytorch_train_loader_V2(args.data, args.batch_size, workers=args.workers, fp16=True) -- Gitee From 790328550ba93315d7900e6e4b036865e018c59b Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Apr 2022 13:59:35 +0000 Subject: [PATCH 31/35] update train_ID3071_ResNet50_performance_8p.sh. --- .../test/train_ID3071_ResNet50_performance_8p.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh index 0013d69590..8c7751bc4e 100644 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh +++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh @@ -5,7 +5,7 @@ # 网络名称,同目录名称 Network="ResNet50_ID3071_for_PyTorch" # 训练batch_size -batch_size=4096 +batch_size=512 # 训练使用的npu卡数 export RANK_SIZE=8 # 数据集路径,保持为空,不需要修改 -- Gitee From 7249a4c5bfa4c5a49ff1b01567cdecd316bfaca0 Mon Sep 17 00:00:00 2001 From: "rrrr.cao@hotmail.com" Date: Tue, 12 Apr 2022 10:13:06 +0800 Subject: [PATCH 32/35] add BertBase_ID0490_for_PyTorch graph mode --- .../BertBase_ID0490_for_PyTorch/run_squad.py | 32 ++- .../train_ID3075_BertBase_performance_1p.sh | 172 +++++++++++++++ .../train_ID3075_BertBase_performance_8p.sh | 200 ++++++++++++++++++ 3 files changed, 399 insertions(+), 5 deletions(-) create mode 100644 PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh create mode 100644 PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py index 34e9946322..91834c3f8a 100644 --- a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py +++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py @@ -897,7 +897,9 @@ def main(): default=None, type=str, help="addr used for distributed training") - + parser.add_argument('--graph_mode', + action='store_true', + help='whether to enable graph mode.') args = parser.parse_args() args.fp16 = args.fp16 or args.amp @@ -1119,12 +1121,20 @@ def main(): step_start_time = time.time() for step, batch in enumerate(train_iter): # Terminate early for benchmarking + # 图模式 + if args.graph_mode: + print("graph mode on") + torch.npu.enable_graph_mode() data_time = time.time() - step_start_time if args.max_steps > 0 and global_step > args.max_steps: break if n_npu == 1: - batch = tuple(t.to(device) for t in batch) # multi-gpu does scattering it-self + # 图模式 + if args.graph_mode: + batch = tuple(t.to(device, non_blocking=True) for t in batch) + else: + batch = tuple(t.to(device) for t in batch) # multi-gpu does scattering it-self input_ids, input_mask, segment_ids, start_positions, end_positions = batch start_logits, end_logits = model(input_ids, segment_ids, input_mask) # If we are on multi-GPU, split add a dimension @@ -1159,8 +1169,17 @@ def main(): optimizer.step() optimizer.zero_grad() global_step += 1 - - final_loss = loss.item() + # 图模式 + if args.graph_mode: + final_loss = 0 + else: + final_loss = loss.item() + # 图模式 + if args.graph_mode: + print("graph mode launch") + torch.npu.launch_graph() + if step == len(train_iter): + torch.npu.synchronize() step_time = time.time() - step_start_time if step % args.log_freq == 0: # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss, @@ -1171,7 +1190,10 @@ def main(): "step_loss": round(final_loss, 4), "iter/s": round(1 / step_time, 4), "learning_rate": round(optimizer.param_groups[0]['lr'], 10)}) step_start_time = time.time() - + # 图模式 + if args.graph_mode: + print("graph mode off") + torch.npu.disable_graph_mode() time_to_train = time.time() - train_start if args.do_train and is_main_process() and not args.skip_checkpoint: diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh new file mode 100644 index 0000000000..5be95b9f33 --- /dev/null +++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh @@ -0,0 +1,172 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` + +#集合通信参数,不需要修改 +RANK_ID_START=0 +export RANK_SIZE=1 +data_path="" +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="BertBase_ID3075_for_PyTorch" +#训练epoch +train_epochs=1 +#训练batch_size +batch_size=80 +learning_rate=8e-5 + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./train_performance_1P.sh " + echo " " + echo "parameter explain: + --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision) + --over_dump if or not over detection, default is False + --data_dump_flag data dump flag, default is False + --data_dump_step data dump step, default is 10 + --profiling if or not profiling for performance debug, default is False + --data_path source data of training + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +export RANK=0 +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); +do + #设置环境变量,不需要修改 + echo "Device ID: $ASCEND_DEVICE_ID" + export RANK_ID=$RANK_ID + + + + #创建DeviceID输出目录,不需要修改 + if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID} + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + else + mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt + fi + + + #执行训练脚本,以下传参不需要修改,其他需要模型审视修改 + #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path + nohup python3 run_squad.py \ + --init_checkpoint ${data_path}/pretrained/bert_base_pretrain.pt \ + --bert_model bert-large-uncased \ + --do_train \ + --train_file ${data_path}/squad/v1.1/train-v1.1.json \ + --train_batch_size ${batch_size} \ + --do_predict \ + --predict_batch_size ${batch_size} \ + --predict_file ${data_path}/squad/v1.1/dev-v1.1.json \ + --learning_rate ${learning_rate} \ + --num_train_epochs ${train_epochs} \ + --seed 1 \ + --fp16 \ + --max_steps 100 \ + --use_npu \ + --loss_scale 4096 \ + --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \ + --do_eval \ + --eval_script ${data_path}/squad/v1.1/evaluate-v1.1.py \ + --npu_id ${ASCEND_DEVICE_ID} \ + --do_lower_case \ + --output_dir ${cur_path}/../results \ + --config_file bert_base_config.json \ + --graph_mode \ + --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & +done +wait + + + +#conda deactivate +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +perf=`grep "step_loss" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "step_time : " '{print $2}'|awk -F " " '{print $1}'|tail -n +3|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g` +FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${perf}'}'` + + +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#输出训练精度,需要模型审视修改 +#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5` +#打印,不需要修改 +#echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +#稳定性精度看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据 +#吞吐量,不需要修改 +ActualFPS=${FPS} +#单迭代训练时长,不需要修改 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep "step_loss" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "step_loss : " '{print$2}'|awk -F " " '{print $1}'|sed s/[[:space:]]//g > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf ${data_path}/squad/v1.1/train-v1.1.json_bert-large-uncased_384_128_64 diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh new file mode 100644 index 0000000000..7370cfa3c3 --- /dev/null +++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh @@ -0,0 +1,200 @@ +#!/bin/bash + +#当前路径,不需要修改 +cur_path=`pwd` +#source ../env_npu.sh + +data_path="" +#集合通信参数,不需要修改 + +export RANK_SIZE=8 + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="BertBase_ID3075_for_PyTorch" +#训练batch_size +batch_size=80 + + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --precision_mode* ]];then + precision_mode=`echo ${para#*=}` + elif [[ $para == --over_dump* ]];then + over_dump=`echo ${para#*=}` + over_dump_path=${cur_path}/output/overflow_dump + mkdir -p ${over_dump_path} + elif [[ $para == --data_dump_flag* ]];then + data_dump_flag=`echo ${para#*=}` + data_dump_path=${cur_path}/output/data_dump + mkdir -p ${data_dump_path} + elif [[ $para == --data_dump_step* ]];then + data_dump_step=`echo ${para#*=}` + elif [[ $para == --profiling* ]];then + profiling=`echo ${para#*=}` + profiling_dump_path=${cur_path}/output/profiling + mkdir -p ${profiling_dump_path} + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --ckpt_path* ]];then + ckpt_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path/../ +init_checkpoint=${1:-"`${data_path}/pretrained/bert_base_pretrain.pt`"} +epochs=${2:-"1.0"} +batch_size=${3:-"80"} +learning_rate=${4:-"2e-4"} +precision=${5:-"fp16"} +num_npu=${6:-"8"} +seed=${7:-"1"} +squad_dir=${8:-"`${data_path}/squad/v1.1`"} +vocab_file=${9:-"data/uncased_L-24_H-1024_A-16/vocab.txt"} +OUT_DIR=${10:-"results/SQuAD"} +mode=${11:-"train eval"} +CONFIG_FILE=${12:-"bert_base_config.json"} +max_steps=${13:-"-1"} + +echo "out dir is $OUT_DIR" +mkdir -p $OUT_DIR +if [ ! -d "$OUT_DIR" ]; then + echo "ERROR: non existing $OUT_DIR" + exit 1 +fi + +use_fp16="" +if [ "$precision" = "fp16" ] ; then + echo "fp16 activated!" + use_fp16=" --fp16 " +fi + +CMD="python3.7 run_squad.py " +CMD+="--init_checkpoint=${data_path}/pretrained/bert_base_pretrain.pt " +if [ "$mode" = "train" ] ; then + CMD+="--do_train " + CMD+="--train_file=${data_path}/squad/v1.1/train-v1.1.json " + CMD+="--train_batch_size=$batch_size " +elif [ "$mode" = "eval" ] ; then + CMD+="--do_predict " + CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " + CMD+="--predict_batch_size=$batch_size " + CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py " + CMD+="--do_eval " +elif [ "$mode" = "prediction" ] ; then + CMD+="--do_predict " + CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " + CMD+="--predict_batch_size=$batch_size " +else + CMD+=" --do_train " + CMD+=" --train_file=${data_path}/squad/v1.1/train-v1.1.json " + CMD+=" --train_batch_size=$batch_size " + CMD+="--do_predict " + CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json " + CMD+="--predict_batch_size=$batch_size " + CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py " + CMD+="--do_eval " +fi + +CMD+=" --do_lower_case " +CMD+=" --bert_model=bert-large-uncased " +CMD+=" --learning_rate=$learning_rate " +CMD+=" --seed=$seed " +CMD+=" --num_train_epochs=$epochs " +CMD+=" --max_seq_length=384 " +CMD+=" --doc_stride=128 " +CMD+=" --output_dir=$OUT_DIR " +CMD+=" --vocab_file=$vocab_file " +CMD+=" --config_file=$CONFIG_FILE " +CMD+=" --max_steps=$max_steps " +CMD+=" $use_fp16" +CMD+=" --use_npu" +CMD+=" --num_npu=$num_npu" +CMD+=" --loss_scale=4096" +CMD+=" --addr=127.0.0.1" +CMD+=" --graph_mode" + +if [ $(uname -m) = "aarch64" ] +then + for i in $(seq 0 7) + do + let p_start=0+24*i + let p_end=23+24*i + export RANK=${i} + if [ -d ${cur_path}/output/${i} ];then + rm -rf ${cur_path}/output/${i} + mkdir -p ${cur_path}/output/$i + else + mkdir -p ${cur_path}/output/$i + fi + taskset -c $p_start-$p_end $CMD --local_rank=$i > ${cur_path}/output/${i}/train_${i}.log 2>&1 & + done +else + for i in $(seq 0 7) + do + export RANK=${i} + if [ -d ${cur_path}/output/${i} ];then + rm -rf ${cur_path}/output/${i} + mkdir -p ${cur_path}/output/$i + else + mkdir -p ${cur_path}/output/$i + fi + $CMD --local_rank=$i > ${cur_path}/output/${i}/train_${i}.log 2>&1 & + done +fi +wait + +ASCEND_DEVICE_ID=0 +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +iter=`grep 'Epoch: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "iter/s :" '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'` +FPS=`awk 'BEGIN{printf "%.2f\n",'${iter}'*8*'${batch_size}'}'` +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} +#单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'` + +#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 +grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt + +#最后一个迭代loss值,不需要修改 +ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log +rm -rf ${data_path}/squad/v1.1/train-v1.1.json_bert-large-uncased_384_128_64 \ No newline at end of file -- Gitee From cfd0d382a4e1ebf3023f4aa0020309c9d450a440 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 12 Apr 2022 11:47:27 +0000 Subject: [PATCH 33/35] =?UTF-8?q?update=20PyTorch/built-in/nlp/Bert-Squad?= =?UTF-8?q?=5FID0470=5Ffor=5FPyTorch/run=5Fsquad.py.=20=E5=B7=B2=E5=B0=86?= =?UTF-8?q?=E5=8D=95=E7=AE=97=E5=AD=90=E6=A8=A1=E5=BC=8FMM=5FBMM=5FND=5FEN?= =?UTF-8?q?ABLE=E5=88=A0=E9=99=A4=EF=BC=9B=20=E9=AA=8C=E8=AF=81=E6=80=A7?= =?UTF-8?q?=E8=83=BD0.26s=EF=BC=8Ctaskid=EF=BC=9Adebug00602423?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 21e87064d9..026ec27dfc 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -900,7 +900,14 @@ def main(): help='whether to enable graph mode.') args = parser.parse_args() - args.fp16 = args.fp16 or args.amp + args.fp16 = args.fp16 or args.amp + + option = {} + option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" + option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" + if args.graph_mode: + option["MM_BMM_ND_ENABLE"] = "enable" + torch.npu.set_option(option) if args.local_rank == -1 or args.no_cuda: if args.use_npu: @@ -1296,10 +1303,5 @@ def main(): dllogger.log(step=tuple(), data={"exact_match": exact_match, "F1": f1}) if __name__ == "__main__": - option = {} - option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" - option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" - option["MM_BMM_ND_ENABLE"] = "enable" - torch.npu.set_option(option) main() dllogger.flush() -- Gitee From 8c44f5f12363d66f1be8e56b1d1948e178a4c4a1 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 12 Apr 2022 11:51:03 +0000 Subject: [PATCH 34/35] update PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py. --- .../nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py index 026ec27dfc..4f91a27de2 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py @@ -902,13 +902,6 @@ def main(): args = parser.parse_args() args.fp16 = args.fp16 or args.amp - option = {} - option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" - option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" - if args.graph_mode: - option["MM_BMM_ND_ENABLE"] = "enable" - torch.npu.set_option(option) - if args.local_rank == -1 or args.no_cuda: if args.use_npu: torch.npu.set_device("npu:%d" % args.npu_id) @@ -1303,5 +1296,10 @@ def main(): dllogger.log(step=tuple(), data={"exact_match": exact_match, "F1": f1}) if __name__ == "__main__": + option = {} + option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance" + option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm" + option["MM_BMM_ND_ENABLE"] = "enable" + torch.npu.set_option(option) main() dllogger.flush() -- Gitee From 938723e9bd7b0948aff88367ef5a6a146a4045d6 Mon Sep 17 00:00:00 2001 From: "rrrr.cao@hotmail.com" Date: Thu, 14 Apr 2022 11:22:45 +0800 Subject: [PATCH 35/35] add BertBase graph mode --- .../image_classification/__init__.py | 62 - .../image_classification/dataloaders.py | 208 ---- .../image_classification/logger.py | 298 ----- .../image_classification/mixup.py | 69 -- .../multi_epochs_dataloader.py | 44 - .../image_classification/resnet.py | 389 ------- .../image_classification/smoothing.py | 83 -- .../image_classification/smoothing_tocpu.py | 95 -- .../image_classification/training.py | 518 --------- .../image_classification/utils.py | 94 -- .../ResNet50_for_PyTorch/Dockerfile | 6 - .../ResNet50_for_PyTorch/LICENSE | 29 - .../ResNet50_for_PyTorch/README.md | 53 - .../ResNet50_for_PyTorch/docker_start.sh | 25 - .../ResNet50_for_PyTorch/env_npu.sh | 71 -- .../ResNet50_for_PyTorch/eval.sh | 30 - .../infer/convert/aipp_resnet50.aippconfig | 27 - .../infer/convert/pb2om.sh | 13 - .../infer/docker_start_infer.sh | 38 - .../infer/mxbase/CMakeLists.txt | 49 - .../infer/mxbase/Resnet50Classify.cpp | 261 ----- .../infer/mxbase/Resnet50Classify.h | 59 - .../mxbase/classification_task_metric.py | 174 --- .../imagenet1000_clsidx_to_labels.names | 1001 ----------------- .../infer/mxbase/main.cpp | 69 -- .../infer/sdk/Resnet50.pipeline | 75 -- .../infer/sdk/classification_task_metric.py | 175 --- .../sdk/imagenet1000_clsidx_to_labels.names | 1001 ----------------- .../ResNet50_for_PyTorch/infer/sdk/main.py | 110 -- .../infer/sdk/resnet50_aipp_pt.cfg | 3 - .../ResNet50_for_PyTorch/infer/sdk/run.sh | 36 - .../modelarts/train_start.py | 688 ----------- .../ResNet50_for_PyTorch/modelzoo_level.txt | 3 - .../ResNet50_for_PyTorch/pthtar2onx.py | 69 -- .../pytorch_resnet50_apex.py | 827 -------------- .../ResNet50_for_PyTorch/requirements.txt | 3 - .../ResNet50_for_PyTorch/run_1p.sh | 31 - .../ResNet50_for_PyTorch/run_2p.sh | 44 - .../ResNet50_for_PyTorch/run_4p.sh | 43 - .../ResNet50_for_PyTorch/run_8p.sh | 41 - .../ResNet50_for_PyTorch/test/env_npu.sh | 71 -- .../train_ID3071_ResNet50_performance_8p.sh | 140 --- .../test/train_ID3071_performance_1p.sh | 151 --- .../test/train_eval_1p.sh | 131 --- .../test/train_full_1p.sh | 141 --- .../test/train_performance_1p.sh | 148 --- .../test/train_performance_1p.sh | 2 +- .../test/train_performance_8p.sh | 2 +- 48 files changed, 2 insertions(+), 7698 deletions(-) delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py deleted file mode 100644 index ba94822187..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -''' -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py deleted file mode 100644 index ee61fe073d..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION -# Copyright (c) 2017- Facebook, Inc -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import torch -import numpy as np -import torchvision.datasets as datasets -import torchvision.transforms as transforms -from PIL import Image - -DATA_BACKEND_CHOICES = ['pytorch', 'syntetic'] - -def load_jpeg_from_file(path, cuda=True, fp16=False): - img_transforms = transforms.Compose( - [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()] - ) - - img = img_transforms(Image.open(path)) - with torch.no_grad(): - # mean and std are not multiplied by 255 as they are in training script - # torch dataloader reads data into bytes whereas loading directly - # through PIL creates a tensor with floats in [0,1] range - mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1) - std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1) - - if cuda: - mean = mean.cuda() - std = std.cuda() - img = img.cuda() - if fp16: - mean = mean.half() - std = std.half() - img = img.half() - else: - img = img.float() - - input = img.unsqueeze(0).sub_(mean).div_(std) - - return input - -class DALIWrapper(object): - def gen_wrapper(dalipipeline, num_classes, one_hot): - for data in dalipipeline: - input = data[0]["data"] - target = torch.reshape(data[0]["label"], [-1]).cuda().long() - if one_hot: - target = expand(num_classes, torch.float, target) - yield input, target - dalipipeline.reset() - - def __init__(self, dalipipeline, num_classes, one_hot): - self.dalipipeline = dalipipeline - self.num_classes = num_classes - self.one_hot = one_hot - - def __iter__(self): - return DALIWrapper.gen_wrapper(self.dalipipeline, self.num_classes, self.one_hot) - -def fast_collate(batch): - imgs = [img[0] for img in batch] - targets = torch.tensor([target[1] for target in batch], dtype=torch.int64) - w = imgs[0].size[0] - h = imgs[0].size[1] - tensor = torch.zeros( (len(imgs), 3, h, w), dtype=torch.uint8 ) - for i, img in enumerate(imgs): - nump_array = np.asarray(img, dtype=np.uint8) - if(nump_array.ndim < 3): - nump_array = np.expand_dims(nump_array, axis=-1) - nump_array = np.rollaxis(nump_array, 2) - - tensor[i] += torch.from_numpy(nump_array) - - return tensor, targets - -def expand(num_classes, dtype, tensor): - e = torch.zeros(tensor.size(0), num_classes, dtype=dtype, device=torch.device('cuda')) - e = e.scatter(1, tensor.unsqueeze(1), 1.0) - return e - -class PrefetchedWrapper(object): - def prefetched_loader(loader, num_classes, fp16, one_hot): - mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1) - std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1) - if fp16: - mean = mean.half() - std = std.half() - - stream = torch.cuda.Stream() - first = True - - for next_input, next_target in loader: - with torch.cuda.stream(stream): - next_input = next_input.cuda(non_blocking=True) - next_target = next_target.cuda(non_blocking=True) - if fp16: - next_input = next_input.half() - if one_hot: - next_target = expand(num_classes, torch.half, next_target) - else: - next_input = next_input.float() - if one_hot: - next_target = expand(num_classes, torch.float, next_target) - - next_input = next_input.sub_(mean).div_(std) - - if not first: - yield input, target - else: - first = False - - torch.cuda.current_stream().wait_stream(stream) - input = next_input - target = next_target - - yield input, target - - def __init__(self, dataloader, num_classes, fp16, one_hot): - self.dataloader = dataloader - self.fp16 = fp16 - self.epoch = 0 - self.one_hot = one_hot - self.num_classes = num_classes - - def __iter__(self): - if (self.dataloader.sampler is not None and - isinstance(self.dataloader.sampler, - torch.utils.data.distributed.DistributedSampler)): - - self.dataloader.sampler.set_epoch(self.epoch) - self.epoch += 1 - return PrefetchedWrapper.prefetched_loader(self.dataloader, self.num_classes, self.fp16, self.one_hot) - -def get_pytorch_train_loader(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False): - traindir = os.path.join(data_path, 'train') - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - ])) - - if torch.distributed.is_initialized(): - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), - num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate, drop_last=True) - - return PrefetchedWrapper(train_loader, num_classes, fp16, one_hot), len(train_loader) - -def get_pytorch_val_loader(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False): - valdir = os.path.join(data_path, 'val') - val_dataset = datasets.ImageFolder( - valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - ])) - - if torch.distributed.is_initialized(): - val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) - else: - val_sampler = None - - val_loader = torch.utils.data.DataLoader( - val_dataset, - sampler=val_sampler, - batch_size=batch_size, shuffle=False, - num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, - collate_fn=fast_collate) - - return PrefetchedWrapper(val_loader, num_classes, fp16, one_hot), len(val_loader) - -class SynteticDataLoader(object): - def __init__(self, fp16, batch_size, num_classes, num_channels, height, width, one_hot): - input_data = torch.empty(batch_size, num_channels, height, width).cuda().normal_(0, 1.0) - if one_hot: - input_target = torch.empty(batch_size, num_classes).cuda() - input_target[:, 0] = 1.0 - else: - input_target = torch.randint(0, num_classes, (batch_size,)) - input_target=input_target.cuda() - if fp16: - input_data = input_data.half() - - self.input_data = input_data - self.input_target = input_target - - def __iter__(self): - while True: - yield self.input_data, self.input_target - -def get_syntetic_loader(data_path, batch_size, num_classes, one_hot, workers=None, _worker_init_fn=None, fp16=False): - return SynteticDataLoader(fp16, batch_size, 1000, 3, 224, 224, one_hot), -1 diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py deleted file mode 100644 index 5eb24a1fee..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION -# Copyright (c) 2017- Facebook, Inc -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from collections import OrderedDict -import dllogger -import numpy as np - - -def format_step(step): - if isinstance(step, str): - return step - s = "" - if len(step) > 0: - s += "Epoch: {} ".format(step[0]) - if len(step) > 1: - s += "Iteration: {} ".format(step[1]) - if len(step) > 2: - s += "Validation Iteration: {} ".format(step[2]) - if len(step) == 0: - s = "Summary:" - return s - - -PERF_METER = lambda: Meter(AverageMeter(), AverageMeter(), AverageMeter()) -LOSS_METER = lambda: Meter(AverageMeter(), AverageMeter(), MinMeter()) -ACC_METER = lambda: Meter(AverageMeter(), AverageMeter(), MaxMeter()) -LR_METER = lambda: Meter(LastMeter(), LastMeter(), LastMeter()) - -LAT_100 = lambda: Meter(QuantileMeter(1), QuantileMeter(1), QuantileMeter(1)) -LAT_99 = lambda: Meter(QuantileMeter(0.99), QuantileMeter(0.99), QuantileMeter(0.99)) -LAT_95 = lambda: Meter(QuantileMeter(0.95), QuantileMeter(0.95), QuantileMeter(0.95)) - -class Meter(object): - def __init__(self, iteration_aggregator, epoch_aggregator, run_aggregator): - self.run_aggregator = run_aggregator - self.epoch_aggregator = epoch_aggregator - self.iteration_aggregator = iteration_aggregator - - def record(self, val, n=1): - self.iteration_aggregator.record(val, n=n) - - def get_iteration(self): - v, n = self.iteration_aggregator.get_val() - return v - - def reset_iteration(self): - v, n = self.iteration_aggregator.get_data() - self.iteration_aggregator.reset() - if v is not None: - self.epoch_aggregator.record(v, n=n) - - def get_epoch(self): - v, n = self.epoch_aggregator.get_val() - return v - - def reset_epoch(self): - v, n = self.epoch_aggregator.get_data() - self.epoch_aggregator.reset() - if v is not None: - self.run_aggregator.record(v, n=n) - - def get_run(self): - v, n = self.run_aggregator.get_val() - return v - - def reset_run(self): - self.run_aggregator.reset() - - -class QuantileMeter(object): - def __init__(self, q): - self.q = q - self.reset() - - def reset(self): - self.vals = [] - self.n = 0 - - def record(self, val, n=1): - if isinstance(val, list): - self.vals += val - self.n += len(val) - else: - self.vals += [val] * n - self.n += n - - def get_val(self): - if not self.vals: - return None, self.n - return np.quantile(self.vals, self.q, interpolation='nearest'), self.n - - def get_data(self): - return self.vals, self.n - - -class MaxMeter(object): - def __init__(self): - self.reset() - - def reset(self): - self.max = None - self.n = 0 - - def record(self, val, n=1): - if self.max is None: - self.max = val - else: - self.max = max(self.max, val) - self.n = n - - def get_val(self): - return self.max, self.n - - def get_data(self): - return self.max, self.n - - -class MinMeter(object): - def __init__(self): - self.reset() - - def reset(self): - self.min = None - self.n = 0 - - def record(self, val, n=1): - if self.min is None: - self.min = val - else: - self.min = max(self.min, val) - self.n = n - - def get_val(self): - return self.min, self.n - - def get_data(self): - return self.min, self.n - - -class LastMeter(object): - def __init__(self): - self.reset() - - def reset(self): - self.last = None - self.n = 0 - - def record(self, val, n=1): - self.last = val - self.n = n - - def get_val(self): - return self.last, self.n - - def get_data(self): - return self.last, self.n - - -class AverageMeter(object): - def __init__(self): - self.reset() - - def reset(self): - self.n = 0 - self.val = 0 - - def record(self, val, n=1): - self.n += n - self.val += val * n - - def get_val(self): - if self.n == 0: - return None, 0 - return self.val / self.n, self.n - - def get_data(self): - if self.n == 0: - return None, 0 - return self.val / self.n, self.n - - -class Logger(object): - def __init__(self, print_interval, backends, verbose=False): - self.epoch = -1 - self.iteration = -1 - self.val_iteration = -1 - self.metrics = OrderedDict() - self.backends = backends - self.print_interval = print_interval - self.verbose = verbose - dllogger.init(backends) - - def log_parameter(self, data, verbosity=0): - dllogger.log(step="PARAMETER", data=data, verbosity=verbosity) - - def register_metric(self, metric_name, meter, verbosity=0, metadata={}): - if self.verbose: - print("Registering metric: {}".format(metric_name)) - self.metrics[metric_name] = {'meter': meter, 'level': verbosity} - dllogger.metadata(metric_name, metadata) - - def log_metric(self, metric_name, val, n=1): - self.metrics[metric_name]['meter'].record(val, n=n) - - def start_iteration(self, val=False): - if val: - self.val_iteration += 1 - else: - self.iteration += 1 - - def end_iteration(self, val=False): - it = self.val_iteration if val else self.iteration - if (it % self.print_interval == 0): - metrics = { - n: m - for n, m in self.metrics.items() if n.startswith('val') == val - } - step = (self.epoch, - self.iteration) if not val else (self.epoch, - self.iteration, - self.val_iteration) - - verbositys = {m['level'] for _, m in metrics.items()} - for ll in verbositys: - llm = {n: m for n, m in metrics.items() if m['level'] == ll} - - dllogger.log(step=step, - data={ - n: m['meter'].get_iteration() - for n, m in llm.items() - }, - verbosity=ll) - - for n, m in metrics.items(): - m['meter'].reset_iteration() - - dllogger.flush() - - def start_epoch(self): - self.epoch += 1 - self.iteration = 0 - self.val_iteration = 0 - - for n, m in self.metrics.items(): - m['meter'].reset_epoch() - - def end_epoch(self): - for n, m in self.metrics.items(): - m['meter'].reset_iteration() - - verbositys = {m['level'] for _, m in self.metrics.items()} - for ll in verbositys: - llm = {n: m for n, m in self.metrics.items() if m['level'] == ll} - dllogger.log(step=(self.epoch, ), - data={n: m['meter'].get_epoch() - for n, m in llm.items()}) - - def end(self): - for n, m in self.metrics.items(): - m['meter'].reset_epoch() - - verbositys = {m['level'] for _, m in self.metrics.items()} - for ll in verbositys: - llm = {n: m for n, m in self.metrics.items() if m['level'] == ll} - dllogger.log(step=tuple(), - data={n: m['meter'].get_run() - for n, m in llm.items()}) - - for n, m in self.metrics.items(): - m['meter'].reset_epoch() - - dllogger.flush() - - def iteration_generator_wrapper(self, gen, val=False): - for g in gen: - self.start_iteration(val=val) - yield g - self.end_iteration(val=val) - - def epoch_generator_wrapper(self, gen): - for g in gen: - self.start_epoch() - yield g - self.end_epoch() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py deleted file mode 100644 index ff98304306..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import numpy as np - - -def mixup(alpha, num_classes, data, target): - with torch.no_grad(): - bs = data.size(0) - c = np.random.beta(alpha, alpha) - - perm = torch.randperm(bs).cuda() - - md = c * data + (1-c) * data[perm, :] - mt = c * target + (1-c) * target[perm, :] - return md, mt - - -class MixUpWrapper(object): - def __init__(self, alpha, num_classes, dataloader): - self.alpha = alpha - self.dataloader = dataloader - self.num_classes = num_classes - - def mixup_loader(self, loader): - for input, target in loader: - i, t = mixup(self.alpha, self.num_classes, input, target) - yield i, t - - def __iter__(self): - return self.mixup_loader(self.dataloader) - - -class NLLMultiLabelSmooth(nn.Module): - def __init__(self, smoothing = 0.0): - super(NLLMultiLabelSmooth, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - def forward(self, x, target): - if self.training: - x = x.float() - target = target.float() - logprobs = torch.nn.functional.log_softmax(x, dim = -1) - - nll_loss = -logprobs * target - nll_loss = nll_loss.sum(-1) - - smooth_loss = -logprobs.mean(dim=-1) - - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - - return loss.mean() - else: - return torch.nn.functional.cross_entropy(x, target) diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py deleted file mode 100644 index 81fc0f10b8..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - -class MultiEpochsDataLoader(torch.utils.data.DataLoader): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._DataLoader__initialized = False - self.batch_sampler = _RepeatSampler(self.batch_sampler) - self._DataLoader__initialized = True - self.iterator = super().__iter__() - - def __len__(self): - return len(self.batch_sampler.sampler) - - def __iter__(self): - for _ in range(len(self)): - yield next(self.iterator) - -class _RepeatSampler(object): - """ - Sampler that repeats forever. - Args: - sampler (Sampler) - """ - - def __init__(self, sampler): - self.sampler = sampler - - def __iter__(self): - while True: - yield from iter(self.sampler) \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py deleted file mode 100644 index 5d3c2c9f53..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py +++ /dev/null @@ -1,389 +0,0 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION -# Copyright (c) 2017- Facebook, Inc -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -''' -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' - -import math -import torch -import torch.nn as nn -import numpy as np - -__all__ = ['ResNet', 'build_resnet', 'resnet_versions', 'resnet_configs'] - -# ResNetBuilder {{{ - -class ResNetBuilder(object): - def __init__(self, version, config): - self.conv3x3_cardinality = 1 if 'cardinality' not in version.keys() else version['cardinality'] - self.config = config - - def conv(self, kernel_size, in_planes, out_planes, groups=1, stride=1): - conv = nn.Conv2d( - in_planes, out_planes, - kernel_size=kernel_size, groups=groups, - stride=stride, padding=int((kernel_size - 1)/2), - bias=False) - - if self.config['nonlinearity'] == 'relu': - nn.init.kaiming_normal_(conv.weight, - mode=self.config['conv_init'], - nonlinearity=self.config['nonlinearity']) - - return conv - - def conv3x3(self, in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - c = self.conv(3, in_planes, out_planes, groups=self.conv3x3_cardinality, stride=stride) - return c - - def conv1x1(self, in_planes, out_planes, stride=1): - """1x1 convolution with padding""" - c = self.conv(1, in_planes, out_planes, stride=stride) - return c - - def conv7x7(self, in_planes, out_planes, stride=1): - """7x7 convolution with padding""" - c = self.conv(7, in_planes, out_planes, stride=stride) - return c - - def conv5x5(self, in_planes, out_planes, stride=1): - """5x5 convolution with padding""" - c = self.conv(5, in_planes, out_planes, stride=stride) - return c - - def batchnorm(self, planes, last_bn=False): - bn = nn.BatchNorm2d(planes) - gamma_init_val = 0 if last_bn and self.config['last_bn_0_init'] else 1 - nn.init.constant_(bn.weight, gamma_init_val) - nn.init.constant_(bn.bias, 0) - - return bn - - def activation(self): - return self.config['activation']() - -# ResNetBuilder }}} - -# BasicBlock {{{ -class BasicBlock(nn.Module): - def __init__(self, builder, inplanes, planes, expansion, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = builder.conv3x3(inplanes, planes, stride) - self.bn1 = builder.batchnorm(planes) - self.relu = builder.activation() - self.conv2 = builder.conv3x3(planes, planes*expansion) - self.bn2 = builder.batchnorm(planes*expansion, last_bn=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - if self.bn1 is not None: - out = self.bn1(out) - - out = self.relu(out) - - out = self.conv2(out) - - if self.bn2 is not None: - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out -# BasicBlock }}} - -# SqueezeAndExcitation {{{ -class SqueezeAndExcitation(nn.Module): - def __init__(self, planes, squeeze): - super(SqueezeAndExcitation, self).__init__() - self.squeeze = nn.Linear(planes, squeeze) - self.expand = nn.Linear(squeeze, planes) - self.relu = nn.ReLU(inplace=True) - self.sigmoid = nn.Sigmoid() - - def forward(self, x): - out = torch.mean(x.view(x.size(0), x.size(1), -1), 2) - out = self.squeeze(out) - out = self.relu(out) - out = self.expand(out) - out = self.sigmoid(out) - out = out.unsqueeze(2).unsqueeze(3) - - return out - -# }}} - -# Bottleneck {{{ -class Bottleneck(nn.Module): - def __init__(self, builder, inplanes, planes, expansion, stride=1, se=False, se_squeeze=16, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = builder.conv1x1(inplanes, planes) - self.bn1 = builder.batchnorm(planes) - self.conv2 = builder.conv3x3(planes, planes, stride=stride) - self.bn2 = builder.batchnorm(planes) - self.conv3 = builder.conv1x1(planes, planes * expansion) - self.bn3 = builder.batchnorm(planes * expansion, last_bn=True) - self.relu = builder.activation() - self.downsample = downsample - self.stride = stride - self.squeeze = SqueezeAndExcitation(planes*expansion, se_squeeze) if se else None - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - if self.squeeze is None: - out += residual - else: - out = torch.addcmul(residual, 1.0, out, self.squeeze(out)) - - out = self.relu(out) - - return out - -def SEBottleneck(builder, inplanes, planes, expansion, stride=1, downsample=None): - return Bottleneck(builder, inplanes, planes, expansion, stride=stride, se=True, se_squeeze=16, downsample=downsample) -# Bottleneck }}} - -# ResNet {{{ -class ResNet(nn.Module): - def __init__(self, builder, block, expansion, layers, widths, num_classes=1000): - self.inplanes = 64 - super(ResNet, self).__init__() - self.conv1 = builder.conv7x7(3, 64, stride=2) - self.bn1 = builder.batchnorm(64) - self.relu = builder.activation() - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(builder, block, expansion, widths[0], layers[0]) - self.layer2 = self._make_layer(builder, block, expansion, widths[1], layers[1], stride=2) - self.layer3 = self._make_layer(builder, block, expansion, widths[2], layers[2], stride=2) - self.layer4 = self._make_layer(builder, block, expansion, widths[3], layers[3], stride=2) - self.avgpool = nn.AdaptiveAvgPool2d(1) - self.fc = nn.Linear(widths[3] * expansion, num_classes) - - def _make_layer(self, builder, block, expansion, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * expansion: - dconv = builder.conv1x1(self.inplanes, planes * expansion, - stride=stride) - dbn = builder.batchnorm(planes * expansion) - if dbn is not None: - downsample = nn.Sequential(dconv, dbn) - else: - downsample = dconv - - layers = [] - layers.append(block(builder, self.inplanes, planes, expansion, stride=stride, downsample=downsample)) - self.inplanes = planes * expansion - for i in range(1, blocks): - layers.append(block(builder, self.inplanes, planes, expansion)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - if self.bn1 is not None: - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x -# ResNet }}} - -resnet_configs = { - 'classic' : { - 'conv' : nn.Conv2d, - 'conv_init' : 'fan_out', - 'nonlinearity' : 'relu', - 'last_bn_0_init' : False, - 'activation' : lambda: nn.ReLU(inplace=True), - }, - 'fanin' : { - 'conv' : nn.Conv2d, - 'conv_init' : 'fan_in', - 'nonlinearity' : 'relu', - 'last_bn_0_init' : False, - 'activation' : lambda: nn.ReLU(inplace=True), - }, - 'grp-fanin' : { - 'conv' : nn.Conv2d, - 'conv_init' : 'fan_in', - 'nonlinearity' : 'relu', - 'last_bn_0_init' : False, - 'activation' : lambda: nn.ReLU(inplace=True), - }, - 'grp-fanout' : { - 'conv' : nn.Conv2d, - 'conv_init' : 'fan_out', - 'nonlinearity' : 'relu', - 'last_bn_0_init' : False, - 'activation' : lambda: nn.ReLU(inplace=True), - }, - } - -resnet_versions = { - 'resnet18' : { - 'net' : ResNet, - 'block' : BasicBlock, - 'layers' : [2, 2, 2, 2], - 'widths' : [64, 128, 256, 512], - 'expansion' : 1, - 'num_classes' : 1000, - }, - 'resnet34' : { - 'net' : ResNet, - 'block' : BasicBlock, - 'layers' : [3, 4, 6, 3], - 'widths' : [64, 128, 256, 512], - 'expansion' : 1, - 'num_classes' : 1000, - }, - 'resnet50' : { - 'net' : ResNet, - 'block' : Bottleneck, - 'layers' : [3, 4, 6, 3], - 'widths' : [64, 128, 256, 512], - 'expansion' : 4, - 'num_classes' : 1000, - }, - 'resnet101' : { - 'net' : ResNet, - 'block' : Bottleneck, - 'layers' : [3, 4, 23, 3], - 'widths' : [64, 128, 256, 512], - 'expansion' : 4, - 'num_classes' : 1000, - }, - 'resnet152' : { - 'net' : ResNet, - 'block' : Bottleneck, - 'layers' : [3, 8, 36, 3], - 'widths' : [64, 128, 256, 512], - 'expansion' : 4, - 'num_classes' : 1000, - }, - 'resnext101-32x4d' : { - 'net' : ResNet, - 'block' : Bottleneck, - 'cardinality' : 32, - 'layers' : [3, 4, 23, 3], - 'widths' : [128, 256, 512, 1024], - 'expansion' : 2, - 'num_classes' : 1000, - }, - 'se-resnext101-32x4d' : { - 'net' : ResNet, - 'block' : SEBottleneck, - 'cardinality' : 32, - 'layers' : [3, 4, 23, 3], - 'widths' : [128, 256, 512, 1024], - 'expansion' : 2, - 'num_classes' : 1000, - }, - } - - -def build_resnet(version, config, verbose=True): - version = resnet_versions[version] - config = resnet_configs[config] - - builder = ResNetBuilder(version, config) - if verbose: - print("Version: {}".format(version)) - print("Config: {}".format(config)) - model = version['net'](builder, - version['block'], - version['expansion'], - version['layers'], - version['widths'], - version['num_classes']) - - return model diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py deleted file mode 100644 index 408718aaf2..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import torch -import torch.nn as nn - - -class CrossEntropy(nn.CrossEntropyLoss): - def __init__(self, smooth_factor=0., num_classes=1000): - super(CrossEntropy, self).__init__() - self.on_value = 1.0 - smooth_factor - self.off_value = 1.0 * smooth_factor / (num_classes - 1) - - def forward(self, input, target): - one_hot_label = torch.npu_one_hot(target, -1, input.size(1), self.on_value, self.off_value) - one_hot_label = one_hot_label.to(torch.float16) - loss = torch.npu_softmax_cross_entropy_with_logits(input.to(torch.float16), one_hot_label) - - loss = torch.mean(loss, [0], keepdim=False, dtype=torch.float32) - return loss - -class LabelSmoothingNpu(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - - :param smoothing: label smoothing factor - """ - super(LabelSmoothingNpu, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - self.epsilon = 0.1 - self.num_classes = 1000 - - def forward(self, x, target): - CALCULATE_DEVICE = x.device - logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu") - - targets = torch.zeros_like(logprobs).scatter_(1, target.unsqueeze(1), 1) - targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes - loss = (-targets * logprobs).mean(0).sum() - - return loss.to(CALCULATE_DEVICE) - -class LabelSmoothingGpu(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - - :param smoothing: label smoothing factor - """ - super(LabelSmoothingGpu, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - def forward(self, x, target): - logprobs = torch.nn.functional.log_softmax(x, dim=-1) - - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py deleted file mode 100644 index 6ec5b51765..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -''' -BSD 3-Clause License - -Copyright (c) Soumith Chintala 2016, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://spdx.org/licenses/BSD-3-Clause.html -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' - -import torch -import torch.nn as nn - -class LabelSmoothing(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - - :param smoothing: label smoothing factor - """ - super(LabelSmoothing, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - def forward(self, x, target): - device_x = x.device - device_target = target.device - x = x.to("cpu") - target = target.to("cpu") - logprobs = torch.nn.functional.log_softmax(x, dim=-1) - - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - - x = x.to(device_x) - target = target.to(device_target) - return loss.mean() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py deleted file mode 100644 index 55f7f017d0..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py +++ /dev/null @@ -1,518 +0,0 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION -# Copyright (c) 2017- Facebook, Inc -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import numpy as np -import torch -import torch.nn as nn -from torch.autograd import Variable -from . import logger as log -from . import resnet as nvmodels -from . import utils -import dllogger -try: - from apex.fp16_utils import * - from apex import amp -except ImportError: - raise ImportError( - "Please install apex from https://www.github.com/nvidia/apex to run this example." - ) - -ACC_METADATA = {'unit': '%','format': ':.2f'} -IPS_METADATA = {'unit': 'img/s', 'format': ':.2f'} -TIME_METADATA = {'unit': 's', 'format': ':.5f'} -LOSS_METADATA = {'format': ':.5f'} - -class ModelAndLoss(nn.Module): - def __init__(self, - arch, - loss, - pretrained_weights=None, - cuda=True, - fp16=False): - super(ModelAndLoss, self).__init__() - self.arch = arch - - print("=> creating model '{}'".format(arch)) - model = nvmodels.build_resnet(arch[0], arch[1]) - if pretrained_weights is not None: - print("=> using pre-trained model from a file '{}'".format(arch)) - model.load_state_dict(pretrained_weights) - - if cuda: - model = model.cuda() - if fp16: - model = network_to_half(model) - - # define loss function (criterion) and optimizer - criterion = loss() - - if cuda: - criterion = criterion.cuda() - - self.model = model - self.loss = criterion - - def forward(self, data, target): - output = self.model(data) - loss = self.loss(output, target) - - return loss, output - - def distributed(self): - return - - def load_model_state(self, state): - if not state is None: - self.model.load_state_dict(state) - - -def get_optimizer(parameters, - fp16, - lr, - momentum, - weight_decay, - nesterov=False, - state=None, - static_loss_scale=1., - dynamic_loss_scale=False, - bn_weight_decay=False): - - if bn_weight_decay: - print(" ! Weight decay applied to BN parameters ") - optimizer = torch.optim.SGD([v for n, v in parameters], - lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=nesterov) - else: - print(" ! Weight decay NOT applied to BN parameters ") - bn_params = [v for n, v in parameters if 'bn' in n] - rest_params = [v for n, v in parameters if not 'bn' in n] - print(len(bn_params)) - print(len(rest_params)) - optimizer = torch.optim.SGD([{ - 'params': bn_params, - 'weight_decay': 0 - }, { - 'params': rest_params, - 'weight_decay': weight_decay - }], - lr, - momentum=momentum, - weight_decay=weight_decay, - nesterov=nesterov) - if fp16: - optimizer = FP16_Optimizer(optimizer, - static_loss_scale=static_loss_scale, - dynamic_loss_scale=dynamic_loss_scale, - verbose=False) - - if not state is None: - optimizer.load_state_dict(state) - - return optimizer - - -def lr_policy(lr_fn, logger=None): - if logger is not None: - logger.register_metric('lr', - log.LR_METER(), - verbosity=dllogger.Verbosity.VERBOSE) - - def _alr(optimizer, iteration, epoch): - lr = lr_fn(iteration, epoch) - - if logger is not None: - logger.log_metric('lr', lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - return _alr - - -def lr_step_policy(base_lr, steps, decay_factor, warmup_length, logger=None): - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - lr = base_lr - for s in steps: - if epoch >= s: - lr *= decay_factor - return lr - - return lr_policy(_lr_fn, logger=logger) - - -def lr_linear_policy(base_lr, warmup_length, epochs, logger=None): - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - es = epochs - warmup_length - lr = base_lr * (1 - (e / es)) - return lr - - return lr_policy(_lr_fn, logger=logger) - - -def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None): - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - es = epochs - warmup_length - lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr - return lr - - return lr_policy(_lr_fn, logger=logger) - - -def lr_exponential_policy(base_lr, - warmup_length, - epochs, - final_multiplier=0.001, - logger=None): - es = epochs - warmup_length - epoch_decay = np.power(2, np.log2(final_multiplier) / es) - - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - lr = base_lr * (epoch_decay**e) - return lr - - return lr_policy(_lr_fn, logger=logger) - - -def get_train_step(model_and_loss, - optimizer, - fp16, - use_amp=False, - batch_size_multiplier=1): - def _step(input, target, optimizer_step=True): - input_var = Variable(input) - target_var = Variable(target) - loss, output = model_and_loss(input_var, target_var) - if torch.distributed.is_initialized(): - print('utils.reduce_tensor(loss.data)') - reduced_loss = utils.reduce_tensor(loss.data) - else: - reduced_loss = loss.data - - if fp16: - optimizer.backward(loss) - elif use_amp: - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - - if optimizer_step: - opt = optimizer.optimizer if isinstance( - optimizer, FP16_Optimizer) else optimizer - for param_group in opt.param_groups: - for param in param_group['params']: - param.grad /= batch_size_multiplier - - optimizer.step() - optimizer.zero_grad() - - torch.cuda.synchronize() - - return reduced_loss - - return _step - - -def train(train_loader, - model_and_loss, - optimizer, - lr_scheduler, - fp16, - logger, - epoch, - use_amp=False, - prof=-1, - batch_size_multiplier=1, - register_metrics=True): - - if register_metrics and logger is not None: - logger.register_metric('train.loss', - log.LOSS_METER(), - verbosity=dllogger.Verbosity.DEFAULT, - metadata=LOSS_METADATA) - logger.register_metric('train.compute_ips', - log.PERF_METER(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=IPS_METADATA) - logger.register_metric('train.total_ips', - log.PERF_METER(), - verbosity=dllogger.Verbosity.DEFAULT, - metadata=IPS_METADATA) - logger.register_metric('train.data_time', - log.PERF_METER(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - logger.register_metric('train.compute_time', - log.PERF_METER(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - - step = get_train_step(model_and_loss, - optimizer, - fp16, - use_amp=use_amp, - batch_size_multiplier=batch_size_multiplier) - - model_and_loss.train() - end = time.time() - - optimizer.zero_grad() - - data_iter = enumerate(train_loader) - if logger is not None: - data_iter = logger.iteration_generator_wrapper(data_iter) - if prof > 0: - data_iter = utils.first_n(prof, data_iter) - - for i, (input, target) in data_iter: - bs = input.size(0) - lr_scheduler(optimizer, i, epoch) - data_time = time.time() - end - - optimizer_step = ((i + 1) % batch_size_multiplier) == 0 - loss = step(input, target, optimizer_step=optimizer_step) - - it_time = time.time() - end - - if logger is not None: - logger.log_metric('train.loss', to_python_float(loss), bs) - logger.log_metric('train.compute_ips', - calc_ips(bs, it_time - data_time)) - logger.log_metric('train.total_ips', calc_ips(bs, it_time)) - logger.log_metric('train.data_time', data_time) - logger.log_metric('train.compute_time', it_time - data_time) - - end = time.time() - - -def get_val_step(model_and_loss): - def _step(input, target): - input_var = Variable(input) - target_var = Variable(target) - - with torch.no_grad(): - loss, output = model_and_loss(input_var, target_var) - - prec1, prec5 = utils.accuracy(output.data, target, topk=(1, 5)) - - if torch.distributed.is_initialized(): - reduced_loss = utils.reduce_tensor(loss.data) - prec1 = utils.reduce_tensor(prec1) - prec5 = utils.reduce_tensor(prec5) - else: - reduced_loss = loss.data - - torch.cuda.synchronize() - - return reduced_loss, prec1, prec5 - - return _step - - -def validate(val_loader, - model_and_loss, - fp16, - logger, - epoch, - prof=-1, - register_metrics=True): - if register_metrics and logger is not None: - logger.register_metric('val.top1', - log.ACC_METER(), - verbosity=dllogger.Verbosity.DEFAULT, - metadata=ACC_METADATA) - logger.register_metric('val.top5', - log.ACC_METER(), - verbosity=dllogger.Verbosity.DEFAULT, - metadata=ACC_METADATA) - logger.register_metric('val.loss', - log.LOSS_METER(), - verbosity=dllogger.Verbosity.DEFAULT, - metadata=LOSS_METADATA) - logger.register_metric('val.compute_ips', - log.PERF_METER(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=IPS_METADATA) - logger.register_metric('val.total_ips', - log.PERF_METER(), - verbosity=dllogger.Verbosity.DEFAULT, - metadata=IPS_METADATA) - logger.register_metric('val.data_time', - log.PERF_METER(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - logger.register_metric('val.compute_latency', - log.PERF_METER(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - logger.register_metric('val.compute_latency_at100', - log.LAT_100(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - logger.register_metric('val.compute_latency_at99', - log.LAT_99(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - logger.register_metric('val.compute_latency_at95', - log.LAT_95(), - verbosity=dllogger.Verbosity.VERBOSE, - metadata=TIME_METADATA) - - - step = get_val_step(model_and_loss) - - top1 = log.AverageMeter() - # switch to evaluate mode - model_and_loss.eval() - - end = time.time() - - data_iter = enumerate(val_loader) - if not logger is None: - data_iter = logger.iteration_generator_wrapper(data_iter, val=True) - if prof > 0: - data_iter = utils.first_n(prof, data_iter) - - for i, (input, target) in data_iter: - bs = input.size(0) - data_time = time.time() - end - - loss, prec1, prec5 = step(input, target) - - it_time = time.time() - end - - top1.record(to_python_float(prec1), bs) - if logger is not None: - logger.log_metric('val.top1', to_python_float(prec1), bs) - logger.log_metric('val.top5', to_python_float(prec5), bs) - logger.log_metric('val.loss', to_python_float(loss), bs) - logger.log_metric('val.compute_ips', - calc_ips(bs, it_time - data_time)) - logger.log_metric('val.total_ips', calc_ips(bs, it_time)) - logger.log_metric('val.data_time', data_time) - logger.log_metric('val.compute_latency', it_time - data_time) - logger.log_metric('val.compute_latency_at95', it_time - data_time) - logger.log_metric('val.compute_latency_at99', it_time - data_time) - logger.log_metric('val.compute_latency_at100', it_time - data_time) - - end = time.time() - - return top1.get_val() - - -# Train loop {{{ -def calc_ips(batch_size, time): - world_size = torch.distributed.get_world_size( - ) if torch.distributed.is_initialized() else 1 - tbs = world_size * batch_size - return tbs / time - - -def train_loop(model_and_loss, - optimizer, - lr_scheduler, - train_loader, - val_loader, - epochs, - fp16, - logger, - should_backup_checkpoint, - use_amp=False, - batch_size_multiplier=1, - best_prec1=0, - start_epoch=0, - prof=-1, - skip_training=False, - skip_validation=False, - save_checkpoints=True, - checkpoint_dir='./'): - - prec1 = -1 - - epoch_iter = range(start_epoch, epochs) - for epoch in epoch_iter: - if logger is not None: - logger.start_epoch() - if not skip_training: - train(train_loader, - model_and_loss, - optimizer, - lr_scheduler, - fp16, - logger, - epoch, - use_amp=use_amp, - prof=prof, - register_metrics=epoch == start_epoch, - batch_size_multiplier=batch_size_multiplier) - - if not skip_validation: - prec1, nimg = validate(val_loader, - model_and_loss, - fp16, - logger, - epoch, - prof=prof, - register_metrics=epoch == start_epoch) - if logger is not None: - logger.end_epoch() - - if save_checkpoints and (not torch.distributed.is_initialized() - or torch.distributed.get_rank() == 0): - if not skip_validation: - is_best = logger.metrics['val.top1']['meter'].get_epoch() > best_prec1 - best_prec1 = max(logger.metrics['val.top1']['meter'].get_epoch(), - best_prec1) - else: - is_best = False - best_prec1 = 0 - - if should_backup_checkpoint(epoch): - backup_filename = 'checkpoint-{}.pth.tar'.format(epoch + 1) - else: - backup_filename = None - utils.save_checkpoint( - { - 'epoch': epoch + 1, - 'arch': model_and_loss.arch, - 'state_dict': model_and_loss.model.state_dict(), - 'best_prec1': best_prec1, - 'optimizer': optimizer.state_dict(), - }, - is_best, - checkpoint_dir=checkpoint_dir, - backup_filename=backup_filename) - - -# }}} diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py deleted file mode 100644 index a187d4e6f8..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION -# Copyright (c) 2017- Facebook, Inc -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import numpy as np -import torch -import shutil -import torch.distributed as dist - - -def should_backup_checkpoint(args): - def _sbc(epoch): - return args.gather_checkpoints and (epoch < 10 or epoch % 10 == 0) - - return _sbc - - -def save_checkpoint(state, - is_best, - filename='checkpoint.pth.tar', - checkpoint_dir='./', - backup_filename=None): - if (not torch.distributed.is_initialized() - ) or torch.distributed.get_rank() == 0: - filename = os.path.join(checkpoint_dir, filename) - print("SAVING {}".format(filename)) - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, - os.path.join(checkpoint_dir, 'model_best.pth.tar')) - if backup_filename is not None: - shutil.copyfile(filename, - os.path.join(checkpoint_dir, backup_filename)) - - -def timed_generator(gen): - start = time.time() - for g in gen: - end = time.time() - t = end - start - yield g, t - start = time.time() - - -def timed_function(f): - def _timed_function(*args, **kwargs): - start = time.time() - ret = f(*args, **kwargs) - return ret, time.time() - start - - return _timed_function - - -def accuracy(output, target, topk=(1, )): - """Computes the precision@k for the specified values of k""" - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -def reduce_tensor(tensor): - rt = tensor.clone() - dist.all_reduce(rt, op=dist.ReduceOp.SUM) - rt /= torch.distributed.get_world_size( - ) if torch.distributed.is_initialized() else 1 - return rt - - -def first_n(n, generator): - for i, d in zip(range(n), generator): - yield d diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile deleted file mode 100644 index 271998f958..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -ARG FROM_IMAGE_NAME -FROM $FROM_IMAGE_NAME - -COPY requirements.txt . -RUN pip3.7 install -r requirements.txt - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE deleted file mode 100644 index dfcc682b4b..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2017, -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md deleted file mode 100644 index 291e2e2c31..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md +++ /dev/null @@ -1,53 +0,0 @@ -一、依赖 -* NPU配套的run包安装 -* Python 3.7.5 -* PyTorch(NPU版本) -* apex(NPU版本) -* torch(NPU版本) -* torchvision -* dllogger - -二、训练流程: - -单卡训练流程: - -``` - 1.安装环境 - 2.修改run_1p.sh字段"data"为当前磁盘的数据集路径 - 3.修改字段device_id(单卡训练所使用的device id),为训练配置device_id,比如device_id=0 - 4.cd到run_1p.sh文件的目录,执行bash run_1p.sh单卡脚本, 进行单卡训练 -``` - - -多卡训练流程 - -``` - 1.安装环境 - 2.修改多P脚本中字段"data"为当前磁盘的数据集路径 - 3.修改字段device_id_list(多卡训练所使用的device id列表),为训练配置device_id,比如4p,device_id_list=0,1,2,3;8P默认使用0,1,2,3,4,5,6,7卡不用配置 - 4.cd到run_8p.sh文件的目录,执行bash run_8p.sh等多卡脚本, 进行多卡训练 -``` - - - - -三、Docker容器训练: - -1.导入镜像二进制包docker import ubuntuarmpytorch.tar REPOSITORY:TAG, 比如: - - docker import ubuntuarmpytorch.tar pytorch:b020 - -2.执行docker_start.sh后带三个参数:步骤1生成的REPOSITORY:TAG;数据集路径;模型执行路径;比如: - - ./docker_start.sh pytorch:b020 /train/imagenet /home/ResNet50 - -3.执行步骤一训练流程(环境安装除外) - - -四、测试结果 - -训练日志路径:在训练脚本的同目录下result文件夹里,如: - - /home/ResNet50/result/training_8p_job_20201121023601 - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh deleted file mode 100644 index 944bca3cda..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -docker_image=$1 -data_dir=$2 -model_dir=$3 - -docker run -it --ipc=host \ - --device=/dev/davinci0 \ - --device=/dev/davinci1 \ - --device=/dev/davinci2 \ - --device=/dev/davinci3 \ - --device=/dev/davinci4 \ - --device=/dev/davinci5 \ - --device=/dev/davinci6 \ - --device=/dev/davinci7 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ - -v ${model_dir}:${model_dir} \ - -v ${data_dir}:${data_dir} \ - -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \ - -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \ - -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \ - /bin/bash \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh deleted file mode 100644 index 84d83feb94..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -export install_path=/usr/local/Ascend - -if [ -d ${install_path}/toolkit ]; then - export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} - export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH - export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH - export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH - export ASCEND_OPP_PATH=${install_path}/opp -else - if [ -d ${install_path}/nnae/latest ];then - export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/nnae/latest - else - export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest - fi -fi - - -#将Host日志输出到串口,0-关闭/1-开启 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#设置默认日志级别,0-debug/1-info/2-warning/3-error -export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#设置是否开启taskque,0-关闭/1-开启 -export TASK_QUEUE_ENABLE=1 -#HCCL白名单开关,1-关闭/0-开启 -export HCCL_WHITELIST_DISABLE=1 - -#设置device侧日志登记为error -${install_path}/driver/tools/msnpureport -g error -d 0 -${install_path}/driver/tools/msnpureport -g error -d 1 -${install_path}/driver/tools/msnpureport -g error -d 2 -${install_path}/driver/tools/msnpureport -g error -d 3 -${install_path}/driver/tools/msnpureport -g error -d 4 -${install_path}/driver/tools/msnpureport -g error -d 5 -${install_path}/driver/tools/msnpureport -g error -d 6 -${install_path}/driver/tools/msnpureport -g error -d 7 -#关闭Device侧Event日志 -${install_path}/driver/tools/msnpureport -e disable - -path_lib=$(python3.7 -c """ -import sys -import re -result='' -for index in range(len(sys.path)): - match_sit = re.search('-packages', sys.path[index]) - if match_sit is not None: - match_lib = re.search('lib', sys.path[index]) - - if match_lib is not None: - end=match_lib.span()[1] - result += sys.path[index][0:end] + ':' - - result+=sys.path[index] + '/torch/lib:' -print(result)""" -) - -echo ${path_lib} - -export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh deleted file mode 100644 index 19eb321d39..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export TASK_QUEUE_ENABLE=1 - -device_id=0 - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_1p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" -python3.7 ${currentDir}/pytorch_resnet50_apex.py \ - --data /data/imagenet \ - --npu ${device_id} \ - -j64 \ - -b512 \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs 90 \ - --num_classes=1000 \ - --evaluate=True \ - --resume checkpoint.pth.tar \ - --optimizer-batch-size 512 > ./resnet50_1p.log 2>&1 & - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig deleted file mode 100644 index 1ce1f997bd..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig +++ /dev/null @@ -1,27 +0,0 @@ -aipp_op { - aipp_mode: static - input_format : RGB888_U8 - csc_switch : false - rbuv_swap_switch : true - matrix_r0c0 : 256 - matrix_r0c1 : 0 - matrix_r0c2 : 359 - matrix_r1c0 : 256 - matrix_r1c1 : -88 - matrix_r1c2 : -183 - matrix_r2c0 : 256 - matrix_r2c1 : 454 - matrix_r2c2 : 0 - input_bias_0 : 0 - input_bias_1 : 128 - input_bias_2 : 128 - mean_chn_0 : 0 - mean_chn_1 : 0 - mean_chn_2 : 0 - min_chn_0 : 123.675 - min_chn_1 : 116.28 - min_chn_2 : 103.53 - var_reci_chn_0 : 0.0171247538316637 - var_reci_chn_1 : 0.0175070028011204 - var_reci_chn_2 : 0.0174291938997821 -} \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh deleted file mode 100644 index 6f8a2e44bf..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh +++ /dev/null @@ -1,13 +0,0 @@ -model_path=$1 -framework=$2 -output_model_name=$3 - -/usr/local/Ascend/atc/bin/atc \ ---model=$model_path \ ---framework=$framework \ ---output=$output_model_name \ ---input_format=NCHW --input_shape="actual_input_1:1,3,256,256" \ ---enable_small_channel=1 \ ---log=error \ ---soc_version=Ascend310 \ ---insert_op_conf=./aipp_resnet50.aippconfig \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh deleted file mode 100644 index da1eb3dc4c..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -docker_image=$1 -data_dir=$2 - -if [ -z "${docker_image}" ]; then - echo "please input docker_image" - exit 1 -fi - -if [ ! -d "${data_dir}" ]; then - echo "please input data_dir" - exit 1 -fi - -docker run -it \ - --device=/dev/davinci0 \ - --device=/dev/davinci_manager \ - --device=/dev/devmm_svm \ - --device=/dev/hisi_hdc \ - -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ - -v ${data_dir}:${data_dir} \ - ${docker_image} \ - /bin/bash diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt deleted file mode 100644 index dccbd552fe..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -cmake_minimum_required(VERSION 3.14.0) -project(resnet50) - -set(TARGET resnet50) - -add_definitions(-DENABLE_DVPP_INTERFACE) -add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) -add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) - -# Check environment variable -if(NOT DEFINED ENV{ASCEND_HOME}) - message(FATAL_ERROR "please define environment variable:ASCEND_HOME") -endif() -if(NOT DEFINED ENV{ASCEND_VERSION}) - message(WARNING "please define environment variable:ASCEND_VERSION") -endif() -if(NOT DEFINED ENV{ARCH_PATTERN}) - message(WARNING "please define environment variable:ARCH_PATTERN") -endif() -set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) -set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) - -set(MXBASE_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../) -set(MXBASE_INC ${MXBASE_ROOT_DIR}/mxbase/include) -set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib) -set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib/modelpostprocessors) -set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/postprocess/include) -if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) - set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) -else() - set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource/dist) -endif() - -include_directories(${ACL_INC_DIR}) -include_directories(${OPENSOURCE_DIR}/include) -include_directories(${OPENSOURCE_DIR}/include/opencv4) - -include_directories(${MXBASE_INC}) -include_directories(${MXBASE_POST_PROCESS_DIR}) - -link_directories(${ACL_LIB_DIR}) -link_directories(${OPENSOURCE_DIR}/lib) -link_directories(${MXBASE_LIB_DIR}) -link_directories(${MXBASE_POST_LIB_DIR}) - -add_executable(${TARGET} main.cpp Resnet50Classify.cpp) -target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess opencv_world stdc++fs) - -install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp deleted file mode 100644 index 024a9c3ae1..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "Resnet50Classify.h" -#include "MxBase/DeviceManager/DeviceManager.h" -#include "MxBase/Log/Log.h" - -using namespace MxBase; -namespace { -const uint32_t YUV_BYTE_NU = 3; -const uint32_t YUV_BYTE_DE = 2; -const uint32_t VPC_H_ALIGN = 2; -} - -APP_ERROR Resnet50Classify::Init(const InitParam &initParam) -{ - deviceId_ = initParam.deviceId; - APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); - if (ret != APP_ERR_OK) { - LogError << "Init devices failed, ret=" << ret << "."; - return ret; - } - ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); - if (ret != APP_ERR_OK) { - LogError << "Set context failed, ret=" << ret << "."; - return ret; - } - dvppWrapper_ = std::make_shared(); - ret = dvppWrapper_->Init(); - if (ret != APP_ERR_OK) { - LogError << "DvppWrapper init failed, ret=" << ret << "."; - return ret; - } - model_ = std::make_shared(); - ret = model_->Init(initParam.modelPath, modelDesc_); - if (ret != APP_ERR_OK) { - LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; - return ret; - } - MxBase::ConfigData configData; - const std::string softmax = initParam.softmax ? "true" : "false"; - const std::string checkTensor = initParam.checkTensor ? "true" : "false"; - - configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum)); - configData.SetJsonValue("TOP_K", std::to_string(initParam.topk)); - configData.SetJsonValue("SOFTMAX", softmax); - configData.SetJsonValue("CHECK_MODEL", checkTensor); - - auto jsonStr = configData.GetCfgJson().serialize(); - std::map> config; - config["postProcessConfigContent"] = std::make_shared(jsonStr); - config["labelPath"] = std::make_shared(initParam.labelPath); - - post_ = std::make_shared(); - ret = post_->Init(config); - if (ret != APP_ERR_OK) { - LogError << "Resnet50PostProcess init failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::DeInit() -{ - dvppWrapper_->DeInit(); - model_->DeInit(); - post_->DeInit(); - MxBase::DeviceManager::GetInstance()->DestroyDevices(); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::ReadImage(const std::string &imgPath, cv::Mat &imageMat) -{ - imageMat = cv::imread(imgPath, cv::IMREAD_COLOR); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::CenterCropImage(cv::Mat &img, cv::Mat &cropImg) -{ - float central_fraction = 0.75; - int crop_x = img.cols * central_fraction; - int crop_y = img.rows * central_fraction; - int crop_x1 = (img.cols - crop_x) / 2; - int crop_y1 = (img.rows - crop_y) / 2; - - cv::Rect myROI(crop_x1, crop_y1, crop_x, crop_y); - LogInfo << "images crop_x1: " << crop_x1 << ", crop_x: " << crop_x << ", crop_y1: " << crop_y1 << ", crop_y: " << crop_y; - cropImg = img(myROI); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat) -{ - static constexpr uint32_t resizeHeight = 256; - static constexpr uint32_t resizeWidth = 256; - - cv::resize(srcImageMat, dstImageMat, cv::Size(resizeWidth, resizeHeight)); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase) -{ - const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU; - MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_); - MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC); - - APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); - if (ret != APP_ERR_OK) { - LogError << GetError(ret) << "Memory malloc failed."; - return ret; - } - std::vector shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast(imageMat.cols)}; - tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::Inference(const std::vector &inputs, - std::vector &outputs) -{ - auto dtypes = model_->GetOutputDataType(); - for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { - std::vector shape = {}; - for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { - shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); - } - TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); - APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor); - if (ret != APP_ERR_OK) { - LogError << "TensorBaseMalloc failed, ret=" << ret << "."; - return ret; - } - outputs.push_back(tensor); - } - DynamicInfo dynamicInfo = {}; - dynamicInfo.dynamicType = DynamicType::STATIC_BATCH; - auto startTime = std::chrono::high_resolution_clock::now(); - APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo); - auto endTime = std::chrono::high_resolution_clock::now(); - double costMs = std::chrono::duration(endTime - startTime).count(); - g_inferCost.push_back(costMs); - if (ret != APP_ERR_OK) { - LogError << "ModelInference failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::PostProcess(const std::vector &inputs, - std::vector> &clsInfos) -{ - APP_ERROR ret = post_->Process(inputs, clsInfos); - if (ret != APP_ERR_OK) { - LogError << "Process failed, ret=" << ret << "."; - return ret; - } - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::SaveInferResult(const std::string &imagePath, std::vector> &batchClsInfos) -{ - uint32_t batchIndex = 0; - LogInfo << "image path: " << imagePath; - std::string fileName = imagePath.substr(imagePath.find_last_of("/") + 1); - size_t dot = fileName.find_last_of("."); - - std::string resultPathName = "result"; - if (access(resultPathName.c_str(), 0) != 0) { - APP_ERROR ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); - if (ret != 0) { - LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; - return APP_ERR_COMM_FAILURE; - } - } - std::string resFileName = "result/" + fileName.substr(0,dot) + "_1.txt"; - LogInfo << "file path for saving result: " << resFileName; - std::ofstream tfile(resFileName); - if (tfile.fail()) { - LogError << "Failed to open result file"; - return APP_ERR_COMM_FAILURE; - } - - for (auto clsInfos : batchClsInfos) { - std::string resultStr = ""; - for (auto clsInfo : clsInfos) { - LogDebug << "batchIndex: " << batchIndex << " className: " << clsInfo.className - << " confidence: " << clsInfo.confidence << " classIndex: " << clsInfo.classId; - resultStr += std::to_string(clsInfo.classId) + " "; - } - tfile << resultStr << std::endl; - batchIndex += 1; - } - tfile.close(); - return APP_ERR_OK; -} - -APP_ERROR Resnet50Classify::Process(const std::string &imgPath) -{ - cv::Mat imageMat; - APP_ERROR ret = ReadImage(imgPath, imageMat); - if (ret != APP_ERR_OK) { - LogError << "ReadImage failed, ret=" << ret << "."; - return ret; - } - - ret = CenterCropImage(imageMat, imageMat); - if (ret != APP_ERR_OK) { - LogError << "crop failed, ret=" << ret << "."; - return ret; - } - ret = Resize(imageMat, imageMat); - if (ret != APP_ERR_OK) { - LogError << "Resize failed, ret=" << ret << "."; - return ret; - } - - std::vector inputs = {}; - std::vector outputs = {}; - TensorBase tensorBase; - ret = CVMatToTensorBase(imageMat, tensorBase); - if (ret != APP_ERR_OK) { - LogError << "CVMatToTensorBase failed, ret=" << ret << "."; - return ret; - } - inputs.push_back(tensorBase); - ret = Inference(inputs, outputs); - if (ret != APP_ERR_OK) { - LogError << "Inference failed, ret=" << ret << "."; - return ret; - } - - std::vector> BatchClsInfos = {}; - ret = PostProcess(outputs, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "PostProcess failed, ret=" << ret << "."; - return ret; - } - - ret = SaveInferResult(imgPath, BatchClsInfos); - if (ret != APP_ERR_OK) { - LogError << "Save results failed, ret: " << ret << "."; - return ret; - } - - imageMat.release(); - return APP_ERR_OK; -} \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h deleted file mode 100644 index 02f3b59774..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef RESNET50_CLASSIFY_H -#define RESNET50_CLASSIFY_H - -#include -#include "MxBase/DvppWrapper/DvppWrapper.h" -#include "MxBase/ModelInfer/ModelInferenceProcessor.h" -#include "ClassPostProcessors/Resnet50PostProcess.h" -#include "MxBase/Tensor/TensorContext/TensorContext.h" - -extern std::vector g_inferCost; - -struct InitParam { - uint32_t deviceId; - std::string labelPath; - uint32_t classNum; - uint32_t topk; - bool softmax; - bool checkTensor; - std::string modelPath; -}; - -class Resnet50Classify { -public: - APP_ERROR Init(const InitParam &initParam); - APP_ERROR DeInit(); - APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat); - APP_ERROR Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat); - APP_ERROR CenterCropImage(cv::Mat &img, cv::Mat &cropImg); - APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase); - APP_ERROR Inference(const std::vector &inputs, std::vector &outputs); - APP_ERROR PostProcess(const std::vector &inputs, - std::vector> &clsInfos); - APP_ERROR SaveInferResult(const std::string &imgPath, - std::vector> &batchClsInfos); - APP_ERROR Process(const std::string &imgPath); -private: - std::shared_ptr dvppWrapper_; - std::shared_ptr model_; - std::shared_ptr post_; - MxBase::ModelDesc modelDesc_; - uint32_t deviceId_ = 0; -}; -#endif \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py deleted file mode 100644 index da35817a4d..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py +++ /dev/null @@ -1,174 +0,0 @@ -#coding = utf-8 -#Copyright 2020 Huawei Technologies Co., Ltd -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - """ - :param img_name:image file name contains file path - :return:image file name without file path - """ - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param gtfile_path: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param gtfile_path: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - :param filepath: the result of model predict - :return probabilities, number of label, in_type, color: - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - if cls_ind: - data_vec[ind] = np.int(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, n_labels, topn=5): - """ - :param prediction_file_path: the result of model predict - :param result_store_path: the root path to store result - :param json_file: json file to save result - :param img_gt_dict: the ground truth of imagenet - :param topn: classify model acc topk - :return:NA - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - gt = img_gt_dict[img_name] - real_label = int(gt) - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - print(table_dict) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter right number of argmuments, expected 4!") - exit(1) - # class number - n_labels = 1000 - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - - if not os.path.exists(annotation_file_path): - print("Ground truth file does not exist.") - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, n_labels, topn=5) - - elapsed = time.time() - start diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names deleted file mode 100644 index 6003783a5d..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names +++ /dev/null @@ -1,1001 +0,0 @@ - # This is modified from https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a -tench, Tinca tinca -goldfish, Carassius auratus -great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias -tiger shark, Galeocerdo cuvieri -hammerhead, hammerhead shark -electric ray, crampfish, numbfish, torpedo -stingray -cock -hen -ostrich, Struthio camelus -brambling, Fringilla montifringilla -goldfinch, Carduelis carduelis -house finch, linnet, Carpodacus mexicanus -junco, snowbird -indigo bunting, indigo finch, indigo bird, Passerina cyanea -robin, American robin, Turdus migratorius -bulbul -jay -magpie -chickadee -water ouzel, dipper -kite -bald eagle, American eagle, Haliaeetus leucocephalus -vulture -great grey owl, great gray owl, Strix nebulosa -European fire salamander, Salamandra salamandra -common newt, Triturus vulgaris -eft -spotted salamander, Ambystoma maculatum -axolotl, mud puppy, Ambystoma mexicanum -bullfrog, Rana catesbeiana -tree frog, tree-frog -tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui -loggerhead, loggerhead turtle, Caretta caretta -leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea -mud turtle -terrapin -box turtle, box tortoise -banded gecko -common iguana, iguana, Iguana iguana -American chameleon, anole, Anolis carolinensis -whiptail, whiptail lizard -agama -frilled lizard, Chlamydosaurus kingi -alligator lizard -Gila monster, Heloderma suspectum -green lizard, Lacerta viridis -African chameleon, Chamaeleo chamaeleon -Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis -African crocodile, Nile crocodile, Crocodylus niloticus -American alligator, Alligator mississipiensis -triceratops -thunder snake, worm snake, Carphophis amoenus -ringneck snake, ring-necked snake, ring snake -hognose snake, puff adder, sand viper -green snake, grass snake -king snake, kingsnake -garter snake, grass snake -water snake -vine snake -night snake, Hypsiglena torquata -boa constrictor, Constrictor constrictor -rock python, rock snake, Python sebae -Indian cobra, Naja naja -green mamba -sea snake -horned viper, cerastes, sand viper, horned asp, Cerastes cornutus -diamondback, diamondback rattlesnake, Crotalus adamanteus -sidewinder, horned rattlesnake, Crotalus cerastes -trilobite -harvestman, daddy longlegs, Phalangium opilio -scorpion -black and gold garden spider, Argiope aurantia -barn spider, Araneus cavaticus -garden spider, Aranea diademata -black widow, Latrodectus mactans -tarantula -wolf spider, hunting spider -tick -centipede -black grouse -ptarmigan -ruffed grouse, partridge, Bonasa umbellus -prairie chicken, prairie grouse, prairie fowl -peacock -quail -partridge -African grey, African gray, Psittacus erithacus -macaw -sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita -lorikeet -coucal -bee eater -hornbill -hummingbird -jacamar -toucan -drake -red-breasted merganser, Mergus serrator -goose -black swan, Cygnus atratus -tusker -echidna, spiny anteater, anteater -platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus -wallaby, brush kangaroo -koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus -wombat -jellyfish -sea anemone, anemone -brain coral -flatworm, platyhelminth -nematode, nematode worm, roundworm -conch -snail -slug -sea slug, nudibranch -chiton, coat-of-mail shell, sea cradle, polyplacophore -chambered nautilus, pearly nautilus, nautilus -Dungeness crab, Cancer magister -rock crab, Cancer irroratus -fiddler crab -king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica -American lobster, Northern lobster, Maine lobster, Homarus americanus -spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish -crayfish, crawfish, crawdad, crawdaddy -hermit crab -isopod -white stork, Ciconia ciconia -black stork, Ciconia nigra -spoonbill -flamingo -little blue heron, Egretta caerulea -American egret, great white heron, Egretta albus -bittern -crane -limpkin, Aramus pictus -European gallinule, Porphyrio porphyrio -American coot, marsh hen, mud hen, water hen, Fulica americana -bustard -ruddy turnstone, Arenaria interpres -red-backed sandpiper, dunlin, Erolia alpina -redshank, Tringa totanus -dowitcher -oystercatcher, oyster catcher -pelican -king penguin, Aptenodytes patagonica -albatross, mollymawk -grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus -killer whale, killer, orca, grampus, sea wolf, Orcinus orca -dugong, Dugong dugon -sea lion -Chihuahua -Japanese spaniel -Maltese dog, Maltese terrier, Maltese -Pekinese, Pekingese, Peke -Shih-Tzu -Blenheim spaniel -papillon -toy terrier -Rhodesian ridgeback -Afghan hound, Afghan -basset, basset hound -beagle -bloodhound, sleuthhound -bluetick -black-and-tan coonhound -Walker hound, Walker foxhound -English foxhound -redbone -borzoi, Russian wolfhound -Irish wolfhound -Italian greyhound -whippet -Ibizan hound, Ibizan Podenco -Norwegian elkhound, elkhound -otterhound, otter hound -Saluki, gazelle hound -Scottish deerhound, deerhound -Weimaraner -Staffordshire bullterrier, Staffordshire bull terrier -American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier -Bedlington terrier -Border terrier -Kerry blue terrier -Irish terrier -Norfolk terrier -Norwich terrier -Yorkshire terrier -wire-haired fox terrier -Lakeland terrier -Sealyham terrier, Sealyham -Airedale, Airedale terrier -cairn, cairn terrier -Australian terrier -Dandie Dinmont, Dandie Dinmont terrier -Boston bull, Boston terrier -miniature schnauzer -giant schnauzer -standard schnauzer -Scotch terrier, Scottish terrier, Scottie -Tibetan terrier, chrysanthemum dog -silky terrier, Sydney silky -soft-coated wheaten terrier -West Highland white terrier -Lhasa, Lhasa apso -flat-coated retriever -curly-coated retriever -golden retriever -Labrador retriever -Chesapeake Bay retriever -German short-haired pointer -vizsla, Hungarian pointer -English setter -Irish setter, red setter -Gordon setter -Brittany spaniel -clumber, clumber spaniel -English springer, English springer spaniel -Welsh springer spaniel -cocker spaniel, English cocker spaniel, cocker -Sussex spaniel -Irish water spaniel -kuvasz -schipperke -groenendael -malinois -briard -kelpie -komondor -Old English sheepdog, bobtail -Shetland sheepdog, Shetland sheep dog, Shetland -collie -Border collie -Bouvier des Flandres, Bouviers des Flandres -Rottweiler -German shepherd, German shepherd dog, German police dog, alsatian -Doberman, Doberman pinscher -miniature pinscher -Greater Swiss Mountain dog -Bernese mountain dog -Appenzeller -EntleBucher -boxer -bull mastiff -Tibetan mastiff -French bulldog -Great Dane -Saint Bernard, St Bernard -Eskimo dog, husky -malamute, malemute, Alaskan malamute -Siberian husky -dalmatian, coach dog, carriage dog -affenpinscher, monkey pinscher, monkey dog -basenji -pug, pug-dog -Leonberg -Newfoundland, Newfoundland dog -Great Pyrenees -Samoyed, Samoyede -Pomeranian -chow, chow chow -keeshond -Brabancon griffon -Pembroke, Pembroke Welsh corgi -Cardigan, Cardigan Welsh corgi -toy poodle -miniature poodle -standard poodle -Mexican hairless -timber wolf, grey wolf, gray wolf, Canis lupus -white wolf, Arctic wolf, Canis lupus tundrarum -red wolf, maned wolf, Canis rufus, Canis niger -coyote, prairie wolf, brush wolf, Canis latrans -dingo, warrigal, warragal, Canis dingo -dhole, Cuon alpinus -African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus -hyena, hyaena -red fox, Vulpes vulpes -kit fox, Vulpes macrotis -Arctic fox, white fox, Alopex lagopus -grey fox, gray fox, Urocyon cinereoargenteus -tabby, tabby cat -tiger cat -Persian cat -Siamese cat, Siamese -Egyptian cat -cougar, puma, catamount, mountain lion, painter, panther, Felis concolor -lynx, catamount -leopard, Panthera pardus -snow leopard, ounce, Panthera uncia -jaguar, panther, Panthera onca, Felis onca -lion, king of beasts, Panthera leo -tiger, Panthera tigris -cheetah, chetah, Acinonyx jubatus -brown bear, bruin, Ursus arctos -American black bear, black bear, Ursus americanus, Euarctos americanus -ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus -sloth bear, Melursus ursinus, Ursus ursinus -mongoose -meerkat, mierkat -tiger beetle -ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle -ground beetle, carabid beetle -long-horned beetle, longicorn, longicorn beetle -leaf beetle, chrysomelid -dung beetle -rhinoceros beetle -weevil -fly -bee -ant, emmet, pismire -grasshopper, hopper -cricket -walking stick, walkingstick, stick insect -cockroach, roach -mantis, mantid -cicada, cicala -leafhopper -lacewing, lacewing fly -dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk -damselfly -admiral -ringlet, ringlet butterfly -monarch, monarch butterfly, milkweed butterfly, Danaus plexippus -cabbage butterfly -sulphur butterfly, sulfur butterfly -lycaenid, lycaenid butterfly -starfish, sea star -sea urchin -sea cucumber, holothurian -wood rabbit, cottontail, cottontail rabbit -hare -Angora, Angora rabbit -hamster -porcupine, hedgehog -fox squirrel, eastern fox squirrel, Sciurus niger -marmot -beaver -guinea pig, Cavia cobaya -sorrel -zebra -hog, pig, grunter, squealer, Sus scrofa -wild boar, boar, Sus scrofa -warthog -hippopotamus, hippo, river horse, Hippopotamus amphibius -ox -water buffalo, water ox, Asiatic buffalo, Bubalus bubalis -bison -ram, tup -bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis -ibex, Capra ibex -hartebeest -impala, Aepyceros melampus -gazelle -Arabian camel, dromedary, Camelus dromedarius -llama -weasel -mink -polecat, fitch, foulmart, foumart, Mustela putorius -black-footed ferret, ferret, Mustela nigripes -otter -skunk, polecat, wood pussy -badger -armadillo -three-toed sloth, ai, Bradypus tridactylus -orangutan, orang, orangutang, Pongo pygmaeus -gorilla, Gorilla gorilla -chimpanzee, chimp, Pan troglodytes -gibbon, Hylobates lar -siamang, Hylobates syndactylus, Symphalangus syndactylus -guenon, guenon monkey -patas, hussar monkey, Erythrocebus patas -baboon -macaque -langur -colobus, colobus monkey -proboscis monkey, Nasalis larvatus -marmoset -capuchin, ringtail, Cebus capucinus -howler monkey, howler -titi, titi monkey -spider monkey, Ateles geoffroyi -squirrel monkey, Saimiri sciureus -Madagascar cat, ring-tailed lemur, Lemur catta -indri, indris, Indri indri, Indri brevicaudatus -Indian elephant, Elephas maximus -African elephant, Loxodonta africana -lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens -giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca -barracouta, snoek -eel -coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch -rock beauty, Holocanthus tricolor -anemone fish -sturgeon -gar, garfish, garpike, billfish, Lepisosteus osseus -lionfish -puffer, pufferfish, blowfish, globefish -abacus -abaya -academic gown, academic robe, judge's robe -accordion, piano accordion, squeeze box -acoustic guitar -aircraft carrier, carrier, flattop, attack aircraft carrier -airliner -airship, dirigible -altar -ambulance -amphibian, amphibious vehicle -analog clock -apiary, bee house -apron -ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin -assault rifle, assault gun -backpack, back pack, knapsack, packsack, rucksack, haversack -bakery, bakeshop, bakehouse -balance beam, beam -balloon -ballpoint, ballpoint pen, ballpen, Biro -Band Aid -banjo -bannister, banister, balustrade, balusters, handrail -barbell -barber chair -barbershop -barn -barometer -barrel, cask -barrow, garden cart, lawn cart, wheelbarrow -baseball -basketball -bassinet -bassoon -bathing cap, swimming cap -bath towel -bathtub, bathing tub, bath, tub -beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon -beacon, lighthouse, beacon light, pharos -beaker -bearskin, busby, shako -beer bottle -beer glass -bell cote, bell cot -bib -bicycle-built-for-two, tandem bicycle, tandem -bikini, two-piece -binder, ring-binder -binoculars, field glasses, opera glasses -birdhouse -boathouse -bobsled, bobsleigh, bob -bolo tie, bolo, bola tie, bola -bonnet, poke bonnet -bookcase -bookshop, bookstore, bookstall -bottlecap -bow -bow tie, bow-tie, bowtie -brass, memorial tablet, plaque -brassiere, bra, bandeau -breakwater, groin, groyne, mole, bulwark, seawall, jetty -breastplate, aegis, egis -broom -bucket, pail -buckle -bulletproof vest -bullet train, bullet -butcher shop, meat market -cab, hack, taxi, taxicab -caldron, cauldron -candle, taper, wax light -cannon -canoe -can opener, tin opener -cardigan -car mirror -carousel, carrousel, merry-go-round, roundabout, whirligig -carpenter's kit, tool kit -carton -car wheel -cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM -cassette -cassette player -castle -catamaran -CD player -cello, violoncello -cellular telephone, cellular phone, cellphone, cell, mobile phone -chain -chainlink fence -chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour -chain saw, chainsaw -chest -chiffonier, commode -chime, bell, gong -china cabinet, china closet -Christmas stocking -church, church building -cinema, movie theater, movie theatre, movie house, picture palace -cleaver, meat cleaver, chopper -cliff dwelling -cloak -clog, geta, patten, sabot -cocktail shaker -coffee mug -coffeepot -coil, spiral, volute, whorl, helix -combination lock -computer keyboard, keypad -confectionery, confectionary, candy store -container ship, containership, container vessel -convertible -corkscrew, bottle screw -cornet, horn, trumpet, trump -cowboy boot -cowboy hat, ten-gallon hat -cradle -crane -crash helmet -crate -crib, cot -Crock Pot -croquet ball -crutch -cuirass -dam, dike, dyke -desk -desktop computer -dial telephone, dial phone -diaper, nappy, napkin -digital clock -digital watch -dining table, board -dishrag, dishcloth -dishwasher, dish washer, dishwashing machine -disk brake, disc brake -dock, dockage, docking facility -dogsled, dog sled, dog sleigh -dome -doormat, welcome mat -drilling platform, offshore rig -drum, membranophone, tympan -drumstick -dumbbell -Dutch oven -electric fan, blower -electric guitar -electric locomotive -entertainment center -envelope -espresso maker -face powder -feather boa, boa -file, file cabinet, filing cabinet -fireboat -fire engine, fire truck -fire screen, fireguard -flagpole, flagstaff -flute, transverse flute -folding chair -football helmet -forklift -fountain -fountain pen -four-poster -freight car -French horn, horn -frying pan, frypan, skillet -fur coat -garbage truck, dustcart -gasmask, respirator, gas helmet -gas pump, gasoline pump, petrol pump, island dispenser -goblet -go-kart -golf ball -golfcart, golf cart -gondola -gong, tam-tam -gown -grand piano, grand -greenhouse, nursery, glasshouse -grille, radiator grille -grocery store, grocery, food market, market -guillotine -hair slide -hair spray -half track -hammer -hamper -hand blower, blow dryer, blow drier, hair dryer, hair drier -hand-held computer, hand-held microcomputer -handkerchief, hankie, hanky, hankey -hard disc, hard disk, fixed disk -harmonica, mouth organ, harp, mouth harp -harp -harvester, reaper -hatchet -holster -home theater, home theatre -honeycomb -hook, claw -hoopskirt, crinoline -horizontal bar, high bar -horse cart, horse-cart -hourglass -iPod -iron, smoothing iron -jack-o'-lantern -jean, blue jean, denim -jeep, landrover -jersey, T-shirt, tee shirt -jigsaw puzzle -jinrikisha, ricksha, rickshaw -joystick -kimono -knee pad -knot -lab coat, laboratory coat -ladle -lampshade, lamp shade -laptop, laptop computer -lawn mower, mower -lens cap, lens cover -letter opener, paper knife, paperknife -library -lifeboat -lighter, light, igniter, ignitor -limousine, limo -liner, ocean liner -lipstick, lip rouge -Loafer -lotion -loudspeaker, speaker, speaker unit, loudspeaker system, speaker system -loupe, jeweler's loupe -lumbermill, sawmill -magnetic compass -mailbag, postbag -mailbox, letter box -maillot -maillot, tank suit -manhole cover -maraca -marimba, xylophone -mask -matchstick -maypole -maze, labyrinth -measuring cup -medicine chest, medicine cabinet -megalith, megalithic structure -microphone, mike -microwave, microwave oven -military uniform -milk can -minibus -miniskirt, mini -minivan -missile -mitten -mixing bowl -mobile home, manufactured home -Model T -modem -monastery -monitor -moped -mortar -mortarboard -mosque -mosquito net -motor scooter, scooter -mountain bike, all-terrain bike, off-roader -mountain tent -mouse, computer mouse -mousetrap -moving van -muzzle -nail -neck brace -necklace -nipple -notebook, notebook computer -obelisk -oboe, hautboy, hautbois -ocarina, sweet potato -odometer, hodometer, mileometer, milometer -oil filter -organ, pipe organ -oscilloscope, scope, cathode-ray oscilloscope, CRO -overskirt -oxcart -oxygen mask -packet -paddle, boat paddle -paddlewheel, paddle wheel -padlock -paintbrush -pajama, pyjama, pj's, jammies -palace -panpipe, pandean pipe, syrinx -paper towel -parachute, chute -parallel bars, bars -park bench -parking meter -passenger car, coach, carriage -patio, terrace -pay-phone, pay-station -pedestal, plinth, footstall -pencil box, pencil case -pencil sharpener -perfume, essence -Petri dish -photocopier -pick, plectrum, plectron -pickelhaube -picket fence, paling -pickup, pickup truck -pier -piggy bank, penny bank -pill bottle -pillow -ping-pong ball -pinwheel -pirate, pirate ship -pitcher, ewer -plane, carpenter's plane, woodworking plane -planetarium -plastic bag -plate rack -plow, plough -plunger, plumber's helper -Polaroid camera, Polaroid Land camera -pole -police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria -poncho -pool table, billiard table, snooker table -pop bottle, soda bottle -pot, flowerpot -potter's wheel -power drill -prayer rug, prayer mat -printer -prison, prison house -projectile, missile -projector -puck, hockey puck -punching bag, punch bag, punching ball, punchball -purse -quill, quill pen -quilt, comforter, comfort, puff -racer, race car, racing car -racket, racquet -radiator -radio, wireless -radio telescope, radio reflector -rain barrel -recreational vehicle, RV, R.V. -reel -reflex camera -refrigerator, icebox -remote control, remote -restaurant, eating house, eating place, eatery -revolver, six-gun, six-shooter -rifle -rocking chair, rocker -rotisserie -rubber eraser, rubber, pencil eraser -rugby ball -rule, ruler -running shoe -safe -safety pin -saltshaker, salt shaker -sandal -sarong -sax, saxophone -scabbard -scale, weighing machine -school bus -schooner -scoreboard -screen, CRT screen -screw -screwdriver -seat belt, seatbelt -sewing machine -shield, buckler -shoe shop, shoe-shop, shoe store -shoji -shopping basket -shopping cart -shovel -shower cap -shower curtain -ski -ski mask -sleeping bag -slide rule, slipstick -sliding door -slot, one-armed bandit -snorkel -snowmobile -snowplow, snowplough -soap dispenser -soccer ball -sock -solar dish, solar collector, solar furnace -sombrero -soup bowl -space bar -space heater -space shuttle -spatula -speedboat -spider web, spider's web -spindle -sports car, sport car -spotlight, spot -stage -steam locomotive -steel arch bridge -steel drum -stethoscope -stole -stone wall -stopwatch, stop watch -stove -strainer -streetcar, tram, tramcar, trolley, trolley car -stretcher -studio couch, day bed -stupa, tope -submarine, pigboat, sub, U-boat -suit, suit of clothes -sundial -sunglass -sunglasses, dark glasses, shades -sunscreen, sunblock, sun blocker -suspension bridge -swab, swob, mop -sweatshirt -swimming trunks, bathing trunks -swing -switch, electric switch, electrical switch -syringe -table lamp -tank, army tank, armored combat vehicle, armoured combat vehicle -tape player -teapot -teddy, teddy bear -television, television system -tennis ball -thatch, thatched roof -theater curtain, theatre curtain -thimble -thresher, thrasher, threshing machine -throne -tile roof -toaster -tobacco shop, tobacconist shop, tobacconist -toilet seat -torch -totem pole -tow truck, tow car, wrecker -toyshop -tractor -trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi -tray -trench coat -tricycle, trike, velocipede -trimaran -tripod -triumphal arch -trolleybus, trolley coach, trackless trolley -trombone -tub, vat -turnstile -typewriter keyboard -umbrella -unicycle, monocycle -upright, upright piano -vacuum, vacuum cleaner -vase -vault -velvet -vending machine -vestment -viaduct -violin, fiddle -volleyball -waffle iron -wall clock -wallet, billfold, notecase, pocketbook -wardrobe, closet, press -warplane, military plane -washbasin, handbasin, washbowl, lavabo, wash-hand basin -washer, automatic washer, washing machine -water bottle -water jug -water tower -whiskey jug -whistle -wig -window screen -window shade -Windsor tie -wine bottle -wing -wok -wooden spoon -wool, woolen, woollen -worm fence, snake fence, snake-rail fence, Virginia fence -wreck -yawl -yurt -web site, website, internet site, site -comic book -crossword puzzle, crossword -street sign -traffic light, traffic signal, stoplight -book jacket, dust cover, dust jacket, dust wrapper -menu -plate -guacamole -consomme -hot pot, hotpot -trifle -ice cream, icecream -ice lolly, lolly, lollipop, popsicle -French loaf -bagel, beigel -pretzel -cheeseburger -hotdog, hot dog, red hot -mashed potato -head cabbage -broccoli -cauliflower -zucchini, courgette -spaghetti squash -acorn squash -butternut squash -cucumber, cuke -artichoke, globe artichoke -bell pepper -cardoon -mushroom -Granny Smith -strawberry -orange -lemon -fig -pineapple, ananas -banana -jackfruit, jak, jack -custard apple -pomegranate -hay -carbonara -chocolate sauce, chocolate syrup -dough -meat loaf, meatloaf -pizza, pizza pie -potpie -burrito -red wine -espresso -cup -eggnog -alp -bubble -cliff, drop, drop-off -coral reef -geyser -lakeside, lakeshore -promontory, headland, head, foreland -sandbar, sand bar -seashore, coast, seacoast, sea-coast -valley, vale -volcano -ballplayer, baseball player -groom, bridegroom -scuba diver -rapeseed -daisy -yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum -corn -acorn -hip, rose hip, rosehip -buckeye, horse chestnut, conker -coral fungus -agaric -gyromitra -stinkhorn, carrion fungus -earthstar -hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa -bolete -ear, spike, capitulum -toilet tissue, toilet paper, bathroom tissue diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp deleted file mode 100644 index 137e934b96..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include "Resnet50Classify.h" -#include "MxBase/Log/Log.h" - -namespace fs = std::experimental::filesystem; -namespace { -const uint32_t CLASS_NUM = 1000; -} -std::vector g_inferCost; - -int main(int argc, char* argv[]) -{ - if (argc <= 1) { - LogWarn << "Please input image path, such as './val_union/'."; - return APP_ERR_OK; - } - - InitParam initParam = {}; - initParam.deviceId = 0; - initParam.classNum = CLASS_NUM; - initParam.labelPath = "../models/imagenet1000_clsidx_to_labels.names"; - initParam.topk = 5; - initParam.softmax = true; - initParam.checkTensor = true; - initParam.modelPath = "../models/resnet50_pytorch.om"; - auto resnet50 = std::make_shared(); - APP_ERROR ret = resnet50->Init(initParam); - if (ret != APP_ERR_OK) { - LogError << "Resnet50Classify init failed, ret=" << ret << "."; - return ret; - } - - std::string imgDir = argv[1]; - for (auto & entry : fs::directory_iterator(imgDir)) { - LogInfo << "read image path " << entry.path(); - ret = resnet50->Process(entry.path()); - if (ret != APP_ERR_OK) { - LogError << "Resnet50Classify process failed, ret=" << ret << "."; - resnet50->DeInit(); - return ret; - } - } - resnet50->DeInit(); - double costSum = 0; - for (unsigned int i = 0; i < g_inferCost.size(); i++) { - costSum += g_inferCost[i]; - } - LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; - LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " images/sec."; - return APP_ERR_OK; -} \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline deleted file mode 100644 index c024eb3ef3..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline +++ /dev/null @@ -1,75 +0,0 @@ -{ - "resnet50_classification": { - "stream_config": { - "deviceId": "0" - }, - "appsrc0": { - "props": { - "blocksize": "409600" - }, - "factory": "appsrc", - "next": "mxpi_imagedecoder0" - }, - "mxpi_imagedecoder0": { - "props": { - "handleMethod": "opencv" - }, - "factory": "mxpi_imagedecoder", - "next": "mxpi_imageresize0" - }, - "mxpi_imageresize0": { - "props": { - "handleMethod": "opencv", - "resizeHeight": "304", - "resizeWidth": "304", - "resizeType": "Resizer_Stretch" - }, - "factory": "mxpi_imageresize", - "next": "mxpi_opencvcentercrop0" - }, - "mxpi_opencvcentercrop0": { - "props": { - "dataSource": "mxpi_imageresize0", - "cropHeight": "256", - "cropWidth": "256" - }, - "factory": "mxpi_opencvcentercrop", - "next": "mxpi_tensorinfer0" - }, - - - "mxpi_tensorinfer0": { - "props": { - "dataSource": "mxpi_opencvcentercrop0", - "modelPath": "../models/resnet50_pytorch.om", - "waitingTime": "2000", - "outputDeviceId": "-1" - }, - "factory": "mxpi_tensorinfer", - "next": "mxpi_classpostprocessor0" - }, - "mxpi_classpostprocessor0": { - "props": { - "dataSource": "mxpi_tensorinfer0", - "postProcessConfigPath": "./resnet50_aipp_pt.cfg", - "labelPath": "./imagenet1000_clsidx_to_labels.names", - "postProcessLibPath": "libresnet50postprocess.so" - }, - "factory": "mxpi_classpostprocessor", - "next": "mxpi_dataserialize0" - }, - "mxpi_dataserialize0": { - "props": { - "outputDataKeys": "mxpi_classpostprocessor0" - }, - "factory": "mxpi_dataserialize", - "next": "appsink0" - }, - "appsink0": { - "props": { - "blocksize": "4096000" - }, - "factory": "appsink" - } - } -} \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py deleted file mode 100644 index 2d383ae6cf..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py +++ /dev/null @@ -1,175 +0,0 @@ -#coding = utf-8 -#Copyright 2020 Huawei Technologies Co., Ltd -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import os -import sys -import json -import numpy as np -import time - -np.set_printoptions(threshold=sys.maxsize) - -LABEL_FILE = "HiAI_label.json" - - -def gen_file_name(img_name): - """ - :param: file path - :return: filename - """ - full_name = img_name.split('/')[-1] - index = full_name.rfind('.') - return full_name[:index] - - -def cre_groundtruth_dict(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - for gtfile in os.listdir(gtfile_path): - if (gtfile != LABEL_FILE): - with open(os.path.join(gtfile_path, gtfile), 'r') as f: - gt = json.load(f) - ret = gt["image"]["annotations"][0]["category_id"] - img_gt_dict[gen_file_name(gtfile)] = ret - return img_gt_dict - - -def cre_groundtruth_dict_fromtxt(gtfile_path): - """ - :param filename: file contains the imagename and label number - :return: dictionary key imagename, value is label number - """ - img_gt_dict = {} - with open(gtfile_path, 'r')as f: - for line in f.readlines(): - temp = line.strip().split(" ") - img_name = temp[0].split(".")[0] - img_lab = temp[1] - img_gt_dict[img_name] = img_lab - return img_gt_dict - - -def load_statistical_predict_result(filepath): - """ - :param filepath: the result of model predict - :return probabilities, number of label, in_type, color: - """ - with open(filepath, 'r')as f: - data = f.readline() - temp = data.strip().split(" ") - n_label = len(temp) - data_vec = np.zeros((n_label), dtype=np.float32) - in_type = '' - color = '' - if n_label == 0: - in_type = f.readline() - color = f.readline() - else: - for ind, cls_ind in enumerate(temp): - if cls_ind: - data_vec[ind] = np.int(cls_ind) - return data_vec, n_label, in_type, color - - -def create_visualization_statistical_result(prediction_file_path, - result_store_path, json_file_name, - img_gt_dict, n_labels, topn=5): - """ - :param prediction_file_path: the result of model predict - :param result_store_path: the root path to store result - :param json_file: json file to save result - :param img_gt_dict: the ground truth of imagenet - :param topn: classify model acc topk - :param n_labels: class numbers - :return: - """ - writer = open(os.path.join(result_store_path, json_file_name), 'w') - table_dict = {} - table_dict["title"] = "Overall statistical evaluation" - table_dict["value"] = [] - - count = 0 - res_cnt = 0 - count_hit = np.zeros(topn) - for tfile_name in os.listdir(prediction_file_path): - count += 1 - temp = tfile_name.split('.')[0] - index = temp.rfind('_') - img_name = temp[:index] - filepath = os.path.join(prediction_file_path, tfile_name) - - ret = load_statistical_predict_result(filepath) - prediction = ret[0] - gt = img_gt_dict[img_name] - real_label = int(gt) - res_cnt = min(len(prediction), topn) - for i in range(res_cnt): - if str(real_label) == str(int(prediction[i])): - count_hit[i] += 1 - break - if 'value' not in table_dict.keys(): - print("the item value does not exist!") - else: - table_dict["value"].extend( - [{"key": "Number of images", "value": str(count)}, - {"key": "Number of classes", "value": str(n_labels)}]) - if count == 0: - accuracy = 0 - else: - accuracy = np.cumsum(count_hit) / count - - for i in range(res_cnt): - table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy", - "value": str( - round(accuracy[i] * 100, 2)) + '%'}) - print(table_dict) - json.dump(table_dict, writer) - writer.close() - - -if __name__ == '__main__': - start = time.time() - try: - # txt file path - folder_davinci_target = sys.argv[1] - # annotation files path, "val_label.txt" - annotation_file_path = sys.argv[2] - # the path to store the results json path - result_json_path = sys.argv[3] - # result json file name - json_file_name = sys.argv[4] - except IndexError: - print("Please enter right number of argmuments, expected 4!") - exit(1) - # class number - n_labels = 1000 - if not os.path.exists(folder_davinci_target): - print("target file folder does not exist.") - - if not os.path.exists(annotation_file_path): - print("Ground truth file does not exist.") - - if not os.path.exists(result_json_path): - print("Result folder doesn't exist.") - - img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path) - create_visualization_statistical_result(folder_davinci_target, - result_json_path, json_file_name, - img_label_dict, n_labels, topn=5) - - elapsed = time.time() - start diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names deleted file mode 100644 index 6003783a5d..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names +++ /dev/null @@ -1,1001 +0,0 @@ - # This is modified from https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a -tench, Tinca tinca -goldfish, Carassius auratus -great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias -tiger shark, Galeocerdo cuvieri -hammerhead, hammerhead shark -electric ray, crampfish, numbfish, torpedo -stingray -cock -hen -ostrich, Struthio camelus -brambling, Fringilla montifringilla -goldfinch, Carduelis carduelis -house finch, linnet, Carpodacus mexicanus -junco, snowbird -indigo bunting, indigo finch, indigo bird, Passerina cyanea -robin, American robin, Turdus migratorius -bulbul -jay -magpie -chickadee -water ouzel, dipper -kite -bald eagle, American eagle, Haliaeetus leucocephalus -vulture -great grey owl, great gray owl, Strix nebulosa -European fire salamander, Salamandra salamandra -common newt, Triturus vulgaris -eft -spotted salamander, Ambystoma maculatum -axolotl, mud puppy, Ambystoma mexicanum -bullfrog, Rana catesbeiana -tree frog, tree-frog -tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui -loggerhead, loggerhead turtle, Caretta caretta -leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea -mud turtle -terrapin -box turtle, box tortoise -banded gecko -common iguana, iguana, Iguana iguana -American chameleon, anole, Anolis carolinensis -whiptail, whiptail lizard -agama -frilled lizard, Chlamydosaurus kingi -alligator lizard -Gila monster, Heloderma suspectum -green lizard, Lacerta viridis -African chameleon, Chamaeleo chamaeleon -Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis -African crocodile, Nile crocodile, Crocodylus niloticus -American alligator, Alligator mississipiensis -triceratops -thunder snake, worm snake, Carphophis amoenus -ringneck snake, ring-necked snake, ring snake -hognose snake, puff adder, sand viper -green snake, grass snake -king snake, kingsnake -garter snake, grass snake -water snake -vine snake -night snake, Hypsiglena torquata -boa constrictor, Constrictor constrictor -rock python, rock snake, Python sebae -Indian cobra, Naja naja -green mamba -sea snake -horned viper, cerastes, sand viper, horned asp, Cerastes cornutus -diamondback, diamondback rattlesnake, Crotalus adamanteus -sidewinder, horned rattlesnake, Crotalus cerastes -trilobite -harvestman, daddy longlegs, Phalangium opilio -scorpion -black and gold garden spider, Argiope aurantia -barn spider, Araneus cavaticus -garden spider, Aranea diademata -black widow, Latrodectus mactans -tarantula -wolf spider, hunting spider -tick -centipede -black grouse -ptarmigan -ruffed grouse, partridge, Bonasa umbellus -prairie chicken, prairie grouse, prairie fowl -peacock -quail -partridge -African grey, African gray, Psittacus erithacus -macaw -sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita -lorikeet -coucal -bee eater -hornbill -hummingbird -jacamar -toucan -drake -red-breasted merganser, Mergus serrator -goose -black swan, Cygnus atratus -tusker -echidna, spiny anteater, anteater -platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus -wallaby, brush kangaroo -koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus -wombat -jellyfish -sea anemone, anemone -brain coral -flatworm, platyhelminth -nematode, nematode worm, roundworm -conch -snail -slug -sea slug, nudibranch -chiton, coat-of-mail shell, sea cradle, polyplacophore -chambered nautilus, pearly nautilus, nautilus -Dungeness crab, Cancer magister -rock crab, Cancer irroratus -fiddler crab -king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica -American lobster, Northern lobster, Maine lobster, Homarus americanus -spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish -crayfish, crawfish, crawdad, crawdaddy -hermit crab -isopod -white stork, Ciconia ciconia -black stork, Ciconia nigra -spoonbill -flamingo -little blue heron, Egretta caerulea -American egret, great white heron, Egretta albus -bittern -crane -limpkin, Aramus pictus -European gallinule, Porphyrio porphyrio -American coot, marsh hen, mud hen, water hen, Fulica americana -bustard -ruddy turnstone, Arenaria interpres -red-backed sandpiper, dunlin, Erolia alpina -redshank, Tringa totanus -dowitcher -oystercatcher, oyster catcher -pelican -king penguin, Aptenodytes patagonica -albatross, mollymawk -grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus -killer whale, killer, orca, grampus, sea wolf, Orcinus orca -dugong, Dugong dugon -sea lion -Chihuahua -Japanese spaniel -Maltese dog, Maltese terrier, Maltese -Pekinese, Pekingese, Peke -Shih-Tzu -Blenheim spaniel -papillon -toy terrier -Rhodesian ridgeback -Afghan hound, Afghan -basset, basset hound -beagle -bloodhound, sleuthhound -bluetick -black-and-tan coonhound -Walker hound, Walker foxhound -English foxhound -redbone -borzoi, Russian wolfhound -Irish wolfhound -Italian greyhound -whippet -Ibizan hound, Ibizan Podenco -Norwegian elkhound, elkhound -otterhound, otter hound -Saluki, gazelle hound -Scottish deerhound, deerhound -Weimaraner -Staffordshire bullterrier, Staffordshire bull terrier -American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier -Bedlington terrier -Border terrier -Kerry blue terrier -Irish terrier -Norfolk terrier -Norwich terrier -Yorkshire terrier -wire-haired fox terrier -Lakeland terrier -Sealyham terrier, Sealyham -Airedale, Airedale terrier -cairn, cairn terrier -Australian terrier -Dandie Dinmont, Dandie Dinmont terrier -Boston bull, Boston terrier -miniature schnauzer -giant schnauzer -standard schnauzer -Scotch terrier, Scottish terrier, Scottie -Tibetan terrier, chrysanthemum dog -silky terrier, Sydney silky -soft-coated wheaten terrier -West Highland white terrier -Lhasa, Lhasa apso -flat-coated retriever -curly-coated retriever -golden retriever -Labrador retriever -Chesapeake Bay retriever -German short-haired pointer -vizsla, Hungarian pointer -English setter -Irish setter, red setter -Gordon setter -Brittany spaniel -clumber, clumber spaniel -English springer, English springer spaniel -Welsh springer spaniel -cocker spaniel, English cocker spaniel, cocker -Sussex spaniel -Irish water spaniel -kuvasz -schipperke -groenendael -malinois -briard -kelpie -komondor -Old English sheepdog, bobtail -Shetland sheepdog, Shetland sheep dog, Shetland -collie -Border collie -Bouvier des Flandres, Bouviers des Flandres -Rottweiler -German shepherd, German shepherd dog, German police dog, alsatian -Doberman, Doberman pinscher -miniature pinscher -Greater Swiss Mountain dog -Bernese mountain dog -Appenzeller -EntleBucher -boxer -bull mastiff -Tibetan mastiff -French bulldog -Great Dane -Saint Bernard, St Bernard -Eskimo dog, husky -malamute, malemute, Alaskan malamute -Siberian husky -dalmatian, coach dog, carriage dog -affenpinscher, monkey pinscher, monkey dog -basenji -pug, pug-dog -Leonberg -Newfoundland, Newfoundland dog -Great Pyrenees -Samoyed, Samoyede -Pomeranian -chow, chow chow -keeshond -Brabancon griffon -Pembroke, Pembroke Welsh corgi -Cardigan, Cardigan Welsh corgi -toy poodle -miniature poodle -standard poodle -Mexican hairless -timber wolf, grey wolf, gray wolf, Canis lupus -white wolf, Arctic wolf, Canis lupus tundrarum -red wolf, maned wolf, Canis rufus, Canis niger -coyote, prairie wolf, brush wolf, Canis latrans -dingo, warrigal, warragal, Canis dingo -dhole, Cuon alpinus -African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus -hyena, hyaena -red fox, Vulpes vulpes -kit fox, Vulpes macrotis -Arctic fox, white fox, Alopex lagopus -grey fox, gray fox, Urocyon cinereoargenteus -tabby, tabby cat -tiger cat -Persian cat -Siamese cat, Siamese -Egyptian cat -cougar, puma, catamount, mountain lion, painter, panther, Felis concolor -lynx, catamount -leopard, Panthera pardus -snow leopard, ounce, Panthera uncia -jaguar, panther, Panthera onca, Felis onca -lion, king of beasts, Panthera leo -tiger, Panthera tigris -cheetah, chetah, Acinonyx jubatus -brown bear, bruin, Ursus arctos -American black bear, black bear, Ursus americanus, Euarctos americanus -ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus -sloth bear, Melursus ursinus, Ursus ursinus -mongoose -meerkat, mierkat -tiger beetle -ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle -ground beetle, carabid beetle -long-horned beetle, longicorn, longicorn beetle -leaf beetle, chrysomelid -dung beetle -rhinoceros beetle -weevil -fly -bee -ant, emmet, pismire -grasshopper, hopper -cricket -walking stick, walkingstick, stick insect -cockroach, roach -mantis, mantid -cicada, cicala -leafhopper -lacewing, lacewing fly -dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk -damselfly -admiral -ringlet, ringlet butterfly -monarch, monarch butterfly, milkweed butterfly, Danaus plexippus -cabbage butterfly -sulphur butterfly, sulfur butterfly -lycaenid, lycaenid butterfly -starfish, sea star -sea urchin -sea cucumber, holothurian -wood rabbit, cottontail, cottontail rabbit -hare -Angora, Angora rabbit -hamster -porcupine, hedgehog -fox squirrel, eastern fox squirrel, Sciurus niger -marmot -beaver -guinea pig, Cavia cobaya -sorrel -zebra -hog, pig, grunter, squealer, Sus scrofa -wild boar, boar, Sus scrofa -warthog -hippopotamus, hippo, river horse, Hippopotamus amphibius -ox -water buffalo, water ox, Asiatic buffalo, Bubalus bubalis -bison -ram, tup -bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis -ibex, Capra ibex -hartebeest -impala, Aepyceros melampus -gazelle -Arabian camel, dromedary, Camelus dromedarius -llama -weasel -mink -polecat, fitch, foulmart, foumart, Mustela putorius -black-footed ferret, ferret, Mustela nigripes -otter -skunk, polecat, wood pussy -badger -armadillo -three-toed sloth, ai, Bradypus tridactylus -orangutan, orang, orangutang, Pongo pygmaeus -gorilla, Gorilla gorilla -chimpanzee, chimp, Pan troglodytes -gibbon, Hylobates lar -siamang, Hylobates syndactylus, Symphalangus syndactylus -guenon, guenon monkey -patas, hussar monkey, Erythrocebus patas -baboon -macaque -langur -colobus, colobus monkey -proboscis monkey, Nasalis larvatus -marmoset -capuchin, ringtail, Cebus capucinus -howler monkey, howler -titi, titi monkey -spider monkey, Ateles geoffroyi -squirrel monkey, Saimiri sciureus -Madagascar cat, ring-tailed lemur, Lemur catta -indri, indris, Indri indri, Indri brevicaudatus -Indian elephant, Elephas maximus -African elephant, Loxodonta africana -lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens -giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca -barracouta, snoek -eel -coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch -rock beauty, Holocanthus tricolor -anemone fish -sturgeon -gar, garfish, garpike, billfish, Lepisosteus osseus -lionfish -puffer, pufferfish, blowfish, globefish -abacus -abaya -academic gown, academic robe, judge's robe -accordion, piano accordion, squeeze box -acoustic guitar -aircraft carrier, carrier, flattop, attack aircraft carrier -airliner -airship, dirigible -altar -ambulance -amphibian, amphibious vehicle -analog clock -apiary, bee house -apron -ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin -assault rifle, assault gun -backpack, back pack, knapsack, packsack, rucksack, haversack -bakery, bakeshop, bakehouse -balance beam, beam -balloon -ballpoint, ballpoint pen, ballpen, Biro -Band Aid -banjo -bannister, banister, balustrade, balusters, handrail -barbell -barber chair -barbershop -barn -barometer -barrel, cask -barrow, garden cart, lawn cart, wheelbarrow -baseball -basketball -bassinet -bassoon -bathing cap, swimming cap -bath towel -bathtub, bathing tub, bath, tub -beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon -beacon, lighthouse, beacon light, pharos -beaker -bearskin, busby, shako -beer bottle -beer glass -bell cote, bell cot -bib -bicycle-built-for-two, tandem bicycle, tandem -bikini, two-piece -binder, ring-binder -binoculars, field glasses, opera glasses -birdhouse -boathouse -bobsled, bobsleigh, bob -bolo tie, bolo, bola tie, bola -bonnet, poke bonnet -bookcase -bookshop, bookstore, bookstall -bottlecap -bow -bow tie, bow-tie, bowtie -brass, memorial tablet, plaque -brassiere, bra, bandeau -breakwater, groin, groyne, mole, bulwark, seawall, jetty -breastplate, aegis, egis -broom -bucket, pail -buckle -bulletproof vest -bullet train, bullet -butcher shop, meat market -cab, hack, taxi, taxicab -caldron, cauldron -candle, taper, wax light -cannon -canoe -can opener, tin opener -cardigan -car mirror -carousel, carrousel, merry-go-round, roundabout, whirligig -carpenter's kit, tool kit -carton -car wheel -cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM -cassette -cassette player -castle -catamaran -CD player -cello, violoncello -cellular telephone, cellular phone, cellphone, cell, mobile phone -chain -chainlink fence -chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour -chain saw, chainsaw -chest -chiffonier, commode -chime, bell, gong -china cabinet, china closet -Christmas stocking -church, church building -cinema, movie theater, movie theatre, movie house, picture palace -cleaver, meat cleaver, chopper -cliff dwelling -cloak -clog, geta, patten, sabot -cocktail shaker -coffee mug -coffeepot -coil, spiral, volute, whorl, helix -combination lock -computer keyboard, keypad -confectionery, confectionary, candy store -container ship, containership, container vessel -convertible -corkscrew, bottle screw -cornet, horn, trumpet, trump -cowboy boot -cowboy hat, ten-gallon hat -cradle -crane -crash helmet -crate -crib, cot -Crock Pot -croquet ball -crutch -cuirass -dam, dike, dyke -desk -desktop computer -dial telephone, dial phone -diaper, nappy, napkin -digital clock -digital watch -dining table, board -dishrag, dishcloth -dishwasher, dish washer, dishwashing machine -disk brake, disc brake -dock, dockage, docking facility -dogsled, dog sled, dog sleigh -dome -doormat, welcome mat -drilling platform, offshore rig -drum, membranophone, tympan -drumstick -dumbbell -Dutch oven -electric fan, blower -electric guitar -electric locomotive -entertainment center -envelope -espresso maker -face powder -feather boa, boa -file, file cabinet, filing cabinet -fireboat -fire engine, fire truck -fire screen, fireguard -flagpole, flagstaff -flute, transverse flute -folding chair -football helmet -forklift -fountain -fountain pen -four-poster -freight car -French horn, horn -frying pan, frypan, skillet -fur coat -garbage truck, dustcart -gasmask, respirator, gas helmet -gas pump, gasoline pump, petrol pump, island dispenser -goblet -go-kart -golf ball -golfcart, golf cart -gondola -gong, tam-tam -gown -grand piano, grand -greenhouse, nursery, glasshouse -grille, radiator grille -grocery store, grocery, food market, market -guillotine -hair slide -hair spray -half track -hammer -hamper -hand blower, blow dryer, blow drier, hair dryer, hair drier -hand-held computer, hand-held microcomputer -handkerchief, hankie, hanky, hankey -hard disc, hard disk, fixed disk -harmonica, mouth organ, harp, mouth harp -harp -harvester, reaper -hatchet -holster -home theater, home theatre -honeycomb -hook, claw -hoopskirt, crinoline -horizontal bar, high bar -horse cart, horse-cart -hourglass -iPod -iron, smoothing iron -jack-o'-lantern -jean, blue jean, denim -jeep, landrover -jersey, T-shirt, tee shirt -jigsaw puzzle -jinrikisha, ricksha, rickshaw -joystick -kimono -knee pad -knot -lab coat, laboratory coat -ladle -lampshade, lamp shade -laptop, laptop computer -lawn mower, mower -lens cap, lens cover -letter opener, paper knife, paperknife -library -lifeboat -lighter, light, igniter, ignitor -limousine, limo -liner, ocean liner -lipstick, lip rouge -Loafer -lotion -loudspeaker, speaker, speaker unit, loudspeaker system, speaker system -loupe, jeweler's loupe -lumbermill, sawmill -magnetic compass -mailbag, postbag -mailbox, letter box -maillot -maillot, tank suit -manhole cover -maraca -marimba, xylophone -mask -matchstick -maypole -maze, labyrinth -measuring cup -medicine chest, medicine cabinet -megalith, megalithic structure -microphone, mike -microwave, microwave oven -military uniform -milk can -minibus -miniskirt, mini -minivan -missile -mitten -mixing bowl -mobile home, manufactured home -Model T -modem -monastery -monitor -moped -mortar -mortarboard -mosque -mosquito net -motor scooter, scooter -mountain bike, all-terrain bike, off-roader -mountain tent -mouse, computer mouse -mousetrap -moving van -muzzle -nail -neck brace -necklace -nipple -notebook, notebook computer -obelisk -oboe, hautboy, hautbois -ocarina, sweet potato -odometer, hodometer, mileometer, milometer -oil filter -organ, pipe organ -oscilloscope, scope, cathode-ray oscilloscope, CRO -overskirt -oxcart -oxygen mask -packet -paddle, boat paddle -paddlewheel, paddle wheel -padlock -paintbrush -pajama, pyjama, pj's, jammies -palace -panpipe, pandean pipe, syrinx -paper towel -parachute, chute -parallel bars, bars -park bench -parking meter -passenger car, coach, carriage -patio, terrace -pay-phone, pay-station -pedestal, plinth, footstall -pencil box, pencil case -pencil sharpener -perfume, essence -Petri dish -photocopier -pick, plectrum, plectron -pickelhaube -picket fence, paling -pickup, pickup truck -pier -piggy bank, penny bank -pill bottle -pillow -ping-pong ball -pinwheel -pirate, pirate ship -pitcher, ewer -plane, carpenter's plane, woodworking plane -planetarium -plastic bag -plate rack -plow, plough -plunger, plumber's helper -Polaroid camera, Polaroid Land camera -pole -police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria -poncho -pool table, billiard table, snooker table -pop bottle, soda bottle -pot, flowerpot -potter's wheel -power drill -prayer rug, prayer mat -printer -prison, prison house -projectile, missile -projector -puck, hockey puck -punching bag, punch bag, punching ball, punchball -purse -quill, quill pen -quilt, comforter, comfort, puff -racer, race car, racing car -racket, racquet -radiator -radio, wireless -radio telescope, radio reflector -rain barrel -recreational vehicle, RV, R.V. -reel -reflex camera -refrigerator, icebox -remote control, remote -restaurant, eating house, eating place, eatery -revolver, six-gun, six-shooter -rifle -rocking chair, rocker -rotisserie -rubber eraser, rubber, pencil eraser -rugby ball -rule, ruler -running shoe -safe -safety pin -saltshaker, salt shaker -sandal -sarong -sax, saxophone -scabbard -scale, weighing machine -school bus -schooner -scoreboard -screen, CRT screen -screw -screwdriver -seat belt, seatbelt -sewing machine -shield, buckler -shoe shop, shoe-shop, shoe store -shoji -shopping basket -shopping cart -shovel -shower cap -shower curtain -ski -ski mask -sleeping bag -slide rule, slipstick -sliding door -slot, one-armed bandit -snorkel -snowmobile -snowplow, snowplough -soap dispenser -soccer ball -sock -solar dish, solar collector, solar furnace -sombrero -soup bowl -space bar -space heater -space shuttle -spatula -speedboat -spider web, spider's web -spindle -sports car, sport car -spotlight, spot -stage -steam locomotive -steel arch bridge -steel drum -stethoscope -stole -stone wall -stopwatch, stop watch -stove -strainer -streetcar, tram, tramcar, trolley, trolley car -stretcher -studio couch, day bed -stupa, tope -submarine, pigboat, sub, U-boat -suit, suit of clothes -sundial -sunglass -sunglasses, dark glasses, shades -sunscreen, sunblock, sun blocker -suspension bridge -swab, swob, mop -sweatshirt -swimming trunks, bathing trunks -swing -switch, electric switch, electrical switch -syringe -table lamp -tank, army tank, armored combat vehicle, armoured combat vehicle -tape player -teapot -teddy, teddy bear -television, television system -tennis ball -thatch, thatched roof -theater curtain, theatre curtain -thimble -thresher, thrasher, threshing machine -throne -tile roof -toaster -tobacco shop, tobacconist shop, tobacconist -toilet seat -torch -totem pole -tow truck, tow car, wrecker -toyshop -tractor -trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi -tray -trench coat -tricycle, trike, velocipede -trimaran -tripod -triumphal arch -trolleybus, trolley coach, trackless trolley -trombone -tub, vat -turnstile -typewriter keyboard -umbrella -unicycle, monocycle -upright, upright piano -vacuum, vacuum cleaner -vase -vault -velvet -vending machine -vestment -viaduct -violin, fiddle -volleyball -waffle iron -wall clock -wallet, billfold, notecase, pocketbook -wardrobe, closet, press -warplane, military plane -washbasin, handbasin, washbowl, lavabo, wash-hand basin -washer, automatic washer, washing machine -water bottle -water jug -water tower -whiskey jug -whistle -wig -window screen -window shade -Windsor tie -wine bottle -wing -wok -wooden spoon -wool, woolen, woollen -worm fence, snake fence, snake-rail fence, Virginia fence -wreck -yawl -yurt -web site, website, internet site, site -comic book -crossword puzzle, crossword -street sign -traffic light, traffic signal, stoplight -book jacket, dust cover, dust jacket, dust wrapper -menu -plate -guacamole -consomme -hot pot, hotpot -trifle -ice cream, icecream -ice lolly, lolly, lollipop, popsicle -French loaf -bagel, beigel -pretzel -cheeseburger -hotdog, hot dog, red hot -mashed potato -head cabbage -broccoli -cauliflower -zucchini, courgette -spaghetti squash -acorn squash -butternut squash -cucumber, cuke -artichoke, globe artichoke -bell pepper -cardoon -mushroom -Granny Smith -strawberry -orange -lemon -fig -pineapple, ananas -banana -jackfruit, jak, jack -custard apple -pomegranate -hay -carbonara -chocolate sauce, chocolate syrup -dough -meat loaf, meatloaf -pizza, pizza pie -potpie -burrito -red wine -espresso -cup -eggnog -alp -bubble -cliff, drop, drop-off -coral reef -geyser -lakeside, lakeshore -promontory, headland, head, foreland -sandbar, sand bar -seashore, coast, seacoast, sea-coast -valley, vale -volcano -ballplayer, baseball player -groom, bridegroom -scuba diver -rapeseed -daisy -yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum -corn -acorn -hip, rose hip, rosehip -buckeye, horse chestnut, conker -coral fungus -agaric -gyromitra -stinkhorn, carrion fungus -earthstar -hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa -bolete -ear, spike, capitulum -toilet tissue, toilet paper, bathroom tissue diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py deleted file mode 100644 index a6c0585eb7..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 - -""" -Copyright 2020 Huawei Technologies Co., Ltd - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -# import StreamManagerApi.py -from StreamManagerApi import StreamManagerApi -from StreamManagerApi import MxDataInput -import os -import json -import numpy as np -import datetime -import sys - - -def save_infer_result(result, result_name, image_name): - """ - save the infer result to name_1.txt - the file content top5: - class_id1, class_id2, class_id3, class_id4, class_id5 - """ - load_dict = json.loads(result) - if load_dict.get('MxpiClass') is None: - with open(result_name + "/" + image_name[:-5] + '.txt', 'w') as f_write: - f_write.write("") - else: - res_vec = load_dict['MxpiClass'] - with open(result_name + "/" + image_name[:-5] + '_1.txt', 'w') as f_write: - list1 = [str(item.get("classId")) + " " for item in res_vec] - f_write.writelines(list1) - f_write.write('\n') - -def main(): - # init stream manager - stream_manager_api = StreamManagerApi() - ret = stream_manager_api.InitManager() - if ret != 0: - print("Failed to init Stream manager, ret=%s" % str(ret)) - exit() - - # create streams by pipeline config file - with open("../pipeline/Resnet50.pipeline", 'rb') as f: - pipeline_str = f.read() - ret = stream_manager_api.CreateMultipleStreams(pipeline_str) - - - if ret != 0: - print("Failed to create Stream, ret=%s" % str(ret)) - exit() - - # Construct the input of the stream - data_input = MxDataInput() - - dir_name = sys.argv[1] - res_dir_name = sys.argv[2] - file_list = os.listdir(dir_name) - if not os.path.exists(res_dir_name): - os.makedirs(res_dir_name) - - for file_name in file_list: - file_path = os.path.join(dir_name, file_name) - if file_name.lower().endswith(".jpg") or file_name.lower().endswith(".jpeg"): - portion = os.path.splitext(file_name) - with open(file_path, 'rb') as f: - data_input.data = f.read() - else: - continue - - empty_data = [] - - stream_name = b'resnet50_classification' - in_plugin_id = 0 - unique_id = stream_manager_api.SendDataWithUniqueId(stream_name, in_plugin_id, data_input) - if unique_id < 0: - print("Failed to send data to stream.") - exit() - # Obtain the inference result by specifying stream_name and unique_id. - start_time = datetime.datetime.now() - infer_result = stream_manager_api.GetResultWithUniqueId(stream_name, unique_id, 3000) - endtime = datetime.datetime.now() - print('sdk run time: {}'.format((endtime - start_time).microseconds)) - if infer_result.errorCode != 0: - print("GetResultWithUniqueId error. errorCode=%d, errorMsg=%s" % ( - infer_result.errorCode, infer_result.data.decode())) - exit() - # print the infer result - infer_res = infer_result.data.decode() - print("process img: {}, infer result: {}".format(file_name, infer_res)) - - save_infer_result(infer_result.data.decode(), res_dir_name, file_name) - - # destroy streams - stream_manager_api.DestroyAllStreams() - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg deleted file mode 100644 index b8552cf634..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg +++ /dev/null @@ -1,3 +0,0 @@ -CLASS_NUM=1000 -SOFTMAX=True -TOP_K=5 \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh deleted file mode 100644 index 3899f83598..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -image_path=$1 -result_dir=$2 - -set -e - -CUR_PATH=$(cd "$(dirname "$0")" || { warn "Failed to check path/to/run.sh" ; exit ; } ; pwd) - -# Simple log helper functions -info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; } -warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; } - -#export MX_SDK_HOME=${CUR_PATH}/../../.. -export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH} -export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner -export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins - -#to set PYTHONPATH, import the StreamManagerApi.py -export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python - -python3 main.py $image_path $result_dir -exit 0 \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py deleted file mode 100644 index 89e41a2e40..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py +++ /dev/null @@ -1,688 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import glob -import random -import shutil -import sys -import time -import warnings -import math -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models -import torch.npu - -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../')) -from pthtar2onx import convert -import DistributedResnet50.image_classification.resnet as nvmodels -from apex import amp -import moxing as mox - -BATCH_SIZE = 512 -EPOCHS_SIZE = 100 -TRAIN_STEP = 8000 -LOG_STEP = 1 - -CALCULATE_DEVICE = "npu:7" -PRINT_DEVICE = "cpu" -SOURCE_DIR = "/data/imagenet" - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('--data_url', - metavar='DIR', - default='/cache/data_url', - help='path to dataset') -parser.add_argument('-a', '--arch', - metavar='ARCH', - default='resnet50', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet18)') -parser.add_argument('-j', '--workers', - default=32, - type=int, - metavar='N', - help='number of data loading workers (default: 8)') -parser.add_argument('--epochs', - default=1, - type=int, - metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', - default=0, - type=int, - metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', - default=BATCH_SIZE, - type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', - default=0.2, - type=float, - metavar='LR', - help='initial learning rate', - dest='lr') -parser.add_argument('--momentum', - default=0.9, - type=float, - metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', - default=1e-4, - type=float, - metavar='W', - help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', - default=10, - type=int, - metavar='N', - help='print frequency (default: 10)') -parser.add_argument('--resume', - default='', - type=str, - metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', - dest='evaluate', - action='store_true', - help='evaluate model on validation set') -parser.add_argument('--pretrained', - dest='pretrained', - action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', - default=-1, - type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', - default=-1, - type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', - default=None, - type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', - default='nccl', - type=str, - help='distributed backend') -parser.add_argument('--seed', - default=None, - type=int, - help='seed for initializing training. ') -parser.add_argument('--gpu', - default=None, - type=int, - help='GPU id to use.') -parser.add_argument('--npu', - default=None, - type=int, - help='NPU id to use.') -parser.add_argument('--multiprocessing-distributed', - action='store_true') -parser.add_argument('--warmup', - default=5, - type=int, - metavar='E', - help='number of warmup epochs') -parser.add_argument('--label-smoothing', - default=0.1, - type=float, - metavar='S', - help='label smoothing') -parser.add_argument('--optimizer-batch-size', - default=-1, - type=int, - metavar='N', - help= - 'size of a total batch size, for simulating bigger batches using gradient accumulation') -parser.add_argument('--static-loss-scale', - type=float, - default=1, - help= - 'Static loss scale, positive power of 2 values can improve fp16 convergence.') - -parser.add_argument('-t', '--fine-tuning', default=False, action='store_true', - help='transfer learning + fine tuning - train only the last FC layer') -parser.add_argument('--train_url', - default="/cache/training", - type=str, - help="setting dir of training output") -parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH', - help='path to pretrained weight') -parser.add_argument('--onnx', default=True, action='store_true', - help="convert pth model to onnx") - -CACHE_TRAINING_URL = "/cache/training" -best_acc1 = 0 - -def main(): - args = parser.parse_args() - if args.npu is None: - args.npu = 0 - global CALCULATE_DEVICE - CALCULATE_DEVICE = "npu:{}".format(args.npu) - torch.npu.set_device(CALCULATE_DEVICE) - print("use ", CALCULATE_DEVICE) - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - ngpus_per_node = torch.cuda.device_count() - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = gpu - - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = nvmodels.build_resnet('resnet50', 'classic', True) - CACHE_MODEL_URL = "/cache/model" - os.makedirs(CACHE_MODEL_URL, exist_ok=True) - mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")) - pretrained_weight = os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar") - pretrained_dict = torch.load(pretrained_weight)["state_dict"] - pretrained_dict.pop('module.fc.weight') - pretrained_dict.pop('module.fc.bias') - model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model '{}'".format(args.arch)) - model = models.__dict__[args.arch](zero_init_residual=True) - - if args.fine_tuning: - print("=> transfer-learning mode + fine-tuning (train only the last FC layer)") - # Freeze Previous Layers(now we are using them as features extractor) - # Fine Tuning the last layer for the new task - if args.arch == "resnet50": - model.classifier = nn.Linear(1024, 10) - model.classifier.parameters() - else: - print("Error: Fine-tuning is not supported on this architecture") - exit(-1) - else: - model.parameters() - - for layer in model.modules(): - if isinstance(layer, nn.Linear): - torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), ) - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. - if args.gpu is not None: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / ngpus_per_node) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - else: - # DataParallel will divide and allocate batch_size to all available GPUs - if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): - model.features = torch.nn.DataParallel(model.features) - model.cuda() - else: - model = model.to(CALCULATE_DEVICE) - - lr_policy = lr_cosine_policy(args.lr, - args.warmup, - args.epochs) - - - # define loss function (criterion) and optimizer - loss = nn.CrossEntropyLoss - if args.label_smoothing > 0.0: - loss = lambda: LabelSmoothing(args.label_smoothing) - criterion = loss().to(CALCULATE_DEVICE) - optimizer = torch.optim.SGD([ - {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0}, - {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}], - args.lr, - momentum=args.momentum) - - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.npu is not None: - checkpoint = torch.load(args.resume) - elif args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. - loc = 'cuda:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.npu is not None: - best_acc1 = best_acc1.to("npu:{}".format(args.npu)) - elif args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - real_path = '/cache/data_url' - if not os.path.exists(real_path): - os.makedirs(real_path) - mox.file.copy_parallel(args.data_url, real_path) - print("training data finish copy to %s." % real_path) - - traindir = os.path.join(real_path, 'train') - valdir = os.path.join(real_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=True) - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - lr_policy(optimizer, 0, epoch) - # train for one epoch - train(train_loader, model, criterion, optimizer, epoch, args) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - file_name = "checkpoint_npu{}".format(args.npu) - modeltmp = model.cpu() - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': modeltmp.state_dict(), - 'best_acc1': best_acc1, - }, is_best, file_name) - modeltmp.to(CALCULATE_DEVICE) - - if args.onnx: - convert_pth_to_onnx(args) - - # --------------modelarts modification---------- - mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url) - # --------------modelarts modification end---------- - -def convert_pth_to_onnx(args): - pth_pattern = os.path.join(CACHE_TRAINING_URL, f"checkpoint_npu{args.npu}.pth.tar") - pth_file_list = glob.glob(pth_pattern) - if not pth_file_list: - print(f"can't find pth {pth_pattern}") - return - pth_file = pth_file_list[0] - onnx_path = pth_file.split(".")[0] + '.onnx' - convert(pth_file, onnx_path) - - -def train(train_loader, model, criterion, optimizer, epoch, args): - if args.optimizer_batch_size < 0: - batch_size_multiplier = 1 - else: - tbs = 1 * args.batch_size - if args.optimizer_batch_size % tbs != 0: - print( - "Warning: simulated batch size {} is not divisible by actual batch size {}" - .format(args.optimizer_batch_size, tbs)) - batch_size_multiplier = int(args.optimizer_batch_size / tbs) - print("BSM: {}".format(batch_size_multiplier)) - - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(train_loader), - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - optimizer.zero_grad() - end = time.time() - for i, (images, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.gpu is not None: - images = images.cuda(args.gpu, non_blocking=True) - - images = images.to(CALCULATE_DEVICE, non_blocking=True) - if args.label_smoothing == 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer_step = ((i + 1) % batch_size_multiplier) == 0 - if optimizer_step: - if batch_size_multiplier != 1: - for param_group in optimizer.param_groups: - for param in param_group['params']: - param.grad /= batch_size_multiplier - optimizer.step() - optimizer.zero_grad() - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % LOG_STEP == 0: - progress.display(i) - - if i == TRAIN_STEP: - break - - print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size/(batch_time.avg+0.0001))) - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if args.gpu is not None: - images = images.cuda(args.gpu, non_blocking=True) - images = images.to(CALCULATE_DEVICE, non_blocking=True) - if args.label_smoothing == 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % LOG_STEP == 0: - progress.display(i) - - print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - return top1.avg - -def save_checkpoint(state, is_best, filename='checkpoint'): - if not os.path.exists(CACHE_TRAINING_URL): - os.makedirs(CACHE_TRAINING_URL) - - filename2 = os.path.join(CACHE_TRAINING_URL, filename + ".pth.tar") - torch.save(state, filename2) - if is_best: - shutil.copyfile(filename2, os.path.join(CACHE_TRAINING_URL, filename + 'model_best.pth.tar')) - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = 10 - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.batchsize = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.batchsize): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.batchsize) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - lr = args.lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -class LabelSmoothing(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - :param smoothing: label smoothing factor - """ - super(LabelSmoothing, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - def forward(self, x, target): - logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu") - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - return loss.mean().to(CALCULATE_DEVICE) - -def lr_policy(lr_fn, logger=None): - if logger is not None: - logger.register_metric('lr', - log.LR_METER(), - verbosity=dllogger.Verbosity.VERBOSE) - - def _alr(optimizer, iteration, epoch): - lr = lr_fn(iteration, epoch) - - if logger is not None: - logger.log_metric('lr', lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - return _alr - -def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None): - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - es = epochs - warmup_length - lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr - return lr - - return lr_policy(_lr_fn, logger=logger) - -if __name__ == '__main__': - main() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt deleted file mode 100644 index 31529da2e6..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt +++ /dev/null @@ -1,3 +0,0 @@ -FuncStatus:OK -PerfStatus:OK -PrecisionStatus:OK \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py deleted file mode 100644 index fd878f6e5a..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -import argparse - -import torch -from DistributedResnet50.image_classification import resnet -import torch.onnx - -from collections import OrderedDict - -parser = argparse.ArgumentParser(description='PyTorch pth convert onnx') -parser.add_argument('--pth_file_path', - metavar='PATH', - default='./resnet50checkpoint.pth.tar', - help='path of pth file') - -parser.add_argument('--onnx_file_path', - metavar='PATH', - default='resnet50_npu_16.onnx', - help='path of onnx file') -args = parser.parse_args() - - -def proc_node_module(checkpoint, AttrName): - new_state_dict = OrderedDict() - for k, v in checkpoint[AttrName].items(): - if(k[0:7] == "module."): - name = k[7:] - else: - name = k[0:] - new_state_dict[name] = v - return new_state_dict - - -def convert(pth_file_path, onnx_file_path): - checkpoint = torch.load(pth_file_path, map_location='cpu') - checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict') - model = resnet.build_resnet("resnet50", "classic") - model.load_state_dict(checkpoint['state_dict'],strict=False) - model.eval() - print(model) - - input_names = ["actual_input_1"] - output_names = ["output1"] - dummy_input = torch.randn(16, 3, 224, 224) - torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names, - opset_version=11) - - -def main(): - src_file_path = args.pth_file_path - dst_file_path = args.onnx_file_path - convert(src_file_path, dst_file_path) - - -if __name__ == "__main__": - main() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py deleted file mode 100644 index 6e9f8fb66a..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py +++ /dev/null @@ -1,827 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import random -import shutil -import time -import warnings -import math -import numpy as np - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models -import torch.npu -import DistributedResnet50.image_classification.resnet as nvmodels -from DistributedResnet50.image_classification.multi_epochs_dataloader import MultiEpochsDataLoader -from apex import amp - -BATCH_SIZE = 512 -EPOCHS_SIZE = 100 -TRAIN_STEP = 8000 -LOG_STEP = 1 - -CALCULATE_DEVICE = "npu:7" -PRINT_DEVICE = "cpu" -SOURCE_DIR = "/data/imagenet" - -model_names = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('--data', - metavar='DIR', - default=SOURCE_DIR, - help='path to dataset') -parser.add_argument('--save_ckpt_path', - metavar='DIR', - default='./', - help='path of checkpoint file') -parser.add_argument('-a', '--arch', - metavar='ARCH', - default='resnet50', - choices=model_names, - help='default: resnet50') -parser.add_argument('-j', '--workers', - default=32, - type=int, - metavar='N', - help='number of data loading workers (default: 8)') -parser.add_argument('--num_classes', - default=1000, - type=int, - metavar='N', - help='class number of dataset') -parser.add_argument('--epochs', - default=EPOCHS_SIZE, - type=int, - metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', - default=0, - type=int, - metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', - default=BATCH_SIZE, - type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', - default=0.1, - type=float, - metavar='LR', - help='initial learning rate', - dest='lr') -parser.add_argument('--momentum', - default=0.9, - type=float, - metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', - default=1e-4, - type=float, - metavar='W', - help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', - default=10, - type=int, - metavar='N', - help='print frequency (default: 10)') -parser.add_argument('--resume', - default='', - type=str, - metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', - dest='evaluate', - help='evaluate model on validation set') -parser.add_argument('--pretrained', - dest='pretrained', - action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', - default=-1, - type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', - default=-1, - type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', - default='tcp://224.66.41.62:23456', - type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', - default='nccl', - type=str, - help='distributed backend') -parser.add_argument('--seed', - default=None, - type=int, - help='seed for initializing training. ') -parser.add_argument('--gpu', - default=None, - type=int, - help='GPU id to use.') -parser.add_argument('--npu', - default=None, - type=int, - help='NPU id to use.') -parser.add_argument('--multiprocessing-distributed', - action='store_true') -parser.add_argument('--warmup', - default=0, - type=int, - metavar='E', - help='number of warmup epochs') -parser.add_argument('--label-smoothing', - default=0.0, - type=float, - metavar='S', - help='label smoothing') -parser.add_argument('--optimizer-batch-size', - default=-1, - type=int, - metavar='N', - help= - 'size of a total batch size, for simulating bigger batches using gradient accumulation') -parser.add_argument('--static-loss-scale', - type=float, - default=1, - help= - 'Static loss scale, positive power of 2 values can improve fp16 convergence.') -parser.add_argument('-t', - '--fine-tuning', - action='store_true', - help='transfer learning + fine tuning - train only the last FC layer.') -# 图模式 -parser.add_argument('--graph_mode', - action='store_true', - help='whether to enable graph mode.') -best_acc1 = 0 -args = parser.parse_args() -def main(): - - if args.npu is None: - args.npu = 0 - global CALCULATE_DEVICE - CALCULATE_DEVICE = "npu:{}".format(args.npu) - torch.npu.set_device(CALCULATE_DEVICE) - print("use ", CALCULATE_DEVICE) - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - cudnn.deterministic = True - warnings.warn('You have chosen to seed training. ' - 'This will turn on the CUDNN deterministic setting, ' - 'which can slow down your training considerably! ' - 'You may see unexpected behavior when restarting ' - 'from checkpoints.') - - if args.gpu is not None: - warnings.warn('You have chosen a specific GPU. This will completely ' - 'disable data parallelism.') - - if args.dist_url == "env://" and args.world_size == -1: - args.world_size = int(os.environ["WORLD_SIZE"]) - - args.distributed = args.world_size > 1 or args.multiprocessing_distributed - - ngpus_per_node = torch.cuda.device_count() - if args.multiprocessing_distributed: - # Since we have ngpus_per_node processes per node, the total world_size - # needs to be adjusted accordingly - args.world_size = ngpus_per_node * args.world_size - # Use torch.multiprocessing.spawn to launch distributed processes: the - # main_worker process function - mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) - else: - # Simply call main_worker function - main_worker(args.gpu, ngpus_per_node, args) - -def main_worker(gpu, ngpus_per_node, args): - global best_acc1 - args.gpu = gpu - - if args.gpu is not None: - print("Use GPU: {} for training".format(args.gpu)) - - if args.distributed: - if args.dist_url == "env://" and args.rank == -1: - args.rank = int(os.environ["RANK"]) - if args.multiprocessing_distributed: - # For multiprocessing distributed training, rank needs to be the - # global rank among all the processes - args.rank = args.rank * ngpus_per_node + gpu - dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, - world_size=args.world_size, rank=args.rank) - # create model - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = nvmodels.build_resnet("resnet50", "classic", True) - print("load pretrained model") - pretrained_dict = \ - torch.load("/home/checkpoint_npu0model_best.pth.tar", map_location="cpu")["state_dict"] - pretrained_dict.pop('module.fc.weight') - pretrained_dict.pop('module.fc.bias') - model.load_state_dict(pretrained_dict, strict=False) - else: - print("=> creating model '{}'".format(args.arch)) - model = models.__dict__[args.arch](zero_init_residual=True, num_classes=args.num_classes) - - if args.fine_tuning: - print("=> transfer learning + fine tuning(train only the last FC layer)") - if args.arch == "resnet50": - model.parameters() - else: - print("Error: Fine-tuning is not supported on this architecture") - exit(-1) - else: - model.parameters() - - for layer in model.modules(): - if isinstance(layer, nn.Linear): - torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), ) - if args.distributed: - # For multiprocessing distributed, DistributedDataParallel constructor - # should always set the single device scope, otherwise, - # DistributedDataParallel will use all available devices. - if args.gpu is not None: - torch.cuda.set_device(args.gpu) - model.cuda(args.gpu) - # When using a single GPU per process and per - # DistributedDataParallel, we need to divide the batch size - # ourselves based on the total number of GPUs we have - args.batch_size = int(args.batch_size / ngpus_per_node) - args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) - model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) - else: - model.cuda() - # DistributedDataParallel will divide and allocate batch_size to all - # available GPUs if device_ids are not set - model = torch.nn.parallel.DistributedDataParallel(model) - elif args.gpu is not None: - torch.cuda.set_device(args.gpu) - model = model.cuda(args.gpu) - else: - # DataParallel will divide and allocate batch_size to all available GPUs - if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): - model.features = torch.nn.DataParallel(model.features) - model.cuda() - else: - model = model.to(CALCULATE_DEVICE) - - lr_policy = lr_cosine_policy(args.lr, - args.warmup, - args.epochs) - - - # define loss function (criterion) and optimizer - loss = nn.CrossEntropyLoss - if args.label_smoothing > 0.0: - loss = lambda: LabelSmoothing(args.label_smoothing) - criterion = loss().to(CALCULATE_DEVICE) - optimizer = torch.optim.SGD([ - {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0}, - {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}], - args.lr, - momentum=args.momentum) # torch.optim. apex.optimizers.NpuFusedSGD - - model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1,combine_grad=False) - - # optionally resume from a checkpoint - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - if args.npu is not None: - checkpoint = torch.load(args.resume) - elif args.gpu is None: - checkpoint = torch.load(args.resume) - else: - # Map model to be loaded to specified single gpu. - loc = 'cuda:{}'.format(args.gpu) - checkpoint = torch.load(args.resume, map_location=loc) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.npu is not None: - best_acc1 = best_acc1.to("npu:{}".format(args.npu)) - elif args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - cudnn.benchmark = True - - # Data loading code - traindir = os.path.join(args.data, 'train') - valdir = os.path.join(args.data, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - if args.distributed: - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - ## 原始loader,下面的优化后loader具有更好的性能,无论单算子还是图模式 - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), - num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True) - - val_loader = torch.utils.data.DataLoader( - datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])), - batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=True) - train_loader_len = len(train_loader) - - # 图模式 - if args.graph_mode: - train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data, - args.batch_size, - workers=args.workers, - fp16=True) - val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False) - - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) - lr_policy(optimizer, 0, epoch) - # train for one epoch - train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args) - - # evaluate on validation set - acc1 = validate(val_loader, model, criterion, args) - - # remember best acc@1 and save checkpoint - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - file_name = "checkpoint_npu{}".format(args.npu) - modeltmp = model.cpu() - save_checkpoint({ - 'epoch': epoch + 1, - 'arch': args.arch, - 'state_dict': modeltmp.state_dict(), - 'best_acc1': best_acc1, - }, is_best, args, file_name) - modeltmp.to(CALCULATE_DEVICE) - -def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args): - if args.optimizer_batch_size < 0: - batch_size_multiplier = 1 - else: - tbs = 1 * args.batch_size - if args.optimizer_batch_size % tbs != 0: - print( - "Warning: simulated batch size {} is not divisible by actual batch size {}" - .format(args.optimizer_batch_size, tbs)) - batch_size_multiplier = int(args.optimizer_batch_size / tbs) - print("BSM: {}".format(batch_size_multiplier)) - - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - train_loader_len, - [batch_time, data_time, losses, top1, top5], - prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - optimizer.zero_grad() - end = time.time() - for i, (images, target) in enumerate(train_loader): - # 图模式 - if args.graph_mode: - print("args.graph_mode") - torch.npu.enable_graph_mode() - - if i > 100: - pass - # measure data loading time - data_time.update(time.time() - end) - - if args.gpu is not None: - images = images.cuda(args.gpu, non_blocking=True) - - images = images.to(CALCULATE_DEVICE, non_blocking=True) - - # 图模式 - if args.graph_mode: - print("args.graph_mode") - target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32) - else : - if args.label_smoothing == 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # 图模式 - if not args.graph_mode: - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - - # measure accuracy and record loss - # 图模式 - if not args.graph_mode: - # print("args.graph_mode====================") - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # compute gradient and do SGD step - with amp.scale_loss(loss, optimizer) as scaled_loss: - scaled_loss.backward() - optimizer_step = ((i + 1) % batch_size_multiplier) == 0 - if optimizer_step: - if batch_size_multiplier != 1: - for param_group in optimizer.param_groups: - for param in param_group['params']: - param.grad /= batch_size_multiplier - optimizer.step() - optimizer.zero_grad() - - # 图模式 - if args.graph_mode: - torch.npu.launch_graph() - if i == 100: - torch.npu.synchronize() - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % LOG_STEP == 0: - progress.display(i) - - if i == TRAIN_STEP: - break - # 图模式 - if args.graph_mode: - print("args.graph_mode") - torch.npu.disable_graph_mode() - - print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format( - args.batch_size/batch_time.avg)) - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter( - len(val_loader), - [batch_time, losses, top1, top5], - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - end = time.time() - for i, (images, target) in enumerate(val_loader): - if i > 50: - pass - if args.gpu is not None: - images = images.cuda(args.gpu, non_blocking=True) - images = images.to(CALCULATE_DEVICE, non_blocking=True) - - - # 图模式 - if args.graph_mode: - print("args.graph_mode") - target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32) - else : - if args.label_smoothing == 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # compute output - output = model(images) - loss = criterion(output, target) - - # 图模式 - if not args.graph_mode: - if args.label_smoothing > 0.0: - target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), images.size(0)) - top1.update(acc1[0], images.size(0)) - top5.update(acc5[0], images.size(0)) - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % LOG_STEP == 0: - progress.display(i) - - print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - return top1.avg - -def save_checkpoint(state, is_best, args, filename='checkpoint'): - filename2 = os.path.join(args.save_ckpt_path, filename + ".pth.tar") - torch.save(state, filename2) - if is_best: - shutil.copyfile(filename2, os.path.join(args.save_ckpt_path, filename+'model_best.pth.tar')) - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - self.start_count_index = 10 - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - if self.count == 0: - self.batchsize = n - - self.val = val - self.count += n - if self.count > (self.start_count_index * self.batchsize): - self.sum += val * n - self.avg = self.sum / (self.count - self.start_count_index * self.batchsize) - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - -class ProgressMeter(object): - def __init__(self, num_batches, meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def display(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - lr = args.lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -class LabelSmoothing(nn.Module): - """ - NLL loss with label smoothing. - """ - def __init__(self, smoothing=0.0): - """ - Constructor for the LabelSmoothing module. - - :param smoothing: label smoothing factor - """ - super(LabelSmoothing, self).__init__() - self.confidence = 1.0 - smoothing - self.smoothing = smoothing - - def forward(self, x, target): - # 图模式 - if args.graph_mode: - logprobs = torch.nn.functional.log_softmax(x, dim=-1) - else: - logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu") - nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) - nll_loss = nll_loss.squeeze(1) - smooth_loss = -logprobs.mean(dim=-1) - loss = self.confidence * nll_loss + self.smoothing * smooth_loss - # 图模式 - if args.graph_mode: - return loss.mean() - else: - return loss.mean().to(CALCULATE_DEVICE) - -def lr_policy(lr_fn, logger=None): - if logger is not None: - logger.register_metric('lr', - log.LR_METER(), - verbosity=dllogger.Verbosity.VERBOSE) - - def _alr(optimizer, iteration, epoch): - lr = lr_fn(iteration, epoch) - - if logger is not None: - logger.log_metric('lr', lr) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - return _alr - -def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None): - def _lr_fn(iteration, epoch): - if epoch < warmup_length: - lr = base_lr * (epoch + 1) / warmup_length - else: - e = epoch - warmup_length - es = epochs - warmup_length - lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr - return lr - - return lr_policy(_lr_fn, logger=logger) - - -def fast_collate(batch): - imgs = [img[0] for img in batch] - targets = torch.tensor([target[1] for target in batch], dtype=torch.int64) - w = imgs[0].size[0] - h = imgs[0].size[1] - tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8) - for i, img in enumerate(imgs): - nump_array = np.asarray(img, dtype=np.uint8) - if (nump_array.ndim < 3): - nump_array = np.expand_dims(nump_array, axis=-1) - nump_array = np.rollaxis(nump_array, 2) - - tensor[i] += torch.from_numpy(nump_array) - - return tensor, targets - - -class PrefetchedWrapper(object): - def prefetched_loader(loader, fp16): - mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1) - std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1) - if fp16: - mean = mean.half() - std = std.half() - - stream = torch.npu.Stream() - first = True - - for next_input, next_target in loader: - with torch.npu.stream(stream): - next_input = next_input.npu(non_blocking=True) - next_target = next_target.npu(non_blocking=True) - if fp16: - next_input = next_input.half() - - else: - next_input = next_input.float() - - next_input = next_input.sub_(mean).div_(std) - - - if not first: - yield input, target - else: - first = False - - torch.npu.current_stream().wait_stream(stream) - input = next_input - target = next_target - yield input, target - - def __init__(self, dataloader, fp16): - self.dataloader = dataloader - self.fp16 = fp16 - self.epoch = 0 - - def __iter__(self): - if (self.dataloader.sampler is not None and - isinstance(self.dataloader.sampler, - torch.utils.data.distributed.DistributedSampler)): - self.dataloader.sampler.set_epoch(self.epoch) - self.epoch += 1 - ret = PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) - return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16) - - -def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False): - traindir = os.path.join(data_path, 'train') - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - ])) - - if torch.distributed.is_initialized(): - train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) - else: - train_sampler = None - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=batch_size, shuffle=(train_sampler is None), - num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, - collate_fn=fast_collate, drop_last=True) - - return PrefetchedWrapper(train_loader, fp16), len(train_loader), train_sampler - - -def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False): - valdir = os.path.join(data_path, 'val') - val_dataset = datasets.ImageFolder( - valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - ])) - - if distributed: - val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) - else: - val_sampler = None - - dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader - val_loader = dataloader_fn( - val_dataset, - sampler=val_sampler, - batch_size=batch_size, shuffle=(val_sampler is None), - num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, collate_fn=fast_collate) - - return val_loader - - - -if __name__ == '__main__': - main() diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt deleted file mode 100644 index fbefb37521..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -apex -torchvision -opencv-python \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh deleted file mode 100644 index e542152b60..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -device_id=0 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_1p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/pytorch_resnet50_apex.py \ - --data /data/imagenet \ - --npu ${device_id} \ - -j64 \ - -b512 \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs 90 \ - --num_classes=1000 \ - --optimizer-batch-size 512 > ./resnet50_1p.log 2>&1 & - - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh deleted file mode 100644 index 047849d5f8..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -ip=$(hostname -I |awk '{print $1}') -device_id_list=0,1 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_2p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \ - --data /data/imagenet \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=0.4 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --device-list=${device_id_list} \ - --benchmark=0 \ - --device='npu' \ - --epochs=90 \ - --num_classes=1000 \ - --batch-size=1024 > ./resnet50_2p.log 2>&1 & - - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh deleted file mode 100644 index 2b29adfe64..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -ip=$(hostname -I |awk '{print $1}') -device_id_list=0,1,2,3 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error - -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_4p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \ - --data /data/imagenet \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=0.8 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --device-list=${device_id_list} \ - --benchmark=0 \ - --device='npu' \ - --epochs=90 \ - --num_classes=1000 \ - --batch-size=2048 > ./resnet50_4p.log 2>&1 & - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh deleted file mode 100644 index e3b0a5b523..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -source env_npu.sh -export WHICH_OP=GEOP -export NEW_GE_FE_ID=1 -export GE_AICPU_FLAG=1 - -/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error -/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error - -ip=$(hostname -I |awk '{print $1}') -currentDir=$(cd "$(dirname "$0")";pwd) -currtime=`date +%Y%m%d%H%M%S` -train_log_dir=${currentDir}/result/training_8p_job_${currtime} -mkdir -p ${train_log_dir} -cd ${train_log_dir} -echo "train log path is ${train_log_dir}" - -python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \ - --data /data/imagenet \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=128 \ - --learning-rate=1.6 \ - --warmup=8 \ - --label-smoothing=0.1 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --benchmark=0 \ - --device='npu' \ - --epochs=90 \ - --num_classes=1000 \ - --batch-size=4096 > ./resnet50_8p.log 2>&1 & - - diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh deleted file mode 100644 index 84d83feb94..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -export install_path=/usr/local/Ascend - -if [ -d ${install_path}/toolkit ]; then - export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} - export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH - export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH - export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH - export ASCEND_OPP_PATH=${install_path}/opp -else - if [ -d ${install_path}/nnae/latest ];then - export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/nnae/latest - else - export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH - export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ - export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ - export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so - export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH - export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest - fi -fi - - -#将Host日志输出到串口,0-关闭/1-开启 -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -#设置默认日志级别,0-debug/1-info/2-warning/3-error -export ASCEND_GLOBAL_LOG_LEVEL=3 -#设置Event日志开启标志,0-关闭/1-开启 -export ASCEND_GLOBAL_EVENT_ENABLE=0 -#设置是否开启taskque,0-关闭/1-开启 -export TASK_QUEUE_ENABLE=1 -#HCCL白名单开关,1-关闭/0-开启 -export HCCL_WHITELIST_DISABLE=1 - -#设置device侧日志登记为error -${install_path}/driver/tools/msnpureport -g error -d 0 -${install_path}/driver/tools/msnpureport -g error -d 1 -${install_path}/driver/tools/msnpureport -g error -d 2 -${install_path}/driver/tools/msnpureport -g error -d 3 -${install_path}/driver/tools/msnpureport -g error -d 4 -${install_path}/driver/tools/msnpureport -g error -d 5 -${install_path}/driver/tools/msnpureport -g error -d 6 -${install_path}/driver/tools/msnpureport -g error -d 7 -#关闭Device侧Event日志 -${install_path}/driver/tools/msnpureport -e disable - -path_lib=$(python3.7 -c """ -import sys -import re -result='' -for index in range(len(sys.path)): - match_sit = re.search('-packages', sys.path[index]) - if match_sit is not None: - match_lib = re.search('lib', sys.path[index]) - - if match_lib is not None: - end=match_lib.span()[1] - result += sys.path[index][0:end] + ':' - - result+=sys.path[index] + '/torch/lib:' -print(result)""" -) - -echo ${path_lib} - -export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh deleted file mode 100644 index 8c7751bc4e..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_ID3071_for_PyTorch" -# 训练batch_size -batch_size=512 -# 训练使用的npu卡数 -export RANK_SIZE=8 -# 数据集路径,保持为空,不需要修改 -data_path="" - -# 训练epoch 90 -train_epochs=3 -# 加载数据进程数 -workers=128 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# -ASCEND_DEVICE_ID=0 -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - - -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi - -python3.7 ./DistributedResnet50/main_apex_d76_npu.py \ - --data ${data_path} \ - --addr=$(hostname -I |awk '{print $1}') \ - --seed=49 \ - --workers=${workers} \ - --learning-rate=1.6 \ - --warmup=8 \ - --label-smoothing=0.0 \ - --mom=0.9 \ - --weight-decay=1.0e-04 \ - --static-loss-scale=128 \ - --print-freq=1 \ - --dist-url='tcp://127.0.0.1:50000' \ - --dist-backend='hccl' \ - --multiprocessing-distributed \ - --world-size=1 \ - --rank=0 \ - --benchmark=0 \ - --device='npu' \ - --graph_mode \ - --epochs=${train_epochs} \ - --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $11}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log -FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh deleted file mode 100644 index 2492708ca6..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_ID3071_for_PyTorch" -# 训练batch_size -batch_size=256 -# 训练使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -# 训练epoch 90 -train_epochs=1 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=64 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - -#修改参数 -sed -i "s|pass|break|g" ${test_path_dir}/../pytorch_resnet50_apex.py -wait -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi - -python3.7 ./pytorch_resnet50_apex.py \ - --data ${data_path} \ - --npu ${ASCEND_DEVICE_ID} \ - -j ${workers} \ - -b ${batch_size} \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs ${train_epochs} \ - --graph_mode \ - --optimizer-batch-size 256 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#参数改回 -sed -i "s|break|pass|g" ${test_path_dir}/../pytorch_resnet50_apex.py -wait - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log -FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh deleted file mode 100644 index e89e5332b6..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh +++ /dev/null @@ -1,131 +0,0 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_for_PyTorch" -# 训练batch_size -batch_size=512 -# 训练使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -# 训练epoch 90 -train_epochs=90 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=64 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - elif [[ $para == --checkpoint* ]];then - checkpoint=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - - -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi -python3.7 ./pytorch_resnet50_apex.py \ - --data ${data_path} \ - --npu ${ASCEND_DEVICE_ID} \ - -j ${workers} \ - -b ${batch_size} \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs ${train_epochs} \ - --evaluate \ - --resume ${checkpoint} \ - --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'eval' - -# 结果打印,不需要修改 - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Test: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh deleted file mode 100644 index 37fd0fd4b8..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_for_PyTorch" -# 训练batch_size -batch_size=512 -# 训练使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -# 训练epoch 90 -train_epochs=90 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=64 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - - -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi -python3.7 ./pytorch_resnet50_apex.py \ - --data ${data_path} \ - --npu ${ASCEND_DEVICE_ID} \ - -j ${workers} \ - -b ${batch_size} \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs ${train_epochs} \ - --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'full' - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log -FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh deleted file mode 100644 index 96226ecf32..0000000000 --- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh +++ /dev/null @@ -1,148 +0,0 @@ -#!/bin/bash - -################基础配置参数,需要模型审视修改################## -# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE -# 网络名称,同目录名称 -Network="ResNet50_ID0095_for_PyTorch" -# 训练batch_size -batch_size=512 -# 训练使用的npu卡数 -export RANK_SIZE=1 -# 数据集路径,保持为空,不需要修改 -data_path="" - -# 训练epoch 90 -train_epochs=1 -# 指定训练所使用的npu device卡id -device_id=0 -# 加载数据进程数 -workers=64 - -# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 -for para in $* -do - if [[ $para == --device_id* ]];then - device_id=`echo ${para#*=}` - elif [[ $para == --data_path* ]];then - data_path=`echo ${para#*=}` - fi -done - -# 校验是否传入data_path,不需要修改 -if [[ $data_path == "" ]];then - echo "[Error] para \"data_path\" must be confing" - exit 1 -fi -# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改 -if [ $ASCEND_DEVICE_ID ];then - echo "device id is ${ASCEND_DEVICE_ID}" -elif [ ${device_id} ];then - export ASCEND_DEVICE_ID=${device_id} - echo "device id is ${ASCEND_DEVICE_ID}" -else - "[Error] device id must be config" - exit 1 -fi - - - -###############指定训练脚本执行路径############### -# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 -cur_path=`pwd` -cur_path_last_dirname=${cur_path##*/} -if [ x"${cur_path_last_dirname}" == x"test" ];then - test_path_dir=${cur_path} - cd .. - cur_path=`pwd` -else - test_path_dir=${cur_path}/test -fi - - -#################创建日志输出目录,不需要修改################# -if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then - rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -else - mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID -fi - -#修改参数 -sed -i "s|pass|break|g" ${test_path_dir}/../pytorch_resnet50_apex.py -wait -#################启动训练脚本################# -# 训练开始时间,不需要修改 -start_time=$(date +%s) -# 非平台场景时source 环境变量 -check_etp_flag=`env | grep etp_running_flag` -etp_flag=`echo ${check_etp_flag#*=}` -if [ x"${etp_flag}" != x"true" ];then - source ${test_path_dir}/env_npu.sh -fi -python3.7 ./pytorch_resnet50_apex.py \ - --data ${data_path} \ - --npu ${ASCEND_DEVICE_ID} \ - -j ${workers} \ - -b ${batch_size} \ - --lr 0.2 \ - --warmup 5 \ - --label-smoothing=0.1 \ - --epochs ${train_epochs} \ - --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & - -wait - - -##################获取训练数据################ -# 训练结束时间,不需要修改 -end_time=$(date +%s) -e2e_time=$(( $end_time - $start_time )) - - -#参数改回 -sed -i "s|break|pass|g" ${test_path_dir}/../pytorch_resnet50_apex.py -wait - -# 训练用例信息,不需要修改 -BatchSize=${batch_size} -DeviceType=`uname -m` -CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf' - -# 结果打印,不需要修改 -echo "------------------ Final result ------------------" -# 输出性能FPS,需要模型审视修改 -grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log -FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'` -# 打印,不需要修改 -echo "Final Performance images/sec : $FPS" - -# 输出训练精度,需要模型审视修改 -train_accuracy=`grep -a '* Acc@1' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'` -# 打印,不需要修改 -echo "Final Train Accuracy : ${train_accuracy}" -echo "E2E Training Duration sec : $e2e_time" - -# 性能看护结果汇总 -# 获取性能数据,不需要修改 -# 吞吐量 -ActualFPS=${FPS} -# 单迭代训练时长 -TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` - -# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视 -grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt - -# 最后一个迭代loss值,不需要修改 -ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt` - -# 关键信息打印到${CaseName}.log中,不需要修改 -echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log -echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log \ No newline at end of file diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh index 04e874cc97..0f29d8b867 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh @@ -141,7 +141,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk '{print$13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'` diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh index 37195fb612..7b8a5e8caa 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh @@ -143,7 +143,7 @@ e2e_time=$(( $end_time - $start_time )) #结果打印,不需要修改 echo "------------------ Final result ------------------" #输出性能FPS,需要模型审视修改 -step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'` +step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk '{print$13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g` FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'` -- Gitee