From 33a656a749b04e450046bb08c16fd1a63affbd36 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Wed, 6 Apr 2022 08:06:09 +0000
Subject: [PATCH 01/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../run_squad.py                              | 37 ++++++++++++++-----
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index a5203e848d..ea57841c7a 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -894,6 +894,10 @@ def main():
                         default=None,
                         type=str,
                         help="addr used for distributed training")
+    # 图模式
+    parser.add_argument('--graph_mode',
+                        action='store_true',
+                        help='whether to enable graph mode.')
 
     args = parser.parse_args()
     args.fp16 = args.fp16 or args.amp    
@@ -1015,13 +1019,13 @@ def main():
             # except ImportError:
             #     raise ImportError(
             #         "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
-            # optimizer = NpuFusedAdam(optimizer_grouped_parameters,
-            #                       lr=args.learning_rate)
+            optimizer = NpuFusedAdam(optimizer_grouped_parameters,
+                                  lr=args.learning_rate)
 
-            optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
-                                         lr=args.learning_rate,
-                                         warmup=args.warmup_proportion,
-                                         t_total=num_train_optimization_steps)
+            # optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
+            #                              lr=args.learning_rate,
+            #                              warmup=args.warmup_proportion,
+            #                              t_total=num_train_optimization_steps)
 
             if args.loss_scale == 0:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
@@ -1108,6 +1112,10 @@ def main():
             train_iter = train_dataloader
             step_start_time = time.time()
             for step, batch in enumerate(train_iter):
+                # 图模式
+                if args.graph_mode:
+                    print("graph mode on")
+                    torch.npu.enable_graph_mode()
                 # Terminate early for benchmarking
                 data_time = time.time() - step_start_time
                 if args.max_steps > 0 and global_step > args.max_steps:
@@ -1141,7 +1149,7 @@ def main():
                 else:
                     loss.backward()
 
- 
+
                 if (step + 1) % args.gradient_accumulation_steps == 0:
                     if args.fp16 :
                         # modify learning rate with special warm up for BERT which FusedAdam doesn't do
@@ -1149,8 +1157,14 @@ def main():
                     optimizer.step()
                     optimizer.zero_grad()
                     global_step += 1
-
-                final_loss = loss.item()
+                # 图模式
+                if args.graph_mode:
+                    print("graph mode launch")
+                    torch.npu.launch_graph()
+                    if step == max_steps:
+                        print("graph mode synchronize")
+                        torch.npu.synchronize()
+                final_loss = 0.0 #loss.item()
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
@@ -1162,7 +1176,9 @@ def main():
                                        "step_loss": round(final_loss, 4),
                                        "learning_rate": round(optimizer.param_groups[0]['lr'], 10)})
                 step_start_time = time.time()
-
+        # 图模式
+        if args.graph_mode:
+            torch.npu.disable_graph_mode()
         time_to_train = time.time() - train_start
 
     if args.do_train and is_main_process() and not args.skip_checkpoint:
@@ -1276,6 +1292,7 @@ if __name__ == "__main__":
     option = {}
     option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance"
     option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm"
+    option["MM_BMM_ND_ENABLE"] = "enable"
     torch.npu.set_option(option)
     main()
     dllogger.flush()
-- 
Gitee


From e4c0ab75322685b7d7c760137936dd3b9acde492 Mon Sep 17 00:00:00 2001
From: "rrrr.cao@hotmail.com" <rrrr.cao@hotmail.com>
Date: Wed, 6 Apr 2022 16:43:30 +0800
Subject: [PATCH 02/35] add Bert-Squad graph mode

---
 .../train_ID3078_Bert-Squad_performance_1p.sh | 188 ++++++++++++++++++
 .../train_ID3078_Bert-Squad_performance_8p.sh | 188 ++++++++++++++++++
 .../test/train_performance_1p.sh              |   1 -
 .../test/train_performance_8p.sh              |   1 -
 4 files changed, 376 insertions(+), 2 deletions(-)
 create mode 100644 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
 create mode 100644 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
new file mode 100644
index 0000000000..7c87c61096
--- /dev/null
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+
+#当前路径,不需要修改
+cur_path=`pwd`
+
+#集合通信参数,不需要修改
+export BMMV2_ENABLE=1
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+ckpt_path=""
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="Bert-Squad_ID3078_for_PyTorch"
+#训练epoch
+train_epochs=1
+#训练batch_size
+batch_size=32
+#训练step
+train_steps=
+#学习率
+learning_rate=6e-5
+
+
+#维测参数，precision_mode需要模型审视修改
+precision_mode="allow_fp32_to_fp16"
+#维持参数，以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Print usage and exit when the first argument is --help or -h (exit status 1 is kept unchanged).
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: $0 <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump		           if or not over detection, default is False
+    --data_dump_flag		     data dump flag, default is False
+    --data_dump_step		     data dump step, default is 10
+    --profiling		           if or not profiling for performance debug, default is False
+    --data_path		           source data of training
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+# Parse --key=value arguments; "$@" keeps each argument intact and unrecognised arguments are ignored.
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=${para#*=}
+    elif [[ $para == --over_dump* ]];then
+        over_dump=${para#*=}
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p "${over_dump_path}"
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=${para#*=}
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p "${data_dump_path}"
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=${para#*=}
+    elif [[ $para == --profiling* ]];then
+        profiling=${para#*=}
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p "${profiling_dump_path}"
+    elif [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=${para#*=}
+    fi
+done
+
+# data_path is mandatory: abort when --data_path was not supplied.
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+
+# Record the training start time (epoch seconds) used for the end-to-end duration.
+start_time=$(date +%s)
+# Default to device 0 when the caller did not export ASCEND_DEVICE_ID (it names the output dir and --npu_id).
+export ASCEND_DEVICE_ID=${ASCEND_DEVICE_ID:-0}
+cd "$cur_path/../" || exit 1  # run_squad.py is launched from the parent of the invocation directory
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量，不需要修改
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    #创建DeviceID输出目录，不需要修改
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    
+    #执行训练脚本，以下传参不需要修改，其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path，--data_dump_flag，--data_dump_step，--data_dump_path，--profiling，--profiling_dump_path
+    nohup python3.7 run_squad.py \
+          --init_checkpoint ${ckpt_path}/bert_large_pretrained_amp.pt \
+          --bert_model bert-large-uncased \
+		  --do_train \
+		  --train_file ${data_path}/train-v1.1-min.json \
+		  --train_batch_size ${batch_size} \
+		  --do_predict \
+		  --predict_batch_size ${batch_size} \
+		  --predict_file ${data_path}/dev-v1.1.json \
+		  --learning_rate ${learning_rate} \
+		  --num_train_epochs ${train_epochs} \
+		  --seed 1 \
+		  --fp16 \
+		  --max_steps 100 \
+		  --use_npu \
+		  --loss_scale 4096 \
+		  --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \
+		  --do_eval \
+          --eval_script ${data_path}/evaluate-v1.1.py \
+		  --npu_id ${ASCEND_DEVICE_ID} \
+		  --do_lower_case \
+		  --output_dir ${cur_path}/../results \
+		  --config_file bert_config.json \
+          --graph_mode \
+		  --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done 
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print the final results banner.
+echo "------------------ Final result ------------------"
+# Step time: field 13 of the last 'step_time : ' line in the training log.
+step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
+# FPS = batch_size / step_time; values go in via -v so a missing or zero step_time yields 0, not an awk error.
+FPS=`awk -v bs="$batch_size" -v st="$step_time" 'BEGIN{if (st+0 > 0) printf "%d\n", bs/st; else print 0}'`
+
+# Report the throughput.
+echo "Final Performance images/sec : $FPS"
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep 'F1 : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $10}'` 
+#打印，不需要修改
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect the performance figures written to the summary log.
+# Throughput.
+ActualFPS=${FPS}
+# BatchSize*1000/FPS; left empty (no awk error) when FPS is missing or zero.
+TrainingTime=`awk -v bs="${BatchSize}" -v fps="${FPS}" 'BEGIN{if (fps+0 > 0) printf "%.2f\n", bs*1000/fps}'`
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
+grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值，不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+rm -rf ${data_path}/train-v1.1-min.json_bert-large-uncased_384_128_64
+export BMMV2_ENABLE=0
\ No newline at end of file
diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
new file mode 100644
index 0000000000..2b52006c63
--- /dev/null
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+
+#当前路径,不需要修改
+cur_path=`pwd`
+
+#集合通信参数,不需要修改
+export BMMV2_ENABLE=1
+export RANK_SIZE=8
+export JOB_ID=10087
+RANK_ID_START=0
+
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+ckpt_path=""
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="Bert-Squad_ID3078_for_PyTorch"
+#训练epoch
+train_epochs=1
+#训练batch_size
+batch_size=32
+#训练step
+train_steps=
+#学习率
+learning_rate=2e-4
+
+
+#维测参数，precision_mode需要模型审视修改
+precision_mode="allow_fp32_to_fp16"
+#维持参数，以下不需要修改
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+
+# Print usage and exit when the first argument is --help or -h (exit status 1 is kept unchanged).
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: $0 <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump		           if or not over detection, default is False
+    --data_dump_flag		     data dump flag, default is False
+    --data_dump_step		     data dump step, default is 10
+    --profiling		           if or not profiling for performance debug, default is False
+    --data_path		           source data of training
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+# Parse --key=value arguments; "$@" keeps each argument intact and unrecognised arguments are ignored.
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=${para#*=}
+    elif [[ $para == --over_dump* ]];then
+        over_dump=${para#*=}
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p "${over_dump_path}"
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=${para#*=}
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p "${data_dump_path}"
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=${para#*=}
+    elif [[ $para == --profiling* ]];then
+        profiling=${para#*=}
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p "${profiling_dump_path}"
+    elif [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=${para#*=}
+    fi
+done
+
+# data_path is mandatory: abort when --data_path was not supplied.
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+# Record the training start time (epoch seconds) used for the end-to-end duration.
+start_time=$(date +%s)
+
+# Move to the parent of the invocation directory, where run_squad.py is launched; abort if that fails.
+cd "$cur_path/../" || exit 1
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量，不需要修改
+    export ASCEND_DEVICE_ID=$RANK_ID
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    #创建DeviceID输出目录，不需要修改
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    
+    #执行训练脚本，以下传参不需要修改，其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path，--data_dump_flag，--data_dump_step，--data_dump_path，--profiling，--profiling_dump_path
+    nohup python3.7 run_squad.py \
+          --init_checkpoint ${ckpt_path}/bert_large_pretrained_amp.pt \
+          --bert_model bert-large-uncased \
+		  --do_train \
+		  --train_file ${data_path}/train-v1.1-min.json \
+		  --train_batch_size ${batch_size} \
+		  --do_predict \
+		  --predict_batch_size ${batch_size} \
+		  --predict_file ${data_path}/dev-v1.1.json \
+		  --learning_rate ${learning_rate} \
+		  --num_train_epochs ${train_epochs} \
+		  --seed 1 \
+		  --fp16 \
+		  --max_steps 100 \
+		  --use_npu \
+		  --loss_scale 4096 \
+		  --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \
+		  --do_eval \
+          --eval_script ${data_path}/evaluate-v1.1.py \
+		  --npu_id ${ASCEND_DEVICE_ID} \
+		  --do_lower_case \
+		  --output_dir ${cur_path}/../results \
+		  --config_file bert_config.json \
+		  --num_npu 8 \
+		  --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done 
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print the final results banner.
+echo "------------------ Final result ------------------"
+# Step time: field 13 of the last 'step_time : ' line in the training log.
+step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
+# FPS = batch_size / step_time * RANK_SIZE; -v passing makes a missing or zero step_time yield 0, not an awk error.
+FPS=`awk -v bs="$batch_size" -v st="$step_time" -v rs="$RANK_SIZE" 'BEGIN{if (st+0 > 0) printf "%d\n", bs/st*rs; else print 0}'`
+
+# Report the throughput.
+echo "Final Performance images/sec : $FPS"
+
+#输出训练精度,需要模型审视修改
+train_accuracy=`grep 'F1 : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk '{print $10}'` 
+#打印，不需要修改
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect the performance figures written to the summary log.
+# Throughput.
+ActualFPS=${FPS}
+# BatchSize*1000/FPS; left empty (no awk error) when FPS is missing or zero.
+TrainingTime=`awk -v bs="${BatchSize}" -v fps="${FPS}" 'BEGIN{if (fps+0 > 0) printf "%.2f\n", bs*1000/fps}'`
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
+grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值，不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+rm -rf ${data_path}/train-v1.1-min.json_bert-large-uncased_384_128_64
+export BMMV2_ENABLE=0
\ No newline at end of file
diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh
index 1ab48782e3..04e874cc97 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh
@@ -83,7 +83,6 @@ if [[ $data_path == "" ]];then
     exit 1
 fi
 
-cp run_squad.py $cur_path/../
 #训练开始时间，不需要修改
 start_time=$(date +%s)
 
diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh
index 5315e72b6a..37195fb612 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh
@@ -83,7 +83,6 @@ if [[ $data_path == "" ]];then
     exit 1
 fi
 
-cp run_squad.py $cur_path/../
 #训练开始时间，不需要修改
 start_time=$(date +%s)
 
-- 
Gitee


From 00be2a3f0d84d7a06cd822a942f2e1208fd85da9 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Wed, 6 Apr 2022 08:45:05 +0000
Subject: [PATCH 03/35] update train_ID3078_Bert-Squad_performance_8p.sh.

---
 .../test/train_ID3078_Bert-Squad_performance_8p.sh               | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
index 2b52006c63..7bf5ef040c 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
@@ -132,6 +132,7 @@ do
 		  --output_dir ${cur_path}/../results \
 		  --config_file bert_config.json \
 		  --num_npu 8 \
+          --graph_mode \
 		  --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json> ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 done 
 wait
-- 
Gitee


From 025160ffaa4d37c0242a404bc92ccb22503c5759 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Wed, 6 Apr 2022 09:07:08 +0000
Subject: [PATCH 04/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../run_squad.py                              | 33 +++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index ea57841c7a..b72933c2a3 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1014,18 +1014,20 @@ def main():
     ]
     if args.do_train:
         if args.fp16:
-            # try:
-            #     from apex.optimizers import NpuFusedAdam
-            # except ImportError:
-            #     raise ImportError(
-            #         "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
-            optimizer = NpuFusedAdam(optimizer_grouped_parameters,
-                                  lr=args.learning_rate)
-
-            # optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
-            #                              lr=args.learning_rate,
-            #                              warmup=args.warmup_proportion,
-            #                              t_total=num_train_optimization_steps)
+            try:
+                from apex.optimizers import NpuFusedAdam
+            except ImportError:
+                raise ImportError(
+                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
+            # 图模式
+            if args.graph_mode:
+                optimizer = NpuFusedAdam(optimizer_grouped_parameters,
+                                    lr=args.learning_rate)
+            else:
+                optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
+                                            lr=args.learning_rate,
+                                            warmup=args.warmup_proportion,
+                                            t_total=num_train_optimization_steps)
 
             if args.loss_scale == 0:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
@@ -1164,7 +1166,11 @@ def main():
                     if step == max_steps:
                         print("graph mode synchronize")
                         torch.npu.synchronize()
-                final_loss = 0.0 #loss.item()
+                # 图模式
+                if args.graph_mode:
+                    final_loss = 0.0 
+                else:
+                    loss.item()
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
@@ -1178,6 +1184,7 @@ def main():
                 step_start_time = time.time()
         # 图模式
         if args.graph_mode:
+            print("graph mode off")
             torch.npu.disable_graph_mode()
         time_to_train = time.time() - train_start
 
-- 
Gitee


From 6af3d17b4ac1b28e74894f1173f1e0a9cea3d060 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Wed, 6 Apr 2022 09:19:49 +0000
Subject: [PATCH 05/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index b72933c2a3..21e87064d9 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1163,14 +1163,14 @@ def main():
                 if args.graph_mode:
                     print("graph mode launch")
                     torch.npu.launch_graph()
-                    if step == max_steps:
+                    if step == args.max_steps:
                         print("graph mode synchronize")
                         torch.npu.synchronize()
                 # 图模式
                 if args.graph_mode:
                     final_loss = 0.0 
                 else:
-                    loss.item()
+                    final_loss = loss.item()
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
-- 
Gitee


From 37c97a2783b6347164dadddf3856ce4ef5820934 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 01:33:55 +0000
Subject: [PATCH 06/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../run_squad.py                              | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 21e87064d9..19ebd9eabf 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1014,20 +1014,20 @@ def main():
     ]
     if args.do_train:
         if args.fp16:
-            try:
-                from apex.optimizers import NpuFusedAdam
-            except ImportError:
-                raise ImportError(
-                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
-            # 图模式
-            if args.graph_mode:
-                optimizer = NpuFusedAdam(optimizer_grouped_parameters,
-                                    lr=args.learning_rate)
-            else:
-                optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
-                                            lr=args.learning_rate,
-                                            warmup=args.warmup_proportion,
-                                            t_total=num_train_optimization_steps)
+            # try:
+            #     from apex.optimizers import NpuFusedAdam
+            # except ImportError:
+            #     raise ImportError(
+            #         "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
+            # # 图模式
+            # if args.graph_mode:
+            #     optimizer = NpuFusedAdam(optimizer_grouped_parameters,
+            #                         lr=args.learning_rate)
+            # else:
+            optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
+                                        lr=args.learning_rate,
+                                        warmup=args.warmup_proportion,
+                                        t_total=num_train_optimization_steps)
 
             if args.loss_scale == 0:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
-- 
Gitee


From 48b4c7c84495f300ea3822f659fc8ce3f54a6722 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 01:51:23 +0000
Subject: [PATCH 07/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 19ebd9eabf..d513ceaadb 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1031,8 +1031,8 @@ def main():
 
             if args.loss_scale == 0:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
-                                                  loss_scale="dynamic", combine_grad=True,
-                                                  combine_ddp=True if args.local_rank != -1 else False)
+                                                  loss_scale="dynamic", combine_grad=True)#,
+                                                #   combine_ddp=True if args.local_rank != -1 else False)
             else:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
                                                   loss_scale=args.loss_scale, combine_grad=True,
-- 
Gitee


From 87f4e6756ac16e4dc60f499e8abd3d17de3e0a20 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 02:14:47 +0000
Subject: [PATCH 08/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index d513ceaadb..6834fde9dc 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1163,9 +1163,9 @@ def main():
                 if args.graph_mode:
                     print("graph mode launch")
                     torch.npu.launch_graph()
-                    if step == args.max_steps:
-                        print("graph mode synchronize")
-                        torch.npu.synchronize()
+                    # if step == args.max_steps:
+                    #     print("graph mode synchronize")
+                    #     torch.npu.synchronize()
                 # 图模式
                 if args.graph_mode:
                     final_loss = 0.0 
-- 
Gitee


From 773ac2e100d3c41af75628e789be8a8ad58f65b4 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 03:49:19 +0000
Subject: [PATCH 09/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../run_squad.py                              | 148 +++++++++---------
 1 file changed, 75 insertions(+), 73 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 6834fde9dc..3c8f3ca17a 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1014,25 +1014,25 @@ def main():
     ]
     if args.do_train:
         if args.fp16:
-            # try:
-            #     from apex.optimizers import NpuFusedAdam
-            # except ImportError:
-            #     raise ImportError(
-            #         "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
-            # # 图模式
-            # if args.graph_mode:
-            #     optimizer = NpuFusedAdam(optimizer_grouped_parameters,
-            #                         lr=args.learning_rate)
-            # else:
-            optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
-                                        lr=args.learning_rate,
-                                        warmup=args.warmup_proportion,
-                                        t_total=num_train_optimization_steps)
+            try:
+                from apex.optimizers import NpuFusedAdam
+            except ImportError:
+                raise ImportError(
+                    "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
+            # 图模式
+            if args.graph_mode:
+                optimizer = NpuFusedAdam(optimizer_grouped_parameters,
+                                    lr=args.learning_rate)
+            else:
+                optimizer = NpuFusedBertAdamV2(optimizer_grouped_parameters,
+                                            lr=args.learning_rate,
+                                            warmup=args.warmup_proportion,
+                                            t_total=num_train_optimization_steps)
 
             if args.loss_scale == 0:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
-                                                  loss_scale="dynamic", combine_grad=True)#,
-                                                #   combine_ddp=True if args.local_rank != -1 else False)
+                                                  loss_scale="dynamic", combine_grad=True,
+                                                  combine_ddp=True if args.local_rank != -1 else False)
             else:
                 model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
                                                   loss_scale=args.loss_scale, combine_grad=True,
@@ -1114,63 +1114,65 @@ def main():
             train_iter = train_dataloader
             step_start_time = time.time()
             for step, batch in enumerate(train_iter):
-                # 图模式
-                if args.graph_mode:
-                    print("graph mode on")
-                    torch.npu.enable_graph_mode()
-                # Terminate early for benchmarking
-                data_time = time.time() - step_start_time
-                if args.max_steps > 0 and global_step > args.max_steps:
-                    break
-
-                if n_npu == 1:
-                    batch = tuple(t.to(device, non_blocking=True) for t in batch)  # multi-gpu does scattering it-self
-                input_ids, input_mask, segment_ids, start_positions, end_positions = batch
-                start_logits, end_logits = model(input_ids, segment_ids, input_mask)
-                # If we are on multi-GPU, split add a dimension
-                if len(start_positions.size()) > 1:
-                    start_positions = start_positions.squeeze(-1)
-                if len(end_positions.size()) > 1:
-                    end_positions = end_positions.squeeze(-1)
-                # sometimes the start/end positions are outside our model inputs, we ignore these terms
-                ignored_index = start_logits.size(1)
-                start_positions.clamp_(0, ignored_index)
-                end_positions.clamp_(0, ignored_index)
-
-                loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index)
-                start_loss = loss_fct(start_logits, start_positions)
-                end_loss = loss_fct(end_logits, end_positions)
-                loss = (start_loss + end_loss) / 2
-                if n_npu > 1:
-                    loss = loss.mean()  # mean() to average on multi-gpu.
-                if args.gradient_accumulation_steps > 1:
-                    loss = loss / args.gradient_accumulation_steps
-                if args.fp16:
-                    with amp.scale_loss(loss, optimizer) as scaled_loss:
-                        scaled_loss.backward()
-                else:
-                    loss.backward()
-
-
-                if (step + 1) % args.gradient_accumulation_steps == 0:
-                    if args.fp16 :
-                        # modify learning rate with special warm up for BERT which FusedAdam doesn't do
-                        scheduler.step()
-                    optimizer.step()
-                    optimizer.zero_grad()
-                    global_step += 1
-                # 图模式
-                if args.graph_mode:
-                    print("graph mode launch")
-                    torch.npu.launch_graph()
-                    # if step == args.max_steps:
-                    #     print("graph mode synchronize")
-                    #     torch.npu.synchronize()
-                # 图模式
-                if args.graph_mode:
-                    final_loss = 0.0 
-                else:
-                    final_loss = loss.item()
+                with torch.autograd.profiler.profile(use_npu=False) as prof: 
+                    # 图模式
+                    if args.graph_mode:
+                        print("graph mode on")
+                        torch.npu.enable_graph_mode()
+                    # Terminate early for benchmarking
+                    data_time = time.time() - step_start_time
+                    if args.max_steps > 0 and global_step > args.max_steps:
+                        break
+
+                    if n_npu == 1:
+                        batch = tuple(t.to(device, non_blocking=True) for t in batch)  # multi-gpu does scattering it-self
+                    input_ids, input_mask, segment_ids, start_positions, end_positions = batch
+                    start_logits, end_logits = model(input_ids, segment_ids, input_mask)
+                    # If we are on multi-GPU, split add a dimension
+                    if len(start_positions.size()) > 1:
+                        start_positions = start_positions.squeeze(-1)
+                    if len(end_positions.size()) > 1:
+                        end_positions = end_positions.squeeze(-1)
+                    # sometimes the start/end positions are outside our model inputs, we ignore these terms
+                    ignored_index = start_logits.size(1)
+                    start_positions.clamp_(0, ignored_index)
+                    end_positions.clamp_(0, ignored_index)
+
+                    loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index)
+                    start_loss = loss_fct(start_logits, start_positions)
+                    end_loss = loss_fct(end_logits, end_positions)
+                    loss = (start_loss + end_loss) / 2
+                    if n_npu > 1:
+                        loss = loss.mean()  # mean() to average on multi-gpu.
+                    if args.gradient_accumulation_steps > 1:
+                        loss = loss / args.gradient_accumulation_steps
+                    if args.fp16:
+                        with amp.scale_loss(loss, optimizer) as scaled_loss:
+                            scaled_loss.backward()
+                    else:
+                        loss.backward()
+
+
+                    if (step + 1) % args.gradient_accumulation_steps == 0:
+                        if args.fp16 :
+                            # modify learning rate with special warm up for BERT which FusedAdam doesn't do
+                            scheduler.step()
+                        optimizer.step()
+                        optimizer.zero_grad()
+                        global_step += 1
+                    # 图模式
+                    if args.graph_mode:
+                        print("graph mode launch")
+                        torch.npu.launch_graph()
+                        if step == args.max_steps:
+                            print("graph mode synchronize")
+                            torch.npu.synchronize()
+                    # 图模式
+                    if args.graph_mode:
+                        final_loss = 0.0 
+                    else:
+                        final_loss = loss.item()
+                prof.export_chrome_trace("./profiler_npu_"%d".json",step)
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
-- 
Gitee


From 8eabcdb9e9f826f011974434e7efeaac6cf683fa Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 06:01:23 +0000
Subject: [PATCH 10/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 3c8f3ca17a..46d54012a5 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1172,7 +1172,7 @@ def main():
                         final_loss = 0.0 
                     else:
                         final_loss = loss.item()
-                prof.export_chrome_trace("./profiler_npu_"%d".json",step)
+                prof.export_chrome_trace("./profiler_npu_%d.json"%step)
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
-- 
Gitee


From 709e5ac02f653c4002eb65c28d5ed9aebe55ee5c Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 06:04:00 +0000
Subject: [PATCH 11/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../pytorch_resnet50_apex.py                  | 184 ++++++++++++++++--
 1 file changed, 165 insertions(+), 19 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index 2932965ffd..e7aa4c7acf 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -364,6 +364,7 @@ def main_worker(gpu, ngpus_per_node, args):
     else:
         train_sampler = None
 
+    ##  原始loader，下面的优化后loader具有更好的性能，无论单算子还是图模式
     train_loader = torch.utils.data.DataLoader(
         train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
         num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
@@ -377,6 +378,16 @@ def main_worker(gpu, ngpus_per_node, args):
         ])),
         batch_size=args.batch_size, shuffle=True,
         num_workers=args.workers, pin_memory=True)
+    train_loader_len = len(train_loader)
+
+    # 图模式
+    if args.graph_mode:
+        train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data,
+                                                                                args.batch_size,
+                                                                                workers=args.workers,
+                                                                                fp16=True)
+        val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)
+
 
     if args.evaluate:
         validate(val_loader, model, criterion, args)
@@ -387,7 +398,7 @@ def main_worker(gpu, ngpus_per_node, args):
             train_sampler.set_epoch(epoch)
         lr_policy(optimizer, 0, epoch)
         # train for one epoch
-        train(train_loader, model, criterion, optimizer, epoch, args)
+        train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args)
 
         # evaluate on validation set
         acc1 = validate(val_loader, model, criterion, args)
@@ -405,7 +416,7 @@ def main_worker(gpu, ngpus_per_node, args):
         }, is_best, args, file_name)
         modeltmp.to(CALCULATE_DEVICE)
 
-def train(train_loader, model, criterion, optimizer, epoch, args):
+def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args):
     if args.optimizer_batch_size < 0:
         batch_size_multiplier = 1
     else:
@@ -423,7 +434,7 @@ def train(train_loader, model, criterion, optimizer, epoch, args):
     top1 = AverageMeter('Acc@1', ':6.2f')
     top5 = AverageMeter('Acc@5', ':6.2f')
     progress = ProgressMeter(
-        len(train_loader),
+        train_loader_len,
         [batch_time, data_time, losses, top1, top5],
         prefix="Epoch: [{}]".format(epoch))
 
@@ -446,26 +457,26 @@ def train(train_loader, model, criterion, optimizer, epoch, args):
             images = images.cuda(args.gpu, non_blocking=True)
 
         images = images.to(CALCULATE_DEVICE, non_blocking=True)
-        if args.label_smoothing == 0.0:
+        
         # 图模式
-            if args.graph_mode:
-                print("args.graph_mode")
-                target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
-            else:
-                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
+        if args.graph_mode:
+            print("args.graph_mode")
+            target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
+        else :
+          if args.label_smoothing == 0.0:
+              target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
+
         # compute output
         output = model(images)
         loss = criterion(output, target)
 
-        if args.label_smoothing > 0.0:
         # 图模式
-            if args.graph_mode:
-                print("args.graph_mode")
-                target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
-            else:
+        if args.graph_mode:
+            pass
+        else:
+            if args.label_smoothing > 0.0:
                 target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
         
-        
 
         # measure accuracy and record loss
         # 图模式
@@ -533,15 +544,26 @@ def validate(val_loader, model, criterion, args):
             if args.gpu is not None:
                 images = images.cuda(args.gpu, non_blocking=True)
             images = images.to(CALCULATE_DEVICE, non_blocking=True)
-            if args.label_smoothing == 0.0:
-                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
+            
+                
+            # 图模式
+            if args.graph_mode:
+                print("args.graph_mode")
+                target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
+            else :
+              if args.label_smoothing == 0.0:
+                  target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
 
             # compute output
             output = model(images)
             loss = criterion(output, target)
 
-            if args.label_smoothing > 0.0:
-                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
+            # 图模式
+            if args.graph_mode:
+                pass
+            else:
+                if args.label_smoothing > 0.0:
+                    target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
 
             # measure accuracy and record loss
             acc1, acc5 = accuracy(output, target, topk=(1, 5))
@@ -692,5 +714,129 @@ def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None):
 
     return lr_policy(_lr_fn, logger=logger)
 
+
+def fast_collate(batch):
+    imgs = [img[0] for img in batch]
+    targets = torch.tensor([target[1] for target in batch], dtype=torch.int64)
+    w = imgs[0].size[0]
+    h = imgs[0].size[1]
+    tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
+    for i, img in enumerate(imgs):
+        nump_array = np.asarray(img, dtype=np.uint8)
+        if (nump_array.ndim < 3):
+            nump_array = np.expand_dims(nump_array, axis=-1)
+        nump_array = np.rollaxis(nump_array, 2)
+
+        tensor[i] += torch.from_numpy(nump_array)
+
+    return tensor, targets
+
+
+class PrefetchedWrapper(object):
+    def prefetched_loader(loader, fp16):
+        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1)
+        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1)
+        if fp16:
+            mean = mean.half()
+            std = std.half()
+
+        stream = torch.npu.Stream()
+        first = True
+        
+        a = torch.empty([2,3])
+        for next_input, next_target in loader:
+            with torch.npu.stream(stream):
+                b = a*2
+                next_input = next_input.npu(non_blocking=True)
+                next_target = next_target.npu(non_blocking=True)
+                b = a/2
+                if fp16:
+                    next_input = next_input.half()
+
+                else:
+                    next_input = next_input.float()
+
+                next_input = next_input.sub_(mean).div_(std)
+
+        
+            if not first:
+                yield input, target
+            else:
+                first = False
+
+            b = a*3
+            torch.npu.current_stream().wait_stream(stream)
+            input = next_input
+            target = next_target
+            b = a/2
+        yield input, target
+
+    def __init__(self, dataloader, fp16):
+        self.dataloader = dataloader
+        self.fp16 = fp16
+        self.epoch = 0
+
+    def __iter__(self):
+        if (self.dataloader.sampler is not None and
+                isinstance(self.dataloader.sampler,
+                           torch.utils.data.distributed.DistributedSampler)):
+            self.dataloader.sampler.set_epoch(self.epoch)
+        self.epoch += 1
+        
+        start = time.time()
+        ret = PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
+        end = time.time()
+        print("prefetch time{}".format(end - start))
+
+        return ret
+        # return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
+
+
+def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
+    traindir = os.path.join(data_path, 'train')
+    train_dataset = datasets.ImageFolder(
+        traindir,
+        transforms.Compose([
+            transforms.RandomResizedCrop(224),
+            transforms.RandomHorizontalFlip(),
+        ]))
+
+    if torch.distributed.is_initialized():
+        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
+    else:
+        train_sampler = None
+
+    train_loader = torch.utils.data.DataLoader(
+        train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
+        num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler,
+        collate_fn=fast_collate, drop_last=True)
+
+    return PrefetchedWrapper(train_loader, fp16), len(train_loader), train_sampler
+
+
+def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
+    """Build the validation loader over ``<data_path>/val`` (resize 256, center-crop 224).
+
+    Batches are assembled by ``fast_collate``. When ``distributed`` is true a
+    ``DistributedSampler`` is used; otherwise no sampler is set and shuffling is enabled.
+    """
+    valdir = os.path.join(data_path, 'val')
+    val_dataset = datasets.ImageFolder(
+        valdir, transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)]))
+
+    if distributed:
+        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
+    else:
+        val_sampler = None
+
+    # Build the loader on both paths: it used to sit inside the else branch, so
+    # distributed=True reached the return with val_loader unbound.
+    val_loader = MultiEpochsDataLoader(
+        val_dataset, sampler=val_sampler, batch_size=batch_size, shuffle=(val_sampler is None),
+        num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, collate_fn=fast_collate)
+    return val_loader
+
+
+
 if __name__ == '__main__':
     main()
-- 
Gitee


From 2c6fe4d31cf77147e1edc0dd79ee11260436a9e2 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 06:13:59 +0000
Subject: [PATCH 12/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index e7aa4c7acf..a593c3ed1f 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -36,6 +36,7 @@ import torchvision.datasets as datasets
 import torchvision.models as models
 import torch.npu
 import DistributedResnet50.image_classification.resnet as nvmodels
+from DistributedResnet50.image_classification.multi_epochs_dataloader import MultiEpochsDataLoader 
 from apex import amp
 
 BATCH_SIZE = 512
-- 
Gitee


From 93df5045d5ccada580ac0e349272e76fa6459e03 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 07:17:10 +0000
Subject: [PATCH 13/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh.

---
 .../ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
index f2f584cd46..ac7ac7724d 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
@@ -88,7 +88,7 @@ python3.7 ./pytorch_resnet50_apex.py \
     -b ${batch_size} \
     --lr 0.2 \
     --warmup 5 \
-    --label-smoothing=0.0 \
+    --label-smoothing=0.1 \
     --epochs ${train_epochs} \
     --graph_mode \
     --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-- 
Gitee


From a21a9225d184f71c86d7b0243ea732cb4ff0bfa7 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 07:27:01 +0000
Subject: [PATCH 14/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index a593c3ed1f..7b7ac70c4f 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -508,6 +508,7 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar
                 torch.npu.synchronize()
 
         # measure elapsed time
+        print("++++++++++++++++",time.time() - end)
         batch_time.update(time.time() - end)
         end = time.time()
 
-- 
Gitee


From bd2f7279f80c7729b14b5d7a0340377eda5c0ced Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 07:36:13 +0000
Subject: [PATCH 15/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py              | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index 7b7ac70c4f..bafada2920 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -502,8 +502,9 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar
         
         # 图模式
         if args.graph_mode:
-            print("args.graph_mode")
+            print("torch.npu.launch_graph()")
             torch.npu.launch_graph()
+            print("launch end")
             if i == 100:
                 torch.npu.synchronize()
 
-- 
Gitee


From 720c2531946247006f7e2af56e14c02dfa98263c Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 08:18:34 +0000
Subject: [PATCH 16/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py             | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index bafada2920..4590f6cb9c 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -472,9 +472,7 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar
         loss = criterion(output, target)
 
         # 图模式
-        if args.graph_mode:
-            pass
-        else:
+        if not args.graph_mode:
             if args.label_smoothing > 0.0:
                 target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
         
-- 
Gitee


From 3da84997c55b2d65ca39788097e027333d702653 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 08:35:29 +0000
Subject: [PATCH 17/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py             | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index 4590f6cb9c..2c778f8fe7 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -560,9 +560,7 @@ def validate(val_loader, model, criterion, args):
             loss = criterion(output, target)
 
             # 图模式
-            if args.graph_mode:
-                pass
-            else:
+            if not args.graph_mode:
                 if args.label_smoothing > 0.0:
                     target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
 
-- 
Gitee


From c45a57fcfd73520c68fde92d46c0f902a059dccc Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 08:36:06 +0000
Subject: [PATCH 18/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py              | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index 2c778f8fe7..a3df61263a 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -500,14 +500,11 @@ def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, ar
         
         # 图模式
         if args.graph_mode:
-            print("torch.npu.launch_graph()")
             torch.npu.launch_graph()
-            print("launch end")
             if i == 100:
                 torch.npu.synchronize()
 
         # measure elapsed time
-        print("++++++++++++++++",time.time() - end)
         batch_time.update(time.time() - end)
         end = time.time()
 
-- 
Gitee


From a8632d07b490a38284d78d18e39f20bdf7d6bc2a Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 08:37:03 +0000
Subject: [PATCH 19/35] update  main_apex_d76_npu.py.

---
 .../DistributedResnet50/main_apex_d76_npu.py  | 34 +++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
index 26edd676ce..20e5970023 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
@@ -588,9 +588,16 @@ def main_worker(gpu, ngpus_per_node, args):
     cudnn.benchmark = True
 
     # Data loading code
-    train_loader, train_loader_len, sampler = get_pytorch_train_loader(args.data, args.batch_size,
-                                                                       workers=args.workers, distributed=args.distributed)
-    
+    # 图模式
+    if args.graph_mode:
+        train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data,
+                                                                                args.batch_size,
+                                                                                workers=args.workers,
+                                                                                fp16=True)
+    else:
+        train_loader, train_loader_len, sampler = get_pytorch_train_loader(args.data, args.batch_size,
+                                                                        workers=args.workers, distributed=args.distributed)
+        
     val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)
 
     if args.evaluate:
@@ -916,6 +923,27 @@ def fast_collate(batch):
         tensor[i] += torch.from_numpy(nump_array)
     
     return tensor, targets
+    
+def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
+    traindir = os.path.join(data_path, 'train')
+    train_dataset = datasets.ImageFolder(
+        traindir,
+        transforms.Compose([
+            transforms.RandomResizedCrop(224),
+            transforms.RandomHorizontalFlip(),
+        ]))
+
+    if torch.distributed.is_initialized():
+        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
+    else:
+        train_sampler = None
+
+    train_loader = torch.utils.data.DataLoader(
+        train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
+        num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler,
+        collate_fn=fast_collate, drop_last=True)
+
+    return PrefetchedWrapper(train_loader, fp16), len(train_loader), train_sampler
 
 def get_pytorch_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
     traindir = os.path.join(data_path, 'train')
-- 
Gitee


From b6ba196d54984f702aa9411fedba19abb606c829 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 08:58:26 +0000
Subject: [PATCH 20/35] update
 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py.

---
 .../ResNet50_for_PyTorch/pytorch_resnet50_apex.py   | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
index a3df61263a..6e9f8fb66a 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
@@ -739,13 +739,10 @@ class PrefetchedWrapper(object):
         stream = torch.npu.Stream()
         first = True
         
-        a = torch.empty([2,3])
         for next_input, next_target in loader:
             with torch.npu.stream(stream):
-                b = a*2
                 next_input = next_input.npu(non_blocking=True)
                 next_target = next_target.npu(non_blocking=True)
-                b = a/2
                 if fp16:
                     next_input = next_input.half()
 
@@ -760,11 +757,9 @@ class PrefetchedWrapper(object):
             else:
                 first = False
 
-            b = a*3
             torch.npu.current_stream().wait_stream(stream)
             input = next_input
             target = next_target
-            b = a/2
         yield input, target
 
     def __init__(self, dataloader, fp16):
@@ -778,14 +773,8 @@ class PrefetchedWrapper(object):
                            torch.utils.data.distributed.DistributedSampler)):
             self.dataloader.sampler.set_epoch(self.epoch)
         self.epoch += 1
-        
-        start = time.time()
         ret = PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
-        end = time.time()
-        print("prefetch time{}".format(end - start))
-
-        return ret
-        # return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
+        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
 
 
 def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
-- 
Gitee


From 8fd47e4d0078bb51feef802aa16471d1b1ec1a74 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 09:08:59 +0000
Subject: [PATCH 21/35] update  train_ID3071_performance_1p.sh.

---
 .../ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
index ac7ac7724d..2492708ca6 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
@@ -5,7 +5,7 @@
 # 网络名称，同目录名称
 Network="ResNet50_ID3071_for_PyTorch"
 # 训练batch_size
-batch_size=512
+batch_size=256
 # 训练使用的npu卡数
 export RANK_SIZE=1
 # 数据集路径,保持为空,不需要修改
@@ -91,7 +91,7 @@ python3.7 ./pytorch_resnet50_apex.py \
     --label-smoothing=0.1 \
     --epochs ${train_epochs} \
     --graph_mode \
-    --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+    --optimizer-batch-size 256 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
 
 wait
 
-- 
Gitee


From 1bae1f8bc5ad1ab31c643b8f7366f1f5ef02006a Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 09:11:05 +0000
Subject: [PATCH 22/35] update  main_apex_d76_npu.py.

---
 .../DistributedResnet50/main_apex_d76_npu.py  | 45 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
index 20e5970023..1c982afe0d 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
@@ -923,7 +923,50 @@ def fast_collate(batch):
         tensor[i] += torch.from_numpy(nump_array)
     
     return tensor, targets
-    
+class PrefetchedWrapper(object):
+    def prefetched_loader(loader, fp16):
+        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1)
+        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1)
+        if fp16:
+            mean = mean.half()
+            std = std.half()
+
+        stream = torch.npu.Stream()
+        first = True
+        
+        for next_input, next_target in loader:
+            with torch.npu.stream(stream):
+                next_input = next_input.npu(non_blocking=True)
+                next_target = next_target.npu(non_blocking=True)
+                if fp16:
+                    next_input = next_input.half()
+                else:
+                    next_input = next_input.float(
+
+                next_input = next_input.sub_(mean).div_(std)
+            if not first:
+                yield input, target
+            else:
+                first = False
+
+            torch.npu.current_stream().wait_stream(stream)
+            input = next_input
+            target = next_target
+        yield input, target
+
+    def __init__(self, dataloader, fp16):
+        self.dataloader = dataloader
+        self.fp16 = fp16
+        self.epoch = 0
+
+    def __iter__(self):
+        if (self.dataloader.sampler is not None and
+                isinstance(self.dataloader.sampler,
+                           torch.utils.data.distributed.DistributedSampler)):
+            self.dataloader.sampler.set_epoch(self.epoch)
+        self.epoch += 1
+        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
+           
 def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
     traindir = os.path.join(data_path, 'train')
     train_dataset = datasets.ImageFolder(
-- 
Gitee


From 1d5ede0eaef8ac2e4adb742ea8f3339dd3ba7e08 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 10:34:22 +0000
Subject: [PATCH 23/35] update main_apex_d76_npu.py.

---
 .../DistributedResnet50/main_apex_d76_npu.py  | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
index 1c982afe0d..615548e3b5 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
@@ -923,6 +923,7 @@ def fast_collate(batch):
         tensor[i] += torch.from_numpy(nump_array)
     
     return tensor, targets
+    
 class PrefetchedWrapper(object):
     def prefetched_loader(loader, fp16):
         mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1)
@@ -940,10 +941,13 @@ class PrefetchedWrapper(object):
                 next_target = next_target.npu(non_blocking=True)
                 if fp16:
                     next_input = next_input.half()
+
                 else:
-                    next_input = next_input.float(
+                    next_input = next_input.float()
 
                 next_input = next_input.sub_(mean).div_(std)
+
+        
             if not first:
                 yield input, target
             else:
@@ -954,19 +958,6 @@ class PrefetchedWrapper(object):
             target = next_target
         yield input, target
 
-    def __init__(self, dataloader, fp16):
-        self.dataloader = dataloader
-        self.fp16 = fp16
-        self.epoch = 0
-
-    def __iter__(self):
-        if (self.dataloader.sampler is not None and
-                isinstance(self.dataloader.sampler,
-                           torch.utils.data.distributed.DistributedSampler)):
-            self.dataloader.sampler.set_epoch(self.epoch)
-        self.epoch += 1
-        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
-           
 def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
     traindir = os.path.join(data_path, 'train')
     train_dataset = datasets.ImageFolder(
-- 
Gitee


From 4d6afa1bbf3b557d5c94af944e559b3e97356b4f Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 10:36:43 +0000
Subject: [PATCH 24/35] update train_ID3078_Bert-Squad_performance_1p.sh.

---
 .../test/train_ID3078_Bert-Squad_performance_1p.sh              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
index 7c87c61096..4609c834db 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
@@ -143,7 +143,7 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
+step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g`
 
 FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'`
 
-- 
Gitee


From 67ca41d993eda6d490aa21838241d002f6573224 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 10:37:27 +0000
Subject: [PATCH 25/35] update train_ID3078_Bert-Squad_performance_8p.sh.

---
 .../test/train_ID3078_Bert-Squad_performance_8p.sh              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
index 7bf5ef040c..5efa082ada 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
@@ -144,7 +144,7 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
+step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g`
 
 FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'`
 
-- 
Gitee


From 7125dd1b464569f4618a53281d34df63d9825064 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 10:43:28 +0000
Subject: [PATCH 26/35] update  main_apex_d76_npu.py.

---
 .../DistributedResnet50/main_apex_d76_npu.py        | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
index 615548e3b5..3b98e7f94a 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
@@ -958,6 +958,19 @@ class PrefetchedWrapper(object):
             target = next_target
         yield input, target
 
+    def __init__(self, dataloader, fp16):
+        self.dataloader = dataloader
+        self.fp16 = fp16
+        self.epoch = 0
+
+    def __iter__(self):
+        if (self.dataloader.sampler is not None and
+                isinstance(self.dataloader.sampler,
+                           torch.utils.data.distributed.DistributedSampler)):
+            self.dataloader.sampler.set_epoch(self.epoch)
+        self.epoch += 1
+        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
+
 def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
     traindir = os.path.join(data_path, 'train')
     train_dataset = datasets.ImageFolder(
-- 
Gitee


From 3d0088a7e2fd5c8db9de9096e04b32bff476bf5f Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 11:06:34 +0000
Subject: [PATCH 27/35] update train_ID3078_Bert-Squad_performance_8p.sh.

---
 .../test/train_ID3078_Bert-Squad_performance_8p.sh              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
index 5efa082ada..6cd8336833 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_8p.sh
@@ -144,7 +144,7 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g`
+step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk '{print$13}' | tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g`
 
 FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'`
 
-- 
Gitee


From 919a1f2802d254112ff97f8c9ee5db7124c51657 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 11:07:15 +0000
Subject: [PATCH 28/35] update  train_ID3078_Bert-Squad_performance_1p.sh.

---
 .../test/train_ID3078_Bert-Squad_performance_1p.sh              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
index 4609c834db..68079e4635 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_ID3078_Bert-Squad_performance_1p.sh
@@ -143,7 +143,7 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g`
+step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk '{print$13}'| tail -n+3 |awk '{sum+=$1} END {print"",sum/NR}' | sed s/[[:space:]]//g`
 
 FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'`
 
-- 
Gitee


From 5c9cc071f8d058898ee5a351855ec180b791edff Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 11:10:29 +0000
Subject: [PATCH 29/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../run_squad.py                              | 116 +++++++++---------
 1 file changed, 57 insertions(+), 59 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 46d54012a5..21e87064d9 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -1114,65 +1114,63 @@ def main():
             train_iter = train_dataloader
             step_start_time = time.time()
             for step, batch in enumerate(train_iter):
-                with torch.autograd.profiler.profile(use_npu=False) as prof: 
-                    # 图模式
-                    if args.graph_mode:
-                        print("graph mode on")
-                        torch.npu.enable_graph_mode()
-                    # Terminate early for benchmarking
-                    data_time = time.time() - step_start_time
-                    if args.max_steps > 0 and global_step > args.max_steps:
-                        break
-
-                    if n_npu == 1:
-                        batch = tuple(t.to(device, non_blocking=True) for t in batch)  # multi-gpu does scattering it-self
-                    input_ids, input_mask, segment_ids, start_positions, end_positions = batch
-                    start_logits, end_logits = model(input_ids, segment_ids, input_mask)
-                    # If we are on multi-GPU, split add a dimension
-                    if len(start_positions.size()) > 1:
-                        start_positions = start_positions.squeeze(-1)
-                    if len(end_positions.size()) > 1:
-                        end_positions = end_positions.squeeze(-1)
-                    # sometimes the start/end positions are outside our model inputs, we ignore these terms
-                    ignored_index = start_logits.size(1)
-                    start_positions.clamp_(0, ignored_index)
-                    end_positions.clamp_(0, ignored_index)
-
-                    loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index)
-                    start_loss = loss_fct(start_logits, start_positions)
-                    end_loss = loss_fct(end_logits, end_positions)
-                    loss = (start_loss + end_loss) / 2
-                    if n_npu > 1:
-                        loss = loss.mean()  # mean() to average on multi-gpu.
-                    if args.gradient_accumulation_steps > 1:
-                        loss = loss / args.gradient_accumulation_steps
-                    if args.fp16:
-                        with amp.scale_loss(loss, optimizer) as scaled_loss:
-                            scaled_loss.backward()
-                    else:
-                        loss.backward()
-
-
-                    if (step + 1) % args.gradient_accumulation_steps == 0:
-                        if args.fp16 :
-                            # modify learning rate with special warm up for BERT which FusedAdam doesn't do
-                            scheduler.step()
-                        optimizer.step()
-                        optimizer.zero_grad()
-                        global_step += 1
-                    # 图模式
-                    if args.graph_mode:
-                        print("graph mode launch")
-                        torch.npu.launch_graph()
-                        if step == args.max_steps:
-                            print("graph mode synchronize")
-                            torch.npu.synchronize()
-                    # 图模式
-                    if args.graph_mode:
-                        final_loss = 0.0 
-                    else:
-                        final_loss = loss.item()
-                prof.export_chrome_trace("./profiler_npu_%d.json"%step)
+                # 图模式
+                if args.graph_mode:
+                    print("graph mode on")
+                    torch.npu.enable_graph_mode()
+                # Terminate early for benchmarking
+                data_time = time.time() - step_start_time
+                if args.max_steps > 0 and global_step > args.max_steps:
+                    break
+
+                if n_npu == 1:
+                    batch = tuple(t.to(device, non_blocking=True) for t in batch)  # multi-gpu does scattering it-self
+                input_ids, input_mask, segment_ids, start_positions, end_positions = batch
+                start_logits, end_logits = model(input_ids, segment_ids, input_mask)
+                # If we are on multi-GPU, split add a dimension
+                if len(start_positions.size()) > 1:
+                    start_positions = start_positions.squeeze(-1)
+                if len(end_positions.size()) > 1:
+                    end_positions = end_positions.squeeze(-1)
+                # sometimes the start/end positions are outside our model inputs, we ignore these terms
+                ignored_index = start_logits.size(1)
+                start_positions.clamp_(0, ignored_index)
+                end_positions.clamp_(0, ignored_index)
+
+                loss_fct = torch.nn.CrossEntropyLoss(ignore_index=ignored_index)
+                start_loss = loss_fct(start_logits, start_positions)
+                end_loss = loss_fct(end_logits, end_positions)
+                loss = (start_loss + end_loss) / 2
+                if n_npu > 1:
+                    loss = loss.mean()  # mean() to average on multi-gpu.
+                if args.gradient_accumulation_steps > 1:
+                    loss = loss / args.gradient_accumulation_steps
+                if args.fp16:
+                    with amp.scale_loss(loss, optimizer) as scaled_loss:
+                        scaled_loss.backward()
+                else:
+                    loss.backward()
+
+
+                if (step + 1) % args.gradient_accumulation_steps == 0:
+                    if args.fp16 :
+                        # modify learning rate with special warm up for BERT which FusedAdam doesn't do
+                        scheduler.step()
+                    optimizer.step()
+                    optimizer.zero_grad()
+                    global_step += 1
+                # 图模式
+                if args.graph_mode:
+                    print("graph mode launch")
+                    torch.npu.launch_graph()
+                    if step == args.max_steps:
+                        print("graph mode synchronize")
+                        torch.npu.synchronize()
+                # 图模式
+                if args.graph_mode:
+                    final_loss = 0.0 
+                else:
+                    final_loss = loss.item()
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
-- 
Gitee


From 0603c4bcf0a04d10e71f3a67ac39e3d6f83df25f Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 11:12:53 +0000
Subject: [PATCH 30/35] update  main_apex_d76_npu.py.

---
 .../DistributedResnet50/main_apex_d76_npu.py                    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
index 3b98e7f94a..ef66dc77c8 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/main_apex_d76_npu.py
@@ -590,7 +590,7 @@ def main_worker(gpu, ngpus_per_node, args):
     # Data loading code
     # 图模式
     if args.graph_mode:
-        train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data,
+        train_loader, train_loader_len, sampler = get_pytorch_train_loader_V2(args.data,
                                                                                 args.batch_size,
                                                                                 workers=args.workers,
                                                                                 fp16=True)
-- 
Gitee


From 790328550ba93315d7900e6e4b036865e018c59b Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Thu, 7 Apr 2022 13:59:35 +0000
Subject: [PATCH 31/35] update  train_ID3071_ResNet50_performance_8p.sh.

---
 .../test/train_ID3071_ResNet50_performance_8p.sh                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
index 0013d69590..8c7751bc4e 100644
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
+++ b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
@@ -5,7 +5,7 @@
 # 网络名称，同目录名称
 Network="ResNet50_ID3071_for_PyTorch"
 # 训练batch_size
-batch_size=4096
+batch_size=512
 # 训练使用的npu卡数
 export RANK_SIZE=8
 # 数据集路径,保持为空,不需要修改
-- 
Gitee


From 7249a4c5bfa4c5a49ff1b01567cdecd316bfaca0 Mon Sep 17 00:00:00 2001
From: "rrrr.cao@hotmail.com" <rrrr.cao@hotmail.com>
Date: Tue, 12 Apr 2022 10:13:06 +0800
Subject: [PATCH 32/35] add BertBase_ID0490_for_PyTorch graph mode

---
 .../BertBase_ID0490_for_PyTorch/run_squad.py  |  32 ++-
 .../train_ID3075_BertBase_performance_1p.sh   | 172 +++++++++++++++
 .../train_ID3075_BertBase_performance_8p.sh   | 200 ++++++++++++++++++
 3 files changed, 399 insertions(+), 5 deletions(-)
 create mode 100644 PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh
 create mode 100644 PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh

diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py
index 34e9946322..91834c3f8a 100644
--- a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py
+++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/run_squad.py
@@ -897,7 +897,9 @@ def main():
                         default=None,
                         type=str,
                         help="addr used for distributed training")
-
+    parser.add_argument('--graph_mode',
+                        action='store_true',
+                        help='whether to enable graph mode.')
     args = parser.parse_args()
     args.fp16 = args.fp16 or args.amp
 
@@ -1119,12 +1121,20 @@ def main():
             step_start_time = time.time()
             for step, batch in enumerate(train_iter):
                 # Terminate early for benchmarking
+                # 图模式
+                if args.graph_mode:
+                    print("graph mode on")
+                    torch.npu.enable_graph_mode()
                 data_time = time.time() - step_start_time
                 if args.max_steps > 0 and global_step > args.max_steps:
                     break
 
                 if n_npu == 1:
-                    batch = tuple(t.to(device) for t in batch)  # multi-gpu does scattering it-self
+                    # 图模式
+                    if args.graph_mode:
+                        batch = tuple(t.to(device, non_blocking=True) for t in batch)
+                    else:
+                        batch = tuple(t.to(device) for t in batch)  # multi-gpu does scattering it-self
                 input_ids, input_mask, segment_ids, start_positions, end_positions = batch
                 start_logits, end_logits = model(input_ids, segment_ids, input_mask)
                 # If we are on multi-GPU, split add a dimension
@@ -1159,8 +1169,17 @@ def main():
                     optimizer.step()
                     optimizer.zero_grad()
                     global_step += 1
-
-                final_loss = loss.item()
+                # 图模式
+                if args.graph_mode:
+                    final_loss = 0
+                else: 
+                    final_loss = loss.item()
+                # 图模式
+                if args.graph_mode:
+                    print("graph mode launch")
+                    torch.npu.launch_graph()
+                    if step == len(train_iter):
+                        torch.npu.synchronize()
                 step_time = time.time() - step_start_time
                 if step % args.log_freq == 0:
                     # dllogger.log(step=(epoch, global_step,), data={"step_loss": final_loss,
@@ -1171,7 +1190,10 @@ def main():
                                        "step_loss": round(final_loss, 4), "iter/s": round(1 / step_time, 4),
                                        "learning_rate": round(optimizer.param_groups[0]['lr'], 10)})
                 step_start_time = time.time()
-
+            # 图模式
+            if args.graph_mode:
+                print("graph mode off")
+                torch.npu.disable_graph_mode() 
         time_to_train = time.time() - train_start
 
     if args.do_train and is_main_process() and not args.skip_checkpoint:
diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh
new file mode 100644
index 0000000000..5be95b9f33
--- /dev/null
+++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_1p.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+#当前路径,不需要修改
+cur_path=`pwd`
+
+#集合通信参数,不需要修改
+RANK_ID_START=0
+export RANK_SIZE=1
+data_path=""
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="BertBase_ID3075_for_PyTorch"
+#训练epoch
+train_epochs=1
+#训练batch_size
+batch_size=80
+learning_rate=8e-5
+
+# Help message: print usage and exit when the first argument is --help or -h (no need to modify)
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_performance_1P.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump		           if or not over detection, default is False
+    --data_dump_flag		     data dump flag, default is False
+    --data_dump_step		     data dump step, default is 10
+    --profiling		           if or not profiling for performance debug, default is False
+    --data_path		           source data of training
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+#参数校验，不需要修改
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be confing"
+    exit 1
+fi
+
+export RANK=0
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+
+#进入训练脚本目录，需要模型审视修改
+cd $cur_path/../
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    #设置环境变量，不需要修改
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    #创建DeviceID输出目录，不需要修改
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    
+    #执行训练脚本，以下传参不需要修改，其他需要模型审视修改
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path，--data_dump_flag，--data_dump_step，--data_dump_path，--profiling，--profiling_dump_path
+    nohup python3 run_squad.py \
+          --init_checkpoint ${data_path}/pretrained/bert_base_pretrain.pt \
+          --bert_model bert-large-uncased \
+		  --do_train \
+		  --train_file ${data_path}/squad/v1.1/train-v1.1.json \
+		  --train_batch_size ${batch_size} \
+		  --do_predict \
+		  --predict_batch_size ${batch_size} \
+		  --predict_file ${data_path}/squad/v1.1/dev-v1.1.json \
+		  --learning_rate ${learning_rate} \
+		  --num_train_epochs ${train_epochs} \
+		  --seed 1 \
+		  --fp16 \
+		  --max_steps 100 \
+		  --use_npu \
+		  --loss_scale 4096 \
+		  --vocab_file "data/uncased_L-24_H-1024_A-16/vocab.txt" \
+		  --do_eval \
+          --eval_script ${data_path}/squad/v1.1/evaluate-v1.1.py \
+	  --npu_id  ${ASCEND_DEVICE_ID} \
+		  --do_lower_case \
+		  --output_dir ${cur_path}/../results \
+		  --config_file bert_base_config.json \
+          --graph_mode \
+          --json-summary ${cur_path}/output/${ASCEND_DEVICE_ID}/dllogger.json > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done 
+wait
+
+
+
+#conda deactivate
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS，需要模型审视修改
+perf=`grep "step_loss"  $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "step_time : " '{print $2}'|awk -F " " '{print $1}'|tail -n +3|awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g`
+FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${perf}'}'`
+
+
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+
+#输出训练精度,需要模型审视修改
+#train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5`
+#打印，不需要修改
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+#稳定性精度看护结果汇总
+#训练用例信息，不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+##获取性能数据
+#吞吐量，不需要修改
+ActualFPS=${FPS}
+#单迭代训练时长，不需要修改
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'`
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
+grep "step_loss" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F "step_loss : " '{print$2}'|awk -F " " '{print $1}'|sed s/[[:space:]]//g > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值，不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+rm -rf ${data_path}/squad/v1.1/train-v1.1.json_bert-large-uncased_384_128_64                                                                                              
diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh
new file mode 100644
index 0000000000..7370cfa3c3
--- /dev/null
+++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/test/train_ID3075_BertBase_performance_8p.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+
+#当前路径,不需要修改
+cur_path=`pwd`
+#source ../env_npu.sh
+
+data_path=""
+#集合通信参数,不需要修改
+
+export RANK_SIZE=8
+
+#基础参数，需要模型审视修改
+#网络名称，同目录名称
+Network="BertBase_ID3075_for_PyTorch"
+#训练batch_size
+batch_size=80
+
+
+# Argument parsing (no need to modify): pick up the supported --key=value options; other arguments are ignored here.
+for para in "$@"  # quoted so every argument stays a single word (no re-splitting or globbing)
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=${para#*=}  # text after the first '='
+    elif [[ $para == --over_dump* ]];then
+        over_dump=${para#*=}
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=${para#*=}
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=${para#*=}
+    elif [[ $para == --profiling* ]];then
+        profiling=${para#*=}
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --ckpt_path* ]];then
+        ckpt_path=${para#*=}
+    fi
+done
+
+# Abort when --data_path was not supplied (no need to modify)
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+
+# Enter the training script directory and read optional positional overrides (review per model). NOTE(review): $1.. are the same arguments scanned for --key=value options earlier in this script - confirm the overlap is intended.
+cd $cur_path/../
+init_checkpoint=${1:-"${data_path}/pretrained/bert_base_pretrain.pt"}  # plain path default; backticks here would execute the path as a command
+epochs=${2:-"1.0"}
+batch_size=${3:-"80"}
+learning_rate=${4:-"2e-4"}
+precision=${5:-"fp16"}
+num_npu=${6:-"8"}
+seed=${7:-"1"}
+squad_dir=${8:-"${data_path}/squad/v1.1"}  # plain path default, no command substitution
+vocab_file=${9:-"data/uncased_L-24_H-1024_A-16/vocab.txt"}
+OUT_DIR=${10:-"results/SQuAD"}
+mode=${11:-"train eval"}
+CONFIG_FILE=${12:-"bert_base_config.json"}
+max_steps=${13:-"-1"}
+
+echo "out dir is $OUT_DIR"
+mkdir -p $OUT_DIR
+if [ ! -d "$OUT_DIR" ]; then
+  echo "ERROR: non existing $OUT_DIR"
+  exit 1
+fi
+
+use_fp16=""
+if [ "$precision" = "fp16" ] ; then
+  echo "fp16 activated!"
+  use_fp16=" --fp16 "
+fi
+
+CMD="python3.7 run_squad.py "
+CMD+="--init_checkpoint=${data_path}/pretrained/bert_base_pretrain.pt "
+if [ "$mode" = "train" ] ; then
+  CMD+="--do_train "
+  CMD+="--train_file=${data_path}/squad/v1.1/train-v1.1.json "
+  CMD+="--train_batch_size=$batch_size "
+elif [ "$mode" = "eval" ] ; then
+  CMD+="--do_predict "
+  CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json "
+  CMD+="--predict_batch_size=$batch_size "
+  CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py "
+  CMD+="--do_eval "
+elif [ "$mode" = "prediction" ] ; then
+  CMD+="--do_predict "
+  CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json "
+  CMD+="--predict_batch_size=$batch_size "
+else
+  CMD+=" --do_train "
+  CMD+=" --train_file=${data_path}/squad/v1.1/train-v1.1.json "
+  CMD+=" --train_batch_size=$batch_size "
+  CMD+="--do_predict "
+  CMD+="--predict_file=${data_path}/squad/v1.1/dev-v1.1.json "
+  CMD+="--predict_batch_size=$batch_size "
+  CMD+="--eval_script=${data_path}/squad/v1.1/evaluate-v1.1.py "
+  CMD+="--do_eval "
+fi
+
+CMD+=" --do_lower_case "
+CMD+=" --bert_model=bert-large-uncased "
+CMD+=" --learning_rate=$learning_rate "
+CMD+=" --seed=$seed "
+CMD+=" --num_train_epochs=$epochs "
+CMD+=" --max_seq_length=384 "
+CMD+=" --doc_stride=128 "
+CMD+=" --output_dir=$OUT_DIR "
+CMD+=" --vocab_file=$vocab_file "
+CMD+=" --config_file=$CONFIG_FILE "
+CMD+=" --max_steps=$max_steps "
+CMD+=" $use_fp16"
+CMD+=" --use_npu"
+CMD+=" --num_npu=$num_npu"
+CMD+=" --loss_scale=4096"
+CMD+=" --addr=127.0.0.1"
+CMD+=" --graph_mode"
+
+# Launch one training process per rank (0-7) in the background; each rank logs to
+# output/<rank>/train_<rank>.log under the directory the script was started from.
+# On aarch64 each rank is pinned with taskset to CPU ids 24*i .. 24*i+23; on any
+# other architecture the command is started without CPU pinning.
+
+host_arch=$(uname -m)
+for i in $(seq 0 7)
+do
+  export RANK=${i}
+
+  # Start every run from an empty per-rank output directory; rm -rf is a no-op
+  # when the directory does not exist yet.
+  rank_dir=${cur_path}/output/${i}
+  rm -rf ${rank_dir}
+  mkdir -p ${rank_dir}
+
+  if [ "${host_arch}" = "aarch64" ]
+  then
+    # First and last CPU id handed to taskset for this rank.
+    p_start=$(( 24 * i ))
+    p_end=$(( 24 * i + 23 ))
+    taskset -c $p_start-$p_end $CMD --local_rank=$i > ${rank_dir}/train_${i}.log 2>&1 &
+  else
+    $CMD --local_rank=$i > ${rank_dir}/train_${i}.log 2>&1 &
+  fi
+done
+
+# Block until all background ranks have finished.
+wait
+
+ASCEND_DEVICE_ID=0
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS，需要模型审视修改
+iter=`grep 'Epoch: ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "iter/s :" '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'`
+FPS=`awk 'BEGIN{printf "%.2f\n",'${iter}'*8*'${batch_size}'}'`
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+echo "E2E Training Duration sec : $e2e_time"
+
+#性能看护结果汇总
+#训练用例信息，不需要修改
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+##获取性能数据，不需要修改
+#吞吐量
+ActualFPS=${FPS}
+#单迭代训练时长
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'`
+
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
+grep -r "step_loss :" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk '{print $19}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+#最后一个迭代loss值，不需要修改
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+rm -rf ${data_path}/squad/v1.1/train-v1.1.json_bert-large-uncased_384_128_64
\ No newline at end of file
-- 
Gitee


From cfd0d382a4e1ebf3023f4aa0020309c9d450a440 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Tue, 12 Apr 2022 11:47:27 +0000
Subject: [PATCH 33/35] =?UTF-8?q?update=20PyTorch/built-in/nlp/Bert-Squad?=
 =?UTF-8?q?=5FID0470=5Ffor=5FPyTorch/run=5Fsquad.py.=20=E5=B7=B2=E5=B0=86?=
 =?UTF-8?q?=E5=8D=95=E7=AE=97=E5=AD=90=E6=A8=A1=E5=BC=8FMM=5FBMM=5FND=5FEN?=
 =?UTF-8?q?ABLE=E5=88=A0=E9=99=A4=EF=BC=9B=20=E9=AA=8C=E8=AF=81=E6=80=A7?=
 =?UTF-8?q?=E8=83=BD0.26s=EF=BC=8Ctaskid=EF=BC=9Adebug00602423?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 21e87064d9..026ec27dfc 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -900,7 +900,14 @@ def main():
                         help='whether to enable graph mode.')
 
     args = parser.parse_args()
-    args.fp16 = args.fp16 or args.amp    
+    args.fp16 = args.fp16 or args.amp
+
+    option = {}
+    option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance"
+    option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm"
+    if args.graph_mode:
+        option["MM_BMM_ND_ENABLE"] = "enable"
+    torch.npu.set_option(option)
 
     if args.local_rank == -1 or args.no_cuda:
         if args.use_npu:
@@ -1296,10 +1303,5 @@ def main():
         dllogger.log(step=tuple(), data={"exact_match": exact_match, "F1": f1})
 
 if __name__ == "__main__":
-    option = {}
-    option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance"
-    option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm"
-    option["MM_BMM_ND_ENABLE"] = "enable"
-    torch.npu.set_option(option)
     main()
     dllogger.flush()
-- 
Gitee


From 8c44f5f12363d66f1be8e56b1d1948e178a4c4a1 Mon Sep 17 00:00:00 2001
From: Ryan <rrrr.cao@hotmail.com>
Date: Tue, 12 Apr 2022 11:51:03 +0000
Subject: [PATCH 34/35] update
 PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py.

---
 .../nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py   | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
index 026ec27dfc..4f91a27de2 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/run_squad.py
@@ -902,13 +902,6 @@ def main():
     args = parser.parse_args()
     args.fp16 = args.fp16 or args.amp
 
-    option = {}
-    option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance"
-    option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm"
-    if args.graph_mode:
-        option["MM_BMM_ND_ENABLE"] = "enable"
-    torch.npu.set_option(option)
-
     if args.local_rank == -1 or args.no_cuda:
         if args.use_npu:
             torch.npu.set_device("npu:%d" % args.npu_id)
@@ -1303,5 +1296,10 @@ def main():
         dllogger.log(step=tuple(), data={"exact_match": exact_match, "F1": f1})
 
 if __name__ == "__main__":
+    option = {}
+    option["ACL_OP_SELECT_IMPL_MODE"] = "high_performance"
+    option["ACL_OPTYPELIST_FOR_IMPLMODE"] = "LayerNorm"
+    option["MM_BMM_ND_ENABLE"] = "enable"
+    torch.npu.set_option(option)
     main()
     dllogger.flush()
-- 
Gitee


From 938723e9bd7b0948aff88367ef5a6a146a4045d6 Mon Sep 17 00:00:00 2001
From: "rrrr.cao@hotmail.com" <rrrr.cao@hotmail.com>
Date: Thu, 14 Apr 2022 11:22:45 +0800
Subject: [PATCH 35/35] add BertBase graph mode

---
 .../image_classification/__init__.py          |   62 -
 .../image_classification/dataloaders.py       |  208 ----
 .../image_classification/logger.py            |  298 -----
 .../image_classification/mixup.py             |   69 --
 .../multi_epochs_dataloader.py                |   44 -
 .../image_classification/resnet.py            |  389 -------
 .../image_classification/smoothing.py         |   83 --
 .../image_classification/smoothing_tocpu.py   |   95 --
 .../image_classification/training.py          |  518 ---------
 .../image_classification/utils.py             |   94 --
 .../ResNet50_for_PyTorch/Dockerfile           |    6 -
 .../ResNet50_for_PyTorch/LICENSE              |   29 -
 .../ResNet50_for_PyTorch/README.md            |   53 -
 .../ResNet50_for_PyTorch/docker_start.sh      |   25 -
 .../ResNet50_for_PyTorch/env_npu.sh           |   71 --
 .../ResNet50_for_PyTorch/eval.sh              |   30 -
 .../infer/convert/aipp_resnet50.aippconfig    |   27 -
 .../infer/convert/pb2om.sh                    |   13 -
 .../infer/docker_start_infer.sh               |   38 -
 .../infer/mxbase/CMakeLists.txt               |   49 -
 .../infer/mxbase/Resnet50Classify.cpp         |  261 -----
 .../infer/mxbase/Resnet50Classify.h           |   59 -
 .../mxbase/classification_task_metric.py      |  174 ---
 .../imagenet1000_clsidx_to_labels.names       | 1001 -----------------
 .../infer/mxbase/main.cpp                     |   69 --
 .../infer/sdk/Resnet50.pipeline               |   75 --
 .../infer/sdk/classification_task_metric.py   |  175 ---
 .../sdk/imagenet1000_clsidx_to_labels.names   | 1001 -----------------
 .../ResNet50_for_PyTorch/infer/sdk/main.py    |  110 --
 .../infer/sdk/resnet50_aipp_pt.cfg            |    3 -
 .../ResNet50_for_PyTorch/infer/sdk/run.sh     |   36 -
 .../modelarts/train_start.py                  |  688 -----------
 .../ResNet50_for_PyTorch/modelzoo_level.txt   |    3 -
 .../ResNet50_for_PyTorch/pthtar2onx.py        |   69 --
 .../pytorch_resnet50_apex.py                  |  827 --------------
 .../ResNet50_for_PyTorch/requirements.txt     |    3 -
 .../ResNet50_for_PyTorch/run_1p.sh            |   31 -
 .../ResNet50_for_PyTorch/run_2p.sh            |   44 -
 .../ResNet50_for_PyTorch/run_4p.sh            |   43 -
 .../ResNet50_for_PyTorch/run_8p.sh            |   41 -
 .../ResNet50_for_PyTorch/test/env_npu.sh      |   71 --
 .../train_ID3071_ResNet50_performance_8p.sh   |  140 ---
 .../test/train_ID3071_performance_1p.sh       |  151 ---
 .../test/train_eval_1p.sh                     |  131 ---
 .../test/train_full_1p.sh                     |  141 ---
 .../test/train_performance_1p.sh              |  148 ---
 .../test/train_performance_1p.sh              |    2 +-
 .../test/train_performance_8p.sh              |    2 +-
 48 files changed, 2 insertions(+), 7698 deletions(-)
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh
 delete mode 100644 PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh

diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py
deleted file mode 100644
index ba94822187..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/__init__.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-'''
-BSD 3-Clause License
-
-Copyright (c) Soumith Chintala 2016,
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://spdx.org/licenses/BSD-3-Clause.html
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-'''
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py
deleted file mode 100644
index ee61fe073d..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/dataloaders.py
+++ /dev/null
@@ -1,208 +0,0 @@
-# Copyright (c) 2018-2019, NVIDIA CORPORATION
-# Copyright (c) 2017-      Facebook, Inc
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import torch
-import numpy as np
-import torchvision.datasets as datasets
-import torchvision.transforms as transforms
-from PIL import Image
-
-DATA_BACKEND_CHOICES = ['pytorch', 'syntetic']
-
-def load_jpeg_from_file(path, cuda=True, fp16=False):
-    img_transforms = transforms.Compose(
-        [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]
-    )
-
-    img = img_transforms(Image.open(path))
-    with torch.no_grad():
-        # mean and std are not multiplied by 255 as they are in training script
-        # torch dataloader reads data into bytes whereas loading directly
-        # through PIL creates a tensor with floats in [0,1] range
-        mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
-        std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
-
-        if cuda:
-            mean = mean.cuda()
-            std = std.cuda()
-            img = img.cuda()
-        if fp16:
-            mean = mean.half()
-            std = std.half()
-            img = img.half()
-        else:
-            img = img.float()
-
-        input = img.unsqueeze(0).sub_(mean).div_(std)
-
-    return input
-
-class DALIWrapper(object):
-    def gen_wrapper(dalipipeline, num_classes, one_hot):
-        for data in dalipipeline:
-            input = data[0]["data"]
-            target = torch.reshape(data[0]["label"], [-1]).cuda().long()
-            if one_hot:
-                target = expand(num_classes, torch.float, target)
-            yield input, target
-        dalipipeline.reset()
-
-    def __init__(self, dalipipeline, num_classes, one_hot):
-        self.dalipipeline = dalipipeline
-        self.num_classes =  num_classes
-        self.one_hot = one_hot
-
-    def __iter__(self):
-        return DALIWrapper.gen_wrapper(self.dalipipeline, self.num_classes, self.one_hot)
-
-def fast_collate(batch):
-    imgs = [img[0] for img in batch]
-    targets = torch.tensor([target[1] for target in batch], dtype=torch.int64)
-    w = imgs[0].size[0]
-    h = imgs[0].size[1]
-    tensor = torch.zeros( (len(imgs), 3, h, w), dtype=torch.uint8 )
-    for i, img in enumerate(imgs):
-        nump_array = np.asarray(img, dtype=np.uint8)
-        if(nump_array.ndim < 3):
-            nump_array = np.expand_dims(nump_array, axis=-1)
-        nump_array = np.rollaxis(nump_array, 2)
-
-        tensor[i] += torch.from_numpy(nump_array)
-
-    return tensor, targets
-
-def expand(num_classes, dtype, tensor):
-    e = torch.zeros(tensor.size(0), num_classes, dtype=dtype, device=torch.device('cuda'))
-    e = e.scatter(1, tensor.unsqueeze(1), 1.0)
-    return e
-
-class PrefetchedWrapper(object):
-    def prefetched_loader(loader, num_classes, fp16, one_hot):
-        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1)
-        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1)
-        if fp16:
-            mean = mean.half()
-            std = std.half()
-
-        stream = torch.cuda.Stream()
-        first = True
-
-        for next_input, next_target in loader:
-            with torch.cuda.stream(stream):
-                next_input = next_input.cuda(non_blocking=True)
-                next_target = next_target.cuda(non_blocking=True)
-                if fp16:
-                    next_input = next_input.half()
-                    if one_hot:
-                        next_target = expand(num_classes, torch.half, next_target)
-                else:
-                    next_input = next_input.float()
-                    if one_hot:
-                        next_target = expand(num_classes, torch.float, next_target)
-
-                next_input = next_input.sub_(mean).div_(std)
-
-            if not first:
-                yield input, target
-            else:
-                first = False
-
-            torch.cuda.current_stream().wait_stream(stream)
-            input = next_input
-            target = next_target
-
-        yield input, target
-
-    def __init__(self, dataloader, num_classes, fp16, one_hot):
-        self.dataloader = dataloader
-        self.fp16 = fp16
-        self.epoch = 0
-        self.one_hot = one_hot
-        self.num_classes = num_classes
-
-    def __iter__(self):
-        if (self.dataloader.sampler is not None and
-            isinstance(self.dataloader.sampler,
-                       torch.utils.data.distributed.DistributedSampler)):
-
-            self.dataloader.sampler.set_epoch(self.epoch)
-        self.epoch += 1
-        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.num_classes, self.fp16, self.one_hot)
-
-def get_pytorch_train_loader(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
-    traindir = os.path.join(data_path, 'train')
-    train_dataset = datasets.ImageFolder(
-            traindir,
-            transforms.Compose([
-                transforms.RandomResizedCrop(224),
-                transforms.RandomHorizontalFlip(),
-                ]))
-
-    if torch.distributed.is_initialized():
-        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
-    else:
-        train_sampler = None
-
-    train_loader = torch.utils.data.DataLoader(
-            train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
-            num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate, drop_last=True)
-
-    return PrefetchedWrapper(train_loader, num_classes, fp16, one_hot), len(train_loader)
-
-def get_pytorch_val_loader(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
-    valdir = os.path.join(data_path, 'val')
-    val_dataset = datasets.ImageFolder(
-            valdir, transforms.Compose([
-                transforms.Resize(256),
-                transforms.CenterCrop(224),
-                ]))
-
-    if torch.distributed.is_initialized():
-        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
-    else:
-        val_sampler = None
-
-    val_loader = torch.utils.data.DataLoader(
-            val_dataset,
-            sampler=val_sampler,
-            batch_size=batch_size, shuffle=False,
-            num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True,
-            collate_fn=fast_collate)
-
-    return PrefetchedWrapper(val_loader, num_classes, fp16, one_hot), len(val_loader)
-
-class SynteticDataLoader(object):
-    def __init__(self, fp16, batch_size, num_classes, num_channels, height, width, one_hot):
-        input_data = torch.empty(batch_size, num_channels, height, width).cuda().normal_(0, 1.0)
-        if one_hot:
-            input_target = torch.empty(batch_size, num_classes).cuda()
-            input_target[:, 0] = 1.0
-        else:
-            input_target = torch.randint(0, num_classes, (batch_size,))
-        input_target=input_target.cuda()
-        if fp16:
-            input_data = input_data.half()
-
-        self.input_data = input_data
-        self.input_target = input_target
-
-    def __iter__(self):
-        while True:
-            yield self.input_data, self.input_target
-
-def get_syntetic_loader(data_path, batch_size, num_classes, one_hot, workers=None, _worker_init_fn=None, fp16=False):
-    return SynteticDataLoader(fp16, batch_size, 1000, 3, 224, 224, one_hot), -1
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py
deleted file mode 100644
index 5eb24a1fee..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/logger.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright (c) 2018-2019, NVIDIA CORPORATION
-# Copyright (c) 2017-      Facebook, Inc
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from collections import OrderedDict
-import dllogger
-import numpy as np
-
-
-def format_step(step):
-    if isinstance(step, str):
-        return step
-    s = ""
-    if len(step) > 0:
-        s += "Epoch: {} ".format(step[0])
-    if len(step) > 1:
-        s += "Iteration: {} ".format(step[1])
-    if len(step) > 2:
-        s += "Validation Iteration: {} ".format(step[2])
-    if len(step) == 0:
-        s = "Summary:"
-    return s
-
-
-PERF_METER = lambda: Meter(AverageMeter(), AverageMeter(), AverageMeter())
-LOSS_METER = lambda: Meter(AverageMeter(), AverageMeter(), MinMeter())
-ACC_METER = lambda: Meter(AverageMeter(), AverageMeter(), MaxMeter())
-LR_METER = lambda: Meter(LastMeter(), LastMeter(), LastMeter())
-
-LAT_100 = lambda: Meter(QuantileMeter(1), QuantileMeter(1), QuantileMeter(1))
-LAT_99 = lambda: Meter(QuantileMeter(0.99), QuantileMeter(0.99), QuantileMeter(0.99))
-LAT_95 = lambda: Meter(QuantileMeter(0.95), QuantileMeter(0.95), QuantileMeter(0.95))
-
-class Meter(object):
-    def __init__(self, iteration_aggregator, epoch_aggregator, run_aggregator):
-        self.run_aggregator = run_aggregator
-        self.epoch_aggregator = epoch_aggregator
-        self.iteration_aggregator = iteration_aggregator
-
-    def record(self, val, n=1):
-        self.iteration_aggregator.record(val, n=n)
-
-    def get_iteration(self):
-        v, n = self.iteration_aggregator.get_val()
-        return v
-
-    def reset_iteration(self):
-        v, n = self.iteration_aggregator.get_data()
-        self.iteration_aggregator.reset()
-        if v is not None:
-            self.epoch_aggregator.record(v, n=n)
-
-    def get_epoch(self):
-        v, n = self.epoch_aggregator.get_val()
-        return v
-
-    def reset_epoch(self):
-        v, n = self.epoch_aggregator.get_data()
-        self.epoch_aggregator.reset()
-        if v is not None:
-            self.run_aggregator.record(v, n=n)
-
-    def get_run(self):
-        v, n = self.run_aggregator.get_val()
-        return v
-
-    def reset_run(self):
-        self.run_aggregator.reset()
-
-
-class QuantileMeter(object):
-    def __init__(self, q):
-        self.q = q
-        self.reset()
-
-    def reset(self):
-        self.vals = []
-        self.n = 0
-
-    def record(self, val, n=1):
-        if isinstance(val, list):
-            self.vals += val
-            self.n += len(val)
-        else:
-            self.vals += [val] * n
-            self.n += n
-
-    def get_val(self):
-        if not self.vals:
-            return None, self.n
-        return np.quantile(self.vals, self.q, interpolation='nearest'), self.n
-
-    def get_data(self):
-        return self.vals, self.n
-
-
-class MaxMeter(object):
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.max = None
-        self.n = 0
-
-    def record(self, val, n=1):
-        if self.max is None:
-            self.max = val
-        else:
-            self.max = max(self.max, val)
-        self.n = n
-
-    def get_val(self):
-        return self.max, self.n
-
-    def get_data(self):
-        return self.max, self.n
-
-
-class MinMeter(object):
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.min = None
-        self.n = 0
-
-    def record(self, val, n=1):
-        if self.min is None:
-            self.min = val
-        else:
-            self.min = max(self.min, val)
-        self.n = n
-
-    def get_val(self):
-        return self.min, self.n
-
-    def get_data(self):
-        return self.min, self.n
-
-
-class LastMeter(object):
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.last = None
-        self.n = 0
-
-    def record(self, val, n=1):
-        self.last = val
-        self.n = n
-
-    def get_val(self):
-        return self.last, self.n
-
-    def get_data(self):
-        return self.last, self.n
-
-
-class AverageMeter(object):
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.n = 0
-        self.val = 0
-
-    def record(self, val, n=1):
-        self.n += n
-        self.val += val * n
-
-    def get_val(self):
-        if self.n == 0:
-            return None, 0
-        return self.val / self.n, self.n
-
-    def get_data(self):
-        if self.n == 0:
-            return None, 0
-        return self.val / self.n, self.n
-
-
-class Logger(object):
-    def __init__(self, print_interval, backends, verbose=False):
-        self.epoch = -1
-        self.iteration = -1
-        self.val_iteration = -1
-        self.metrics = OrderedDict()
-        self.backends = backends
-        self.print_interval = print_interval
-        self.verbose = verbose
-        dllogger.init(backends)
-
-    def log_parameter(self, data, verbosity=0):
-        dllogger.log(step="PARAMETER", data=data, verbosity=verbosity)
-
-    def register_metric(self, metric_name, meter, verbosity=0, metadata={}):
-        if self.verbose:
-            print("Registering metric: {}".format(metric_name))
-        self.metrics[metric_name] = {'meter': meter, 'level': verbosity}
-        dllogger.metadata(metric_name, metadata)
-
-    def log_metric(self, metric_name, val, n=1):
-        self.metrics[metric_name]['meter'].record(val, n=n)
-
-    def start_iteration(self, val=False):
-        if val:
-            self.val_iteration += 1
-        else:
-            self.iteration += 1
-
-    def end_iteration(self, val=False):
-        it = self.val_iteration if val else self.iteration
-        if (it % self.print_interval == 0):
-            metrics = {
-                n: m
-                for n, m in self.metrics.items() if n.startswith('val') == val
-            }
-            step = (self.epoch,
-                    self.iteration) if not val else (self.epoch,
-                                                     self.iteration,
-                                                     self.val_iteration)
-
-            verbositys = {m['level'] for _, m in metrics.items()}
-            for ll in verbositys:
-                llm = {n: m for n, m in metrics.items() if m['level'] == ll}
-
-                dllogger.log(step=step,
-                         data={
-                             n: m['meter'].get_iteration()
-                             for n, m in llm.items()
-                         },
-                         verbosity=ll)
-
-            for n, m in metrics.items():
-                m['meter'].reset_iteration()
-
-            dllogger.flush()
-
-    def start_epoch(self):
-        self.epoch += 1
-        self.iteration = 0
-        self.val_iteration = 0
-
-        for n, m in self.metrics.items():
-            m['meter'].reset_epoch()
-
-    def end_epoch(self):
-        for n, m in self.metrics.items():
-            m['meter'].reset_iteration()
-
-        verbositys = {m['level'] for _, m in self.metrics.items()}
-        for ll in verbositys:
-            llm = {n: m for n, m in self.metrics.items() if m['level'] == ll}
-            dllogger.log(step=(self.epoch, ),
-                     data={n: m['meter'].get_epoch()
-                           for n, m in llm.items()})
-
-    def end(self):
-        for n, m in self.metrics.items():
-            m['meter'].reset_epoch()
-
-        verbositys = {m['level'] for _, m in self.metrics.items()}
-        for ll in verbositys:
-            llm = {n: m for n, m in self.metrics.items() if m['level'] == ll}
-            dllogger.log(step=tuple(),
-                     data={n: m['meter'].get_run()
-                           for n, m in llm.items()})
-
-        for n, m in self.metrics.items():
-            m['meter'].reset_epoch()
-
-        dllogger.flush()
-
-    def iteration_generator_wrapper(self, gen, val=False):
-        for g in gen:
-            self.start_iteration(val=val)
-            yield g
-            self.end_iteration(val=val)
-
-    def epoch_generator_wrapper(self, gen):
-        for g in gen:
-            self.start_epoch()
-            yield g
-            self.end_epoch()
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py
deleted file mode 100644
index ff98304306..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/mixup.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-import numpy as np
-
-
-def mixup(alpha, num_classes, data, target):
-    with torch.no_grad():
-        bs = data.size(0)
-        c = np.random.beta(alpha, alpha)
-
-        perm = torch.randperm(bs).cuda()
-
-        md = c * data + (1-c) * data[perm, :]
-        mt = c * target + (1-c) * target[perm, :]
-        return md, mt
-
-
-class MixUpWrapper(object):
-    def __init__(self, alpha, num_classes, dataloader):
-        self.alpha = alpha
-        self.dataloader = dataloader
-        self.num_classes = num_classes
-
-    def mixup_loader(self, loader):
-        for input, target in loader:
-            i, t = mixup(self.alpha, self.num_classes, input, target)
-            yield i, t
-
-    def __iter__(self):
-        return self.mixup_loader(self.dataloader)
-
-
-class NLLMultiLabelSmooth(nn.Module):
-    def __init__(self, smoothing = 0.0):
-        super(NLLMultiLabelSmooth, self).__init__()
-        self.confidence = 1.0 - smoothing
-        self.smoothing = smoothing
-
-    def forward(self, x, target):
-        if self.training:
-            x = x.float()
-            target = target.float()
-            logprobs = torch.nn.functional.log_softmax(x, dim = -1)
-    
-            nll_loss = -logprobs * target
-            nll_loss = nll_loss.sum(-1)
-    
-            smooth_loss = -logprobs.mean(dim=-1)
-    
-            loss = self.confidence * nll_loss + self.smoothing * smooth_loss
-    
-            return loss.mean()
-        else:
-            return torch.nn.functional.cross_entropy(x, target)
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py
deleted file mode 100644
index 81fc0f10b8..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/multi_epochs_dataloader.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-
-class MultiEpochsDataLoader(torch.utils.data.DataLoader):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self._DataLoader__initialized = False
-        self.batch_sampler = _RepeatSampler(self.batch_sampler)
-        self._DataLoader__initialized = True
-        self.iterator = super().__iter__()
-    
-    def __len__(self):
-        return len(self.batch_sampler.sampler)
-    
-    def __iter__(self):
-        for _ in range(len(self)):
-            yield next(self.iterator)
-
-class _RepeatSampler(object):
-    """
-    Sampler that repeats forever.
-    Args:
-        sampler (Sampler)
-    """
-
-    def __init__(self, sampler):
-        self.sampler = sampler
-    
-    def __iter__(self):
-        while True:
-            yield from iter(self.sampler)
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py
deleted file mode 100644
index 5d3c2c9f53..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/resnet.py
+++ /dev/null
@@ -1,389 +0,0 @@
-# Copyright (c) 2018-2019, NVIDIA CORPORATION
-# Copyright (c) 2017-      Facebook, Inc
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-'''
-BSD 3-Clause License
-
-Copyright (c) Soumith Chintala 2016,
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://spdx.org/licenses/BSD-3-Clause.html
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-'''
-
-import math
-import torch
-import torch.nn as nn
-import numpy as np
-
-__all__ = ['ResNet', 'build_resnet', 'resnet_versions', 'resnet_configs']
-
-# ResNetBuilder {{{
-
-class ResNetBuilder(object):
-    def __init__(self, version, config):
-        self.conv3x3_cardinality = 1 if 'cardinality' not in version.keys() else version['cardinality']
-        self.config = config
-
-    def conv(self, kernel_size, in_planes, out_planes, groups=1, stride=1):
-        conv = nn.Conv2d(
-                in_planes, out_planes,
-                kernel_size=kernel_size, groups=groups,
-                stride=stride, padding=int((kernel_size - 1)/2),
-                bias=False)
-
-        if self.config['nonlinearity'] == 'relu': 
-            nn.init.kaiming_normal_(conv.weight,
-                    mode=self.config['conv_init'],
-                    nonlinearity=self.config['nonlinearity'])
-
-        return conv
-
-    def conv3x3(self, in_planes, out_planes, stride=1):
-        """3x3 convolution with padding"""
-        c = self.conv(3, in_planes, out_planes, groups=self.conv3x3_cardinality, stride=stride)
-        return c
-
-    def conv1x1(self, in_planes, out_planes, stride=1):
-        """1x1 convolution with padding"""
-        c = self.conv(1, in_planes, out_planes, stride=stride)
-        return c
-
-    def conv7x7(self, in_planes, out_planes, stride=1):
-        """7x7 convolution with padding"""
-        c = self.conv(7, in_planes, out_planes, stride=stride)
-        return c
-
-    def conv5x5(self, in_planes, out_planes, stride=1):
-        """5x5 convolution with padding"""
-        c = self.conv(5, in_planes, out_planes, stride=stride)
-        return c
-
-    def batchnorm(self, planes, last_bn=False):
-        bn = nn.BatchNorm2d(planes)
-        gamma_init_val = 0 if last_bn and self.config['last_bn_0_init'] else 1
-        nn.init.constant_(bn.weight, gamma_init_val)
-        nn.init.constant_(bn.bias, 0)
-
-        return bn
-
-    def activation(self):
-        return self.config['activation']()
-
-# ResNetBuilder }}}
-
-# BasicBlock {{{
-class BasicBlock(nn.Module):
-    def __init__(self, builder, inplanes, planes, expansion, stride=1, downsample=None):
-        super(BasicBlock, self).__init__()
-        self.conv1 = builder.conv3x3(inplanes, planes, stride)
-        self.bn1 = builder.batchnorm(planes)
-        self.relu = builder.activation()
-        self.conv2 = builder.conv3x3(planes, planes*expansion)
-        self.bn2 = builder.batchnorm(planes*expansion, last_bn=True)
-        self.downsample = downsample
-        self.stride = stride
-
-    def forward(self, x):
-        residual = x
-
-        out = self.conv1(x)
-        if self.bn1 is not None:
-            out = self.bn1(out)
-
-        out = self.relu(out)
-
-        out = self.conv2(out)
-
-        if self.bn2 is not None:
-            out = self.bn2(out)
-
-        if self.downsample is not None:
-            residual = self.downsample(x)
-
-        out += residual
-        out = self.relu(out)
-
-        return out
-# BasicBlock }}}
-
-# SqueezeAndExcitation {{{
-class SqueezeAndExcitation(nn.Module):
-    def __init__(self, planes, squeeze):
-        super(SqueezeAndExcitation, self).__init__()
-        self.squeeze = nn.Linear(planes, squeeze)
-        self.expand = nn.Linear(squeeze, planes)
-        self.relu = nn.ReLU(inplace=True)
-        self.sigmoid = nn.Sigmoid()
-
-    def forward(self, x):
-        out = torch.mean(x.view(x.size(0), x.size(1), -1), 2)
-        out = self.squeeze(out)
-        out = self.relu(out)
-        out = self.expand(out)
-        out = self.sigmoid(out)
-        out = out.unsqueeze(2).unsqueeze(3)
-
-        return out
-
-# }}}
-
-# Bottleneck {{{
-class Bottleneck(nn.Module):
-    def __init__(self, builder, inplanes, planes, expansion, stride=1, se=False, se_squeeze=16, downsample=None):
-        super(Bottleneck, self).__init__()
-        self.conv1 = builder.conv1x1(inplanes, planes)
-        self.bn1 = builder.batchnorm(planes)
-        self.conv2 = builder.conv3x3(planes, planes, stride=stride)
-        self.bn2 = builder.batchnorm(planes)
-        self.conv3 = builder.conv1x1(planes, planes * expansion)
-        self.bn3 = builder.batchnorm(planes * expansion, last_bn=True)
-        self.relu = builder.activation()
-        self.downsample = downsample
-        self.stride = stride
-        self.squeeze = SqueezeAndExcitation(planes*expansion, se_squeeze) if se else None
-
-    def forward(self, x):
-        residual = x
-
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = self.relu(out)
-
-        out = self.conv2(out)
-        out = self.bn2(out)
-        out = self.relu(out)
-
-        out = self.conv3(out)
-        out = self.bn3(out)
-
-        if self.downsample is not None:
-            residual = self.downsample(x)
-
-        if self.squeeze is None:
-            out += residual
-        else:
-            out = torch.addcmul(residual, 1.0, out, self.squeeze(out))
-
-        out = self.relu(out)
-
-        return out
-
-def SEBottleneck(builder, inplanes, planes, expansion, stride=1, downsample=None):
-    return Bottleneck(builder, inplanes, planes, expansion, stride=stride, se=True, se_squeeze=16, downsample=downsample)
-# Bottleneck }}}
-
-# ResNet {{{
-class ResNet(nn.Module):
-    def __init__(self, builder, block, expansion, layers, widths, num_classes=1000):
-        self.inplanes = 64
-        super(ResNet, self).__init__()
-        self.conv1 = builder.conv7x7(3, 64, stride=2)
-        self.bn1 = builder.batchnorm(64)
-        self.relu = builder.activation()
-        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        self.layer1 = self._make_layer(builder, block, expansion, widths[0], layers[0])
-        self.layer2 = self._make_layer(builder, block, expansion, widths[1], layers[1], stride=2)
-        self.layer3 = self._make_layer(builder, block, expansion, widths[2], layers[2], stride=2)
-        self.layer4 = self._make_layer(builder, block, expansion, widths[3], layers[3], stride=2)
-        self.avgpool = nn.AdaptiveAvgPool2d(1)
-        self.fc = nn.Linear(widths[3] * expansion, num_classes)
-
-    def _make_layer(self, builder, block, expansion, planes, blocks, stride=1):
-        downsample = None
-        if stride != 1 or self.inplanes != planes * expansion:
-            dconv = builder.conv1x1(self.inplanes, planes * expansion,
-                                    stride=stride)
-            dbn = builder.batchnorm(planes * expansion)
-            if dbn is not None:
-                downsample = nn.Sequential(dconv, dbn)
-            else:
-                downsample = dconv
-
-        layers = []
-        layers.append(block(builder, self.inplanes, planes, expansion, stride=stride, downsample=downsample))
-        self.inplanes = planes * expansion
-        for i in range(1, blocks):
-            layers.append(block(builder, self.inplanes, planes, expansion))
-
-        return nn.Sequential(*layers)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        if self.bn1 is not None:
-            x = self.bn1(x)
-        x = self.relu(x)
-        x = self.maxpool(x)
-
-        x = self.layer1(x)
-        x = self.layer2(x)
-        x = self.layer3(x)
-        x = self.layer4(x)
-
-        x = self.avgpool(x)
-        x = x.view(x.size(0), -1)
-        x = self.fc(x)
-
-        return x
-# ResNet }}}
-
-resnet_configs = {
-        'classic' : {
-            'conv' : nn.Conv2d,
-            'conv_init' : 'fan_out',
-            'nonlinearity' : 'relu',
-            'last_bn_0_init' : False,
-            'activation' : lambda: nn.ReLU(inplace=True),
-            },
-        'fanin' : {
-            'conv' : nn.Conv2d,
-            'conv_init' : 'fan_in',
-            'nonlinearity' : 'relu',
-            'last_bn_0_init' : False,
-            'activation' : lambda: nn.ReLU(inplace=True),
-            },
-        'grp-fanin' : {
-            'conv' : nn.Conv2d,
-            'conv_init' : 'fan_in',
-            'nonlinearity' : 'relu',
-            'last_bn_0_init' : False,
-            'activation' : lambda: nn.ReLU(inplace=True),
-            },
-        'grp-fanout' : {
-            'conv' : nn.Conv2d,
-            'conv_init' : 'fan_out',
-            'nonlinearity' : 'relu',
-            'last_bn_0_init' : False,
-            'activation' : lambda: nn.ReLU(inplace=True),
-            },
-        }
-
-resnet_versions = {
-        'resnet18' : {
-            'net' : ResNet,
-            'block' : BasicBlock,
-            'layers' : [2, 2, 2, 2],
-            'widths' : [64, 128, 256, 512],
-            'expansion' : 1,
-            'num_classes' : 1000,
-            },
-         'resnet34' : {
-            'net' : ResNet,
-            'block' : BasicBlock,
-            'layers' : [3, 4, 6, 3],
-            'widths' : [64, 128, 256, 512],
-            'expansion' : 1,
-            'num_classes' : 1000,
-            },
-         'resnet50' : {
-            'net' : ResNet,
-            'block' : Bottleneck,
-            'layers' : [3, 4, 6, 3],
-            'widths' : [64, 128, 256, 512],
-            'expansion' : 4,
-            'num_classes' : 1000,
-            },
-        'resnet101' : {
-            'net' : ResNet,
-            'block' : Bottleneck,
-            'layers' : [3, 4, 23, 3],
-            'widths' : [64, 128, 256, 512],
-            'expansion' : 4,
-            'num_classes' : 1000,
-            },
-        'resnet152' : {
-            'net' : ResNet,
-            'block' : Bottleneck,
-            'layers' : [3, 8, 36, 3],
-            'widths' : [64, 128, 256, 512],
-            'expansion' : 4,
-            'num_classes' : 1000,
-            },
-        'resnext101-32x4d' : {
-            'net' : ResNet,
-            'block' : Bottleneck,
-            'cardinality' : 32,
-            'layers' : [3, 4, 23, 3],
-            'widths' : [128, 256, 512, 1024],
-            'expansion' : 2,
-            'num_classes' : 1000,
-            },
-        'se-resnext101-32x4d' : {
-            'net' : ResNet,
-            'block' : SEBottleneck,
-            'cardinality' : 32,
-            'layers' : [3, 4, 23, 3],
-            'widths' : [128, 256, 512, 1024],
-            'expansion' : 2,
-            'num_classes' : 1000,
-            },
-        }
-
-
-def build_resnet(version, config, verbose=True):
-    version = resnet_versions[version]
-    config = resnet_configs[config]
-
-    builder = ResNetBuilder(version, config)
-    if verbose:
-        print("Version: {}".format(version))
-        print("Config: {}".format(config))
-    model = version['net'](builder,
-                           version['block'],
-                           version['expansion'],
-                           version['layers'],
-                           version['widths'],
-                           version['num_classes'])
-
-    return model
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py
deleted file mode 100644
index 408718aaf2..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import torch
-import torch.nn as nn
-
-
-class CrossEntropy(nn.CrossEntropyLoss):
-    def __init__(self, smooth_factor=0., num_classes=1000):
-        super(CrossEntropy, self).__init__()
-        self.on_value = 1.0 - smooth_factor
-        self.off_value = 1.0 * smooth_factor / (num_classes - 1)
-
-    def forward(self, input, target):
-        one_hot_label = torch.npu_one_hot(target, -1, input.size(1), self.on_value, self.off_value)
-        one_hot_label = one_hot_label.to(torch.float16)
-        loss = torch.npu_softmax_cross_entropy_with_logits(input.to(torch.float16), one_hot_label)
-
-        loss = torch.mean(loss, [0], keepdim=False, dtype=torch.float32)
-        return loss
-
-class LabelSmoothingNpu(nn.Module):
-    """
-    NLL loss with label smoothing.
-    """
-    def __init__(self, smoothing=0.0):
-        """
-        Constructor for the LabelSmoothing module.
-
-        :param smoothing: label smoothing factor
-        """
-        super(LabelSmoothingNpu, self).__init__()
-        self.confidence = 1.0 - smoothing
-        self.smoothing = smoothing
-
-        self.epsilon = 0.1
-        self.num_classes = 1000
-
-    def forward(self, x, target):
-        CALCULATE_DEVICE = x.device
-        logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
-
-        targets = torch.zeros_like(logprobs).scatter_(1, target.unsqueeze(1), 1)
-        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
-        loss = (-targets * logprobs).mean(0).sum()
-
-        return loss.to(CALCULATE_DEVICE)
-
-class LabelSmoothingGpu(nn.Module):
-    """
-    NLL loss with label smoothing.
-    """
-    def __init__(self, smoothing=0.0):
-        """
-        Constructor for the LabelSmoothing module.
-
-        :param smoothing: label smoothing factor
-        """
-        super(LabelSmoothingGpu, self).__init__()
-        self.confidence = 1.0 - smoothing
-        self.smoothing = smoothing
-
-    def forward(self, x, target):
-        logprobs = torch.nn.functional.log_softmax(x, dim=-1)
-
-        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
-        nll_loss = nll_loss.squeeze(1)
-        smooth_loss = -logprobs.mean(dim=-1)
-        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
-        return loss.mean()
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py
deleted file mode 100644
index 6ec5b51765..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/smoothing_tocpu.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-'''
-BSD 3-Clause License
-
-Copyright (c) Soumith Chintala 2016,
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://spdx.org/licenses/BSD-3-Clause.html
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-'''
-
-import torch
-import torch.nn as nn
-
-class LabelSmoothing(nn.Module):
-    """
-    NLL loss with label smoothing.
-    """
-    def __init__(self, smoothing=0.0):
-        """
-        Constructor for the LabelSmoothing module.
-
-        :param smoothing: label smoothing factor
-        """
-        super(LabelSmoothing, self).__init__()
-        self.confidence = 1.0 - smoothing
-        self.smoothing = smoothing
-
-    def forward(self, x, target):
-        device_x = x.device
-        device_target = target.device
-        x = x.to("cpu")
-        target = target.to("cpu")
-        logprobs = torch.nn.functional.log_softmax(x, dim=-1)
-
-        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
-        nll_loss = nll_loss.squeeze(1)
-        smooth_loss = -logprobs.mean(dim=-1)
-        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
-
-        x = x.to(device_x)
-        target = target.to(device_target)
-        return loss.mean()
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py
deleted file mode 100644
index 55f7f017d0..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/training.py
+++ /dev/null
@@ -1,518 +0,0 @@
-# Copyright (c) 2018-2019, NVIDIA CORPORATION
-# Copyright (c) 2017-      Facebook, Inc
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import time
-import numpy as np
-import torch
-import torch.nn as nn
-from torch.autograd import Variable
-from . import logger as log
-from . import resnet as nvmodels
-from . import utils
-import dllogger
-try:
-    from apex.fp16_utils import *
-    from apex import amp
-except ImportError:
-    raise ImportError(
-        "Please install apex from https://www.github.com/nvidia/apex to run this example."
-    )
-
-ACC_METADATA = {'unit': '%','format': ':.2f'}
-IPS_METADATA = {'unit': 'img/s', 'format': ':.2f'}
-TIME_METADATA = {'unit': 's', 'format': ':.5f'}
-LOSS_METADATA = {'format': ':.5f'}
-
-class ModelAndLoss(nn.Module):
-    def __init__(self,
-                 arch,
-                 loss,
-                 pretrained_weights=None,
-                 cuda=True,
-                 fp16=False):
-        super(ModelAndLoss, self).__init__()
-        self.arch = arch
-
-        print("=> creating model '{}'".format(arch))
-        model = nvmodels.build_resnet(arch[0], arch[1])
-        if pretrained_weights is not None:
-            print("=> using pre-trained model from a file '{}'".format(arch))
-            model.load_state_dict(pretrained_weights)
-
-        if cuda:
-            model = model.cuda()
-        if fp16:
-            model = network_to_half(model)
-
-        # define loss function (criterion) and optimizer
-        criterion = loss()
-
-        if cuda:
-            criterion = criterion.cuda()
-
-        self.model = model
-        self.loss = criterion
-
-    def forward(self, data, target):
-        output = self.model(data)
-        loss = self.loss(output, target)
-
-        return loss, output
-
-    def distributed(self):
-        return
-
-    def load_model_state(self, state):
-        if not state is None:
-            self.model.load_state_dict(state)
-
-
-def get_optimizer(parameters,
-                  fp16,
-                  lr,
-                  momentum,
-                  weight_decay,
-                  nesterov=False,
-                  state=None,
-                  static_loss_scale=1.,
-                  dynamic_loss_scale=False,
-                  bn_weight_decay=False):
-
-    if bn_weight_decay:
-        print(" ! Weight decay applied to BN parameters ")
-        optimizer = torch.optim.SGD([v for n, v in parameters],
-                                    lr,
-                                    momentum=momentum,
-                                    weight_decay=weight_decay,
-                                    nesterov=nesterov)
-    else:
-        print(" ! Weight decay NOT applied to BN parameters ")
-        bn_params = [v for n, v in parameters if 'bn' in n]
-        rest_params = [v for n, v in parameters if not 'bn' in n]
-        print(len(bn_params))
-        print(len(rest_params))
-        optimizer = torch.optim.SGD([{
-            'params': bn_params,
-            'weight_decay': 0
-        }, {
-            'params': rest_params,
-            'weight_decay': weight_decay
-        }],
-                                    lr,
-                                    momentum=momentum,
-                                    weight_decay=weight_decay,
-                                    nesterov=nesterov)
-    if fp16:
-        optimizer = FP16_Optimizer(optimizer,
-                                   static_loss_scale=static_loss_scale,
-                                   dynamic_loss_scale=dynamic_loss_scale,
-                                   verbose=False)
-
-    if not state is None:
-        optimizer.load_state_dict(state)
-
-    return optimizer
-
-
-def lr_policy(lr_fn, logger=None):
-    if logger is not None:
-        logger.register_metric('lr',
-                               log.LR_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE)
-
-    def _alr(optimizer, iteration, epoch):
-        lr = lr_fn(iteration, epoch)
-
-        if logger is not None:
-            logger.log_metric('lr', lr)
-        for param_group in optimizer.param_groups:
-            param_group['lr'] = lr
-
-    return _alr
-
-
-def lr_step_policy(base_lr, steps, decay_factor, warmup_length, logger=None):
-    def _lr_fn(iteration, epoch):
-        if epoch < warmup_length:
-            lr = base_lr * (epoch + 1) / warmup_length
-        else:
-            lr = base_lr
-            for s in steps:
-                if epoch >= s:
-                    lr *= decay_factor
-        return lr
-
-    return lr_policy(_lr_fn, logger=logger)
-
-
-def lr_linear_policy(base_lr, warmup_length, epochs, logger=None):
-    def _lr_fn(iteration, epoch):
-        if epoch < warmup_length:
-            lr = base_lr * (epoch + 1) / warmup_length
-        else:
-            e = epoch - warmup_length
-            es = epochs - warmup_length
-            lr = base_lr * (1 - (e / es))
-        return lr
-
-    return lr_policy(_lr_fn, logger=logger)
-
-
-def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None):
-    def _lr_fn(iteration, epoch):
-        if epoch < warmup_length:
-            lr = base_lr * (epoch + 1) / warmup_length
-        else:
-            e = epoch - warmup_length
-            es = epochs - warmup_length
-            lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr
-        return lr
-
-    return lr_policy(_lr_fn, logger=logger)
-
-
-def lr_exponential_policy(base_lr,
-                          warmup_length,
-                          epochs,
-                          final_multiplier=0.001,
-                          logger=None):
-    es = epochs - warmup_length
-    epoch_decay = np.power(2, np.log2(final_multiplier) / es)
-
-    def _lr_fn(iteration, epoch):
-        if epoch < warmup_length:
-            lr = base_lr * (epoch + 1) / warmup_length
-        else:
-            e = epoch - warmup_length
-            lr = base_lr * (epoch_decay**e)
-        return lr
-
-    return lr_policy(_lr_fn, logger=logger)
-
-
-def get_train_step(model_and_loss,
-                   optimizer,
-                   fp16,
-                   use_amp=False,
-                   batch_size_multiplier=1):
-    def _step(input, target, optimizer_step=True):
-        input_var = Variable(input)
-        target_var = Variable(target)
-        loss, output = model_and_loss(input_var, target_var)
-        if torch.distributed.is_initialized():
-            print('utils.reduce_tensor(loss.data)')
-            reduced_loss = utils.reduce_tensor(loss.data)
-        else:
-            reduced_loss = loss.data
-
-        if fp16:
-            optimizer.backward(loss)
-        elif use_amp:
-            with amp.scale_loss(loss, optimizer) as scaled_loss:
-                scaled_loss.backward()
-        else:
-            loss.backward()
-
-        if optimizer_step:
-            opt = optimizer.optimizer if isinstance(
-                optimizer, FP16_Optimizer) else optimizer
-            for param_group in opt.param_groups:
-                for param in param_group['params']:
-                    param.grad /= batch_size_multiplier
-
-            optimizer.step()
-            optimizer.zero_grad()
-
-        torch.cuda.synchronize()
-
-        return reduced_loss
-
-    return _step
-
-
-def train(train_loader,
-          model_and_loss,
-          optimizer,
-          lr_scheduler,
-          fp16,
-          logger,
-          epoch,
-          use_amp=False,
-          prof=-1,
-          batch_size_multiplier=1,
-          register_metrics=True):
-
-    if register_metrics and logger is not None:
-        logger.register_metric('train.loss',
-                               log.LOSS_METER(),
-                               verbosity=dllogger.Verbosity.DEFAULT,
-                               metadata=LOSS_METADATA)
-        logger.register_metric('train.compute_ips',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=IPS_METADATA)
-        logger.register_metric('train.total_ips',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.DEFAULT,
-                               metadata=IPS_METADATA)
-        logger.register_metric('train.data_time',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-        logger.register_metric('train.compute_time',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-
-    step = get_train_step(model_and_loss,
-                          optimizer,
-                          fp16,
-                          use_amp=use_amp,
-                          batch_size_multiplier=batch_size_multiplier)
-
-    model_and_loss.train()
-    end = time.time()
-
-    optimizer.zero_grad()
-
-    data_iter = enumerate(train_loader)
-    if logger is not None:
-        data_iter = logger.iteration_generator_wrapper(data_iter)
-    if prof > 0:
-        data_iter = utils.first_n(prof, data_iter)
-
-    for i, (input, target) in data_iter:
-        bs = input.size(0)
-        lr_scheduler(optimizer, i, epoch)
-        data_time = time.time() - end
-
-        optimizer_step = ((i + 1) % batch_size_multiplier) == 0
-        loss = step(input, target, optimizer_step=optimizer_step)
-
-        it_time = time.time() - end
-
-        if logger is not None:
-            logger.log_metric('train.loss', to_python_float(loss), bs)
-            logger.log_metric('train.compute_ips',
-                              calc_ips(bs, it_time - data_time))
-            logger.log_metric('train.total_ips', calc_ips(bs, it_time))
-            logger.log_metric('train.data_time', data_time)
-            logger.log_metric('train.compute_time', it_time - data_time)
-
-        end = time.time()
-
-
-def get_val_step(model_and_loss):
-    def _step(input, target):
-        input_var = Variable(input)
-        target_var = Variable(target)
-
-        with torch.no_grad():
-            loss, output = model_and_loss(input_var, target_var)
-
-        prec1, prec5 = utils.accuracy(output.data, target, topk=(1, 5))
-
-        if torch.distributed.is_initialized():
-            reduced_loss = utils.reduce_tensor(loss.data)
-            prec1 = utils.reduce_tensor(prec1)
-            prec5 = utils.reduce_tensor(prec5)
-        else:
-            reduced_loss = loss.data
-
-        torch.cuda.synchronize()
-
-        return reduced_loss, prec1, prec5
-
-    return _step
-
-
-def validate(val_loader,
-             model_and_loss,
-             fp16,
-             logger,
-             epoch,
-             prof=-1,
-             register_metrics=True):
-    if register_metrics and logger is not None:
-        logger.register_metric('val.top1',
-                               log.ACC_METER(),
-                               verbosity=dllogger.Verbosity.DEFAULT,
-                               metadata=ACC_METADATA)
-        logger.register_metric('val.top5',
-                               log.ACC_METER(),
-                               verbosity=dllogger.Verbosity.DEFAULT,
-                               metadata=ACC_METADATA)
-        logger.register_metric('val.loss',
-                               log.LOSS_METER(),
-                               verbosity=dllogger.Verbosity.DEFAULT,
-                               metadata=LOSS_METADATA)
-        logger.register_metric('val.compute_ips',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=IPS_METADATA)
-        logger.register_metric('val.total_ips',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.DEFAULT,
-                               metadata=IPS_METADATA)
-        logger.register_metric('val.data_time',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-        logger.register_metric('val.compute_latency',
-                               log.PERF_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-        logger.register_metric('val.compute_latency_at100',
-                               log.LAT_100(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-        logger.register_metric('val.compute_latency_at99',
-                               log.LAT_99(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-        logger.register_metric('val.compute_latency_at95',
-                               log.LAT_95(),
-                               verbosity=dllogger.Verbosity.VERBOSE,
-                               metadata=TIME_METADATA)
-
-
-    step = get_val_step(model_and_loss)
-
-    top1 = log.AverageMeter()
-    # switch to evaluate mode
-    model_and_loss.eval()
-
-    end = time.time()
-
-    data_iter = enumerate(val_loader)
-    if not logger is None:
-        data_iter = logger.iteration_generator_wrapper(data_iter, val=True)
-    if prof > 0:
-        data_iter = utils.first_n(prof, data_iter)
-
-    for i, (input, target) in data_iter:
-        bs = input.size(0)
-        data_time = time.time() - end
-
-        loss, prec1, prec5 = step(input, target)
-
-        it_time = time.time() - end
-
-        top1.record(to_python_float(prec1), bs)
-        if logger is not None:
-            logger.log_metric('val.top1', to_python_float(prec1), bs)
-            logger.log_metric('val.top5', to_python_float(prec5), bs)
-            logger.log_metric('val.loss', to_python_float(loss), bs)
-            logger.log_metric('val.compute_ips',
-                              calc_ips(bs, it_time - data_time))
-            logger.log_metric('val.total_ips', calc_ips(bs, it_time))
-            logger.log_metric('val.data_time', data_time)
-            logger.log_metric('val.compute_latency', it_time - data_time)
-            logger.log_metric('val.compute_latency_at95', it_time - data_time)
-            logger.log_metric('val.compute_latency_at99', it_time - data_time)
-            logger.log_metric('val.compute_latency_at100', it_time - data_time)
-
-        end = time.time()
-
-    return top1.get_val()
-
-
-# Train loop {{{
-def calc_ips(batch_size, time):
-    world_size = torch.distributed.get_world_size(
-    ) if torch.distributed.is_initialized() else 1
-    tbs = world_size * batch_size
-    return tbs / time
-
-
-def train_loop(model_and_loss,
-               optimizer,
-               lr_scheduler,
-               train_loader,
-               val_loader,
-               epochs,
-               fp16,
-               logger,
-               should_backup_checkpoint,
-               use_amp=False,
-               batch_size_multiplier=1,
-               best_prec1=0,
-               start_epoch=0,
-               prof=-1,
-               skip_training=False,
-               skip_validation=False,
-               save_checkpoints=True,
-               checkpoint_dir='./'):
-
-    prec1 = -1
-
-    epoch_iter = range(start_epoch, epochs)
-    for epoch in epoch_iter:
-        if logger is not None:
-            logger.start_epoch()
-        if not skip_training:
-            train(train_loader,
-                  model_and_loss,
-                  optimizer,
-                  lr_scheduler,
-                  fp16,
-                  logger,
-                  epoch,
-                  use_amp=use_amp,
-                  prof=prof,
-                  register_metrics=epoch == start_epoch,
-                  batch_size_multiplier=batch_size_multiplier)
-
-        if not skip_validation:
-            prec1, nimg = validate(val_loader,
-                                   model_and_loss,
-                                   fp16,
-                                   logger,
-                                   epoch,
-                                   prof=prof,
-                                   register_metrics=epoch == start_epoch)
-        if logger is not None:
-            logger.end_epoch()
-
-        if save_checkpoints and (not torch.distributed.is_initialized()
-                                 or torch.distributed.get_rank() == 0):
-            if not skip_validation:
-                is_best = logger.metrics['val.top1']['meter'].get_epoch() > best_prec1
-                best_prec1 = max(logger.metrics['val.top1']['meter'].get_epoch(),
-                                 best_prec1)
-            else:
-                is_best = False
-                best_prec1 = 0
-
-            if should_backup_checkpoint(epoch):
-                backup_filename = 'checkpoint-{}.pth.tar'.format(epoch + 1)
-            else:
-                backup_filename = None
-            utils.save_checkpoint(
-                {
-                    'epoch': epoch + 1,
-                    'arch': model_and_loss.arch,
-                    'state_dict': model_and_loss.model.state_dict(),
-                    'best_prec1': best_prec1,
-                    'optimizer': optimizer.state_dict(),
-                },
-                is_best,
-                checkpoint_dir=checkpoint_dir,
-                backup_filename=backup_filename)
-
-
-# }}}
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py
deleted file mode 100644
index a187d4e6f8..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/DistributedResnet50/image_classification/utils.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright (c) 2018-2019, NVIDIA CORPORATION
-# Copyright (c) 2017-      Facebook, Inc
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import os
-import numpy as np
-import torch
-import shutil
-import torch.distributed as dist
-
-
-def should_backup_checkpoint(args):
-    def _sbc(epoch):
-        return args.gather_checkpoints and (epoch < 10 or epoch % 10 == 0)
-
-    return _sbc
-
-
-def save_checkpoint(state,
-                    is_best,
-                    filename='checkpoint.pth.tar',
-                    checkpoint_dir='./',
-                    backup_filename=None):
-    if (not torch.distributed.is_initialized()
-        ) or torch.distributed.get_rank() == 0:
-        filename = os.path.join(checkpoint_dir, filename)
-        print("SAVING {}".format(filename))
-        torch.save(state, filename)
-        if is_best:
-            shutil.copyfile(filename,
-                            os.path.join(checkpoint_dir, 'model_best.pth.tar'))
-        if backup_filename is not None:
-            shutil.copyfile(filename,
-                            os.path.join(checkpoint_dir, backup_filename))
-
-
-def timed_generator(gen):
-    start = time.time()
-    for g in gen:
-        end = time.time()
-        t = end - start
-        yield g, t
-        start = time.time()
-
-
-def timed_function(f):
-    def _timed_function(*args, **kwargs):
-        start = time.time()
-        ret = f(*args, **kwargs)
-        return ret, time.time() - start
-
-    return _timed_function
-
-
-def accuracy(output, target, topk=(1, )):
-    """Computes the precision@k for the specified values of k"""
-    maxk = max(topk)
-    batch_size = target.size(0)
-
-    _, pred = output.topk(maxk, 1, True, True)
-    pred = pred.t()
-    correct = pred.eq(target.view(1, -1).expand_as(pred))
-
-    res = []
-    for k in topk:
-        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
-        res.append(correct_k.mul_(100.0 / batch_size))
-    return res
-
-
-def reduce_tensor(tensor):
-    rt = tensor.clone()
-    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
-    rt /= torch.distributed.get_world_size(
-    ) if torch.distributed.is_initialized() else 1
-    return rt
-
-
-def first_n(n, generator):
-    for i, d in zip(range(n), generator):
-        yield d
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile
deleted file mode 100644
index 271998f958..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/Dockerfile
+++ /dev/null
@@ -1,6 +0,0 @@
-ARG FROM_IMAGE_NAME
-FROM $FROM_IMAGE_NAME
-
-COPY requirements.txt .
-RUN pip3.7 install -r requirements.txt
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE
deleted file mode 100644
index dfcc682b4b..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-BSD 3-Clause License
-
-Copyright (c) 2017,
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md
deleted file mode 100644
index 291e2e2c31..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-一、依赖
-* NPU配套的run包安装
-* Python 3.7.5
-* PyTorch(NPU版本)
-* apex(NPU版本)
-* torch(NPU版本)
-* torchvision
-* dllogger
-
-二、训练流程：
-    
-单卡训练流程：
-
-```
-	1.安装环境
-	2.修改run_1p.sh字段"data"为当前磁盘的数据集路径
-	3.修改字段device_id（单卡训练所使用的device id），为训练配置device_id，比如device_id=0
-	4.cd到run_1p.sh文件的目录，执行bash run_1p.sh单卡脚本， 进行单卡训练
-```
-
-	
-多卡训练流程
-
-```
-	1.安装环境
-	2.修改多P脚本中字段"data"为当前磁盘的数据集路径
-	3.修改字段device_id_list（多卡训练所使用的device id列表），为训练配置device_id，比如4p,device_id_list=0,1,2,3；8P默认使用0，1，2，3，4，5，6，7卡不用配置
-	4.cd到run_8p.sh文件的目录，执行bash run_8p.sh等多卡脚本， 进行多卡训练	
-```
-
-
-
-	
-三、Docker容器训练：
-    
-1.导入镜像二进制包docker import ubuntuarmpytorch.tar REPOSITORY:TAG, 比如:
-
-        docker import ubuntuarmpytorch.tar pytorch:b020
-
-2.执行docker_start.sh后带三个参数：步骤1生成的REPOSITORY:TAG；数据集路径；模型执行路径；比如：
-
-        ./docker_start.sh pytorch:b020 /train/imagenet /home/ResNet50
-
-3.执行步骤一训练流程（环境安装除外）
-	
-
-四、测试结果
-    
-训练日志路径：在训练脚本的同目录下result文件夹里，如：
-
-        /home/ResNet50/result/training_8p_job_20201121023601
-	
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh
deleted file mode 100644
index 944bca3cda..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/docker_start.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-docker_image=$1
-data_dir=$2
-model_dir=$3
-
-docker run -it --ipc=host \
-              --device=/dev/davinci0 \
-              --device=/dev/davinci1 \
-              --device=/dev/davinci2 \
-              --device=/dev/davinci3 \
-              --device=/dev/davinci4 \
-              --device=/dev/davinci5 \
-              --device=/dev/davinci6 \
-              --device=/dev/davinci7 \
-              --device=/dev/davinci_manager \
-              --device=/dev/devmm_svm --device=/dev/hisi_hdc \
-              -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-              -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \
-              -v ${model_dir}:${model_dir} \
-              -v ${data_dir}:${data_dir}  \
-              -v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \
-              -v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \
-              -v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \
-              /bin/bash
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh
deleted file mode 100644
index 84d83feb94..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/env_npu.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-export install_path=/usr/local/Ascend
-
-if [ -d ${install_path}/toolkit ]; then
-    export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH}
-    export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH
-    export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH
-    export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH
-    export ASCEND_OPP_PATH=${install_path}/opp
-else
-    if [ -d ${install_path}/nnae/latest ];then
-        export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH
-        export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/
-        export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/
-        export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
-        export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
-        export ASCEND_AICPU_PATH=${install_path}/nnae/latest
-    else
-        export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
-        export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
-        export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/
-        export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
-        export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
-        export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest
-    fi
-fi
-
-
-#将Host日志输出到串口,0-关闭/1-开启
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-#设置默认日志级别,0-debug/1-info/2-warning/3-error
-export ASCEND_GLOBAL_LOG_LEVEL=3
-#设置Event日志开启标志,0-关闭/1-开启
-export ASCEND_GLOBAL_EVENT_ENABLE=0
-#设置是否开启taskque,0-关闭/1-开启
-export TASK_QUEUE_ENABLE=1
-#HCCL白名单开关,1-关闭/0-开启
-export HCCL_WHITELIST_DISABLE=1
-
-#设置device侧日志登记为error
-${install_path}/driver/tools/msnpureport -g error -d 0
-${install_path}/driver/tools/msnpureport -g error -d 1
-${install_path}/driver/tools/msnpureport -g error -d 2
-${install_path}/driver/tools/msnpureport -g error -d 3
-${install_path}/driver/tools/msnpureport -g error -d 4
-${install_path}/driver/tools/msnpureport -g error -d 5
-${install_path}/driver/tools/msnpureport -g error -d 6
-${install_path}/driver/tools/msnpureport -g error -d 7
-#关闭Device侧Event日志
-${install_path}/driver/tools/msnpureport -e disable
-
-path_lib=$(python3.7 -c """
-import sys
-import re
-result=''
-for index in range(len(sys.path)):
-    match_sit = re.search('-packages', sys.path[index])
-    if match_sit is not None:
-        match_lib = re.search('lib', sys.path[index])
-
-        if match_lib is not None:
-            end=match_lib.span()[1]
-            result += sys.path[index][0:end] + ':'
-
-        result+=sys.path[index] + '/torch/lib:'
-print(result)"""
-)
-
-echo ${path_lib}
-
-export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh
deleted file mode 100644
index 19eb321d39..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/eval.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-export TASK_QUEUE_ENABLE=1
-
-device_id=0
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_1p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-python3.7 ${currentDir}/pytorch_resnet50_apex.py \
-        --data /data/imagenet \
-        --npu ${device_id} \
-        -j64 \
-        -b512 \
-        --lr 0.2 \
-        --warmup 5 \
-        --label-smoothing=0.1 \
-        --epochs 90 \
-        --num_classes=1000 \
-        --evaluate=True \
-        --resume checkpoint.pth.tar \
-        --optimizer-batch-size 512 > ./resnet50_1p.log 2>&1 &
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig
deleted file mode 100644
index 1ce1f997bd..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/aipp_resnet50.aippconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-aipp_op {
-    aipp_mode: static
-    input_format : RGB888_U8
-    csc_switch : false
-    rbuv_swap_switch : true
-    matrix_r0c0 : 256
-    matrix_r0c1 : 0
-    matrix_r0c2 : 359
-    matrix_r1c0 : 256
-    matrix_r1c1 : -88
-    matrix_r1c2 : -183
-    matrix_r2c0 : 256
-    matrix_r2c1 : 454
-    matrix_r2c2 : 0
-    input_bias_0 : 0
-    input_bias_1 : 128
-    input_bias_2 : 128
-    mean_chn_0 : 0
-    mean_chn_1 : 0
-    mean_chn_2 : 0
-    min_chn_0 : 123.675
-    min_chn_1 : 116.28
-    min_chn_2 : 103.53
-    var_reci_chn_0 : 0.0171247538316637
-    var_reci_chn_1 : 0.0175070028011204
-    var_reci_chn_2 : 0.0174291938997821
-}
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh
deleted file mode 100644
index 6f8a2e44bf..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/convert/pb2om.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-model_path=$1
-framework=$2
-output_model_name=$3
-
-/usr/local/Ascend/atc/bin/atc \
---model=$model_path \
---framework=$framework \
---output=$output_model_name \
---input_format=NCHW --input_shape="actual_input_1:1,3,256,256" \
---enable_small_channel=1 \
---log=error \
---soc_version=Ascend310 \
---insert_op_conf=./aipp_resnet50.aippconfig
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh
deleted file mode 100644
index da1eb3dc4c..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/docker_start_infer.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2021 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-docker_image=$1
-data_dir=$2
-
-if [ -z "${docker_image}" ]; then
-	echo "please input docker_image"
-	exit 1
-fi
-
-if [ ! -d "${data_dir}" ]; then
-	echo "please input data_dir"
-	exit 1
-fi
-
-docker run -it \
-  --device=/dev/davinci0 \
-  --device=/dev/davinci_manager \
-  --device=/dev/devmm_svm \
-  --device=/dev/hisi_hdc \
-  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-  -v ${data_dir}:${data_dir} \
-  ${docker_image} \
-  /bin/bash
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt
deleted file mode 100644
index dccbd552fe..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/CMakeLists.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-cmake_minimum_required(VERSION 3.14.0)
-project(resnet50)
-
-set(TARGET resnet50)
-
-add_definitions(-DENABLE_DVPP_INTERFACE)
-add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall)
-add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie)
-
-# Check environment variable
-if(NOT DEFINED ENV{ASCEND_HOME})
-    message(FATAL_ERROR "please define environment variable:ASCEND_HOME")
-endif()
-if(NOT DEFINED ENV{ASCEND_VERSION})
-    message(WARNING "please define environment variable:ASCEND_VERSION")
-endif()
-if(NOT DEFINED ENV{ARCH_PATTERN})
-    message(WARNING "please define environment variable:ARCH_PATTERN")
-endif()
-set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include)
-set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64)
-
-set(MXBASE_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../)
-set(MXBASE_INC ${MXBASE_ROOT_DIR}/mxbase/include)
-set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib)
-set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/dist/lib/modelpostprocessors)
-set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/postprocess/include)
-if(DEFINED ENV{MXSDK_OPENSOURCE_DIR})
-    set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR})
-else()
-    set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource/dist)
-endif()
-
-include_directories(${ACL_INC_DIR})
-include_directories(${OPENSOURCE_DIR}/include)
-include_directories(${OPENSOURCE_DIR}/include/opencv4)
-
-include_directories(${MXBASE_INC})
-include_directories(${MXBASE_POST_PROCESS_DIR})
-
-link_directories(${ACL_LIB_DIR})
-link_directories(${OPENSOURCE_DIR}/lib)
-link_directories(${MXBASE_LIB_DIR})
-link_directories(${MXBASE_POST_LIB_DIR})
-
-add_executable(${TARGET} main.cpp Resnet50Classify.cpp)
-target_link_libraries(${TARGET} glog cpprest mxbase resnet50postprocess opencv_world stdc++fs)
-
-install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/)
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp
deleted file mode 100644
index 024a9c3ae1..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <unistd.h>
-#include <sys/stat.h>
-#include "Resnet50Classify.h"
-#include "MxBase/DeviceManager/DeviceManager.h"
-#include "MxBase/Log/Log.h"
-
-using namespace MxBase;
-namespace {
-const uint32_t YUV_BYTE_NU = 3;
-const uint32_t YUV_BYTE_DE = 2;
-const uint32_t VPC_H_ALIGN = 2;
-}
-
-APP_ERROR Resnet50Classify::Init(const InitParam &initParam)
-{
-    deviceId_ = initParam.deviceId;
-    APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices();
-    if (ret != APP_ERR_OK) {
-        LogError << "Init devices failed, ret=" << ret << ".";
-        return ret;
-    }
-    ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId);
-    if (ret != APP_ERR_OK) {
-        LogError << "Set context failed, ret=" << ret << ".";
-        return ret;
-    }
-    dvppWrapper_ = std::make_shared<MxBase::DvppWrapper>();
-    ret = dvppWrapper_->Init();
-    if (ret != APP_ERR_OK) {
-        LogError << "DvppWrapper init failed, ret=" << ret << ".";
-        return ret;
-    }
-    model_ = std::make_shared<MxBase::ModelInferenceProcessor>();
-    ret = model_->Init(initParam.modelPath, modelDesc_);
-    if (ret != APP_ERR_OK) {
-        LogError << "ModelInferenceProcessor init failed, ret=" << ret << ".";
-        return ret;
-    }
-    MxBase::ConfigData configData;
-    const std::string softmax = initParam.softmax ? "true" : "false";
-    const std::string checkTensor = initParam.checkTensor ? "true" : "false";
-
-    configData.SetJsonValue("CLASS_NUM", std::to_string(initParam.classNum));
-    configData.SetJsonValue("TOP_K", std::to_string(initParam.topk));
-    configData.SetJsonValue("SOFTMAX", softmax);
-    configData.SetJsonValue("CHECK_MODEL", checkTensor);
-
-    auto jsonStr = configData.GetCfgJson().serialize();
-    std::map<std::string, std::shared_ptr<void>> config;
-    config["postProcessConfigContent"] = std::make_shared<std::string>(jsonStr);
-    config["labelPath"] = std::make_shared<std::string>(initParam.labelPath);
-
-    post_ = std::make_shared<MxBase::Resnet50PostProcess>();
-    ret = post_->Init(config);
-    if (ret != APP_ERR_OK) {
-        LogError << "Resnet50PostProcess init failed, ret=" << ret << ".";
-        return ret;
-    }
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::DeInit()
-{
-    dvppWrapper_->DeInit();
-    model_->DeInit();
-    post_->DeInit();
-    MxBase::DeviceManager::GetInstance()->DestroyDevices();
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::ReadImage(const std::string &imgPath, cv::Mat &imageMat)
-{
-    imageMat = cv::imread(imgPath, cv::IMREAD_COLOR);
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::CenterCropImage(cv::Mat &img, cv::Mat &cropImg)
-{
-    float central_fraction = 0.75;
-    int crop_x = img.cols * central_fraction;
-    int crop_y = img.rows * central_fraction;
-    int crop_x1 = (img.cols - crop_x) / 2;
-    int crop_y1 = (img.rows - crop_y) / 2;
-
-    cv::Rect myROI(crop_x1, crop_y1, crop_x, crop_y);
-    LogInfo << "images crop_x1: " <<  crop_x1 << ", crop_x: " << crop_x << ", crop_y1: " << crop_y1 << ", crop_y: " << crop_y;
-    cropImg = img(myROI);
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat)
-{
-    static constexpr uint32_t resizeHeight = 256;
-    static constexpr uint32_t resizeWidth = 256;
-
-    cv::resize(srcImageMat, dstImageMat, cv::Size(resizeWidth, resizeHeight));
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase)
-{
-    const uint32_t dataSize = imageMat.cols * imageMat.rows * YUV444_RGB_WIDTH_NU;
-    MemoryData memoryDataDst(dataSize, MemoryData::MEMORY_DEVICE, deviceId_);
-    MemoryData memoryDataSrc(imageMat.data, dataSize, MemoryData::MEMORY_HOST_MALLOC);
-
-    APP_ERROR ret = MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc);
-    if (ret != APP_ERR_OK) {
-        LogError << GetError(ret) << "Memory malloc failed.";
-        return ret;
-    }
-    std::vector<uint32_t> shape = {imageMat.rows * YUV444_RGB_WIDTH_NU, static_cast<uint32_t>(imageMat.cols)};
-    tensorBase = TensorBase(memoryDataDst, false, shape, TENSOR_DTYPE_UINT8);
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::Inference(const std::vector<MxBase::TensorBase> &inputs,
-    std::vector<MxBase::TensorBase> &outputs)
-{
-    auto dtypes = model_->GetOutputDataType();
-    for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) {
-        std::vector<uint32_t> shape = {};
-        for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) {
-            shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]);
-        }
-        TensorBase tensor(shape, dtypes[i], MemoryData::MemoryType::MEMORY_DEVICE, deviceId_);
-        APP_ERROR ret = TensorBase::TensorBaseMalloc(tensor);
-        if (ret != APP_ERR_OK) {
-            LogError << "TensorBaseMalloc failed, ret=" << ret << ".";
-            return ret;
-        }
-        outputs.push_back(tensor);
-    }
-    DynamicInfo dynamicInfo = {};
-    dynamicInfo.dynamicType = DynamicType::STATIC_BATCH;
-    auto startTime = std::chrono::high_resolution_clock::now();
-    APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo);
-    auto endTime = std::chrono::high_resolution_clock::now();
-    double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
-    g_inferCost.push_back(costMs);
-    if (ret != APP_ERR_OK) {
-        LogError << "ModelInference failed, ret=" << ret << ".";
-        return ret;
-    }
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::PostProcess(const std::vector<MxBase::TensorBase> &inputs,
-    std::vector<std::vector<MxBase::ClassInfo>> &clsInfos)
-{
-    APP_ERROR ret = post_->Process(inputs, clsInfos);
-    if (ret != APP_ERR_OK) {
-        LogError << "Process failed, ret=" << ret << ".";
-        return ret;
-    }
-    return APP_ERR_OK;
-}
-
-APP_ERROR Resnet50Classify::SaveInferResult(const std::string &imagePath, std::vector<std::vector<MxBase::ClassInfo>> &batchClsInfos)
-{
-    uint32_t batchIndex = 0;
-    LogInfo << "image path: " << imagePath;
-    std::string fileName = imagePath.substr(imagePath.find_last_of("/") + 1);
-    size_t dot = fileName.find_last_of(".");
-    
-    std::string resultPathName = "result";
-    if (access(resultPathName.c_str(), 0) != 0) {
-        APP_ERROR ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR);
-        if (ret != 0) {
-            LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret;
-            return APP_ERR_COMM_FAILURE;
-        }
-    }
-    std::string resFileName = "result/" + fileName.substr(0,dot) + "_1.txt";
-    LogInfo << "file path for saving result: " << resFileName;
-    std::ofstream tfile(resFileName);
-    if (tfile.fail()) {
-        LogError << "Failed to open result file";
-        return APP_ERR_COMM_FAILURE;
-    }
-    
-    for (auto clsInfos : batchClsInfos) {
-        std::string resultStr = "";
-        for (auto clsInfo : clsInfos) {
-            LogDebug << "batchIndex: " << batchIndex << " className: " << clsInfo.className
-                     << " confidence: " << clsInfo.confidence << " classIndex: " << clsInfo.classId;
-            resultStr += std::to_string(clsInfo.classId) + " ";
-        }
-        tfile << resultStr << std::endl;
-        batchIndex += 1;
-    }
-    tfile.close();
-    return APP_ERR_OK; 
-}
-
-APP_ERROR Resnet50Classify::Process(const std::string &imgPath)
-{
-    cv::Mat imageMat;
-    APP_ERROR ret = ReadImage(imgPath, imageMat);
-    if (ret != APP_ERR_OK) {
-        LogError << "ReadImage failed, ret=" << ret << ".";
-        return ret;
-    }
-	
-    ret = CenterCropImage(imageMat, imageMat);
-    if (ret != APP_ERR_OK) {
-        LogError << "crop failed, ret=" << ret << ".";
-        return ret;
-    }
-    ret = Resize(imageMat, imageMat);
-    if (ret != APP_ERR_OK) {
-        LogError << "Resize failed, ret=" << ret << ".";
-        return ret;
-    }
-
-    std::vector<MxBase::TensorBase> inputs = {};
-    std::vector<MxBase::TensorBase> outputs = {};
-    TensorBase tensorBase;
-    ret = CVMatToTensorBase(imageMat, tensorBase);
-    if (ret != APP_ERR_OK) {
-        LogError << "CVMatToTensorBase failed, ret=" << ret << ".";
-        return ret;
-    }
-    inputs.push_back(tensorBase);
-    ret = Inference(inputs, outputs);
-    if (ret != APP_ERR_OK) {
-        LogError << "Inference failed, ret=" << ret << ".";
-        return ret;
-    }
-
-    std::vector<std::vector<MxBase::ClassInfo>> BatchClsInfos = {};
-    ret = PostProcess(outputs, BatchClsInfos);
-    if (ret != APP_ERR_OK) {
-        LogError << "PostProcess failed, ret=" << ret << ".";
-        return ret;
-    }
-
-    ret = SaveInferResult(imgPath, BatchClsInfos);
-    if (ret != APP_ERR_OK) {
-        LogError << "Save results failed, ret: " << ret << ".";
-        return ret;
-    }
-    
-    imageMat.release();
-    return APP_ERR_OK;
-}
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h
deleted file mode 100644
index 02f3b59774..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/Resnet50Classify.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef RESNET50_CLASSIFY_H
-#define RESNET50_CLASSIFY_H
-
-#include <opencv2/opencv.hpp>
-#include "MxBase/DvppWrapper/DvppWrapper.h"
-#include "MxBase/ModelInfer/ModelInferenceProcessor.h"
-#include "ClassPostProcessors/Resnet50PostProcess.h"
-#include "MxBase/Tensor/TensorContext/TensorContext.h"
-
-extern std::vector<double> g_inferCost;
-
-struct InitParam {
-    uint32_t deviceId;
-    std::string labelPath;
-    uint32_t classNum;
-    uint32_t topk;
-    bool softmax;
-    bool checkTensor;
-    std::string modelPath;
-};
-
-class Resnet50Classify {
-public:
-    APP_ERROR Init(const InitParam &initParam);
-    APP_ERROR DeInit();
-    APP_ERROR ReadImage(const std::string &imgPath, cv::Mat &imageMat);
-    APP_ERROR Resize(const cv::Mat &srcImageMat, cv::Mat &dstImageMat);
-    APP_ERROR CenterCropImage(cv::Mat &img, cv::Mat &cropImg);
-    APP_ERROR CVMatToTensorBase(const cv::Mat &imageMat, MxBase::TensorBase &tensorBase);
-    APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> &outputs);
-    APP_ERROR PostProcess(const std::vector<MxBase::TensorBase> &inputs,
-        std::vector<std::vector<MxBase::ClassInfo>> &clsInfos);
-    APP_ERROR SaveInferResult(const std::string &imgPath,
-                              std::vector<std::vector<MxBase::ClassInfo>> &batchClsInfos);
-    APP_ERROR Process(const std::string &imgPath);
-private:
-    std::shared_ptr<MxBase::DvppWrapper> dvppWrapper_;
-    std::shared_ptr<MxBase::ModelInferenceProcessor> model_;
-    std::shared_ptr<MxBase::Resnet50PostProcess> post_;
-    MxBase::ModelDesc modelDesc_;
-    uint32_t deviceId_ = 0;
-};
-#endif
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py
deleted file mode 100644
index da35817a4d..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/classification_task_metric.py
+++ /dev/null
@@ -1,174 +0,0 @@
-#coding = utf-8
-#Copyright 2020 Huawei Technologies Co., Ltd
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import json
-import numpy as np
-import time
-
-np.set_printoptions(threshold=sys.maxsize)
-
-LABEL_FILE = "HiAI_label.json"
-
-
-def gen_file_name(img_name):
-    """
-    :param img_name:image file name contains file path
-    :return:image file name without file path
-    """
-    full_name = img_name.split('/')[-1]
-    index = full_name.rfind('.')
-    return full_name[:index]
-
-
-def cre_groundtruth_dict(gtfile_path):
-    """
-    :param gtfile_path: file contains the imagename and label number
-    :return: dictionary key imagename, value is label number
-    """
-    img_gt_dict = {}
-    for gtfile in os.listdir(gtfile_path):
-        if (gtfile != LABEL_FILE):
-            with open(os.path.join(gtfile_path, gtfile), 'r') as f:
-                gt = json.load(f)
-                ret = gt["image"]["annotations"][0]["category_id"]
-                img_gt_dict[gen_file_name(gtfile)] = ret
-    return img_gt_dict
-
-
-def cre_groundtruth_dict_fromtxt(gtfile_path):
-    """
-    :param gtfile_path: file contains the imagename and label number
-    :return: dictionary key imagename, value is label number
-    """
-    img_gt_dict = {}
-    with open(gtfile_path, 'r')as f:
-        for line in f.readlines():
-            temp = line.strip().split(" ")
-            img_name = temp[0].split(".")[0]
-            img_lab = temp[1]
-            img_gt_dict[img_name] = img_lab
-    return img_gt_dict
-
-
-def load_statistical_predict_result(filepath):
-    """
-    :param filepath: the result of model predict
-    :return probabilities, number of label, in_type, color:
-    """
-    with open(filepath, 'r')as f:
-        data = f.readline()
-        temp = data.strip().split(" ")
-        n_label = len(temp)
-        data_vec = np.zeros((n_label), dtype=np.float32)
-        in_type = ''
-        color = ''
-        if n_label == 0:
-            in_type = f.readline()
-            color = f.readline()
-        else:
-            for ind, cls_ind in enumerate(temp):
-                if cls_ind:
-                    data_vec[ind] = np.int(cls_ind)
-    return data_vec, n_label, in_type, color
-
-
-def create_visualization_statistical_result(prediction_file_path,
-                                            result_store_path, json_file_name,
-                                            img_gt_dict, n_labels, topn=5):
-    """
-    :param prediction_file_path: the result of model predict
-    :param result_store_path: the root path to store result
-    :param json_file: json file to save result
-    :param img_gt_dict: the ground truth of imagenet
-    :param topn: classify model acc topk
-    :return:NA
-    """
-    writer = open(os.path.join(result_store_path, json_file_name), 'w')
-    table_dict = {}
-    table_dict["title"] = "Overall statistical evaluation"
-    table_dict["value"] = []
-
-    count = 0
-    res_cnt = 0
-    count_hit = np.zeros(topn)
-    for tfile_name in os.listdir(prediction_file_path):
-        count += 1
-        temp = tfile_name.split('.')[0]
-        index = temp.rfind('_')
-        img_name = temp[:index]
-        filepath = os.path.join(prediction_file_path, tfile_name)
-
-        ret = load_statistical_predict_result(filepath)
-        prediction = ret[0]
-        gt = img_gt_dict[img_name]
-        real_label = int(gt)
-        res_cnt = min(len(prediction), topn)
-        for i in range(res_cnt):
-            if str(real_label) == str(int(prediction[i])):
-                count_hit[i] += 1
-                break
-    if 'value' not in table_dict.keys():
-        print("the item value does not exist!")
-    else:
-        table_dict["value"].extend(
-            [{"key": "Number of images", "value": str(count)},
-             {"key": "Number of classes", "value": str(n_labels)}])
-        if count == 0:
-            accuracy = 0
-        else:
-            accuracy = np.cumsum(count_hit) / count
-
-        for i in range(res_cnt):
-            table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy",
-                                        "value": str(
-                                            round(accuracy[i] * 100, 2)) + '%'})
-        print(table_dict)
-        json.dump(table_dict, writer)
-    writer.close()
-
-
-if __name__ == '__main__':
-    start = time.time()
-    try:
-        # txt file path
-        folder_davinci_target = sys.argv[1]       
-        # annotation files path, "val_label.txt"
-        annotation_file_path = sys.argv[2]                
-        # the path to store the results json path
-        result_json_path = sys.argv[3]
-        # result json file name
-        json_file_name = sys.argv[4]
-    except IndexError:
-        print("Please enter right number of argmuments, expected 4!")
-        exit(1)
-    # class number
-    n_labels = 1000
-    if not os.path.exists(folder_davinci_target):
-        print("target file folder does not exist.")
-
-    if not os.path.exists(annotation_file_path):
-        print("Ground truth file does not exist.")
-
-    if not os.path.exists(result_json_path):
-        print("Result folder doesn't exist.")
-
-    img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path)
-    create_visualization_statistical_result(folder_davinci_target,
-                                            result_json_path, json_file_name,
-                                            img_label_dict, n_labels, topn=5)
-
-    elapsed = time.time() - start
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names
deleted file mode 100644
index 6003783a5d..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/imagenet1000_clsidx_to_labels.names
+++ /dev/null
@@ -1,1001 +0,0 @@
- # This is modified from https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a
-tench, Tinca tinca
-goldfish, Carassius auratus
-great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
-tiger shark, Galeocerdo cuvieri
-hammerhead, hammerhead shark
-electric ray, crampfish, numbfish, torpedo
-stingray
-cock
-hen
-ostrich, Struthio camelus
-brambling, Fringilla montifringilla
-goldfinch, Carduelis carduelis
-house finch, linnet, Carpodacus mexicanus
-junco, snowbird
-indigo bunting, indigo finch, indigo bird, Passerina cyanea
-robin, American robin, Turdus migratorius
-bulbul
-jay
-magpie
-chickadee
-water ouzel, dipper
-kite
-bald eagle, American eagle, Haliaeetus leucocephalus
-vulture
-great grey owl, great gray owl, Strix nebulosa
-European fire salamander, Salamandra salamandra
-common newt, Triturus vulgaris
-eft
-spotted salamander, Ambystoma maculatum
-axolotl, mud puppy, Ambystoma mexicanum
-bullfrog, Rana catesbeiana
-tree frog, tree-frog
-tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui
-loggerhead, loggerhead turtle, Caretta caretta
-leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
-mud turtle
-terrapin
-box turtle, box tortoise
-banded gecko
-common iguana, iguana, Iguana iguana
-American chameleon, anole, Anolis carolinensis
-whiptail, whiptail lizard
-agama
-frilled lizard, Chlamydosaurus kingi
-alligator lizard
-Gila monster, Heloderma suspectum
-green lizard, Lacerta viridis
-African chameleon, Chamaeleo chamaeleon
-Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis
-African crocodile, Nile crocodile, Crocodylus niloticus
-American alligator, Alligator mississipiensis
-triceratops
-thunder snake, worm snake, Carphophis amoenus
-ringneck snake, ring-necked snake, ring snake
-hognose snake, puff adder, sand viper
-green snake, grass snake
-king snake, kingsnake
-garter snake, grass snake
-water snake
-vine snake
-night snake, Hypsiglena torquata
-boa constrictor, Constrictor constrictor
-rock python, rock snake, Python sebae
-Indian cobra, Naja naja
-green mamba
-sea snake
-horned viper, cerastes, sand viper, horned asp, Cerastes cornutus
-diamondback, diamondback rattlesnake, Crotalus adamanteus
-sidewinder, horned rattlesnake, Crotalus cerastes
-trilobite
-harvestman, daddy longlegs, Phalangium opilio
-scorpion
-black and gold garden spider, Argiope aurantia
-barn spider, Araneus cavaticus
-garden spider, Aranea diademata
-black widow, Latrodectus mactans
-tarantula
-wolf spider, hunting spider
-tick
-centipede
-black grouse
-ptarmigan
-ruffed grouse, partridge, Bonasa umbellus
-prairie chicken, prairie grouse, prairie fowl
-peacock
-quail
-partridge
-African grey, African gray, Psittacus erithacus
-macaw
-sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita
-lorikeet
-coucal
-bee eater
-hornbill
-hummingbird
-jacamar
-toucan
-drake
-red-breasted merganser, Mergus serrator
-goose
-black swan, Cygnus atratus
-tusker
-echidna, spiny anteater, anteater
-platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus
-wallaby, brush kangaroo
-koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus
-wombat
-jellyfish
-sea anemone, anemone
-brain coral
-flatworm, platyhelminth
-nematode, nematode worm, roundworm
-conch
-snail
-slug
-sea slug, nudibranch
-chiton, coat-of-mail shell, sea cradle, polyplacophore
-chambered nautilus, pearly nautilus, nautilus
-Dungeness crab, Cancer magister
-rock crab, Cancer irroratus
-fiddler crab
-king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica
-American lobster, Northern lobster, Maine lobster, Homarus americanus
-spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish
-crayfish, crawfish, crawdad, crawdaddy
-hermit crab
-isopod
-white stork, Ciconia ciconia
-black stork, Ciconia nigra
-spoonbill
-flamingo
-little blue heron, Egretta caerulea
-American egret, great white heron, Egretta albus
-bittern
-crane
-limpkin, Aramus pictus
-European gallinule, Porphyrio porphyrio
-American coot, marsh hen, mud hen, water hen, Fulica americana
-bustard
-ruddy turnstone, Arenaria interpres
-red-backed sandpiper, dunlin, Erolia alpina
-redshank, Tringa totanus
-dowitcher
-oystercatcher, oyster catcher
-pelican
-king penguin, Aptenodytes patagonica
-albatross, mollymawk
-grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus
-killer whale, killer, orca, grampus, sea wolf, Orcinus orca
-dugong, Dugong dugon
-sea lion
-Chihuahua
-Japanese spaniel
-Maltese dog, Maltese terrier, Maltese
-Pekinese, Pekingese, Peke
-Shih-Tzu
-Blenheim spaniel
-papillon
-toy terrier
-Rhodesian ridgeback
-Afghan hound, Afghan
-basset, basset hound
-beagle
-bloodhound, sleuthhound
-bluetick
-black-and-tan coonhound
-Walker hound, Walker foxhound
-English foxhound
-redbone
-borzoi, Russian wolfhound
-Irish wolfhound
-Italian greyhound
-whippet
-Ibizan hound, Ibizan Podenco
-Norwegian elkhound, elkhound
-otterhound, otter hound
-Saluki, gazelle hound
-Scottish deerhound, deerhound
-Weimaraner
-Staffordshire bullterrier, Staffordshire bull terrier
-American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier
-Bedlington terrier
-Border terrier
-Kerry blue terrier
-Irish terrier
-Norfolk terrier
-Norwich terrier
-Yorkshire terrier
-wire-haired fox terrier
-Lakeland terrier
-Sealyham terrier, Sealyham
-Airedale, Airedale terrier
-cairn, cairn terrier
-Australian terrier
-Dandie Dinmont, Dandie Dinmont terrier
-Boston bull, Boston terrier
-miniature schnauzer
-giant schnauzer
-standard schnauzer
-Scotch terrier, Scottish terrier, Scottie
-Tibetan terrier, chrysanthemum dog
-silky terrier, Sydney silky
-soft-coated wheaten terrier
-West Highland white terrier
-Lhasa, Lhasa apso
-flat-coated retriever
-curly-coated retriever
-golden retriever
-Labrador retriever
-Chesapeake Bay retriever
-German short-haired pointer
-vizsla, Hungarian pointer
-English setter
-Irish setter, red setter
-Gordon setter
-Brittany spaniel
-clumber, clumber spaniel
-English springer, English springer spaniel
-Welsh springer spaniel
-cocker spaniel, English cocker spaniel, cocker
-Sussex spaniel
-Irish water spaniel
-kuvasz
-schipperke
-groenendael
-malinois
-briard
-kelpie
-komondor
-Old English sheepdog, bobtail
-Shetland sheepdog, Shetland sheep dog, Shetland
-collie
-Border collie
-Bouvier des Flandres, Bouviers des Flandres
-Rottweiler
-German shepherd, German shepherd dog, German police dog, alsatian
-Doberman, Doberman pinscher
-miniature pinscher
-Greater Swiss Mountain dog
-Bernese mountain dog
-Appenzeller
-EntleBucher
-boxer
-bull mastiff
-Tibetan mastiff
-French bulldog
-Great Dane
-Saint Bernard, St Bernard
-Eskimo dog, husky
-malamute, malemute, Alaskan malamute
-Siberian husky
-dalmatian, coach dog, carriage dog
-affenpinscher, monkey pinscher, monkey dog
-basenji
-pug, pug-dog
-Leonberg
-Newfoundland, Newfoundland dog
-Great Pyrenees
-Samoyed, Samoyede
-Pomeranian
-chow, chow chow
-keeshond
-Brabancon griffon
-Pembroke, Pembroke Welsh corgi
-Cardigan, Cardigan Welsh corgi
-toy poodle
-miniature poodle
-standard poodle
-Mexican hairless
-timber wolf, grey wolf, gray wolf, Canis lupus
-white wolf, Arctic wolf, Canis lupus tundrarum
-red wolf, maned wolf, Canis rufus, Canis niger
-coyote, prairie wolf, brush wolf, Canis latrans
-dingo, warrigal, warragal, Canis dingo
-dhole, Cuon alpinus
-African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus
-hyena, hyaena
-red fox, Vulpes vulpes
-kit fox, Vulpes macrotis
-Arctic fox, white fox, Alopex lagopus
-grey fox, gray fox, Urocyon cinereoargenteus
-tabby, tabby cat
-tiger cat
-Persian cat
-Siamese cat, Siamese
-Egyptian cat
-cougar, puma, catamount, mountain lion, painter, panther, Felis concolor
-lynx, catamount
-leopard, Panthera pardus
-snow leopard, ounce, Panthera uncia
-jaguar, panther, Panthera onca, Felis onca
-lion, king of beasts, Panthera leo
-tiger, Panthera tigris
-cheetah, chetah, Acinonyx jubatus
-brown bear, bruin, Ursus arctos
-American black bear, black bear, Ursus americanus, Euarctos americanus
-ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
-sloth bear, Melursus ursinus, Ursus ursinus
-mongoose
-meerkat, mierkat
-tiger beetle
-ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle
-ground beetle, carabid beetle
-long-horned beetle, longicorn, longicorn beetle
-leaf beetle, chrysomelid
-dung beetle
-rhinoceros beetle
-weevil
-fly
-bee
-ant, emmet, pismire
-grasshopper, hopper
-cricket
-walking stick, walkingstick, stick insect
-cockroach, roach
-mantis, mantid
-cicada, cicala
-leafhopper
-lacewing, lacewing fly
-dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk
-damselfly
-admiral
-ringlet, ringlet butterfly
-monarch, monarch butterfly, milkweed butterfly, Danaus plexippus
-cabbage butterfly
-sulphur butterfly, sulfur butterfly
-lycaenid, lycaenid butterfly
-starfish, sea star
-sea urchin
-sea cucumber, holothurian
-wood rabbit, cottontail, cottontail rabbit
-hare
-Angora, Angora rabbit
-hamster
-porcupine, hedgehog
-fox squirrel, eastern fox squirrel, Sciurus niger
-marmot
-beaver
-guinea pig, Cavia cobaya
-sorrel
-zebra
-hog, pig, grunter, squealer, Sus scrofa
-wild boar, boar, Sus scrofa
-warthog
-hippopotamus, hippo, river horse, Hippopotamus amphibius
-ox
-water buffalo, water ox, Asiatic buffalo, Bubalus bubalis
-bison
-ram, tup
-bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis
-ibex, Capra ibex
-hartebeest
-impala, Aepyceros melampus
-gazelle
-Arabian camel, dromedary, Camelus dromedarius
-llama
-weasel
-mink
-polecat, fitch, foulmart, foumart, Mustela putorius
-black-footed ferret, ferret, Mustela nigripes
-otter
-skunk, polecat, wood pussy
-badger
-armadillo
-three-toed sloth, ai, Bradypus tridactylus
-orangutan, orang, orangutang, Pongo pygmaeus
-gorilla, Gorilla gorilla
-chimpanzee, chimp, Pan troglodytes
-gibbon, Hylobates lar
-siamang, Hylobates syndactylus, Symphalangus syndactylus
-guenon, guenon monkey
-patas, hussar monkey, Erythrocebus patas
-baboon
-macaque
-langur
-colobus, colobus monkey
-proboscis monkey, Nasalis larvatus
-marmoset
-capuchin, ringtail, Cebus capucinus
-howler monkey, howler
-titi, titi monkey
-spider monkey, Ateles geoffroyi
-squirrel monkey, Saimiri sciureus
-Madagascar cat, ring-tailed lemur, Lemur catta
-indri, indris, Indri indri, Indri brevicaudatus
-Indian elephant, Elephas maximus
-African elephant, Loxodonta africana
-lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens
-giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca
-barracouta, snoek
-eel
-coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch
-rock beauty, Holocanthus tricolor
-anemone fish
-sturgeon
-gar, garfish, garpike, billfish, Lepisosteus osseus
-lionfish
-puffer, pufferfish, blowfish, globefish
-abacus
-abaya
-academic gown, academic robe, judge's robe
-accordion, piano accordion, squeeze box
-acoustic guitar
-aircraft carrier, carrier, flattop, attack aircraft carrier
-airliner
-airship, dirigible
-altar
-ambulance
-amphibian, amphibious vehicle
-analog clock
-apiary, bee house
-apron
-ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
-assault rifle, assault gun
-backpack, back pack, knapsack, packsack, rucksack, haversack
-bakery, bakeshop, bakehouse
-balance beam, beam
-balloon
-ballpoint, ballpoint pen, ballpen, Biro
-Band Aid
-banjo
-bannister, banister, balustrade, balusters, handrail
-barbell
-barber chair
-barbershop
-barn
-barometer
-barrel, cask
-barrow, garden cart, lawn cart, wheelbarrow
-baseball
-basketball
-bassinet
-bassoon
-bathing cap, swimming cap
-bath towel
-bathtub, bathing tub, bath, tub
-beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon
-beacon, lighthouse, beacon light, pharos
-beaker
-bearskin, busby, shako
-beer bottle
-beer glass
-bell cote, bell cot
-bib
-bicycle-built-for-two, tandem bicycle, tandem
-bikini, two-piece
-binder, ring-binder
-binoculars, field glasses, opera glasses
-birdhouse
-boathouse
-bobsled, bobsleigh, bob
-bolo tie, bolo, bola tie, bola
-bonnet, poke bonnet
-bookcase
-bookshop, bookstore, bookstall
-bottlecap
-bow
-bow tie, bow-tie, bowtie
-brass, memorial tablet, plaque
-brassiere, bra, bandeau
-breakwater, groin, groyne, mole, bulwark, seawall, jetty
-breastplate, aegis, egis
-broom
-bucket, pail
-buckle
-bulletproof vest
-bullet train, bullet
-butcher shop, meat market
-cab, hack, taxi, taxicab
-caldron, cauldron
-candle, taper, wax light
-cannon
-canoe
-can opener, tin opener
-cardigan
-car mirror
-carousel, carrousel, merry-go-round, roundabout, whirligig
-carpenter's kit, tool kit
-carton
-car wheel
-cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM
-cassette
-cassette player
-castle
-catamaran
-CD player
-cello, violoncello
-cellular telephone, cellular phone, cellphone, cell, mobile phone
-chain
-chainlink fence
-chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour
-chain saw, chainsaw
-chest
-chiffonier, commode
-chime, bell, gong
-china cabinet, china closet
-Christmas stocking
-church, church building
-cinema, movie theater, movie theatre, movie house, picture palace
-cleaver, meat cleaver, chopper
-cliff dwelling
-cloak
-clog, geta, patten, sabot
-cocktail shaker
-coffee mug
-coffeepot
-coil, spiral, volute, whorl, helix
-combination lock
-computer keyboard, keypad
-confectionery, confectionary, candy store
-container ship, containership, container vessel
-convertible
-corkscrew, bottle screw
-cornet, horn, trumpet, trump
-cowboy boot
-cowboy hat, ten-gallon hat
-cradle
-crane
-crash helmet
-crate
-crib, cot
-Crock Pot
-croquet ball
-crutch
-cuirass
-dam, dike, dyke
-desk
-desktop computer
-dial telephone, dial phone
-diaper, nappy, napkin
-digital clock
-digital watch
-dining table, board
-dishrag, dishcloth
-dishwasher, dish washer, dishwashing machine
-disk brake, disc brake
-dock, dockage, docking facility
-dogsled, dog sled, dog sleigh
-dome
-doormat, welcome mat
-drilling platform, offshore rig
-drum, membranophone, tympan
-drumstick
-dumbbell
-Dutch oven
-electric fan, blower
-electric guitar
-electric locomotive
-entertainment center
-envelope
-espresso maker
-face powder
-feather boa, boa
-file, file cabinet, filing cabinet
-fireboat
-fire engine, fire truck
-fire screen, fireguard
-flagpole, flagstaff
-flute, transverse flute
-folding chair
-football helmet
-forklift
-fountain
-fountain pen
-four-poster
-freight car
-French horn, horn
-frying pan, frypan, skillet
-fur coat
-garbage truck, dustcart
-gasmask, respirator, gas helmet
-gas pump, gasoline pump, petrol pump, island dispenser
-goblet
-go-kart
-golf ball
-golfcart, golf cart
-gondola
-gong, tam-tam
-gown
-grand piano, grand
-greenhouse, nursery, glasshouse
-grille, radiator grille
-grocery store, grocery, food market, market
-guillotine
-hair slide
-hair spray
-half track
-hammer
-hamper
-hand blower, blow dryer, blow drier, hair dryer, hair drier
-hand-held computer, hand-held microcomputer
-handkerchief, hankie, hanky, hankey
-hard disc, hard disk, fixed disk
-harmonica, mouth organ, harp, mouth harp
-harp
-harvester, reaper
-hatchet
-holster
-home theater, home theatre
-honeycomb
-hook, claw
-hoopskirt, crinoline
-horizontal bar, high bar
-horse cart, horse-cart
-hourglass
-iPod
-iron, smoothing iron
-jack-o'-lantern
-jean, blue jean, denim
-jeep, landrover
-jersey, T-shirt, tee shirt
-jigsaw puzzle
-jinrikisha, ricksha, rickshaw
-joystick
-kimono
-knee pad
-knot
-lab coat, laboratory coat
-ladle
-lampshade, lamp shade
-laptop, laptop computer
-lawn mower, mower
-lens cap, lens cover
-letter opener, paper knife, paperknife
-library
-lifeboat
-lighter, light, igniter, ignitor
-limousine, limo
-liner, ocean liner
-lipstick, lip rouge
-Loafer
-lotion
-loudspeaker, speaker, speaker unit, loudspeaker system, speaker system
-loupe, jeweler's loupe
-lumbermill, sawmill
-magnetic compass
-mailbag, postbag
-mailbox, letter box
-maillot
-maillot, tank suit
-manhole cover
-maraca
-marimba, xylophone
-mask
-matchstick
-maypole
-maze, labyrinth
-measuring cup
-medicine chest, medicine cabinet
-megalith, megalithic structure
-microphone, mike
-microwave, microwave oven
-military uniform
-milk can
-minibus
-miniskirt, mini
-minivan
-missile
-mitten
-mixing bowl
-mobile home, manufactured home
-Model T
-modem
-monastery
-monitor
-moped
-mortar
-mortarboard
-mosque
-mosquito net
-motor scooter, scooter
-mountain bike, all-terrain bike, off-roader
-mountain tent
-mouse, computer mouse
-mousetrap
-moving van
-muzzle
-nail
-neck brace
-necklace
-nipple
-notebook, notebook computer
-obelisk
-oboe, hautboy, hautbois
-ocarina, sweet potato
-odometer, hodometer, mileometer, milometer
-oil filter
-organ, pipe organ
-oscilloscope, scope, cathode-ray oscilloscope, CRO
-overskirt
-oxcart
-oxygen mask
-packet
-paddle, boat paddle
-paddlewheel, paddle wheel
-padlock
-paintbrush
-pajama, pyjama, pj's, jammies
-palace
-panpipe, pandean pipe, syrinx
-paper towel
-parachute, chute
-parallel bars, bars
-park bench
-parking meter
-passenger car, coach, carriage
-patio, terrace
-pay-phone, pay-station
-pedestal, plinth, footstall
-pencil box, pencil case
-pencil sharpener
-perfume, essence
-Petri dish
-photocopier
-pick, plectrum, plectron
-pickelhaube
-picket fence, paling
-pickup, pickup truck
-pier
-piggy bank, penny bank
-pill bottle
-pillow
-ping-pong ball
-pinwheel
-pirate, pirate ship
-pitcher, ewer
-plane, carpenter's plane, woodworking plane
-planetarium
-plastic bag
-plate rack
-plow, plough
-plunger, plumber's helper
-Polaroid camera, Polaroid Land camera
-pole
-police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria
-poncho
-pool table, billiard table, snooker table
-pop bottle, soda bottle
-pot, flowerpot
-potter's wheel
-power drill
-prayer rug, prayer mat
-printer
-prison, prison house
-projectile, missile
-projector
-puck, hockey puck
-punching bag, punch bag, punching ball, punchball
-purse
-quill, quill pen
-quilt, comforter, comfort, puff
-racer, race car, racing car
-racket, racquet
-radiator
-radio, wireless
-radio telescope, radio reflector
-rain barrel
-recreational vehicle, RV, R.V.
-reel
-reflex camera
-refrigerator, icebox
-remote control, remote
-restaurant, eating house, eating place, eatery
-revolver, six-gun, six-shooter
-rifle
-rocking chair, rocker
-rotisserie
-rubber eraser, rubber, pencil eraser
-rugby ball
-rule, ruler
-running shoe
-safe
-safety pin
-saltshaker, salt shaker
-sandal
-sarong
-sax, saxophone
-scabbard
-scale, weighing machine
-school bus
-schooner
-scoreboard
-screen, CRT screen
-screw
-screwdriver
-seat belt, seatbelt
-sewing machine
-shield, buckler
-shoe shop, shoe-shop, shoe store
-shoji
-shopping basket
-shopping cart
-shovel
-shower cap
-shower curtain
-ski
-ski mask
-sleeping bag
-slide rule, slipstick
-sliding door
-slot, one-armed bandit
-snorkel
-snowmobile
-snowplow, snowplough
-soap dispenser
-soccer ball
-sock
-solar dish, solar collector, solar furnace
-sombrero
-soup bowl
-space bar
-space heater
-space shuttle
-spatula
-speedboat
-spider web, spider's web
-spindle
-sports car, sport car
-spotlight, spot
-stage
-steam locomotive
-steel arch bridge
-steel drum
-stethoscope
-stole
-stone wall
-stopwatch, stop watch
-stove
-strainer
-streetcar, tram, tramcar, trolley, trolley car
-stretcher
-studio couch, day bed
-stupa, tope
-submarine, pigboat, sub, U-boat
-suit, suit of clothes
-sundial
-sunglass
-sunglasses, dark glasses, shades
-sunscreen, sunblock, sun blocker
-suspension bridge
-swab, swob, mop
-sweatshirt
-swimming trunks, bathing trunks
-swing
-switch, electric switch, electrical switch
-syringe
-table lamp
-tank, army tank, armored combat vehicle, armoured combat vehicle
-tape player
-teapot
-teddy, teddy bear
-television, television system
-tennis ball
-thatch, thatched roof
-theater curtain, theatre curtain
-thimble
-thresher, thrasher, threshing machine
-throne
-tile roof
-toaster
-tobacco shop, tobacconist shop, tobacconist
-toilet seat
-torch
-totem pole
-tow truck, tow car, wrecker
-toyshop
-tractor
-trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
-tray
-trench coat
-tricycle, trike, velocipede
-trimaran
-tripod
-triumphal arch
-trolleybus, trolley coach, trackless trolley
-trombone
-tub, vat
-turnstile
-typewriter keyboard
-umbrella
-unicycle, monocycle
-upright, upright piano
-vacuum, vacuum cleaner
-vase
-vault
-velvet
-vending machine
-vestment
-viaduct
-violin, fiddle
-volleyball
-waffle iron
-wall clock
-wallet, billfold, notecase, pocketbook
-wardrobe, closet, press
-warplane, military plane
-washbasin, handbasin, washbowl, lavabo, wash-hand basin
-washer, automatic washer, washing machine
-water bottle
-water jug
-water tower
-whiskey jug
-whistle
-wig
-window screen
-window shade
-Windsor tie
-wine bottle
-wing
-wok
-wooden spoon
-wool, woolen, woollen
-worm fence, snake fence, snake-rail fence, Virginia fence
-wreck
-yawl
-yurt
-web site, website, internet site, site
-comic book
-crossword puzzle, crossword
-street sign
-traffic light, traffic signal, stoplight
-book jacket, dust cover, dust jacket, dust wrapper
-menu
-plate
-guacamole
-consomme
-hot pot, hotpot
-trifle
-ice cream, icecream
-ice lolly, lolly, lollipop, popsicle
-French loaf
-bagel, beigel
-pretzel
-cheeseburger
-hotdog, hot dog, red hot
-mashed potato
-head cabbage
-broccoli
-cauliflower
-zucchini, courgette
-spaghetti squash
-acorn squash
-butternut squash
-cucumber, cuke
-artichoke, globe artichoke
-bell pepper
-cardoon
-mushroom
-Granny Smith
-strawberry
-orange
-lemon
-fig
-pineapple, ananas
-banana
-jackfruit, jak, jack
-custard apple
-pomegranate
-hay
-carbonara
-chocolate sauce, chocolate syrup
-dough
-meat loaf, meatloaf
-pizza, pizza pie
-potpie
-burrito
-red wine
-espresso
-cup
-eggnog
-alp
-bubble
-cliff, drop, drop-off
-coral reef
-geyser
-lakeside, lakeshore
-promontory, headland, head, foreland
-sandbar, sand bar
-seashore, coast, seacoast, sea-coast
-valley, vale
-volcano
-ballplayer, baseball player
-groom, bridegroom
-scuba diver
-rapeseed
-daisy
-yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum
-corn
-acorn
-hip, rose hip, rosehip
-buckeye, horse chestnut, conker
-coral fungus
-agaric
-gyromitra
-stinkhorn, carrion fungus
-earthstar
-hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa
-bolete
-ear, spike, capitulum
-toilet tissue, toilet paper, bathroom tissue
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp
deleted file mode 100644
index 137e934b96..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/mxbase/main.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2021. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include <experimental/filesystem>
-#include <vector>
-#include "Resnet50Classify.h"
-#include "MxBase/Log/Log.h"
-
-namespace fs = std::experimental::filesystem;
-namespace {
-const uint32_t CLASS_NUM = 1000;
-}
-std::vector<double> g_inferCost;
-
-int main(int argc, char* argv[])
-{
-    if (argc <= 1) {
-        LogWarn << "Please input image path, such as './val_union/'.";
-        return APP_ERR_OK;
-    }
-
-    InitParam initParam = {};
-    initParam.deviceId = 0;
-    initParam.classNum = CLASS_NUM;
-    initParam.labelPath = "../models/imagenet1000_clsidx_to_labels.names";
-    initParam.topk = 5;
-    initParam.softmax = true;
-    initParam.checkTensor = true;
-    initParam.modelPath = "../models/resnet50_pytorch.om";
-    auto resnet50 = std::make_shared<Resnet50Classify>();
-    APP_ERROR ret = resnet50->Init(initParam);
-    if (ret != APP_ERR_OK) {
-        LogError << "Resnet50Classify init failed, ret=" << ret << ".";
-        return ret;
-    }
-
-    std::string imgDir = argv[1];
-    for (auto & entry : fs::directory_iterator(imgDir)) {
-        LogInfo << "read image path " << entry.path();
-        ret = resnet50->Process(entry.path());
-        if (ret != APP_ERR_OK) {
-            LogError << "Resnet50Classify process failed, ret=" << ret << ".";
-            resnet50->DeInit();
-            return ret;
-        }
-    }
-    resnet50->DeInit();
-    double costSum = 0;
-    for (unsigned int i = 0; i < g_inferCost.size(); i++) {
-        costSum += g_inferCost[i];
-    }
-    LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms.";
-    LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " images/sec.";
-    return APP_ERR_OK;
-}
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline
deleted file mode 100644
index c024eb3ef3..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/Resnet50.pipeline
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-  "resnet50_classification": {
-    "stream_config": {
-      "deviceId": "0"
-    },
-    "appsrc0": {
-      "props": {
-        "blocksize": "409600"
-      },
-      "factory": "appsrc",
-      "next": "mxpi_imagedecoder0"
-    },
-    "mxpi_imagedecoder0": {
-      "props": {
-        "handleMethod": "opencv"
-      },
-      "factory": "mxpi_imagedecoder",
-      "next": "mxpi_imageresize0"
-    },
-    "mxpi_imageresize0": {
-      "props": {
-        "handleMethod": "opencv",
-        "resizeHeight": "304",
-        "resizeWidth": "304",
-        "resizeType": "Resizer_Stretch"
-      },
-      "factory": "mxpi_imageresize",
-      "next": "mxpi_opencvcentercrop0"
-    },
-    "mxpi_opencvcentercrop0": {
-      "props": {
-          "dataSource": "mxpi_imageresize0",
-          "cropHeight": "256",
-          "cropWidth": "256"
-      },
-      "factory": "mxpi_opencvcentercrop",
-      "next": "mxpi_tensorinfer0"
-    },
-    
-    
-    "mxpi_tensorinfer0": {
-      "props": {
-        "dataSource": "mxpi_opencvcentercrop0",
-        "modelPath": "../models/resnet50_pytorch.om",
-        "waitingTime": "2000",
-        "outputDeviceId": "-1"
-      },
-      "factory": "mxpi_tensorinfer",
-      "next": "mxpi_classpostprocessor0"
-    },
-    "mxpi_classpostprocessor0": {
-      "props": {
-        "dataSource": "mxpi_tensorinfer0",
-        "postProcessConfigPath": "./resnet50_aipp_pt.cfg",
-        "labelPath": "./imagenet1000_clsidx_to_labels.names",
-        "postProcessLibPath": "libresnet50postprocess.so"
-      },
-      "factory": "mxpi_classpostprocessor",
-      "next": "mxpi_dataserialize0"
-    },
-    "mxpi_dataserialize0": {
-      "props": {
-        "outputDataKeys": "mxpi_classpostprocessor0"
-      },
-      "factory": "mxpi_dataserialize",
-      "next": "appsink0"
-    },
-    "appsink0": {
-      "props": {
-        "blocksize": "4096000"
-      },
-      "factory": "appsink"
-    }
-  }
-}
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py
deleted file mode 100644
index 2d383ae6cf..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/classification_task_metric.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#coding = utf-8
-#Copyright 2020 Huawei Technologies Co., Ltd
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import os
-import sys
-import json
-import numpy as np
-import time
-
-np.set_printoptions(threshold=sys.maxsize)
-
-LABEL_FILE = "HiAI_label.json"
-
-
-def gen_file_name(img_name):
-    """
-    :param: file path
-    :return: filename
-    """
-    full_name = img_name.split('/')[-1]
-    index = full_name.rfind('.')
-    return full_name[:index]
-
-
-def cre_groundtruth_dict(gtfile_path):
-    """
-    :param filename: file contains the imagename and label number
-    :return: dictionary key imagename, value is label number
-    """
-    img_gt_dict = {}
-    for gtfile in os.listdir(gtfile_path):
-        if (gtfile != LABEL_FILE):
-            with open(os.path.join(gtfile_path, gtfile), 'r') as f:
-                gt = json.load(f)
-                ret = gt["image"]["annotations"][0]["category_id"]
-                img_gt_dict[gen_file_name(gtfile)] = ret
-    return img_gt_dict
-
-
-def cre_groundtruth_dict_fromtxt(gtfile_path):
-    """
-    :param filename: file contains the imagename and label number
-    :return: dictionary key imagename, value is label number
-    """
-    img_gt_dict = {}
-    with open(gtfile_path, 'r')as f:
-        for line in f.readlines():
-            temp = line.strip().split(" ")
-            img_name = temp[0].split(".")[0]
-            img_lab = temp[1]
-            img_gt_dict[img_name] = img_lab
-    return img_gt_dict
-
-
-def load_statistical_predict_result(filepath):
-    """
-    :param filepath: the result of model predict
-    :return probabilities, number of label, in_type, color:
-    """
-    with open(filepath, 'r')as f:
-        data = f.readline()
-        temp = data.strip().split(" ")
-        n_label = len(temp)
-        data_vec = np.zeros((n_label), dtype=np.float32)
-        in_type = ''
-        color = ''
-        if n_label == 0:
-            in_type = f.readline()
-            color = f.readline()
-        else:
-            for ind, cls_ind in enumerate(temp):
-                if cls_ind:
-                    data_vec[ind] = np.int(cls_ind)
-    return data_vec, n_label, in_type, color
-
-
-def create_visualization_statistical_result(prediction_file_path,
-                                            result_store_path, json_file_name,
-                                            img_gt_dict, n_labels, topn=5):
-    """
-    :param prediction_file_path: the result of model predict
-    :param result_store_path: the root path to store result
-    :param json_file: json file to save result
-    :param img_gt_dict: the ground truth of imagenet
-    :param topn: classify model acc topk
-    :param n_labels: class numbers
-    :return:
-    """
-    writer = open(os.path.join(result_store_path, json_file_name), 'w')
-    table_dict = {}
-    table_dict["title"] = "Overall statistical evaluation"
-    table_dict["value"] = []
-
-    count = 0
-    res_cnt = 0
-    count_hit = np.zeros(topn)
-    for tfile_name in os.listdir(prediction_file_path):
-        count += 1
-        temp = tfile_name.split('.')[0]
-        index = temp.rfind('_')
-        img_name = temp[:index]
-        filepath = os.path.join(prediction_file_path, tfile_name)
-
-        ret = load_statistical_predict_result(filepath)
-        prediction = ret[0]
-        gt = img_gt_dict[img_name]
-        real_label = int(gt)
-        res_cnt = min(len(prediction), topn)
-        for i in range(res_cnt):
-            if str(real_label) == str(int(prediction[i])):
-                count_hit[i] += 1
-                break
-    if 'value' not in table_dict.keys():
-        print("the item value does not exist!")
-    else:
-        table_dict["value"].extend(
-            [{"key": "Number of images", "value": str(count)},
-             {"key": "Number of classes", "value": str(n_labels)}])
-        if count == 0:
-            accuracy = 0
-        else:
-            accuracy = np.cumsum(count_hit) / count
-
-        for i in range(res_cnt):
-            table_dict["value"].append({"key": "Top" + str(i + 1) + " accuracy",
-                                        "value": str(
-                                            round(accuracy[i] * 100, 2)) + '%'})
-        print(table_dict)
-        json.dump(table_dict, writer)
-    writer.close()
-
-
-if __name__ == '__main__':
-    start = time.time()
-    try:
-        # txt file path
-        folder_davinci_target = sys.argv[1]       
-        # annotation files path, "val_label.txt"
-        annotation_file_path = sys.argv[2]                
-        # the path to store the results json path
-        result_json_path = sys.argv[3]
-        # result json file name
-        json_file_name = sys.argv[4]
-    except IndexError:
-        print("Please enter right number of argmuments, expected 4!")
-        exit(1)
-    # class number
-    n_labels = 1000
-    if not os.path.exists(folder_davinci_target):
-        print("target file folder does not exist.")
-
-    if not os.path.exists(annotation_file_path):
-        print("Ground truth file does not exist.")
-
-    if not os.path.exists(result_json_path):
-        print("Result folder doesn't exist.")
-
-    img_label_dict = cre_groundtruth_dict_fromtxt(annotation_file_path)
-    create_visualization_statistical_result(folder_davinci_target,
-                                            result_json_path, json_file_name,
-                                            img_label_dict, n_labels, topn=5)
-
-    elapsed = time.time() - start
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names
deleted file mode 100644
index 6003783a5d..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/imagenet1000_clsidx_to_labels.names
+++ /dev/null
@@ -1,1001 +0,0 @@
- # This is modified from https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a
-tench, Tinca tinca
-goldfish, Carassius auratus
-great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
-tiger shark, Galeocerdo cuvieri
-hammerhead, hammerhead shark
-electric ray, crampfish, numbfish, torpedo
-stingray
-cock
-hen
-ostrich, Struthio camelus
-brambling, Fringilla montifringilla
-goldfinch, Carduelis carduelis
-house finch, linnet, Carpodacus mexicanus
-junco, snowbird
-indigo bunting, indigo finch, indigo bird, Passerina cyanea
-robin, American robin, Turdus migratorius
-bulbul
-jay
-magpie
-chickadee
-water ouzel, dipper
-kite
-bald eagle, American eagle, Haliaeetus leucocephalus
-vulture
-great grey owl, great gray owl, Strix nebulosa
-European fire salamander, Salamandra salamandra
-common newt, Triturus vulgaris
-eft
-spotted salamander, Ambystoma maculatum
-axolotl, mud puppy, Ambystoma mexicanum
-bullfrog, Rana catesbeiana
-tree frog, tree-frog
-tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui
-loggerhead, loggerhead turtle, Caretta caretta
-leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
-mud turtle
-terrapin
-box turtle, box tortoise
-banded gecko
-common iguana, iguana, Iguana iguana
-American chameleon, anole, Anolis carolinensis
-whiptail, whiptail lizard
-agama
-frilled lizard, Chlamydosaurus kingi
-alligator lizard
-Gila monster, Heloderma suspectum
-green lizard, Lacerta viridis
-African chameleon, Chamaeleo chamaeleon
-Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis
-African crocodile, Nile crocodile, Crocodylus niloticus
-American alligator, Alligator mississipiensis
-triceratops
-thunder snake, worm snake, Carphophis amoenus
-ringneck snake, ring-necked snake, ring snake
-hognose snake, puff adder, sand viper
-green snake, grass snake
-king snake, kingsnake
-garter snake, grass snake
-water snake
-vine snake
-night snake, Hypsiglena torquata
-boa constrictor, Constrictor constrictor
-rock python, rock snake, Python sebae
-Indian cobra, Naja naja
-green mamba
-sea snake
-horned viper, cerastes, sand viper, horned asp, Cerastes cornutus
-diamondback, diamondback rattlesnake, Crotalus adamanteus
-sidewinder, horned rattlesnake, Crotalus cerastes
-trilobite
-harvestman, daddy longlegs, Phalangium opilio
-scorpion
-black and gold garden spider, Argiope aurantia
-barn spider, Araneus cavaticus
-garden spider, Aranea diademata
-black widow, Latrodectus mactans
-tarantula
-wolf spider, hunting spider
-tick
-centipede
-black grouse
-ptarmigan
-ruffed grouse, partridge, Bonasa umbellus
-prairie chicken, prairie grouse, prairie fowl
-peacock
-quail
-partridge
-African grey, African gray, Psittacus erithacus
-macaw
-sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita
-lorikeet
-coucal
-bee eater
-hornbill
-hummingbird
-jacamar
-toucan
-drake
-red-breasted merganser, Mergus serrator
-goose
-black swan, Cygnus atratus
-tusker
-echidna, spiny anteater, anteater
-platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus
-wallaby, brush kangaroo
-koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus
-wombat
-jellyfish
-sea anemone, anemone
-brain coral
-flatworm, platyhelminth
-nematode, nematode worm, roundworm
-conch
-snail
-slug
-sea slug, nudibranch
-chiton, coat-of-mail shell, sea cradle, polyplacophore
-chambered nautilus, pearly nautilus, nautilus
-Dungeness crab, Cancer magister
-rock crab, Cancer irroratus
-fiddler crab
-king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica
-American lobster, Northern lobster, Maine lobster, Homarus americanus
-spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish
-crayfish, crawfish, crawdad, crawdaddy
-hermit crab
-isopod
-white stork, Ciconia ciconia
-black stork, Ciconia nigra
-spoonbill
-flamingo
-little blue heron, Egretta caerulea
-American egret, great white heron, Egretta albus
-bittern
-crane
-limpkin, Aramus pictus
-European gallinule, Porphyrio porphyrio
-American coot, marsh hen, mud hen, water hen, Fulica americana
-bustard
-ruddy turnstone, Arenaria interpres
-red-backed sandpiper, dunlin, Erolia alpina
-redshank, Tringa totanus
-dowitcher
-oystercatcher, oyster catcher
-pelican
-king penguin, Aptenodytes patagonica
-albatross, mollymawk
-grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus
-killer whale, killer, orca, grampus, sea wolf, Orcinus orca
-dugong, Dugong dugon
-sea lion
-Chihuahua
-Japanese spaniel
-Maltese dog, Maltese terrier, Maltese
-Pekinese, Pekingese, Peke
-Shih-Tzu
-Blenheim spaniel
-papillon
-toy terrier
-Rhodesian ridgeback
-Afghan hound, Afghan
-basset, basset hound
-beagle
-bloodhound, sleuthhound
-bluetick
-black-and-tan coonhound
-Walker hound, Walker foxhound
-English foxhound
-redbone
-borzoi, Russian wolfhound
-Irish wolfhound
-Italian greyhound
-whippet
-Ibizan hound, Ibizan Podenco
-Norwegian elkhound, elkhound
-otterhound, otter hound
-Saluki, gazelle hound
-Scottish deerhound, deerhound
-Weimaraner
-Staffordshire bullterrier, Staffordshire bull terrier
-American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier
-Bedlington terrier
-Border terrier
-Kerry blue terrier
-Irish terrier
-Norfolk terrier
-Norwich terrier
-Yorkshire terrier
-wire-haired fox terrier
-Lakeland terrier
-Sealyham terrier, Sealyham
-Airedale, Airedale terrier
-cairn, cairn terrier
-Australian terrier
-Dandie Dinmont, Dandie Dinmont terrier
-Boston bull, Boston terrier
-miniature schnauzer
-giant schnauzer
-standard schnauzer
-Scotch terrier, Scottish terrier, Scottie
-Tibetan terrier, chrysanthemum dog
-silky terrier, Sydney silky
-soft-coated wheaten terrier
-West Highland white terrier
-Lhasa, Lhasa apso
-flat-coated retriever
-curly-coated retriever
-golden retriever
-Labrador retriever
-Chesapeake Bay retriever
-German short-haired pointer
-vizsla, Hungarian pointer
-English setter
-Irish setter, red setter
-Gordon setter
-Brittany spaniel
-clumber, clumber spaniel
-English springer, English springer spaniel
-Welsh springer spaniel
-cocker spaniel, English cocker spaniel, cocker
-Sussex spaniel
-Irish water spaniel
-kuvasz
-schipperke
-groenendael
-malinois
-briard
-kelpie
-komondor
-Old English sheepdog, bobtail
-Shetland sheepdog, Shetland sheep dog, Shetland
-collie
-Border collie
-Bouvier des Flandres, Bouviers des Flandres
-Rottweiler
-German shepherd, German shepherd dog, German police dog, alsatian
-Doberman, Doberman pinscher
-miniature pinscher
-Greater Swiss Mountain dog
-Bernese mountain dog
-Appenzeller
-EntleBucher
-boxer
-bull mastiff
-Tibetan mastiff
-French bulldog
-Great Dane
-Saint Bernard, St Bernard
-Eskimo dog, husky
-malamute, malemute, Alaskan malamute
-Siberian husky
-dalmatian, coach dog, carriage dog
-affenpinscher, monkey pinscher, monkey dog
-basenji
-pug, pug-dog
-Leonberg
-Newfoundland, Newfoundland dog
-Great Pyrenees
-Samoyed, Samoyede
-Pomeranian
-chow, chow chow
-keeshond
-Brabancon griffon
-Pembroke, Pembroke Welsh corgi
-Cardigan, Cardigan Welsh corgi
-toy poodle
-miniature poodle
-standard poodle
-Mexican hairless
-timber wolf, grey wolf, gray wolf, Canis lupus
-white wolf, Arctic wolf, Canis lupus tundrarum
-red wolf, maned wolf, Canis rufus, Canis niger
-coyote, prairie wolf, brush wolf, Canis latrans
-dingo, warrigal, warragal, Canis dingo
-dhole, Cuon alpinus
-African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus
-hyena, hyaena
-red fox, Vulpes vulpes
-kit fox, Vulpes macrotis
-Arctic fox, white fox, Alopex lagopus
-grey fox, gray fox, Urocyon cinereoargenteus
-tabby, tabby cat
-tiger cat
-Persian cat
-Siamese cat, Siamese
-Egyptian cat
-cougar, puma, catamount, mountain lion, painter, panther, Felis concolor
-lynx, catamount
-leopard, Panthera pardus
-snow leopard, ounce, Panthera uncia
-jaguar, panther, Panthera onca, Felis onca
-lion, king of beasts, Panthera leo
-tiger, Panthera tigris
-cheetah, chetah, Acinonyx jubatus
-brown bear, bruin, Ursus arctos
-American black bear, black bear, Ursus americanus, Euarctos americanus
-ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
-sloth bear, Melursus ursinus, Ursus ursinus
-mongoose
-meerkat, mierkat
-tiger beetle
-ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle
-ground beetle, carabid beetle
-long-horned beetle, longicorn, longicorn beetle
-leaf beetle, chrysomelid
-dung beetle
-rhinoceros beetle
-weevil
-fly
-bee
-ant, emmet, pismire
-grasshopper, hopper
-cricket
-walking stick, walkingstick, stick insect
-cockroach, roach
-mantis, mantid
-cicada, cicala
-leafhopper
-lacewing, lacewing fly
-dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk
-damselfly
-admiral
-ringlet, ringlet butterfly
-monarch, monarch butterfly, milkweed butterfly, Danaus plexippus
-cabbage butterfly
-sulphur butterfly, sulfur butterfly
-lycaenid, lycaenid butterfly
-starfish, sea star
-sea urchin
-sea cucumber, holothurian
-wood rabbit, cottontail, cottontail rabbit
-hare
-Angora, Angora rabbit
-hamster
-porcupine, hedgehog
-fox squirrel, eastern fox squirrel, Sciurus niger
-marmot
-beaver
-guinea pig, Cavia cobaya
-sorrel
-zebra
-hog, pig, grunter, squealer, Sus scrofa
-wild boar, boar, Sus scrofa
-warthog
-hippopotamus, hippo, river horse, Hippopotamus amphibius
-ox
-water buffalo, water ox, Asiatic buffalo, Bubalus bubalis
-bison
-ram, tup
-bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis
-ibex, Capra ibex
-hartebeest
-impala, Aepyceros melampus
-gazelle
-Arabian camel, dromedary, Camelus dromedarius
-llama
-weasel
-mink
-polecat, fitch, foulmart, foumart, Mustela putorius
-black-footed ferret, ferret, Mustela nigripes
-otter
-skunk, polecat, wood pussy
-badger
-armadillo
-three-toed sloth, ai, Bradypus tridactylus
-orangutan, orang, orangutang, Pongo pygmaeus
-gorilla, Gorilla gorilla
-chimpanzee, chimp, Pan troglodytes
-gibbon, Hylobates lar
-siamang, Hylobates syndactylus, Symphalangus syndactylus
-guenon, guenon monkey
-patas, hussar monkey, Erythrocebus patas
-baboon
-macaque
-langur
-colobus, colobus monkey
-proboscis monkey, Nasalis larvatus
-marmoset
-capuchin, ringtail, Cebus capucinus
-howler monkey, howler
-titi, titi monkey
-spider monkey, Ateles geoffroyi
-squirrel monkey, Saimiri sciureus
-Madagascar cat, ring-tailed lemur, Lemur catta
-indri, indris, Indri indri, Indri brevicaudatus
-Indian elephant, Elephas maximus
-African elephant, Loxodonta africana
-lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens
-giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca
-barracouta, snoek
-eel
-coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch
-rock beauty, Holocanthus tricolor
-anemone fish
-sturgeon
-gar, garfish, garpike, billfish, Lepisosteus osseus
-lionfish
-puffer, pufferfish, blowfish, globefish
-abacus
-abaya
-academic gown, academic robe, judge's robe
-accordion, piano accordion, squeeze box
-acoustic guitar
-aircraft carrier, carrier, flattop, attack aircraft carrier
-airliner
-airship, dirigible
-altar
-ambulance
-amphibian, amphibious vehicle
-analog clock
-apiary, bee house
-apron
-ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
-assault rifle, assault gun
-backpack, back pack, knapsack, packsack, rucksack, haversack
-bakery, bakeshop, bakehouse
-balance beam, beam
-balloon
-ballpoint, ballpoint pen, ballpen, Biro
-Band Aid
-banjo
-bannister, banister, balustrade, balusters, handrail
-barbell
-barber chair
-barbershop
-barn
-barometer
-barrel, cask
-barrow, garden cart, lawn cart, wheelbarrow
-baseball
-basketball
-bassinet
-bassoon
-bathing cap, swimming cap
-bath towel
-bathtub, bathing tub, bath, tub
-beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon
-beacon, lighthouse, beacon light, pharos
-beaker
-bearskin, busby, shako
-beer bottle
-beer glass
-bell cote, bell cot
-bib
-bicycle-built-for-two, tandem bicycle, tandem
-bikini, two-piece
-binder, ring-binder
-binoculars, field glasses, opera glasses
-birdhouse
-boathouse
-bobsled, bobsleigh, bob
-bolo tie, bolo, bola tie, bola
-bonnet, poke bonnet
-bookcase
-bookshop, bookstore, bookstall
-bottlecap
-bow
-bow tie, bow-tie, bowtie
-brass, memorial tablet, plaque
-brassiere, bra, bandeau
-breakwater, groin, groyne, mole, bulwark, seawall, jetty
-breastplate, aegis, egis
-broom
-bucket, pail
-buckle
-bulletproof vest
-bullet train, bullet
-butcher shop, meat market
-cab, hack, taxi, taxicab
-caldron, cauldron
-candle, taper, wax light
-cannon
-canoe
-can opener, tin opener
-cardigan
-car mirror
-carousel, carrousel, merry-go-round, roundabout, whirligig
-carpenter's kit, tool kit
-carton
-car wheel
-cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM
-cassette
-cassette player
-castle
-catamaran
-CD player
-cello, violoncello
-cellular telephone, cellular phone, cellphone, cell, mobile phone
-chain
-chainlink fence
-chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour
-chain saw, chainsaw
-chest
-chiffonier, commode
-chime, bell, gong
-china cabinet, china closet
-Christmas stocking
-church, church building
-cinema, movie theater, movie theatre, movie house, picture palace
-cleaver, meat cleaver, chopper
-cliff dwelling
-cloak
-clog, geta, patten, sabot
-cocktail shaker
-coffee mug
-coffeepot
-coil, spiral, volute, whorl, helix
-combination lock
-computer keyboard, keypad
-confectionery, confectionary, candy store
-container ship, containership, container vessel
-convertible
-corkscrew, bottle screw
-cornet, horn, trumpet, trump
-cowboy boot
-cowboy hat, ten-gallon hat
-cradle
-crane
-crash helmet
-crate
-crib, cot
-Crock Pot
-croquet ball
-crutch
-cuirass
-dam, dike, dyke
-desk
-desktop computer
-dial telephone, dial phone
-diaper, nappy, napkin
-digital clock
-digital watch
-dining table, board
-dishrag, dishcloth
-dishwasher, dish washer, dishwashing machine
-disk brake, disc brake
-dock, dockage, docking facility
-dogsled, dog sled, dog sleigh
-dome
-doormat, welcome mat
-drilling platform, offshore rig
-drum, membranophone, tympan
-drumstick
-dumbbell
-Dutch oven
-electric fan, blower
-electric guitar
-electric locomotive
-entertainment center
-envelope
-espresso maker
-face powder
-feather boa, boa
-file, file cabinet, filing cabinet
-fireboat
-fire engine, fire truck
-fire screen, fireguard
-flagpole, flagstaff
-flute, transverse flute
-folding chair
-football helmet
-forklift
-fountain
-fountain pen
-four-poster
-freight car
-French horn, horn
-frying pan, frypan, skillet
-fur coat
-garbage truck, dustcart
-gasmask, respirator, gas helmet
-gas pump, gasoline pump, petrol pump, island dispenser
-goblet
-go-kart
-golf ball
-golfcart, golf cart
-gondola
-gong, tam-tam
-gown
-grand piano, grand
-greenhouse, nursery, glasshouse
-grille, radiator grille
-grocery store, grocery, food market, market
-guillotine
-hair slide
-hair spray
-half track
-hammer
-hamper
-hand blower, blow dryer, blow drier, hair dryer, hair drier
-hand-held computer, hand-held microcomputer
-handkerchief, hankie, hanky, hankey
-hard disc, hard disk, fixed disk
-harmonica, mouth organ, harp, mouth harp
-harp
-harvester, reaper
-hatchet
-holster
-home theater, home theatre
-honeycomb
-hook, claw
-hoopskirt, crinoline
-horizontal bar, high bar
-horse cart, horse-cart
-hourglass
-iPod
-iron, smoothing iron
-jack-o'-lantern
-jean, blue jean, denim
-jeep, landrover
-jersey, T-shirt, tee shirt
-jigsaw puzzle
-jinrikisha, ricksha, rickshaw
-joystick
-kimono
-knee pad
-knot
-lab coat, laboratory coat
-ladle
-lampshade, lamp shade
-laptop, laptop computer
-lawn mower, mower
-lens cap, lens cover
-letter opener, paper knife, paperknife
-library
-lifeboat
-lighter, light, igniter, ignitor
-limousine, limo
-liner, ocean liner
-lipstick, lip rouge
-Loafer
-lotion
-loudspeaker, speaker, speaker unit, loudspeaker system, speaker system
-loupe, jeweler's loupe
-lumbermill, sawmill
-magnetic compass
-mailbag, postbag
-mailbox, letter box
-maillot
-maillot, tank suit
-manhole cover
-maraca
-marimba, xylophone
-mask
-matchstick
-maypole
-maze, labyrinth
-measuring cup
-medicine chest, medicine cabinet
-megalith, megalithic structure
-microphone, mike
-microwave, microwave oven
-military uniform
-milk can
-minibus
-miniskirt, mini
-minivan
-missile
-mitten
-mixing bowl
-mobile home, manufactured home
-Model T
-modem
-monastery
-monitor
-moped
-mortar
-mortarboard
-mosque
-mosquito net
-motor scooter, scooter
-mountain bike, all-terrain bike, off-roader
-mountain tent
-mouse, computer mouse
-mousetrap
-moving van
-muzzle
-nail
-neck brace
-necklace
-nipple
-notebook, notebook computer
-obelisk
-oboe, hautboy, hautbois
-ocarina, sweet potato
-odometer, hodometer, mileometer, milometer
-oil filter
-organ, pipe organ
-oscilloscope, scope, cathode-ray oscilloscope, CRO
-overskirt
-oxcart
-oxygen mask
-packet
-paddle, boat paddle
-paddlewheel, paddle wheel
-padlock
-paintbrush
-pajama, pyjama, pj's, jammies
-palace
-panpipe, pandean pipe, syrinx
-paper towel
-parachute, chute
-parallel bars, bars
-park bench
-parking meter
-passenger car, coach, carriage
-patio, terrace
-pay-phone, pay-station
-pedestal, plinth, footstall
-pencil box, pencil case
-pencil sharpener
-perfume, essence
-Petri dish
-photocopier
-pick, plectrum, plectron
-pickelhaube
-picket fence, paling
-pickup, pickup truck
-pier
-piggy bank, penny bank
-pill bottle
-pillow
-ping-pong ball
-pinwheel
-pirate, pirate ship
-pitcher, ewer
-plane, carpenter's plane, woodworking plane
-planetarium
-plastic bag
-plate rack
-plow, plough
-plunger, plumber's helper
-Polaroid camera, Polaroid Land camera
-pole
-police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria
-poncho
-pool table, billiard table, snooker table
-pop bottle, soda bottle
-pot, flowerpot
-potter's wheel
-power drill
-prayer rug, prayer mat
-printer
-prison, prison house
-projectile, missile
-projector
-puck, hockey puck
-punching bag, punch bag, punching ball, punchball
-purse
-quill, quill pen
-quilt, comforter, comfort, puff
-racer, race car, racing car
-racket, racquet
-radiator
-radio, wireless
-radio telescope, radio reflector
-rain barrel
-recreational vehicle, RV, R.V.
-reel
-reflex camera
-refrigerator, icebox
-remote control, remote
-restaurant, eating house, eating place, eatery
-revolver, six-gun, six-shooter
-rifle
-rocking chair, rocker
-rotisserie
-rubber eraser, rubber, pencil eraser
-rugby ball
-rule, ruler
-running shoe
-safe
-safety pin
-saltshaker, salt shaker
-sandal
-sarong
-sax, saxophone
-scabbard
-scale, weighing machine
-school bus
-schooner
-scoreboard
-screen, CRT screen
-screw
-screwdriver
-seat belt, seatbelt
-sewing machine
-shield, buckler
-shoe shop, shoe-shop, shoe store
-shoji
-shopping basket
-shopping cart
-shovel
-shower cap
-shower curtain
-ski
-ski mask
-sleeping bag
-slide rule, slipstick
-sliding door
-slot, one-armed bandit
-snorkel
-snowmobile
-snowplow, snowplough
-soap dispenser
-soccer ball
-sock
-solar dish, solar collector, solar furnace
-sombrero
-soup bowl
-space bar
-space heater
-space shuttle
-spatula
-speedboat
-spider web, spider's web
-spindle
-sports car, sport car
-spotlight, spot
-stage
-steam locomotive
-steel arch bridge
-steel drum
-stethoscope
-stole
-stone wall
-stopwatch, stop watch
-stove
-strainer
-streetcar, tram, tramcar, trolley, trolley car
-stretcher
-studio couch, day bed
-stupa, tope
-submarine, pigboat, sub, U-boat
-suit, suit of clothes
-sundial
-sunglass
-sunglasses, dark glasses, shades
-sunscreen, sunblock, sun blocker
-suspension bridge
-swab, swob, mop
-sweatshirt
-swimming trunks, bathing trunks
-swing
-switch, electric switch, electrical switch
-syringe
-table lamp
-tank, army tank, armored combat vehicle, armoured combat vehicle
-tape player
-teapot
-teddy, teddy bear
-television, television system
-tennis ball
-thatch, thatched roof
-theater curtain, theatre curtain
-thimble
-thresher, thrasher, threshing machine
-throne
-tile roof
-toaster
-tobacco shop, tobacconist shop, tobacconist
-toilet seat
-torch
-totem pole
-tow truck, tow car, wrecker
-toyshop
-tractor
-trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
-tray
-trench coat
-tricycle, trike, velocipede
-trimaran
-tripod
-triumphal arch
-trolleybus, trolley coach, trackless trolley
-trombone
-tub, vat
-turnstile
-typewriter keyboard
-umbrella
-unicycle, monocycle
-upright, upright piano
-vacuum, vacuum cleaner
-vase
-vault
-velvet
-vending machine
-vestment
-viaduct
-violin, fiddle
-volleyball
-waffle iron
-wall clock
-wallet, billfold, notecase, pocketbook
-wardrobe, closet, press
-warplane, military plane
-washbasin, handbasin, washbowl, lavabo, wash-hand basin
-washer, automatic washer, washing machine
-water bottle
-water jug
-water tower
-whiskey jug
-whistle
-wig
-window screen
-window shade
-Windsor tie
-wine bottle
-wing
-wok
-wooden spoon
-wool, woolen, woollen
-worm fence, snake fence, snake-rail fence, Virginia fence
-wreck
-yawl
-yurt
-web site, website, internet site, site
-comic book
-crossword puzzle, crossword
-street sign
-traffic light, traffic signal, stoplight
-book jacket, dust cover, dust jacket, dust wrapper
-menu
-plate
-guacamole
-consomme
-hot pot, hotpot
-trifle
-ice cream, icecream
-ice lolly, lolly, lollipop, popsicle
-French loaf
-bagel, beigel
-pretzel
-cheeseburger
-hotdog, hot dog, red hot
-mashed potato
-head cabbage
-broccoli
-cauliflower
-zucchini, courgette
-spaghetti squash
-acorn squash
-butternut squash
-cucumber, cuke
-artichoke, globe artichoke
-bell pepper
-cardoon
-mushroom
-Granny Smith
-strawberry
-orange
-lemon
-fig
-pineapple, ananas
-banana
-jackfruit, jak, jack
-custard apple
-pomegranate
-hay
-carbonara
-chocolate sauce, chocolate syrup
-dough
-meat loaf, meatloaf
-pizza, pizza pie
-potpie
-burrito
-red wine
-espresso
-cup
-eggnog
-alp
-bubble
-cliff, drop, drop-off
-coral reef
-geyser
-lakeside, lakeshore
-promontory, headland, head, foreland
-sandbar, sand bar
-seashore, coast, seacoast, sea-coast
-valley, vale
-volcano
-ballplayer, baseball player
-groom, bridegroom
-scuba diver
-rapeseed
-daisy
-yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum
-corn
-acorn
-hip, rose hip, rosehip
-buckeye, horse chestnut, conker
-coral fungus
-agaric
-gyromitra
-stinkhorn, carrion fungus
-earthstar
-hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa
-bolete
-ear, spike, capitulum
-toilet tissue, toilet paper, bathroom tissue
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py
deleted file mode 100644
index a6c0585eb7..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/main.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-
-"""
-Copyright 2020 Huawei Technologies Co., Ltd
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-# import StreamManagerApi.py
-from StreamManagerApi import StreamManagerApi
-from StreamManagerApi import MxDataInput
-import os
-import json
-import numpy as np
-import datetime
-import sys
-
-
-def save_infer_result(result, result_name, image_name):
-    """
-    save the infer result to name_1.txt
-    the file content top5:
-        class_id1, class_id2, class_id3, class_id4, class_id5
-    """
-    load_dict = json.loads(result)
-    if load_dict.get('MxpiClass') is None:
-        with open(result_name + "/" + image_name[:-5] + '.txt', 'w') as f_write:
-            f_write.write("")
-    else:
-        res_vec = load_dict['MxpiClass']
-        with open(result_name + "/" + image_name[:-5] + '_1.txt', 'w') as f_write:
-            list1 = [str(item.get("classId")) + " " for item in res_vec]
-            f_write.writelines(list1)
-            f_write.write('\n')
-            
-def main():
-    # init stream manager
-    stream_manager_api = StreamManagerApi()
-    ret = stream_manager_api.InitManager()
-    if ret != 0:
-        print("Failed to init Stream manager, ret=%s" % str(ret))
-        exit()
-
-    # create streams by pipeline config file
-    with open("../pipeline/Resnet50.pipeline", 'rb') as f:
-        pipeline_str = f.read()
-    ret = stream_manager_api.CreateMultipleStreams(pipeline_str)
-
-
-    if ret != 0:
-        print("Failed to create Stream, ret=%s" % str(ret))
-        exit()
-
-    # Construct the input of the stream
-    data_input = MxDataInput()
-    
-    dir_name = sys.argv[1]
-    res_dir_name = sys.argv[2]
-    file_list = os.listdir(dir_name)
-    if not os.path.exists(res_dir_name):
-        os.makedirs(res_dir_name)
-
-    for file_name in file_list:
-        file_path = os.path.join(dir_name, file_name)
-        if file_name.lower().endswith(".jpg") or file_name.lower().endswith(".jpeg"):
-            portion = os.path.splitext(file_name)
-            with open(file_path, 'rb') as f:
-                data_input.data = f.read()
-        else:
-            continue
-        
-        empty_data = []
-
-        stream_name = b'resnet50_classification'
-        in_plugin_id = 0
-        unique_id = stream_manager_api.SendDataWithUniqueId(stream_name, in_plugin_id, data_input)
-        if unique_id < 0:
-            print("Failed to send data to stream.")
-            exit()
-        # Obtain the inference result by specifying stream_name and unique_id.
-        start_time = datetime.datetime.now()
-        infer_result = stream_manager_api.GetResultWithUniqueId(stream_name, unique_id, 3000)
-        endtime = datetime.datetime.now()
-        print('sdk run time: {}'.format((endtime - start_time).microseconds))
-        if infer_result.errorCode != 0:
-            print("GetResultWithUniqueId error. errorCode=%d, errorMsg=%s" % (
-                infer_result.errorCode, infer_result.data.decode()))
-            exit()
-        # print the infer result
-        infer_res = infer_result.data.decode()
-        print("process img: {}, infer result: {}".format(file_name, infer_res))
-        
-        save_infer_result(infer_result.data.decode(), res_dir_name, file_name)
-
-    # destroy streams
-    stream_manager_api.DestroyAllStreams()
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg
deleted file mode 100644
index b8552cf634..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/resnet50_aipp_pt.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-CLASS_NUM=1000
-SOFTMAX=True
-TOP_K=5
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh
deleted file mode 100644
index 3899f83598..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/infer/sdk/run.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-image_path=$1
-result_dir=$2
-
-set -e
-
-CUR_PATH=$(cd "$(dirname "$0")" || { warn "Failed to check path/to/run.sh" ; exit ; } ; pwd)
-
-# Simple log helper functions
-info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; }
-warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; }
-
-#export MX_SDK_HOME=${CUR_PATH}/../../..
-export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH}
-export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner
-export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins
-
-#to set PYTHONPATH, import the StreamManagerApi.py
-export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python
-
-python3 main.py $image_path  $result_dir
-exit 0
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py
deleted file mode 100644
index 89e41a2e40..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelarts/train_start.py
+++ /dev/null
@@ -1,688 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import os
-import glob
-import random
-import shutil
-import sys
-import time
-import warnings
-import math
-import numpy as np
-
-import torch
-import torch.nn as nn
-import torch.nn.parallel
-import torch.backends.cudnn as cudnn
-import torch.distributed as dist
-import torch.optim
-import torch.multiprocessing as mp
-import torch.utils.data
-import torch.utils.data.distributed
-import torchvision.transforms as transforms
-import torchvision.datasets as datasets
-import torchvision.models as models
-import torch.npu
-
-sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../'))
-from pthtar2onx import convert
-import DistributedResnet50.image_classification.resnet as nvmodels
-from apex import amp
-import moxing as mox
-
-BATCH_SIZE = 512
-EPOCHS_SIZE = 100
-TRAIN_STEP = 8000
-LOG_STEP = 1
-
-CALCULATE_DEVICE = "npu:7"
-PRINT_DEVICE = "cpu"
-SOURCE_DIR = "/data/imagenet"
-
-model_names = sorted(name for name in models.__dict__
-                     if name.islower() and not name.startswith("__")
-                     and callable(models.__dict__[name]))
-
-parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-parser.add_argument('--data_url',
-                    metavar='DIR',
-                    default='/cache/data_url',
-                    help='path to dataset')
-parser.add_argument('-a', '--arch',
-                    metavar='ARCH',
-                    default='resnet50',
-                    choices=model_names,
-                    help='model architecture: ' +
-                         ' | '.join(model_names) +
-                         ' (default: resnet18)')
-parser.add_argument('-j', '--workers',
-                    default=32,
-                    type=int,
-                    metavar='N',
-                    help='number of data loading workers (default: 8)')
-parser.add_argument('--epochs',
-                    default=1,
-                    type=int,
-                    metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--start-epoch',
-                    default=0,
-                    type=int,
-                    metavar='N',
-                    help='manual epoch number (useful on restarts)')
-parser.add_argument('-b', '--batch-size',
-                    default=BATCH_SIZE,
-                    type=int,
-                    metavar='N',
-                    help='mini-batch size (default: 256), this is the total '
-                         'batch size of all GPUs on the current node when '
-                         'using Data Parallel or Distributed Data Parallel')
-parser.add_argument('--lr', '--learning-rate',
-                    default=0.2,
-                    type=float,
-                    metavar='LR',
-                    help='initial learning rate',
-                    dest='lr')
-parser.add_argument('--momentum',
-                    default=0.9,
-                    type=float,
-                    metavar='M',
-                    help='momentum')
-parser.add_argument('--wd', '--weight-decay',
-                    default=1e-4,
-                    type=float,
-                    metavar='W',
-                    help='weight decay (default: 1e-4)',
-                    dest='weight_decay')
-parser.add_argument('-p', '--print-freq',
-                    default=10,
-                    type=int,
-                    metavar='N',
-                    help='print frequency (default: 10)')
-parser.add_argument('--resume',
-                    default='',
-                    type=str,
-                    metavar='PATH',
-                    help='path to latest checkpoint (default: none)')
-parser.add_argument('-e', '--evaluate',
-                    dest='evaluate',
-                    action='store_true',
-                    help='evaluate model on validation set')
-parser.add_argument('--pretrained',
-                    dest='pretrained',
-                    action='store_true',
-                    help='use pre-trained model')
-parser.add_argument('--world-size',
-                    default=-1,
-                    type=int,
-                    help='number of nodes for distributed training')
-parser.add_argument('--rank',
-                    default=-1,
-                    type=int,
-                    help='node rank for distributed training')
-parser.add_argument('--dist-url',
-                    default=None,
-                    type=str,
-                    help='url used to set up distributed training')
-parser.add_argument('--dist-backend',
-                    default='nccl',
-                    type=str,
-                    help='distributed backend')
-parser.add_argument('--seed',
-                    default=None,
-                    type=int,
-                    help='seed for initializing training. ')
-parser.add_argument('--gpu',
-                    default=None,
-                    type=int,
-                    help='GPU id to use.')
-parser.add_argument('--npu',
-                    default=None,
-                    type=int,
-                    help='NPU id to use.')
-parser.add_argument('--multiprocessing-distributed',
-                    action='store_true')
-parser.add_argument('--warmup',
-                    default=5,
-                    type=int,
-                    metavar='E',
-                    help='number of warmup epochs')
-parser.add_argument('--label-smoothing',
-                    default=0.1,
-                    type=float,
-                    metavar='S',
-                    help='label smoothing')
-parser.add_argument('--optimizer-batch-size',
-                    default=-1,
-                    type=int,
-                    metavar='N',
-                    help=
-                    'size of a total batch size, for simulating bigger batches using gradient accumulation')
-parser.add_argument('--static-loss-scale',
-                    type=float,
-                    default=1,
-                    help=
-                    'Static loss scale, positive power of 2 values can improve fp16 convergence.')
-
-parser.add_argument('-t', '--fine-tuning', default=False, action='store_true',
-                    help='transfer learning + fine tuning - train only the last FC layer')
-parser.add_argument('--train_url',
-                    default="/cache/training",
-                    type=str,
-                    help="setting dir of training output")
-parser.add_argument('--pretrained_weight', default='', type=str, metavar='PATH',
-                    help='path to pretrained weight')
-parser.add_argument('--onnx', default=True, action='store_true',
-                    help="convert pth model to onnx")
-
-CACHE_TRAINING_URL = "/cache/training"
-best_acc1 = 0
-
-def main():
-    args = parser.parse_args()
-    if args.npu is None:
-        args.npu = 0
-    global CALCULATE_DEVICE
-    CALCULATE_DEVICE = "npu:{}".format(args.npu)
-    torch.npu.set_device(CALCULATE_DEVICE)
-    print("use ", CALCULATE_DEVICE)
-
-    if args.seed is not None:
-        random.seed(args.seed)
-        torch.manual_seed(args.seed)
-        cudnn.deterministic = True
-        warnings.warn('You have chosen to seed training. '
-                      'This will turn on the CUDNN deterministic setting, '
-                      'which can slow down your training considerably! '
-                      'You may see unexpected behavior when restarting '
-                      'from checkpoints.')
-
-    if args.gpu is not None:
-        warnings.warn('You have chosen a specific GPU. This will completely '
-                      'disable data parallelism.')
-
-    if args.dist_url == "env://" and args.world_size == -1:
-        args.world_size = int(os.environ["WORLD_SIZE"])
-
-    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
-
-    ngpus_per_node = torch.cuda.device_count()
-    if args.multiprocessing_distributed:
-        # Since we have ngpus_per_node processes per node, the total world_size
-        # needs to be adjusted accordingly
-        args.world_size = ngpus_per_node * args.world_size
-        # Use torch.multiprocessing.spawn to launch distributed processes: the
-        # main_worker process function
-        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
-    else:
-        # Simply call main_worker function
-        main_worker(args.gpu, ngpus_per_node, args)
-
-def main_worker(gpu, ngpus_per_node, args):
-    global best_acc1
-    args.gpu = gpu
-
-    if args.gpu is not None:
-        print("Use GPU: {} for training".format(args.gpu))
-
-    if args.distributed:
-        if args.dist_url == "env://" and args.rank == -1:
-            args.rank = int(os.environ["RANK"])
-        if args.multiprocessing_distributed:
-            # For multiprocessing distributed training, rank needs to be the
-            # global rank among all the processes
-            args.rank = args.rank * ngpus_per_node + gpu
-        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
-                                world_size=args.world_size, rank=args.rank)
-    # create model
-    if args.pretrained:
-        print("=> using pre-trained model '{}'".format(args.arch))
-        model = nvmodels.build_resnet('resnet50', 'classic', True)
-        CACHE_MODEL_URL = "/cache/model"
-        os.makedirs(CACHE_MODEL_URL, exist_ok=True)
-        mox.file.copy_parallel(args.pretrained_weight, os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar"))
-        pretrained_weight = os.path.join(CACHE_MODEL_URL, "checkpoint.pth.tar")
-        pretrained_dict = torch.load(pretrained_weight)["state_dict"]
-        pretrained_dict.pop('module.fc.weight')
-        pretrained_dict.pop('module.fc.bias')
-        model.load_state_dict(pretrained_dict, strict=False)
-    else:
-        print("=> creating model '{}'".format(args.arch))
-        model = models.__dict__[args.arch](zero_init_residual=True)
-
-    if args.fine_tuning:
-        print("=> transfer-learning mode + fine-tuning (train only the last FC layer)")
-        # Freeze Previous Layers(now we are using them as features extractor)
-        # Fine Tuning the last layer for the new task
-        if args.arch == "resnet50":
-            model.classifier = nn.Linear(1024, 10)
-            model.classifier.parameters()
-        else:
-            print("Error: Fine-tuning is not supported on this architecture")
-            exit(-1)
-    else:
-        model.parameters()
-
-    for layer in model.modules():
-        if isinstance(layer, nn.Linear):
-            torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), )
-    if args.distributed:
-        # For multiprocessing distributed, DistributedDataParallel constructor
-        # should always set the single device scope, otherwise,
-        # DistributedDataParallel will use all available devices.
-        if args.gpu is not None:
-            torch.cuda.set_device(args.gpu)
-            model.cuda(args.gpu)
-            # When using a single GPU per process and per
-            # DistributedDataParallel, we need to divide the batch size
-            # ourselves based on the total number of GPUs we have
-            args.batch_size = int(args.batch_size / ngpus_per_node)
-            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
-            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
-        else:
-            model.cuda()
-            # DistributedDataParallel will divide and allocate batch_size to all
-            # available GPUs if device_ids are not set
-            model = torch.nn.parallel.DistributedDataParallel(model)
-    elif args.gpu is not None:
-        torch.cuda.set_device(args.gpu)
-        model = model.cuda(args.gpu)
-    else:
-        # DataParallel will divide and allocate batch_size to all available GPUs
-        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
-            model.features = torch.nn.DataParallel(model.features)
-            model.cuda()
-        else:
-            model = model.to(CALCULATE_DEVICE)
-
-    lr_policy = lr_cosine_policy(args.lr,
-                                 args.warmup,
-                                 args.epochs)
-
-
-    # define loss function (criterion) and optimizer
-    loss = nn.CrossEntropyLoss
-    if args.label_smoothing > 0.0:
-        loss = lambda: LabelSmoothing(args.label_smoothing)
-    criterion = loss().to(CALCULATE_DEVICE)
-    optimizer = torch.optim.SGD([
-        {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0},
-        {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}],
-                                args.lr,
-                                momentum=args.momentum)
-    
-    model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1)
-
-    # optionally resume from a checkpoint
-    if args.resume:
-        if os.path.isfile(args.resume):
-            print("=> loading checkpoint '{}'".format(args.resume))
-            if args.npu is not None:
-                checkpoint = torch.load(args.resume)
-            elif args.gpu is None:
-                checkpoint = torch.load(args.resume)
-            else:
-                # Map model to be loaded to specified single gpu.
-                loc = 'cuda:{}'.format(args.gpu)
-                checkpoint = torch.load(args.resume, map_location=loc)
-            args.start_epoch = checkpoint['epoch']
-            best_acc1 = checkpoint['best_acc1']
-            if args.npu is not None:
-                best_acc1 = best_acc1.to("npu:{}".format(args.npu))
-            elif args.gpu is not None:
-                # best_acc1 may be from a checkpoint from a different GPU
-                best_acc1 = best_acc1.to(args.gpu)
-            model.load_state_dict(checkpoint['state_dict'])
-            print("=> loaded checkpoint '{}' (epoch {})"
-                  .format(args.resume, checkpoint['epoch']))
-        else:
-            print("=> no checkpoint found at '{}'".format(args.resume))
-
-    cudnn.benchmark = True
-
-    real_path = '/cache/data_url'
-    if not os.path.exists(real_path):
-        os.makedirs(real_path)
-    mox.file.copy_parallel(args.data_url, real_path)
-    print("training data finish copy to %s." % real_path)
-
-    traindir = os.path.join(real_path, 'train')
-    valdir = os.path.join(real_path, 'val')
-    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                     std=[0.229, 0.224, 0.225])
-
-    train_dataset = datasets.ImageFolder(
-        traindir,
-        transforms.Compose([
-            transforms.RandomResizedCrop(224),
-            transforms.RandomHorizontalFlip(),
-            transforms.ToTensor(),
-            normalize,
-        ]))
-
-    if args.distributed:
-        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
-    else:
-        train_sampler = None
-
-    train_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
-        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
-
-    val_loader = torch.utils.data.DataLoader(
-        datasets.ImageFolder(valdir, transforms.Compose([
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-            transforms.ToTensor(),
-            normalize,
-        ])),
-        batch_size=args.batch_size, shuffle=True,
-        num_workers=args.workers, pin_memory=True)
-
-    if args.evaluate:
-        validate(val_loader, model, criterion, args)
-        return
-
-    for epoch in range(args.start_epoch, args.epochs):
-        if args.distributed:
-            train_sampler.set_epoch(epoch)
-        lr_policy(optimizer, 0, epoch)
-        # train for one epoch
-        train(train_loader, model, criterion, optimizer, epoch, args)
-
-        # evaluate on validation set
-        acc1 = validate(val_loader, model, criterion, args)
-
-        # remember best acc@1 and save checkpoint
-        is_best = acc1 > best_acc1
-        best_acc1 = max(acc1, best_acc1)
-        file_name = "checkpoint_npu{}".format(args.npu)
-        modeltmp = model.cpu()
-        save_checkpoint({
-            'epoch': epoch + 1,
-            'arch': args.arch,
-            'state_dict': modeltmp.state_dict(),
-            'best_acc1': best_acc1,
-        }, is_best, file_name)
-        modeltmp.to(CALCULATE_DEVICE)
-
-    if args.onnx:
-        convert_pth_to_onnx(args)
-
-    # --------------modelarts modification----------
-    mox.file.copy_parallel(CACHE_TRAINING_URL, args.train_url)
-    # --------------modelarts modification end----------
-
-def convert_pth_to_onnx(args):
-    pth_pattern = os.path.join(CACHE_TRAINING_URL, f"checkpoint_npu{args.npu}.pth.tar")
-    pth_file_list = glob.glob(pth_pattern)
-    if not pth_file_list:
-        print(f"can't find pth {pth_pattern}")
-        return
-    pth_file = pth_file_list[0]
-    onnx_path = pth_file.split(".")[0] + '.onnx'
-    convert(pth_file, onnx_path)
-
-
-def train(train_loader, model, criterion, optimizer, epoch, args):
-    if args.optimizer_batch_size < 0:
-        batch_size_multiplier = 1
-    else:
-        tbs = 1 * args.batch_size
-        if args.optimizer_batch_size % tbs != 0:
-            print(
-                "Warning: simulated batch size {} is not divisible by actual batch size {}"
-                    .format(args.optimizer_batch_size, tbs))
-        batch_size_multiplier = int(args.optimizer_batch_size / tbs)
-        print("BSM: {}".format(batch_size_multiplier))
-
-    batch_time = AverageMeter('Time', ':6.3f')
-    data_time = AverageMeter('Data', ':6.3f')
-    losses = AverageMeter('Loss', ':.4e')
-    top1 = AverageMeter('Acc@1', ':6.2f')
-    top5 = AverageMeter('Acc@5', ':6.2f')
-    progress = ProgressMeter(
-        len(train_loader),
-        [batch_time, data_time, losses, top1, top5],
-        prefix="Epoch: [{}]".format(epoch))
-
-    # switch to train mode
-    model.train()
-    optimizer.zero_grad()
-    end = time.time()
-    for i, (images, target) in enumerate(train_loader):
-        # measure data loading time
-        data_time.update(time.time() - end)
-
-        if args.gpu is not None:
-            images = images.cuda(args.gpu, non_blocking=True)
-
-        images = images.to(CALCULATE_DEVICE, non_blocking=True)
-        if args.label_smoothing == 0.0:
-            target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-        # compute output
-        output = model(images)
-        loss = criterion(output, target)
-
-        if args.label_smoothing > 0.0:
-            target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-        # measure accuracy and record loss
-        acc1, acc5 = accuracy(output, target, topk=(1, 5))
-        losses.update(loss.item(), images.size(0))
-        top1.update(acc1[0], images.size(0))
-        top5.update(acc5[0], images.size(0))
-
-        # compute gradient and do SGD step
-        with amp.scale_loss(loss, optimizer) as scaled_loss:
-            scaled_loss.backward()
-        optimizer_step = ((i + 1) % batch_size_multiplier) == 0
-        if optimizer_step:
-            if batch_size_multiplier != 1:
-                for param_group in optimizer.param_groups:
-                    for param in param_group['params']:
-                        param.grad /= batch_size_multiplier
-            optimizer.step()
-            optimizer.zero_grad()
-
-        # measure elapsed time
-        batch_time.update(time.time() - end)
-        end = time.time()
-
-        if i % LOG_STEP == 0:
-            progress.display(i)
-
-        if i == TRAIN_STEP:
-            break
-
-    print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format(
-            args.batch_size/(batch_time.avg+0.0001)))
-
-def validate(val_loader, model, criterion, args):
-    batch_time = AverageMeter('Time', ':6.3f')
-    losses = AverageMeter('Loss', ':.4e')
-    top1 = AverageMeter('Acc@1', ':6.2f')
-    top5 = AverageMeter('Acc@5', ':6.2f')
-    progress = ProgressMeter(
-        len(val_loader),
-        [batch_time, losses, top1, top5],
-        prefix='Test: ')
-
-    # switch to evaluate mode
-    model.eval()
-
-    with torch.no_grad():
-        end = time.time()
-        for i, (images, target) in enumerate(val_loader):
-            if args.gpu is not None:
-                images = images.cuda(args.gpu, non_blocking=True)
-            images = images.to(CALCULATE_DEVICE, non_blocking=True)
-            if args.label_smoothing == 0.0:
-                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-            # compute output
-            output = model(images)
-            loss = criterion(output, target)
-
-            if args.label_smoothing > 0.0:
-                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-            # measure accuracy and record loss
-            acc1, acc5 = accuracy(output, target, topk=(1, 5))
-            losses.update(loss.item(), images.size(0))
-            top1.update(acc1[0], images.size(0))
-            top5.update(acc5[0], images.size(0))
-
-            # measure elapsed time
-            batch_time.update(time.time() - end)
-            end = time.time()
-
-            if i % LOG_STEP == 0:
-                progress.display(i)
-
-        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
-              .format(top1=top1, top5=top5))
-    return top1.avg
-
-def save_checkpoint(state, is_best, filename='checkpoint'):
-    if not os.path.exists(CACHE_TRAINING_URL):
-        os.makedirs(CACHE_TRAINING_URL)
-
-    filename2 = os.path.join(CACHE_TRAINING_URL, filename + ".pth.tar")
-    torch.save(state, filename2)
-    if is_best:
-        shutil.copyfile(filename2, os.path.join(CACHE_TRAINING_URL, filename + 'model_best.pth.tar'))
-
-class AverageMeter(object):
-    """Computes and stores the average and current value"""
-    def __init__(self, name, fmt=':f'):
-        self.name = name
-        self.fmt = fmt
-        self.reset()
-        self.start_count_index = 10
-
-    def reset(self):
-        self.val = 0
-        self.avg = 0
-        self.sum = 0
-        self.count = 0
-
-    def update(self, val, n=1):
-        if self.count == 0:
-            self.batchsize = n
-        
-        self.val = val
-        self.count += n
-        if self.count > (self.start_count_index * self.batchsize):
-            self.sum += val * n
-            self.avg = self.sum / (self.count - self.start_count_index * self.batchsize)
-
-    def __str__(self):
-        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
-        return fmtstr.format(**self.__dict__)
-
-class ProgressMeter(object):
-    def __init__(self, num_batches, meters, prefix=""):
-        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
-        self.meters = meters
-        self.prefix = prefix
-
-    def display(self, batch):
-        entries = [self.prefix + self.batch_fmtstr.format(batch)]
-        entries += [str(meter) for meter in self.meters]
-        print('\t'.join(entries))
-
-    def _get_batch_fmtstr(self, num_batches):
-        num_digits = len(str(num_batches // 1))
-        fmt = '{:' + str(num_digits) + 'd}'
-        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
-
-
-def adjust_learning_rate(optimizer, epoch, args):
-    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
-    lr = args.lr * (0.1 ** (epoch // 30))
-    for param_group in optimizer.param_groups:
-        param_group['lr'] = lr
-
-
-def accuracy(output, target, topk=(1,)):
-    """Computes the accuracy over the k top predictions for the specified values of k"""
-    with torch.no_grad():
-        maxk = max(topk)
-        batch_size = target.size(0)
-
-        _, pred = output.topk(maxk, 1, True, True)
-        pred = pred.t()
-        correct = pred.eq(target.view(1, -1).expand_as(pred))
-
-        res = []
-        for k in topk:
-            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
-            res.append(correct_k.mul_(100.0 / batch_size))
-        return res
-
-class LabelSmoothing(nn.Module):
-    """
-    NLL loss with label smoothing.
-    """
-    def __init__(self, smoothing=0.0):
-        """
-        Constructor for the LabelSmoothing module.
-        :param smoothing: label smoothing factor
-        """
-        super(LabelSmoothing, self).__init__()
-        self.confidence = 1.0 - smoothing
-        self.smoothing = smoothing
-
-    def forward(self, x, target):
-        logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
-        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
-        nll_loss = nll_loss.squeeze(1)
-        smooth_loss = -logprobs.mean(dim=-1)
-        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
-        return loss.mean().to(CALCULATE_DEVICE)
-
-def lr_policy(lr_fn, logger=None):
-    if logger is not None:
-        logger.register_metric('lr',
-                               log.LR_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE)
-
-    def _alr(optimizer, iteration, epoch):
-        lr = lr_fn(iteration, epoch)
-
-        if logger is not None:
-            logger.log_metric('lr', lr)
-        for param_group in optimizer.param_groups:
-            param_group['lr'] = lr
-
-    return _alr
-
-def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None):
-    def _lr_fn(iteration, epoch):
-        if epoch < warmup_length:
-            lr = base_lr * (epoch + 1) / warmup_length
-        else:
-            e = epoch - warmup_length
-            es = epochs - warmup_length
-            lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr
-        return lr
-
-    return lr_policy(_lr_fn, logger=logger)
-
-if __name__ == '__main__':
-    main()
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt
deleted file mode 100644
index 31529da2e6..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/modelzoo_level.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-FuncStatus:OK
-PerfStatus:OK
-PrecisionStatus:OK
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py
deleted file mode 100644
index fd878f6e5a..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pthtar2onx.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-import argparse
-
-import torch
-from DistributedResnet50.image_classification import resnet
-import torch.onnx
-
-from collections import OrderedDict
-
-parser = argparse.ArgumentParser(description='PyTorch pth convert onnx')
-parser.add_argument('--pth_file_path',
-                    metavar='PATH',
-                    default='./resnet50checkpoint.pth.tar',
-                    help='path of pth file')
-
-parser.add_argument('--onnx_file_path',
-                    metavar='PATH',
-                    default='resnet50_npu_16.onnx',
-                    help='path of onnx file')
-args = parser.parse_args()
-
-
-def proc_node_module(checkpoint, AttrName):
-    new_state_dict = OrderedDict()
-    for k, v in checkpoint[AttrName].items():
-        if(k[0:7] == "module."):
-            name = k[7:]
-        else:
-            name = k[0:]
-        new_state_dict[name] = v
-    return new_state_dict
-
-
-def convert(pth_file_path, onnx_file_path):
-    checkpoint = torch.load(pth_file_path, map_location='cpu')
-    checkpoint['state_dict'] = proc_node_module(checkpoint, 'state_dict')
-    model = resnet.build_resnet("resnet50", "classic")
-    model.load_state_dict(checkpoint['state_dict'],strict=False)
-    model.eval()
-    print(model)
-
-    input_names = ["actual_input_1"]
-    output_names = ["output1"]
-    dummy_input = torch.randn(16, 3, 224, 224)
-    torch.onnx.export(model, dummy_input, onnx_file_path, input_names=input_names, output_names=output_names,
-                      opset_version=11)
-
-
-def main():
-    src_file_path = args.pth_file_path
-    dst_file_path = args.onnx_file_path
-    convert(src_file_path, dst_file_path)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
deleted file mode 100644
index 6e9f8fb66a..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/pytorch_resnet50_apex.py
+++ /dev/null
@@ -1,827 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the BSD 3-Clause License  (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/BSD-3-Clause
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import os
-import random
-import shutil
-import time
-import warnings
-import math
-import numpy as np
-
-import torch
-import torch.nn as nn
-import torch.nn.parallel
-import torch.backends.cudnn as cudnn
-import torch.distributed as dist
-import torch.optim
-import torch.multiprocessing as mp
-import torch.utils.data
-import torch.utils.data.distributed
-import torchvision.transforms as transforms
-import torchvision.datasets as datasets
-import torchvision.models as models
-import torch.npu
-import DistributedResnet50.image_classification.resnet as nvmodels
-from DistributedResnet50.image_classification.multi_epochs_dataloader import MultiEpochsDataLoader 
-from apex import amp
-
-BATCH_SIZE = 512
-EPOCHS_SIZE = 100
-TRAIN_STEP = 8000
-LOG_STEP = 1
-
-CALCULATE_DEVICE = "npu:7"
-PRINT_DEVICE = "cpu"
-SOURCE_DIR = "/data/imagenet"
-
-model_names = sorted(name for name in models.__dict__
-                     if name.islower() and not name.startswith("__")
-                     and callable(models.__dict__[name]))
-
-parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-parser.add_argument('--data',
-                    metavar='DIR',
-                    default=SOURCE_DIR,
-                    help='path to dataset')
-parser.add_argument('--save_ckpt_path',
-                    metavar='DIR',
-                    default='./',
-                    help='path of checkpoint file')
-parser.add_argument('-a', '--arch',
-                    metavar='ARCH',
-                    default='resnet50',
-                    choices=model_names,
-                    help='default: resnet50')
-parser.add_argument('-j', '--workers',
-                    default=32,
-                    type=int,
-                    metavar='N',
-                    help='number of data loading workers (default: 8)')
-parser.add_argument('--num_classes',
-                    default=1000,
-                    type=int,
-                    metavar='N',
-                    help='class number of dataset')
-parser.add_argument('--epochs',
-                    default=EPOCHS_SIZE,
-                    type=int,
-                    metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--start-epoch',
-                    default=0,
-                    type=int,
-                    metavar='N',
-                    help='manual epoch number (useful on restarts)')
-parser.add_argument('-b', '--batch-size',
-                    default=BATCH_SIZE,
-                    type=int,
-                    metavar='N',
-                    help='mini-batch size (default: 256), this is the total '
-                         'batch size of all GPUs on the current node when '
-                         'using Data Parallel or Distributed Data Parallel')
-parser.add_argument('--lr', '--learning-rate',
-                    default=0.1,
-                    type=float,
-                    metavar='LR',
-                    help='initial learning rate',
-                    dest='lr')
-parser.add_argument('--momentum',
-                    default=0.9,
-                    type=float,
-                    metavar='M',
-                    help='momentum')
-parser.add_argument('--wd', '--weight-decay',
-                    default=1e-4,
-                    type=float,
-                    metavar='W',
-                    help='weight decay (default: 1e-4)',
-                    dest='weight_decay')
-parser.add_argument('-p', '--print-freq',
-                    default=10,
-                    type=int,
-                    metavar='N',
-                    help='print frequency (default: 10)')
-parser.add_argument('--resume',
-                    default='',
-                    type=str,
-                    metavar='PATH',
-                    help='path to latest checkpoint (default: none)')
-parser.add_argument('-e', '--evaluate',
-                    dest='evaluate',
-                    help='evaluate model on validation set')
-parser.add_argument('--pretrained',
-                    dest='pretrained',
-                    action='store_true',
-                    help='use pre-trained model')
-parser.add_argument('--world-size',
-                    default=-1,
-                    type=int,
-                    help='number of nodes for distributed training')
-parser.add_argument('--rank',
-                    default=-1,
-                    type=int,
-                    help='node rank for distributed training')
-parser.add_argument('--dist-url',
-                    default='tcp://224.66.41.62:23456',
-                    type=str,
-                    help='url used to set up distributed training')
-parser.add_argument('--dist-backend',
-                    default='nccl',
-                    type=str,
-                    help='distributed backend')
-parser.add_argument('--seed',
-                    default=None,
-                    type=int,
-                    help='seed for initializing training. ')
-parser.add_argument('--gpu',
-                    default=None,
-                    type=int,
-                    help='GPU id to use.')
-parser.add_argument('--npu',
-                    default=None,
-                    type=int,
-                    help='NPU id to use.')
-parser.add_argument('--multiprocessing-distributed',
-                    action='store_true')
-parser.add_argument('--warmup',
-                    default=0,
-                    type=int,
-                    metavar='E',
-                    help='number of warmup epochs')
-parser.add_argument('--label-smoothing',
-                    default=0.0,
-                    type=float,
-                    metavar='S',
-                    help='label smoothing')
-parser.add_argument('--optimizer-batch-size',
-                    default=-1,
-                    type=int,
-                    metavar='N',
-                    help=
-                    'size of a total batch size, for simulating bigger batches using gradient accumulation')
-parser.add_argument('--static-loss-scale',
-                    type=float,
-                    default=1,
-                    help=
-                    'Static loss scale, positive power of 2 values can improve fp16 convergence.')
-parser.add_argument('-t',
-                    '--fine-tuning',
-                    action='store_true',
-                    help='transfer learning + fine tuning - train only the last FC layer.')
-# 图模式
-parser.add_argument('--graph_mode',
-                    action='store_true',
-                    help='whether to enable graph mode.')
-best_acc1 = 0
-args = parser.parse_args()
-def main():
-    
-    if args.npu is None:
-        args.npu = 0
-    global CALCULATE_DEVICE
-    CALCULATE_DEVICE = "npu:{}".format(args.npu)
-    torch.npu.set_device(CALCULATE_DEVICE)
-    print("use ", CALCULATE_DEVICE)
-
-    if args.seed is not None:
-        random.seed(args.seed)
-        torch.manual_seed(args.seed)
-        cudnn.deterministic = True
-        warnings.warn('You have chosen to seed training. '
-                      'This will turn on the CUDNN deterministic setting, '
-                      'which can slow down your training considerably! '
-                      'You may see unexpected behavior when restarting '
-                      'from checkpoints.')
-
-    if args.gpu is not None:
-        warnings.warn('You have chosen a specific GPU. This will completely '
-                      'disable data parallelism.')
-
-    if args.dist_url == "env://" and args.world_size == -1:
-        args.world_size = int(os.environ["WORLD_SIZE"])
-
-    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
-
-    ngpus_per_node = torch.cuda.device_count()
-    if args.multiprocessing_distributed:
-        # Since we have ngpus_per_node processes per node, the total world_size
-        # needs to be adjusted accordingly
-        args.world_size = ngpus_per_node * args.world_size
-        # Use torch.multiprocessing.spawn to launch distributed processes: the
-        # main_worker process function
-        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
-    else:
-        # Simply call main_worker function
-        main_worker(args.gpu, ngpus_per_node, args)
-
-def main_worker(gpu, ngpus_per_node, args):
-    global best_acc1
-    args.gpu = gpu
-
-    if args.gpu is not None:
-        print("Use GPU: {} for training".format(args.gpu))
-
-    if args.distributed:
-        if args.dist_url == "env://" and args.rank == -1:
-            args.rank = int(os.environ["RANK"])
-        if args.multiprocessing_distributed:
-            # For multiprocessing distributed training, rank needs to be the
-            # global rank among all the processes
-            args.rank = args.rank * ngpus_per_node + gpu
-        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
-                                world_size=args.world_size, rank=args.rank)
-    # create model
-    if args.pretrained:
-        print("=> using pre-trained model '{}'".format(args.arch))
-        model = nvmodels.build_resnet("resnet50", "classic", True)
-        print("load pretrained model")
-        pretrained_dict = \
-        torch.load("/home/checkpoint_npu0model_best.pth.tar", map_location="cpu")["state_dict"]
-        pretrained_dict.pop('module.fc.weight')
-        pretrained_dict.pop('module.fc.bias')
-        model.load_state_dict(pretrained_dict, strict=False)
-    else:
-        print("=> creating model '{}'".format(args.arch))
-        model = models.__dict__[args.arch](zero_init_residual=True, num_classes=args.num_classes)
-
-    if args.fine_tuning:
-        print("=> transfer learning + fine tuning(train only the last FC layer)")
-        if args.arch == "resnet50":
-            model.parameters()
-        else:
-            print("Error: Fine-tuning is not supported on this architecture")
-            exit(-1)
-    else:
-        model.parameters()
-
-    for layer in model.modules():
-        if isinstance(layer, nn.Linear):
-            torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), )
-    if args.distributed:
-        # For multiprocessing distributed, DistributedDataParallel constructor
-        # should always set the single device scope, otherwise,
-        # DistributedDataParallel will use all available devices.
-        if args.gpu is not None:
-            torch.cuda.set_device(args.gpu)
-            model.cuda(args.gpu)
-            # When using a single GPU per process and per
-            # DistributedDataParallel, we need to divide the batch size
-            # ourselves based on the total number of GPUs we have
-            args.batch_size = int(args.batch_size / ngpus_per_node)
-            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
-            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
-        else:
-            model.cuda()
-            # DistributedDataParallel will divide and allocate batch_size to all
-            # available GPUs if device_ids are not set
-            model = torch.nn.parallel.DistributedDataParallel(model)
-    elif args.gpu is not None:
-        torch.cuda.set_device(args.gpu)
-        model = model.cuda(args.gpu)
-    else:
-        # DataParallel will divide and allocate batch_size to all available GPUs
-        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
-            model.features = torch.nn.DataParallel(model.features)
-            model.cuda()
-        else:
-            model = model.to(CALCULATE_DEVICE)
-
-    lr_policy = lr_cosine_policy(args.lr,
-                                 args.warmup,
-                                 args.epochs)
-
-
-    # define loss function (criterion) and optimizer
-    loss = nn.CrossEntropyLoss
-    if args.label_smoothing > 0.0:
-        loss = lambda: LabelSmoothing(args.label_smoothing)
-    criterion = loss().to(CALCULATE_DEVICE)
-    optimizer = torch.optim.SGD([
-        {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0},
-        {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}],
-                                args.lr,
-                                momentum=args.momentum)  # torch.optim.  apex.optimizers.NpuFusedSGD
-    
-    model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1,combine_grad=False)
-
-    # optionally resume from a checkpoint
-    if args.resume:
-        if os.path.isfile(args.resume):
-            print("=> loading checkpoint '{}'".format(args.resume))
-            if args.npu is not None:
-                checkpoint = torch.load(args.resume)
-            elif args.gpu is None:
-                checkpoint = torch.load(args.resume)
-            else:
-                # Map model to be loaded to specified single gpu.
-                loc = 'cuda:{}'.format(args.gpu)
-                checkpoint = torch.load(args.resume, map_location=loc)
-            args.start_epoch = checkpoint['epoch']
-            best_acc1 = checkpoint['best_acc1']
-            if args.npu is not None:
-                best_acc1 = best_acc1.to("npu:{}".format(args.npu))
-            elif args.gpu is not None:
-                # best_acc1 may be from a checkpoint from a different GPU
-                best_acc1 = best_acc1.to(args.gpu)
-            model.load_state_dict(checkpoint['state_dict'])
-            print("=> loaded checkpoint '{}' (epoch {})"
-                  .format(args.resume, checkpoint['epoch']))
-        else:
-            print("=> no checkpoint found at '{}'".format(args.resume))
-
-    cudnn.benchmark = True
-
-    # Data loading code
-    traindir = os.path.join(args.data, 'train')
-    valdir = os.path.join(args.data, 'val')
-    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                     std=[0.229, 0.224, 0.225])
-
-    train_dataset = datasets.ImageFolder(
-        traindir,
-        transforms.Compose([
-            transforms.RandomResizedCrop(224),
-            transforms.RandomHorizontalFlip(),
-            transforms.ToTensor(),
-            normalize,
-        ]))
-
-    if args.distributed:
-        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
-    else:
-        train_sampler = None
-
-    ##  原始loader，下面的优化后loader具有更好的性能，无论单算子还是图模式
-    train_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
-        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
-
-    val_loader = torch.utils.data.DataLoader(
-        datasets.ImageFolder(valdir, transforms.Compose([
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-            transforms.ToTensor(),
-            normalize,
-        ])),
-        batch_size=args.batch_size, shuffle=True,
-        num_workers=args.workers, pin_memory=True)
-    train_loader_len = len(train_loader)
-
-    # 图模式
-    if args.graph_mode:
-        train_loader, train_loader_len, train_sampler = get_pytorch_train_loader_V2(args.data,
-                                                                                args.batch_size,
-                                                                                workers=args.workers,
-                                                                                fp16=True)
-        val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)
-
-
-    if args.evaluate:
-        validate(val_loader, model, criterion, args)
-        return
-
-    for epoch in range(args.start_epoch, args.epochs):
-        if args.distributed:
-            train_sampler.set_epoch(epoch)
-        lr_policy(optimizer, 0, epoch)
-        # train for one epoch
-        train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args)
-
-        # evaluate on validation set
-        acc1 = validate(val_loader, model, criterion, args)
-
-        # remember best acc@1 and save checkpoint
-        is_best = acc1 > best_acc1
-        best_acc1 = max(acc1, best_acc1)
-        file_name = "checkpoint_npu{}".format(args.npu)
-        modeltmp = model.cpu()
-        save_checkpoint({
-            'epoch': epoch + 1,
-            'arch': args.arch,
-            'state_dict': modeltmp.state_dict(),
-            'best_acc1': best_acc1,
-        }, is_best, args, file_name)
-        modeltmp.to(CALCULATE_DEVICE)
-
-def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args):
-    if args.optimizer_batch_size < 0:
-        batch_size_multiplier = 1
-    else:
-        tbs = 1 * args.batch_size
-        if args.optimizer_batch_size % tbs != 0:
-            print(
-                "Warning: simulated batch size {} is not divisible by actual batch size {}"
-                    .format(args.optimizer_batch_size, tbs))
-        batch_size_multiplier = int(args.optimizer_batch_size / tbs)
-        print("BSM: {}".format(batch_size_multiplier))
-
-    batch_time = AverageMeter('Time', ':6.3f')
-    data_time = AverageMeter('Data', ':6.3f')
-    losses = AverageMeter('Loss', ':.4e')
-    top1 = AverageMeter('Acc@1', ':6.2f')
-    top5 = AverageMeter('Acc@5', ':6.2f')
-    progress = ProgressMeter(
-        train_loader_len,
-        [batch_time, data_time, losses, top1, top5],
-        prefix="Epoch: [{}]".format(epoch))
-
-    # switch to train mode
-    model.train()
-    optimizer.zero_grad()
-    end = time.time()
-    for i, (images, target) in enumerate(train_loader):
-        # 图模式
-        if args.graph_mode:
-            print("args.graph_mode")
-            torch.npu.enable_graph_mode()
-
-        if i > 100:
-            pass
-        # measure data loading time
-        data_time.update(time.time() - end)
-
-        if args.gpu is not None:
-            images = images.cuda(args.gpu, non_blocking=True)
-
-        images = images.to(CALCULATE_DEVICE, non_blocking=True)
-        
-        # 图模式
-        if args.graph_mode:
-            print("args.graph_mode")
-            target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
-        else :
-          if args.label_smoothing == 0.0:
-              target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-        # compute output
-        output = model(images)
-        loss = criterion(output, target)
-
-        # 图模式
-        if not args.graph_mode:
-            if args.label_smoothing > 0.0:
-                target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-        
-
-        # measure accuracy and record loss
-        # 图模式
-        if not args.graph_mode:
-            # print("args.graph_mode====================")
-            acc1, acc5 = accuracy(output, target, topk=(1, 5))
-            losses.update(loss.item(), images.size(0))
-            top1.update(acc1[0], images.size(0))
-            top5.update(acc5[0], images.size(0))
-
-        # compute gradient and do SGD step
-        with amp.scale_loss(loss, optimizer) as scaled_loss:
-            scaled_loss.backward()
-        optimizer_step = ((i + 1) % batch_size_multiplier) == 0
-        if optimizer_step:
-            if batch_size_multiplier != 1:
-                for param_group in optimizer.param_groups:
-                    for param in param_group['params']:
-                        param.grad /= batch_size_multiplier
-            optimizer.step()
-            optimizer.zero_grad()
-        
-        # 图模式
-        if args.graph_mode:
-            torch.npu.launch_graph()
-            if i == 100:
-                torch.npu.synchronize()
-
-        # measure elapsed time
-        batch_time.update(time.time() - end)
-        end = time.time()
-
-        if i % LOG_STEP == 0:
-            progress.display(i)
-
-        if i == TRAIN_STEP:
-            break
-    # 图模式
-    if args.graph_mode:
-        print("args.graph_mode")
-        torch.npu.disable_graph_mode()
-
-    print("batch_size:", args.batch_size, 'Time: {:.3f}'.format(batch_time.avg), '* FPS@all {:.3f}'.format(
-            args.batch_size/batch_time.avg))
-
-def validate(val_loader, model, criterion, args):
-    batch_time = AverageMeter('Time', ':6.3f')
-    losses = AverageMeter('Loss', ':.4e')
-    top1 = AverageMeter('Acc@1', ':6.2f')
-    top5 = AverageMeter('Acc@5', ':6.2f')
-    progress = ProgressMeter(
-        len(val_loader),
-        [batch_time, losses, top1, top5],
-        prefix='Test: ')
-
-    # switch to evaluate mode
-    model.eval()
-
-    with torch.no_grad():
-        end = time.time()
-        for i, (images, target) in enumerate(val_loader):
-            if i > 50:
-                pass
-            if args.gpu is not None:
-                images = images.cuda(args.gpu, non_blocking=True)
-            images = images.to(CALCULATE_DEVICE, non_blocking=True)
-            
-                
-            # 图模式
-            if args.graph_mode:
-                print("args.graph_mode")
-                target = target.to(CALCULATE_DEVICE, non_blocking=True).to(torch.int32)
-            else :
-              if args.label_smoothing == 0.0:
-                  target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-            # compute output
-            output = model(images)
-            loss = criterion(output, target)
-
-            # 图模式
-            if not args.graph_mode:
-                if args.label_smoothing > 0.0:
-                    target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)
-
-            # measure accuracy and record loss
-            acc1, acc5 = accuracy(output, target, topk=(1, 5))
-            losses.update(loss.item(), images.size(0))
-            top1.update(acc1[0], images.size(0))
-            top5.update(acc5[0], images.size(0))
-
-            # measure elapsed time
-            batch_time.update(time.time() - end)
-            end = time.time()
-
-            if i % LOG_STEP == 0:
-                progress.display(i)
-
-        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
-              .format(top1=top1, top5=top5))
-    return top1.avg
-
-def save_checkpoint(state, is_best, args, filename='checkpoint'):
-    filename2 = os.path.join(args.save_ckpt_path, filename + ".pth.tar")
-    torch.save(state, filename2)
-    if is_best:
-        shutil.copyfile(filename2, os.path.join(args.save_ckpt_path, filename+'model_best.pth.tar'))
-
-class AverageMeter(object):
-    """Computes and stores the average and current value"""
-    def __init__(self, name, fmt=':f'):
-        self.name = name
-        self.fmt = fmt
-        self.reset()
-        self.start_count_index = 10
-
-    def reset(self):
-        self.val = 0
-        self.avg = 0
-        self.sum = 0
-        self.count = 0
-
-    def update(self, val, n=1):
-        if self.count == 0:
-            self.batchsize = n
-        
-        self.val = val
-        self.count += n
-        if self.count > (self.start_count_index * self.batchsize):
-            self.sum += val * n
-            self.avg = self.sum / (self.count - self.start_count_index * self.batchsize)
-
-    def __str__(self):
-        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
-        return fmtstr.format(**self.__dict__)
-
-class ProgressMeter(object):
-    def __init__(self, num_batches, meters, prefix=""):
-        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
-        self.meters = meters
-        self.prefix = prefix
-
-    def display(self, batch):
-        entries = [self.prefix + self.batch_fmtstr.format(batch)]
-        entries += [str(meter) for meter in self.meters]
-        print('\t'.join(entries))
-
-    def _get_batch_fmtstr(self, num_batches):
-        num_digits = len(str(num_batches // 1))
-        fmt = '{:' + str(num_digits) + 'd}'
-        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
-
-
-def adjust_learning_rate(optimizer, epoch, args):
-    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
-    lr = args.lr * (0.1 ** (epoch // 30))
-    for param_group in optimizer.param_groups:
-        param_group['lr'] = lr
-
-
-def accuracy(output, target, topk=(1,)):
-    """Computes the accuracy over the k top predictions for the specified values of k"""
-    with torch.no_grad():
-        maxk = max(topk)
-        batch_size = target.size(0)
-
-        _, pred = output.topk(maxk, 1, True, True)
-        pred = pred.t()
-        correct = pred.eq(target.view(1, -1).expand_as(pred))
-
-        res = []
-        for k in topk:
-            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
-            res.append(correct_k.mul_(100.0 / batch_size))
-        return res
-
-class LabelSmoothing(nn.Module):
-    """
-    NLL loss with label smoothing.
-    """
-    def __init__(self, smoothing=0.0):
-        """
-        Constructor for the LabelSmoothing module.
-
-        :param smoothing: label smoothing factor
-        """
-        super(LabelSmoothing, self).__init__()
-        self.confidence = 1.0 - smoothing
-        self.smoothing = smoothing
-
-    def forward(self, x, target):
-        # 图模式
-        if args.graph_mode:
-            logprobs = torch.nn.functional.log_softmax(x, dim=-1)
-        else:
-            logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
-        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
-        nll_loss = nll_loss.squeeze(1)
-        smooth_loss = -logprobs.mean(dim=-1)
-        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
-        # 图模式
-        if args.graph_mode:
-            return loss.mean()
-        else:
-            return loss.mean().to(CALCULATE_DEVICE)
-
-def lr_policy(lr_fn, logger=None):
-    if logger is not None:
-        logger.register_metric('lr',
-                               log.LR_METER(),
-                               verbosity=dllogger.Verbosity.VERBOSE)
-
-    def _alr(optimizer, iteration, epoch):
-        lr = lr_fn(iteration, epoch)
-
-        if logger is not None:
-            logger.log_metric('lr', lr)
-        for param_group in optimizer.param_groups:
-            param_group['lr'] = lr
-
-    return _alr
-
-def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None):
-    def _lr_fn(iteration, epoch):
-        if epoch < warmup_length:
-            lr = base_lr * (epoch + 1) / warmup_length
-        else:
-            e = epoch - warmup_length
-            es = epochs - warmup_length
-            lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr
-        return lr
-
-    return lr_policy(_lr_fn, logger=logger)
-
-
-def fast_collate(batch):
-    imgs = [img[0] for img in batch]
-    targets = torch.tensor([target[1] for target in batch], dtype=torch.int64)
-    w = imgs[0].size[0]
-    h = imgs[0].size[1]
-    tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
-    for i, img in enumerate(imgs):
-        nump_array = np.asarray(img, dtype=np.uint8)
-        if (nump_array.ndim < 3):
-            nump_array = np.expand_dims(nump_array, axis=-1)
-        nump_array = np.rollaxis(nump_array, 2)
-
-        tensor[i] += torch.from_numpy(nump_array)
-
-    return tensor, targets
-
-
-class PrefetchedWrapper(object):
-    def prefetched_loader(loader, fp16):
-        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).npu().view(1, 3, 1, 1)
-        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).npu().view(1, 3, 1, 1)
-        if fp16:
-            mean = mean.half()
-            std = std.half()
-
-        stream = torch.npu.Stream()
-        first = True
-        
-        for next_input, next_target in loader:
-            with torch.npu.stream(stream):
-                next_input = next_input.npu(non_blocking=True)
-                next_target = next_target.npu(non_blocking=True)
-                if fp16:
-                    next_input = next_input.half()
-
-                else:
-                    next_input = next_input.float()
-
-                next_input = next_input.sub_(mean).div_(std)
-
-        
-            if not first:
-                yield input, target
-            else:
-                first = False
-
-            torch.npu.current_stream().wait_stream(stream)
-            input = next_input
-            target = next_target
-        yield input, target
-
-    def __init__(self, dataloader, fp16):
-        self.dataloader = dataloader
-        self.fp16 = fp16
-        self.epoch = 0
-
-    def __iter__(self):
-        if (self.dataloader.sampler is not None and
-                isinstance(self.dataloader.sampler,
-                           torch.utils.data.distributed.DistributedSampler)):
-            self.dataloader.sampler.set_epoch(self.epoch)
-        self.epoch += 1
-        ret = PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
-        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.fp16)
-
-
-def get_pytorch_train_loader_V2(data_path, batch_size, workers=16, _worker_init_fn=None, fp16=False):
-    traindir = os.path.join(data_path, 'train')
-    train_dataset = datasets.ImageFolder(
-        traindir,
-        transforms.Compose([
-            transforms.RandomResizedCrop(224),
-            transforms.RandomHorizontalFlip(),
-        ]))
-
-    if torch.distributed.is_initialized():
-        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
-    else:
-        train_sampler = None
-
-    train_loader = torch.utils.data.DataLoader(
-        train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
-        num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler,
-        collate_fn=fast_collate, drop_last=True)
-
-    return PrefetchedWrapper(train_loader, fp16), len(train_loader), train_sampler
-
-
-def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
-    valdir = os.path.join(data_path, 'val')
-    val_dataset = datasets.ImageFolder(
-        valdir, transforms.Compose([
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-        ]))
-
-    if distributed:
-        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
-    else:
-        val_sampler = None
-
-        dataloader_fn = MultiEpochsDataLoader  # torch.utils.data.DataLoader
-        val_loader = dataloader_fn(
-            val_dataset,
-            sampler=val_sampler,
-            batch_size=batch_size, shuffle=(val_sampler is None),
-            num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, collate_fn=fast_collate)
-
-    return val_loader
-
-
-
-if __name__ == '__main__':
-    main()
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt
deleted file mode 100644
index fbefb37521..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-apex
-torchvision
-opencv-python
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh
deleted file mode 100644
index e542152b60..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_1p.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-device_id=0
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_1p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/pytorch_resnet50_apex.py \
-        --data /data/imagenet \
-        --npu ${device_id} \
-        -j64 \
-        -b512 \
-        --lr 0.2 \
-        --warmup 5 \
-        --label-smoothing=0.1 \
-        --epochs 90 \
-        --num_classes=1000 \
-        --optimizer-batch-size 512 > ./resnet50_1p.log 2>&1 &
-
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh
deleted file mode 100644
index 047849d5f8..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_2p.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-ip=$(hostname -I |awk '{print $1}')
-device_id_list=0,1
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_2p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \
-        --data /data/imagenet \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=128 \
-        --learning-rate=0.4 \
-        --warmup=8 \
-        --label-smoothing=0.1 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04 \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --device-list=${device_id_list} \
-        --benchmark=0 \
-        --device='npu' \
-        --epochs=90 \
-        --num_classes=1000 \
-        --batch-size=1024 > ./resnet50_2p.log 2>&1 &
-
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh
deleted file mode 100644
index 2b29adfe64..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_4p.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-ip=$(hostname -I |awk '{print $1}')
-device_id_list=0,1,2,3
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_4p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \
-        --data /data/imagenet \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=128 \
-        --learning-rate=0.8 \
-        --warmup=8 \
-        --label-smoothing=0.1 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04 \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --device-list=${device_id_list} \
-        --benchmark=0 \
-        --device='npu' \
-        --epochs=90 \
-        --num_classes=1000 \
-        --batch-size=2048 > ./resnet50_4p.log 2>&1 &
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh
deleted file mode 100644
index e3b0a5b523..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/run_8p.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env bash
-source env_npu.sh
-export WHICH_OP=GEOP
-export NEW_GE_FE_ID=1
-export GE_AICPU_FLAG=1
-
-/usr/local/Ascend/driver/tools/msnpureport -d 0 -g error
-/usr/local/Ascend/driver/tools/msnpureport -d 4 -g error
-
-ip=$(hostname -I |awk '{print $1}')
-currentDir=$(cd "$(dirname "$0")";pwd)
-currtime=`date +%Y%m%d%H%M%S`
-train_log_dir=${currentDir}/result/training_8p_job_${currtime}
-mkdir -p ${train_log_dir}
-cd ${train_log_dir}
-echo "train log path is ${train_log_dir}"
-
-python3.7 ${currentDir}/DistributedResnet50/main_apex_d76_npu.py \
-        --data /data/imagenet \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=128 \
-        --learning-rate=1.6 \
-        --warmup=8 \
-        --label-smoothing=0.1 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04  \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --benchmark=0 \
-        --device='npu' \
-        --epochs=90 \
-        --num_classes=1000 \
-        --batch-size=4096 > ./resnet50_8p.log 2>&1 &
-
-
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh
deleted file mode 100644
index 84d83feb94..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/env_npu.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-export install_path=/usr/local/Ascend
-
-if [ -d ${install_path}/toolkit ]; then
-    export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH}
-    export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH
-    export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH
-    export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH
-    export ASCEND_OPP_PATH=${install_path}/opp
-else
-    if [ -d ${install_path}/nnae/latest ];then
-        export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH
-        export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/
-        export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/
-        export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
-        export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
-        export ASCEND_AICPU_PATH=${install_path}/nnae/latest
-    else
-        export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
-        export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
-        export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/
-        export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
-        export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
-        export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest
-    fi
-fi
-
-
-#将Host日志输出到串口,0-关闭/1-开启
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-#设置默认日志级别,0-debug/1-info/2-warning/3-error
-export ASCEND_GLOBAL_LOG_LEVEL=3
-#设置Event日志开启标志,0-关闭/1-开启
-export ASCEND_GLOBAL_EVENT_ENABLE=0
-#设置是否开启taskque,0-关闭/1-开启
-export TASK_QUEUE_ENABLE=1
-#HCCL白名单开关,1-关闭/0-开启
-export HCCL_WHITELIST_DISABLE=1
-
-#设置device侧日志登记为error
-${install_path}/driver/tools/msnpureport -g error -d 0
-${install_path}/driver/tools/msnpureport -g error -d 1
-${install_path}/driver/tools/msnpureport -g error -d 2
-${install_path}/driver/tools/msnpureport -g error -d 3
-${install_path}/driver/tools/msnpureport -g error -d 4
-${install_path}/driver/tools/msnpureport -g error -d 5
-${install_path}/driver/tools/msnpureport -g error -d 6
-${install_path}/driver/tools/msnpureport -g error -d 7
-#关闭Device侧Event日志
-${install_path}/driver/tools/msnpureport -e disable
-
-path_lib=$(python3.7 -c """
-import sys
-import re
-result=''
-for index in range(len(sys.path)):
-    match_sit = re.search('-packages', sys.path[index])
-    if match_sit is not None:
-        match_lib = re.search('lib', sys.path[index])
-
-        if match_lib is not None:
-            end=match_lib.span()[1]
-            result += sys.path[index][0:end] + ':'
-
-        result+=sys.path[index] + '/torch/lib:'
-print(result)"""
-)
-
-echo ${path_lib}
-
-export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
deleted file mode 100644
index 8c7751bc4e..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_ResNet50_performance_8p.sh
+++ /dev/null
@@ -1,140 +0,0 @@
-#!/bin/bash
-
-################基础配置参数，需要模型审视修改##################
-# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
-# 网络名称，同目录名称
-Network="ResNet50_ID3071_for_PyTorch"
-# 训练batch_size
-batch_size=512
-# 训练使用的npu卡数
-export RANK_SIZE=8
-# 数据集路径,保持为空,不需要修改
-data_path=""
-
-# 训练epoch 90
-train_epochs=3
-# 加载数据进程数
-workers=128
-
-# 参数校验，data_path为必传参数，其他参数的增删由模型自身决定；此处新增参数需在上面有定义并赋值
-for para in $*
-do
-    if [[ $para == --data_path* ]];then
-        data_path=`echo ${para#*=}`
-    fi
-done
-
-
-# 校验是否传入data_path,不需要修改
-if [[ $data_path == "" ]];then
-    echo "[Error] para \"data_path\" must be confing"
-    exit 1
-fi
-
-###############指定训练脚本执行路径###############
-# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
-cur_path=`pwd`
-cur_path_last_dirname=${cur_path##*/}
-if [ x"${cur_path_last_dirname}" == x"test" ];then
-    test_path_dir=${cur_path}
-    cd ..
-    cur_path=`pwd`
-else
-    test_path_dir=${cur_path}/test
-fi
-
-
-#################创建日志输出目录，不需要修改#################
-ASCEND_DEVICE_ID=0
-if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
-    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-else
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-fi
-
-
-#################启动训练脚本#################
-# 训练开始时间，不需要修改
-start_time=$(date +%s)
-# 非平台场景时source 环境变量
-check_etp_flag=`env | grep etp_running_flag`
-etp_flag=`echo ${check_etp_flag#*=}`
-if [ x"${etp_flag}" != x"true" ];then
-    source ${test_path_dir}/env_npu.sh
-fi
-
-python3.7 ./DistributedResnet50/main_apex_d76_npu.py \
-        --data ${data_path} \
-        --addr=$(hostname -I |awk '{print $1}') \
-        --seed=49 \
-        --workers=${workers} \
-        --learning-rate=1.6 \
-        --warmup=8 \
-        --label-smoothing=0.0 \
-        --mom=0.9 \
-        --weight-decay=1.0e-04  \
-        --static-loss-scale=128 \
-        --print-freq=1 \
-        --dist-url='tcp://127.0.0.1:50000' \
-        --dist-backend='hccl' \
-        --multiprocessing-distributed \
-        --world-size=1 \
-        --rank=0 \
-        --benchmark=0 \
-        --device='npu' \
-        --graph_mode \
-        --epochs=${train_epochs} \
-        --batch-size=${batch_size} > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
-wait
-
-
-##################获取训练数据################
-# 训练结束时间，不需要修改
-end_time=$(date +%s)
-e2e_time=$(( $end_time - $start_time ))
-
-# 训练用例信息，不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
-
-# 结果打印，不需要修改
-echo "------------------ Final result ------------------"
-# 输出性能FPS，需要模型审视修改
-grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $11}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log
-FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
-# 打印，不需要修改
-echo "Final Performance images/sec : $FPS"
-
-# 输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
-# 打印，不需要修改
-echo "Final Train Accuracy : ${train_accuracy}"
-echo "E2E Training Duration sec : $e2e_time"
-
-# 性能看护结果汇总
-# 获取性能数据，不需要修改
-# 吞吐量
-ActualFPS=${FPS}
-# 单迭代训练时长
-TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
-
-# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-
-# 最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}'  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-# 关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
deleted file mode 100644
index 2492708ca6..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_ID3071_performance_1p.sh
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/bin/bash
-
-################基础配置参数，需要模型审视修改##################
-# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
-# 网络名称，同目录名称
-Network="ResNet50_ID3071_for_PyTorch"
-# 训练batch_size
-batch_size=256
-# 训练使用的npu卡数
-export RANK_SIZE=1
-# 数据集路径,保持为空,不需要修改
-data_path=""
-
-# 训练epoch 90
-train_epochs=1
-# 指定训练所使用的npu device卡id
-device_id=0
-# 加载数据进程数
-workers=64
-
-# 参数校验，data_path为必传参数，其他参数的增删由模型自身决定；此处新增参数需在上面有定义并赋值
-for para in $*
-do
-    if [[ $para == --device_id* ]];then
-        device_id=`echo ${para#*=}`
-    elif [[ $para == --data_path* ]];then
-        data_path=`echo ${para#*=}`
-    fi
-done
-
-
-# 校验是否传入data_path,不需要修改
-if [[ $data_path == "" ]];then
-    echo "[Error] para \"data_path\" must be confing"
-    exit 1
-fi
-# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改
-if [ $ASCEND_DEVICE_ID ];then
-    echo "device id is ${ASCEND_DEVICE_ID}"
-elif [ ${device_id} ];then
-    export ASCEND_DEVICE_ID=${device_id}
-    echo "device id is ${ASCEND_DEVICE_ID}"
-else
-    "[Error] device id must be config"
-    exit 1
-fi
-
-
-
-###############指定训练脚本执行路径###############
-# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
-cur_path=`pwd`
-cur_path_last_dirname=${cur_path##*/}
-if [ x"${cur_path_last_dirname}" == x"test" ];then
-    test_path_dir=${cur_path}
-    cd ..
-    cur_path=`pwd`
-else
-    test_path_dir=${cur_path}/test
-fi
-
-
-#################创建日志输出目录，不需要修改#################
-if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
-    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-else
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-fi
-
-#修改参数
-sed -i "s|pass|break|g" ${test_path_dir}/../pytorch_resnet50_apex.py
-wait
-#################启动训练脚本#################
-# 训练开始时间，不需要修改
-start_time=$(date +%s)
-# 非平台场景时source 环境变量
-check_etp_flag=`env | grep etp_running_flag`
-etp_flag=`echo ${check_etp_flag#*=}`
-if [ x"${etp_flag}" != x"true" ];then
-    source ${test_path_dir}/env_npu.sh
-fi
-
-python3.7 ./pytorch_resnet50_apex.py \
-    --data ${data_path} \
-    --npu ${ASCEND_DEVICE_ID} \
-    -j ${workers} \
-    -b ${batch_size} \
-    --lr 0.2 \
-    --warmup 5 \
-    --label-smoothing=0.1 \
-    --epochs ${train_epochs} \
-    --graph_mode \
-    --optimizer-batch-size 256 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
-wait
-
-
-##################获取训练数据################
-# 训练结束时间，不需要修改
-end_time=$(date +%s)
-e2e_time=$(( $end_time - $start_time ))
-
-
-#参数改回
-sed -i "s|break|pass|g" ${test_path_dir}/../pytorch_resnet50_apex.py
-wait
-
-# 训练用例信息，不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
-
-# 结果打印，不需要修改
-echo "------------------ Final result ------------------"
-# 输出性能FPS，需要模型审视修改
-grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log
-FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
-# 打印，不需要修改
-echo "Final Performance images/sec : $FPS"
-
-# 输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
-# 打印，不需要修改
-echo "Final Train Accuracy : ${train_accuracy}"
-echo "E2E Training Duration sec : $e2e_time"
-
-# 性能看护结果汇总
-# 获取性能数据，不需要修改
-# 吞吐量
-ActualFPS=${FPS}
-# 单迭代训练时长
-TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
-
-# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-
-# 最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}'  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-# 关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh
deleted file mode 100644
index e89e5332b6..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_eval_1p.sh
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/bin/bash
-
-################基础配置参数，需要模型审视修改##################
-# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
-# 网络名称，同目录名称
-Network="ResNet50_for_PyTorch"
-# 训练batch_size
-batch_size=512
-# 训练使用的npu卡数
-export RANK_SIZE=1
-# 数据集路径,保持为空,不需要修改
-data_path=""
-
-# 训练epoch 90
-train_epochs=90
-# 指定训练所使用的npu device卡id
-device_id=0
-# 加载数据进程数
-workers=64
-
-# 参数校验，data_path为必传参数，其他参数的增删由模型自身决定；此处新增参数需在上面有定义并赋值
-for para in $*
-do
-    if [[ $para == --device_id* ]];then
-        device_id=`echo ${para#*=}`
-    elif [[ $para == --data_path* ]];then
-        data_path=`echo ${para#*=}`
-    elif [[ $para == --checkpoint* ]];then
-        checkpoint=`echo ${para#*=}`
-    fi
-done
-
-# 校验是否传入data_path,不需要修改
-if [[ $data_path == "" ]];then
-    echo "[Error] para \"data_path\" must be confing"
-    exit 1
-fi
-# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改
-if [ $ASCEND_DEVICE_ID ];then
-    echo "device id is ${ASCEND_DEVICE_ID}"
-elif [ ${device_id} ];then
-    export ASCEND_DEVICE_ID=${device_id}
-    echo "device id is ${ASCEND_DEVICE_ID}"
-else
-    "[Error] device id must be config"
-    exit 1
-fi
-
-
-
-###############指定训练脚本执行路径###############
-# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
-cur_path=`pwd`
-cur_path_last_dirname=${cur_path##*/}
-if [ x"${cur_path_last_dirname}" == x"test" ];then
-    test_path_dir=${cur_path}
-    cd ..
-    cur_path=`pwd`
-else
-    test_path_dir=${cur_path}/test
-fi
-
-
-#################创建日志输出目录，不需要修改#################
-if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
-    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-else
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-fi
-
-
-#################启动训练脚本#################
-# 训练开始时间，不需要修改
-start_time=$(date +%s)
-# 非平台场景时source 环境变量
-check_etp_flag=`env | grep etp_running_flag`
-etp_flag=`echo ${check_etp_flag#*=}`
-if [ x"${etp_flag}" != x"true" ];then
-    source ${test_path_dir}/env_npu.sh
-fi
-python3.7 ./pytorch_resnet50_apex.py \
-    --data ${data_path} \
-    --npu ${ASCEND_DEVICE_ID} \
-    -j ${workers} \
-    -b ${batch_size} \
-    --lr 0.2 \
-    --warmup 5 \
-    --label-smoothing=0.1 \
-    --epochs ${train_epochs} \
-    --evaluate \
-    --resume ${checkpoint} \
-    --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
-wait
-
-
-##################获取训练数据################
-# 训练结束时间，不需要修改
-end_time=$(date +%s)
-e2e_time=$(( $end_time - $start_time ))
-
-# 训练用例信息，不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'eval'
-
-# 结果打印，不需要修改
-
-# 输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
-# 打印，不需要修改
-echo "Final Train Accuracy : ${train_accuracy}"
-echo "E2E Training Duration sec : $e2e_time"
-
-
-# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Test: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-
-# 最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}'  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-# 关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh
deleted file mode 100644
index 37fd0fd4b8..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_full_1p.sh
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/bin/bash
-
-################基础配置参数，需要模型审视修改##################
-# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
-# 网络名称，同目录名称
-Network="ResNet50_for_PyTorch"
-# 训练batch_size
-batch_size=512
-# 训练使用的npu卡数
-export RANK_SIZE=1
-# 数据集路径,保持为空,不需要修改
-data_path=""
-
-# 训练epoch 90
-train_epochs=90
-# 指定训练所使用的npu device卡id
-device_id=0
-# 加载数据进程数
-workers=64
-
-# 参数校验，data_path为必传参数，其他参数的增删由模型自身决定；此处新增参数需在上面有定义并赋值
-for para in $*
-do
-    if [[ $para == --device_id* ]];then
-        device_id=`echo ${para#*=}`
-    elif [[ $para == --data_path* ]];then
-        data_path=`echo ${para#*=}`
-    fi
-done
-
-# 校验是否传入data_path,不需要修改
-if [[ $data_path == "" ]];then
-    echo "[Error] para \"data_path\" must be confing"
-    exit 1
-fi
-# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改
-if [ $ASCEND_DEVICE_ID ];then
-    echo "device id is ${ASCEND_DEVICE_ID}"
-elif [ ${device_id} ];then
-    export ASCEND_DEVICE_ID=${device_id}
-    echo "device id is ${ASCEND_DEVICE_ID}"
-else
-    "[Error] device id must be config"
-    exit 1
-fi
-
-
-
-###############指定训练脚本执行路径###############
-# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
-cur_path=`pwd`
-cur_path_last_dirname=${cur_path##*/}
-if [ x"${cur_path_last_dirname}" == x"test" ];then
-    test_path_dir=${cur_path}
-    cd ..
-    cur_path=`pwd`
-else
-    test_path_dir=${cur_path}/test
-fi
-
-
-#################创建日志输出目录，不需要修改#################
-if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
-    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-else
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-fi
-
-
-#################启动训练脚本#################
-# 训练开始时间，不需要修改
-start_time=$(date +%s)
-# 非平台场景时source 环境变量
-check_etp_flag=`env | grep etp_running_flag`
-etp_flag=`echo ${check_etp_flag#*=}`
-if [ x"${etp_flag}" != x"true" ];then
-    source ${test_path_dir}/env_npu.sh
-fi
-python3.7 ./pytorch_resnet50_apex.py \
-    --data ${data_path} \
-    --npu ${ASCEND_DEVICE_ID} \
-    -j ${workers} \
-    -b ${batch_size} \
-    --lr 0.2 \
-    --warmup 5 \
-    --label-smoothing=0.1 \
-    --epochs ${train_epochs} \
-    --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
-wait
-
-
-##################获取训练数据################
-# 训练结束时间，不需要修改
-end_time=$(date +%s)
-e2e_time=$(( $end_time - $start_time ))
-
-# 训练用例信息，不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'full'
-
-# 结果打印，不需要修改
-echo "------------------ Final result ------------------"
-# 输出性能FPS，需要模型审视修改
-grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log
-FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
-# 打印，不需要修改
-echo "Final Performance images/sec : $FPS"
-
-# 输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
-# 打印，不需要修改
-echo "Final Train Accuracy : ${train_accuracy}"
-echo "E2E Training Duration sec : $e2e_time"
-
-# 性能看护结果汇总
-# 获取性能数据，不需要修改
-# 吞吐量
-ActualFPS=${FPS}
-# 单迭代训练时长
-TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
-
-# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-
-# 最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}'  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-# 关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh
deleted file mode 100644
index 96226ecf32..0000000000
--- a/PyTorch/built-in/cv/classification/ResNet50_for_PyTorch/test/train_performance_1p.sh
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/bin/bash
-
-################基础配置参数，需要模型审视修改##################
-# 必选字段(必须在此处定义的参数): Network batch_size RANK_SIZE
-# 网络名称，同目录名称
-Network="ResNet50_ID0095_for_PyTorch"
-# 训练batch_size
-batch_size=512
-# 训练使用的npu卡数
-export RANK_SIZE=1
-# 数据集路径,保持为空,不需要修改
-data_path=""
-
-# 训练epoch 90
-train_epochs=1
-# 指定训练所使用的npu device卡id
-device_id=0
-# 加载数据进程数
-workers=64
-
-# 参数校验，data_path为必传参数，其他参数的增删由模型自身决定；此处新增参数需在上面有定义并赋值
-for para in $*
-do
-    if [[ $para == --device_id* ]];then
-        device_id=`echo ${para#*=}`
-    elif [[ $para == --data_path* ]];then
-        data_path=`echo ${para#*=}`
-    fi
-done
-
-# 校验是否传入data_path,不需要修改
-if [[ $data_path == "" ]];then
-    echo "[Error] para \"data_path\" must be confing"
-    exit 1
-fi
-# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改
-if [ $ASCEND_DEVICE_ID ];then
-    echo "device id is ${ASCEND_DEVICE_ID}"
-elif [ ${device_id} ];then
-    export ASCEND_DEVICE_ID=${device_id}
-    echo "device id is ${ASCEND_DEVICE_ID}"
-else
-    "[Error] device id must be config"
-    exit 1
-fi
-
-
-
-###############指定训练脚本执行路径###############
-# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
-cur_path=`pwd`
-cur_path_last_dirname=${cur_path##*/}
-if [ x"${cur_path_last_dirname}" == x"test" ];then
-    test_path_dir=${cur_path}
-    cd ..
-    cur_path=`pwd`
-else
-    test_path_dir=${cur_path}/test
-fi
-
-
-#################创建日志输出目录，不需要修改#################
-if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
-    rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-else
-    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
-fi
-
-#修改参数
-sed -i "s|pass|break|g" ${test_path_dir}/../pytorch_resnet50_apex.py
-wait
-#################启动训练脚本#################
-# 训练开始时间，不需要修改
-start_time=$(date +%s)
-# 非平台场景时source 环境变量
-check_etp_flag=`env | grep etp_running_flag`
-etp_flag=`echo ${check_etp_flag#*=}`
-if [ x"${etp_flag}" != x"true" ];then
-    source ${test_path_dir}/env_npu.sh
-fi
-python3.7 ./pytorch_resnet50_apex.py \
-    --data ${data_path} \
-    --npu ${ASCEND_DEVICE_ID} \
-    -j ${workers} \
-    -b ${batch_size} \
-    --lr 0.2 \
-    --warmup 5 \
-    --label-smoothing=0.1 \
-    --epochs ${train_epochs} \
-    --optimizer-batch-size 512 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
-
-wait
-
-
-##################获取训练数据################
-# 训练结束时间，不需要修改
-end_time=$(date +%s)
-e2e_time=$(( $end_time - $start_time ))
-
-
-#参数改回
-sed -i "s|break|pass|g" ${test_path_dir}/../pytorch_resnet50_apex.py
-wait
-
-# 训练用例信息，不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
-
-# 结果打印，不需要修改
-echo "------------------ Final result ------------------"
-# 输出性能FPS，需要模型审视修改
-grep "FPS@all" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk '{print $7}' >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_fps.log
-FPS=`cat ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${CaseName}_fps.log | awk '{a+=$1} END {if (NR != 0) printf("%.3f",a/NR)}'`
-# 打印，不需要修改
-echo "Final Performance images/sec : $FPS"
-
-# 输出训练精度,需要模型审视修改
-train_accuracy=`grep -a '* Acc@1'  ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print}'|awk -F "Acc@1" '{print $NF}'|awk -F " " '{print $1}'`
-# 打印，不需要修改
-echo "Final Train Accuracy : ${train_accuracy}"
-echo "E2E Training Duration sec : $e2e_time"
-
-# 性能看护结果汇总
-# 获取性能数据，不需要修改
-# 吞吐量
-ActualFPS=${FPS}
-# 单迭代训练时长
-TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
-
-# 从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要根据模型审视
-grep Epoch: ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v Test|awk -F "Loss" '{print $NF}' | awk -F " " '{print $1}' >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-
-# 最后一个迭代loss值，不需要修改
-ActualLoss=`awk 'END {print}'  ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-# 关键信息打印到${CaseName}.log中，不需要修改
-echo "Network = ${Network}" >  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >>  ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh
index 04e874cc97..0f29d8b867 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_1p.sh
@@ -141,7 +141,7 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
+step_time=`grep 'step_time : ' "$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" | awk '{print $13}' | tail -n +3 | awk '{sum+=$1} END {if (NR > 0) print sum/NR}'`  # mean of field 13 over the step_time lines, skipping the first two (presumably warm-up); left empty instead of a fatal awk division by zero when no samples remain
 
 FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'}'`
 
diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh
index 37195fb612..7b8a5e8caa 100644
--- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh
+++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/test/train_performance_8p.sh
@@ -143,7 +143,7 @@ e2e_time=$(( $end_time - $start_time ))
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-step_time=`grep 'step_time : ' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $13}'`
+step_time=`grep 'step_time : ' "$cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log" | awk '{print $13}' | tail -n +3 | awk '{sum+=$1} END {if (NR > 0) print sum/NR}'`  # mean of field 13 over the step_time lines, skipping the first two (presumably warm-up); left empty instead of a fatal awk division by zero when no samples remain
 
 FPS=`awk 'BEGIN{printf "%d\n", '$batch_size'/'$step_time'*'$RANK_SIZE'}'`
 
-- 
Gitee