diff --git a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_16p.sh b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_16p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b3cefa070c580e22e843f2252c2ea40aab7bde05
--- /dev/null
+++ b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_full_16p.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+cur_path=`pwd`/../  # model root, assuming the script is started from inside its test/ directory
+# Print failed cases to the screen (switch left at 0 here)
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+
+# Basic parameters; review and adjust per model
+# Batch size
+batch_size=1
+# Network name, same as the directory name
+Network="2D_Unet_ID0624_for_PyTorch"
+# Number of devices (1 by default for a single card)
+RANK_SIZE=16
+# Training epochs, optional
+train_epochs=5
+# Training steps (left empty; not referenced again in this script)
+train_steps=
+# Learning rate (not referenced again in this script; the launch line passes a literal -l 0.0001)
+learning_rate=1e-3
+# Values filled in from the command-line options
+data_path=""
+conf_path=""
+server_index=""
+fix_node_ip=""
+
+if [[ $1 == --help || $1 == --h ]];then  # usage now names this script and the options parsed below
+        echo "usage:./train_full_16p.sh --data_path=<path> --conf_path=<path> --server_index=<n> --fix_node_ip=<ip>"
+        exit 1
+fi
+
+# Parse --key=value options; "$@" and direct expansion keep values containing spaces or glob characters intact. Unknown options are ignored.
+for para in "$@"; do
+    if [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --conf_path* ]];then
+        conf_path=${para#*=}
+    elif [[ $para == --server_index* ]];then
+        server_index=${para#*=}
+    elif [[ $para == --fix_node_ip* ]];then
+        fix_node_ip=${para#*=}
+    fi
+done
+
+if [[ -z $data_path || -z $conf_path || -z $server_index ]];then  # the find and awk rank arithmetic further down need the last two as well
+        echo "[Error] para \"data_path\", \"conf_path\" and \"server_index\" must be config"
+        exit 1
+fi
+
+one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'`  # text between "server_" and the next "_" of the server_*0.info path; presumably the first node's IP - confirm the file naming
+linux_num=`find $conf_path -name "server_*.info" |wc -l`  # count of server_*.info files under conf_path; multiplied by 8 later to get the world size
+
+export HCCL_IF_IP=$fix_node_ip
+export MASTER_ADDR=$one_node_ip
+
+############## Run training ##########
+# train.py is launched by relative path further down, so stop here if the model root cannot be entered.
+cd "$cur_path" || exit 1
+# Empty any previous output tree, then create the directory for the current ASCEND_DEVICE_ID (one mkdir instead of the same call in both branches).
+if [[ -d "$cur_path/test/output" ]];then
+        rm -rf "${cur_path:?}"/test/output/*
+fi
+mkdir -p "$cur_path/test/output/$ASCEND_DEVICE_ID"
+wait
+
+export HCCL_WHITELIST_DISABLE=1
+#export MASTER_ADDR=127.0.0.1
+export MASTER_PORT=23456
+# The two sed calls below rewrite train.py in place so that its data/imgs/ and data/masks/ paths are prefixed with data_path.
+# The edit is not reverted afterwards, and a second run would match the already-prefixed text again.
+export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID
+sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py
+sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py
+start=$(date +%s)
+#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+# Start one background trainer per local NPU (ids 0-7). rank_server = 8 * server_index is this node's first global rank,
+# and NPU_WORLD_SIZE = 8 * (number of server_*.info files). The learning rate is passed as a literal (-l 0.0001).
+NPUS=($(seq 0 7))
+rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'`
+export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'`
+#export NPU_WORLD_SIZE=${#NPUS[@]}
+rank=0
+for i in ${NPUS[@]}
+do
+    mkdir -p  $cur_path/test/output/${i}/
+    export NPU_CALCULATE_DEVICE=${i}
+    export ASCEND_DEVICE_ID=${i}
+    export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'`
+    echo run process ${rank}
+    nohup python3 train.py -e $train_epochs -l 0.0001  --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 &
+    let rank++
+done
+wait
+end=$(date +%s)
+e2e_time=$(( $end - $start ))
+
+
+
+# Output training accuracy; review and adjust per model
+# Print; no change needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance-monitoring result summary
+# Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+## Collect performance data; no change needed. The launch loop leaves ASCEND_DEVICE_ID at 7, so device 7's log is the one parsed.
+# Throughput: turn \r into \n in the log, then average the number taken from the 2nd comma field of "img/s" lines (lines containing "0%" are dropped) and multiply by 16.
+sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log
+TrainingTime=0
+FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log |grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g`
+FPS=$(awk 'BEGIN{print '$FPS'*16}')
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'`
+
+# Print; no change needed
+echo "Final Performance images/sec : $FPS"
+
+ActualFPS=${FPS}
+
+# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt (review per model): for "Epoch" lines, take the 3rd comma field, the text after "=" and before "]", and keep non-empty results.
+grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+
+# Accuracy value: the maximum of the last field over all "Validation Dice" lines
+train_accuracy=`grep "Validation Dice" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $NF}'|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'`
+
+# Loss of the last iteration (last line of the loss file); no change needed
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cff304ed2b1e884cd43873500eb8a05e552d8112
--- /dev/null
+++ b/PyTorch/dev/cv/image_classification/2D_Unet_ID0624_for_PyTorch/test/train_performance_16p.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+cur_path=`pwd`/../  # model root, assuming the script is started from inside its test/ directory
+# Print failed cases to the screen (switch left at 0 here)
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+
+source /usr/local/Ascend/bin/setenv.bash
+
+export PATH=/usr/local/hdf5/bin:$PATH
+export LD_LIBRARY_PATH=/usr/local/hdf5/lib:$LD_LIBRARY_PATH
+export LIBRARY_PATH=/usr/local/hdf5/lib:$LIBRARY_PATH
+export CPATH=/usr/local/hdf5/include:$CPATH
+
+# Basic parameters; review and adjust per model
+# Batch size
+batch_size=1
+# Network name, same as the directory name
+Network="2D_Unet_ID0624_for_PyTorch"
+# Number of devices (1 by default for a single card)
+RANK_SIZE=16
+# Training epochs, optional
+train_epochs=1
+# Training steps (left empty; not referenced again in this script)
+train_steps=
+# Learning rate (not referenced again in this script)
+learning_rate=1e-3
+# Values filled in from the command-line options
+data_path=""
+conf_path=""
+server_index=""
+fix_node_ip=""
+
+if [[ $1 == --help || $1 == --h ]];then  # usage now names this script and the options parsed below
+        echo "usage:./train_performance_16p.sh --data_path=<path> --conf_path=<path> --server_index=<n> --fix_node_ip=<ip>"
+        exit 1
+fi
+
+# Parse --key=value options; "$@" and direct expansion keep values containing spaces or glob characters intact. Unknown options are ignored.
+for para in "$@"; do
+    if [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --conf_path* ]];then
+        conf_path=${para#*=}
+    elif [[ $para == --server_index* ]];then
+        server_index=${para#*=}
+    elif [[ $para == --fix_node_ip* ]];then
+        fix_node_ip=${para#*=}
+    fi
+done
+
+if [[ -z $data_path || -z $conf_path || -z $server_index ]];then  # the find and awk rank arithmetic further down need the last two as well
+        echo "[Error] para \"data_path\", \"conf_path\" and \"server_index\" must be config"
+        exit 1
+fi
+
+one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'`  # text between "server_" and the next "_" of the server_*0.info path; presumably the first node's IP - confirm the file naming
+linux_num=`find $conf_path -name "server_*.info" |wc -l`  # count of server_*.info files under conf_path; multiplied by 8 later to get the world size
+
+export HCCL_IF_IP=$fix_node_ip
+export MASTER_ADDR=$one_node_ip
+
+############## Run training ##########
+# train.py is launched by relative path further down, so stop here if the model root cannot be entered.
+cd "$cur_path" || exit 1
+# Empty any previous output tree, then create the directory for the current ASCEND_DEVICE_ID (one mkdir instead of the same call in both branches).
+if [[ -d "$cur_path/test/output" ]];then
+        rm -rf "${cur_path:?}"/test/output/*
+fi
+mkdir -p "$cur_path/test/output/$ASCEND_DEVICE_ID"
+wait
+
+
+export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID
+sed -i "s|data/imgs/|$data_path/imgs/|g" $cur_path/train.py  # in-place edit of train.py: prefix its data/imgs/ path with data_path (not reverted)
+sed -i "s|data/masks/|$data_path/masks/|g" $cur_path/train.py  # same for data/masks/
+#sed -i "s|if global_step == 100: pass|if global_step == 100: break|g" $cur_path/train.py
+start=$(date +%s)
+#nohup python3 train.py -e $train_epochs > $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log 2>&1 &
+# Start one background trainer per local NPU (ids 0-7). rank_server = 8 * server_index is this node's first global rank,
+# and NPU_WORLD_SIZE = 8 * (number of server_*.info files). Each trainer logs to test/output/<id>/train_<id>.log.
+export HCCL_WHITELIST_DISABLE=1
+export MASTER_PORT=23456
+NPUS=($(seq 0 7))
+rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'`
+export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'`
+rank=0
+for i in ${NPUS[@]}
+do
+    mkdir -p  $cur_path/test/output/${i}/
+    export NPU_CALCULATE_DEVICE=${i}
+    export ASCEND_DEVICE_ID=${i}
+    export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'`
+    echo run process ${rank}
+    nohup python3 train.py -e $train_epochs  --distributed True > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${i}.log 2>&1 &
+    let rank++
+done
+wait
+end=$(date +%s)
+e2e_time=$(( $end - $start ))
+
+
+#sed -i "s|if global_step == 100: break|if global_step == 100: pass|g" $cur_path/train.py
+
+# Output training accuracy; review and adjust per model
+# Print; no change needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance-monitoring result summary
+# Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data; no change needed. The launch loop leaves ASCEND_DEVICE_ID at 7, so device 7's log is the one parsed.
+# Throughput: turn \r into \n in the log, then average the number taken from the 2nd comma field of "img/s" lines (lines containing "0%" are dropped) and multiply by 16.
+sed -i "s|\r|\n|g" $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log
+TrainingTime=0
+FPS=`grep img/s $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | grep -v 0% | awk -F "," '{print$2}' | awk '{print$1}' | awk -F "i" '{print$1}' | awk '{sum+=$1} END {print"",sum/NR}'|sed s/[[:space:]]//g`
+FPS=$(awk 'BEGIN{print '$FPS'*16}')
+
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*1000/'${FPS}'}'`
+
+# Print; no change needed
+echo "Final Performance images/sec : $FPS"
+
+ActualFPS=${FPS}
+
+# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt (review per model): for "Epoch" lines, take the 3rd comma field, the text after "=" and before "]", and keep non-empty results.
+grep Epoch $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log| awk -F "," '{print$3}' | awk -F "=" '{print$2}' | awk -F "]" '{print$1}'| awk '{if(length !=0)print $0}' > $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+
+# Accuracy value (not collected in the performance run; extraction left disabled)
+#train_accuracy=`grep "loss" $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss_2.txt|awk -F " " '{print $8}'|awk 'END {print}'`
+
+# Loss of the last iteration (last line of the loss file); no change needed
+ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no change needed
+echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1dd25a169a86446ba167dbea7d960409b89a8a0
--- /dev/null
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_full_16p.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+
+cur_path=`pwd`  # launch directory; the script later cds to its parent and writes logs under $cur_path/output
+# Collective-communication parameters; no change needed
+export RANK_SIZE=16
+#export MASTER_ADDR=localhost
+export MASTER_PORT=29688
+# Dataset path and multi-node settings; leave empty here, they are filled from the command line
+data_path=""
+conf_path=""
+server_index=""
+fix_node_ip=""
+
+# Network name, same as the directory name; review per model
+Network="Transformer_ID0105_for_PyTorch"
+
+export BMMV2_ENABLE=1
+# Training epochs
+train_epochs=30
+# Training batch size; review per model
+batch_size=128
+
+
+
+# Parse --key=value arguments. data_path is required; other options are model-specific, and any option added here must be declared with a default above.
+for para in "$@"
+do
+    if [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --epochs* ]];then
+        epochs=${para#*=}; [[ -n $epochs ]] && train_epochs=$epochs  # --max-epoch reads train_epochs, so --epochs must update it to take effect
+    elif [[ $para == --conf_path* ]];then
+        conf_path=${para#*=}
+    elif [[ $para == --server_index* ]];then
+        server_index=${para#*=}
+    elif [[ $para == --fix_node_ip* ]];then
+        fix_node_ip=${para#*=}
+    fi
+done
+
+# Check the required arguments: besides data_path, the find and awk rank arithmetic further down need conf_path and server_index.
+if [[ -z $data_path || -z $conf_path || -z $server_index ]];then
+    echo "[Error] para \"data_path\", \"conf_path\" and \"server_index\" must be config"
+    exit 1
+fi
+
+one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'`  # text between "server_" and the next "_" of the server_*0.info path; presumably the first node's IP - confirm the file naming
+linux_num=`find $conf_path -name "server_*.info" |wc -l`  # count of server_*.info files under conf_path; multiplied by 8 later to get the world size
+
+export HCCL_IF_IP=$fix_node_ip
+export MASTER_ADDR=$one_node_ip
+
+
+# Create the per-DeviceID output directory (no change needed). ASCEND_DEVICE_ID is not set by this script before this point, so it comes from the caller's environment; if it is empty the paths below collapse to $cur_path/output/.
+if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+    mkdir -p $cur_path/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+
+# Work from the parent of the launch directory: train_8p_new.py and ./checkpoints/ are resolved relative to it,
+# so stop if the cd fails instead of starting eight trainers in the wrong place. DATA_DIR and LOGFILE are set but not referenced again in this script.
+cd "$cur_path/.." || exit 1
+DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/
+MODELDIR="./checkpoints/"
+mkdir -p "$MODELDIR"
+LOGFILE="$MODELDIR/log"
+STAT_FILE="log.txt"
+
+
+start_time=$(date +%s)
+NPUS=($(seq 0 7))
+rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'`
+export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'`
+# rank_server = 8 * server_index is this node's first global rank; NPU_WORLD_SIZE = 8 * (number of server_*.info files).
+rank=0
+for i in ${NPUS[@]}
+do
+    export NPU_CALCULATE_DEVICE=${i}
+    mkdir -p  $cur_path/output/${i}/
+    export ASCEND_DEVICE_ID=${i}
+    export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'`
+    echo run process ${rank}
+    # One background trainer per local NPU: --device-id gets the local index, the global rank travels in RANK.
+    # --distributed-world-size is passed explicitly, as the 16p performance script does, so the trainer sees the multi-node world size.
+    python3 train_8p_new.py \
+        $data_path \
+      --arch transformer_wmt_en_de \
+      --share-all-embeddings \
+      --optimizer adam \
+      --adam-beta1 0.9 \
+      --distributed-world-size ${NPU_WORLD_SIZE} \
+      --adam-beta2 0.997 \
+      --addr ${one_node_ip} \
+      --port 29990 \
+      --adam-eps "1e-9" \
+      --clip-norm 0.0 \
+      --lr-scheduler inverse_sqrt \
+      --warmup-init-lr 0.0 \
+      --warmup-updates 4000 \
+      --lr 0.0006 \
+      --min-lr 0.0 \
+      --dropout 0.1 \
+      --weight-decay 0.0 \
+      --criterion label_smoothed_cross_entropy \
+      --label-smoothing 0.1 \
+      --max-sentences 128\
+      --max-tokens 102400 \
+      --max-epoch $train_epochs \
+      --seed 1 \
+      --save-dir $MODELDIR \
+      --stat-file $STAT_FILE\
+      --log-interval 1\
+      --amp\
+      --device-id ${rank}\
+      --amp-level O2  >  $cur_path/output/${i}/train_${i}.log 2>&1 &
+    let rank++
+done
+wait
+
+
+
+
+################## Collect training results ################
+# Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no change needed
+echo "------------------ Final result ------------------"
+# Performance FPS (review per model): from "Time" lines not containing "all", take the value between "Time" and "(", skip the first 4 samples, and print 16*128*N/sum. The launch loop leaves ASCEND_DEVICE_ID at 7, so device 7's log is parsed.
+FPS=`grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",16*128*NR/sum}'|sed s/[[:space:]]//g`
+
+# Training accuracy (review per model): 6th whitespace-separated field of the last "Validation" line
+train_accuracy=`grep -rns "Validation" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk 'END {print $6}'`
+
+# Print; no change needed
+echo "Final Performance images/sec : $FPS"
+
+# Print; no change needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance-monitoring result summary
+# Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data; no change needed
+# Throughput
+ActualFPS=${FPS}
+# Training time of a single iteration in ms: batch_size * 1000 / FPS.
+# (Computed once; the previous version ran the identical awk expression twice.)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt (review per model): for "Time" lines not containing "all", keep the text between "Loss" and "(".
+grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}'  > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss of the last iteration (last line of the loss file); no change needed
+ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no change needed
+echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..09788d343cad407c7cce3649aba9a22186d01edd
--- /dev/null
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/test/train_performance_16p.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+
+cur_path=`pwd`  # launch directory; the script later cds to its parent and writes logs under $cur_path/output
+#nmon -s3 -c 500 -f -m $cur_path
+# Collective-communication parameters; no change needed
+export RANK_SIZE=16
+#export MASTER_ADDR=localhost
+export MASTER_PORT=29688
+export HCCL_WHITELIST_DISABLE=1
+export BMMV2_ENABLE=1
+# Dataset path and multi-node settings; leave empty here, they are filled from the command line
+data_path=""
+conf_path=""
+server_index=""
+fix_node_ip=""
+
+# Network name, same as the directory name; review per model
+Network="Transformer_ID0105_for_PyTorch"
+
+# Training batch size; review per model
+batch_size=128
+
+
+
+# Parse --key=value arguments. data_path is required; other options are model-specific, and any option added here must be declared with a default above.
+# "$@" and direct expansion keep values containing spaces or glob characters intact; epochs is stored but not referenced again in this script.
+for para in "$@"; do
+    if [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --epochs* ]];then
+        epochs=${para#*=}
+    elif [[ $para == --conf_path* ]];then
+        conf_path=${para#*=}
+    elif [[ $para == --server_index* ]];then
+        server_index=${para#*=}
+    elif [[ $para == --fix_node_ip* ]];then
+        fix_node_ip=${para#*=}
+    fi
+done
+
+# Check the required arguments: besides data_path, the find and awk rank arithmetic further down need conf_path and server_index.
+if [[ -z $data_path || -z $conf_path || -z $server_index ]];then
+    echo "[Error] para \"data_path\", \"conf_path\" and \"server_index\" must be config"
+    exit 1
+fi
+
+one_node_ip=`find $conf_path -name "server_*0.info"|awk -F "server_" '{print $2}'|awk -F "_" '{print $1}'`  # text between "server_" and the next "_" of the server_*0.info path; presumably the first node's IP - confirm the file naming
+linux_num=`find $conf_path -name "server_*.info" |wc -l`  # count of server_*.info files under conf_path; multiplied by 8 later to get the world size
+
+export HCCL_IF_IP=$fix_node_ip
+export MASTER_ADDR=$one_node_ip
+
+
+# Create the per-DeviceID output directory (no change needed). ASCEND_DEVICE_ID is not set by this script before this point, so it comes from the caller's environment; if it is empty the paths below collapse to $cur_path/output/.
+if [ -d $cur_path/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf $cur_path/output/$ASCEND_DEVICE_ID
+    mkdir -p $cur_path/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p $cur_path/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+# Work from the parent of the launch directory (train_8p_new.py and ./checkpoints/ are relative to it); stop if the cd fails so the sed patches never hit the wrong directory.
+cd "$cur_path/.." || exit 1
+DATA_DIR=./data/dataset/wmt14_en_de_joined_dict/
+MODELDIR="./checkpoints/"
+mkdir -p "$MODELDIR"
+LOGFILE="$MODELDIR/log"
+STAT_FILE="log.txt"
+
+# Performance run: switch the "if i>100:pass" and "if m >=2:pass" placeholders in train_8p_new.py to "break".
+# The EXIT trap puts them back even if the script is interrupted before the restore that follows the training loop (the restore is idempotent).
+trap 'sed -i -e "s|if i>100:break|if i>100:pass|g" -e "s|if m >=2:break|if m >=2:pass|g" "$cur_path/../train_8p_new.py"' EXIT
+sed -i "s|if i>100:pass|if i>100:break|g" train_8p_new.py
+sed -i "s|if m >=2:pass|if m >=2:break|g" train_8p_new.py
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+export ASCEND_GLOBAL_LOG_LEVEL_ETP_ETP=3
+export PTCOPY_ENABLE=1
+export TASK_QUEUE_ENABLE=1
+export DYNAMIC_OP="ADD#MUL"
+start_time=$(date +%s)
+NPUS=($(seq 0 7))
+rank_server=`awk 'BEGIN{printf "%.0f\n",8*'${server_index}'}'`
+export NPU_WORLD_SIZE=`awk 'BEGIN{printf "%.0f\n",8*'${linux_num}'}'`
+#export NPU_WORLD_SIZE=${#NPUS[@]}
+rank=0
+for i in ${NPUS[@]}
+do
+    export NPU_CALCULATE_DEVICE=${i}
+    mkdir -p  $cur_path/output/${i}/
+    export ASCEND_DEVICE_ID=${i}
+    export RANK=`awk 'BEGIN{printf "%.0f\n",'${rank}'+'${rank_server}'}'`
+    echo run process ${rank}
+    # One background trainer per local NPU (0-7): --device-id gets the local index, RANK (8*server_index + local index) is exported,
+    # and --distributed-world-size gets NPU_WORLD_SIZE (8 * number of server_*.info files). Output goes to output/<id>/train_<id>.log.
+    python3 train_8p_new.py \
+       $data_path \
+      --arch transformer_wmt_en_de \
+      --share-all-embeddings \
+      --optimizer adam \
+  --adam-beta1 0.9 \
+  --distributed-world-size ${NPU_WORLD_SIZE} \
+  --adam-beta2 0.997 \
+  --addr ${one_node_ip} \
+  --port 29990 \
+  --adam-eps "1e-9" \
+  --clip-norm 0.0 \
+  --lr-scheduler inverse_sqrt \
+  --warmup-init-lr 0.0 \
+  --warmup-updates 4000 \
+  --lr 0.0006 \
+  --min-lr 0.0 \
+  --dropout 0.1 \
+  --weight-decay 0.0 \
+  --criterion label_smoothed_cross_entropy \
+  --label-smoothing 0.1 \
+  --max-sentences 128\
+  --max-tokens 102400 \
+  --seed 1 \
+  --save-dir $MODELDIR \
+  --stat-file $STAT_FILE\
+  --log-interval 1\
+  --amp\
+  --device-id ${rank}\
+  --amp-level O2  >  $cur_path/output/${i}/train_${i}.log 2>&1 &
+    let rank++
+done
+wait
+sed -i "s|if i>100:break|if i>100:pass|g" train_8p_new.py  # undo the perf-run patch applied before the loop
+sed -i "s|if m >=2:break|if m >=2:pass|g" train_8p_new.py
+
+
+
+################## Collect training results ################
+# Training end time; no change needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no change needed
+echo "------------------ Final result ------------------"
+# Performance FPS (review per model): from "Time" lines not containing "all", take the value between "Time" and "(", skip the first 4 samples, and print 16*128*N/sum. The launch loop leaves ASCEND_DEVICE_ID at 7, so device 7's log is parsed.
+FPS=` grep -rns "Time" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Time" '{print$2}' |awk -F "(" '{print$1}'|tail -n +5|awk '{sum+=$1} END {print"",16*128*NR/sum}'|sed s/[[:space:]]//g`
+# Print; no change needed
+echo "Final Performance images/sec : $FPS"
+
+# Print; no change needed
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance-monitoring result summary
+# Training case information; no change needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data; no change needed
+# Throughput
+ActualFPS=${FPS}
+# Training time of a single iteration in ms: batch_size * 1000 / FPS
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Extract the loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt (review per model): for "Time" lines not containing "all", keep the text between "Loss" and "(".
+grep -rns "Time" ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |grep -v "all" |awk -F "Loss" '{print$2}' |awk -F "(" '{print$1}'  > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss of the last iteration (last line of the loss file); no change needed
+ActualLoss=`awk 'END {print}' ${cur_path}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no change needed
+echo "Network = ${Network}" > ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${cur_path}/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
diff --git a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py
index 0179a825cfad84454d00ed41788b4f50e8bb39de..6cfe2702485f2de56e8625f23fbb32876ce906e0 100644
--- a/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py
+++ b/PyTorch/dev/nlp/Transformer_ID0105_for_PyTorch/train_8p_new.py
@@ -111,8 +111,9 @@ def main():
     print(args)
     os.environ['MASTER_ADDR'] = args.addr
     os.environ['MASTER_PORT'] = args.port
+    device_id = args.device_id
     #mp.spawn(main_worker, nprocs=args.distributed_world_size, args=(args.distributed_world_size, args))
-    main_worker(pid_idx=RANK, args=args)
+    main_worker(pid_idx=device_id, args=args)
 
 
 
@@ -240,7 +241,7 @@ def train(args, trainer, datasets, epoch_itr):
     # reset meters
     DLLogger.flush()
     trainer.get_throughput_meter().reset()
-   
+
     for i, sample in enumerate(itr):
         if i>100:pass
         if i < num_batches - 1 and (i + 1) % update_freq > 0:
@@ -414,4 +415,4 @@ def load_checkpoint(args, trainer, epoch_itr):
 
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file