Add an opt-in graph mode to the WideDeep NPU training loop.

* WideDeep/config.py: new --graph_mode flag (store_true, off by default).
* WideDeep/train.py: when the flag is set, graph mode is enabled at the start
  of every step, the graph is launched after optimizer.step(), the device is
  synchronized on the last step of the epoch, and graph mode is disabled once
  the step loop has finished. Loss accumulation is skipped in graph mode, so
  the logged Loss column stays at its initial 0.0 there.
* test/train_ID3080_Widedeep_performance_1p.sh: new single-device performance
  script that runs train.py with --graph_mode and derives FPS from the
  per-step Time values found in the training log.

Review notes: the whitespace of context lines was reconstructed, so apply with
whitespace tolerance if a hunk is rejected. The index hashes are those of the
original patch and do not describe the edited post-images (this only matters
for a 3-way apply).

diff --git a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/config.py b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/config.py
index a4ac4c607ec1c6da86c4c7040bf918a7d7009228..48e4d22ede787233f9ded82651807b36bfa5be31 100644
--- a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/config.py
+++ b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/config.py
@@ -53,5 +53,6 @@ def set_args():
     parser.add_argument('--learning_rate', type=float, default=0.005, help="learning rate")
     parser.add_argument('--weight_decay', type=float, default=0.001, help="weight_decay")
     parser.add_argument('--n_gpu', type=int, default=0, help="n gpu")
+    parser.add_argument('--graph_mode', action='store_true', help='whether to enable graph mode.')
     args = parser.parse_args()
     return args
diff --git a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/train.py b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/train.py
index d2428f492ac60c8a0610c6e598aefaf528f31511..6519148b3788cc2fbf798bdc4252d72790f045b7 100644
--- a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/train.py
+++ b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/WideDeep/train.py
@@ -111,6 +111,10 @@ def train_model(model):
         train_loss_sum = 0.0
         start_time = time.time()
         for step, x in enumerate(train_loader):
+            # graph mode (opt-in via --graph_mode): enable it before this step's ops are issued
+            if args.graph_mode:
+                print("graph mode on")
+                torch.npu.enable_graph_mode()
             cat_fea, num_fea, label = x[0], x[1], x[2]
             if torch.npu.is_available():
                 cat_fea, num_fea, label = cat_fea.npu(non_blocking=True), num_fea.npu(non_blocking=True), label.npu(non_blocking=True)
@@ -127,17 +131,27 @@ def train_model(model):
 
             #loss.backward()
             optimizer.step()
-
-            #措施
-            #train_loss_sum += loss.cpu().item()
-            #train_loss_sum += loss.detach()
-            train_loss_sum += loss
-            #措施
+            # graph mode: launch the graph for this step
+            if args.graph_mode:
+                torch.npu.launch_graph()
+                if step + 1 == len(train_loader):  # step is 0-based, so this is the last step of the epoch
+                    torch.npu.synchronize()
+            else:
+                # workaround
+                #train_loss_sum += loss.cpu().item()
+                #train_loss_sum += loss.detach()
+                train_loss_sum += loss
+                # workaround
             #if (step + 1) % 50 == 0 or (step + 1) == len(train_loader):
             print("Epoch {:04d} | Step {:04d} / {} | Loss {:.4f} | Time {:.4f}".format(
                 epoch+1, step+1, len(train_loader), train_loss_sum/(step+1), time.time() - start_time))
             start_time = time.time()
 
+        # graph mode: turn it back off once the step loop is done
+        if args.graph_mode:
+            print("graph mode off")
+            torch.npu.disable_graph_mode()
+
         scheduler.step()
         cur_auc = evaluate_model(model)
         if cur_auc > best_auc:
diff --git a/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..101b98fcdab28181baa84450308bea55d86ad094
--- /dev/null
+++ b/PyTorch/dev/others/Widedeep_ID2866_for_PyTorch/test/train_ID3080_Widedeep_performance_1p.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+
+# Current path; no need to modify
+cur_path=`pwd`
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+export NPU_CALCULATE_DEVICE=$ASCEND_DEVICE_ID
+# Collective communication parameters; no need to modify
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+# Enter the conda environment
+
+#export PATH=/usr/local/python3.7.5/bin:/home/anaconda3/bin:$PATH
+#export LD_LIBRARY_PATH=/home/anaconda3/lib:$LD_LIBRARY_PATH
+#source activate py8
+
+# Dataset path; keep empty, no need to modify
+data_path=""
+
+# Basic parameters; review and adjust per model
+# Network name, same as the directory name
+Network="Widedeep_ID3080_for_PyTorch"
+# Training epochs
+epoch=1
+# Training batch_size
+train_batch_size=16
+batch_size=${train_batch_size}
+# Training steps
+#train_steps=`expr 1281167 / ${batch_size}`
+# Learning rate
+#learning_rate=0.495
+PREC=""
+# TF2.X only; no need to modify
+#export NPU_LOOP_SIZE=${train_steps}
+
+# Debug/maintenance parameters; precision_mode should be reviewed per model
+precision_mode="allow_mix_precision"
+# Maintenance parameters; nothing below needs to be modified
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+data_path=./data/criteo_sampled_data.csv
+
+# Help message; no need to modify
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_ID3080_Widedeep_performance_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              if or not over detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              if or not profiling for performance debug, default is False
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Argument parsing/validation; no need to modify
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        apex_opt_level=`echo ${para#*=}`
+        if [[ $apex_opt_level != "O1" ]] && [[ $apex_opt_level != "O2" ]] && [[ $apex_opt_level != "O3" ]]; then
+            echo "[ERROR] para \"precision_mode\" must be config O1 or O2 or O3"
+            exit 1
+        fi
+        PREC="--apex --apex-opt-level "$apex_opt_level
+
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --conda_name* ]];then
+        conda_name=`echo ${para#*=}`
+        source set_conda.sh
+        source activate $conda_name
+    fi
+
+done
+
+# Check that data_path was provided; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be confing"
+#    exit 1
+fi
+
+# Training start time; no need to modify
+start_time=$(date +%s)
+
+# Change to the training script directory; review per model
+cd $cur_path/../
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no need to modify
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    # Create the per-DeviceID output directory; no need to modify
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # CPU core binding: remove for models that do not need it, adapt for models that do
+    #cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'`
+    #cpustep=`expr $cpucount / 8`
+    #echo "taskset c steps:" $cpustep
+    #let a=RANK_ID*$cpustep
+    #let b=RANK_ID+1
+    #let c=b*$cpustep-1
+
+    # Run the training script; the arguments below need no change, others should be reviewed per model
+    python3 WideDeep/train.py --train_batch_size=${batch_size} --Epochs=${epoch} --graph_mode > $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+#conda deactivate
+# Training end time; no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no need to modify
+echo "------------------ Final result ------------------"
+# Compute performance (FPS) from the logged per-step Time values, skipping the first two; review per model
+Time=`grep Time $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Time" '{print $2}' |tail -n +3 | awk '{sum+=$1} END {print"", sum/NR}'|sed s/[[:space:]]//g`
+FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'/'${Time}'}'`
+
+
+# Print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# Training accuracy output; review per model
+#train_accuracy=`grep Loss $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| awk -F " " '{print $10}'`
+# Print; no need to modify (train_accuracy is not set while the line above stays commented out)
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of results for stability/accuracy monitoring
+# Training case information; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data
+# Throughput; no need to modify
+ActualFPS=${FPS}
+# Training time per iteration; no need to modify
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*1000/'${FPS}'}'`
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model (NOTE: as written, this extracts the text after "Time")
+grep Time $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F "Time" '{print $2}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no need to modify (NOTE: as written, this is the last line of the file extracted above)
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write key information to ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log