From a954b3090854abf6fc9548641f1698e9cd982fda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=BD=A6=E8=BD=B1=E8=BE=98?=
 <5418572+hwwheel123@user.noreply.gitee.com>
Date: Thu, 27 Jun 2024 08:05:35 +0000
Subject: [PATCH] Add train_performance_1p_asan_memleak.sh Add
 train_performance_1p_asan_memleak.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 车轱辘 <5418572+hwwheel123@user.noreply.gitee.com>
---
 .../test/train_performance_1p_asan_memleak.sh | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 TensorFlow/built-in/recommendation/DIN_ID0190_for_TensorFlow/test/train_performance_1p_asan_memleak.sh

diff --git a/TensorFlow/built-in/recommendation/DIN_ID0190_for_TensorFlow/test/train_performance_1p_asan_memleak.sh b/TensorFlow/built-in/recommendation/DIN_ID0190_for_TensorFlow/test/train_performance_1p_asan_memleak.sh
new file mode 100644
index 000000000..6efd73ed3
--- /dev/null
+++ b/TensorFlow/built-in/recommendation/DIN_ID0190_for_TensorFlow/test/train_performance_1p_asan_memleak.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+
+
+#集合通信参数,不需要修改
+export RANK_SIZE=1
+export JOB_ID=10087
+
+#export ASCEND_DEVICE_ID=
+#export OP_NO_REUSE_MEM=StridedSliceD
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+#精度参数
+#precision_mode="must_keep_origin_dtype"
+
+#网络名称,同目录名称,需要模型审视修改
+Network="DIN_ID0190_for_TensorFlow"
+
+#训练batch_size,,需要模型审视修改
+batch_size=1024
+
+# 指定训练所使用的npu device卡id
+#device_id=0
+# 参数校验，data_path为必传参数，其他参数的增删由模型自身决定；此处新增参数需在上面有定义并赋值
+for para in $*
+do
+    if [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --batch_size* ]];then
+        batch_size=`echo ${para#*=}`
+    elif [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --device_id* ]];then
+        device_id=`echo ${para#*=}`
+    fi
+done
+
+#校验是否传入data_path,不需要修改
+#if [[ $data_path == "" ]];then
+#    echo "[Error] para \"data_path\" must be confing"
+#    exit 1
+#fi
+# 校验是否指定了device_id,分动态分配device_id与手动指定device_id,此处不需要修改
+#if [ $ASCEND_DEVICE_ID ];then
+#    device_id=${ASCEND_DEVICE_ID}
+#    echo "device id is ${ASCEND_DEVICE_ID}"
+#elif [ ${device_id} ];then
+#    export ASCEND_DEVICE_ID=${device_id}
+#    echo "device id is ${ASCEND_DEVICE_ID}"
+#else
+#    "[Error] device id must be config"
+#    exit 1
+#fi
+###############指定训练脚本执行路径###############
+# cd到与test文件夹同层级目录下执行脚本，提高兼容性；test_path_dir为包含test文件夹的路径
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+    test_path_dir=${cur_path}
+    cd ..
+    cur_path=`pwd`
+else
+    test_path_dir=${cur_path}/test
+fi
+
+#创建DeviceID输出目录，不需要修改
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+    rm -rf ${test_path_dir}/output/$ASCEND_DEVICE_ID
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+    mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+#################启动训练脚本#################
+#训练开始时间，不需要修改
+start_time=$(date +%s)
+# 非平台场景时source 环境变量
+check_etp_flag=`env | grep etp_running_flag`
+etp_flag=`echo ${check_etp_flag#*=}`
+if [ x"${etp_flag}" != x"true" ];then
+    source ${test_path_dir}/env_npu.sh
+fi
+#数据集处理
+#ln -nsf ${data_path} $cur_path/data
+
+#执行训练脚本，以下传参不需要修改，其他需要模型审视修改
+cd $cur_path/examples
+sed -i  "s|./data|$data_path|g" din_demo.py
+sed -i  "s|epochs=5|epochs=2|g" din_demo.py
+sed -i  "s|rank_id =|#rank_id =|g" din_demo.py
+
+
+RANK_ID_START=0
+RANK_SIZE=1
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+   #echo "Device ID: $RANK_ID"
+   #export RANK_ID=$RANK_ID
+   #export ASCEND_DEVICE_ID=$RANK_ID
+   #ASCEND_DEVICE_ID=$RANK_ID
+  if [   -d $cur_path/test/output/${ASCEND_DEVICE_ID} ];then
+     rm -rf $cur_path/test/output/${ASCEND_DEVICE_ID}
+     mkdir -p $cur_path/test/output/${ASCEND_DEVICE_ID}
+  else
+     mkdir -p $cur_path/test/output/${ASCEND_DEVICE_ID}
+  fi
+
+    corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+    let a=RANK_ID*${corenum}/8
+    let b=RANK_ID+1
+    let c=b*${corenum}/8-1
+    if [ "x${bind_core}" != x ];then
+        bind_core="taskset -c $a-$c"
+    fi
+
+  PYTHONMALLOC=malloc LD_PRELOAD=/usr/lib64/libasan.so.4  ASAN_OPTIONS=handle_abort=0:halt_on_error=0:verbosity=0:detect_leaks=1:detect_stack_use_after_return=0:alloc_dealloc_mismatch=0:log_path=/root/asan_cgl/DIN_ID0190_for_TensorFlow_asan.log:handle_segv=0:new_delete_type_mismatch=0 LSAN_OPTIONS=suppressions=/root/suppr.txt python3.7  din_demo.py > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+
+wait
+sed -i 's///g' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
+sed -i 's//\n/g' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
+##################获取训练数据################
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+train_time=`grep -rn "val_binary_crossentropy" ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log |awk -F ' ' '{print $5}'|awk -F 'ms' '{print $1}'|awk '{sum+=$1} END {print"",sum/NR}' |awk '$1=$1'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${train_time}'}'`
+#打印，不需要修改
+echo "Final Performance images/sec : $FPS"
+#吞吐量
+ActualFPS=${FPS}
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'asan_memleak_perf'
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中，需要模型审视修改
+grep "loss" ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk -F 'loss: ' '{print $2}'|awk -F ' ' '{print $1}' > ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+#最后一个迭代loss值，不需要修改
+ActualLoss=`awk 'END {print}' ${test_path_dir}/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+#关键信息打印到${CaseName}.log中，不需要修改
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+#echo "CompileTime = ${CompileTime}" >> ${test_path_dir}/output/${ASCEND_DEVICE_ID}/${CaseName}.log
-- 
Gitee