From 0858e114e0cea382f4dde68c5a8fa89742c6759b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Spencer=E5=85=94=E5=AD=90?=
Date: Thu, 4 May 2023 09:03:46 +0000
Subject: [PATCH 1/2] Add train_performance_bs48_1p.sh.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Spencer兔子
---
 .../test/train_performance_bs48_1p.sh         | 179 ++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs48_1p.sh

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs48_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs48_1p.sh
new file mode 100644
index 000000000..9d22e92f3
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs48_1p.sh
@@ -0,0 +1,179 @@
+#!/bin/bash
+
+# Current path; no modification needed
+cur_path=`pwd`
+
+# Collective communication parameters; no modification needed
+export RANK_SIZE=1
+export JOB_ID=99990001
+export RANK_ID=1
+export HCCL_CONNECT_TIMEOUT=600
+RANK_ID_START=0
+
+# Dataset path; keep empty here, no modification needed
+data_path=""
+
+# Set the default log level; no modification needed
+export ASCEND_GLOBAL_LOG_LEVEL_ETP=3
+
+# Basic parameters; review and modify per model
+# Network name, same as the directory name
+Network="ResNet50_ID0058_for_TensorFlow"
+# Training epochs
+train_epochs=1
+# Training batch size
+batch_size=48
+# Training steps
+train_steps=2000
+# Learning rate
+learning_rate=
+
+# Maintenance/test parameters; precision_mode should be reviewed per model
+#precision_mode="allow_mix_precision"
+# Fixed parameters; no modification needed below
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+# Help message; no modification needed
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage: ./train_performance_bs48_1p.sh "
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              whether to enable overflow detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              whether to enable profiling for performance debugging, default is False
+    --autotune               whether to enable autotune, default is False
+    --data_path              source data path for training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Parameter validation; no modification needed
+for para in $*
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --autotune* ]];then
+        autotune=`echo ${para#*=}`
+        mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+        mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+        autotune_dump_path=${cur_path}/output/autotune_dump
+        mkdir -p ${autotune_dump_path}/GA
+        mkdir -p ${autotune_dump_path}/RL
+        cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+        cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    fi
+done
+
+# Check that data_path has been passed in; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+# Patch the config parameters
+sed -i "50s|PATH_TO_BE_CONFIGURED|${data_path}|g" $cur_path/../src/configs/res50_bs48_1p.py
+sed -i "107s|PATH_TO_BE_CONFIGURED|${cur_path}/output/0/d\_solution/ckpt0|g" $cur_path/../src/configs/res50_bs48_1p.py
+
+cp data_loader.py $cur_path/../src/data_loader/resnet50/
+# Training start time; no modification needed
+start_time=$(date +%s)
+cd $cur_path/../
+# Enter the training script directory; review and modify per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no modification needed
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+    export DEVICE_INDEX=$RANK_ID
+
+    # Create the DeviceID output directory; no modification needed
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+
+    # Run the training script; the arguments below need no modification, others should be reviewed per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+    nohup python3.7 ${cur_path}/../src/mains/res50.py --config_file=res50_bs48_1p \
+        --max_train_steps=${train_steps} \
+        --iterations_per_loop=100 \
+        --debug=True \
+        --eval=False \
+        --model_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait

+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Restore the config parameters
+sed -i "50s|${data_path}|PATH_TO_BE_CONFIGURED|g" $cur_path/../src/configs/res50_bs48_1p.py
+sed -i "107s|${cur_path}/output/0/d\_solution/ckpt0|PATH_TO_BE_CONFIGURED|g" $cur_path/../src/configs/res50_bs48_1p.py
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and modify per model
+FPS=`cat ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | grep "FPS: " | awk -F "FPS: " '{print $2}' | awk -F " loss:" '{print $1}' | tail -n +2 | awk '{sum+=$1} END {print sum/NR}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and modify per model
+#train_accuracy=`grep -A 1 top1 $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $3}'`
+# Print; no modification needed
+#echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of stability/accuracy monitoring results
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data
+# Throughput; no modification needed
+ActualFPS=${FPS}
+# Training time per iteration (ms); no modification needed
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+# Extract loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep "FPS: " $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log | awk -F "loss: " '{print $2}' | awk -F "total" '{print $1}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss of the last iteration; no modification needed
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
-- 
Gitee


From 47fdb98e6c64982311d90e65e1292b914fffb7c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Spencer=E5=85=94=E5=AD=90?=
Date: Thu, 4 May 2023 09:05:09 +0000
Subject: [PATCH 2/2] Add res50_bs48_1p.py.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Spencer兔子
---
 .../src/configs/res50_bs48_1p.py              | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/configs/res50_bs48_1p.py

diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/configs/res50_bs48_1p.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/configs/res50_bs48_1p.py
new file mode 100644
index 000000000..220843ebc
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/configs/res50_bs48_1p.py
@@ -0,0 +1,141 @@
+# coding=utf-8
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import tensorflow as tf
+
+import os
+log_dir = './results/'+os.path.basename(__file__).split('.')[0]
+
+#256
+config = {
+    # ============ for testing =====================
+    'accelerator': '1980',    # 'gpu', '1980'
+    'shuffle_enable': 'yes',
+    'shuffle_buffer_size': 10000,
+    'rank_size': 1,
+    'shard': False,
+
+    # ======= basic config ======= #
+    'mode': 'train',                     # "train", "evaluate", "train_and_evaluate"
+    'epochs_between_evals': 4,           # used if mode is "train_and_evaluate"
+    'stop_threshold': 80.0,              # used if mode is "train_and_evaluate"
+    'data_dir': '/opt/npu/resnet_data_new',
+    'data_url': 'file://PATH_TO_BE_CONFIGURED',
+    'data_type': 'TFRECORD',
+    'model_name': 'resnet50',
+    'num_classes': 1001,
+    'num_epochs': None,
+    'height': 224,
+    'width': 224,
+    'dtype': tf.float32,
+    'data_format': 'channels_last',
+    'use_nesterov': True,
+    'eval_interval': 1,
+    'loss_scale': 1024,                  # could be float or string. If float, static loss scaling is applied.
+                                         # If string, the corresponding automatic loss scaling algorithm is used.
+                                         # Must be one of 'Backoff' or 'LogMax' (case insensitive).
+    'use_lars': False,
+    'label_smoothing': 0.1,              # If greater than 0 then smooth the labels.
+    'weight_decay': 0.0001,
+    'batch_size': 48,                    # minibatch size per node, total batchsize = batch_size*hvd.size()*itersize
+
+    'momentum': [0.9],
+
+    #======= data processing config =======
+    'min_object_covered': 0.1,           # used for random crop
+    'aspect_ratio_range': [3. / 4., 4. / 3.],
+    'area_range': [0.16, 1.0],
+    'max_attempts': 100,
+
+    #======= data augment config =======
+    'increased_aug': False,
+    'brightness': 0.3,
+    'saturation': 0.6,
+    'contrast': 0.6,
+    'hue': 0.13,
+    'num_preproc_threads': 22,
+
+    #======= initialization config =======
+    'conv_init': tf.variance_scaling_initializer(),
+    'bn_init_mode': 'adv_bn_init',       # "conv_bn_init" or "adv_bn_init", initializes the gamma in BN in different modes
+                                         # "adv_bn_init" means initialize gamma to 0 in each residual block's last bn, and initialize other gamma to 1
+                                         # "conv_bn_init" means initialize all the gamma to a constant, defined by "bn_gamma_initial_value"
+    'bn_gamma_initial_value': 1.0,
+
+    #======== model architecture ==========
+    'resnet_version': 'v1.5',
+    'arch_type': 'original',             # ------ input -------
+                                         # C1,C2,C3: input block, stride in different layer
+                                         # ------ shortcut ------
+                                         # D1: average_pooling + conv1*1 in shortcut in downsample block
+                                         # D2: conv3*3,stride=2 in shortcut in downsample block
+                                         # D3: conv1*1 + average_pooling in shortcut in downsample block
+                                         # ------ mainstream ----
+                                         # E1: average_pooling + conv3*3 in mainstream in downsample block
+                                         # E2: conv3*3 + average_pooling in mainstream in downsample block
+
+    #======= logger config =======
+    'display_every': 1,
+    'log_name': 'resnet50.log',
+    'log_dir': 'PATH_TO_BE_CONFIGURED',
+
+    #======= Learning Rate Config =======
+    'lr_warmup_mode': 'linear',          # "linear" or "cosine"
+    'warmup_lr': 0.0,
+    'warmup_epochs': 10,
+    'learning_rate_maximum': 0.1,
+
+    'lr_decay_mode': 'cosine',           # "steps", "poly", "poly_cycle", "cosine", "linear_cosine", "linear_twice", "constant" for 1980 only
+    'learning_rate_end': 0.00001,
+
+    'decay_steps': '10,20,30',           # for "steps"
+    'lr_decay_steps': '6.4,0.64,0.064',
+
+    'ploy_power': 2.0,                   # for "poly" and "poly_cycle"
+
+    'cdr_first_decay_ratio': 0.33,       # for "cosine_decay_restarts"
+    'cdr_t_mul': 2.0,
+    'cdr_m_mul': 0.1,
+
+    'lc_periods': 0.47,                  # for "linear_cosine"
+    'lc_beta': 0.00001,
+
+    'lr_mid': 0.5,                       # for "linear_twice"
+    'epoch_mid': 80,
+
+    'bn_lr_scale': 1.0,
+
+  }
+
+def res50_config():
+    config['global_batch_size'] = config['batch_size'] * config['rank_size']
+    config['do_checkpoint'] = True
+
+    return config
\ No newline at end of file
-- 
Gitee