From ca04096da557c65aac8eb8a40d38cd8c943a6164 Mon Sep 17 00:00:00 2001
From: yongchao1 <297389370@qq.com>
Date: Wed, 17 Jul 2024 08:51:54 +0000
Subject: [PATCH 1/3] =?UTF-8?q?add=20/ResNet50=5FID0360=5Ffor=5FTensorFlow?=
 =?UTF-8?q?2.X/tensorflow/resnet=5Fctl=5Fimagenet=5Fmain=5Fprofiling.py.?=
 =?UTF-8?q?=20=E6=96=B0=E5=A2=9ETF2.Xprofiling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yongchao1 <297389370@qq.com>
---
 .../resnet_ctl_imagenet_main_profiling.py     | 337 ++++++++++++++++++
 1 file changed, 337 insertions(+)
 create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py

diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py
new file mode 100644
index 000000000..c44196b8d
--- /dev/null
+++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Runs a ResNet model on the ImageNet dataset using custom training loops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+from absl import logging
+import tensorflow as tf
+
+from tf2_common.modeling import performance
+from tf2_common.training import controller
+from tf2_common.utils.flags import core as flags_core
+from tf2_common.utils.logs import logger
+from tf2_common.utils.misc import distribution_utils
+from tf2_common.utils.misc import keras_utils
+from tf2_common.utils.misc import model_helpers
+from tf2_common.utils.mlp_log import mlp_log
+import common
+import imagenet_preprocessing
+import resnet_runnable
+import json
+import npu_device
+
+
+flags.DEFINE_boolean(name='use_tf_function', default=True,
+                     help='Wrap the train and test step inside a '
+                     'tf.function.')
+flags.DEFINE_boolean(name='single_l2_loss_op', default=False,
+                     help='Calculate L2_loss on concatenated weights, '
+                     'instead of using Keras per-layer L2 loss.')
+flags.DEFINE_boolean(name='cache_decoded_image', default=False,
+                     help='Whether or not to cache decoded images in the '
+                     'input pipeline. If this flag and `cache` is enabled, '
+                     'then TFExample protos will be parsed and then cached '
+                     'which reduces the load on hosts.')
+flags.DEFINE_boolean(name='enable_device_warmup', default=False,
+                     help='Whether or not to enable device warmup. This '
+                     'includes training on dummy data and enabling graph/XLA '
+                     'compilation before run_start.')
+flags.DEFINE_integer(name='device_warmup_steps', default=1,
+                     help='The number of steps to apply for device warmup.')
+flags.DEFINE_integer(name='num_replicas', default=32,
+                     help='The number of TPU cores to use, '
+                     'for log printout only.')
+# Ascend NPU flags: precision mode, data/overflow dump, profiling, autotune.
+flags.DEFINE_string(name='precision_mode', default='allow_mix_precision',
+                    help='allow_fp32_to_fp16/force_fp16/ '
+                    'must_keep_origin_dtype/allow_mix_precision.')
+flags.DEFINE_boolean(name='over_dump', default=False,
+                     help='if or not over detection, default is False')
+flags.DEFINE_boolean(name='data_dump_flag', default=False,
+                     help='data dump flag, default is False')
+flags.DEFINE_string(name='data_dump_step', default="10",
+                    help='data dump step, default is 10')
+flags.DEFINE_boolean(name='profiling', default=False,
+                     help='if or not profiling for performance debug, default is False')
+flags.DEFINE_string(name='profiling_dump_path', default="/home/data",
+                    help='the path to save profiling data')
+flags.DEFINE_string(name='over_dump_path', default="/home/data",
+                    help='the path to save over dump data')
+flags.DEFINE_string(name='data_dump_path', default="/home/data",
+                    help='the path to save dump data')
+flags.DEFINE_boolean(name='autotune', default=False,
+                     help='whether to enable autotune, default is False')
+
+def npu_config():
+  """Applies dump/overflow/profiling/precision flags and opens the NPU device.
+
+  Profiling output is fixed to ./profiling (relative to the working
+  directory); the --profiling_dump_path flag is not consulted here.
+  """
+  FLAGS = flags.FLAGS
+  options = npu_device.global_options()
+
+  if FLAGS.data_dump_flag:
+    options.dump_config.enable_dump = True
+    options.dump_config.dump_path = FLAGS.data_dump_path
+    options.dump_config.dump_step = FLAGS.data_dump_step
+    options.dump_config.dump_mode = "all"
+  if FLAGS.over_dump:
+    # NOTE(review): shares dump_path with data dump; the last writer wins.
+    options.dump_config.enable_dump_debug = True
+    options.dump_config.dump_path = FLAGS.over_dump_path
+    options.dump_config.dump_debug_mode = "all"
+  if FLAGS.profiling:
+    options.profiling_config.enable_profiling = True
+    options.profiling_config.profiling_options = json.dumps({
+        "output": "./profiling", "training_trace": "on", "task_trace": "on",
+        "fp_point": "", "bp_point": ""})
+  options.precision_mode = FLAGS.precision_mode
+  npu_device.open().as_default()
+
+
+def build_stats(runnable, time_callback):
+  """Collects final metrics and timing information into a plain dictionary.
+
+  Args:
+    runnable: Object exposing `flags_obj` plus the `test_loss`,
+      `test_accuracy`, `train_loss` and `train_accuracy` metrics.
+    time_callback: Time tracking callback instance; may be falsy.
+
+  Returns:
+    Dictionary of results. Loss/accuracy entries (eval and train alike) are
+    only filled in when `flags_obj.skip_eval` is false.
+  """
+  out = {}
+
+  if not runnable.flags_obj.skip_eval:
+    named_metrics = (
+        ('eval_loss', runnable.test_loss),
+        ('eval_acc', runnable.test_accuracy),
+        ('train_loss', runnable.train_loss),
+        ('train_acc', runnable.train_accuracy),
+    )
+    for key, metric in named_metrics:
+      if metric:
+        out[key] = metric.result().numpy()
+
+  if time_callback:
+    out['step_timestamp_log'] = time_callback.timestamp_log
+    out['train_finish_time'] = time_callback.train_finish_time
+    if time_callback.epoch_runtime_log:
+      out['avg_exp_per_second'] = time_callback.average_examples_per_second
+  return out
+
+
+def _steps_to_run(steps_in_current_epoch, steps_per_epoch, steps_per_loop):
+  """Returns how many steps the next on-device loop should execute."""
+  if steps_per_loop <= 0:
+    raise ValueError('steps_per_loop should be positive integer.')
+  if steps_per_loop != 1:
+    return min(steps_per_loop, steps_per_epoch - steps_in_current_epoch)
+  return steps_per_loop
+
+
+def run(flags_obj):
+  """Runs ResNet ImageNet training (and optional eval) with a custom loop.
+
+  Args:
+    flags_obj: An object containing parsed flag values.
+
+  Raises:
+    ValueError: If fp16 is unsupported (NOTE(review): raised by a helper?).
+
+  Returns:
+    Dictionary of training and eval stats, as produced by `build_stats`.
+  """
+  mlp_log.mlperf_print('cache_clear', True)
+  mlp_log.mlperf_print('init_start', None)
+  mlp_log.mlperf_print('submission_benchmark', 'resnet')
+  mlp_log.mlperf_print('submission_division', 'closed')
+  mlp_log.mlperf_print('submission_org', 'google')
+  mlp_log.mlperf_print(
+      'submission_platform', 'tpu-v3-{}'.format(flags_obj.num_replicas)
+      if flags_obj.tpu else 'gpu-v100-{}'.format(flags_obj.num_gpus))
+  mlp_log.mlperf_print('submission_status', 'cloud')
+
+  npu_config()  # Apply NPU dump/profiling/precision flags and open the device.
+
+  common.print_flags(flags_obj)
+
+  keras_utils.set_session_config(
+      enable_eager=flags_obj.enable_eager,
+      enable_xla=flags_obj.enable_xla)
+  performance.set_mixed_precision_policy(flags_core.get_tf_dtype(flags_obj))
+
+  if tf.config.list_physical_devices('GPU'):
+    if flags_obj.tf_gpu_thread_mode:
+      datasets_num_private_threads = keras_utils.set_gpu_thread_mode_and_count(
+          per_gpu_thread_count=flags_obj.per_gpu_thread_count,
+          gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
+          num_gpus=flags_obj.num_gpus)
+      if not flags_obj.datasets_num_private_threads:
+        flags_obj.datasets_num_private_threads = datasets_num_private_threads
+    common.set_cudnn_batchnorm_mode()
+
+  # TODO(anj-s): Set data_format without using Keras.
+  data_format = flags_obj.data_format
+  if data_format is None:
+    data_format = ('channels_first'
+                   if tf.test.is_built_with_cuda() else 'channels_last')
+  tf.keras.backend.set_image_data_format(data_format)
+
+  strategy = distribution_utils.get_distribution_strategy(
+      distribution_strategy=flags_obj.distribution_strategy,
+      num_gpus=flags_obj.num_gpus,
+      all_reduce_alg=flags_obj.all_reduce_alg,
+      num_packs=flags_obj.num_packs,
+      tpu_address=flags_obj.tpu,
+      tpu_zone=flags_obj.tpu_zone if flags_obj.tpu else None)
+  mlp_log.mlperf_print('global_batch_size', flags_obj.batch_size)
+  mlp_log.mlperf_print('train_samples',
+                       imagenet_preprocessing.NUM_IMAGES['train'])
+  mlp_log.mlperf_print('eval_samples',
+                       imagenet_preprocessing.NUM_IMAGES['validation'])
+  mlp_log.mlperf_print(
+      'model_bn_span',
+      int(flags_obj.batch_size /
+          (flags_obj.num_replicas if flags_obj.tpu else flags_obj.num_gpus)))
+
+  per_epoch_steps, train_epochs = common.get_num_train_iterations(flags_obj)
+  eval_steps = common.get_num_eval_steps(flags_obj)
+  steps_per_loop = min(flags_obj.steps_per_loop, per_epoch_steps)
+
+  logging.info(
+      'Training %d epochs, each epoch has %d steps, '
+      'total steps: %d; Eval %d steps', train_epochs, per_epoch_steps,
+      train_epochs * per_epoch_steps, eval_steps)
+
+  time_callback = keras_utils.TimeHistory(
+      flags_obj.batch_size,
+      flags_obj.log_steps,
+      logdir=flags_obj.model_dir if flags_obj.enable_tensorboard else None)
+  with distribution_utils.get_strategy_scope(strategy):
+    runnable = resnet_runnable.ResnetRunnable(flags_obj, time_callback)
+
+  eval_interval = (
+      flags_obj.epochs_between_evals *
+      per_epoch_steps if not flags_obj.skip_eval else None)
+  eval_offset = (
+      flags_obj.eval_offset_epochs *
+      per_epoch_steps if not flags_obj.skip_eval else 0)
+  if eval_offset != 0:
+    eval_offset -= eval_interval
+  checkpoint_interval = (
+      per_epoch_steps if flags_obj.enable_checkpoint_and_export else None)
+  summary_interval = per_epoch_steps if flags_obj.enable_tensorboard else None
+
+  checkpoint_manager = tf.train.CheckpointManager(
+      runnable.checkpoint,
+      directory=flags_obj.model_dir,
+      max_to_keep=10,
+      step_counter=runnable.global_step,
+      checkpoint_interval=checkpoint_interval)
+
+  device_warmup_steps = (flags_obj.device_warmup_steps
+                         if flags_obj.enable_device_warmup else 0)
+  if flags_obj.enable_device_warmup:
+    logging.info('Warmup for %d steps.', device_warmup_steps)
+
+  resnet_controller = controller.Controller(
+      strategy,
+      runnable.train,
+      runnable.evaluate,
+      runnable.warmup,
+      global_step=runnable.global_step,
+      steps_per_loop=steps_per_loop,
+      train_steps=per_epoch_steps * train_epochs,
+      device_warmup_steps=device_warmup_steps,
+      checkpoint_manager=checkpoint_manager,
+      summary_interval=summary_interval,
+      eval_steps=eval_steps,
+      eval_interval=eval_interval,
+      eval_offset=eval_offset)
+
+  if flags_obj.enable_device_warmup:
+    resnet_controller.warmup()
+
+  mlp_log.mlperf_print('init_stop', None)
+
+  profile_steps = flags_obj.profile_steps
+  if profile_steps:
+    profile_steps = [int(i) for i in profile_steps.split(',')]
+    if profile_steps[0] < 0:  # Negative first entry: trace from before training.
+      runnable.trace_start(-1)
+
+  time_callback.on_train_begin()
+  mlp_log.mlperf_print('run_start', None)
+  mlp_log.mlperf_print(
+      'block_start',
+      None,
+      metadata={
+          'first_epoch_num':
+              1,
+          'epoch_count':
+              (flags_obj.eval_offset_epochs if flags_obj.eval_offset_epochs != 0
+               else flags_obj.epochs_between_evals)
+      })
+  resnet_controller.train(evaluate=not flags_obj.skip_eval)
+  mlp_log.mlperf_print('run_stop', None, metadata={'status': 'success'})
+  time_callback.on_train_end()
+  mlp_log.mlperf_print('run_final', None)
+
+  stats = build_stats(runnable, time_callback)
+  return stats
+
+
+def define_imagenet_keras_flags():  # Flag-setup helper (unused within this file).
+  common.define_keras_flags()  # Register the flags declared by `common`.
+  flags_core.set_defaults()  # Called without overrides.
+  flags.adopt_module_key_flags(common)  # Expose `common`'s key flags as this module's.
+
+
+def main(_):  # absl.app entry point; the positional argv argument is ignored.
+  model_helpers.apply_clean(flags.FLAGS)  # presumably wipes model_dir on --clean; verify
+  with logger.benchmark_context(flags.FLAGS):
+    stats = run(flags.FLAGS)
+  logging.info('Run stats:\n%s', stats)
+
+
+if __name__ == '__main__':
+  logging.set_verbosity(logging.INFO)
+  common.define_keras_flags()
+  app.run(main)
\ No newline at end of file
-- 
Gitee


From 9e89a71593176866eb31eb1206ae428814dffe38 Mon Sep 17 00:00:00 2001
From: yongchao1 <297389370@qq.com>
Date: Wed, 17 Jul 2024 08:56:17 +0000
Subject: [PATCH 2/3] =?UTF-8?q?add=20/ResNet50=5FID0360=5Ffor=5FTensorFlow?=
 =?UTF-8?q?2.X/test/train=5Fperformance=5F1p=5Fprofiling2.sh.=20=E6=96=B0?=
 =?UTF-8?q?=E5=A2=9Eprofiling=E9=80=82=E9=85=8D=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yongchao1 <297389370@qq.com>
---
 .../test/train_performance_1p_profiling2.sh   | 201 ++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh

diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
new file mode 100644
index 000000000..01dab899e
--- /dev/null
+++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+
+# Current path; no need to modify
+cur_path=`pwd`
+
+# Collective-communication parameters; no need to modify
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+
+# Dataset path; keep empty here (filled from --data_path), no need to modify
+data_path=""
+
+# Basic parameters; review and adjust per model
+# Network name, same as the directory name
+Network="ResNet50_ID0360_for_TensorFlow2.X"
+# Training epochs
+train_epochs=2
+# Training batch_size
+batch_size=256
+# Training steps (integer division of 1281167 by the batch size)
+train_steps=`expr 1281167 / ${batch_size}`
+# Learning rate
+learning_rate=0.495
+
+# TF2.X only; review and adjust per model
+export NPU_LOOP_SIZE=${train_steps}
+
+# Debug/maintenance parameters; precision_mode should be reviewed per model
+precision_mode="allow_mix_precision"
+# Debug/maintenance parameters; the following need no modification
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=True
+
+# Help message; no need to modify
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_performance_1p_profiling2.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump		           if or not over detection, default is False
+    --data_dump_flag		     data dump flag, default is False
+    --data_dump_step		     data dump step, default is 10
+    --profiling		           if or not profiling for performance debug, default is True
+    --data_path		           source data of training
+    -h/--help		             show help message
+    "
+    exit 1
+fi
+
+# Argument parsing; no need to modify. "$@" keeps arguments with spaces intact.
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=${para#*=}
+    elif [[ $para == --over_dump* ]];then
+        over_dump=${para#*=}
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p "${over_dump_path}"
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=${para#*=}
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p "${data_dump_path}"
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=${para#*=}
+    elif [[ $para == --profiling* ]];then
+        profiling=${para#*=}
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p "${profiling_dump_path}"
+    elif [[ $para == --data_path* ]];then
+        data_path=${para#*=}
+    elif [[ $para == --conda_name* ]];then
+        conda_name=${para#*=}
+        source set_conda.sh
+        source activate $conda_name
+    fi
+done
+
+# Abort when --data_path was not supplied; no need to modify
+if [[ -z $data_path ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+# Training start time; no need to modify
+start_time=$(date +%s)
+
+# Enter the training script directory; review and adjust per model
+cd $cur_path/../tensorflow
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no need to modify
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+    # NOTE(review): ASCEND_DEVICE_ID is not set in this script; if it is empty the
+    # paths below collapse to ${cur_path}/output/ - confirm the caller exports it.
+    # Create the per-device output directory (recreated from scratch); no need to modify
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+    # CPU core binding; delete for models that do not need it, adjust for those that do
+    cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'`
+    cpustep=`expr $cpucount / 8`
+    echo "taskset c steps:" $cpustep
+    let a=RANK_ID*$cpustep
+    let b=RANK_ID+1
+    let c=b*$cpustep-1
+    # NOTE(review): this series adds resnet_ctl_imagenet_main_profiling.py (no "2"); confirm the script name below.
+    # Run the training script; the arguments listed next need no modification, review the others per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path，--data_dump_flag，--data_dump_step，--data_dump_path，--profiling，--profiling_dump_path
+    nohup taskset -c $a-$c python3 resnet_ctl_imagenet_main_profiling2.py \
+        --data_dir=${data_path} \
+        --num_accumulation_steps=1 \
+        --train_steps=${train_steps} \
+        --train_epochs=${train_epochs} \
+        --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \
+        --distribution_strategy=off \
+        --use_tf_while_loop=true \
+        --use_tf_function=true \
+        --enable_checkpoint_and_export \
+        --steps_per_loop=${train_steps} \
+        --base_learning_rate=${learning_rate} \
+        --momentum=0.901 \
+        --epochs_between_evals=1 \
+        --eval_offset_epochs=2 \
+        --optimizer=SGD \
+        --label_smoothing=0.1 \
+        --single_l2_loss_op \
+        --warmup_epochs=5 \
+        --weight_decay=0.000025 \
+        --lr_schedule=polynomial \
+        --drop_eval_remainder=True \
+        --precision_mode=${precision_mode} \
+        --over_dump=${over_dump} \
+        --over_dump_path=${over_dump_path} \
+        --data_dump_flag=${data_dump_flag} \
+        --data_dump_step=${data_dump_step} \
+        --data_dump_path=${data_dump_path} \
+        --batch_size=${batch_size} \
+        --profiling=${profiling} \
+        --profiling_dump_path=${profiling_dump_path}  > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done 
+wait
+
+#训练结束时间，不需要修改
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+#结果打印，不需要修改
+echo "------------------ Final result ------------------"
+#输出性能FPS，需要模型审视修改
+FPS=`grep TimeHistory  $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'`
+# Print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# Extract the training accuracy; review and adjust per model
+train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5`
+# Print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary of performance-monitoring results
+# Training case information; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'profiling2'_'perf'
+
+## Collect performance data; no need to modify
+# Throughput
+ActualFPS=${FPS}
+# Time per training iteration in ms; left empty when FPS is missing or not a positive number
+TrainingTime=`awk -v bs="${BatchSize}" -v fps="${FPS}" 'BEGIN{if (fps+0 > 0) printf "%.2f\n", bs*1000/fps}'`
+
+# Extract the loss values from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep train_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v BatchTimestamp|awk '{print $10}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no need to modify
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Write the key information to ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+sed -i "/AttributeError/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
+sed -i "/MLL/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
-- 
Gitee


From c601c390c025d755bebce8a5baa5f82354a9926f Mon Sep 17 00:00:00 2001
From: yongchao1 <297389370@qq.com>
Date: Wed, 17 Jul 2024 08:58:36 +0000
Subject: [PATCH 3/3] =?UTF-8?q?update=20=20ResNet50=5FID0360=5Ffor=5FTenso?=
 =?UTF-8?q?rFlow2.X/test/train=5Fperformance=5F1p=5Fprofiling2.sh.=20?=
 =?UTF-8?q?=E6=96=B0=E5=A2=9Eprofiling=E9=80=82=E9=85=8D=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yongchao1 <297389370@qq.com>
---
 .../test/train_performance_1p_profiling2.sh                  | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
index 01dab899e..454adee05 100644
--- a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
+++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
@@ -154,6 +154,11 @@ wait
 end_time=$(date +%s)
 e2e_time=$(( $end_time - $start_time ))
 
+# Run msprof from the first directory under /usr/local/Ascend that ships `acp`; skip when none is found.
+msprof_dir=$(find /usr/local/Ascend/ -name acp 2>/dev/null | head -n 1 | awk -F 'acp' '{print $1}')
+[ -n "${msprof_dir}" ] && cd "${msprof_dir}" && ./msprof --parse=on --output="${cur_path}/../tensorflow/profiling"
+[ -n "${msprof_dir}" ] && ./msprof --export=on --output="${cur_path}/../tensorflow/profiling"
+
 #结果打印，不需要修改
 echo "------------------ Final result ------------------"
 #输出性能FPS，需要模型审视修改
-- 
Gitee