From ca04096da557c65aac8eb8a40d38cd8c943a6164 Mon Sep 17 00:00:00 2001
From: yongchao1 <297389370@qq.com>
Date: Wed, 17 Jul 2024 08:51:54 +0000
Subject: [PATCH 1/3] =?UTF-8?q?add=20/ResNet50=5FID0360=5Ffor=5FTensorFlow?=
 =?UTF-8?q?2.X/tensorflow/resnet=5Fctl=5Fimagenet=5Fmain=5Fprofiling.py.?=
 =?UTF-8?q?=20=E6=96=B0=E5=A2=9ETF2.Xprofiling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yongchao1 <297389370@qq.com>
---
 .../resnet_ctl_imagenet_main_profiling.py     | 337 ++++++++++++++++++
 1 file changed, 337 insertions(+)
 create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py

diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py
new file mode 100644
index 000000000..c44196b8d
--- /dev/null
+++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/tensorflow/resnet_ctl_imagenet_main_profiling.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Runs a ResNet model on the ImageNet dataset using custom training loops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+from absl import logging
+import tensorflow as tf
+
+from tf2_common.modeling import performance
+from tf2_common.training import controller
+from tf2_common.utils.flags import core as flags_core
+from tf2_common.utils.logs import logger
+from tf2_common.utils.misc import distribution_utils
+from tf2_common.utils.misc import keras_utils
+from tf2_common.utils.misc import model_helpers
+from tf2_common.utils.mlp_log import mlp_log
+import common
+import imagenet_preprocessing
+import resnet_runnable
+import json
+import npu_device
+
+
+flags.DEFINE_boolean(name='use_tf_function', default=True,
+                     help='Wrap the train and test step inside a '
+                     'tf.function.')
+flags.DEFINE_boolean(name='single_l2_loss_op', default=False,
+                     help='Calculate L2_loss on concatenated weights, '
+                     'instead of using Keras per-layer L2 loss.')
+flags.DEFINE_boolean(name='cache_decoded_image', default=False,
+                     help='Whether or not to cache decoded images in the '
+                     'input pipeline. If this flag and `cache` is enabled, '
+                     'then TFExample protos will be parsed and then cached '
+                     'which reduces the load on hosts.')
+flags.DEFINE_boolean(name='enable_device_warmup', default=False,
+                     help='Whether or not to enable device warmup. This '
+                     'includes training on dummy data and enabling graph/XLA '
+                     'compilation before run_start.')
+flags.DEFINE_integer(name='device_warmup_steps', default=1,
+                     help='The number of steps to apply for device warmup.')
+flags.DEFINE_integer(name='num_replicas', default=32,
+                     help='The number of TPU cores to use, '
+                     'for log printout only.')
+
+flags.DEFINE_string(name='precision_mode', default='allow_mix_precision',
+                    help='allow_fp32_to_fp16/force_fp16/ '
+                    'must_keep_origin_dtype/allow_mix_precision.')
+flags.DEFINE_boolean(name='over_dump', default=False,
+                     help='if or not over detection, default is False')
+flags.DEFINE_boolean(name='data_dump_flag', default=False,
+                     help='data dump flag, default is False')
+flags.DEFINE_string(name='data_dump_step', default="10",
+                    help='data dump step, default is 10')
+flags.DEFINE_boolean(name='profiling', default=False,
+                     help='if or not profiling for performance debug, default is False')
+flags.DEFINE_string(name='profiling_dump_path', default="/home/data",
+                    help='the path to save profiling data')
+flags.DEFINE_string(name='over_dump_path', default="/home/data",
+                    help='the path to save over dump data')
+flags.DEFINE_string(name='data_dump_path', default="/home/data",
+                    help='the path to save dump data')
+flags.DEFINE_boolean(name='autotune', default=False,
+                     help='whether to enable autotune, default is False')
+
+def npu_config():
+  """Applies the NPU-related flags to npu_device and opens the NPU device."""
+  FLAGS = flags.FLAGS
+  options = npu_device.global_options()
+
+  if FLAGS.data_dump_flag:
+    options.dump_config.enable_dump = True
+    options.dump_config.dump_path = FLAGS.data_dump_path
+    options.dump_config.dump_step = FLAGS.data_dump_step
+    options.dump_config.dump_mode = "all"
+
+  if FLAGS.over_dump:
+    # dump_path is shared with the data dump above; the later assignment wins.
+    options.dump_config.enable_dump_debug = True
+    options.dump_config.dump_path = FLAGS.over_dump_path
+    options.dump_config.dump_debug_mode = "all"
+
+  if FLAGS.profiling:
+    # Profiling data is written to ./profiling; make sure the directory exists.
+    tf.io.gfile.makedirs('./profiling')
+    options.profiling_config.enable_profiling = True
+    options.profiling_config.profiling_options = json.dumps({
+        'output': './profiling', 'training_trace': 'on', 'task_trace': 'on',
+        'fp_point': '', 'bp_point': ''})
+  options.precision_mode = FLAGS.precision_mode
+  npu_device.open().as_default()
+
+
+def build_stats(runnable, time_callback):
+  """Normalizes and returns dictionary of stats.
+
+  Args:
+    runnable: The module containing all the training and evaluation metrics.
+    time_callback: Time tracking callback instance.
+
+  Returns:
+    Dictionary of normalized results.
+  """
+  stats = {}
+
+  if not runnable.flags_obj.skip_eval:
+    if runnable.test_loss:
+      stats['eval_loss'] = runnable.test_loss.result().numpy()
+    if runnable.test_accuracy:
+      stats['eval_acc'] = runnable.test_accuracy.result().numpy()
+
+  if runnable.train_loss:
+    stats['train_loss'] = runnable.train_loss.result().numpy()
+  if runnable.train_accuracy:
+    stats['train_acc'] = runnable.train_accuracy.result().numpy()
+
+  if time_callback:
+    timestamp_log = time_callback.timestamp_log
+    stats['step_timestamp_log'] = timestamp_log
+    stats['train_finish_time'] = time_callback.train_finish_time
+    if time_callback.epoch_runtime_log:
+      stats['avg_exp_per_second'] = time_callback.average_examples_per_second
+
+  return stats
+
+
+def _steps_to_run(steps_in_current_epoch, steps_per_epoch, steps_per_loop):
+  """Calculates steps to run on device."""
+  if steps_per_loop <= 0:
+    raise ValueError('steps_per_loop should be positive integer.')
+  if steps_per_loop == 1:
+    return steps_per_loop
+  return min(steps_per_loop, steps_per_epoch - steps_in_current_epoch)
+
+
+def run(flags_obj):
+  """Run ResNet ImageNet training and eval loop using custom training loops.
+
+  Args:
+    flags_obj: An object containing parsed flag values.
+
+  Raises:
+    ValueError: If fp16 is passed as it is not currently supported.
+
+  Returns:
+    Dictionary of training and eval stats.
+  """
+  mlp_log.mlperf_print('cache_clear', True)
+  mlp_log.mlperf_print('init_start', None)
+  mlp_log.mlperf_print('submission_benchmark', 'resnet')
+  mlp_log.mlperf_print('submission_division', 'closed')
+  mlp_log.mlperf_print('submission_org', 'google')
+  mlp_log.mlperf_print(
+      'submission_platform', 'tpu-v3-{}'.format(flags_obj.num_replicas)
+      if flags_obj.tpu else 'gpu-v100-{}'.format(flags_obj.num_gpus))
+  mlp_log.mlperf_print('submission_status', 'cloud')
+
+  npu_config()
+
+  common.print_flags(flags_obj)
+
+  keras_utils.set_session_config(
+      enable_eager=flags_obj.enable_eager,
+      enable_xla=flags_obj.enable_xla)
+  performance.set_mixed_precision_policy(flags_core.get_tf_dtype(flags_obj))
+
+  if tf.config.list_physical_devices('GPU'):
+    if flags_obj.tf_gpu_thread_mode:
+      datasets_num_private_threads = keras_utils.set_gpu_thread_mode_and_count(
+          per_gpu_thread_count=flags_obj.per_gpu_thread_count,
+          gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
+          num_gpus=flags_obj.num_gpus)
+      if not flags_obj.datasets_num_private_threads:
+        flags_obj.datasets_num_private_threads = datasets_num_private_threads
+    common.set_cudnn_batchnorm_mode()
+
+  # TODO(anj-s): Set data_format without using Keras.
+  data_format = flags_obj.data_format
+  if data_format is None:
+    data_format = ('channels_first'
+                   if tf.test.is_built_with_cuda() else 'channels_last')
+  tf.keras.backend.set_image_data_format(data_format)
+
+  strategy = distribution_utils.get_distribution_strategy(
+      distribution_strategy=flags_obj.distribution_strategy,
+      num_gpus=flags_obj.num_gpus,
+      all_reduce_alg=flags_obj.all_reduce_alg,
+      num_packs=flags_obj.num_packs,
+      tpu_address=flags_obj.tpu,
+      tpu_zone=flags_obj.tpu_zone if flags_obj.tpu else None)
+  mlp_log.mlperf_print('global_batch_size', flags_obj.batch_size)
+  mlp_log.mlperf_print('train_samples',
+                       imagenet_preprocessing.NUM_IMAGES['train'])
+  mlp_log.mlperf_print('eval_samples',
+                       imagenet_preprocessing.NUM_IMAGES['validation'])
+  mlp_log.mlperf_print(
+      'model_bn_span',
+      int(flags_obj.batch_size /
+          (flags_obj.num_replicas if flags_obj.tpu else flags_obj.num_gpus)))
+
+  per_epoch_steps, train_epochs = common.get_num_train_iterations(flags_obj)
+  eval_steps = common.get_num_eval_steps(flags_obj)
+  steps_per_loop = min(flags_obj.steps_per_loop, per_epoch_steps)
+
+  logging.info(
+      'Training %d epochs, each epoch has %d steps, '
+      'total steps: %d; Eval %d steps', train_epochs, per_epoch_steps,
+      train_epochs * per_epoch_steps, eval_steps)
+
+  time_callback = keras_utils.TimeHistory(
+      flags_obj.batch_size,
+      flags_obj.log_steps,
+      logdir=flags_obj.model_dir if flags_obj.enable_tensorboard else None)
+  with distribution_utils.get_strategy_scope(strategy):
+    runnable = resnet_runnable.ResnetRunnable(flags_obj, time_callback)
+
+  eval_interval = (
+      flags_obj.epochs_between_evals *
+      per_epoch_steps if not flags_obj.skip_eval else None)
+  eval_offset = (
+      flags_obj.eval_offset_epochs *
+      per_epoch_steps if not flags_obj.skip_eval else 0)
+  if eval_offset != 0:
+    eval_offset -= eval_interval
+  checkpoint_interval = (
+      per_epoch_steps if flags_obj.enable_checkpoint_and_export else None)
+  summary_interval = per_epoch_steps if flags_obj.enable_tensorboard else None
+
+  checkpoint_manager = tf.train.CheckpointManager(
+      runnable.checkpoint,
+      directory=flags_obj.model_dir,
+      max_to_keep=10,
+      step_counter=runnable.global_step,
+      checkpoint_interval=checkpoint_interval)
+
+  device_warmup_steps = (flags_obj.device_warmup_steps
+                         if flags_obj.enable_device_warmup else 0)
+  if flags_obj.enable_device_warmup:
+    logging.info('Warmup for %d steps.', device_warmup_steps)
+
+  resnet_controller = controller.Controller(
+      strategy,
+      runnable.train,
+      runnable.evaluate,
+      runnable.warmup,
+      global_step=runnable.global_step,
+      steps_per_loop=steps_per_loop,
+      train_steps=per_epoch_steps * train_epochs,
+      device_warmup_steps=device_warmup_steps,
+      checkpoint_manager=checkpoint_manager,
+      summary_interval=summary_interval,
+      eval_steps=eval_steps,
+      eval_interval=eval_interval,
+      eval_offset=eval_offset)
+
+  if flags_obj.enable_device_warmup:
+    resnet_controller.warmup()
+
+  mlp_log.mlperf_print('init_stop', None)
+
+  profile_steps = flags_obj.profile_steps
+  if profile_steps:
+    profile_steps = [int(i) for i in profile_steps.split(',')]
+    if profile_steps[0] < 0:
+      runnable.trace_start(-1)
+
+  time_callback.on_train_begin()
+  mlp_log.mlperf_print('run_start', None)
+  mlp_log.mlperf_print(
+      'block_start', None,
+      metadata={
+          'first_epoch_num': 1,
+          'epoch_count':
+              (flags_obj.eval_offset_epochs if flags_obj.eval_offset_epochs != 0
+               else flags_obj.epochs_between_evals)
+      })
+  resnet_controller.train(evaluate=not flags_obj.skip_eval)
+  mlp_log.mlperf_print('run_stop', None, metadata={'status': 'success'})
+  time_callback.on_train_end()
+  mlp_log.mlperf_print('run_final', None)
+
+  stats = build_stats(runnable, time_callback)
+  return stats
+
+
+def define_imagenet_keras_flags():
+  """Defines the common Keras flags and adopts them as key flags."""
+  common.define_keras_flags()
+  flags_core.set_defaults()
+  flags.adopt_module_key_flags(common)
+
+
+def main(_):
+  """absl entry point: runs training/eval and logs the resulting stats."""
+  model_helpers.apply_clean(flags.FLAGS)
+  with logger.benchmark_context(flags.FLAGS):
+    stats = run(flags.FLAGS)
+  logging.info('Run stats:\n%s', stats)
+
+
+if __name__ == '__main__':
+  logging.set_verbosity(logging.INFO)
+  common.define_keras_flags()
+  app.run(main)
\ No newline at end of file
-- 
Gitee

From 9e89a71593176866eb31eb1206ae428814dffe38 Mon Sep 17 00:00:00 2001
From: yongchao1 <297389370@qq.com>
Date: Wed, 17 Jul 2024 08:56:17 +0000
Subject: [PATCH 2/3] =?UTF-8?q?add=20/ResNet50=5FID0360=5Ffor=5FTensorFlow?=
 =?UTF-8?q?2.X/test/train=5Fperformance=5F1p=5Fprofiling2.sh.=20=E6=96=B0?=
 =?UTF-8?q?=E5=A2=9Eprofiling=E9=80=82=E9=85=8D=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yongchao1 <297389370@qq.com>
---
 .../test/train_performance_1p_profiling2.sh   | 201 ++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh

diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
new file mode 100644
index 000000000..01dab899e
--- /dev/null
+++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+
+# Current path; no need to modify
+cur_path=`pwd`
+
+# Collective communication parameters; no need to modify
+
+export RANK_SIZE=1
+export JOB_ID=10087
+RANK_ID_START=0
+
+
+# Dataset path; keep empty, no need to modify
+data_path=""
+
+# Basic parameters; review and adjust per model
+# Network name, same as the directory name
+Network="ResNet50_ID0360_for_TensorFlow2.X"
+# Training epochs
+train_epochs=2
+# Training batch_size
+batch_size=256
+# Training steps
+train_steps=`expr 1281167 / ${batch_size}`
+# Learning rate
+learning_rate=0.495
+
+# TF2.X only; review and adjust per model
+export NPU_LOOP_SIZE=${train_steps}
+
+# Debug/maintenance parameters; precision_mode should be reviewed per model
+precision_mode="allow_mix_precision"
+# Maintenance parameters; the following need no modification
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=True
+
+# Help message; no need to modify
+if [[ $1 == --help || $1 == -h ]];then
+    echo "usage:./train_performance_1p_profiling2.sh <args>"
+    echo " "
+    echo "parameter explain:
+    --precision_mode         precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+    --over_dump              if or not over detection, default is False
+    --data_dump_flag         data dump flag, default is False
+    --data_dump_step         data dump step, default is 10
+    --profiling              if or not profiling for performance debug, default is True
+    --data_path              source data of training
+    -h/--help                show help message
+    "
+    exit 1
+fi
+
+# Argument parsing; no need to modify. "$@" keeps each argument intact.
+for para in "$@"
+do
+    if [[ $para == --precision_mode* ]];then
+        precision_mode=`echo ${para#*=}`
+    elif [[ $para == --over_dump* ]];then
+        over_dump=`echo ${para#*=}`
+        over_dump_path=${cur_path}/output/overflow_dump
+        mkdir -p ${over_dump_path}
+    elif [[ $para == --data_dump_flag* ]];then
+        data_dump_flag=`echo ${para#*=}`
+        data_dump_path=${cur_path}/output/data_dump
+        mkdir -p ${data_dump_path}
+    elif [[ $para == --data_dump_step* ]];then
+        data_dump_step=`echo ${para#*=}`
+    elif [[ $para == --profiling* ]];then
+        profiling=`echo ${para#*=}`
+        profiling_dump_path=${cur_path}/output/profiling
+        mkdir -p ${profiling_dump_path}
+    elif [[ $para == --data_path* ]];then
+        data_path=`echo ${para#*=}`
+    elif [[ $para == --conda_name* ]];then
+        conda_name=`echo ${para#*=}`
+        source set_conda.sh
+        source activate $conda_name
+    fi
+done
+
+# Check that data_path was passed in; no need to modify
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+    exit 1
+fi
+
+# Training start time; no need to modify
+start_time=$(date +%s)
+
+# Enter the training script directory; review and adjust per model
+cd $cur_path/../tensorflow
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+    # Set environment variables; no need to modify
+    echo "Device ID: $ASCEND_DEVICE_ID"
+    export RANK_ID=$RANK_ID
+
+
+
+    # Create the DeviceID output directory; no need to modify
+    if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+        rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    else
+        mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+    fi
+    # CPU core binding; remove for models that do not need it, otherwise adjust as needed
+    cpucount=`lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}'`
+    cpustep=`expr $cpucount / 8`
+    echo "taskset c steps:" $cpustep
+    let a=RANK_ID*$cpustep
+    let b=RANK_ID+1
+    let c=b*$cpustep-1
+
+    # Run the training script added by this series; the arguments listed below need no modification, others should be reviewed per model
+    #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path
+    nohup taskset -c $a-$c python3 resnet_ctl_imagenet_main_profiling.py \
+        --data_dir=${data_path} \
+        --num_accumulation_steps=1 \
+        --train_steps=${train_steps} \
+        --train_epochs=${train_epochs} \
+        --model_dir=${cur_path}/output/$ASCEND_DEVICE_ID/ckpt \
+        --distribution_strategy=off \
+        --use_tf_while_loop=true \
+        --use_tf_function=true \
+        --enable_checkpoint_and_export \
+        --steps_per_loop=${train_steps} \
+        --base_learning_rate=${learning_rate} \
+        --momentum=0.901 \
+        --epochs_between_evals=1 \
+        --eval_offset_epochs=2 \
+        --optimizer=SGD \
+        --label_smoothing=0.1 \
+        --single_l2_loss_op \
+        --warmup_epochs=5 \
+        --weight_decay=0.000025 \
+        --lr_schedule=polynomial \
+        --drop_eval_remainder=True \
+        --precision_mode=${precision_mode} \
+        --over_dump=${over_dump} \
+        --over_dump_path=${over_dump_path} \
+        --data_dump_flag=${data_dump_flag} \
+        --data_dump_step=${data_dump_step} \
+        --data_dump_path=${data_dump_path} \
+        --batch_size=${batch_size} \
+        --profiling=${profiling} \
+        --profiling_dump_path=${profiling_dump_path} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time; no need to modify
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no need to modify
+echo "------------------ Final result ------------------"
+# Output performance FPS; review and adjust per model
+FPS=`grep TimeHistory $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $6}'`
+# Print; no need to modify
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep eval_accuracy $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep -v mlp_log|awk 'END {print $5}'| sed 's/,//g' |cut -c 1-5`
+# Print; no need to modify
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Performance monitoring result summary
+# Training case information; no need to modify
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'profiling2'_'perf'
+
+## Collect performance data; no need to modify
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration in ms; left empty when no FPS value was found in the log
+TrainingTime=`awk -v bs="${BatchSize}" -v fps="${FPS}" 'BEGIN{if (fps+0 > 0) printf "%.2f\n", bs*1000/fps}'`
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model
+grep train_loss $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|grep -v BatchTimestamp|awk '{print $10}'|sed 's/,//g'|sed '/^$/d' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss value of the last iteration; no need to modify
+ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+
+# Print key information into ${CaseName}.log; no need to modify
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+sed -i "/AttributeError/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
+sed -i "/MLL/d" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log
\ No newline at end of file
-- 
Gitee

From c601c390c025d755bebce8a5baa5f82354a9926f Mon Sep 17 00:00:00 2001
From: yongchao1 <297389370@qq.com>
Date: Wed, 17 Jul 2024 08:58:36 +0000
Subject: [PATCH 3/3] =?UTF-8?q?update=20=20ResNet50=5FID0360=5Ffor=5FTenso?=
 =?UTF-8?q?rFlow2.X/test/train=5Fperformance=5F1p=5Fprofiling2.sh.=20?=
 =?UTF-8?q?=E6=96=B0=E5=A2=9Eprofiling=E9=80=82=E9=85=8D=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yongchao1 <297389370@qq.com>
---
 .../test/train_performance_1p_profiling2.sh       | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
index 01dab899e..454adee05 100644
--- a/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
+++ b/TensorFlow2/built-in/cv/image_classification/ResNet50_ID0360_for_TensorFlow2.X/test/train_performance_1p_profiling2.sh
@@ -154,6 +154,21 @@ wait
 end_time=$(date +%s)
 e2e_time=$(( $end_time - $start_time ))
 
+# Parse and export the collected profiling data with msprof, which is expected
+# next to the "acp" entry found under the Ascend install directory.
+acp_path=$(find /usr/local/Ascend/ -name acp 2>/dev/null | head -n 1)
+msprof_dir=$(dirname "${acp_path}")
+if [ -n "${acp_path}" ] && [ -x "${msprof_dir}/msprof" ];then
+    # Run in a subshell so the working directory of this script is unchanged.
+    (
+        cd "${msprof_dir}" || exit 1
+        ./msprof --parse=on --output=$cur_path/../tensorflow/profiling
+        ./msprof --export=on --output=$cur_path/../tensorflow/profiling
+    )
+else
+    echo "[Warning] msprof not found under /usr/local/Ascend/, skip profiling parse/export"
+fi
+
 # Print results; no need to modify
 echo "------------------ Final result ------------------"
 # Output performance FPS; review and adjust per model
-- 
Gitee