diff --git a/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/FasterRcnn/mask_rcnn_model.py b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/FasterRcnn/mask_rcnn_model.py
index ce49fbf91d9dd51991be0ec7c0389da6d4e65129..b1337931415fc238abbbac3f06a2300c732499d1 100644
--- a/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/FasterRcnn/mask_rcnn_model.py
+++ b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/FasterRcnn/mask_rcnn_model.py
@@ -60,6 +60,7 @@ import roi_ops
 import spatial_transform_ops
 import training_ops
 import sys
+import time
 #sys.path.append('tpu/models/official/mnasnet')
 #import mnasnet_models
 from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
@@ -768,6 +769,9 @@ def _model_fn(features, labels, mode, params, variable_filter_fn=None):
       )
 
   class LoadPretrainBackboneHook(tf.train.SessionRunHook):
+    def __init__(self):
+      self.t0 = None
+
     def begin(self):
       tf.logging.info("################ LoadPretrainBackboneHook.begin")
       self.saver_backbone = tf.train.Saver(tf.global_variables(scope=params['backbone']))
@@ -790,6 +794,13 @@ def _model_fn(features, labels, mode, params, variable_filter_fn=None):
       self.saver_backbone.restore(session, latest_ckpt)
       tf.logging.info(f"################ ... DONE loading backbone checkpoint.")
 
+    def before_run(self, run_context):
+      self.t0 = time.time()
+
+    def after_run(self, run_context, run_values):
+      batch_time = time.time() - self.t0
+      tf.logging.info(f"################ ... batch_time = {batch_time}")
+
   return tf.estimator.EstimatorSpec(
       mode=mode, loss=total_loss, train_op=train_op,
       training_hooks=[logging_hook, LoadPretrainBackboneHook()])
diff --git a/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh
index 1fdbeaf902eed4263c6754fd4d41cdfa322f998a..5d5a69fd05812ba2f344db5a21f169aa25318a2b 100644
--- a/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh
+++ b/TensorFlow/built-in/cv/detection/FasterRcnn_resnet50_ID0010_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -152,6 +152,9 @@ for pid in "${pids[@]}"; do
     FPS=`awk 'BEGIN{printf "%.2f\n",'${batch_size}'*'${FPS}'}'`
     echo "Final Performance images/sec : $FPS"
 
+    #输出CompileTime
+    CompileTime=`grep 'batch_time' $log_file|head -n 2|awk '{sum+=$5} END {print sum}'`
+
     ############################## 精度结果处理 ##############################
     #精度计算,需要根据网络修改
     train_accuracy=`grep "Average Precision" $log_file | awk 'NR==1 {print $NF}'`
@@ -181,6 +184,7 @@ for pid in "${pids[@]}"; do
     echo "TrainingTime = ${TrainingTime}" >> $output_dir/$RANK_ID/${CaseName}.log
     echo "ActualLoss = ${ActualLoss}" >> $output_dir/$RANK_ID/${CaseName}.log
     echo "E2ETrainingTime = ${e2e_time}" >> $output_dir/$RANK_ID/${CaseName}.log
+    echo "CompileTime = ${CompileTime}" >> $output_dir/$RANK_ID/${CaseName}.log
     if [ $train_accuracy ]; then
         echo "TrainAccuracy = ${train_accuracy}" >> $output_dir/$RANK_ID/${CaseName}.log
     fi
diff --git a/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/src/model_fn.py b/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/src/model_fn.py
index 68ed40e0743b13a60664fa58b249984be2c70ef2..a5b6a4beb0570aec7965f06cb5f683dbbda8cf00 100644
--- a/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/src/model_fn.py
+++ b/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/src/model_fn.py
@@ -37,10 +37,26 @@ from __future__ import print_function
 from npu_bridge.npu_init import *
 
 import tensorflow as tf
+import time
 
 from src.network import unet_3d_network
 
 
+class BenchmarkLoggingHook(tf.train.SessionRunHook):
+
+    def __init__(self):
+        self.current_step = 0
+        self.t0 = None
+
+    def before_run(self, run_context):
+        self.t0 = time.time()
+
+    def after_run(self, run_context, run_values):
+        batch_time = time.time() - self.t0
+        if self.current_step <= 5:
+            tf.logging.info(f"################ ... current_step = {self.current_step}, batch_time = {batch_time}")
+        self.current_step += 1
+
+
 def model_fn(features, labels, mode, params):
     """ Custom estimator setup as per docs and guide:
@@ -159,4 +175,4 @@ def model_fn(features, labels, mode, params):
         train_op = optimizer.minimize(loss, global_step=global_step)
     else:
         train_op = optimizer.minimize(loss, global_step=global_step)
-    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
+    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[BenchmarkLoggingHook()])
diff --git a/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/test/train_RT2_performance_1p.sh
index 458715061d0d6280d2e1388d33e26df521b0abf4..e41585c65fd4a66fc761865eb8704a0b1a9c1cbf 100644
--- a/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/test/train_RT2_performance_1p.sh
+++ b/TensorFlow/built-in/cv/image_classification/3D_UNet-Medical_ID1462_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -135,6 +135,9 @@ steps_sec=`grep "global_step/sec" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASC
 FPS=`echo "${steps_sec} ${batch_size}" | awk '{printf("%.4f\n",$1*$2)}'`
 echo "Final Performance images/sec : $FPS"
 
+#输出CompileTime
+CompileTime=`grep 'batch_time' $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|head -n 2|awk '{sum+=$8} END {print sum}'`
+
 #输出训练精度,需要模型审视修改
 train_accuracy=`grep "iou =" $cur_path/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk 'END {print $12}'|cut -d , -f 1`
 #打印,不需要修改
@@ -170,3 +173,4 @@ echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${Ca
 echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CompileTime = ${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_RT2_performance_1p.sh b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_RT2_performance_1p.sh
index 01a0086fa5e8151aa66ead2b23dd59e63abe7c8b..29983a719a89eea1db149e17b2b8b7fb9a364dcf 100644
--- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_RT2_performance_1p.sh
+++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/test/train_RT2_performance_1p.sh
@@ -136,6 +136,9 @@ FPS=`grep train_throughput $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_D
 #打印,不需要修改
 echo "Final Performance images/sec : $FPS"
 
+#输出CompileTime
+CompileTime=`grep 'step/s' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log| head -n 2| awk '{sum+=$9} END {print sum}'`
+
 #输出训练精度,需要模型审视修改
 train_accuracy=null
 #打印,不需要修改
@@ -172,3 +175,4 @@ echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${Ca
 echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
 echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CompileTime = ${CompileTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/benchmark_hooks.py b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/benchmark_hooks.py
index a2e58217fe21ad6ea956156137a848a1a7649d76..7bea84a08a09f8a00d0a534e50456d008e8b8e33 100644
--- a/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/benchmark_hooks.py
+++ b/TensorFlow/built-in/recommendation/WideDeep_ID2940_for_TensorFlow/util/hooks/benchmark_hooks.py
@@ -40,6 +40,8 @@ class BenchmarkLoggingHook(tf.train.SessionRunHook):
     def after_run(self, run_context, run_values):
         batch_time = time.time() - self.t0
         samplesps = self.global_batch_size / batch_time
+        if self.current_step <= 5:
+            dllogger.log(data={"step/s": batch_time}, step=(0, self.current_step))
+
     if self.current_step >= self.warmup_steps:
         self.mean_throughput.consume(samplesps)
         dllogger.log(data={"samplesps": samplesps}, step=(0, self.current_step))
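
Note: every hunk above applies the same pattern. A tf.train.SessionRunHook takes a timestamp in before_run, measures the elapsed wall-clock time in after_run, and logs it for the first few steps; the test scripts then grep those log lines and sum the first two values as a CompileTime figure, treating the leading steps as a proxy for graph-compilation overhead. The stand-alone sketch below only illustrates that hook pattern under the TF 1.x Estimator API; the class name StepTimeLoggingHook, the log wording, and the usage line are hypothetical and do not come from the patch itself.

import time
import tensorflow as tf


class StepTimeLoggingHook(tf.train.SessionRunHook):
    """Illustrative sketch of the per-step timing hook used in the patch.

    Names and log format here are placeholders, not identifiers from the diff.
    """

    def __init__(self, log_steps=5):
        self.log_steps = log_steps    # how many leading steps to report
        self.current_step = 0
        self.t0 = None

    def before_run(self, run_context):
        # Timestamp taken just before the session executes the training step.
        self.t0 = time.time()

    def after_run(self, run_context, run_values):
        # Elapsed wall-clock time for this step; a downstream script can sum
        # the first couple of values as a compile-time proxy.
        batch_time = time.time() - self.t0
        if self.current_step <= self.log_steps:
            tf.logging.info("batch_time = %f (step %d)", batch_time, self.current_step)
        self.current_step += 1


# Hypothetical usage with an existing Estimator:
# estimator.train(input_fn=train_input_fn, hooks=[StepTimeLoggingHook()])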