From 7f2a45e7be5e80c8bf316acf593de05a73b69b94 Mon Sep 17 00:00:00 2001 From: limingxing517vim Date: Wed, 17 May 2023 16:07:34 +0800 Subject: [PATCH 1/4] ResNet50_ID0058: sed-patch trainer precision_mode to must_keep_origin_dtype in bs32 1p perf script --- .../test/train_performance_bs32_1p.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh index 0416843d2..195924714 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh @@ -108,6 +108,9 @@ cp data_loader.py $cur_path/../src/data_loader/resnet50/ #训练开始时间,不需要修改 start_time=$(date +%s) cd $cur_path/../ +if [[ ${precision_mode} == "must_keep_origin_dtype" ]];then + sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" src/trainers/gpu_base_trainer.py +fi #进入训练脚本目录,需要模型审视修改 for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); do -- Gitee From 45418215e72e62c920b2eb0d9f28a21b13c30881 Mon Sep 17 00:00:00 2001 From: limingxing517vim Date: Wed, 17 May 2023 16:14:02 +0800 Subject: [PATCH 2/4] ResNet50_ID0058: disable the torch_npu ACL_PRECISION_MODE option block in res50.py --- .../ResNet50_ID0058_for_TensorFlow/src/mains/res50.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index d131f4afc..bcdcf57d6 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -96,10 +96,12 @@ def main(): configs = 'configs' cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file) 
#------------------------------------------------------------------ + ''' if FLAGS.precision_mode == "allow_mix_precision": option = {} option["ACL_PRECISION_MODE"] = "allow_mix_precision" torch_npu.npu.set_option(option) + ''' config = cfg.res50_config() config['iterations_per_loop'] = int(FLAGS.iterations_per_loop) config['max_train_steps'] = int(FLAGS.max_train_steps) -- Gitee From 55d926c7afe41853cf42003a5c9dccb6845bc78c Mon Sep 17 00:00:00 2001 From: limingxing517vim Date: Wed, 17 May 2023 16:38:44 +0800 Subject: [PATCH 3/4] ResNet50_ID0058: also clear modify_mixlist via sed when precision_mode is must_keep_origin_dtype --- .../test/train_performance_bs32_1p.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh index 195924714..230ba4a83 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh @@ -110,6 +110,7 @@ start_time=$(date +%s) cd $cur_path/../ if [[ ${precision_mode} == "must_keep_origin_dtype" ]];then sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" src/trainers/gpu_base_trainer.py + sed -i "s|modify_mixlist='./src/trainers/ReduceMeanD.json'|modify_mixlist=''|g" src/trainers/gpu_base_trainer.py fi #进入训练脚本目录,需要模型审视修改 for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); -- Gitee From 275a1092487672f9590db89ffc756354c48098c2 Mon Sep 17 00:00:00 2001 From: limingxing517vim Date: Wed, 17 May 2023 17:16:25 +0800 Subject: [PATCH 4/4] ResNet50_ID0058: pass precision_mode through config to the trainer and drop the sed rewrites from the perf script --- .../src/mains/res50.py | 2 ++ .../src/trainers/gpu_base_trainer.py | 35 ++++++++++++------- .../test/train_performance_bs32_1p.sh | 4 --- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git 
a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py index bcdcf57d6..ccb3e77d0 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/mains/res50.py @@ -106,6 +106,7 @@ def main(): config['iterations_per_loop'] = int(FLAGS.iterations_per_loop) config['max_train_steps'] = int(FLAGS.max_train_steps) config['debug'] = FLAGS.debug + config['precision_mode'] = FLAGS.precision_mode config['eval'] = FLAGS.eval config['model_dir'] = FLAGS.model_dir if FLAGS.data_path: @@ -117,6 +118,7 @@ def main(): print("iterations_per_loop:%d" %(config['iterations_per_loop'])) print("max_train_steps :%d" %(config['max_train_steps'])) print("debug :%s" %(config['debug'])) + print("precision_mode :%s" %(config['precision_mode'])) print("eval :%s" %(config['eval'])) print("model_dir :%s" %(config['model_dir'])) print("over_dump :%s" %(config['over_dump'])) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer.py b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer.py index 9a517ee19..d51c767a8 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer.py +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/src/trainers/gpu_base_trainer.py @@ -99,18 +99,29 @@ class GPUBaseTrain(object): else : run_config = NPURunConfig(dump_config=dump_config, hcom_parallel=True, precision_mode="allow_mix_precision", save_summary_steps=0, log_step_count_steps=None, enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir'], 
iterations_per_loop=self.config['iterations_per_loop']) else: - if self.config['debug'] : - run_config = NPURunConfig(hcom_parallel=True, - precision_mode="allow_mix_precision", - enable_data_pre_proc=True, - save_checkpoints_steps=112590, - session_config=session_config, - model_dir = self.config['model_dir'], - iterations_per_loop=self.config['iterations_per_loop'], - keep_checkpoint_max=5, - enable_small_channel=1, - modify_mixlist='./src/trainers/ReduceMeanD.json') - else : + if self.config['debug']: + if self.config['precision_mode'] == 'must_keep_origin_dtype': + run_config = NPURunConfig(hcom_parallel=True, + precision_mode="must_keep_origin_dtype", + enable_data_pre_proc=True, + save_checkpoints_steps=112590, + session_config=session_config, + model_dir = self.config['model_dir'], + iterations_per_loop=self.config['iterations_per_loop'], + keep_checkpoint_max=5, + enable_small_channel=1) + else: + run_config = NPURunConfig(hcom_parallel=True, + precision_mode="allow_mix_precision", + enable_data_pre_proc=True, + save_checkpoints_steps=112590, + session_config=session_config, + model_dir=self.config['model_dir'], + iterations_per_loop=self.config['iterations_per_loop'], + keep_checkpoint_max=5, + enable_small_channel=1, + modify_mixlist='./src/trainers/ReduceMeanD.json') + else: run_config = NPURunConfig(hcom_parallel=True, precision_mode="allow_mix_precision", save_summary_steps=0, log_step_count_steps=None, enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir'], iterations_per_loop=self.config['iterations_per_loop']) # run_config = NPURunConfig(enable_data_pre_proc=True,save_checkpoints_secs=1e9, session_config=session_config, model_dir = self.config['model_dir']) diff --git a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh 
b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh index 230ba4a83..0416843d2 100644 --- a/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh +++ b/TensorFlow/built-in/cv/image_classification/ResNet50_ID0058_for_TensorFlow/test/train_performance_bs32_1p.sh @@ -108,10 +108,6 @@ cp data_loader.py $cur_path/../src/data_loader/resnet50/ #训练开始时间,不需要修改 start_time=$(date +%s) cd $cur_path/../ -if [[ ${precision_mode} == "must_keep_origin_dtype" ]];then - sed -i "s|allow_mix_precision|must_keep_origin_dtype|g" src/trainers/gpu_base_trainer.py - sed -i "s|modify_mixlist='./src/trainers/ReduceMeanD.json'|modify_mixlist=''|g" src/trainers/gpu_base_trainer.py -fi #进入训练脚本目录,需要模型审视修改 for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++)); do -- Gitee