diff --git a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py index 602da0127ef7a357b8677f1c77a05633d0bb206d..b2ac9e6ccb3c8da32e3e93d547d02ffe786eac3d 100644 --- a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py +++ b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py @@ -128,6 +128,10 @@ flags.DEFINE_bool("manual_fp16", True, "Whether to use fp32 or fp16 arithmetic o flags.DEFINE_bool("use_xla", False, "Whether to enable XLA JIT compilation.") +flags.DEFINE_bool("over_dump", False, "Whether to enable overflow.") + +flags.DEFINE_string("over_dump_path", None, "path to save overflow dump files.") + flags.DEFINE_bool("use_fp16", False, "Whether to enable AMP ops.") flags.DEFINE_bool("use_fp16_cls", True, "Whether to use fp16 in cls and pooler.") @@ -605,6 +609,10 @@ def main(_): raise ValueError("AMP and Manual Mixed Precision Training are both activated! 
Error") is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + if FLAGS.over_dump: + dump_config = DumpConfig(enable_dump_debug = True, dump_path = FLAGS.over_dump_path, dump_debug_mode = "all") + else: + dump_config = DumpConfig(enable_dump_debug = False, dump_path = FLAGS.over_dump_path, dump_debug_mode = "all") config = tf.ConfigProto() if FLAGS.horovod: config.gpu_options.visible_device_list = str(hvd.local_rank()) @@ -621,6 +629,7 @@ def main(_): #run_config = tf.estimator.RunConfig( run_config = NPURunConfig( + dump_config=dump_config, model_dir=FLAGS.output_dir, save_summary_steps=0, session_config=config, diff --git a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh index 8fb89982b2e112c5bdc699951c011e832c0e3c58..2cfc4af1a721c7201a89aa657d6d0299809b9f68 100644 --- a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh @@ -138,6 +138,8 @@ do --npu_bert_clip_by_global_norm=False \ --distributed=False \ --npu_bert_loss_scale=0 \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait diff --git a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh index 8fb89982b2e112c5bdc699951c011e832c0e3c58..2cfc4af1a721c7201a89aa657d6d0299809b9f68 100644 --- a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh @@ -138,6 +138,8 @@ do --npu_bert_clip_by_global_norm=False \ --distributed=False \ --npu_bert_loss_scale=0 \ + 
--over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait