diff --git a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py index 602da0127ef7a357b8677f1c77a05633d0bb206d..b2ac9e6ccb3c8da32e3e93d547d02ffe786eac3d 100644 --- a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py +++ b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/src/pretrain/run_pretraining.py @@ -128,6 +128,10 @@ flags.DEFINE_bool("manual_fp16", True, "Whether to use fp32 or fp16 arithmetic o flags.DEFINE_bool("use_xla", False, "Whether to enable XLA JIT compilation.") +flags.DEFINE_bool("over_dump", False, "Whether to enable overflow.") + +flags.DEFINE_string("over_dump_path", None, "path to save overflow dump files.") + flags.DEFINE_bool("use_fp16", False, "Whether to enable AMP ops.") flags.DEFINE_bool("use_fp16_cls", True, "Whether to use fp16 in cls and pooler.") @@ -605,6 +609,10 @@ def main(_): raise ValueError("AMP and Manual Mixed Precision Training are both activated! 
Error") is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 + if FLAGS.over_dump: + dump_config = DumpConfig(enable_dump_debug = True, dump_path = FLAGS.over_dump_path, dump_debug_mode = "all") + else: + dump_config = DumpConfig(enable_dump_debug = False, dump_path = FLAGS.over_dump_path, dump_debug_mode = "all") config = tf.ConfigProto() if FLAGS.horovod: config.gpu_options.visible_device_list = str(hvd.local_rank()) @@ -621,6 +629,7 @@ def main(_): #run_config = tf.estimator.RunConfig( run_config = NPURunConfig( + dump_config=dump_config, model_dir=FLAGS.output_dir, save_summary_steps=0, session_config=config, diff --git a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh index 8fb89982b2e112c5bdc699951c011e832c0e3c58..2cfc4af1a721c7201a89aa657d6d0299809b9f68 100644 --- a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh +++ b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_full_1p.sh @@ -138,6 +138,8 @@ do --npu_bert_clip_by_global_norm=False \ --distributed=False \ --npu_bert_loss_scale=0 \ + --over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait diff --git a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh index 8fb89982b2e112c5bdc699951c011e832c0e3c58..2cfc4af1a721c7201a89aa657d6d0299809b9f68 100644 --- a/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh +++ b/TensorFlow/built-in/nlp/Bert-base_ID0060_for_TensorFlow/test/train_performance_1p.sh @@ -138,6 +138,8 @@ do --npu_bert_clip_by_global_norm=False \ --distributed=False \ --npu_bert_loss_scale=0 \ + 
--over_dump=${over_dump} \ + --over_dump_path=${over_dump_path} \ --output_dir=${cur_path}/output/${ASCEND_DEVICE_ID}/d_solution/ckpt${ASCEND_DEVICE_ID} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 & done wait