diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
index 8c70b73c91bc29a7547406b6c652ca7d416f1a72..af51680a73c92e9c0e97c9bc07995f8a86df2281 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
@@ -41,6 +41,7 @@ import modeling
 import optimization
 import tokenization
 import six
+import time
 import tensorflow as tf
 from npu_bridge.estimator.npu.npu_config import NPURunConfig
 from npu_bridge.estimator import npu_ops
@@ -50,6 +51,80 @@ flags = tf.flags
 
 FLAGS = flags.FLAGS
 
+class _TrainHook(tf.train.SessionRunHook):
+  """Logs loss and runtime."""
+  def __init__(self, num_train_steps):
+      self.num_train_steps = num_train_steps / FLAGS.iterations_per_loop
+
+  def after_create_session(self, session, coord):
+      self.init_time = time.time()
+      self.hist_time= 0
+      self.hist_fps = 0
+      self.hist_samples = 0
+      self.epoch1_time= 0
+      self.epoch_fps = 0
+      self.epoch1_samples = 0
+      self.epoch2_time= 0
+      self.epoch2_samples = 0
+
+  def begin(self):
+      self._step = 0
+      self._start_time = time.time()
+
+  def before_run(self, run_context):
+      self._step += 1
+      self._start_time = time.time()
+
+  def after_run(self, run_context, run_values):
+      duration = time.time() - self._start_time
+      examples_per_sec = FLAGS.train_batch_size * rank_size * FLAGS.iterations_per_loop / duration
+      if self._step <= self.num_train_steps / 2:
+          self.epoch1_time += duration
+          self.epoch_time = self.epoch1_time
+          self.epoch1_samples += FLAGS.train_batch_size * FLAGS.iterations_per_loop
+          self.epoch_samples = self.epoch1_samples
+      else:
+          self.epoch2_time += duration
+          self.epoch_time = self.epoch2_time
+          self.epoch2_samples += FLAGS.train_batch_size * FLAGS.iterations_per_loop
+          self.epoch_samples = self.epoch2_samples
+      epoch = self._step / (self.num_train_steps / 2 ) + 1
+      self.epoch_fps = self.epoch_samples / self.epoch_time * rank_size
+      self.hist_samples += FLAGS.train_batch_size * FLAGS.iterations_per_loop
+      self.hist_time += duration 
+      self.hist_fps = self.hist_samples * rank_size / self.hist_time
+      print ('epoch:%d, step:%d, examples/sec:%.1f, time:%.3f, epoch_time:%.3f, epoch_fps:%.1f, hist_samples:%.1f, hist_time:%.3f, hist_fps:%.1f' % (epoch, self._step * FLAGS.iterations_per_loop,
+                           examples_per_sec, duration, self.epoch_time, self.epoch_fps, self.hist_samples, self.hist_time, self.hist_fps))
+
+class _EvalHook(tf.train.SessionRunHook):
+  """Logs loss and runtime."""
+  def __init__(self, samples_num):
+      self.samples_num = samples_num
+
+  def after_create_session(self, session, coord):
+      self.init_time = time.time()
+      self.hist_time= 0
+      self.hist_fps = 0
+      self.hist_samples = 0
+
+
+  def begin(self):
+      self._step = -1
+      self._start_time = time.time()
+
+  def before_run(self, run_context):
+      self._step += 1
+      self._start_time = time.time()
+
+  def after_run(self, run_context, run_values):
+      duration = time.time() - self._start_time
+      examples_per_sec = FLAGS.predict_batch_size / duration
+      self.hist_samples += FLAGS.predict_batch_size
+      self.hist_time += duration
+      self.hist_fps = self.hist_samples / self.hist_time
+      print ('predict: step:%d, examples/sec:%.1f, time:%.3f, hist_samples:%.1f, hist_time:%.3f, hist_fps:%.1f' % (self._step,
+                           examples_per_sec, duration, self.hist_samples, self.hist_time, self.hist_fps))
+
 rank_size = int(os.getenv("RANK_SIZE"))
 rank_id = int(os.getenv("RANK_ID"))
 
@@ -118,7 +193,7 @@ flags.DEFINE_float(
     "Proportion of training to perform linear learning rate warmup for. "
     "E.g., 0.1 = 10% of training.")
 
-flags.DEFINE_integer("save_checkpoints_steps", 1000,
+flags.DEFINE_integer("save_checkpoints_steps", 100000,
                      "How often to save the model checkpoint.")
 
 flags.DEFINE_integer("num_train_steps", 0,
@@ -1234,7 +1309,7 @@ def main(_):
           seq_length=FLAGS.max_seq_length,
           is_training=True,
           drop_remainder=True)
-      estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
+      estimator.train(input_fn=train_input_fn, max_steps=num_train_steps, hooks=[_TrainHook(num_train_steps)])
   else:
     if FLAGS.do_train:
       train_examples = read_squad_examples(
@@ -1342,7 +1417,7 @@ def main(_):
     # steps.
     all_results = []
     for result in estimator.predict(
-        predict_input_fn, yield_single_examples=True):
+        predict_input_fn, yield_single_examples=True, hooks=[_EvalHook(len(eval_examples))]):
       if len(all_results) % 1000 == 0:
         tf.logging.info("Processing example: %d" % (len(all_results)))
       unique_id = int(result["unique_ids"])