From 98ae26203ba211a9602945e6d5bcace0bfdb70de Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Fri, 19 May 2023 08:26:29 +0000
Subject: [PATCH 1/8] update train_performance_squad1.1_base_8p.sh: set
 NPU_LOOP_SIZE=25, train_steps=400, learning_rate=64e-5

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_performance_squad1.1_base_8p.sh              | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
index e2f0108a0..4b99d0d1b 100644
--- a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
+++ b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
@@ -15,13 +15,13 @@ RANK_ID_START=0
 export RANK_SIZE=8
 export RANK_TABLE_FILE=${cur_path}/configs/rank_table_8p.json
 #性能优化
-export NPU_LOOP_SIZE=100
+export NPU_LOOP_SIZE=25
 #训练epoch，可选
 train_epochs=1
 #训练step
-train_steps=300
+train_steps=400
 #学习率
-learning_rate=8e-5
+learning_rate=64e-5
 ckpt_path=""
 #参数配置
 
-- 
Gitee


From bcee7d98dd16ec52d5ad0d9905c686bcc38b930a Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Fri, 19 May 2023 09:11:13 +0000
Subject: [PATCH 2/8] update train_performance_squad1.1_base_8p.sh: raise
 train_steps from 400 to 600

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_performance_squad1.1_base_8p.sh                  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
index 4b99d0d1b..0ca2f36de 100644
--- a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
+++ b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
@@ -19,7 +19,7 @@ export NPU_LOOP_SIZE=25
 #训练epoch，可选
 train_epochs=1
 #训练step
-train_steps=400
+train_steps=600
 #学习率
 learning_rate=64e-5
 ckpt_path=""
-- 
Gitee


From 2243d6a463d24b112113dffabe20bd333b13446b Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Fri, 19 May 2023 09:26:14 +0000
Subject: [PATCH 3/8] update train_performance_squad1.1_base_8p.sh: raise
 --log_steps from 100 to 200

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_performance_squad1.1_base_8p.sh                  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
index 0ca2f36de..159577614 100644
--- a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
+++ b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
@@ -134,7 +134,7 @@ do
         --learning_rate=${learning_rate} \
         --num_train_epochs=${train_epochs} \
         --model_dir=$cur_path/test/output/$ASCEND_DEVICE_ID/ckpt \
-        --log_steps=100 \
+        --log_steps=200 \
         --steps_per_loop=${NPU_LOOP_SIZE} \
         --train_steps=${train_steps} \
         --num_gpus=1 \
-- 
Gitee


From 677fe1c4649bf18682bc9a4acd309e9c16fa5486 Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Fri, 19 May 2023 09:42:04 +0000
Subject: [PATCH 4/8] update
 bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh:
 add taskset CPU-range computation for core binding

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_performance_squad1.1_base_8p.sh       | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
index 159577614..98643e7a6 100644
--- a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
+++ b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
@@ -121,6 +121,18 @@ do
         mkdir -p $cur_path/test/output/$ASCEND_DEVICE_ID
     fi
 
+    # Core binding: split the CPUs reported by lscpu into 8 equal slices and pick the slice indexed by RANK_ID (remove for models that do not need binding; adjust as required)
+    cpucount=$(lscpu | grep "CPU(s):" | head -n 1 | awk '{print $2}')
+    cpustep=$((cpucount / 8))
+    echo "taskset c steps:" $cpustep
+    a=$((RANK_ID * cpustep))
+    b=$((RANK_ID + 1))
+    c=$((b * cpustep - 1))
+    # Pin only when bind_core is already non-empty; otherwise it stays empty and the launch below runs unpinned
+    if [ "x${bind_core}" != x ];then
+        bind_core="taskset -c $a-$c"
+    fi
+
-    nohup python3 ./official/nlp/bert/run_squad.py \
+    nohup ${bind_core} python3 ./official/nlp/bert/run_squad.py \
         --mode=${MODE} \
         --input_meta_data_path=${SQUAD_DIR}/squad_${SQUAD_VERSION}_meta_data \
-- 
Gitee


From 0d81dddcbd7e7206f6f5a87443b2273a1c50a25b Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Fri, 19 May 2023 10:15:44 +0000
Subject: [PATCH 5/8] update
 bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh:
 comment out train_steps and stop passing --train_steps

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_performance_squad1.1_base_8p.sh                 | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
index 98643e7a6..c4378bebf 100644
--- a/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
+++ b/TensorFlow2/built-in/nlp/bert-squad_ID1566_for_TensorFlow2.X/test/train_performance_squad1.1_base_8p.sh
@@ -19,7 +19,7 @@ export NPU_LOOP_SIZE=25
 #训练epoch，可选
 train_epochs=1
 #训练step
-train_steps=600
+#train_steps=600
 #学习率
 learning_rate=64e-5
 ckpt_path=""
@@ -148,7 +148,6 @@ do
         --model_dir=$cur_path/test/output/$ASCEND_DEVICE_ID/ckpt \
         --log_steps=200 \
         --steps_per_loop=${NPU_LOOP_SIZE} \
-        --train_steps=${train_steps} \
         --num_gpus=1 \
         --distribution_strategy=one_device \
         --sub_model_export_name=sub_model \
-- 
Gitee


From dd20c9f24887393b56f138a9c9e0e5e048d48a77 Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Sat, 20 May 2023 08:27:07 +0000
Subject: [PATCH 6/8] update BertGoogle_Series_for_TensorFlow/run_squad.py:
 disable max_unique_id filter, use drop_remainder=False

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../nlp/BertGoogle_Series_for_TensorFlow/run_squad.py  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
index 8c70b73c9..a15ea0299 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/run_squad.py
@@ -781,8 +781,8 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
     unique_id_to_result[result.unique_id] = result
 
   # process unique id issue
-  max_unique_id = all_results[-1].unique_id
-  print("max_unique_id=%d" % max_unique_id)
+  #max_unique_id = all_results[-1].unique_id
+  #print("max_unique_id=%d" % max_unique_id)
 
   _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
       "PrelimPrediction",
@@ -802,8 +802,8 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
     null_start_logit = 0  # the start logit at the slice with min null score
     null_end_logit = 0  # the end logit at the slice with min null score
     for (feature_index, feature) in enumerate(features):
-      if feature.unique_id > max_unique_id:
-          continue
+      #if feature.unique_id > max_unique_id:
+          #continue
       result = unique_id_to_result[feature.unique_id]
       start_indexes = _get_best_indexes(result.start_logits, n_best_size)
       end_indexes = _get_best_indexes(result.end_logits, n_best_size)
@@ -1336,7 +1336,7 @@ def main(_):
         input_file=eval_writer.filename,
         seq_length=FLAGS.max_seq_length,
         is_training=False,
-        drop_remainder=True)
+        drop_remainder=False)
 
     # If running eval on the TPU, you will need to specify the number of
     # steps.
-- 
Gitee


From 7e8f76ee0ec360fc6c5a8d864492c274b88b523b Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Sat, 20 May 2023 08:43:43 +0000
Subject: [PATCH 7/8] update test/train_ID0495_Bert-Squad_full_1p.sh: fix
 doubled line-continuation backslash after --init_checkpoint

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_ID0495_Bert-Squad_full_1p.sh                     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
index 4c5aca93b..e4ea4691d 100644
--- a/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
+++ b/TensorFlow/built-in/nlp/BertGoogle_Series_for_TensorFlow/test/train_ID0495_Bert-Squad_full_1p.sh
@@ -100,7 +100,7 @@ do
 	nohup python3.7 ${parent_path}/run_squad.py \
         --vocab_file=$vocab_file \
         --bert_config_file=$bert_config_file \
-        --init_checkpoint=$init_checkpoint \\
+        --init_checkpoint=$init_checkpoint \
         --read_tf_record=True \
         --train_file=$train_file \
         --do_predict=True \
-- 
Gitee


From f2a1f24bb63b71a566ca08349bbedd4753c71a3d Mon Sep 17 00:00:00 2001
From: liuyihang <1905527319@qq.com>
Date: Fri, 26 May 2023 03:25:41 +0000
Subject: [PATCH 8/8] update
 Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh: parse
 ActualLoss from the "Loss for final step:" log line

Signed-off-by: liuyihang <1905527319@qq.com>
---
 .../test/train_performance_1p.sh                               | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh
index 6508764ec..00d89de53 100644
--- a/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh
+++ b/TensorFlow/built-in/nlp/Bertsquad_ID0495_for_TensorFlow/test/train_performance_1p.sh
@@ -175,7 +175,8 @@ ActualFPS=`echo "scale=2;${fps} * ${batch_size}"|bc`
 temp1=`echo "1000 * ${batch_size}"|bc`
 TrainingTime=`echo "scale=2;${temp1} / ${ActualFPS}"|bc`
 
-ActualLoss=`grep "loss =" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'loss =' '{print $2}'|awk 'END {print $1}'|tr -d ,`
+ActualLoss=`grep "Loss for final step:" $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log | awk -F 'Loss for final step:' '{print $2}'|awk 'END {print $1}'`
+ActualLoss=${ActualLoss%.}  # strip only a trailing period; "%.*" would cut off the decimals whenever the number is not followed by one
 
 #关键信息打印到${CaseName}.log中，不需要修改
 echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-- 
Gitee