From 3a38cd7d666f05402bdc0125aa736ef08583630c Mon Sep 17 00:00:00 2001
From: xingwendong <13298410087@163.com>
Date: Fri, 8 Apr 2022 15:37:34 +0800
Subject: [PATCH] add
---
.../.idea/.gitignore | 3 +
.../.idea/YOLOv5_ID1719_for_TensorFlow2.X.iml | 14 +
.../inspectionProfiles/profiles_settings.xml | 6 +
.../.idea/misc.xml | 4 +
.../.idea/modules.xml | 8 +
.../core/dataset.py | 10 +-
.../test/set_ranktable.py | 1733 +++++++++++++++++
.../test/train_performance_16p.sh | 225 +++
.../test/train_performance_1p.sh | 204 ++
.../test/train_performance_8p.sh | 42 +-
.../YOLOv5_ID1719_for_TensorFlow2.X/train.py | 27 +-
11 files changed, 2241 insertions(+), 35 deletions(-)
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/.gitignore
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/YOLOv5_ID1719_for_TensorFlow2.X.iml
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/inspectionProfiles/profiles_settings.xml
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/misc.xml
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/modules.xml
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/set_ranktable.py
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_16p.sh
create mode 100644 TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_1p.sh
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/.gitignore b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/.gitignore
new file mode 100644
index 000000000..26d33521a
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/YOLOv5_ID1719_for_TensorFlow2.X.iml b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/YOLOv5_ID1719_for_TensorFlow2.X.iml
new file mode 100644
index 000000000..8e5446ac9
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/YOLOv5_ID1719_for_TensorFlow2.X.iml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/inspectionProfiles/profiles_settings.xml b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/misc.xml b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/misc.xml
new file mode 100644
index 000000000..c456a360d
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/modules.xml b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/modules.xml
new file mode 100644
index 000000000..ee76a2b33
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/core/dataset.py b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/core/dataset.py
index e7f0ad578..c8348d839 100644
--- a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/core/dataset.py
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/core/dataset.py
@@ -63,7 +63,7 @@ class Dataset(object):
self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
self.max_bbox_per_scale = 450
- self.annotations = self.load_annotations()
+        self.annotations = self.load_annotations(FLAGS)
self.num_samples = len(self.annotations)
self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
self.batch_count = 0
@@ -71,7 +71,7 @@ class Dataset(object):
self._data_buff = dict()
- def load_annotations(self):
+    def load_annotations(self, FLAGS=None):
with open(self.annot_path, "r") as f:
txt = f.readlines()
if self.dataset_type == "converted_coco":
@@ -106,6 +106,18 @@ class Dataset(object):
annotations.append(image_path + string)
np.random.shuffle(annotations)
+
+        # shard: each rank keeps one equally sized contiguous slice of the
+        # shuffled list; the len(annotations) % rank_size leftover samples
+        # are assigned to no rank.
+        # NOTE(review): shards are only disjoint across ranks if every rank
+        # produced the same shuffle above (same NumPy seed) - confirm.
+        rank_size = int(FLAGS.rank_size) if FLAGS is not None else 1
+        if rank_size > 1:
+            rank = int(FLAGS.rank)
+            shard_len = len(annotations) // rank_size
+            annotations = annotations[shard_len * rank:shard_len * (rank + 1)]
+
return annotations
def __iter__(self):
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/set_ranktable.py b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/set_ranktable.py
new file mode 100644
index 000000000..641b0b8b5
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/set_ranktable.py
@@ -0,0 +1,102 @@
+"""Build ``rank_table.json`` from ``server_<id>_<index>.info`` files.
+
+Each matching file in ``--conf_path`` describes one server: the ``<id>`` part
+of its name becomes ``server_id`` and the values of its ``address_0`` ..
+``address_7`` lines become the ``device_ip`` of devices 0..7.  Ranks are
+numbered consecutively across servers in ``<index>`` order.
+"""
+import argparse
+import json
+import os
+
+DEVICES_PER_SERVER = 8
+OUTPUT_FILE = 'rank_table.json'
+
+
+def parse_flags():
+    """Parse the command line; ``--npu_nums`` is used as the server count."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-n', '--npu_nums', type=int, default=2, help='nums of npu')
+    parser.add_argument('-c', '--conf_path', type=str, default='./', help='the path of server_info')
+    flags = parser.parse_args()
+    if flags.npu_nums < 1:
+        # Reject counts that cannot describe a cluster before any file is
+        # opened for writing, so an existing rank table is never truncated.
+        parser.error('--npu_nums must be >= 1')
+    return flags
+
+
+def conf_index(name):
+    """Return the ``<index>`` text of ``server_<id>_<index>.info``, else None."""
+    parts = name.split("_")
+    tail = parts[-1].split(".")
+    # The len(tail) guard keeps a stray name such as "x_0" (no extension
+    # after the last "_" segment) from raising IndexError.
+    if parts[0] == 'server' and len(tail) > 1 and tail[1] == 'info':
+        return tail[0]
+    return None
+
+
+def read_device_ips(path):
+    """Return the address values of *path*, grouped by device number 0..7."""
+    with open(path, "r") as conf:
+        lines = conf.readlines()
+    ips = []
+    for device in range(DEVICES_PER_SERVER):
+        key = 'address_%d' % device
+        # strip() rather than dropping the last character: a final line
+        # without a trailing newline must keep its whole address.
+        ips.extend(line.split("=")[-1].strip() for line in lines if key in line)
+    return ips
+
+
+def collect_servers(conf_path, server_count):
+    """Return (server ids, flat device-ip list) for indices 0..server_count-1."""
+    names = os.listdir(conf_path) if os.path.isdir(conf_path) else []
+    servers = []
+    ips = []
+    for index in range(server_count):
+        for name in names:
+            if conf_index(name) == str(index):
+                servers.append(name.split("_")[1])
+                ips.extend(read_device_ips(os.path.join(conf_path, name)))
+    return servers, ips
+
+
+def build_rank_table(servers, ips, server_count):
+    """Assemble the rank-table dict for *server_count* servers of 8 devices."""
+    needed = server_count * DEVICES_PER_SERVER
+    if len(servers) < server_count or len(ips) < needed:
+        raise ValueError(
+            'need %d server info files and %d addresses, found %d and %d'
+            % (server_count, needed, len(servers), len(ips)))
+    server_list = []
+    for s in range(server_count):
+        base = s * DEVICES_PER_SERVER
+        devices = [
+            {"device_id": str(d), "device_ip": ips[base + d], "rank_id": str(base + d)}
+            for d in range(DEVICES_PER_SERVER)
+        ]
+        server_list.append({"server_id": servers[s], "device": devices})
+    return {
+        "server_count": str(server_count),
+        "server_list": server_list,
+        "status": "completed",
+        "version": "1.0",
+    }
+
+
+def main():
+    """Generate ``rank_table.json`` in the current working directory."""
+    flags = parse_flags()
+    servers, ips = collect_servers(flags.conf_path, flags.npu_nums)
+    try:
+        rank = build_rank_table(servers, ips, flags.npu_nums)
+    except ValueError as err:
+        raise SystemExit('set_ranktable: %s' % err)
+    with open(OUTPUT_FILE, 'w') as f:
+        json.dump(rank, f)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_16p.sh b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_16p.sh
new file mode 100644
index 000000000..d2221c8d3
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_16p.sh
@@ -0,0 +1,225 @@
+#!/bin/bash
+
+# Current working directory; output paths below are built from it (no need to modify)
+cur_path=`pwd`
+
+export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning'
+
+# Help message (no need to modify)
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage:./train_performance_16p.sh "
+ echo " "
+ echo "parameter explain:
+ --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parse --key=value parameters; "$@" keeps each argument intact (no word splitting or globbing)
+for para in "$@"
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --autotune* ]];then
+ autotune=`echo ${para#*=}`
+ mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+ mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+ autotune_dump_path=${cur_path}/output/autotune_dump
+ mkdir -p ${autotune_dump_path}/GA
+ mkdir -p ${autotune_dump_path}/RL
+ cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+ cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/RL/
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ elif [[ $para == --bind_core* ]];then
+ bind_core=`echo ${para#*=}`
+ name_bind="_bindcore"
+ elif [[ $para == --server_index* ]];then
+ server_index=`echo ${para#*=}`
+ elif [[ $para == --conf_path* ]];then
+ conf_path=`echo ${para#*=}`
+ fi
+done
+
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+export RANK_SIZE=16
+export JOB_ID=10087
+rank_size=8 # number of ranks this script launches on one server (see the launch loop)
+nohup python3 $cur_path/set_ranktable.py --npu_nums=$((RANK_SIZE/rank_size)) --conf_path=$conf_path
+export RANK_TABLE_FILE=${cur_path}/rank_table.json
+export HCCL_CONNECT_TIMEOUT=600
+RANK_ID_START=0
+RANK_SIZE=16
+# Dataset path: a --data_path argument takes precedence, otherwise use the default location
+data_path=${data_path:-/npu/traindata/COCO2017}
+
+anno_converted='/npu/traindata/COCO2017/val2017.txt'
+gt_anno_path='/npu/traindata/COCO2017/annotations/instances_val2017.json'
+
+# Avoid the performance drop caused by graph differences between TF2.4 and TF2.6
+export NPU_EXECUTE_OP_BY_ACL=false
+
+# Set the default log level (no need to modify)
+export ASCEND_GLOBAL_LOG_LEVEL_ETP=3
+
+# Basic parameters; review per model
+# Network name, same as the directory name
+Network="YOLOv5_ID1719_for_TensorFlow2.X"
+
+# Training epochs
+stage1_epoch=0
+stage2_epoch=1
+
+# Training batch size
+batch_size=8
+
+train_worker_num=8
+
+# TF2.X only (no need to modify)
+export NPU_LOOPSIZE=1
+
+# Precision mode; like the values below, only a fallback when the flag was not passed
+precision_mode=${precision_mode:-allow_mix_precision}
+# Maintenance parameters: the parse loop runs earlier, so keep any value it already set
+over_dump=${over_dump:-False}
+over_dump_path=${over_dump_path:-}
+data_dump_flag=${data_dump_flag:-False}
+data_dump_path=${data_dump_path:-}
+data_dump_step=${data_dump_step:-1}
+profiling=${profiling:-False}
+autotune=${autotune:-False}
+perf=20
+
+
+# Check that data_path is set (no need to modify)
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be specified"
+ exit 1
+fi
+
+# Training start time (no need to modify)
+start_time=$(date +%s)
+bind_core=1
+# Launch one background training process per rank owned by this server; review per model
+#for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+for((RANK_ID=$((rank_size*server_index));RANK_ID<$((((server_index+1))*rank_size));RANK_ID++));
+do
+ # Set environment variables (no need to modify)
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ ASCEND_DEVICE_ID=$((RANK_ID - rank_size*server_index))
+ export ASCEND_DEVICE_ID
+ export DEVICE_ID=${ASCEND_DEVICE_ID}
+ #echo 'DEVICE_ID: '$ASCEND_DEVICE_ID
+ RANK_ID_core=${ASCEND_DEVICE_ID}
+
+ # Create the per-device output directory (no need to modify)
+ if [ -d ${cur_path}/output/$ASCEND_DEVICE_ID ];then
+ rm -rf ${cur_path}/output/$ASCEND_DEVICE_ID
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+ cd ${cur_path}/output/$ASCEND_DEVICE_ID/
+ # Each process is bound to its own 1/8 slice of the CPU cores. RANK_ID_core is the
+ # local device index (set above), so the slice stays inside this server's core
+ # range for any server_index; the previous "-gt 7 -> minus 8" adjustment was only
+ # correct for the first two servers.
+
+ #echo 'RANK_ID_core is: '$RANK_ID_core
+
+ # Run the training script; review per model
+ corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+ let a=RANK_ID_core*${corenum}/8
+ let b=RANK_ID_core+1
+ let c=b*${corenum}/8-1
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+ #${bind_core} python3 ../../../train.py --weights='' \
+ nohup ${bind_core} python3 ../../../train.py --weights='' \
+ --perf=$perf \
+ --model=yolov5m \
+ --rank=${RANK_ID} \
+ --rank_size=${RANK_SIZE} \
+ --train_worker_num=${train_worker_num} \
+ --data_path=${data_path} \
+ --anno_converted=${anno_converted} \
+ --gt_anno_path=${gt_anno_path} \
+ --batch_size=${batch_size} \
+ --precision_mode=${precision_mode} \
+ --stage1_epoch=${stage1_epoch} \
+ --stage2_epoch=${stage2_epoch} \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --data_dump_flag=${data_dump_flag} \
+ --data_dump_step=${data_dump_step} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time (no need to modify)
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+echo "------------------ Final result ------------------"
+# Report throughput (FPS) parsed from the device-0 log; review per model
+epoch_duration=`grep epoch_duration $cur_path/output/0/train_0.log | awk '{print $2}'`
+first_step=`grep duration: $cur_path/output/0/train_0.log |head -1| awk 'END{print $18}'`
+FPS=`awk 'BEGIN{printf "%.2f\n",('$perf'+'$train_worker_num'-2)/('$epoch_duration'-'$first_step')*'$batch_size'*16}'`
+echo "Final Performance imgs/sec : $FPS"
+
+# Training accuracy would be read from train_$ASCEND_DEVICE_ID.log by keyword (disabled); review per model
+# li=`cat $cur_path/output/0/train_0.log | wc -l`
+# num=$(($li - 1))
+# train_accuracy=`sed -n "${num}p" $cur_path/output/0/train_0.log | awk '{print $3}'`
+# echo "Final Train Accuracy : ${train_accuracy}"
+# End-to-end training duration, computed directly (no need to modify)
+echo "E2E training Duration sec: $e2e_time"
+
+# Test case information (no need to modify)
+DeviceType=`uname -m`
+CaseName=${Network}${name_bind}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data (no need to modify)
+# Throughput
+ActualFPS=${FPS}
+# Time per training iteration
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",('$epoch_duration'-'$first_step')/('$perf'+'$train_worker_num'-2)}'`
+
+## Extract loss values from train_*.log by keyword; review per model
+grep loss $cur_path/output/0/train_0.log|awk '{print $13}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss of the last iteration (no need to modify)
+ActualLoss=`grep total_loss: $cur_path/output/0/train_0.log | awk 'END{print $13}'`
+
+# Write the key results to ${CaseName}.log (no need to modify)
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime= ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# Logs live under output/<local device id> (0..rank_size-1 on every server), not under the global rank id
+for((RANK_ID=0;RANK_ID<rank_size;RANK_ID++));
+do
+ sed -i "/AttributeError/d" $cur_path/output/${RANK_ID}/train_${RANK_ID}.log
+done
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_1p.sh b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_1p.sh
new file mode 100644
index 000000000..c55b4030a
--- /dev/null
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_1p.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+
+# Current working directory (no need to modify)
+cur_path=`pwd`
+
+export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning'
+
+# Collective-communication parameters (no need to modify)
+# Make sure the rank table file rank_table_8p.json is placed in the configs directory next to test
+export RANK_SIZE=1
+#export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
+export JOB_ID=10087
+RANK_ID_START=0
+RANK_SIZE=1
+
+# Dataset path; left empty here and filled from --data_path (no need to modify)
+data_path=""
+
+anno_converted='/npu/traindata/COCO2017/val2017.txt'
+gt_anno_path='/npu/traindata/COCO2017/annotations/instances_val2017.json'
+
+# Avoid the performance drop caused by graph differences between TF2.4 and TF2.6 (currently disabled)
+#export NPU_EXECUTE_OP_BY_ACL=false
+
+# Set the default log level (no need to modify)
+export ASCEND_GLOBAL_LOG_LEVEL_ETP=3
+
+# Basic parameters; review per model
+# Network name, same as the directory name
+Network="YOLOv5_ID1719_for_TensorFlow2.X"
+
+# Training epochs
+stage1_epoch=0
+stage2_epoch=1
+
+# Training batch size
+batch_size=8
+
+train_worker_num=8
+
+# TF2.X only (no need to modify)
+export NPU_LOOPSIZE=1
+
+# Precision mode
+precision_mode='allow_mix_precision'
+# Maintenance parameters (no need to modify)
+over_dump=False
+over_dump_path=''
+data_dump_flag=False
+data_dump_path=''
+data_dump_step="1"
+profiling=False
+autotune=False
+perf=20
+
+# Help message (no need to modify)
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage:./train_performance_1p.sh "
+ echo " "
+ echo "parameter explain:
+ --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ --over_dump if or not over detection, default is False
+ --data_dump_flag data dump flag, default is False
+ --data_dump_step data dump step, default is 1
+ --profiling if or not profiling for performance debug, default is False
+ --data_path source data of training
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parse --key=value parameters; "$@" keeps each argument intact (no word splitting or globbing)
+for para in "$@"
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=`echo ${para#*=}`
+ elif [[ $para == --over_dump* ]];then
+ over_dump=`echo ${para#*=}`
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p ${over_dump_path}
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=`echo ${para#*=}`
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p ${data_dump_path}
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=`echo ${para#*=}`
+ elif [[ $para == --profiling* ]];then
+ profiling=`echo ${para#*=}`
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p ${profiling_dump_path}
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ elif [[ $para == --bind_core* ]]; then
+ bind_core=`echo ${para#*=}`
+ name_bind="_bindcore"
+ fi
+done
+
+# Check that data_path was passed in (no need to modify)
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be specified"
+ exit 1
+fi
+
+# Training start time (no need to modify)
+start_time=$(date +%s)
+bind_core=1 # non-empty, so the taskset prefix is always built in the loop; replaces any --bind_core value parsed earlier
+# Launch the training process(es) in the background; review per model
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ # Set environment variables (no need to modify)
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=$RANK_ID
+ ASCEND_DEVICE_ID=$RANK_ID
+
+ # Create the per-device output directory, removing any previous one (no need to modify)
+ if [ -d ${cur_path}/output/$ASCEND_DEVICE_ID ];then
+ rm -rf ${cur_path}/output/$ASCEND_DEVICE_ID
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+ fi
+ cd ${cur_path}/output/$ASCEND_DEVICE_ID/
+ # Run the training script, bound to a 1/8 slice of the CPU cores (cores a..c); review per model
+ corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+ let a=RANK_ID*${corenum}/8
+ let b=RANK_ID+1
+ let c=b*${corenum}/8-1
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+ #${bind_core} python3 ../../../train.py --weights='' \
+ nohup ${bind_core} python3 ../../../train.py --weights='' \
+ --perf=$perf \
+ --model=yolov5m \
+ --rank=${RANK_ID} \
+ --rank_size=${RANK_SIZE} \
+ --train_worker_num=${train_worker_num} \
+ --data_path=${data_path} \
+ --anno_converted=${anno_converted} \
+ --gt_anno_path=${gt_anno_path} \
+ --batch_size=${batch_size} \
+ --precision_mode=${precision_mode} \
+ --stage1_epoch=${stage1_epoch} \
+ --stage2_epoch=${stage2_epoch} \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --data_dump_flag=${data_dump_flag} \
+ --data_dump_step=${data_dump_step} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+done
+wait
+
+# Training end time (no need to modify)
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+echo "------------------ Final result ------------------"
+# Report throughput (FPS) parsed from the device-0 log; review per model
+epoch_duration=`grep epoch_duration $cur_path/output/0/train_0.log | awk '{print $2}'`
+first_step=`grep duration: $cur_path/output/0/train_0.log |head -1| awk 'END{print $17}'`
+FPS=`awk 'BEGIN{printf "%.2f\n",('$perf'+'$train_worker_num'-2)/('$epoch_duration'-'$first_step')*'$batch_size'*1}'`
+echo "Final Performance imgs/sec : $FPS"
+
+# Training accuracy would be read from train_$ASCEND_DEVICE_ID.log by keyword (disabled); review per model
+# li=`cat $cur_path/output/0/train_0.log | wc -l`
+# num=$(($li - 1))
+# train_accuracy=`sed -n "${num}p" $cur_path/output/0/train_0.log | awk '{print $3}'`
+# echo "Final Train Accuracy : ${train_accuracy}"
+# End-to-end training duration, computed directly (no need to modify)
+echo "E2E training Duration sec: $e2e_time"
+
+# Test case information (no need to modify)
+DeviceType=`uname -m`
+CaseName=${Network}${name_bind}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+
+## Collect performance data (no need to modify)
+# Throughput
+ActualFPS=${FPS}
+# Time per training iteration
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",('$epoch_duration'-'$first_step')/('$perf'+'$train_worker_num'-2)}'`
+
+## Extract loss values from train_*.log by keyword; review per model
+grep loss $cur_path/output/0/train_0.log|awk '{print $13}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+
+# Loss of the last iteration (no need to modify)
+ActualLoss=`grep total_loss: $cur_path/output/0/train_0.log | awk 'END{print $13}'`
+
+# Write the key results to ${CaseName}.log (no need to modify)
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime= ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+ sed -i "/AttributeError/d" $cur_path/output/${RANK_ID}/train_${RANK_ID}.log
+done
\ No newline at end of file
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_8p.sh b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_8p.sh
index 744bdd650..6f24266d5 100644
--- a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_8p.sh
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/test/train_performance_8p.sh
@@ -11,7 +11,7 @@ export RANK_SIZE=8
export RANK_TABLE_FILE=${cur_path}/../configs/rank_table_8p.json
export JOB_ID=10087
RANK_ID_START=0
-
+RANK_SIZE=8
# 数据集路径,保持为空,不需要修改
data_path=""
@@ -22,7 +22,7 @@ gt_anno_path='/npu/traindata/COCO2017/annotations/instances_val2017.json'
export NPU_EXECUTE_OP_BY_ACL=false
#设置默认日志级别,不需要修改
-export ASCEND_GLOBAL_LOG_LEVEL=3
+export ASCEND_GLOBAL_LOG_LEVEL_ETP=3
#基础参数 需要模型审视修改
#网络名称,同目录名称
@@ -103,7 +103,7 @@ fi
#训练开始时间,不需要修改
start_time=$(date +%s)
-
+bind_core=1
#进入训练脚本目录,需要模型审视修改
for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
@@ -129,22 +129,24 @@ do
if [ "x${bind_core}" != x ];then
bind_core="taskset -c $a-$c"
fi
- ${bind_core} python3 ../../../train.py --weights='' \
- --perf=$perf \
- --model=yolov5m \
- --rank=${RANK_ID} \
- --train_worker_num=${train_worker_num} \
- --data_path=${data_path} \
- --anno_converted=${anno_converted} \
- --gt_anno_path=${gt_anno_path} \
- --batch_size=${batch_size} \
- --precision_mode=${precision_mode} \
- --stage1_epoch=${stage1_epoch} \
- --stage2_epoch=${stage2_epoch} \
- --over_dump=${over_dump} \
- --over_dump_path=${over_dump_path} \
- --data_dump_flag=${data_dump_flag} \
- --data_dump_step=${data_dump_step} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
+ #${bind_core} python3 ../../../train.py --weights='' \
+ nohup ${bind_core} python3 ../../../train.py --weights='' \
+ --perf=$perf \
+ --model=yolov5m \
+ --rank=${RANK_ID} \
+ --rank_size=${RANK_SIZE} \
+ --train_worker_num=${train_worker_num} \
+ --data_path=${data_path} \
+ --anno_converted=${anno_converted} \
+ --gt_anno_path=${gt_anno_path} \
+ --batch_size=${batch_size} \
+ --precision_mode=${precision_mode} \
+ --stage1_epoch=${stage1_epoch} \
+ --stage2_epoch=${stage2_epoch} \
+ --over_dump=${over_dump} \
+ --over_dump_path=${over_dump_path} \
+ --data_dump_flag=${data_dump_flag} \
+ --data_dump_step=${data_dump_step} > ${cur_path}/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1 &
done
wait
@@ -178,7 +180,7 @@ ActualFPS=${FPS}
TrainingTime=`awk 'BEGIN{printf "%.2f\n",('$epoch_duration'-'$first_step')/('$perf'+'$train_worker_num'-2)}'`
##获取Loss,通过train_*.log中关键字,需要根据模型审视
-grep loss $cur_path/output/0/train_0.log|awk '{print $13}' >> $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+grep loss $cur_path/output/0/train_0.log|awk '{print $13}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
#最后一个迭代loss值,不需要修改
ActualLoss=`grep total_loss: $cur_path/output/0/train_0.log | awk 'END{print $13}'`
diff --git a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/train.py b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/train.py
index 9bb4994f4..d17bc0cc8 100644
--- a/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/train.py
+++ b/TensorFlow2/built-in/cv/detection/YOLOv5_ID1719_for_TensorFlow2.X/train.py
@@ -65,6 +65,7 @@ flags.DEFINE_integer('train_worker_num', 8, 'train worker num')
flags.DEFINE_boolean('eval_only', False, 'skip train process')
flags.DEFINE_boolean('mosaic', True, 'activate mosaic data augmentation')
flags.DEFINE_integer('rank', 0, 'rank of current device')
+flags.DEFINE_integer('rank_size', 1, 'rank size of current device')
flags.DEFINE_integer('perf', 0, 'run steps for perf')
tic = 0
e_tic = 0
@@ -169,8 +170,8 @@ def main(_argv):
lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * ((1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi)))
optimizer.lr.assign(lr.numpy())
ciou_loss, conf_loss, prob_loss, total_loss = train_execute(image_data, target)
- if FLAGS.rank == 0:
- print("=> STEP %4d/%4d lr: %.6f ciou_loss: %4.2f conf_loss: %4.2f prob_loss: %4.2f total_loss: %4.2f" % (global_steps, total_steps, optimizer.lr.numpy(), ciou_loss, conf_loss, prob_loss, total_loss), end='', flush=True)
+ # if FLAGS.rank == 0:
+ print("=> STEP %4d/%4d lr: %.6f ciou_loss: %4.2f conf_loss: %4.2f prob_loss: %4.2f total_loss: %4.2f" % (global_steps, total_steps, optimizer.lr.numpy(), ciou_loss, conf_loss, prob_loss, total_loss), end='', flush=True)
global_steps.assign_add(1)
if not FLAGS.eval_only:
@@ -204,13 +205,13 @@ def main(_argv):
break
image_data, target, _, _, _, _ = fetcher.process_annotations(annotations)
with mutex_sess_run:
- if FLAGS.rank == 0:
- rstart = time.time()
+ # if FLAGS.rank == 0:
+ rstart = time.time()
train_step(image_data, target)
- if FLAGS.rank == 0:
- duration = time.time() - tic
- print(' ,global_step/sec: %.2f ,duration: %.2f'%((1 / duration), duration), flush=True)
- tic = time.time()
+ # if FLAGS.rank == 0:
+ duration = time.time() - tic
+ print(' ,global_step/sec: %.2f ,duration: %.2f'%((1 / duration), duration), flush=True)
+ tic = time.time()
if FLAGS.perf and (FLAGS.perf < global_steps.numpy()):
break
threads = []
@@ -222,11 +223,11 @@ def main(_argv):
for t in threads:
t.join()
- if FLAGS.rank == 0:
- print('epoch_duration: %d'%(time.time() - e_tic), flush=True)
- e_tic = time.time()
- print('saving checkpoints', flush=True)
- checkpoint.save(checkpoint_dir+'/model.ckpt')
+ # if FLAGS.rank == 0:
+ print('epoch_duration: %d'%(time.time() - e_tic), flush=True)
+ e_tic = time.time()
+ # print('saving checkpoints', flush=True)
+ # checkpoint.save(checkpoint_dir+'/model.ckpt')
if not FLAGS.perf and FLAGS.rank == 0:
evaluator = COCOevaluator(model, testset, cfg.TRAIN.INPUT_SIZE, NUM_CLASS, FLAGS)
evaluator.evaluate()
--
Gitee