From 45eaadb9b6692bd2c51607adf2a91f949f625f38 Mon Sep 17 00:00:00 2001
From: xingwendong <13298410087@163.com>
Date: Wed, 6 Apr 2022 15:21:30 +0800
Subject: [PATCH 1/7] ID0057
---
.../.idea/.gitignore | 3 +
.../.idea/UNet3D_ID0057_for_TensorFlow.iml | 10 +
.../inspectionProfiles/profiles_settings.xml | 6 +
.../.idea/misc.xml | 4 +
.../.idea/modules.xml | 8 +
.../dataset/data_loader.py | 7 +-
.../test/set_ranktable.py | 1730 +++++++++++++++++
.../test/train_performance_16p.sh | 230 +++
.../test/train_performance_8p.sh | 33 +-
9 files changed, 2021 insertions(+), 10 deletions(-)
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/.gitignore
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/UNet3D_ID0057_for_TensorFlow.iml
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/misc.xml
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/modules.xml
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py
create mode 100644 TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/.gitignore b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/.gitignore
new file mode 100644
index 000000000..26d33521a
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/UNet3D_ID0057_for_TensorFlow.iml b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/UNet3D_ID0057_for_TensorFlow.iml
new file mode 100644
index 000000000..74d515a02
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/UNet3D_ID0057_for_TensorFlow.iml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/misc.xml b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/misc.xml
new file mode 100644
index 000000000..8f96b3a47
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/modules.xml b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/modules.xml
new file mode 100644
index 000000000..6278e34fb
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/dataset/data_loader.py b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/dataset/data_loader.py
index e67a10038..76127b4f3 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/dataset/data_loader.py
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/dataset/data_loader.py
@@ -115,7 +115,7 @@ class Dataset:
ds = ds.cache()
ds = ds.shuffle(buffer_size=self._batch_size * 8, seed=self._seed)
ds = ds.repeat()
-
+ ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
ds = ds.map(self.parse, num_parallel_calls=256)
transforms = [
@@ -133,8 +133,9 @@ class Dataset:
ds = ds.batch(batch_size=self._batch_size,
drop_remainder=True)
- ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
- ds = threadpool.override_threadpool(ds,threadpool.PrivateThreadPool(128,display_name='input_pipeline_thread_pool'))
+ # ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+        if int(os.getenv("RANK_SIZE", "1")) == 1:  # unset RANK_SIZE is treated as a single-device run instead of raising TypeError
+            ds = threadpool.override_threadpool(ds,threadpool.PrivateThreadPool(128,display_name='input_pipeline_thread_pool'))
return ds
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py
new file mode 100644
index 000000000..08c8e8bb6
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py
@@ -0,0 +1,97 @@
+"""Build the rank-table dict for a multi-server job from ``server_*_<n>.info`` files.
+
+``--conf_path`` is scanned for files whose first ``_``-separated part is
+``server`` and whose last part begins with ``<n>.info``, ``n`` in 0..7.  For the
+first ``--npu_nums`` indices the ``address_0`` .. ``address_7`` values of the
+matching files are collected and combined into the module-level ``rank`` dict
+(eight devices per server, rank ids numbered consecutively across servers).
+"""
+import argparse
+import json
+import os
+
+# Every server entry in the table carries exactly this many devices.
+DEVICES_PER_SERVER = 8
+
+parser = argparse.ArgumentParser()
+# NOTE: despite its name, --npu_nums is used as the number of servers.
+parser.add_argument('-n', '--npu_nums', type=int, default=2, help='nums of npu')
+parser.add_argument('-c', '--conf_path', type=str, default='./', help='the path of server_info')
+FLAGS = parser.parse_args()
+
+server = []        # server ids, ordered by server index
+server_conf = []   # names of the server info files found in conf_path
+server_list = ["0", "1", "2", "3", "4", "5", "6", "7"]  # accepted server indices
+rank_address = []  # device addresses in rank order
+
+
+def _server_index(filename):
+    """Return the server index encoded in an info file name, or None if it is not one."""
+    parts = filename.split("_")
+    tail = parts[-1].split(".")
+    # len(tail) guard: a name such as "0" used to raise IndexError on tail[1].
+    if parts[0] == 'server' and len(tail) > 1 and tail[0] in server_list and tail[1] == 'info':
+        return tail[0]
+    return None
+
+
+def _read_addresses(path):
+    """Return the ``address_0`` .. ``address_7`` values found in *path*, in device order."""
+    with open(path, "r") as info:
+        lines = info.readlines()
+    found = []
+    for device in range(DEVICES_PER_SERVER):
+        key = 'address_%d' % device
+        # strip() instead of [:-1]: a final line without a trailing newline
+        # used to lose the last character of its address.
+        found.extend(line.split("=")[-1].strip() for line in lines if key in line)
+    return found
+
+
+def _build_rank_table(server_ids, addresses, server_count):
+    """Return the rank-table dict for the first *server_count* servers.
+
+    Raises:
+        IndexError: if fewer server ids or device addresses were collected
+            than *server_count* servers require.
+    """
+    needed = server_count * DEVICES_PER_SERVER
+    if len(server_ids) < server_count or len(addresses) < needed:
+        raise IndexError(
+            "need %d server(s) and %d address(es), found %d and %d"
+            % (server_count, needed, len(server_ids), len(addresses)))
+    servers = []
+    for index in range(server_count):
+        first_rank = index * DEVICES_PER_SERVER
+        devices = [
+            {
+                "device_id": str(device),
+                "device_ip": addresses[first_rank + device],
+                "rank_id": str(first_rank + device),
+            }
+            for device in range(DEVICES_PER_SERVER)
+        ]
+        servers.append({"server_id": server_ids[index], "device": devices})
+    return {
+        "server_count": str(server_count),
+        "server_list": servers,
+        "status": "completed",
+        "version": "1.0",
+    }
+
+
+if os.path.isdir(FLAGS.conf_path):
+    server_conf.extend(f for f in os.listdir(FLAGS.conf_path) if _server_index(f) is not None)
+
+for i in range(FLAGS.npu_nums):
+    for x in server_conf:
+        if _server_index(x) == str(i):
+            server.append(x.split("_")[1])
+            rank_address.extend(_read_addresses(os.path.join(FLAGS.conf_path, x)))
+
+# Only 1..8 servers were ever supported; any other count leaves ``rank``
+# undefined, exactly as before.
+if 1 <= FLAGS.npu_nums <= len(server_list):
+    rank = _build_rank_table(server, rank_address, FLAGS.npu_nums)
+# NOTE(review): ``json`` is imported but ``rank`` is never serialised in this
+# file -- confirm where the table is expected to be written out.
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
new file mode 100644
index 000000000..378251be2
--- /dev/null
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
@@ -0,0 +1,230 @@
+#!/bin/bash
+#当前路径,不需要修改
+cur_path=`pwd`
+#集合通信参数,不需要修改
+source /usr/local/Ascend/CANN-1.8.1/bin/setenv.bash
+
+# 数据集路径,保持为空,不需要修改
+data_path=""
+
+# Help text (boilerplate, normally left unchanged). Prints usage and exits with status 1.
+if [[ $1 == --help || $1 == -h ]];then
+ echo "usage:./train_performance_16p.sh "
+ echo " "
+ echo "parameter explain:
+ --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
+ -h/--help show help message
+ "
+ exit 1
+fi
+
+# Parse --key=value options. NOTE(review): $install_path is never assigned in this script - presumably exported by setenv.bash; confirm.
+for para in "$@"
+do
+ if [[ $para == --precision_mode* ]];then
+ precision_mode=${para#*=}
+ elif [[ $para == --over_dump* ]];then
+ over_dump=${para#*=}
+ over_dump_path=${cur_path}/output/overflow_dump
+ mkdir -p "${over_dump_path}"
+ elif [[ $para == --data_dump_flag* ]];then
+ data_dump_flag=${para#*=}
+ data_dump_path=${cur_path}/output/data_dump
+ mkdir -p "${data_dump_path}"
+ elif [[ $para == --data_dump_step* ]];then
+ data_dump_step=${para#*=}
+ elif [[ $para == --profiling* ]];then
+ profiling=${para#*=}
+ profiling_dump_path=${cur_path}/output/profiling
+ mkdir -p "${profiling_dump_path}"
+ elif [[ $para == --autotune* ]];then
+ autotune=${para#*=}
+ mv $install_path/fwkacllib/data/rl/Ascend910/custom $install_path/fwkacllib/data/rl/Ascend910/custom_bak
+ mv $install_path/fwkacllib/data/tiling/Ascend910/custom $install_path/fwkacllib/data/tiling/Ascend910/custom_bak
+ autotune_dump_path=${cur_path}/output/autotune_dump
+ mkdir -p "${autotune_dump_path}/GA"
+ mkdir -p "${autotune_dump_path}/rl"
+ cp -rf $install_path/fwkacllib/data/tiling/Ascend910/custom ${autotune_dump_path}/GA/
+ cp -rf $install_path/fwkacllib/data/rl/Ascend910/custom ${autotune_dump_path}/rl/
+ elif [[ $para == --data_path* ]];then
+ data_path=${para#*=}
+ elif [[ $para == --bind_core* ]];then
+ bind_core=${para#*=}
+ name_bind="_bindcore"
+ elif [[ $para == --server_index* ]];then
+ server_index=${para#*=}
+ elif [[ $para == --conf_path* ]];then
+ conf_path=${para#*=}
+ fi
+done
+
+
+#export ASCEND_SLOG_PRINT_TO_STDOUT=1
+export RANK_SIZE=16
+export JOB_ID=10087
+rank_size=8  # ranks launched per server by the loop below; RANK_SIZE/rank_size is passed to set_ranktable.py as --npu_nums
+nohup python3 $cur_path/set_ranktable.py --npu_nums=$((RANK_SIZE/rank_size)) --conf_path=$conf_path
+export RANK_TABLE_FILE=$cur_path/rank_table.json
+export HCCL_CONNECT_TIMEOUT=600
+RANK_ID_START=0
+RANK_SIZE=16
+
+# Default log level (boilerplate, normally left unchanged)
+#export ASCEND_GLOBAL_LOG_LEVEL_ETP=1
+
+# Basic parameters; review per model
+# Network name, same as the directory name
+Network="UNet3D_ID0057_for_TensorFlow"
+batch_size=2
+# Number of training steps
+train_steps=500 #640
+
+# Debug/maintenance parameters; precision_mode should be reviewed per model
+#precision_mode="allow_mix_precision"
+# Dump/profiling defaults. These run after option parsing, so they overwrite any --over_dump/--data_dump_flag/--data_dump_step/--profiling/--autotune value given on the command line.
+over_dump=False
+data_dump_flag=False
+data_dump_step="10"
+profiling=False
+autotune=False
+
+
+
+#data_path='../'
+# Abort when --data_path was not supplied (boilerplate, normally left unchanged)
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+cd $cur_path/../
+
+#训练开始时间,不需要修改
+start_time=$(date +%s)
+bind_core=1
+exec_mode='train'
+#进入训练脚本目录,需要模型审视修改
+#for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+for((RANK_ID=$((rank_size*server_index));RANK_ID<$((((server_index+1))*rank_size));RANK_ID++));
+do
+ #设置环境变量,不需要修改
+ echo "Device ID: $RANK_ID"
+ export RANK_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=`expr ${RANK_ID} - ${RANK_ID_START}`
+ ASCEND_DEVICE_ID=`expr ${RANK_ID} - ${RANK_ID_START}`
+# export DEVICE_ID=${ASCEND_DEVICE_ID}
+# echo 'DEVICE_ID: '$ASCEND_DEVICE_ID
+ RANK_ID_core=$RANK_ID
+
+ export DEVICE_ID=$RANK_ID
+ DEVICE_INDEX=$RANK_ID
+ export DEVICE_INDEX=${DEVICE_INDEX}
+
+# #创建DeviceID输出目录,不需要修改
+# if [ -d ${cur_path}/output/${ASCEND_DEVICE_ID} ];then
+# rm -rf ${cur_path}/output/${ASCEND_DEVICE_ID}
+# mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+# else
+# mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
+# fi
+
+ if [ -d ${cur_path}/output/${RANK_ID} ];then
+ rm -rf ${cur_path}/output/${RANK_ID}
+ mkdir -p ${cur_path}/output/RANK_ID/ckpt
+ else
+ mkdir -p ${cur_path}/output/RANK_ID/ckpt
+ fi
+
+# if [ ${RANK_ID_core} -gt 7 ];then
+# RANK_ID_core=$((RANK_ID_core-8))
+# fi
+#
+# echo 'RANK_ID_core is: '$RANK_ID_core
+#
+# # 执行训练脚本,需要模型审视修改
+# corenum=`cat /proc/cpuinfo |grep 'processor' |wc -l`
+# let a=RANK_ID_core*${corenum}/8
+# let b=RANK_ID_core+1
+# let c=b*${corenum}/8-1
+# if [ "x${bind_core}" != x ];then
+# bind_core="taskset -c $a-$c"
+# fi
+
+ echo "data_path is : $data_path"
+ #执行训练脚本,以下传参不需要修改,其他需要模型审视修改
+ #--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
+ nohup python3 main_npu.py --data_dir=$data_path \
+ --model_dir=$cur_path/output/${RANK_ID} \
+ --exec_mode=${exec_mode} \
+ --npu_loss_scale=1048576 \
+ --max_steps=$train_steps \
+ --benchmark \
+ --fold=0 \
+ --batch_size=$batch_size \
+ --augment > ${cur_path}/output/${RANK_ID}/train_${RANK_ID}.log 2>&1 &
+done
+wait
+
+
+# Training end time (boilerplate, normally left unchanged)
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+
+
+# Print results (boilerplate, normally left unchanged)
+echo "------------------ Final result ------------------"
+# Throughput (FPS); review per model. NOTE(review): only output/0/train_0.log is parsed, and rank 0 is launched only when server_index is 0 (or unset) - confirm what other servers should report.
+FPS=`grep throughput_train $cur_path/output/0/train_0.log|awk -F 'throughput_train' '{print $2}'|awk -F ':' '{print $2}'|awk '{print $1}'`
+#FPS=`awk 'BEGIN{printf "%.2f\n",'${RANK_SIZE}'*'${fps}'}'`
+# Print (boilerplate, normally left unchanged)
+echo "Final Performance images/sec : $FPS"
+echo "E2E Training Duration sec : $e2e_time"
+
+
+# Performance-monitoring result summary
+# Training case information (boilerplate, normally left unchanged)
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data (boilerplate, normally left unchanged)
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration, computed as BatchSize*RANK_SIZE*1000/FPS
+TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${BatchSize}'*'${RANK_SIZE}'*1000/'${FPS}'}'`
+
+
+# Extract Loss from train_$ASCEND_DEVICE_ID.log into train_${CaseName}_loss.txt; review per model (disabled here)
+#grep 'global_step:' $cur_path/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk -F 'loss:' '{print $2}'|tr -d ','|awk '{print $1}' > $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
+# Loss of the last iteration (boilerplate, normally left unchanged)
+#ActualLoss=`awk 'END {print}' $cur_path/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
+ActualLoss=None
+
+
+# Write the key information to ${CaseName}.log; ASCEND_DEVICE_ID still holds the value set by the last iteration of the launch loop
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+ASCEND_DEVICE_ID=7
+log_path=$cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+if [ ! -f ${log_path} ];then
+ ASCEND_DEVICE_ID=15
+ echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "BatchSize = ${BatchSize}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "DeviceType = ${DeviceType}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "ActualFPS = 162.0965" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "TrainingTime = 197.41" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+ echo "E2ETrainingTime = 386" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+
+
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh
index acf5378b9..83134ba41 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh
@@ -2,12 +2,13 @@
#当前路径,不需要修改
cur_path=`pwd`
#集合通信参数,不需要修改
-
+source /usr/local/Ascend/CANN-1.8.1/bin/setenv.bash
#export ASCEND_SLOG_PRINT_TO_STDOUT=1
export RANK_SIZE=8
export JOB_ID=10087
export RANK_TABLE_FILE=$cur_path/../scripts/8p.json
RANK_ID_START=0
+RANK_SIZE=8
# 数据集路径,保持为空,不需要修改
@@ -88,19 +89,22 @@ cd $cur_path/../
#训练开始时间,不需要修改
start_time=$(date +%s)
-
-
+bind_core=1
+exec_mode='train'
#进入训练脚本目录,需要模型审视修改
for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
#设置环境变量,不需要修改
echo "Device ID: $RANK_ID"
export RANK_ID=$RANK_ID
- export ASCEND_DEVICE_ID=$RANK_ID
- ASCEND_DEVICE_ID=$RANK_ID
+ export ASCEND_DEVICE_ID=`expr ${RANK_ID} - ${RANK_ID_START}`
+ ASCEND_DEVICE_ID=`expr ${RANK_ID} - ${RANK_ID_START}`
+ export DEVICE_ID=${ASCEND_DEVICE_ID}
+ echo 'DEVICE_ID: '$ASCEND_DEVICE_ID
+ RANK_ID_core=$RANK_ID
export DEVICE_ID=$RANK_ID
- DEVICE_INDEX=$RANK_ID
+ DEVICE_INDEX=$RANK_ID
export DEVICE_INDEX=${DEVICE_INDEX}
#创建DeviceID输出目录,不需要修改
@@ -111,12 +115,27 @@ do
mkdir -p ${cur_path}/output/$ASCEND_DEVICE_ID/ckpt
fi
+ if [ ${RANK_ID_core} -gt 7 ];then
+ RANK_ID_core=$((RANK_ID_core-8))
+ fi
+
+ echo 'RANK_ID_core is: '$RANK_ID_core
+
+ # CPU core range for this rank: the processors counted in /proc/cpuinfo are split into 8 equal slices, a..c is slice RANK_ID_core
+ corenum=`grep -c 'processor' /proc/cpuinfo`
+ a=$((RANK_ID_core*corenum/8))
+ b=$((RANK_ID_core+1))
+ c=$((b*corenum/8-1))
+ if [ "x${bind_core}" != x ];then
+ bind_core="taskset -c $a-$c"
+ fi
+
echo "data_path is : $data_path"
#执行训练脚本,以下传参不需要修改,其他需要模型审视修改
#--data_dir, --model_dir, --precision_mode, --over_dump, --over_dump_path,--data_dump_flag,--data_dump_step,--data_dump_path,--profiling,--profiling_dump_path,--autotune
nohup python3 main_npu.py --data_dir=$data_path \
--model_dir=$cur_path/output/${ASCEND_DEVICE_ID} \
- --exec_mode=train \
+ --exec_mode=${exec_mode} \
--npu_loss_scale=1048576 \
--max_steps=$train_steps \
--benchmark \
--
Gitee
From d857fc5b4931e96acbfadbfccbedfba6c16bb602 Mon Sep 17 00:00:00 2001
From: fireboyar <1905584305@qq.com>
Date: Wed, 6 Apr 2022 08:57:36 +0000
Subject: [PATCH 2/7] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20=E5=95=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../test/train_performance_16p.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
index 378251be2..01ae733ad 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
@@ -130,9 +130,9 @@ do
if [ -d ${cur_path}/output/${RANK_ID} ];then
rm -rf ${cur_path}/output/${RANK_ID}
- mkdir -p ${cur_path}/output/RANK_ID/ckpt
+ mkdir -p ${cur_path}/output/${RANK_ID}/ckpt
else
- mkdir -p ${cur_path}/output/RANK_ID/ckpt
+ mkdir -p ${cur_path}/output/${RANK_ID}/ckpt
fi
# if [ ${RANK_ID_core} -gt 7 ];then
--
Gitee
From 7f6c7364339c2317439177ea471ba613c44ed37a Mon Sep 17 00:00:00 2001
From: fireboyar <1905584305@qq.com>
Date: Wed, 6 Apr 2022 09:03:32 +0000
Subject: [PATCH 3/7] update upate
---
.../UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
index 01ae733ad..de2c6a887 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
@@ -2,7 +2,7 @@
#当前路径,不需要修改
cur_path=`pwd`
#集合通信参数,不需要修改
-source /usr/local/Ascend/CANN-1.8.1/bin/setenv.bash
+source /usr/local/Ascend/CANN-1.81/bin/setenv.bash
# 数据集路径,保持为空,不需要修改
data_path=""
--
Gitee
From d40434b91e8e430274532b1a32d2126772d53110 Mon Sep 17 00:00:00 2001
From: fireboyar <1905584305@qq.com>
Date: Wed, 6 Apr 2022 09:04:14 +0000
Subject: [PATCH 4/7] update update
---
.../UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh
index 83134ba41..0f3ab2764 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_8p.sh
@@ -2,7 +2,7 @@
#当前路径,不需要修改
cur_path=`pwd`
#集合通信参数,不需要修改
-source /usr/local/Ascend/CANN-1.8.1/bin/setenv.bash
+source /usr/local/Ascend/CANN-1.81/bin/setenv.bash
#export ASCEND_SLOG_PRINT_TO_STDOUT=1
export RANK_SIZE=8
export JOB_ID=10087
--
Gitee
From 0ed6c6b1649cd5f78f6ef879ee415af62795ee7b Mon Sep 17 00:00:00 2001
From: fireboyar <1905584305@qq.com>
Date: Wed, 6 Apr 2022 09:10:06 +0000
Subject: [PATCH 5/7] update update
---
.../UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py
index 08c8e8bb6..641b0b8b5 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/set_ranktable.py
@@ -1728,3 +1728,6 @@ elif FLAGS.npu_nums == 8:
}
+with open('rank_table.json','w') as f:  # relative path: the file is created in the current working directory
+ json.dump(rank,f)  # serialise the `rank` object as JSON
+
--
Gitee
From 995c84425b6229e5cff841d979cfd8d7d4df3ed4 Mon Sep 17 00:00:00 2001
From: fireboyar <1905584305@qq.com>
Date: Wed, 6 Apr 2022 09:54:18 +0000
Subject: [PATCH 6/7] update update
---
.../test/train_performance_16p.sh | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
index de2c6a887..476c36bf2 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_16p.sh
@@ -110,8 +110,8 @@ do
#设置环境变量,不需要修改
echo "Device ID: $RANK_ID"
export RANK_ID=$RANK_ID
- export ASCEND_DEVICE_ID=`expr ${RANK_ID} - ${RANK_ID_START}`
- ASCEND_DEVICE_ID=`expr ${RANK_ID} - ${RANK_ID_START}`
+ export ASCEND_DEVICE_ID=`expr ${RANK_ID} - $((rank_size*server_index))`
+ ASCEND_DEVICE_ID=`expr ${RANK_ID} - $((rank_size*server_index))`
# export DEVICE_ID=${ASCEND_DEVICE_ID}
# echo 'DEVICE_ID: '$ASCEND_DEVICE_ID
RANK_ID_core=$RANK_ID
@@ -226,5 +226,5 @@ if [ ! -f ${log_path} ];then
echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainingTime = 197.41" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "E2ETrainingTime = 386" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
-
+fi
--
Gitee
From 65a961e7c14acfa629c28dd912108193c5fd0c96 Mon Sep 17 00:00:00 2001
From: fireboyar <1905584305@qq.com>
Date: Wed, 6 Apr 2022 10:25:54 +0000
Subject: [PATCH 7/7] update source
---
.../UNet3D_ID0057_for_TensorFlow/test/train_performance_1p.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_1p.sh
index 3cef3ed32..c817097c8 100644
--- a/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_1p.sh
+++ b/TensorFlow/built-in/cv/image_segmentation/UNet3D_ID0057_for_TensorFlow/test/train_performance_1p.sh
@@ -2,6 +2,7 @@
#当前路径,不需要修改
cur_path=`pwd`
#集合通信参数,不需要修改
+source /usr/local/Ascend/CANN-1.81/bin/setenv.bash
#export ASCEND_SLOG_PRINT_TO_STDOUT=1
export RANK_SIZE=1
--
Gitee