diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/README.md b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/README.md
index 7a20326fde0ec59d9d0c8ede43dd5546120420a1..6487b607580c69c94b4fc7f5236a6da09092b4ec 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/README.md
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/README.md
@@ -48,8 +48,13 @@
│ ├── test.sh ----推理
│ ├── validate_gpu.sh ----gpu(v100)上验证模型精度
│ └── validate_npu.sh ----npu(modelarts)上验证模型精度
-│ ├── boot_modelarts.py
-│ ├── help_modelarts.py
+│ ├── test ----新模板(训练入口)
+│       ├── train_full_1p.sh ----npu(modelarts)上完整训练并验证精度
+│       └── train_performance_1p.sh ----npu(modelarts)上性能测试(少量step,关注FPS)
+│   ├── boot_modelarts.py ----已过期,可用于旧版训练
+│   ├── help_modelarts.py ----已过期,可用于旧版训练
+│ ├── modelarts_entry_acc.py ----训练启动文件
+│ ├── modelarts_entry_perf.py ----性能测试启动文件
│ ├── test_bsrn.py ----测试模型
│ ├── train.py ----训练模型
│ ├── output.txt ----训练输出(gpu训练自动生成)
@@ -90,12 +95,8 @@ nohup bash scripts/run_gpu.sh > output.txt 2>&1 &
```
- 训练之前需修改`boot_modelarts.py`中第77行代码为bash_header = os.path.join(code_dir, 'scripts/run_gpu.sh')
-### GPU离线推理 【在线推理待完善】
-命令行切换路径到`tf-bsrn-sr/`,执行以下命令,详细的参数设置请参考脚本中的注释
-```shell
-bash scripts/test.sh
-```
+
### GPU评估
命令行切换路径到`tf-bsrn-sr/`,执行以下命令,详细的参数设置请参考脚本中的注释
@@ -112,17 +113,19 @@ ModelArts的使用请参考[模型开发向导_昇腾CANN社区版(5.0.2.alpha00
配置方式请参考:
+> 旧版,已废弃
+
+> 下图是使用新模板后的更新配置图:
+
+
+
(修改`boot_modelarts.py`中第77行代码bash_header = os.path.join(code_dir, 'scripts/run_npu.sh'),可以设置在NPU上跑还是在GPU上跑)
### 指标对比
均使用相同的训练集以及测试集,训练参数都相同。
-NPU Checkpoints: ([百度云链接,提取码:xxxx]()) 【链接待完善】
-
-GPU Checkpoints: ([百度云链接,提取码:xxxx]()) 【链接待完善】
-
作者论文中提供的各项指标值为:
| | PSNR | SSIM |
@@ -131,8 +134,8 @@ GPU Checkpoints: ([百度云链接,提取码:xxxx]()) 【
**(PSNR, SSIM scores for scale x4 on BSD100 dataset.)**
+##### *×*4-scale BSRN model
-##### *×*4-scale BSRN model 【bsrn gpu, npu指标 待完善】
metrics |
@@ -148,16 +151,15 @@ GPU Checkpoints: ([百度云链接,提取码:xxxx]()) 【
BSRN |
- |
- |
- |
- |
+ 26.444 |
+ 27.387 |
+ 0.680 |
+ 0.702 |
-
### 性能对比
展示bsrn模型在DIV2K 数据集上的训练性能
@@ -168,8 +170,14 @@ GPU Checkpoints: ([百度云链接,提取码:xxxx]()) 【
NPU性能log截图
+> 旧版训练截图
+
+> 下图为使用新模板(1.15)后的训练截图
+
+
+
GPU性能log截图
@@ -179,27 +187,3 @@ GPU Checkpoints: ([百度云链接,提取码:xxxx]()) 【
| NPU | 8 | 0.739 |
| GPU V100 | 8 | 0.828 |
-#### 推理性能 【待完善】
-
-NPU性能log截图
-
-GPU性能log截图
-
-
-
-| 平台 | BatchSize | 训练性能(fps) |
-| :------: | :-------: | :-----------: |
-| NPU | | |
-| GPU V100 | | |
-
-#### 性能调优 【待完善】
-
-##### NPU AutoTune性能
-
-训练时开启AutoTune:
-
-npu训练性能(命令行截图)
-
-| 平台 | BatchSize | 训练性能(fps) |
-| :--: | :-------: | :-----------: |
-| NPU | | |
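The README above switches between GPU and NPU training by pointing `bash_header` in `boot_modelarts.py` (line 77 per the README) at `scripts/run_gpu.sh` or `scripts/run_npu.sh`. A minimal sketch of that one-line toggle; the surrounding variables (`code_dir`) are assumptions about how `boot_modelarts.py` is structured:

```python
# Sketch of the switch the README refers to in boot_modelarts.py
# (line number per the README; surrounding code is assumed).
import os

code_dir = os.path.dirname(os.path.abspath(__file__))  # assumed to match boot_modelarts.py

# GPU (V100) training ...
bash_header = os.path.join(code_dir, 'scripts/run_gpu.sh')
# ... or NPU (ModelArts) training:
# bash_header = os.path.join(code_dir, 'scripts/run_npu.sh')
print(bash_header)
```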
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/boot_modelarts.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/boot_modelarts.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/boot_modelarts.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/boot_modelarts.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/__init__.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__init__.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/__init__.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__init__.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/__init__.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f99b593115bb8fcca17250998a71d1b3285b87cf
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/base_loader.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/base_loader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4186db84b08a1f8bc1d4396ce4b3d20bdc7d939b
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/base_loader.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/basic_loader.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/basic_loader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..41caff3a12fda12d1377aebc83b37177e4271303
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/basic_loader.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/div2k_loader.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/div2k_loader.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7558655bba756c81eca3d0302b8a1fbcf296e583
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/__pycache__/div2k_loader.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/base_loader.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/base_loader.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/base_loader.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/base_loader.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/basic_loader.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/basic_loader.py
similarity index 97%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/basic_loader.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/basic_loader.py
index bd8e58c2bf40888d3018b9b7f2b493c949b159b4..1da40d23fe5299c2447eeacb70da8d04b3b9d051 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/basic_loader.py
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/basic_loader.py
@@ -182,8 +182,9 @@ class BasicLoader(BaseLoader):
has_cached = True
if (image is None):
- image_name_truth = image_name.split("x4")[0]+".png" #add
- image_path = os.path.join(FLAGS.data_truth_path, image_name_truth) #add
+ # image_name_truth = image_name.split("x4")[0]+".png" #add
+ # image_path = os.path.join(FLAGS.data_truth_path, image_name_truth) #add
+ image_path = os.path.join(FLAGS.data_truth_path, image_name)
image = self.tf_image_session.run(self.tf_image, feed_dict={self.tf_image_path:image_path})
if (FLAGS.data_cached and (not has_cached)):
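The hunk above changes how the ground-truth image path is resolved: instead of reconstructing the HR file name by stripping the `x4` suffix from the LR name, the loader now expects the truth file to carry the same name as the input. A small illustration of the difference, using hypothetical file names:

```python
import os

# hypothetical names/path, for illustration only
data_truth_path = "/data/DIV2K/DIV2K_train_HR"
image_name = "0801x4.png"

# old behaviour: derive the HR name by stripping the scale suffix
old_path = os.path.join(data_truth_path, image_name.split("x4")[0] + ".png")  # .../0801.png
# new behaviour: use the input file name as-is
new_path = os.path.join(data_truth_path, image_name)                          # .../0801x4.png
print(old_path, new_path, sep="\n")
```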
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/div2k_loader.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/div2k_loader.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/dataloaders/div2k_loader.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/dataloaders/div2k_loader.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/help_modelarts.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/help_modelarts.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/help_modelarts.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/help_modelarts.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelarts_entry_acc.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelarts_entry_acc.py
new file mode 100644
index 0000000000000000000000000000000000000000..1245d57140da14a640cd6dd7d9d43100b8617b67
--- /dev/null
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelarts_entry_acc.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# 解析输入参数data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# 将sh脚本转换为unix格式(去除Windows换行符)
+os.system("dos2unix ./test/*")
+
+# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定
+# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS
+os.system("bash ./test/train_full_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# 将当前执行目录所有文件拷贝到obs的output进行备份
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelarts_entry_perf.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelarts_entry_perf.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2d23455d4cdec2d46fc273177a247905c751b73
--- /dev/null
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelarts_entry_perf.py
@@ -0,0 +1,63 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+import sys
+
+# 解析输入参数data_url
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_url", type=str, default="/home/ma-user/modelarts/inputs/data_url_0")
+parser.add_argument("--train_url", type=str, default="/home/ma-user/modelarts/outputs/train_url_0/")
+config = parser.parse_args()
+
+print("[CANN-Modelzoo] code_dir path is [%s]" % (sys.path[0]))
+code_dir = sys.path[0]
+os.chdir(code_dir)
+print("[CANN-Modelzoo] work_dir path is [%s]" % (os.getcwd()))
+
+print("[CANN-Modelzoo] before train - list my run files:")
+os.system("ls -al /usr/local/Ascend/ascend-toolkit/")
+
+print("[CANN-Modelzoo] before train - list my dataset files:")
+os.system("ls -al %s" % config.data_url)
+
+print("[CANN-Modelzoo] start run train shell")
+# 将sh脚本转换为unix格式(去除Windows换行符)
+os.system("dos2unix ./test/*")
+
+# 执行train_full_1p.sh或者train_performance_1p.sh,需要用户自己指定
+# full和performance的差异,performance只需要执行很少的step,控制在15分钟以内,主要关注性能FPS
+os.system("bash ./test/train_performance_1p.sh --data_path=%s --output_path=%s " % (config.data_url, config.train_url))
+
+print("[CANN-Modelzoo] finish run train shell")
+
+# 将当前执行目录所有文件拷贝到obs的output进行备份
+print("[CANN-Modelzoo] after train - list my output files:")
+os.system("cp -r %s %s " % (code_dir, config.train_url))
+os.system("ls -al %s" % config.train_url)
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/models/__init__.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__init__.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/models/__init__.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__init__.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/__init__.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..742959f176e0da72d53c0289e47c479742509df6
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/__init__.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/base_model.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/base_model.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6309b9ba9523fdcc3a1fa7b1c3d714ec0bc0688a
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/base_model.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/bsrn.cpython-37.pyc b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/bsrn.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..16dcf3a95205e81362f636ad49ba243b3c278853
Binary files /dev/null and b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/__pycache__/bsrn.cpython-37.pyc differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/models/base_model.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/base_model.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/models/base_model.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/base_model.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/models/bsrn.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/bsrn.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/models/bsrn.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/models/bsrn.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelzoo_level.txt b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelzoo_level.txt
index 7074c830f89620c714ac12871d0a6bb82c26344d..1a8f8652695550373b522c5012f226f703369be1 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelzoo_level.txt
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/modelzoo_level.txt
@@ -1,5 +1,5 @@
FuncStatus:OK
-PrecisionStatus:POK
-PerfStatus:POK
+PrecisionStatus:OK
+PerfStatus:OK
GPUStatus:OK
-NPUMigrationStatus:POK
\ No newline at end of file
+NPUMigrationStatus:OK
\ No newline at end of file
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/run_gpu.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/run_gpu.sh
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/run_gpu.sh
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/run_gpu.sh
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/run_npu.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/run_npu.sh
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/run_npu.sh
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/run_npu.sh
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/run_npu_restore.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/run_npu_restore.sh
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/run_npu_restore.sh
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/run_npu_restore.sh
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/test.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/test.sh
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/test.sh
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/test.sh
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/validate_gpu.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/validate_gpu.sh
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/validate_gpu.sh
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/validate_gpu.sh
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/validate_npu.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/validate_npu.sh
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/scripts/validate_npu.sh
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/scripts/validate_npu.sh
diff --git "a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/statics/GPU\346\200\247\350\203\275.jpg" "b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/statics/GPU\346\200\247\350\203\275.jpg"
new file mode 100644
index 0000000000000000000000000000000000000000..772ea2b971585daa23a5831062d8b4783e86dee6
Binary files /dev/null and "b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/statics/GPU\346\200\247\350\203\275.jpg" differ
diff --git "a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/statics/NPU\346\200\247\350\203\275.jpg" "b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/statics/NPU\346\200\247\350\203\275.jpg"
new file mode 100644
index 0000000000000000000000000000000000000000..a526cbe98a7d00dce89931642cb9829e0c963e58
Binary files /dev/null and "b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/statics/NPU\346\200\247\350\203\275.jpg" differ
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_full_1p.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_full_1p.sh
index 5475b928f201b8960c2c099cd4c01af169b433c8..838e8c376a12b5835effef47737f95eb6c6e60b9 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_full_1p.sh
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_full_1p.sh
@@ -1,173 +1,223 @@
#!/bin/bash
-#当前路径,不需要修改
-cur_path=`pwd`/../
+##########################################################
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+##########################################################
+# shell脚本所在路径
+cur_path=`echo $(cd $(dirname $0);pwd)`
-#集合通信参数,不需要修改
+# 判断当前shell是否是performance
+perf_flag=`echo $0 | grep performance | wc -l`
+
+# 当前执行网络的名称
+Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`
export RANK_SIZE=1
+export RANK_ID=0
export JOB_ID=10087
-RANK_ID_START=0
-
-
-# 数据集路径,保持为空,不需要修改
-data_path=''
-#预训练模型地址
-ckpt_path=''
-
-#设置默认日志级别,不需要改
-#export ASCEND_GLOBAL_LOG_LEVEL=3
-#export ASCEND_DEVICE_ID=4
-
-#基础参数,需要模型审视修改
-#网络名称,同目录名称
-Network="BSRN_ID1296_for_TensorFlow"
-#训练epoch
-epochs=1
-#训练batch_size
-batch_size=8
-
-#TF2.X独有,需要模型审视修改
-export NPU_LOOP_SIZE=${train_steps}
-
-#维测参数,precision_mode需要模型审视修改
-precision_mode="allow_mix_precision"
-#维持参数,以下不需要修改
-over_dump=False
-data_dump_flag=False
-data_dump_step="10"
-profiling=False
+# 路径参数初始化
+data_path=""
+output_path=""
# 帮助信息,不需要修改
if [[ $1 == --help || $1 == -h ]];then
echo"usage:./train_performance_1P.sh "
echo " "
echo "parameter explain:
- --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
- --over_dump if or not over detection, default is False
- --data_dump_flag data dump flag, default is False
- --data_dump_step data dump step, default is 10
- --profiling if or not profiling for performance debug, default is False
- --data_path source data of training
- --ckpt_path model
- -h/--help show help message
+ --data_path # dataset of training
+ --output_path # output of training
+ --train_steps # max_step for training
+ --train_epochs # max_epoch for training
+ --batch_size # batch size
+ -h/--help show help message
"
exit 1
fi
-#参数校验,不需要修改
+# 参数校验,不需要修改
for para in $*
do
- if [[ $para == --precision_mode* ]];then
- precision_mode=`echo ${para#*=}`
- elif [[ $para == --over_dump* ]];then
- over_dump=`echo ${para#*=}`
- over_dump_path=${cur_path}/test/output/overflow_dump
- mkdir -p ${over_dump_path}
- elif [[ $para == --data_dump_flag* ]];then
- data_dump_flag=`echo ${para#*=}`
- data_dump_path=${cur_path}/test/output/data_dump
- mkdir -p ${data_dump_path}
- elif [[ $para == --data_dump_step* ]];then
- data_dump_step=`echo ${para#*=}`
- elif [[ $para == --profiling* ]];then
- profiling=`echo ${para#*=}`
- profiling_dump_path=${cur_path}/test/output/profiling
- mkdir -p ${profiling_dump_path}
- elif [[ $para == --data_path* ]];then
+ if [[ $para == --data_path* ]];then
data_path=`echo ${para#*=}`
- elif [[ $para == --ckpt_path* ]];then
- ckpt_path=`echo ${para#*=}`
- fi
+ elif [[ $para == --output_path* ]];then
+ output_path=`echo ${para#*=}`
+ elif [[ $para == --train_steps* ]];then
+ train_steps=`echo ${para#*=}`
+ elif [[ $para == --train_epochs* ]];then
+ train_epochs=`echo ${para#*=}`
+ elif [[ $para == --batch_size* ]];then
+ batch_size=`echo ${para#*=}`
+ fi
done
-# #校验是否传入data_path,不需要修改
-# if [[$data_path == ""]];then
-# echo "[Error] para \"data_path\" must be confing"
-# exit 1
-# fi
-#训练开始时间,不需要修改
-start_time=$(date +%s)
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be config"
+ exit 1
+fi
-#进入训练脚本目录,需要模型审视修改
-cd $cur_path/tf-bsrn-sr
+# 校验是否传入output_path,不需要修改
+if [[ $output_path == "" ]];then
+ output_path="./test/output/${ASCEND_DEVICE_ID}"
+fi
-#创建DeviceID输出目录,不需要修改
-if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then
- rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID}
- mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
-else
- mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
+# 设置打屏日志文件名,请保留,文件名为${print_log}
+print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
+modelarts_flag=${MODELARTS_MODEL_PATH}
+if [ x"${modelarts_flag}" != x ];
+then
+    echo "running with modelarts..."
+ print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank`
+ print_log="/home/ma-user/modelarts/log/${print_log_name}"
fi
+echo "### get your log here : ${print_log}"
+
+CaseName=""
+function get_casename()
+{
+ if [ x"${perf_flag}" = x1 ];
+ then
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+ else
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
+ fi
+}
+
+# 跳转到code目录
+cd ${cur_path}/../
+rm -rf ./test/output/${ASCEND_DEVICE_ID}
+mkdir -p ./test/output/${ASCEND_DEVICE_ID}
+
+# 训练开始时间记录,不需要修改
+start_time=$(date +%s)
+##########################################################
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+##########################################################
+
+#=========================================================
+#=========================================================
+#========训练执行命令,需要根据您的网络进行修改==============
+#=========================================================
+#=========================================================
+# 基础参数,需要模型审视修改
+# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取
+# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取
+# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值
+batch_size=8
-#执行训练脚本,以下传参不需要修改,其他需要模型审视修改
-python3 train.py \
- --data_input_path=${data_path}/dataset/DIV2K/DIV2K_train_LR_bicubic\
- --data_truth_path=${data_path}/dataset/DIV2K/DIV2K_train_HR \
- --train_path='./train' \
+#if [ x"${modelarts_flag}" != x ];
+#then
+# echo ${data_path}
+# ls ${data_path}
+# relative_path_LR="DIV2K/DIV2K_train_LR_bicubic"
+# relative_path_HR="DIV2K/DIV2K_train_HR"
+#
+# python3.7 ./train.py \
+# --data_input_path=${data_path}${relative_path_LR} --data_truth_path=${data_path}${relative_path_HR} --train_path=${output_path} \
+# --chip='npu' \
+# --model='bsrn' \
+# --dataloader='div2k_loader' \
+# --batch_size=8 \
+# --max_steps=100000 \
+# --save_freq=1000 \
+# --scales='4'
+#else
+relative_path_LR="/dataset/DIV2K/DIV2K_train_LR_bicubic"
+relative_path_HR="/dataset/DIV2K/DIV2K_train_HR"
+python3.7 ./train.py \
+ --data_input_path=${data_path}${relative_path_LR}\
+ --data_truth_path=${data_path}${relative_path_HR} \
+ --train_path=./checkpoints \
--chip='npu' \
--model='bsrn' \
--dataloader='div2k_loader' \
--batch_size=8 \
- --scales='4' \
- --max_steps=100000 \
- --save_freq=10000 > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
-wait
-python3 validate_bsrn.py \
- --dataloader=basic_loader \
- --data_input_path=${data_path}/dataset/BSD100/LR_bicubic \
- --data_truth_path=${data_path}/dataset/BSD100/original \
- --restore_path=./train/model.ckpt-100000 \
- --model=bsrn \
- --scales=4 \
- --save_path=./result/result-pictures \
- --chip='npu' > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/test_${ASCEND_DEVICE_ID}.log 2>&1
-#训练结束时间,不需要修改
+ --max_steps=300000\
+ --save_freq=10 \
+ --scales='4' 1>${print_log} 2>&1
+
+relative_path_LR="/dataset/BSD100/LR"
+relative_path_HR="/dataset/BSD100/SR"
+# after training, load the model to check the performance
+relative_path_checkpoint='model.ckpt-300000'
+
+python3.7 ./validate_bsrn.py \
+ --dataloader=basic_loader \
+ --data_input_path=${data_path}${relative_path_LR} --data_truth_path=${data_path}${relative_path_HR} \
+ --restore_path=./checkpoints/${relative_path_checkpoint} \
+ --model=bsrn \
+ --scales=4 \
+ --save_path=./result-pictures 1>>${print_log} 2>&1
+#fi
+cat ${print_log}
+# 性能相关数据计算
+StepTime=`grep "sec/batch" ${print_log} | tail -n 20 | awk '{print $(NF-2)}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`
+# 精度相关数据计算
+PSNR=`grep "Final PSNR" ${print_log} | awk '{print $NF}'`
+SSIM=`grep "Final SSIM" ${print_log} | awk '{print $NF}'`
+# 提取所有loss打印信息
+grep "loss" ${print_log} | awk -F ":" '{print $4}'| grep "loss" |awk -F "," '{print $3}'|awk '{print $2}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+
+###########################################################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+###########################################################
+
+# 判断本次执行是否正确使用Ascend NPU
+use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l`
+if [ x"${use_npu_flag}" == x0 ];
+then
+ echo "------------------ ERROR NOTICE START ------------------"
+ echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration."
+ echo "------------------ ERROR NOTICE END------------------"
+else
+ echo "------------------ INFO NOTICE START------------------"
+ echo "INFO, your task have used Ascend NPU, please check your result."
+ echo "------------------ INFO NOTICE END------------------"
+fi
+
+# 获取最终的casename,请保留,case文件名为${CaseName}
+get_casename
+
+# 重命名loss文件
+if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
+then
+ mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
+fi
+
+# 训练端到端耗时
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))
-#结果打印,不需要修改
echo "------------------ Final result ------------------"
-#输出性能FPS,需要模型审视修改
-TrainingTime=`grep 'fps' $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $12}'`
-FPS=`grep 'fps' $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $14}'`
-#打印,不需要修改
-echo "Final Performance TrainingTime : $TrainingTime"
+# 输出性能FPS/单step耗时/端到端耗时
echo "Final Performance images/sec : $FPS"
-
-#输出训练精度,需要模型审视修改
-train_accuracy=`grep -A1 RMSE: $cur_path/test/output/${ASCEND_DEVICE_ID}/test_${ASCEND_DEVICE_ID}.log|grep -v RMSE:|awk '{print $NF}'`
-
-#打印,不需要修改
-echo "Final Train Accuracy : ${train_accuracy}"
+echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"
-#性能看护结果汇总
-#训练用例信息,不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
-
-##获取性能数据,不需要修改
-#吞吐量
-ActualFPS=${FPS}
-#单迭代训练时长
-#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${FPS}'/69}'`
-
-#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
-grep 'loss' $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $10}' >> $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-#最后一个迭代loss值,不需要修改
-ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-#关键信息打印到${CaseName}.log中,不需修改
-echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
+# 输出训练精度
+echo "Final Train Accuracy : ${PSNR}"
+echo "Final SSIM : ${SSIM}"
+
+# 最后一个迭代loss值,不需要修改
+ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`)
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
echo "TrainAccuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
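The rewritten script derives its performance numbers from the training log: StepTime is the average of the last 20 "sec/batch" values (awk field `$(NF-2)`) and FPS is `batch_size / StepTime`; PSNR/SSIM come from the "Final PSNR"/"Final SSIM" lines printed by `validate_bsrn.py`. A small offline check of the same arithmetic; the log file name is hypothetical and each matching line is assumed to carry the step time as its third-from-last field, as the awk pipeline expects:

```python
# Offline recomputation of the StepTime/FPS numbers produced by train_full_1p.sh
# (a sketch; log file name and field layout are assumptions).
batch_size = 8
step_times = []
with open("train_0.log") as f:
    for line in f:
        if "sec/batch" in line:
            step_times.append(float(line.split()[-3]))  # mirrors awk '{print $(NF-2)}'

window = step_times[-20:]                 # same 20-step tail as `tail -n 20`
step_time = sum(window) / len(window)
fps = batch_size / step_time
print("Final Performance sec/step : %.3f" % step_time)
print("Final Performance images/sec : %.2f" % fps)
```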
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_performance_1p.sh b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_performance_1p.sh
index f035bc5dd7d17cd0fc8ae05655c802b8179fa462..d8e210b0fcb0fc7f77767ea5b77efaeff8cc6089 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_performance_1p.sh
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test/train_performance_1p.sh
@@ -1,165 +1,210 @@
#!/bin/bash
-#当前路径,不需要修改
-cur_path=`pwd`/../
+##########################################################
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+##########################################################
+# shell脚本所在路径
+cur_path=`echo $(cd $(dirname $0);pwd)`
-#集合通信参数,不需要修改
+# 判断当前shell是否是performance
+perf_flag=`echo $0 | grep performance | wc -l`
+
+# 当前执行网络的名称
+Network=`echo $(cd $(dirname $0);pwd) | awk -F"/" '{print $(NF-1)}'`
export RANK_SIZE=1
+export RANK_ID=0
export JOB_ID=10087
-RANK_ID_START=0
-
-
-# 数据集路径,保持为空,不需要修改
-data_path=''
-#预训练模型地址
-ckpt_path=''
-
-#设置默认日志级别,不需要改
-#export ASCEND_GLOBAL_LOG_LEVEL=3
-#export ASCEND_DEVICE_ID=4
-
-#基础参数,需要模型审视修改
-#网络名称,同目录名称
-Network="BSRN_ID1296_for_TensorFlow"
-#训练epoch
-epochs=1
-#训练batch_size
-batch_size=8
-
-
-#TF2.X独有,需要模型审视修改
-export NPU_LOOP_SIZE=${train_steps}
-#维测参数,precision_mode需要模型审视修改
-precision_mode="allow_mix_precision"
-#维持参数,以下不需要修改
-over_dump=False
-data_dump_flag=False
-data_dump_step="10"
-profiling=False
+# 路径参数初始化
+data_path=""
+output_path=""
# 帮助信息,不需要修改
if [[ $1 == --help || $1 == -h ]];then
echo"usage:./train_performance_1P.sh "
echo " "
echo "parameter explain:
- --precision_mode precision mode(allow_fp32_to_fp16/force_fp16/must_keep_origin_dtype/allow_mix_precision)
- --over_dump if or not over detection, default is False
- --data_dump_flag data dump flag, default is False
- --data_dump_step data dump step, default is 10
- --profiling if or not profiling for performance debug, default is False
- --data_path source data of training
- --ckpt_path model
- -h/--help show help message
+ --data_path # dataset of training
+ --output_path # output of training
+ --train_steps # max_step for training
+ --train_epochs # max_epoch for training
+ --batch_size # batch size
+ -h/--help show help message
"
exit 1
fi
-#参数校验,不需要修改
+# 参数校验,不需要修改
for para in $*
do
- if [[ $para == --precision_mode* ]];then
- precision_mode=`echo ${para#*=}`
- elif [[ $para == --over_dump* ]];then
- over_dump=`echo ${para#*=}`
- over_dump_path=${cur_path}/test/output/overflow_dump
- mkdir -p ${over_dump_path}
- elif [[ $para == --data_dump_flag* ]];then
- data_dump_flag=`echo ${para#*=}`
- data_dump_path=${cur_path}/test/output/data_dump
- mkdir -p ${data_dump_path}
- elif [[ $para == --data_dump_step* ]];then
- data_dump_step=`echo ${para#*=}`
- elif [[ $para == --profiling* ]];then
- profiling=`echo ${para#*=}`
- profiling_dump_path=${cur_path}/test/output/profiling
- mkdir -p ${profiling_dump_path}
- elif [[ $para == --data_path* ]];then
+ if [[ $para == --data_path* ]];then
data_path=`echo ${para#*=}`
- elif [[ $para == --ckpt_path* ]];then
- ckpt_path=`echo ${para#*=}`
- fi
+ elif [[ $para == --output_path* ]];then
+ output_path=`echo ${para#*=}`
+ elif [[ $para == --train_steps* ]];then
+ train_steps=`echo ${para#*=}`
+ elif [[ $para == --train_epochs* ]];then
+ train_epochs=`echo ${para#*=}`
+ elif [[ $para == --batch_size* ]];then
+ batch_size=`echo ${para#*=}`
+ fi
done
-# #校验是否传入data_path,不需要修改
-# if [[$data_path == ""]];then
-# echo "[Error] para \"data_path\" must be confing"
-# exit 1
-# fi
-#训练开始时间,不需要修改
-start_time=$(date +%s)
+# 校验是否传入data_path,不需要修改
+if [[ $data_path == "" ]];then
+ echo "[Error] para \"data_path\" must be config"
+ exit 1
+fi
-#进入训练脚本目录,需要模型审视修改
-cd $cur_path/tf-bsrn-sr
+# 校验是否传入output_path,不需要修改
+if [[ $output_path == "" ]];then
+ output_path="./test/output/${ASCEND_DEVICE_ID}"
+fi
-#创建DeviceID输出目录,不需要修改
-if [ -d ${cur_path}/test/output/${ASCEND_DEVICE_ID} ];then
- rm -rf ${cur_path}/test/output/${ASCEND_DEVICE_ID}
- mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
-else
- mkdir -p ${cur_path}/test/output/$ASCEND_DEVICE_ID/ckpt
+# 设置打屏日志文件名,请保留,文件名为${print_log}
+print_log="./test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log"
+modelarts_flag=${MODELARTS_MODEL_PATH}
+if [ x"${modelarts_flag}" != x ];
+then
+ echo "running with modelarts..."
+ print_log_name=`ls /home/ma-user/modelarts/log/ | grep proc-rank`
+ print_log="/home/ma-user/modelarts/log/${print_log_name}"
fi
+echo "### get your log here : ${print_log}"
+
+CaseName=""
+function get_casename()
+{
+ if [ x"${perf_flag}" = x1 ];
+ then
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'perf'
+ else
+ CaseName=${Network}_bs${batch_size}_${RANK_SIZE}'p'_'acc'
+ fi
+}
+
+# 跳转到code目录
+cd ${cur_path}/../
+rm -rf ./test/output/${ASCEND_DEVICE_ID}
+mkdir -p ./test/output/${ASCEND_DEVICE_ID}
+
+# 训练开始时间记录,不需要修改
+start_time=$(date +%s)
+##########################################################
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+#########第3行 至 100行,请一定不要、不要、不要修改##########
+##########################################################
+
+#=========================================================
+#=========================================================
+#========训练执行命令,需要根据您的网络进行修改==============
+#=========================================================
+#=========================================================
+# 基础参数,需要模型审视修改
+# 您的训练数据集在${data_path}路径下,请直接使用这个变量获取
+# 您的训练输出目录在${output_path}路径下,请直接使用这个变量获取
+# 您的其他基础参数,可以自定义增加,但是batch_size请保留,并且设置正确的值
+train_epochs=2
+train_steps=100
+batch_size=8
-#执行训练脚本,以下传参不需要修改,其他需要模型审视修改
-python3 train.py \
- --data_input_path=${data_path}/dataset/DIV2K/DIV2K_train_LR_bicubic\
- --data_truth_path=${data_path}/dataset/DIV2K/DIV2K_train_HR \
- --train_path='./train' \
+#if [ x"${modelarts_flag}" != x ];
+#then
+# echo ${data_path}
+# ls ${data_path}
+# relative_path_LR="DIV2K/DIV2K_train_LR_bicubic"
+# relative_path_HR="DIV2K/DIV2K_train_HR"
+# python3.7 ./train.py \
+# --data_input_path=${data_path}${relative_path_LR} --data_truth_path=${data_path}${relative_path_HR} --train_path=${output_path} \
+# --chip='npu' \
+# --model='bsrn' \
+# --dataloader='div2k_loader' \
+# --batch_size=8 \
+# --max_steps=${train_steps} \
+# --save_freq=1000 \
+# --scales='4'
+#else
+relative_path_LR="/dataset/DIV2K/DIV2K_train_LR_bicubic"
+relative_path_HR="/dataset/DIV2K/DIV2K_train_HR"
+python3.7 ./train.py \
+ --data_input_path=${data_path}${relative_path_LR} --data_truth_path=${data_path}${relative_path_HR} --train_path=${output_path} \
--chip='npu' \
--model='bsrn' \
--dataloader='div2k_loader' \
--batch_size=8 \
- --max_steps=100 \
- --save_freq=50 \
- --scales='4' > ${cur_path}/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log 2>&1
-wait
+ --max_steps=${train_steps} \
+ --save_freq=1000 \
+ --scales='4' 1>${print_log} 2>&1
-#训练结束时间,不需要修改
+
+
+# 性能相关数据计算
+StepTime=`grep "sec/batch" ${print_log} | tail -n 20 | awk '{print $(NF-2)}' | awk '{sum+=$1} END {print sum/NR}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${StepTime}'}'`
+
+# 精度相关数据计算
+PSNR=`grep "Final PSNR" ${print_log} | awk '{print $NF}'`
+SSIM=`grep "Final SSIM" ${print_log} | awk '{print $NF}'`
+# 提取所有loss打印信息
+grep "loss" ${print_log} | awk -F ":" '{print $4}'| grep "loss" |awk -F "," '{print $3}'|awk '{print $2}' > ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt
+
+
+###########################################################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+#########后面的所有内容请不要修改###########################
+###########################################################
+
+# 判断本次执行是否正确使用Ascend NPU
+use_npu_flag=`grep "The model has been compiled on the Ascend AI processor" ${print_log} | wc -l`
+if [ x"${use_npu_flag}" == x0 ];
+then
+ echo "------------------ ERROR NOTICE START ------------------"
+ echo "ERROR, your task haven't used Ascend NPU, please check your npu Migration."
+ echo "------------------ ERROR NOTICE END------------------"
+else
+ echo "------------------ INFO NOTICE START------------------"
+ echo "INFO, your task have used Ascend NPU, please check your result."
+ echo "------------------ INFO NOTICE END------------------"
+fi
+
+# 获取最终的casename,请保留,case文件名为${CaseName}
+get_casename
+
+# 重命名loss文件
+if [ -f ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ];
+then
+ mv ./test/output/${ASCEND_DEVICE_ID}/my_output_loss.txt ./test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt
+fi
+
+# 训练端到端耗时
end_time=$(date +%s)
e2e_time=$(( $end_time - $start_time ))
-#结果打印,不需要修改
echo "------------------ Final result ------------------"
-#输出性能FPS,需要模型审视修改
-TrainingTime=`grep 'fps' $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $12}'`
-FPS=`grep 'fps' $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|awk 'END {print $14}'`
-#打印,不需要修改
-echo "Final Performance TrainingTime : $TrainingTime"
+# 输出性能FPS/单step耗时/端到端耗时
echo "Final Performance images/sec : $FPS"
-
-#输出训练精度,需要模型审视修改
-#train_accuracy=`grep val_loss $cur_path/test/output/${ASCEND_DEVICE_ID}/train_${ASCEND_DEVICE_ID}.log|grep step|grep -v ETA|awk 'END {print}'|awk '{print $11}'`
-
-#打印,不需要修改
-#echo "Final Train Accuracy : ${train_accuracy}"
+echo "Final Performance sec/step : $StepTime"
echo "E2E Training Duration sec : $e2e_time"
-#性能看护结果汇总
-#训练用例信息,不需要修改
-BatchSize=${batch_size}
-DeviceType=`uname -m`
-CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
-
-##获取性能数据,不需要修改
-#吞吐量
-ActualFPS=${FPS}
-#单迭代训练时长
-#TrainingTime=`awk 'BEGIN{printf "%.2f\n",'${FPS}'/69}'`
-
-#从train_$ASCEND_DEVICE_ID.log提取Loss到train_${CaseName}_loss.txt中,需要根据模型审视
-grep 'loss' $cur_path/test/output/$ASCEND_DEVICE_ID/train_$ASCEND_DEVICE_ID.log|awk '{print $10}' >> $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt
-#最后一个迭代loss值,不需要修改
-ActualLoss=`awk 'END {print}' $cur_path/test/output/$ASCEND_DEVICE_ID/train_${CaseName}_loss.txt`
-
-#关键信息打印到${CaseName}.log中,不需修改
-echo "Network = ${Network}" > $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "RankSize = ${RANK_SIZE}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "BatchSize = ${BatchSize}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "DeviceType = ${DeviceType}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "CaseName = ${CaseName}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualFPS = ${ActualFPS}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "TrainingTime = ${TrainingTime}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "ActualLoss = ${ActualLoss}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
-#echo "Accuracy = ${train_accuracy}" >> $cur_path/test/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
+# 输出训练精度
+echo "Final Train Accuracy : ${PSNR}"
+echo "Final SSIM : ${SSIM}"
+
+# 最后一个迭代loss值,不需要修改
+ActualLoss=(`awk 'END {print $NF}' $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}_loss.txt`)
+
+#关键信息打印到${CaseName}.log中,不需要修改
+echo "Network = ${Network}" > $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${batch_size}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = `uname -m`" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${FPS}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${StepTime}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualLoss = ${ActualLoss}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> $cur_path/output/$ASCEND_DEVICE_ID/${CaseName}.log
\ No newline at end of file
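Both scripts dump the per-step loss values they grep out of the log into `test/output/${ASCEND_DEVICE_ID}/${CaseName}_loss.txt` and report only the last entry as `ActualLoss`. A quick way to inspect the whole curve offline; the file path below follows the scripts' CaseName convention (network name taken from the directory, device id 0) and is an assumption:

```python
# Inspect the loss file written by the test/ scripts and reproduce ActualLoss
# (a sketch; the path follows the scripts' naming convention and is assumed).
loss_file = "test/output/0/BSRN_ID1296_for_TensorFlow_bs8_1p_perf_loss.txt"

with open(loss_file) as f:
    losses = [float(x) for x in f.read().split()]

print("logged steps :", len(losses))
print("ActualLoss   :", losses[-1])   # the last value, as the script reports
```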
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/test_bsrn.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test_bsrn.py
similarity index 100%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/test_bsrn.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/test_bsrn.py
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/train.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/train.py
similarity index 98%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/train.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/train.py
index 3f44d3dc3e888bf642832839209a2bfaa02b30fb..ffa526a551328be09ddd07e9ba0fa7d670c60ef1 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/train.py
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/train.py
@@ -82,6 +82,8 @@ if __name__ == '__main__':
pre_parsed = pre_parser.parse_known_args()[0]
if (pre_parsed.dataloader is not None):
+    print("dataloader: ", pre_parsed.dataloader)
+    tf.logging.info("dataloader: %s", pre_parsed.dataloader)
DATALOADER_MODULE = importlib.import_module('dataloaders.' + pre_parsed.dataloader)
if (pre_parsed.model is not None):
MODEL_MODULE = importlib.import_module('models.' + pre_parsed.model)
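The added logging makes the dynamic module selection visible: `train.py` imports the dataloader and model by name via `importlib`, so `--dataloader=div2k_loader` and `--model=bsrn` resolve to `dataloaders/div2k_loader.py` and `models/bsrn.py`. A standalone illustration; it assumes it is run from the repository code directory so both packages are importable:

```python
# Standalone illustration of the dynamic imports used in train.py (a sketch;
# assumes the working directory contains the dataloaders/ and models/ packages).
import importlib

dataloader_name = "div2k_loader"   # value passed via --dataloader in the run scripts
model_name = "bsrn"                # value passed via --model

DATALOADER_MODULE = importlib.import_module("dataloaders." + dataloader_name)
MODEL_MODULE = importlib.import_module("models." + model_name)
print("dataloader:", DATALOADER_MODULE.__name__)
print("model:", MODEL_MODULE.__name__)
```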
diff --git a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/validate_bsrn.py b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/validate_bsrn.py
similarity index 86%
rename from TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/validate_bsrn.py
rename to TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/validate_bsrn.py
index 97bd8e6fb22f50dc7fbccbdec2c3e75a87f9edd5..7012829fa13d19741e8481549993034a89a5b789 100644
--- a/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/tf-bsrn-sr/validate_bsrn.py
+++ b/TensorFlow/contrib/cv/BSRN_ID1296_for_TensorFlow/validate_bsrn.py
@@ -31,6 +31,8 @@ import os
import time
import numpy as np
import tensorflow as tf
+from scipy.ndimage import gaussian_filter
+
import dataloaders
import models
@@ -59,7 +61,7 @@ if __name__ == '__main__':
tf.flags.DEFINE_integer('shave_size', 4, 'Amount of pixels to crop the borders of the images before calculating quality metrics.')
tf.flags.DEFINE_boolean('ensemble_only', False, 'Calculate (and save) ensembled image only.')
- tf.flags.DEFINE_string("chip", "gpu", "Run on which chip, (npu or gpu or cpu)")
+ tf.flags.DEFINE_string("chip", "cpu", "Run on which chip, (npu or gpu or cpu)")
tf.flags.DEFINE_string("platform", "linux", 'the platform this code is running on')
# parse data loader and model first and import them
@@ -116,7 +118,55 @@ def _image_rmse2(output_image, truth_image):
diff = yr_tr - yr_out
rmse = np.sqrt(np.mean(diff ** 2))
return rmse
-
+def _image_ssim(X, Y):
+ """
+ Computes the mean structural similarity between two images.
+ """
+    assert (X.shape == Y.shape), "Image patches provided have different dimensions"
+ nch = 1 if X.ndim == 2 else X.shape[-1]
+ mssim = []
+ for ch in range(nch):
+        # for 2-D (single-channel) inputs use the whole image, not a single column
+        Xc = X.astype(np.float64) if X.ndim == 2 else X[..., ch].astype(np.float64)
+        Yc = Y.astype(np.float64) if Y.ndim == 2 else Y[..., ch].astype(np.float64)
+ mssim.append(compute_ssim(Xc, Yc))
+ return np.mean(mssim)
+
+
+def compute_ssim(X, Y):
+ """
+ Compute the structural similarity per single channel (given two images)
+ """
+ # variables are initialized as suggested in the paper
+ K1 = 0.01
+ K2 = 0.03
+ sigma = 1.5
+ win_size = 5
+
+ # means
+ ux = gaussian_filter(X, sigma)
+ uy = gaussian_filter(Y, sigma)
+
+ # variances and covariances
+ uxx = gaussian_filter(X * X, sigma)
+ uyy = gaussian_filter(Y * Y, sigma)
+ uxy = gaussian_filter(X * Y, sigma)
+
+ # normalize by unbiased estimate of std dev
+ N = win_size ** X.ndim
+ unbiased_norm = N / (N - 1) # eq. 4 of the paper
+ vx = (uxx - ux * ux) * unbiased_norm
+ vy = (uyy - uy * uy) * unbiased_norm
+ vxy = (uxy - ux * uy) * unbiased_norm
+
+ R = 255
+ C1 = (K1 * R) ** 2
+ C2 = (K2 * R) ** 2
+ # compute SSIM (eq. 13 of the paper)
+ sim = (2 * ux * uy + C1) * (2 * vxy + C2)
+ D = (ux ** 2 + uy ** 2 + C1) * (vx + vy + C2)
+ SSIM = sim / D
+ mssim = SSIM.mean()
+
+ return mssim
def main(unused_argv):
# initialize
@@ -240,9 +290,9 @@ def main(unused_argv):
output_image_shaved = _shave_image(output_image, shave_size=FLAGS.shave_size)
psnr = _image_psnr(output_image=output_image_shaved, truth_image=truth_image_shaved)
- rmse = _image_rmse(output_image=output_image_shaved, truth_image=truth_image_shaved)
+ rmse = _image_ssim(output_image_shaved, truth_image_shaved)
- tf.logging.info('t%d, x%d, %d/%d, psnr=%.2f, rmse=%.2f' % (num_recursions, scale, image_index+1, num_images, psnr, rmse))
+ tf.logging.info('t%d, x%d, %d/%d, psnr=%.2f, ssim=%.2f' % (num_recursions, scale, image_index+1, num_images, psnr, rmse))
psnr_list[i].append(psnr)
rmse_list[i].append(rmse)
@@ -262,9 +312,9 @@ def main(unused_argv):
output_image_shaved = _shave_image(output_image, shave_size=FLAGS.shave_size)
psnr = _image_psnr(output_image=output_image_shaved, truth_image=truth_image_shaved)
- rmse = _image_rmse(output_image=output_image_shaved, truth_image=truth_image_shaved)
+ rmse = _image_ssim(output_image_shaved, truth_image_shaved)
- tf.logging.info('ensemble, x%d, %d/%d, psnr=%.2f, rmse=%.2f' % (scale, image_index+1, num_images, psnr, rmse))
+ tf.logging.info('ensemble, x%d, %d/%d, psnr=%.2f, ssim=%.2f' % (scale, image_index+1, num_images, psnr, rmse))
psnr_list[num_total_outputs].append(psnr)
rmse_list[num_total_outputs].append(rmse)
@@ -279,10 +329,10 @@ def main(unused_argv):
# finalize
tf.logging.info('finished')
for scale in scale_list:
- print('- x%d, PSNR and RMSE:' % (scale))
- print(' '.join([('%.3f' % x) for x in modules_average_psnr_dict[scale]]))
+ print('- x%d, PSNR and SSIM:' % (scale))
+ print("Final PSNR: ",' '.join([('%.3f' % x) for x in modules_average_psnr_dict[scale]]))
print('')
- print(' '.join([('%.3f' % x) for x in modules_average_rmse_dict[scale]]))
+ print("Final SSIM: ",' '.join([('%.3f' % x) for x in modules_average_rmse_dict[scale]]))
if FLAGS.platform.lower() == 'modelarts':
from help_modelarts import modelarts_result2obs
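The validation script now reports SSIM instead of RMSE, computed per channel from Gaussian-filtered local means, variances and covariances (eq. 13 of the SSIM paper) with K1=0.01, K2=0.03, sigma=1.5 and a 255 dynamic range. A quick sanity check for the helper; it assumes `compute_ssim` is importable from `validate_bsrn.py` (importing it pulls in TensorFlow and the repo packages, so running from the code directory with those dependencies installed is assumed):

```python
# Sanity check for the SSIM helper added above (a sketch; import path assumed).
import numpy as np
from validate_bsrn import compute_ssim

rng = np.random.default_rng(0)
img = rng.integers(0, 256, size=(64, 64)).astype(np.float64)

print(compute_ssim(img, img))                                    # identical images -> ~1.0
noisy = np.clip(img + rng.normal(0.0, 25.0, img.shape), 0, 255)
print(compute_ssim(img, noisy))                                  # noise lowers the score
```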