From 91b0e37aa9ec0257b187c5562eff87e93255205d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:18:45 +0000
Subject: [PATCH 01/10] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20DnCNN=5F710?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep
new file mode 100644
index 0000000000..e69de29bb2
-- 
Gitee


From 2a8e6ea4b2c452875cfc373ff2781088f565bb55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:19:24 +0000
Subject: [PATCH 02/10] =?UTF-8?q?[=E4=B8=9C=E5=8C=97=E5=A4=A7=E5=AD=A6][?=
 =?UTF-8?q?=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTorch=E7=A6=BB=E7=BA=BF?=
 =?UTF-8?q?=E6=8E=A8=E7=90=86][DnCNN]-=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../DnCNN/DnCNN_710/DnCNN_postprocess.py      | 103 ++++++
 .../DnCNN/DnCNN_710/DnCNN_preprocess.py       |  79 ++++
 .../DnCNN/DnCNN_710/DnCNN_pth2onnx.py         |  80 ++++
 .../image_process/DnCNN/DnCNN_710/READEME.md  | 350 ++++++++++++++++++
 .../image_process/DnCNN/DnCNN_710/get_info.py |  60 +++
 .../DnCNN/DnCNN_710/requirements.txt          |   6 +
 6 files changed, 678 insertions(+)
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py
 create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py
new file mode 100644
index 0000000000..dbaf8635a6
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py
@@ -0,0 +1,103 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import sys
+import glob
+import numpy as np
+import cv2
+import torch
+import torch.nn as nn
+import struct
+from skimage.metrics import peak_signal_noise_ratio as compare_psnr
+
+
+def batch_PSNR(img, imclean, data_range):
+    """Return the mean PSNR of the images in img measured against imclean."""
+    restored = img.data.cpu().numpy().astype(np.float32)
+    reference = imclean.data.cpu().numpy().astype(np.float32)
+    # Score every sample of the batch against its reference, then average.
+    scores = (compare_psnr(reference[k, :, :, :], restored[k, :, :, :], data_range=data_range)
+              for k in range(restored.shape[0]))
+    return (sum(scores) / restored.shape[0])
+
+
+def bin2npy(filepath):
+    """Load a raw float32 .bin file as an array of shape (1, 1, 481, 481).
+
+    The file is expected to hold 481 * 481 values in native byte order;
+    any other number of values makes the reshape raise ValueError.
+    """
+    # One numpy call reads the whole buffer, so no file handle stays open
+    # when decoding fails and no per-value Python loop is needed.
+    values = np.fromfile(filepath, dtype=np.float32)
+    # Widen to float64 so that callers receive double-precision data for
+    # their arithmetic.
+    dim_res = values.astype(np.float64).reshape(1, 1, 481, 481)
+    return dim_res
+
+
+def main(Result_path):
+    """Print the PSNR of every restored image and the mean over the test set.
+
+    Result_path is the directory with the inference output .bin files. The
+    clean and noisy inputs are read from the ISource and INoisy directories
+    relative to the current working directory.
+    """
+    # Each list is sorted so that position k is expected to name the same
+    # image in all three of them.
+    print('Loading ISource bin ...\n')
+    clean_files = sorted(glob.glob(os.path.join('ISource', '*.bin')))
+    print('Loading INoisy bin ...\n')
+    noisy_files = sorted(glob.glob(os.path.join('INoisy', '*.bin')))
+    print('Loading res bin ...\n')
+    result_files = sorted(glob.glob(os.path.join(Result_path, '*.bin')))
+
+    print('begin infer')
+    # Running sum of the per-image PSNR values.
+    psnr_total = 0
+
+    for k, clean_file in enumerate(clean_files):
+        # clean reference image
+        clean = torch.from_numpy(bin2npy(clean_file))
+        # noisy image that was fed to the model
+        noisy = torch.from_numpy(bin2npy(noisy_files[k]))
+        # model output for that noisy image
+        residual = torch.from_numpy(bin2npy(result_files[k]))
+        print('infering...')
+        # The restored image is the noisy input minus the model output,
+        # limited to the valid [0, 1] intensity range.
+        with torch.no_grad():
+            restored = torch.clamp(noisy - residual, 0., 1.)
+        psnr = batch_PSNR(restored, clean, 1.)
+        psnr_total += psnr
+        print("%s PSNR %f" % (clean_file, psnr))
+
+    # Mean over the number of clean images.
+    psnr_total /= len(clean_files)
+    print("\nPSNR on test data %f" % psnr_total)
+
+if __name__ == "__main__":
+    # Usage: python3 DnCNN_postprocess.py <directory with the inference output .bin files>
+    try:
+        Result_path = sys.argv[1]
+    except IndexError:
+        print("Stopped!")
+        sys.exit(1)
+    if not (os.path.exists(Result_path)):
+        print("Result path doesn't exist.")
+        # Stop here: without the result files there is nothing to score.
+        sys.exit(1)
+
+    main(Result_path)
diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py
new file mode 100644
index 0000000000..dd612dcef8
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py
@@ -0,0 +1,79 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import sys
+import os
+import os.path
+import numpy as np
+import random
+import torch
+import cv2
+import glob
+
+infer_data = 'Set68'  # name of the image sub-directory below the data path
+infer_noiseL = 15  # std of the added Gaussian noise, on the 0-255 pixel scale
+
+def normalize(data):
+    return data / 255.  # scale 0-255 pixel values to the 0-1 range
+
+
+def proprecess(data_path, ISource_bin, INoisy_bin):
+    """Write a clean and a noisy float32 .bin file for every test image.
+
+    The PNG files are read from <data_path>/<infer_data>. Each image is
+    reduced to its first channel, scaled by normalize(), centred on a
+    zero-filled 481x481 canvas and saved with shape (1, 1, 481, 481): the
+    clean version into ISource_bin, and a copy with added Gaussian noise
+    (std infer_noiseL / 255) into INoisy_bin.
+    """
+    size = 481
+    print('Loading data info ...\n')
+    for path in sorted(glob.glob(os.path.join(data_path, infer_data, '*.png'))):
+        # Output files reuse the image name up to its first dot.
+        stem = os.path.basename(path).split('.')[0]
+        channel = normalize(np.float32(cv2.imread(path)[:, :, 0]))
+        rows, cols = channel.shape[0], channel.shape[1]
+
+        # Paste the image into the middle of the fixed-size canvas.
+        canvas = np.zeros((size, size), dtype=np.float32)
+        top = (size - rows) // 2
+        left = (size - cols) // 2
+        canvas[top:top + rows, left:left + cols] = channel
+
+        # Add the two leading singleton axes -> (1, 1, 481, 481).
+        ISource = torch.Tensor(canvas[np.newaxis, np.newaxis, :, :])
+        noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=infer_noiseL / 255.)
+        INoisy = ISource + noise
+
+        # clean image
+        clean = ISource.numpy()
+        print("ISource shape is", clean.shape)
+        clean.tofile(os.path.join(ISource_bin, stem + '.bin'))
+
+        # noisy image
+        noisy = INoisy.numpy()
+        print("INoisy shape is", noisy.shape)
+        noisy.tofile(os.path.join(INoisy_bin, stem + '.bin'))
+        
+if __name__ == '__main__':
+    # Usage: python3 DnCNN_preprocess.py <data_path> <ISource_bin dir> <INoisy_bin dir>
+    data_path = sys.argv[1]
+    ISource_bin = sys.argv[2]
+    INoisy_bin = sys.argv[3]
+    # makedirs creates missing parent directories as well and accepts an
+    # output directory that already exists.
+    os.makedirs(ISource_bin, exist_ok=True)
+    os.makedirs(INoisy_bin, exist_ok=True)
+
+    proprecess(data_path, ISource_bin, INoisy_bin)
diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py
new file mode 100644
index 0000000000..8362e6eea7
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py
@@ -0,0 +1,80 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import torch
+import torch.onnx
+import torch.nn as nn
+import sys
+
+from collections import OrderedDict
+
+class DnCNN(nn.Module):
+    """Stack of 3x3 convolutions: conv+ReLU, (conv+BN+ReLU) blocks, final conv."""
+
+    def __init__(self, channels, num_of_layers=17):
+        super(DnCNN, self).__init__()
+        features = 64
+
+        def conv(n_in, n_out):
+            # 3x3 convolution without bias; padding 1 keeps height and width.
+            return nn.Conv2d(in_channels=n_in, out_channels=n_out,
+                             kernel_size=3, padding=1, bias=False)
+
+        # The first and the last convolution are not followed by batch norm.
+        layers = [conv(channels, features), nn.ReLU(inplace=True)]
+        for _ in range(num_of_layers - 2):
+            layers += [conv(features, features), nn.BatchNorm2d(features), nn.ReLU(inplace=True)]
+        layers.append(conv(features, channels))
+        # The attribute name is part of the parameter names in the state dict.
+        self.dncnn = nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Run x through the sequential stack and return its output."""
+        return self.dncnn(x)
+
+
+def proc_nodes_module(checkpoint):
+    """Return a new OrderedDict of checkpoint with a leading "module." cut from the keys."""
+    prefix = "module."
+    renamed = OrderedDict()
+    for key, value in checkpoint.items():
+        # Keys without the prefix are taken over unchanged.
+        if key.startswith(prefix):
+            key = key[len(prefix):]
+        renamed[key] = value
+    return renamed
+
+
+def convert(pth_file, onnx_file):
+    """Export the DnCNN weights stored in pth_file as an ONNX model at onnx_file.
+
+    The export uses a dummy input of shape (1, 1, 481, 481) named
+    "actual_input_1" whose first axis is declared dynamic, and opset 11.
+    """
+    checkpoint = torch.load(pth_file, map_location='cpu')
+    weights = proc_nodes_module(checkpoint)
+
+    model = DnCNN(channels=1, num_of_layers=17)
+    model.load_state_dict(weights)
+    model.eval()
+    torch.onnx.export(model, torch.randn(1, 1, 481, 481), onnx_file, opset_version=11,
+                      input_names=["actual_input_1"], dynamic_axes={'actual_input_1': {0: '-1'}})
+
+if __name__ == "__main__":
+    # Usage: python3 DnCNN_pth2onnx.py <input .pth checkpoint> <output .onnx file>
+    pth_file = sys.argv[1]
+    onnx_file = sys.argv[2]
+
+    convert(pth_file, onnx_file)
diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md
new file mode 100644
index 0000000000..65429d8a5c
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md
@@ -0,0 +1,350 @@
+# DnCNN Onnx模型端到端推理指导
+-   [1 模型概述](#1-模型概述)
+	-   [1.1 论文地址](#11-论文地址)
+	-   [1.2 代码地址](#12-代码地址)
+-   [2 环境说明](#2-环境说明)
+	-   [2.1 深度学习框架](#21-深度学习框架)
+	-   [2.2 python第三方库](#22-python第三方库)
+-   [3 模型转换](#3-模型转换)
+	-   [3.1 pth转onnx模型](#31-pth转onnx模型)
+	-   [3.2 onnx转om模型](#32-onnx转om模型)
+-   [4 数据集预处理](#4-数据集预处理)
+	-   [4.1 数据集获取](#41-数据集获取)
+	-   [4.2 数据集预处理](#42-数据集预处理)
+	-   [4.3 生成数据集信息文件](#43-生成数据集信息文件)
+-   [5 离线推理](#5-离线推理)
+	-   [5.1 benchmark工具概述](#51-benchmark工具概述)
+	-   [5.2 离线推理](#52-离线推理)
+-   [6 精度对比](#6-精度对比)
+	-   [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计)
+	-   [6.2 开源TopN精度](#62-开源TopN精度)
+	-   [6.3 精度对比](#63-精度对比)
+-   [7 性能对比](#7-性能对比)
+	-   [7.1 npu性能数据](#71-npu性能数据)
+	-   [7.2 T4性能数据](#72-T4性能数据)
+	-   [7.3 性能对比](#73-性能对比)
+
+
+
+## 1 模型概述
+
+-   **[论文地址](#11-论文地址)**  
+
+-   **[代码地址](#12-代码地址)**  
+
+### 1.1 论文地址
+[DnCNN论文](https://ieeexplore.ieee.org/document/7839189)  
+
+### 1.2 代码地址
+
+brach:master
+
+commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b
+
+备注：commitid指的是值模型基于此版本代码做的推理
+
+[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch)  
+
+## 2 环境说明
+
+-   **[深度学习框架](#21-深度学习框架)**  
+
+-   **[python第三方库](#22-python第三方库)**  
+
+### 2.1 深度学习框架
+```  
+CANN 5.0.1
+torch==1.8.0
+torchvision==0.9.0
+onnx==1.9.0
+```
+
+### 2.2 python第三方库
+
+```
+numpy==1.20.2
+opencv-python==4.5.2.52
+scikit-image==0.16.2
+```
+
+**说明：** 
+>   X86架构：pytorch，torchvision和onnx可以通过官方下载whl包安装，其它可以通过pip3.7 install 包名 安装
+>
+>   Arm架构：pytorch，torchvision和onnx可以通过源码编译安装，其它可以通过pip3.7 install 包名 安装
+
+## 3 模型转换
+
+-   **[pth转onnx模型](#31-pth转onnx模型)** 
+
+-   **[onnx转om模型](#32-onnx转om模型)** 
+
+### 3.1 pth转onnx模型
+
+1.DnCNN模型代码下载
+```
+git clone https://github.com/SaoYan/DnCNN-PyTorch
+cd DnCNN-PyTorch
+```
+2.获取源码pth权重文件   
+wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth  
+文件的MD5sum值是： 5703a29b082cc03401fa9d9fee12cb71  
+
+3.获取NPU训练pth文件，将net.pth文件移动到DnCNN目录下
+
+4.编写pth2onnx脚本DnCNN_pth2onnx.py
+
+ **说明：**  
+>注意目前ATC支持的onnx算子版本为11
+
+5.执行pth2onnx脚本，生成onnx模型文件
+```
+python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx
+```
+
+ **模型转换要点：**  
+>此模型转换为onnx不需要修改开源代码仓代码，故不需要特殊说明
+
+### 3.2 onnx转om模型
+
+1.设置环境变量
+```
+source env.sh
+```
+2.使用atc将onnx模型转换为om模型文件
+```
+atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
+```
+
+## 4 数据集预处理
+
+-   **[数据集获取](#41-数据集获取)**  
+
+-   **[数据集预处理](#42-数据集预处理)**  
+
+-   **[生成数据集信息文件](#43-生成数据集信息文件)**  
+
+### 4.1 推理数据集获取
+存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录
+
+### 4.2 数据集预处理
+1.预处理脚本data_preprocess.py
+
+2.执行预处理脚本，生成数据集预处理后的bin文件
+
+```
+python3.7 data_preprocess.py data ISource INoisy
+```
+### 4.3 生成数据集信息文件
+1.生成数据集信息文件脚本get_info.py
+
+2.执行生成数据集信息脚本，生成数据集信息文件
+```
+python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481
+```
+第一个参数为模型输入的类型，第二个参数为生成的bin文件路径，第三个为输出的info文件，后面为宽高信息
+## 5 离线推理
+
+-   **[benchmark工具概述](#51-benchmark工具概述)**  
+
+-   **[离线推理](#52-离线推理)**  
+
+### 5.1 benchmark工具概述
+
+benchmark工具为华为自研的模型推理工具，支持多种模型的离线推理，能够迅速统计出模型在Ascend310上的性能，支持真实数据和纯推理两种模式，配合后处理脚本，可以实现诸多模型的端到端过程
+### 5.2 离线推理
+1.设置环境变量
+```
+source env.sh
+```
+2.执行离线推理
+```
+./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
+```
+输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id)，每个输入对应的输出对应一个_X.bin文件。
+
+## 6 精度对比
+
+-   **[离线推理TopN精度](#61-离线推理TopN精度)**  
+-   **[开源TopN精度](#62-开源TopN精度)**  
+-   **[精度对比](#63-精度对比)**  
+
+### 6.1 离线推理TopN精度统计
+
+后处理统计TopN精度
+
+调用postprocess.py脚本推理结果进行PSRN计算，结果会打印在屏幕上
+```
+python3.7 postprocess.py result/dumpOutput_device0/
+```
+第一个参数为benchmark输出目录
+查看输出结果：
+```
+ISource/test064.bin PSNR 29.799832
+infering...
+ISource/test065.bin PSNR 31.486418
+infering...
+ISource/test066.bin PSNR 35.676752
+infering...
+ISource/test067.bin PSNR 28.577475
+infering...
+ISource/test068.bin PSNR 29.709767
+
+PSNR on test data 31.526892
+```
+经过对bs1与bs16的om测试，本模型batch1的精度与batch16的精度没有差别，精度数据均如上
+
+### 6.2 开源PSNR精度
+```
+| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch |
+|:-----------:|:-------:|:-------:|:---------------:|:---------------:|
+|     15      |  31.73  |  31.61  |      31.71      |      31.60      |
+|     25      |  29.23  |  29.16  |      29.21      |      29.15      |
+|     50      |  26.23  |  26.23  |      26.22      |      26.20      |
+```
+### 6.3 精度对比
+将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比，精度下降在1%范围之内，故精度达标。  
+ **精度调试：**  
+
+>没有遇到精度不达标的问题，故不需要进行精度调试
+
+## 7 性能对比
+
+-   **[npu性能数据](#71-npu性能数据)**  
+-   **[T4性能数据](#72-T4性能数据)**  
+-   **[性能对比](#73-性能对比)**  
+
+### 7.1 npu性能数据
+benchmark工具在整个数据集上推理时也会统计性能数据，但是推理整个数据集较慢，如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据，也可以使用benchmark纯推理功能测得性能数据，但是由于随机数不能模拟数据分布，纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式，benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用，模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准，对于使用benchmark工具测试的batch4，8，32的性能数据在README.md中如下作记录即可。  
+1.benchmark工具在整个数据集上推理获得性能数据  
+batch1的性能，benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt：
+
+```
+[e2e] throughputRate: 15.0465, latency: 4519.32
+[data read] throughputRate: 966.417, moduleLatency: 1.03475
+[preprocess] throughputRate: 525.539, moduleLatency: 1.90281
+[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903
+[post] throughputRate: 22.615, moduleLatency: 44.2185
+```
+Interface throughputRate: 23.7919，23.7919x4=95.176既是batch1 310单卡吞吐率  
+
+batch16的性能，benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt：
+```
+[e2e] throughputRate: 15.3818, latency: 4420.81
+[data read] throughputRate: 1484.65, moduleLatency: 0.673559
+[preprocess] throughputRate: 316.273, moduleLatency: 3.16182
+[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179
+[post] throughputRate: 1.56798, moduleLatency: 637.764
+```
+Interface throughputRate: 22.2853，22.2853x4=89.1412既是batch16 310单卡吞吐率  
+
+batch4性能：
+```
+[e2e] throughputRate: 15.5641, latency: 4369.02
+[data read] throughputRate: 1898.17, moduleLatency: 0.526824
+[preprocess] throughputRate: 523.883, moduleLatency: 1.90882
+[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192
+[post] throughputRate: 5.50981, moduleLatency: 181.495
+```
+batch4 310单卡吞吐率 23.9045x4=95.618
+
+batch8性能：
+```
+[e2e] throughputRate: 15.5035, latency: 4386.1
+[data read] throughputRate: 1863.93, moduleLatency: 0.5365
+[preprocess] throughputRate: 461.471, moduleLatency: 2.16699
+[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 47.2831
+[post] throughputRate: 2.74035, moduleLatency: 364.917
+```
+batch8 310单卡吞吐率 22.2652x4=89.0608
+
+batch32性能：
+```
+[e2e] throughputRate: 12.4075, latency: 5480.54
+[data read] throughputRate: 1770.65, moduleLatency: 0.564765
+[preprocess] throughputRate: 242.944, moduleLatency: 4.11618
+[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386
+[post] throughputRate: 0.68503, moduleLatency: 1459.79
+```
+batch32 310单卡吞吐率 13.2648x4=53.0592
+
+### 7.2 T4性能数据
+在装有T4卡的服务器上测试gpu性能，TensorRT版本：7.2.3.4，cuda版本：11.0，cudnn版本：8.2  
+batch1性能：
+```
+trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:1x1x484x481 --threads
+```
+gpu T4是4个device并行执行的结果，mean是时延（tensorrt的时延是batch个数据的推理时间），即吞吐率的倒数乘以batch
+```
+[06/05/2021-06:28:42] [I] GPU Compute
+[06/05/2021-06:28:42] [I] min: 12.5439 ms
+[06/05/2021-06:28:42] [I] max: 19.0195 ms
+[06/05/2021-06:28:42] [I] mean: 13.1826 ms
+[06/05/2021-06:28:42] [I] median: 12.9761 ms
+[06/05/2021-06:28:42] [I] percentile: 17.7111 ms at 99%
+[06/05/2021-06:28:42] [I] total compute time: 3.01882 s
+```
+batch1 t4单卡吞吐率：1000x1/(13.1826/1)=75.858fps  
+
+batch16性能：
+```
+trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:16x1x484x481 --threads
+```
+```
+[06/05/2021-06:31:53] [I] GPU Compute
+[06/05/2021-06:31:53] [I] min: 198.604 ms
+[06/05/2021-06:31:53] [I] max: 218.884 ms
+[06/05/2021-06:31:53] [I] mean: 201.968 ms
+[06/05/2021-06:31:53] [I] median: 200.267 ms
+[06/05/2021-06:31:53] [I] percentile: 218.884 ms at 99%
+[06/05/2021-06:31:53] [I] total compute time: 3.23149 s
+```
+batch16 t4单卡吞吐率：1000x1/(201.968/16)=79.220fps  
+
+batch4性能
+```
+[06/05/2021-13:48:52] [I] GPU Compute
+[06/05/2021-13:48:52] [I] min: 48.9983 ms
+[06/05/2021-13:48:52] [I] max: 67.3423 ms
+[06/05/2021-13:48:52] [I] mean: 50.6542 ms
+[06/05/2021-13:48:52] [I] median: 50.0736 ms
+[06/05/2021-13:48:52] [I] percentile: 67.3423 ms at 99%
+[06/05/2021-13:48:52] [I] total compute time: 3.08991 s
+```
+batch4 t4单卡吞吐率：1000x1/(50.6542/4)=78.957fps
+
+batch8性能：
+```
+[06/05/2021-13:50:31] [I] GPU Compute
+[06/05/2021-13:50:31] [I] min: 101.378 ms
+[06/05/2021-13:50:31] [I] max: 128.73 ms
+[06/05/2021-13:50:31] [I] mean: 104.424 ms
+[06/05/2021-13:50:31] [I] median: 102.267 ms
+[06/05/2021-13:50:31] [I] percentile: 128.73 ms at 99%
+[06/05/2021-13:50:31] [I] total compute time: 3.13273 s
+```
+batch8 t4单卡吞吐率：1000x1/(104.424/8)=76.610fps  
+
+batch32性能:
+trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:32x1x484x481 --threads
+```
+[06/05/2021-13:57:44] [I] GPU Compute
+[06/05/2021-13:57:44] [I] min: 399.587 ms
+[06/05/2021-13:57:44] [I] max: 426.525 ms
+[06/05/2021-13:57:44] [I] mean: 409.475 ms
+[06/05/2021-13:57:44] [I] median: 407.555 ms
+[06/05/2021-13:57:44] [I] percentile: 426.525 ms at 99%
+[06/05/2021-13:57:44] [I] total compute time: 4.09475 s
+```
+batch32 t4单卡吞吐率：1000x1/(409.475/32)=78.149fps
+
+
+
+### 7.3 性能对比
+batch1：23.7919x4 > 1000x1/(13.1826/1)  
+batch16：22.2853x4 > 1000x1/(201.968/16)  
+310单个device的吞吐率乘4即单卡吞吐率，所得数据中单batch优于T4，多batch略高于T4 
+对于batch1与batch16，310性能均高于T4性能1.2倍，但是batch32 310全量数据集上推理性能低于T4性能，所以该模型放在Reaserch/cv/classification目录下。
+**性能优化：** 
+
+>单batch性能优于T4,多batch的性能略高于T4,无需优化。
+>batch32 310全量数据集上推理性能低于T4性能，但是batch32纯推理性能94.3228fps，高于T4性能。
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py
new file mode 100644
index 0000000000..def864bec0
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py
@@ -0,0 +1,60 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import sys
+import cv2
+from glob import glob
+
+
+def get_bin_info(file_path, info_name, width, height):
+    """Write one "<index> <bin path> <width> <height>" line per .bin file in file_path."""
+    # Sort so that the index assigned to each file is reproducible across runs.
+    bin_images = sorted(glob(os.path.join(file_path, '*.bin')))
+    with open(info_name, 'w') as file:
+        for index, img in enumerate(bin_images):
+            file.write(' '.join([str(index), img, str(width), str(height)]) + '\n')
+
+
+def get_jpg_info(file_path, info_name):
+    """Write one "<index> <image path> <width> <height>" line per JPEG image in file_path.
+
+    All files matching the extensions jpg, jpeg, JPG and JPEG are listed. The
+    index counts across every matched file, so each line of the info file
+    has its own number even when several extensions are present.
+    """
+    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
+    image_names = []
+    # Gather the matches of all extensions into one flat list.
+    for extension in extensions:
+        image_names.extend(glob(os.path.join(file_path, '*.' + extension)))
+    with open(info_name, 'w') as file:
+        for index, img in enumerate(image_names):
+            # The size is read from the decoded image: shape is (height, width, ...).
+            height, width = cv2.imread(img).shape[:2]
+            file.write(' '.join([str(index), img, str(width), str(height)]) + '\n')
+
+
+if __name__ == '__main__':
+    # Usage: get_info.py bin <dir> <info file> <width> <height> | jpg <dir> <info file>
+    file_type = sys.argv[1]
+    file_path = sys.argv[2]
+    info_name = sys.argv[3]
+    if file_type == 'bin':
+        # Check the argument count before reading width/height from argv.
+        assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5'
+        get_bin_info(file_path, info_name, sys.argv[4], sys.argv[5])
+    elif file_type == 'jpg':
+        assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3'
+        get_jpg_info(file_path, info_name)
diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt
new file mode 100644
index 0000000000..965bdf8bdd
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt
@@ -0,0 +1,6 @@
+torch==1.8.0
+torchvision==0.9.0
+onnx==1.9.0
+numpy==1.20.2
+opencv-python==4.5.2.52
+scikit-image==0.16.2
\ No newline at end of file
-- 
Gitee


From 75dae9654fff70b3209e7e31f314b9f53c4502c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:40:36 +0000
Subject: [PATCH 03/10] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20AC?=
 =?UTF-8?q?L=5FPyTorch/contrib/cv/image=5Fprocess/DnCNN/DnCNN=5F710/READEM?=
 =?UTF-8?q?E.md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../image_process/DnCNN/DnCNN_710/READEME.md  | 350 ------------------
 1 file changed, 350 deletions(-)
 delete mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md
deleted file mode 100644
index 65429d8a5c..0000000000
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md
+++ /dev/null
@@ -1,350 +0,0 @@
-# DnCNN Onnx模型端到端推理指导
--   [1 模型概述](#1-模型概述)
-	-   [1.1 论文地址](#11-论文地址)
-	-   [1.2 代码地址](#12-代码地址)
--   [2 环境说明](#2-环境说明)
-	-   [2.1 深度学习框架](#21-深度学习框架)
-	-   [2.2 python第三方库](#22-python第三方库)
--   [3 模型转换](#3-模型转换)
-	-   [3.1 pth转onnx模型](#31-pth转onnx模型)
-	-   [3.2 onnx转om模型](#32-onnx转om模型)
--   [4 数据集预处理](#4-数据集预处理)
-	-   [4.1 数据集获取](#41-数据集获取)
-	-   [4.2 数据集预处理](#42-数据集预处理)
-	-   [4.3 生成数据集信息文件](#43-生成数据集信息文件)
--   [5 离线推理](#5-离线推理)
-	-   [5.1 benchmark工具概述](#51-benchmark工具概述)
-	-   [5.2 离线推理](#52-离线推理)
--   [6 精度对比](#6-精度对比)
-	-   [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计)
-	-   [6.2 开源TopN精度](#62-开源TopN精度)
-	-   [6.3 精度对比](#63-精度对比)
--   [7 性能对比](#7-性能对比)
-	-   [7.1 npu性能数据](#71-npu性能数据)
-	-   [7.2 T4性能数据](#72-T4性能数据)
-	-   [7.3 性能对比](#73-性能对比)
-
-
-
-## 1 模型概述
-
--   **[论文地址](#11-论文地址)**  
-
--   **[代码地址](#12-代码地址)**  
-
-### 1.1 论文地址
-[DnCNN论文](https://ieeexplore.ieee.org/document/7839189)  
-
-### 1.2 代码地址
-
-brach:master
-
-commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b
-
-备注：commitid指的是值模型基于此版本代码做的推理
-
-[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch)  
-
-## 2 环境说明
-
--   **[深度学习框架](#21-深度学习框架)**  
-
--   **[python第三方库](#22-python第三方库)**  
-
-### 2.1 深度学习框架
-```  
-CANN 5.0.1
-torch==1.8.0
-torchvision==0.9.0
-onnx==1.9.0
-```
-
-### 2.2 python第三方库
-
-```
-numpy==1.20.2
-opencv-python==4.5.2.52
-scikit-image==0.16.2
-```
-
-**说明：** 
->   X86架构：pytorch，torchvision和onnx可以通过官方下载whl包安装，其它可以通过pip3.7 install 包名 安装
->
->   Arm架构：pytorch，torchvision和onnx可以通过源码编译安装，其它可以通过pip3.7 install 包名 安装
-
-## 3 模型转换
-
--   **[pth转onnx模型](#31-pth转onnx模型)** 
-
--   **[onnx转om模型](#32-onnx转om模型)** 
-
-### 3.1 pth转onnx模型
-
-1.DnCNN模型代码下载
-```
-git clone https://github.com/SaoYan/DnCNN-PyTorch
-cd DnCNN-PyTorch
-```
-2.获取源码pth权重文件   
-wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth  
-文件的MD5sum值是： 5703a29b082cc03401fa9d9fee12cb71  
-
-3.获取NPU训练pth文件，将net.pth文件移动到DnCNN目录下
-
-4.编写pth2onnx脚本DnCNN_pth2onnx.py
-
- **说明：**  
->注意目前ATC支持的onnx算子版本为11
-
-5.执行pth2onnx脚本，生成onnx模型文件
-```
-python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx
-```
-
- **模型转换要点：**  
->此模型转换为onnx不需要修改开源代码仓代码，故不需要特殊说明
-
-### 3.2 onnx转om模型
-
-1.设置环境变量
-```
-source env.sh
-```
-2.使用atc将onnx模型转换为om模型文件
-```
-atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
-```
-
-## 4 数据集预处理
-
--   **[数据集获取](#41-数据集获取)**  
-
--   **[数据集预处理](#42-数据集预处理)**  
-
--   **[生成数据集信息文件](#43-生成数据集信息文件)**  
-
-### 4.1 推理数据集获取
-存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录
-
-### 4.2 数据集预处理
-1.预处理脚本data_preprocess.py
-
-2.执行预处理脚本，生成数据集预处理后的bin文件
-
-```
-python3.7 data_preprocess.py data ISource INoisy
-```
-### 4.3 生成数据集信息文件
-1.生成数据集信息文件脚本get_info.py
-
-2.执行生成数据集信息脚本，生成数据集信息文件
-```
-python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481
-```
-第一个参数为模型输入的类型，第二个参数为生成的bin文件路径，第三个为输出的info文件，后面为宽高信息
-## 5 离线推理
-
--   **[benchmark工具概述](#51-benchmark工具概述)**  
-
--   **[离线推理](#52-离线推理)**  
-
-### 5.1 benchmark工具概述
-
-benchmark工具为华为自研的模型推理工具，支持多种模型的离线推理，能够迅速统计出模型在Ascend310上的性能，支持真实数据和纯推理两种模式，配合后处理脚本，可以实现诸多模型的端到端过程
-### 5.2 离线推理
-1.设置环境变量
-```
-source env.sh
-```
-2.执行离线推理
-```
-./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
-```
-输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id)，每个输入对应的输出对应一个_X.bin文件。
-
-## 6 精度对比
-
--   **[离线推理TopN精度](#61-离线推理TopN精度)**  
--   **[开源TopN精度](#62-开源TopN精度)**  
--   **[精度对比](#63-精度对比)**  
-
-### 6.1 离线推理TopN精度统计
-
-后处理统计TopN精度
-
-调用postprocess.py脚本推理结果进行PSRN计算，结果会打印在屏幕上
-```
-python3.7 postprocess.py result/dumpOutput_device0/
-```
-第一个参数为benchmark输出目录
-查看输出结果：
-```
-ISource/test064.bin PSNR 29.799832
-infering...
-ISource/test065.bin PSNR 31.486418
-infering...
-ISource/test066.bin PSNR 35.676752
-infering...
-ISource/test067.bin PSNR 28.577475
-infering...
-ISource/test068.bin PSNR 29.709767
-
-PSNR on test data 31.526892
-```
-经过对bs1与bs16的om测试，本模型batch1的精度与batch16的精度没有差别，精度数据均如上
-
-### 6.2 开源PSNR精度
-```
-| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch |
-|:-----------:|:-------:|:-------:|:---------------:|:---------------:|
-|     15      |  31.73  |  31.61  |      31.71      |      31.60      |
-|     25      |  29.23  |  29.16  |      29.21      |      29.15      |
-|     50      |  26.23  |  26.23  |      26.22      |      26.20      |
-```
-### 6.3 精度对比
-将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比，精度下降在1%范围之内，故精度达标。  
- **精度调试：**  
-
->没有遇到精度不达标的问题，故不需要进行精度调试
-
-## 7 性能对比
-
--   **[npu性能数据](#71-npu性能数据)**  
--   **[T4性能数据](#72-T4性能数据)**  
--   **[性能对比](#73-性能对比)**  
-
-### 7.1 npu性能数据
-benchmark工具在整个数据集上推理时也会统计性能数据，但是推理整个数据集较慢，如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据，也可以使用benchmark纯推理功能测得性能数据，但是由于随机数不能模拟数据分布，纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式，benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用，模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准，对于使用benchmark工具测试的batch4，8，32的性能数据在README.md中如下作记录即可。  
-1.benchmark工具在整个数据集上推理获得性能数据  
-batch1的性能，benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt：
-
-```
-[e2e] throughputRate: 15.0465, latency: 4519.32
-[data read] throughputRate: 966.417, moduleLatency: 1.03475
-[preprocess] throughputRate: 525.539, moduleLatency: 1.90281
-[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903
-[post] throughputRate: 22.615, moduleLatency: 44.2185
-```
-Interface throughputRate: 23.7919，23.7919x4=95.176既是batch1 310单卡吞吐率  
-
-batch16的性能，benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt：
-```
-[e2e] throughputRate: 15.3818, latency: 4420.81
-[data read] throughputRate: 1484.65, moduleLatency: 0.673559
-[preprocess] throughputRate: 316.273, moduleLatency: 3.16182
-[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179
-[post] throughputRate: 1.56798, moduleLatency: 637.764
-```
-Interface throughputRate: 22.2853，22.2853x4=89.1412既是batch16 310单卡吞吐率  
-
-batch4性能：
-```
-[e2e] throughputRate: 15.5641, latency: 4369.02
-[data read] throughputRate: 1898.17, moduleLatency: 0.526824
-[preprocess] throughputRate: 523.883, moduleLatency: 1.90882
-[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192
-[post] throughputRate: 5.50981, moduleLatency: 181.495
-```
-batch4 310单卡吞吐率 23.9045x4=95.618
-
-batch8性能：
-```
-[e2e] throughputRate: 15.5035, latency: 4386.1
-[data read] throughputRate: 1863.93, moduleLatency: 0.5365
-[preprocess] throughputRate: 461.471, moduleLatency: 2.16699
-[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 47.2831
-[post] throughputRate: 2.74035, moduleLatency: 364.917
-```
-batch8 310单卡吞吐率 22.2652x4=89.0608
-
-batch32性能：
-```
-[e2e] throughputRate: 12.4075, latency: 5480.54
-[data read] throughputRate: 1770.65, moduleLatency: 0.564765
-[preprocess] throughputRate: 242.944, moduleLatency: 4.11618
-[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386
-[post] throughputRate: 0.68503, moduleLatency: 1459.79
-```
-batch32 310单卡吞吐率 13.2648x4=53.0592
-
-### 7.2 T4性能数据
-在装有T4卡的服务器上测试gpu性能，TensorRT版本：7.2.3.4，cuda版本：11.0，cudnn版本：8.2  
-batch1性能：
-```
-trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:1x1x484x481 --threads
-```
-gpu T4是4个device并行执行的结果，mean是时延（tensorrt的时延是batch个数据的推理时间），即吞吐率的倒数乘以batch
-```
-[06/05/2021-06:28:42] [I] GPU Compute
-[06/05/2021-06:28:42] [I] min: 12.5439 ms
-[06/05/2021-06:28:42] [I] max: 19.0195 ms
-[06/05/2021-06:28:42] [I] mean: 13.1826 ms
-[06/05/2021-06:28:42] [I] median: 12.9761 ms
-[06/05/2021-06:28:42] [I] percentile: 17.7111 ms at 99%
-[06/05/2021-06:28:42] [I] total compute time: 3.01882 s
-```
-batch1 t4单卡吞吐率：1000x1/(13.1826/1)=75.858fps  
-
-batch16性能：
-```
-trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:16x1x484x481 --threads
-```
-```
-[06/05/2021-06:31:53] [I] GPU Compute
-[06/05/2021-06:31:53] [I] min: 198.604 ms
-[06/05/2021-06:31:53] [I] max: 218.884 ms
-[06/05/2021-06:31:53] [I] mean: 201.968 ms
-[06/05/2021-06:31:53] [I] median: 200.267 ms
-[06/05/2021-06:31:53] [I] percentile: 218.884 ms at 99%
-[06/05/2021-06:31:53] [I] total compute time: 3.23149 s
-```
-batch16 t4单卡吞吐率：1000x1/(201.968/16)=79.220fps  
-
-batch4性能
-```
-[06/05/2021-13:48:52] [I] GPU Compute
-[06/05/2021-13:48:52] [I] min: 48.9983 ms
-[06/05/2021-13:48:52] [I] max: 67.3423 ms
-[06/05/2021-13:48:52] [I] mean: 50.6542 ms
-[06/05/2021-13:48:52] [I] median: 50.0736 ms
-[06/05/2021-13:48:52] [I] percentile: 67.3423 ms at 99%
-[06/05/2021-13:48:52] [I] total compute time: 3.08991 s
-```
-batch4 t4单卡吞吐率：1000x1/(50.6542/4)=78.957fps
-
-batch8性能：
-```
-[06/05/2021-13:50:31] [I] GPU Compute
-[06/05/2021-13:50:31] [I] min: 101.378 ms
-[06/05/2021-13:50:31] [I] max: 128.73 ms
-[06/05/2021-13:50:31] [I] mean: 104.424 ms
-[06/05/2021-13:50:31] [I] median: 102.267 ms
-[06/05/2021-13:50:31] [I] percentile: 128.73 ms at 99%
-[06/05/2021-13:50:31] [I] total compute time: 3.13273 s
-```
-batch8 t4单卡吞吐率：1000x1/(104.424/8)=76.610fps  
-
-batch32性能:
-trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:32x1x484x481 --threads
-```
-[06/05/2021-13:57:44] [I] GPU Compute
-[06/05/2021-13:57:44] [I] min: 399.587 ms
-[06/05/2021-13:57:44] [I] max: 426.525 ms
-[06/05/2021-13:57:44] [I] mean: 409.475 ms
-[06/05/2021-13:57:44] [I] median: 407.555 ms
-[06/05/2021-13:57:44] [I] percentile: 426.525 ms at 99%
-[06/05/2021-13:57:44] [I] total compute time: 4.09475 s
-```
-batch32 t4单卡吞吐率：1000x1/(409.475/32)=78.149fps
-
-
-
-### 7.3 性能对比
-batch1：23.7919x4 > 1000x1/(13.1826/1)  
-batch16：22.2853x4 > 1000x1/(201.968/16)  
-310单个device的吞吐率乘4即单卡吞吐率，所得数据中单batch优于T4，多batch略高于T4 
-对于batch1与batch16，310性能均高于T4性能1.2倍，但是batch32 310全量数据集上推理性能低于T4性能，所以该模型放在Reaserch/cv/classification目录下。
-**性能优化：** 
-
->单batch性能优于T4,多batch的性能略高于T4,无需优化。
->batch32 310全量数据集上推理性能低于T4性能，但是batch32纯推理性能94.3228fps，高于T4性能。
\ No newline at end of file
-- 
Gitee


From bb237493054e10d9069f10de8b4177099d6795e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:43:17 +0000
Subject: [PATCH 04/10] DnCNN READEME.md: add Ascend710 atc commands for bs1
 and bs16

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
index 592f7f9936..fa5c266340 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
@@ -111,6 +111,12 @@ source env.sh
 2.使用atc将onnx模型转换为om模型文件
 ```
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
+
+(710_bs1)
+atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710
+
+(710_bs16)
+atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710
 ```
 
 ## 4 数据集预处理
-- 
Gitee


From 2ce6c95381db46b715b8e15898fad599890f4f6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:46:26 +0000
Subject: [PATCH 05/10] DnCNN READEME.md: add bs16 benchmark command and
 split the atc code block

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
index fa5c266340..7decb98bdd 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
@@ -111,10 +111,10 @@ source env.sh
 2.使用atc将onnx模型转换为om模型文件
 ```
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
-
+```
 (710_bs1)
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710
-
+```
 (710_bs16)
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710
 ```
@@ -162,8 +162,13 @@ source env.sh
 ```
 2.执行离线推理
 ```
-./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
+(bs1)
+./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
+```
+(bs16)
+./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs16.om -device_id=0 -batch_size=16 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
 ```
+
 输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id)，每个输入对应的输出对应一个_X.bin文件。
 
 ## 6 精度对比
-- 
Gitee


From 25522ba68151b7d31c4eca122651339370a0f287 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:48:03 +0000
Subject: [PATCH 06/10] DnCNN READEME.md: add opening code fences for the
 Ascend710 atc commands

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
index 7decb98bdd..87b84461e5 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
@@ -113,9 +113,11 @@ source env.sh
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
 ```
 (710_bs1)
+```
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710
 ```
 (710_bs16)
+```
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710
 ```
 
-- 
Gitee


From c0fb82aedb4a38ae5d014561c60d51d532543593 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:48:59 +0000
Subject: [PATCH 07/10] DnCNN READEME.md: add opening code fences for the
 bs1/bs16 benchmark commands

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
index 87b84461e5..46119f3b3c 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
@@ -165,9 +165,11 @@ source env.sh
 2.执行离线推理
 ```
 (bs1)
+```
 ./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
 ```
 (bs16)
+```
 ./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs16.om -device_id=0 -batch_size=16 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
 ```
 
-- 
Gitee


From 85ce66b63ec6e2b112140c149f560940304fb5b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:49:45 +0000
Subject: [PATCH 08/10] DnCNN READEME.md: remove stray code fence before the
 (bs1) label

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
index 46119f3b3c..dcc4240037 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
@@ -163,7 +163,6 @@ benchmark工具为华为自研的模型推理工具，支持多种模型的离
 source env.sh
 ```
 2.执行离线推理
-```
 (bs1)
 ```
 ./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
-- 
Gitee


From 348719952c6d7357f53097c404213ebb542f09f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:54:33 +0000
Subject: [PATCH 09/10] DnCNN READEME.md: add benchmark chmod step and
 renumber the atc step

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
index dcc4240037..35ee9aa42b 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md
@@ -108,7 +108,11 @@ python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx
 ```
 source env.sh
 ```
-2.使用atc将onnx模型转换为om模型文件
+2.增加benchmark.{arch}可执行权限。
+```
+chmod u+x benchmark.x86_64
+```
+3.使用atc将onnx模型转换为om模型文件
 ```
 atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
 ```
-- 
Gitee


From 514c8a66b3072f046de7f8d85f813cad1ce70a97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Tue, 10 May 2022 06:55:57 +0000
Subject: [PATCH 10/10] DnCNN postprocess.py: import compare_psnr from
 skimage.metrics

---
 ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py
index 0333302e81..7945fd671a 100644
--- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py
+++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py
@@ -20,7 +20,7 @@ import cv2
 import torch
 import torch.nn as nn
 import struct
-from skimage.measure.simple_metrics import compare_psnr
+from skimage.metrics import peak_signal_noise_ratio as compare_psnr
 
 
 def batch_PSNR(img, imclean, data_range):
-- 
Gitee