From 81b1a7a1397d865ee52ccebb65e48839f54816ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?=
 <10760252+mxhua@user.noreply.gitee.com>
Date: Mon, 9 May 2022 06:20:04 +0000
Subject: [PATCH] =?UTF-8?q?[=E4=B8=9C=E5=8C=97=E5=A4=A7=E5=AD=A6][?=
 =?UTF-8?q?=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTorch=E7=A6=BB=E7=BA=BF?=
 =?UTF-8?q?=E6=8E=A8=E7=90=86][DnCNN]-=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 DnCNN/DnCNN_postprocess.py | 103 +++++++++++
 DnCNN/DnCNN_preprocess.py  |  79 +++++++++
 DnCNN/DnCNN_pth2onnx.py    |  80 +++++++++
 DnCNN/READEME.md           | 350 +++++++++++++++++++++++++++++++++++++
 DnCNN/env.sh               |   8 +
 DnCNN/get_info.py          |  60 +++++++
 DnCNN/modelzoo_level.txt   |  14 ++
 DnCNN/requirements.txt     |   6 +
 DnCNN/test/perf_bs1.sh     |   6 +
 DnCNN/test/perf_bs16.sh    |   6 +
 DnCNN/test/perf_g.sh       |   1 +
 DnCNN/test/pth2om_bs1.sh   |   3 +
 DnCNN/test/pth2om_bs16.sh  |   3 +
 13 files changed, 719 insertions(+)
 create mode 100644 DnCNN/DnCNN_postprocess.py
 create mode 100644 DnCNN/DnCNN_preprocess.py
 create mode 100644 DnCNN/DnCNN_pth2onnx.py
 create mode 100644 DnCNN/READEME.md
 create mode 100644 DnCNN/env.sh
 create mode 100644 DnCNN/get_info.py
 create mode 100644 DnCNN/modelzoo_level.txt
 create mode 100644 DnCNN/requirements.txt
 create mode 100644 DnCNN/test/perf_bs1.sh
 create mode 100644 DnCNN/test/perf_bs16.sh
 create mode 100644 DnCNN/test/perf_g.sh
 create mode 100644 DnCNN/test/pth2om_bs1.sh
 create mode 100644 DnCNN/test/pth2om_bs16.sh

diff --git a/DnCNN/DnCNN_postprocess.py b/DnCNN/DnCNN_postprocess.py
new file mode 100644
index 0000000000..82ce433eb3
--- /dev/null
+++ b/DnCNN/DnCNN_postprocess.py
@@ -0,0 +1,103 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import sys
+import glob
+import numpy as np
+import cv2
+import torch
+import torch.nn as nn
+import struct
+from skimage.metrics import peak_signal_noise_ratio as compare_psnr
+
+
+def batch_PSNR(img, imclean, data_range):
+
+    Img = img.data.cpu().numpy().astype(np.float32)
+    Iclean = imclean.data.cpu().numpy().astype(np.float32)
+    PSNR = 0
+    for i in range(Img.shape[0]):
+        PSNR += compare_psnr(Iclean[i, :, :, :], Img[i, :, :, :], data_range=data_range)
+    return (PSNR / Img.shape[0])
+
+
+def bin2npy(filepath):
+
+    size = os.path.getsize(filepath)  
+    res = []
+    L = int(size / 4)
+    binfile = open(filepath, 'rb')  
+    for i in range(L):
+        data = binfile.read(4)  
+        num = struct.unpack('f', data)
+        res.append(num[0])
+    binfile.close()
+    dim_res = np.array(res).reshape(1, 1, 481, 481)
+    return dim_res
+
+
+def main(Result_path):
+
+    # load data info
+    print('Loading ISource bin ...\n')
+    ISource = glob.glob(os.path.join('ISource', '*.bin'))
+    ISource.sort()
+    print('Loading INoisy bin ...\n')
+    INoisy = glob.glob(os.path.join('INoisy', '*.bin'))
+    INoisy.sort()
+    # load result file
+    print('Loading res bin ...\n')
+    Result_path = glob.glob(os.path.join(Result_path, '*.bin'))
+    Result_path.sort()
+
+    # begin data
+    print('begin infer')
+    psnr_test = 0
+    n_lables = 0
+
+    for isource in ISource:
+        isource_name = isource
+        # isource
+        isource = bin2npy(isource)
+        isource = torch.from_numpy(isource)
+        # inoisy
+        inoisy = bin2npy(INoisy[n_lables])
+        inoisy = torch.from_numpy(inoisy)
+        # Result_path
+        Result = bin2npy(Result_path[n_lables])
+        Result = torch.from_numpy(Result)
+        n_lables += 1
+        print('infering...')
+        with torch.no_grad(): 
+            Out = torch.clamp(inoisy - Result, 0., 1.)
+        psnr = batch_PSNR(Out, isource, 1.)
+        psnr_test += psnr
+        print("%s PSNR %f" % (isource_name, psnr))
+    psnr_test /= len(ISource)
+    print("\nPSNR on test data %f" % psnr_test)
+
+if __name__ == "__main__":
+    
+    try:
+        Result_path = sys.argv[1]
+
+    except IndexError:
+        print("Stopped!")
+        exit(1)
+
+    if not (os.path.exists(Result_path)):
+        print("Result path doesn't exist.")
+
+    main(Result_path)
diff --git a/DnCNN/DnCNN_preprocess.py b/DnCNN/DnCNN_preprocess.py
new file mode 100644
index 0000000000..0de80a5e6c
--- /dev/null
+++ b/DnCNN/DnCNN_preprocess.py
@@ -0,0 +1,79 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import sys
+import os
+import os.path
+import numpy as np
+import random
+import torch
+import cv2
+import glob
+
+infer_data = 'Set68'  # sub-directory of data_path that is searched for '*.png' files
+infer_noiseL = 15  # std of the added Gaussian noise, expressed on the 0..255 scale
+
+def normalize(data):
+    return data / 255.  # divide by 255 (maps 0..255 pixel values to 0..1)
+
+
+def proprecess(data_path, ISource_bin, INoisy_bin):
+
+    # load data info
+    print('Loading data info ...\n')
+    files = glob.glob(os.path.join(data_path, infer_data, '*.png'))
+    files.sort()
+    # process data
+    for i in range(len(files)):
+        # image
+        filename = os.path.basename(files[i])
+        img = cv2.imread(files[i])
+        img = normalize(np.float32(img[:, :, 0]))
+
+        img_padded = np.full([481, 481], 0, dtype=np.float32)
+        width_offset = (481 - img.shape[1]) // 2
+        height_offset = (481 - img.shape[0]) // 2
+        img_padded[height_offset:height_offset + img.shape[0], width_offset:width_offset + img.shape[1]] = img
+        img = img_padded
+
+        img = np.expand_dims(img, 0)
+        img = np.expand_dims(img, 1)
+
+        ISource = torch.Tensor(img)
+        # noise
+        noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=infer_noiseL / 255.)
+        # noisy image
+        INoisy = ISource + noise
+
+        # save ISource_bin
+        ISource = ISource.numpy()
+        print("ISource shape is", ISource.shape)
+        ISource.tofile(os.path.join(ISource_bin, filename.split('.')[0] + '.bin'))
+        
+        # save INoisy_bin
+        INoisy = INoisy.numpy()
+        print("INoisy shape is", INoisy.shape) 
+        INoisy.tofile(os.path.join(INoisy_bin, filename.split('.')[0] + '.bin'))
+        
+if __name__ == '__main__':
+    
+    data_path = sys.argv[1]
+    ISource_bin =  sys.argv[2]
+    INoisy_bin = sys.argv[3]
+    if os.path.exists(ISource_bin) is False:
+        os.mkdir(ISource_bin)
+    if os.path.exists(INoisy_bin) is False:
+        os.mkdir(INoisy_bin)
+
+    proprecess(data_path, ISource_bin, INoisy_bin)
diff --git a/DnCNN/DnCNN_pth2onnx.py b/DnCNN/DnCNN_pth2onnx.py
new file mode 100644
index 0000000000..deab4434af
--- /dev/null
+++ b/DnCNN/DnCNN_pth2onnx.py
@@ -0,0 +1,80 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import torch
+import torch.onnx
+import torch.nn as nn
+import sys
+
+from collections import OrderedDict
+
+class DnCNN(nn.Module):
+    def __init__(self, channels, num_of_layers=17):
+        super(DnCNN, self).__init__()
+        kernel_size = 3
+        padding = 1
+        features = 64
+        layers = []
+        layers.append(nn.Conv2d(in_channels=channels, out_channels=features, \
+                      kernel_size=kernel_size, padding=padding, bias=False))
+        layers.append(nn.ReLU(inplace=True))
+        for _ in range(num_of_layers - 2):
+            layers.append(nn.Conv2d(in_channels=features, out_channels=features, \
+                          kernel_size=kernel_size, padding=padding, bias=False))
+            layers.append(nn.BatchNorm2d(features))
+            layers.append(nn.ReLU(inplace=True))
+        layers.append(nn.Conv2d(in_channels=features, out_channels=channels, \
+                      kernel_size=kernel_size, padding=padding, bias=False))
+        self.dncnn = nn.Sequential(*layers)
+
+    def forward(self, x):
+    
+        out = self.dncnn(x)
+        return out
+
+
+def proc_nodes_module(checkpoint):
+
+    new_state_dict = OrderedDict()
+    for k, v in checkpoint.items():
+        if(k[0:7] == "module."):
+            name = k[7:]
+        else:
+            name = k[0:]
+        new_state_dict[name]=v
+    return new_state_dict
+
+
+def convert(pth_file, onnx_file):
+
+    pretrained_net = torch.load(pth_file, map_location='cpu')
+    pretrained_net['state_dict'] = proc_nodes_module(pretrained_net)
+
+    model = DnCNN(channels=1, num_of_layers=17)
+    model.load_state_dict(pretrained_net['state_dict'])
+    model.eval()
+    input_names = ["actual_input_1"]
+    dummy_input = torch.randn(1, 1, 481, 481)
+    #torch.onnx.export(model, dummy_input, onnx_file, input_names = input_names, opset_version=11, verbose=True)
+    dynamic_axes = {'actual_input_1': {0: '-1'}}
+    torch.onnx.export(model, dummy_input, onnx_file, dynamic_axes=dynamic_axes, \
+                      input_names=input_names, opset_version=11)
+
+if __name__ == "__main__":
+    # Usage: DnCNN_pth2onnx.py <input .pth file> <output .onnx file>
+    pth_file = sys.argv[1]
+    onnx_file = sys.argv[2]
+
+    convert(pth_file, onnx_file)
diff --git a/DnCNN/READEME.md b/DnCNN/READEME.md
new file mode 100644
index 0000000000..65429d8a5c
--- /dev/null
+++ b/DnCNN/READEME.md
@@ -0,0 +1,350 @@
+# DnCNN Onnx模型端到端推理指导
+-   [1 模型概述](#1-模型概述)
+	-   [1.1 论文地址](#11-论文地址)
+	-   [1.2 代码地址](#12-代码地址)
+-   [2 环境说明](#2-环境说明)
+	-   [2.1 深度学习框架](#21-深度学习框架)
+	-   [2.2 python第三方库](#22-python第三方库)
+-   [3 模型转换](#3-模型转换)
+	-   [3.1 pth转onnx模型](#31-pth转onnx模型)
+	-   [3.2 onnx转om模型](#32-onnx转om模型)
+-   [4 数据集预处理](#4-数据集预处理)
+	-   [4.1 数据集获取](#41-数据集获取)
+	-   [4.2 数据集预处理](#42-数据集预处理)
+	-   [4.3 生成数据集信息文件](#43-生成数据集信息文件)
+-   [5 离线推理](#5-离线推理)
+	-   [5.1 benchmark工具概述](#51-benchmark工具概述)
+	-   [5.2 离线推理](#52-离线推理)
+-   [6 精度对比](#6-精度对比)
+	-   [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计)
+	-   [6.2 开源TopN精度](#62-开源TopN精度)
+	-   [6.3 精度对比](#63-精度对比)
+-   [7 性能对比](#7-性能对比)
+	-   [7.1 npu性能数据](#71-npu性能数据)
+	-   [7.2 T4性能数据](#72-T4性能数据)
+	-   [7.3 性能对比](#73-性能对比)
+
+
+
+## 1 模型概述
+
+-   **[论文地址](#11-论文地址)**  
+
+-   **[代码地址](#12-代码地址)**  
+
+### 1.1 论文地址
+[DnCNN论文](https://ieeexplore.ieee.org/document/7839189)  
+
+### 1.2 代码地址
+
+branch: master
+
+commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b
+
+备注：commit_id 指本模型的推理是基于该版本的代码完成的
+
+[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch)  
+
+## 2 环境说明
+
+-   **[深度学习框架](#21-深度学习框架)**  
+
+-   **[python第三方库](#22-python第三方库)**  
+
+### 2.1 深度学习框架
+```  
+CANN 5.0.1
+torch==1.8.0
+torchvision==0.9.0
+onnx==1.9.0
+```
+
+### 2.2 python第三方库
+
+```
+numpy==1.20.2
+opencv-python==4.5.2.52
+scikit-image==0.16.2
+```
+
+**说明：** 
+>   X86架构：pytorch，torchvision和onnx可以通过官方下载whl包安装，其它可以通过pip3.7 install 包名 安装
+>
+>   Arm架构：pytorch，torchvision和onnx可以通过源码编译安装，其它可以通过pip3.7 install 包名 安装
+
+## 3 模型转换
+
+-   **[pth转onnx模型](#31-pth转onnx模型)** 
+
+-   **[onnx转om模型](#32-onnx转om模型)** 
+
+### 3.1 pth转onnx模型
+
+1.DnCNN模型代码下载
+```
+git clone https://github.com/SaoYan/DnCNN-PyTorch
+cd DnCNN-PyTorch
+```
+2.获取源码pth权重文件   
+wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth  
+文件的MD5sum值是： 5703a29b082cc03401fa9d9fee12cb71  
+
+3.获取NPU训练pth文件，将net.pth文件移动到DnCNN目录下
+
+4.编写pth2onnx脚本DnCNN_pth2onnx.py
+
+ **说明：**  
+>注意目前ATC支持的onnx算子版本为11
+
+5.执行pth2onnx脚本，生成onnx模型文件
+```
+python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx
+```
+
+ **模型转换要点：**  
+>此模型转换为onnx不需要修改开源代码仓代码，故不需要特殊说明
+
+### 3.2 onnx转om模型
+
+1.设置环境变量
+```
+source env.sh
+```
+2.使用atc将onnx模型转换为om模型文件
+```
+atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310
+```
+
+## 4 数据集预处理
+
+-   **[数据集获取](#41-数据集获取)**  
+
+-   **[数据集预处理](#42-数据集预处理)**  
+
+-   **[生成数据集信息文件](#43-生成数据集信息文件)**  
+
+### 4.1 推理数据集获取
+存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录
+
+### 4.2 数据集预处理
+1.预处理脚本DnCNN_preprocess.py
+
+2.执行预处理脚本，生成数据集预处理后的bin文件
+
+```
+python3.7 DnCNN_preprocess.py data ISource INoisy
+```
+### 4.3 生成数据集信息文件
+1.生成数据集信息文件脚本get_info.py
+
+2.执行生成数据集信息脚本，生成数据集信息文件
+```
+python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481
+```
+第一个参数为模型输入的类型，第二个参数为生成的bin文件路径，第三个为输出的info文件，后面为宽高信息
+## 5 离线推理
+
+-   **[benchmark工具概述](#51-benchmark工具概述)**  
+
+-   **[离线推理](#52-离线推理)**  
+
+### 5.1 benchmark工具概述
+
+benchmark工具为华为自研的模型推理工具，支持多种模型的离线推理，能够迅速统计出模型在Ascend310上的性能，支持真实数据和纯推理两种模式，配合后处理脚本，可以实现诸多模型的端到端过程
+### 5.2 离线推理
+1.设置环境变量
+```
+source env.sh
+```
+2.执行离线推理
+```
+./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true
+```
+输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id)，每个输入对应的输出对应一个_X.bin文件。
+
+## 6 精度对比
+
+-   **[离线推理TopN精度](#61-离线推理TopN精度)**  
+-   **[开源TopN精度](#62-开源TopN精度)**  
+-   **[精度对比](#63-精度对比)**  
+
+### 6.1 离线推理TopN精度统计
+
+后处理统计PSNR精度
+
+调用DnCNN_postprocess.py脚本对推理结果进行PSNR计算，结果会打印在屏幕上
+```
+python3.7 DnCNN_postprocess.py result/dumpOutput_device0/
+```
+第一个参数为benchmark输出目录
+查看输出结果：
+```
+ISource/test064.bin PSNR 29.799832
+infering...
+ISource/test065.bin PSNR 31.486418
+infering...
+ISource/test066.bin PSNR 35.676752
+infering...
+ISource/test067.bin PSNR 28.577475
+infering...
+ISource/test068.bin PSNR 29.709767
+
+PSNR on test data 31.526892
+```
+经过对bs1与bs16的om测试，本模型batch1的精度与batch16的精度没有差别，精度数据均如上
+
+### 6.2 开源PSNR精度
+```
+| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch |
+|:-----------:|:-------:|:-------:|:---------------:|:---------------:|
+|     15      |  31.73  |  31.61  |      31.71      |      31.60      |
+|     25      |  29.23  |  29.16  |      29.21      |      29.15      |
+|     50      |  26.23  |  26.23  |      26.22      |      26.20      |
+```
+### 6.3 精度对比
+将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比，精度下降在1%范围之内，故精度达标。  
+ **精度调试：**  
+
+>没有遇到精度不达标的问题，故不需要进行精度调试
+
+## 7 性能对比
+
+-   **[npu性能数据](#71-npu性能数据)**  
+-   **[T4性能数据](#72-T4性能数据)**  
+-   **[性能对比](#73-性能对比)**  
+
+### 7.1 npu性能数据
+benchmark工具在整个数据集上推理时也会统计性能数据，但是推理整个数据集较慢，如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据，也可以使用benchmark纯推理功能测得性能数据，但是由于随机数不能模拟数据分布，纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式，benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用，模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准，对于使用benchmark工具测试的batch4，8，32的性能数据在README.md中如下作记录即可。  
+1.benchmark工具在整个数据集上推理获得性能数据  
+batch1的性能，benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt：
+
+```
+[e2e] throughputRate: 15.0465, latency: 4519.32
+[data read] throughputRate: 966.417, moduleLatency: 1.03475
+[preprocess] throughputRate: 525.539, moduleLatency: 1.90281
+[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903
+[post] throughputRate: 22.615, moduleLatency: 44.2185
+```
+Interface throughputRate: 23.7919，23.7919x4=95.176即是batch1 310单卡吞吐率  
+
+batch16的性能，benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt：
+```
+[e2e] throughputRate: 15.3818, latency: 4420.81
+[data read] throughputRate: 1484.65, moduleLatency: 0.673559
+[preprocess] throughputRate: 316.273, moduleLatency: 3.16182
+[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179
+[post] throughputRate: 1.56798, moduleLatency: 637.764
+```
+Interface throughputRate: 22.2853，22.2853x4=89.1412即是batch16 310单卡吞吐率  
+
+batch4性能：
+```
+[e2e] throughputRate: 15.5641, latency: 4369.02
+[data read] throughputRate: 1898.17, moduleLatency: 0.526824
+[preprocess] throughputRate: 523.883, moduleLatency: 1.90882
+[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192
+[post] throughputRate: 5.50981, moduleLatency: 181.495
+```
+batch4 310单卡吞吐率 23.9045x4=95.618
+
+batch8性能：
+```
+[e2e] throughputRate: 15.5035, latency: 4386.1
+[data read] throughputRate: 1863.93, moduleLatency: 0.5365
+[preprocess] throughputRate: 461.471, moduleLatency: 2.16699
+[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 47.2831
+[post] throughputRate: 2.74035, moduleLatency: 364.917
+```
+batch8 310单卡吞吐率 22.2652x4=89.0608
+
+batch32性能：
+```
+[e2e] throughputRate: 12.4075, latency: 5480.54
+[data read] throughputRate: 1770.65, moduleLatency: 0.564765
+[preprocess] throughputRate: 242.944, moduleLatency: 4.11618
+[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386
+[post] throughputRate: 0.68503, moduleLatency: 1459.79
+```
+batch32 310单卡吞吐率 13.2648x4=53.0592
+
+### 7.2 T4性能数据
+在装有T4卡的服务器上测试gpu性能，TensorRT版本：7.2.3.4，cuda版本：11.0，cudnn版本：8.2  
+batch1性能：
+```
+trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:1x1x481x481 --threads
+```
+gpu T4是4个device并行执行的结果，mean是时延（tensorrt的时延是batch个数据的推理时间），即吞吐率的倒数乘以batch
+```
+[06/05/2021-06:28:42] [I] GPU Compute
+[06/05/2021-06:28:42] [I] min: 12.5439 ms
+[06/05/2021-06:28:42] [I] max: 19.0195 ms
+[06/05/2021-06:28:42] [I] mean: 13.1826 ms
+[06/05/2021-06:28:42] [I] median: 12.9761 ms
+[06/05/2021-06:28:42] [I] percentile: 17.7111 ms at 99%
+[06/05/2021-06:28:42] [I] total compute time: 3.01882 s
+```
+batch1 t4单卡吞吐率：1000x1/(13.1826/1)=75.858fps  
+
+batch16性能：
+```
+trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:16x1x481x481 --threads
+```
+```
+[06/05/2021-06:31:53] [I] GPU Compute
+[06/05/2021-06:31:53] [I] min: 198.604 ms
+[06/05/2021-06:31:53] [I] max: 218.884 ms
+[06/05/2021-06:31:53] [I] mean: 201.968 ms
+[06/05/2021-06:31:53] [I] median: 200.267 ms
+[06/05/2021-06:31:53] [I] percentile: 218.884 ms at 99%
+[06/05/2021-06:31:53] [I] total compute time: 3.23149 s
+```
+batch16 t4单卡吞吐率：1000x1/(201.968/16)=79.220fps  
+
+batch4性能
+```
+[06/05/2021-13:48:52] [I] GPU Compute
+[06/05/2021-13:48:52] [I] min: 48.9983 ms
+[06/05/2021-13:48:52] [I] max: 67.3423 ms
+[06/05/2021-13:48:52] [I] mean: 50.6542 ms
+[06/05/2021-13:48:52] [I] median: 50.0736 ms
+[06/05/2021-13:48:52] [I] percentile: 67.3423 ms at 99%
+[06/05/2021-13:48:52] [I] total compute time: 3.08991 s
+```
+batch4 t4单卡吞吐率：1000x1/(50.6542/4)=78.957fps
+
+batch8性能：
+```
+[06/05/2021-13:50:31] [I] GPU Compute
+[06/05/2021-13:50:31] [I] min: 101.378 ms
+[06/05/2021-13:50:31] [I] max: 128.73 ms
+[06/05/2021-13:50:31] [I] mean: 104.424 ms
+[06/05/2021-13:50:31] [I] median: 102.267 ms
+[06/05/2021-13:50:31] [I] percentile: 128.73 ms at 99%
+[06/05/2021-13:50:31] [I] total compute time: 3.13273 s
+```
+batch8 t4单卡吞吐率：1000x1/(104.424/8)=76.610fps  
+
+batch32性能:
+trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:32x1x481x481 --threads
+```
+[06/05/2021-13:57:44] [I] GPU Compute
+[06/05/2021-13:57:44] [I] min: 399.587 ms
+[06/05/2021-13:57:44] [I] max: 426.525 ms
+[06/05/2021-13:57:44] [I] mean: 409.475 ms
+[06/05/2021-13:57:44] [I] median: 407.555 ms
+[06/05/2021-13:57:44] [I] percentile: 426.525 ms at 99%
+[06/05/2021-13:57:44] [I] total compute time: 4.09475 s
+```
+batch32 t4单卡吞吐率：1000x1/(409.475/32)=78.149fps
+
+
+
+### 7.3 性能对比
+batch1：23.7919x4 > 1000x1/(13.1826/1)  
+batch16：22.2853x4 > 1000x1/(201.968/16)  
+310单个device的吞吐率乘4即单卡吞吐率，所得数据中单batch优于T4，多batch略高于T4 
+对于batch1与batch16，310性能均高于T4性能1.2倍，但是batch32 310全量数据集上推理性能低于T4性能，所以该模型放在Research/cv/classification目录下。
+**性能优化：** 
+
+>单batch性能优于T4,多batch的性能略高于T4,无需优化。
+>batch32 310全量数据集上推理性能低于T4性能，但是batch32纯推理性能94.3228fps，高于T4性能。
\ No newline at end of file
diff --git a/DnCNN/env.sh b/DnCNN/env.sh
new file mode 100644
index 0000000000..49be8f16a0
--- /dev/null
+++ b/DnCNN/env.sh
@@ -0,0 +1,8 @@
+#! /bin/bash
+# Put the Ascend toolkit binaries, Python packages and libraries on the search paths.
+export install_path=/usr/local/Ascend/ascend-toolkit/latest
+export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH
+export PYTHONPATH=${install_path}/atc/python/site-packages:$PYTHONPATH
+export LD_LIBRARY_PATH=${install_path}/atc/lib64:${install_path}/acllib/lib64:$LD_LIBRARY_PATH
+export ASCEND_OPP_PATH=${install_path}/opp
+export ASCEND_AICPU_PATH=/usr/local/Ascend/ascend-toolkit/latest
diff --git a/DnCNN/get_info.py b/DnCNN/get_info.py
new file mode 100644
index 0000000000..4d05f7c4bd
--- /dev/null
+++ b/DnCNN/get_info.py
@@ -0,0 +1,60 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import sys
+import cv2
+from glob import glob
+
+
+def get_bin_info(file_path, info_name, width, height):
+    bin_images = glob(os.path.join(file_path, '*.bin'))
+    with open(info_name, 'w') as file:
+        for index, img in enumerate(bin_images):
+            content = ' '.join([str(index), img, width, height])
+            file.write(content)
+            file.write('\n')
+
+
+def get_jpg_info(file_path, info_name):
+    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']
+    image_names = []
+    for extension in extensions:
+        image_names.append(glob(os.path.join(file_path, '*.' + extension)))  
+    with open(info_name, 'w') as file:
+        for image_name in image_names:
+            if len(image_name) == 0:
+                continue
+            else:
+                for index, img in enumerate(image_name):
+                    img_cv = cv2.imread(img)
+                    shape = img_cv.shape
+                    width, height = shape[1], shape[0]
+                    content = ' '.join([str(index), img, str(width), str(height)])
+                    file.write(content)
+                    file.write('\n')
+
+
+if __name__ == '__main__':
+    file_type = sys.argv[1]
+    file_path = sys.argv[2]
+    info_name = sys.argv[3]
+    if file_type == 'bin':
+        width = sys.argv[4]
+        height = sys.argv[5]
+        assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5'
+        get_bin_info(file_path, info_name, width, height)
+    elif file_type == 'jpg':
+        assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3'
+        get_jpg_info(file_path, info_name)
diff --git a/DnCNN/modelzoo_level.txt b/DnCNN/modelzoo_level.txt
new file mode 100644
index 0000000000..2d073f9816
--- /dev/null
+++ b/DnCNN/modelzoo_level.txt
@@ -0,0 +1,14 @@
+精度psnr：
+            310：31.535421
+            710：31.534013
+
+性能：（fps）
+            	310	710	T4
+bs1 	95.3608	131.931	75.8575
+bs4 	95.774	146.31	78.1097
+bs8	89.2188	162.367	43.4176
+bs16	76.896	172.557	44.0069
+bs32	53.1048	93.2036	79.6153
+bs64	94.5656	10.3022	61.1755
+			
+最优batch	95.774	172.557	79.6153
diff --git a/DnCNN/requirements.txt b/DnCNN/requirements.txt
new file mode 100644
index 0000000000..965bdf8bdd
--- /dev/null
+++ b/DnCNN/requirements.txt
@@ -0,0 +1,6 @@
+torch==1.8.0
+torchvision==0.9.0
+onnx==1.9.0
+numpy==1.20.2
+opencv-python==4.5.2.52
+scikit-image==0.16.2
\ No newline at end of file
diff --git a/DnCNN/test/perf_bs1.sh b/DnCNN/test/perf_bs1.sh
new file mode 100644
index 0000000000..ba96f33b2f
--- /dev/null
+++ b/DnCNN/test/perf_bs1.sh
@@ -0,0 +1,6 @@
+python DnCNN_preprocess.py data ISource INoisy  # preprocess the dataset into ISource/INoisy .bin files
+python get_info.py bin INoisy DnCNN_bin.info 481 481  # generate the dataset info file with get_info.py
+source env.sh  # set environment variables
+chmod u+x benchmark.x86_64  # make the benchmark.{arch} tool executable
+./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true  # offline inference with benchmark
+python DnCNN_postprocess.py result/dumpOutput_device0/  # compute PSNR from the inference results
\ No newline at end of file
diff --git a/DnCNN/test/perf_bs16.sh b/DnCNN/test/perf_bs16.sh
new file mode 100644
index 0000000000..34db65f134
--- /dev/null
+++ b/DnCNN/test/perf_bs16.sh
@@ -0,0 +1,6 @@
+python DnCNN_preprocess.py data ISource INoisy  # preprocess the dataset into ISource/INoisy .bin files
+python get_info.py bin INoisy DnCNN_bin.info 481 481  # generate the dataset info file with get_info.py
+source env.sh  # set environment variables
+chmod u+x benchmark.x86_64  # make the benchmark.{arch} tool executable
+./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs16.om -device_id=0 -batch_size=16 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true  # offline inference with benchmark
+python DnCNN_postprocess.py result/dumpOutput_device0/  # compute PSNR from the inference results
\ No newline at end of file
diff --git a/DnCNN/test/perf_g.sh b/DnCNN/test/perf_g.sh
new file mode 100644
index 0000000000..04863ed8d9
--- /dev/null
+++ b/DnCNN/test/perf_g.sh
@@ -0,0 +1 @@
+trtexec --dumpProfile --onnx=DnCNN-S-15.onnx --shapes=actual_input_1:1x1x481x481 --threads --fp16  # run the ONNX model through trtexec in fp16 with a 1x1x481x481 input
\ No newline at end of file
diff --git a/DnCNN/test/pth2om_bs1.sh b/DnCNN/test/pth2om_bs1.sh
new file mode 100644
index 0000000000..e9bbae809c
--- /dev/null
+++ b/DnCNN/test/pth2om_bs1.sh
@@ -0,0 +1,3 @@
+python DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx  # run the pth2onnx script to generate the ONNX model file
+source env.sh  # set environment variables
+atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710
\ No newline at end of file
diff --git a/DnCNN/test/pth2om_bs16.sh b/DnCNN/test/pth2om_bs16.sh
new file mode 100644
index 0000000000..5791dc46e9
--- /dev/null
+++ b/DnCNN/test/pth2om_bs16.sh
@@ -0,0 +1,3 @@
+python DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx  # run the pth2onnx script to generate the ONNX model file
+source env.sh  # set environment variables
+atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710
\ No newline at end of file
-- 
Gitee