From 91b0e37aa9ec0257b187c5562eff87e93255205d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:18:45 +0000 Subject: [PATCH 01/10] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20DnCNN=5F710?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/.keep new file mode 100644 index 0000000000..e69de29bb2 -- Gitee From 2a8e6ea4b2c452875cfc373ff2781088f565bb55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:19:24 +0000 Subject: [PATCH 02/10] =?UTF-8?q?[=E4=B8=9C=E5=8C=97=E5=A4=A7=E5=AD=A6][?= =?UTF-8?q?=E9=AB=98=E6=A0=A1=E8=B4=A1=E7=8C=AE][PyTorch=E7=A6=BB=E7=BA=BF?= =?UTF-8?q?=E6=8E=A8=E7=90=86][DnCNN]-=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DnCNN/DnCNN_710/DnCNN_postprocess.py | 103 ++++++ .../DnCNN/DnCNN_710/DnCNN_preprocess.py | 79 ++++ .../DnCNN/DnCNN_710/DnCNN_pth2onnx.py | 80 ++++ .../image_process/DnCNN/DnCNN_710/READEME.md | 350 ++++++++++++++++++ .../image_process/DnCNN/DnCNN_710/get_info.py | 60 +++ .../DnCNN/DnCNN_710/requirements.txt | 6 + 6 files changed, 678 insertions(+) create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py create mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py new file mode 100644 index 0000000000..dbaf8635a6 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_postprocess.py @@ -0,0 +1,103 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import os +import sys +import glob +import numpy as np +import cv2 +import torch +import torch.nn as nn +import struct +from skimage.metrics import peak_signal_noise_ratio as compare_psnr + + +def batch_PSNR(img, imclean, data_range): + + Img = img.data.cpu().numpy().astype(np.float32) + Iclean = imclean.data.cpu().numpy().astype(np.float32) + PSNR = 0 + for i in range(Img.shape[0]): + PSNR += compare_psnr(Iclean[i, :, :, :], Img[i, :, :, :], data_range=data_range) + return (PSNR / Img.shape[0]) + + +def bin2npy(filepath): + + size = os.path.getsize(filepath) + res = [] + L = int(size / 4) + binfile = open(filepath, 'rb') + for i in range(L): + data = binfile.read(4) + num = struct.unpack('f', data) + res.append(num[0]) + binfile.close() + dim_res = np.array(res).reshape(1, 1, 481, 481) + return dim_res + + +def main(Result_path): + + # load data info + print('Loading ISource bin ...\n') + ISource = glob.glob(os.path.join('ISource', '*.bin')) + ISource.sort() + print('Loading INoisy bin ...\n') + INoisy = glob.glob(os.path.join('INoisy', '*.bin')) + INoisy.sort() + # load result file + print('Loading res bin ...\n') + Result_path = glob.glob(os.path.join(Result_path, '*.bin')) + Result_path.sort() + + # begin data + print('begin infer') + psnr_test = 0 + n_lables = 0 + + for isource in ISource: + isource_name = isource + # isource + isource = bin2npy(isource) + isource = torch.from_numpy(isource) + # inoisy + inoisy = bin2npy(INoisy[n_lables]) + inoisy = torch.from_numpy(inoisy) + # Result_path + Result = bin2npy(Result_path[n_lables]) + Result = torch.from_numpy(Result) + n_lables += 1 + print('infering...') + with torch.no_grad(): + Out = torch.clamp(inoisy - Result, 0., 1.) + psnr = batch_PSNR(Out, isource, 1.) + psnr_test += psnr + print("%s PSNR %f" % (isource_name, psnr)) + psnr_test /= len(ISource) + print("\nPSNR on test data %f" % psnr_test) + +if __name__ == "__main__": + + try: + Result_path = sys.argv[1] + + except IndexError: + print("Stopped!") + exit(1) + + if not (os.path.exists(Result_path)): + print("Result path doesn't exist.") + + main(Result_path) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py new file mode 100644 index 0000000000..dd612dcef8 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_preprocess.py @@ -0,0 +1,79 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import sys +import os +import os.path +import numpy as np +import random +import torch +import cv2 +import glob + +infer_data = 'Set68' +infer_noiseL = 15 + +def normalize(data): + return data / 255. + + +def proprecess(data_path, ISource_bin, INoisy_bin): + + # load data info + print('Loading data info ...\n') + files = glob.glob(os.path.join(data_path, infer_data, '*.png')) + files.sort() + # process data + for i in range(len(files)): + # image + filename = os.path.basename(files[i]) + img = cv2.imread(files[i]) + img = normalize(np.float32(img[:, :, 0])) + + img_padded = np.full([481, 481], 0, dtype=np.float32) + width_offset = (481 - img.shape[1]) // 2 + height_offset = (481 - img.shape[0]) // 2 + img_padded[height_offset:height_offset + img.shape[0], width_offset:width_offset + img.shape[1]] = img + img = img_padded + + img = np.expand_dims(img, 0) + img = np.expand_dims(img, 1) + + ISource = torch.Tensor(img) + # noise + noise = torch.FloatTensor(ISource.size()).normal_(mean=0, std=infer_noiseL / 255.) + # noisy image + INoisy = ISource + noise + + # save ISource_bin + ISource = ISource.numpy() + print("ISource shape is", ISource.shape) + ISource.tofile(os.path.join(ISource_bin, filename.split('.')[0] + '.bin')) + + # save INoisy_bin + INoisy = INoisy.numpy() + print("INoisy shape is", INoisy.shape) + INoisy.tofile(os.path.join(INoisy_bin, filename.split('.')[0] + '.bin')) + +if __name__ == '__main__': + + data_path = sys.argv[1] + ISource_bin = sys.argv[2] + INoisy_bin = sys.argv[3] + if os.path.exists(ISource_bin) is False: + os.mkdir(ISource_bin) + if os.path.exists(INoisy_bin) is False: + os.mkdir(INoisy_bin) + + proprecess(data_path, ISource_bin, INoisy_bin) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py new file mode 100644 index 0000000000..8362e6eea7 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/DnCNN_pth2onnx.py @@ -0,0 +1,80 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import torch +import torch.onnx +import torch.nn as nn +import sys + +from collections import OrderedDict + +class DnCNN(nn.Module): + def __init__(self, channels, num_of_layers=17): + super(DnCNN, self).__init__() + kernel_size = 3 + padding = 1 + features = 64 + layers = [] + layers.append(nn.Conv2d(in_channels=channels, out_channels=features, \ + kernel_size=kernel_size, padding=padding, bias=False)) + layers.append(nn.ReLU(inplace=True)) + for _ in range(num_of_layers - 2): + layers.append(nn.Conv2d(in_channels=features, out_channels=features, \ + kernel_size=kernel_size, padding=padding, bias=False)) + layers.append(nn.BatchNorm2d(features)) + layers.append(nn.ReLU(inplace=True)) + layers.append(nn.Conv2d(in_channels=features, out_channels=channels, \ + kernel_size=kernel_size, padding=padding, bias=False)) + self.dncnn = nn.Sequential(*layers) + + def forward(self, x): + + out = self.dncnn(x) + return out + + +def proc_nodes_module(checkpoint): + + new_state_dict = OrderedDict() + for k, v in checkpoint.items(): + if(k[0:7] == "module."): + name = k[7:] + else: + name = k[0:] + new_state_dict[name]=v + return new_state_dict + + +def convert(pth_file, onnx_file): + + pretrained_net = torch.load(pth_file, map_location='cpu') + pretrained_net['state_dict'] = proc_nodes_module(pretrained_net) + + model = DnCNN(channels=1, num_of_layers=17) + model.load_state_dict(pretrained_net['state_dict']) + model.eval() + input_names = ["actual_input_1"] + dummy_input = torch.randn(1, 1, 481, 481) + #torch.onnx.export(model, dummy_input, onnx_file, input_names = input_names, opset_version=11, verbose=True) + dynamic_axes = {'actual_input_1': {0: '-1'}} + torch.onnx.export(model, dummy_input, onnx_file, dynamic_axes=dynamic_axes, \ + input_names=input_names, opset_version=11) + +if __name__ == "__main__": + + pth_file = sys.argv[1] + onnx_file = sys.argv[2] + + convert(pth_file, onnx_file) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md new file mode 100644 index 0000000000..65429d8a5c --- /dev/null +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md @@ -0,0 +1,350 @@ +# DnCNN Onnx模型端到端推理指导 +- [1 模型概述](#1-模型概述) + - [1.1 论文地址](#11-论文地址) + - [1.2 代码地址](#12-代码地址) +- [2 环境说明](#2-环境说明) + - [2.1 深度学习框架](#21-深度学习框架) + - [2.2 python第三方库](#22-python第三方库) +- [3 模型转换](#3-模型转换) + - [3.1 pth转onnx模型](#31-pth转onnx模型) + - [3.2 onnx转om模型](#32-onnx转om模型) +- [4 数据集预处理](#4-数据集预处理) + - [4.1 数据集获取](#41-数据集获取) + - [4.2 数据集预处理](#42-数据集预处理) + - [4.3 生成数据集信息文件](#43-生成数据集信息文件) +- [5 离线推理](#5-离线推理) + - [5.1 benchmark工具概述](#51-benchmark工具概述) + - [5.2 离线推理](#52-离线推理) +- [6 精度对比](#6-精度对比) + - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) + - [6.2 开源TopN精度](#62-开源TopN精度) + - [6.3 精度对比](#63-精度对比) +- [7 性能对比](#7-性能对比) + - [7.1 npu性能数据](#71-npu性能数据) + - [7.2 T4性能数据](#72-T4性能数据) + - [7.3 性能对比](#73-性能对比) + + + +## 1 模型概述 + +- **[论文地址](#11-论文地址)** + +- **[代码地址](#12-代码地址)** + +### 1.1 论文地址 +[DnCNN论文](https://ieeexplore.ieee.org/document/7839189) + +### 1.2 代码地址 + +brach:master + +commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b + +备注:commitid指的是值模型基于此版本代码做的推理 + +[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch) + +## 2 环境说明 + +- **[深度学习框架](#21-深度学习框架)** + +- **[python第三方库](#22-python第三方库)** + +### 2.1 深度学习框架 +``` +CANN 5.0.1 +torch==1.8.0 +torchvision==0.9.0 +onnx==1.9.0 +``` + +### 2.2 python第三方库 + +``` +numpy==1.20.2 +opencv-python==4.5.2.52 +scikit-image==0.16.2 +``` + +**说明:** +> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 +> +> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 + +## 3 模型转换 + +- **[pth转onnx模型](#31-pth转onnx模型)** + +- **[onnx转om模型](#32-onnx转om模型)** + +### 3.1 pth转onnx模型 + +1.DnCNN模型代码下载 +``` +git clone https://github.com/SaoYan/DnCNN-PyTorch +cd DnCNN-PyTorch +``` +2.获取源码pth权重文件 +wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth +文件的MD5sum值是: 5703a29b082cc03401fa9d9fee12cb71 + +3.获取NPU训练pth文件,将net.pth文件移动到DnCNN目录下 + +4.编写pth2onnx脚本DnCNN_pth2onnx.py + + **说明:** +>注意目前ATC支持的onnx算子版本为11 + +5.执行pth2onnx脚本,生成onnx模型文件 +``` +python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx +``` + + **模型转换要点:** +>此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 + +### 3.2 onnx转om模型 + +1.设置环境变量 +``` +source env.sh +``` +2.使用atc将onnx模型转换为om模型文件 +``` +atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 +``` + +## 4 数据集预处理 + +- **[数据集获取](#41-数据集获取)** + +- **[数据集预处理](#42-数据集预处理)** + +- **[生成数据集信息文件](#43-生成数据集信息文件)** + +### 4.1 推理数据集获取 +存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录 + +### 4.2 数据集预处理 +1.预处理脚本data_preprocess.py + +2.执行预处理脚本,生成数据集预处理后的bin文件 + +``` +python3.7 data_preprocess.py data ISource INoisy +``` +### 4.3 生成数据集信息文件 +1.生成数据集信息文件脚本get_info.py + +2.执行生成数据集信息脚本,生成数据集信息文件 +``` +python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481 +``` +第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 +## 5 离线推理 + +- **[benchmark工具概述](#51-benchmark工具概述)** + +- **[离线推理](#52-离线推理)** + +### 5.1 benchmark工具概述 + +benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程 +### 5.2 离线推理 +1.设置环境变量 +``` +source env.sh +``` +2.执行离线推理 +``` +./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true +``` +输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id),每个输入对应的输出对应一个_X.bin文件。 + +## 6 精度对比 + +- **[离线推理TopN精度](#61-离线推理TopN精度)** +- **[开源TopN精度](#62-开源TopN精度)** +- **[精度对比](#63-精度对比)** + +### 6.1 离线推理TopN精度统计 + +后处理统计TopN精度 + +调用postprocess.py脚本推理结果进行PSRN计算,结果会打印在屏幕上 +``` +python3.7 postprocess.py result/dumpOutput_device0/ +``` +第一个参数为benchmark输出目录 +查看输出结果: +``` +ISource/test064.bin PSNR 29.799832 +infering... +ISource/test065.bin PSNR 31.486418 +infering... +ISource/test066.bin PSNR 35.676752 +infering... +ISource/test067.bin PSNR 28.577475 +infering... +ISource/test068.bin PSNR 29.709767 + +PSNR on test data 31.526892 +``` +经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 + +### 6.2 开源PSNR精度 +``` +| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch | +|:-----------:|:-------:|:-------:|:---------------:|:---------------:| +| 15 | 31.73 | 31.61 | 31.71 | 31.60 | +| 25 | 29.23 | 29.16 | 29.21 | 29.15 | +| 50 | 26.23 | 26.23 | 26.22 | 26.20 | +``` +### 6.3 精度对比 +将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 + **精度调试:** + +>没有遇到精度不达标的问题,故不需要进行精度调试 + +## 7 性能对比 + +- **[npu性能数据](#71-npu性能数据)** +- **[T4性能数据](#72-T4性能数据)** +- **[性能对比](#73-性能对比)** + +### 7.1 npu性能数据 +benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 +1.benchmark工具在整个数据集上推理获得性能数据 +batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: + +``` +[e2e] throughputRate: 15.0465, latency: 4519.32 +[data read] throughputRate: 966.417, moduleLatency: 1.03475 +[preprocess] throughputRate: 525.539, moduleLatency: 1.90281 +[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903 +[post] throughputRate: 22.615, moduleLatency: 44.2185 +``` +Interface throughputRate: 23.7919,23.7919x4=95.176既是batch1 310单卡吞吐率 + +batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: +``` +[e2e] throughputRate: 15.3818, latency: 4420.81 +[data read] throughputRate: 1484.65, moduleLatency: 0.673559 +[preprocess] throughputRate: 316.273, moduleLatency: 3.16182 +[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179 +[post] throughputRate: 1.56798, moduleLatency: 637.764 +``` +Interface throughputRate: 22.2853,22.2853x4=89.1412既是batch16 310单卡吞吐率 + +batch4性能: +``` +[e2e] throughputRate: 15.5641, latency: 4369.02 +[data read] throughputRate: 1898.17, moduleLatency: 0.526824 +[preprocess] throughputRate: 523.883, moduleLatency: 1.90882 +[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192 +[post] throughputRate: 5.50981, moduleLatency: 181.495 +``` +batch4 310单卡吞吐率 23.9045x4=95.618 + +batch8性能: +``` +[e2e] throughputRate: 15.5035, latency: 4386.1 +[data read] throughputRate: 1863.93, moduleLatency: 0.5365 +[preprocess] throughputRate: 461.471, moduleLatency: 2.16699 +[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 47.2831 +[post] throughputRate: 2.74035, moduleLatency: 364.917 +``` +batch8 310单卡吞吐率 22.2652x4=89.0608 + +batch32性能: +``` +[e2e] throughputRate: 12.4075, latency: 5480.54 +[data read] throughputRate: 1770.65, moduleLatency: 0.564765 +[preprocess] throughputRate: 242.944, moduleLatency: 4.11618 +[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386 +[post] throughputRate: 0.68503, moduleLatency: 1459.79 +``` +batch32 310单卡吞吐率 13.2648x4=53.0592 + +### 7.2 T4性能数据 +在装有T4卡的服务器上测试gpu性能,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 +batch1性能: +``` +trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:1x1x484x481 --threads +``` +gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch +``` +[06/05/2021-06:28:42] [I] GPU Compute +[06/05/2021-06:28:42] [I] min: 12.5439 ms +[06/05/2021-06:28:42] [I] max: 19.0195 ms +[06/05/2021-06:28:42] [I] mean: 13.1826 ms +[06/05/2021-06:28:42] [I] median: 12.9761 ms +[06/05/2021-06:28:42] [I] percentile: 17.7111 ms at 99% +[06/05/2021-06:28:42] [I] total compute time: 3.01882 s +``` +batch1 t4单卡吞吐率:1000x1/(13.1826/1)=75.858fps + +batch16性能: +``` +trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:16x1x484x481 --threads +``` +``` +[06/05/2021-06:31:53] [I] GPU Compute +[06/05/2021-06:31:53] [I] min: 198.604 ms +[06/05/2021-06:31:53] [I] max: 218.884 ms +[06/05/2021-06:31:53] [I] mean: 201.968 ms +[06/05/2021-06:31:53] [I] median: 200.267 ms +[06/05/2021-06:31:53] [I] percentile: 218.884 ms at 99% +[06/05/2021-06:31:53] [I] total compute time: 3.23149 s +``` +batch16 t4单卡吞吐率:1000x1/(201.968/16)=79.220fps + +batch4性能 +``` +[06/05/2021-13:48:52] [I] GPU Compute +[06/05/2021-13:48:52] [I] min: 48.9983 ms +[06/05/2021-13:48:52] [I] max: 67.3423 ms +[06/05/2021-13:48:52] [I] mean: 50.6542 ms +[06/05/2021-13:48:52] [I] median: 50.0736 ms +[06/05/2021-13:48:52] [I] percentile: 67.3423 ms at 99% +[06/05/2021-13:48:52] [I] total compute time: 3.08991 s +``` +batch4 t4单卡吞吐率:1000x1/(50.6542/4)=78.957fps + +batch8性能: +``` +[06/05/2021-13:50:31] [I] GPU Compute +[06/05/2021-13:50:31] [I] min: 101.378 ms +[06/05/2021-13:50:31] [I] max: 128.73 ms +[06/05/2021-13:50:31] [I] mean: 104.424 ms +[06/05/2021-13:50:31] [I] median: 102.267 ms +[06/05/2021-13:50:31] [I] percentile: 128.73 ms at 99% +[06/05/2021-13:50:31] [I] total compute time: 3.13273 s +``` +batch8 t4单卡吞吐率:1000x1/(104.424/8)=76.610fps + +batch32性能: +trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:32x1x484x481 --threads +``` +[06/05/2021-13:57:44] [I] GPU Compute +[06/05/2021-13:57:44] [I] min: 399.587 ms +[06/05/2021-13:57:44] [I] max: 426.525 ms +[06/05/2021-13:57:44] [I] mean: 409.475 ms +[06/05/2021-13:57:44] [I] median: 407.555 ms +[06/05/2021-13:57:44] [I] percentile: 426.525 ms at 99% +[06/05/2021-13:57:44] [I] total compute time: 4.09475 s +``` +batch32 t4单卡吞吐率:1000x1/(409.475/32)=78.149fps + + + +### 7.3 性能对比 +batch1:23.7919x4 > 1000x1/(13.1826/1) +batch16:22.2853x4 > 1000x1/(201.968/16) +310单个device的吞吐率乘4即单卡吞吐率,所得数据中单batch优于T4,多batch略高于T4 +对于batch1与batch16,310性能均高于T4性能1.2倍,但是batch32 310全量数据集上推理性能低于T4性能,所以该模型放在Reaserch/cv/classification目录下。 +**性能优化:** + +>单batch性能优于T4,多batch的性能略高于T4,无需优化。 +>batch32 310全量数据集上推理性能低于T4性能,但是batch32纯推理性能94.3228fps,高于T4性能。 \ No newline at end of file diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py new file mode 100644 index 0000000000..def864bec0 --- /dev/null +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/get_info.py @@ -0,0 +1,60 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import os +import sys +import cv2 +from glob import glob + + +def get_bin_info(file_path, info_name, width, height): + bin_images = glob(os.path.join(file_path, '*.bin')) + with open(info_name, 'w') as file: + for index, img in enumerate(bin_images): + content = ' '.join([str(index), img, width, height]) + file.write(content) + file.write('\n') + + +def get_jpg_info(file_path, info_name): + extensions = ['jpg', 'jpeg', 'JPG', 'JPEG'] + image_names = [] + for extension in extensions: + image_names.append(glob(os.path.join(file_path, '*.' + extension))) + with open(info_name, 'w') as file: + for image_name in image_names: + if len(image_name) == 0: + continue + else: + for index, img in enumerate(image_name): + img_cv = cv2.imread(img) + shape = img_cv.shape + width, height = shape[1], shape[0] + content = ' '.join([str(index), img, str(width), str(height)]) + file.write(content) + file.write('\n') + + +if __name__ == '__main__': + file_type = sys.argv[1] + file_path = sys.argv[2] + info_name = sys.argv[3] + if file_type == 'bin': + width = sys.argv[4] + height = sys.argv[5] + assert len(sys.argv) == 6, 'The number of input parameters must be equal to 5' + get_bin_info(file_path, info_name, width, height) + elif file_type == 'jpg': + assert len(sys.argv) == 4, 'The number of input parameters must be equal to 3' + get_jpg_info(file_path, info_name) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt new file mode 100644 index 0000000000..965bdf8bdd --- /dev/null +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/requirements.txt @@ -0,0 +1,6 @@ +torch==1.8.0 +torchvision==0.9.0 +onnx==1.9.0 +numpy==1.20.2 +opencv-python==4.5.2.52 +scikit-image==0.16.2 \ No newline at end of file -- Gitee From 75dae9654fff70b3209e7e31f314b9f53c4502c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:40:36 +0000 Subject: [PATCH 03/10] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20AC?= =?UTF-8?q?L=5FPyTorch/contrib/cv/image=5Fprocess/DnCNN/DnCNN=5F710/READEM?= =?UTF-8?q?E.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../image_process/DnCNN/DnCNN_710/READEME.md | 350 ------------------ 1 file changed, 350 deletions(-) delete mode 100644 ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md deleted file mode 100644 index 65429d8a5c..0000000000 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/DnCNN_710/READEME.md +++ /dev/null @@ -1,350 +0,0 @@ -# DnCNN Onnx模型端到端推理指导 -- [1 模型概述](#1-模型概述) - - [1.1 论文地址](#11-论文地址) - - [1.2 代码地址](#12-代码地址) -- [2 环境说明](#2-环境说明) - - [2.1 深度学习框架](#21-深度学习框架) - - [2.2 python第三方库](#22-python第三方库) -- [3 模型转换](#3-模型转换) - - [3.1 pth转onnx模型](#31-pth转onnx模型) - - [3.2 onnx转om模型](#32-onnx转om模型) -- [4 数据集预处理](#4-数据集预处理) - - [4.1 数据集获取](#41-数据集获取) - - [4.2 数据集预处理](#42-数据集预处理) - - [4.3 生成数据集信息文件](#43-生成数据集信息文件) -- [5 离线推理](#5-离线推理) - - [5.1 benchmark工具概述](#51-benchmark工具概述) - - [5.2 离线推理](#52-离线推理) -- [6 精度对比](#6-精度对比) - - [6.1 离线推理TopN精度统计](#61-离线推理TopN精度统计) - - [6.2 开源TopN精度](#62-开源TopN精度) - - [6.3 精度对比](#63-精度对比) -- [7 性能对比](#7-性能对比) - - [7.1 npu性能数据](#71-npu性能数据) - - [7.2 T4性能数据](#72-T4性能数据) - - [7.3 性能对比](#73-性能对比) - - - -## 1 模型概述 - -- **[论文地址](#11-论文地址)** - -- **[代码地址](#12-代码地址)** - -### 1.1 论文地址 -[DnCNN论文](https://ieeexplore.ieee.org/document/7839189) - -### 1.2 代码地址 - -brach:master - -commit_id: 6b0804951484eadb7f1ea24e8e5c9ede9bea485b - -备注:commitid指的是值模型基于此版本代码做的推理 - -[DnCNN代码](https://github.com/SaoYan/DnCNN-PyTorch) - -## 2 环境说明 - -- **[深度学习框架](#21-深度学习框架)** - -- **[python第三方库](#22-python第三方库)** - -### 2.1 深度学习框架 -``` -CANN 5.0.1 -torch==1.8.0 -torchvision==0.9.0 -onnx==1.9.0 -``` - -### 2.2 python第三方库 - -``` -numpy==1.20.2 -opencv-python==4.5.2.52 -scikit-image==0.16.2 -``` - -**说明:** -> X86架构:pytorch,torchvision和onnx可以通过官方下载whl包安装,其它可以通过pip3.7 install 包名 安装 -> -> Arm架构:pytorch,torchvision和onnx可以通过源码编译安装,其它可以通过pip3.7 install 包名 安装 - -## 3 模型转换 - -- **[pth转onnx模型](#31-pth转onnx模型)** - -- **[onnx转om模型](#32-onnx转om模型)** - -### 3.1 pth转onnx模型 - -1.DnCNN模型代码下载 -``` -git clone https://github.com/SaoYan/DnCNN-PyTorch -cd DnCNN-PyTorch -``` -2.获取源码pth权重文件 -wget https://ascend-model-file.obs.cn-north-4.myhuaweicloud.com/%E4%BA%A4%E4%BB%98%E4%BB%B6/cv/image_classification/DnCnn/net.pth -文件的MD5sum值是: 5703a29b082cc03401fa9d9fee12cb71 - -3.获取NPU训练pth文件,将net.pth文件移动到DnCNN目录下 - -4.编写pth2onnx脚本DnCNN_pth2onnx.py - - **说明:** ->注意目前ATC支持的onnx算子版本为11 - -5.执行pth2onnx脚本,生成onnx模型文件 -``` -python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx -``` - - **模型转换要点:** ->此模型转换为onnx不需要修改开源代码仓代码,故不需要特殊说明 - -### 3.2 onnx转om模型 - -1.设置环境变量 -``` -source env.sh -``` -2.使用atc将onnx模型转换为om模型文件 -``` -atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 -``` - -## 4 数据集预处理 - -- **[数据集获取](#41-数据集获取)** - -- **[数据集预处理](#42-数据集预处理)** - -- **[生成数据集信息文件](#43-生成数据集信息文件)** - -### 4.1 推理数据集获取 -存放路径为 https://github.com/SaoYan/DnCNN-PyTorch 的data目录 - -### 4.2 数据集预处理 -1.预处理脚本data_preprocess.py - -2.执行预处理脚本,生成数据集预处理后的bin文件 - -``` -python3.7 data_preprocess.py data ISource INoisy -``` -### 4.3 生成数据集信息文件 -1.生成数据集信息文件脚本get_info.py - -2.执行生成数据集信息脚本,生成数据集信息文件 -``` -python3.7 get_info.py bin INoisy DnCNN_bin.info 481 481 -``` -第一个参数为模型输入的类型,第二个参数为生成的bin文件路径,第三个为输出的info文件,后面为宽高信息 -## 5 离线推理 - -- **[benchmark工具概述](#51-benchmark工具概述)** - -- **[离线推理](#52-离线推理)** - -### 5.1 benchmark工具概述 - -benchmark工具为华为自研的模型推理工具,支持多种模型的离线推理,能够迅速统计出模型在Ascend310上的性能,支持真实数据和纯推理两种模式,配合后处理脚本,可以实现诸多模型的端到端过程 -### 5.2 离线推理 -1.设置环境变量 -``` -source env.sh -``` -2.执行离线推理 -``` -./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true -``` -输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id),每个输入对应的输出对应一个_X.bin文件。 - -## 6 精度对比 - -- **[离线推理TopN精度](#61-离线推理TopN精度)** -- **[开源TopN精度](#62-开源TopN精度)** -- **[精度对比](#63-精度对比)** - -### 6.1 离线推理TopN精度统计 - -后处理统计TopN精度 - -调用postprocess.py脚本推理结果进行PSRN计算,结果会打印在屏幕上 -``` -python3.7 postprocess.py result/dumpOutput_device0/ -``` -第一个参数为benchmark输出目录 -查看输出结果: -``` -ISource/test064.bin PSNR 29.799832 -infering... -ISource/test065.bin PSNR 31.486418 -infering... -ISource/test066.bin PSNR 35.676752 -infering... -ISource/test067.bin PSNR 28.577475 -infering... -ISource/test068.bin PSNR 29.709767 - -PSNR on test data 31.526892 -``` -经过对bs1与bs16的om测试,本模型batch1的精度与batch16的精度没有差别,精度数据均如上 - -### 6.2 开源PSNR精度 -``` -| Noise Level | DnCNN-S | DnCNN-B | DnCNN-S-PyTorch | DnCNN-B-PyTorch | -|:-----------:|:-------:|:-------:|:---------------:|:---------------:| -| 15 | 31.73 | 31.61 | 31.71 | 31.60 | -| 25 | 29.23 | 29.16 | 29.21 | 29.15 | -| 50 | 26.23 | 26.23 | 26.22 | 26.20 | -``` -### 6.3 精度对比 -将得到的om离线模型推理PSNR值与该模型github代码仓上公布的精度对比,精度下降在1%范围之内,故精度达标。 - **精度调试:** - ->没有遇到精度不达标的问题,故不需要进行精度调试 - -## 7 性能对比 - -- **[npu性能数据](#71-npu性能数据)** -- **[T4性能数据](#72-T4性能数据)** -- **[性能对比](#73-性能对比)** - -### 7.1 npu性能数据 -benchmark工具在整个数据集上推理时也会统计性能数据,但是推理整个数据集较慢,如果这么测性能那么整个推理期间需要确保独占device。为快速获取性能数据,也可以使用benchmark纯推理功能测得性能数据,但是由于随机数不能模拟数据分布,纯推理功能测的有些模型性能数据可能不太准。这里给出两种方式,benchmark纯推理功能测性能仅为快速获取大概的性能数据以便调试优化使用,模型的性能以使用benchmark工具在整个数据集上推理得到bs1与bs16的性能数据为准,对于使用benchmark工具测试的batch4,8,32的性能数据在README.md中如下作记录即可。 -1.benchmark工具在整个数据集上推理获得性能数据 -batch1的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_1_device_0.txt: - -``` -[e2e] throughputRate: 15.0465, latency: 4519.32 -[data read] throughputRate: 966.417, moduleLatency: 1.03475 -[preprocess] throughputRate: 525.539, moduleLatency: 1.90281 -[infer] throughputRate: 22.6328, Interface throughputRate: 23.7919, moduleLatency: 43.8903 -[post] throughputRate: 22.615, moduleLatency: 44.2185 -``` -Interface throughputRate: 23.7919,23.7919x4=95.176既是batch1 310单卡吞吐率 - -batch16的性能,benchmark工具在整个数据集上推理后生成result/perf_vision_batchsize_16_device_1.txt: -``` -[e2e] throughputRate: 15.3818, latency: 4420.81 -[data read] throughputRate: 1484.65, moduleLatency: 0.673559 -[preprocess] throughputRate: 316.273, moduleLatency: 3.16182 -[infer] throughputRate: 21.4529, Interface throughputRate: 22.2853, moduleLatency: 45.6179 -[post] throughputRate: 1.56798, moduleLatency: 637.764 -``` -Interface throughputRate: 22.2853,22.2853x4=89.1412既是batch16 310单卡吞吐率 - -batch4性能: -``` -[e2e] throughputRate: 15.5641, latency: 4369.02 -[data read] throughputRate: 1898.17, moduleLatency: 0.526824 -[preprocess] throughputRate: 523.883, moduleLatency: 1.90882 -[infer] throughputRate: 22.091, Interface throughputRate: 23.9045, moduleLatency: 44.5192 -[post] throughputRate: 5.50981, moduleLatency: 181.495 -``` -batch4 310单卡吞吐率 23.9045x4=95.618 - -batch8性能: -``` -[e2e] throughputRate: 15.5035, latency: 4386.1 -[data read] throughputRate: 1863.93, moduleLatency: 0.5365 -[preprocess] throughputRate: 461.471, moduleLatency: 2.16699 -[infer] throughputRate: 20.7804, Interface throughputRate: 22.2652, moduleLatency: 47.2831 -[post] throughputRate: 2.74035, moduleLatency: 364.917 -``` -batch8 310单卡吞吐率 22.2652x4=89.0608 - -batch32性能: -``` -[e2e] throughputRate: 12.4075, latency: 5480.54 -[data read] throughputRate: 1770.65, moduleLatency: 0.564765 -[preprocess] throughputRate: 242.944, moduleLatency: 4.11618 -[infer] throughputRate: 15.641, Interface throughputRate: 13.2648, moduleLatency: 62.7386 -[post] throughputRate: 0.68503, moduleLatency: 1459.79 -``` -batch32 310单卡吞吐率 13.2648x4=53.0592 - -### 7.2 T4性能数据 -在装有T4卡的服务器上测试gpu性能,TensorRT版本:7.2.3.4,cuda版本:11.0,cudnn版本:8.2 -batch1性能: -``` -trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:1x1x484x481 --threads -``` -gpu T4是4个device并行执行的结果,mean是时延(tensorrt的时延是batch个数据的推理时间),即吞吐率的倒数乘以batch -``` -[06/05/2021-06:28:42] [I] GPU Compute -[06/05/2021-06:28:42] [I] min: 12.5439 ms -[06/05/2021-06:28:42] [I] max: 19.0195 ms -[06/05/2021-06:28:42] [I] mean: 13.1826 ms -[06/05/2021-06:28:42] [I] median: 12.9761 ms -[06/05/2021-06:28:42] [I] percentile: 17.7111 ms at 99% -[06/05/2021-06:28:42] [I] total compute time: 3.01882 s -``` -batch1 t4单卡吞吐率:1000x1/(13.1826/1)=75.858fps - -batch16性能: -``` -trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:16x1x484x481 --threads -``` -``` -[06/05/2021-06:31:53] [I] GPU Compute -[06/05/2021-06:31:53] [I] min: 198.604 ms -[06/05/2021-06:31:53] [I] max: 218.884 ms -[06/05/2021-06:31:53] [I] mean: 201.968 ms -[06/05/2021-06:31:53] [I] median: 200.267 ms -[06/05/2021-06:31:53] [I] percentile: 218.884 ms at 99% -[06/05/2021-06:31:53] [I] total compute time: 3.23149 s -``` -batch16 t4单卡吞吐率:1000x1/(201.968/16)=79.220fps - -batch4性能 -``` -[06/05/2021-13:48:52] [I] GPU Compute -[06/05/2021-13:48:52] [I] min: 48.9983 ms -[06/05/2021-13:48:52] [I] max: 67.3423 ms -[06/05/2021-13:48:52] [I] mean: 50.6542 ms -[06/05/2021-13:48:52] [I] median: 50.0736 ms -[06/05/2021-13:48:52] [I] percentile: 67.3423 ms at 99% -[06/05/2021-13:48:52] [I] total compute time: 3.08991 s -``` -batch4 t4单卡吞吐率:1000x1/(50.6542/4)=78.957fps - -batch8性能: -``` -[06/05/2021-13:50:31] [I] GPU Compute -[06/05/2021-13:50:31] [I] min: 101.378 ms -[06/05/2021-13:50:31] [I] max: 128.73 ms -[06/05/2021-13:50:31] [I] mean: 104.424 ms -[06/05/2021-13:50:31] [I] median: 102.267 ms -[06/05/2021-13:50:31] [I] percentile: 128.73 ms at 99% -[06/05/2021-13:50:31] [I] total compute time: 3.13273 s -``` -batch8 t4单卡吞吐率:1000x1/(104.424/8)=76.610fps - -batch32性能: -trtexec --onnx=DnCNN-S-15.onnx --fp16 --shapes=actual_input_1:32x1x484x481 --threads -``` -[06/05/2021-13:57:44] [I] GPU Compute -[06/05/2021-13:57:44] [I] min: 399.587 ms -[06/05/2021-13:57:44] [I] max: 426.525 ms -[06/05/2021-13:57:44] [I] mean: 409.475 ms -[06/05/2021-13:57:44] [I] median: 407.555 ms -[06/05/2021-13:57:44] [I] percentile: 426.525 ms at 99% -[06/05/2021-13:57:44] [I] total compute time: 4.09475 s -``` -batch32 t4单卡吞吐率:1000x1/(409.475/32)=78.149fps - - - -### 7.3 性能对比 -batch1:23.7919x4 > 1000x1/(13.1826/1) -batch16:22.2853x4 > 1000x1/(201.968/16) -310单个device的吞吐率乘4即单卡吞吐率,所得数据中单batch优于T4,多batch略高于T4 -对于batch1与batch16,310性能均高于T4性能1.2倍,但是batch32 310全量数据集上推理性能低于T4性能,所以该模型放在Reaserch/cv/classification目录下。 -**性能优化:** - ->单batch性能优于T4,多batch的性能略高于T4,无需优化。 ->batch32 310全量数据集上推理性能低于T4性能,但是batch32纯推理性能94.3228fps,高于T4性能。 \ No newline at end of file -- Gitee From bb237493054e10d9069f10de8b4177099d6795e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:43:17 +0000 Subject: [PATCH 04/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index 592f7f9936..fa5c266340 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -111,6 +111,12 @@ source env.sh 2.使用atc将onnx模型转换为om模型文件 ``` atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 + +(710_bs1) +atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710 + +(710_bs16) +atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710 ``` ## 4 数据集预处理 -- Gitee From 2ce6c95381db46b715b8e15898fad599890f4f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:46:26 +0000 Subject: [PATCH 05/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index fa5c266340..7decb98bdd 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -111,10 +111,10 @@ source env.sh 2.使用atc将onnx模型转换为om模型文件 ``` atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 - +``` (710_bs1) atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710 - +``` (710_bs16) atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710 ``` @@ -162,8 +162,13 @@ source env.sh ``` 2.执行离线推理 ``` -./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true +(bs1) +./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true +``` +(bs16) +./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs16.om -device_id=0 -batch_size=16 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true ``` + 输出结果默认保存在当前目录result/dumpOutput_deviceX(X为对应的device_id),每个输入对应的输出对应一个_X.bin文件。 ## 6 精度对比 -- Gitee From 25522ba68151b7d31c4eca122651339370a0f287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:48:03 +0000 Subject: [PATCH 06/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index 7decb98bdd..87b84461e5 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -113,9 +113,11 @@ source env.sh atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 ``` (710_bs1) +``` atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend710 ``` (710_bs16) +``` atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:16,1,481,481" --output=DnCNN-S-15_bs16 --log=debug --soc_version=Ascend710 ``` -- Gitee From c0fb82aedb4a38ae5d014561c60d51d532543593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:48:59 +0000 Subject: [PATCH 07/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index 87b84461e5..46119f3b3c 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -165,9 +165,11 @@ source env.sh 2.执行离线推理 ``` (bs1) +``` ./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true ``` (bs16) +``` ./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs16.om -device_id=0 -batch_size=16 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true ``` -- Gitee From 85ce66b63ec6e2b112140c149f560940304fb5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:49:45 +0000 Subject: [PATCH 08/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 1 - 1 file changed, 1 deletion(-) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index 46119f3b3c..dcc4240037 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -163,7 +163,6 @@ benchmark工具为华为自研的模型推理工具,支持多种模型的离 source env.sh ``` 2.执行离线推理 -``` (bs1) ``` ./benchmark.x86_64 -model_type=vision -om_path=DnCNN-S-15_bs1.om -device_id=0 -batch_size=1 -input_text_path=DnCNN_bin.info -input_width=481 -input_height=481 -useDvpp=false -output_binary=true -- Gitee From 348719952c6d7357f53097c404213ebb542f09f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:54:33 +0000 Subject: [PATCH 09/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md index dcc4240037..35ee9aa42b 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/READEME.md @@ -108,7 +108,11 @@ python3.7 DnCNN_pth2onnx.py net.pth DnCNN-S-15.onnx ``` source env.sh ``` -2.使用atc将onnx模型转换为om模型文件 +2.增加benchmark.{arch}可执行权限。 +``` +chmod u+x benchmark.x86_64 +``` +3.使用atc将onnx模型转换为om模型文件 ``` atc --framework=5 --model=./DnCNN-S-15.onnx --input_format=NCHW --input_shape="actual_input_1:1,1,481,481" --output=DnCNN-S-15_bs1 --log=debug --soc_version=Ascend310 ``` -- Gitee From 514c8a66b3072f046de7f8d85f813cad1ce70a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=98=95=E9=AA=85?= <10760252+mxhua@user.noreply.gitee.com> Date: Tue, 10 May 2022 06:55:57 +0000 Subject: [PATCH 10/10] update ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py. --- ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py b/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py index 0333302e81..7945fd671a 100644 --- a/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py +++ b/ACL_PyTorch/contrib/cv/image_process/DnCNN/postprocess.py @@ -20,7 +20,7 @@ import cv2 import torch import torch.nn as nn import struct -from skimage.measure.simple_metrics import compare_psnr +from skimage.metrics import peak_signal_noise_ratio as compare_psnr def batch_PSNR(img, imclean, data_range): -- Gitee