diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/.keep b/ACL_PyTorch/contrib/cv/classfication/SVTR/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_postprocess.py b/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_postprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3c5589e6c2536963f8a2ea6c92be6244cb62c48
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_postprocess.py
@@ -0,0 +1,68 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, 'PaddleOCR')))
+
+import paddle
+import numpy as np
+import tools.program as program
+
+from tqdm import tqdm
+from ppocr.data import build_dataloader
+from ppocr.metrics import build_metric
+from ppocr.postprocess import build_post_process
+
+
+def main(config, device, logger, vdl_writer):
+    valid_dataloader = build_dataloader(config, 'Eval', device, logger)
+
+    eval_class = build_metric(config['Metric'])
+
+    global_config = config['Global']
+    post_process_class = build_post_process(config['PostProcess'], global_config)
+
+    pbar = tqdm(
+        total=len(valid_dataloader),
+        desc='Postprocessing',
+        position=0,
+        leave=True)
+
+    for idx, batch in enumerate(valid_dataloader):
+        # OM result file for sample idx, written by ais_bench
+        result_name = 'img_{}_0.bin'.format(idx)
+        result = os.path.join(config['results'], result_name)
+        # raw logits for one sample: 1 x 25 x 37
+        preds = paddle.to_tensor(np.fromfile(result, dtype=np.float32).reshape(1, 25, 37))
+
+        batch = [item.numpy() for item in batch]
+
+        post_result = post_process_class(preds, batch[1])
+
+        eval_class(post_result, batch)
+
+        pbar.update(1)
+
+    pbar.close()
+    metric = eval_class.get_metric()
+    print(metric)
+
+
+if __name__ == "__main__":
+    config, device, logger, vdl_writer = program.preprocess()
+    main(config, device, logger, vdl_writer)
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_preprocess.py b/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..a20f735c45c4e82325441b3706de467ac6c41eb6
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_preprocess.py
@@ -0,0 +1,42 @@
+import os
+import sys
+import numpy as np
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, 'PaddleOCR')))
+
+from tqdm import tqdm
+import tools.program as program
+from ppocr.data import build_dataloader
+
+
+def main(config, device, logger, vdl_writer, data_path):
+    valid_dataloader = build_dataloader(config, 'Eval', device, logger)
+
+    pbar = tqdm(
+        total=len(valid_dataloader),
+        desc='Preprocessing',
+        position=0,
+        leave=True)
+
+    for idx, batch in enumerate(valid_dataloader):
+        # dump each preprocessed image tensor as a raw float32 .bin file
+        img_name = 'img_{}.bin'.format(idx)
+        batch[0].numpy().tofile(os.path.join(data_path, img_name))
+
+        pbar.update(1)
+
+    pbar.close()
+
+
+if __name__ == "__main__":
+    config, device, logger, vdl_writer = program.preprocess()
+
+
+    data_path = config['bin_data']
+
+    if not os.path.exists(data_path):
+        os.makedirs(data_path)
+
+    main(config, device, logger, vdl_writer, data_path)
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_vd.patch b/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_vd.patch
new file mode 100644
index 0000000000000000000000000000000000000000..f08986b5f7f24c27c0aa57126be927394b15e9b2
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/classfication/SVTR/SVTR_vd.patch
@@ -0,0 +1,99 @@
+diff --git a/configs/rec/rec_r34_vd_tps_bilstm_att.yml b/configs/rec/rec_r34_vd_tps_bilstm_att.yml
+index 8919aae7..63e6e744 100644
+--- a/configs/rec/rec_r34_vd_tps_bilstm_att.yml
++++ b/configs/rec/rec_r34_vd_tps_bilstm_att.yml
+@@ -1,5 +1,5 @@
+ Global:
+-  use_gpu: True
++  use_gpu: False
+   epoch_num: 400
+   log_smooth_window: 20
+   print_batch_step: 10
+@@ -89,12 +89,12 @@ Eval:
+           img_mode: BGR
+           channel_first: False
+       - AttnLabelEncode: # Class handling label
+-      - RecResizeImg:
++      - SVTRRecResizeImg:
+           image_shape: [3, 32, 100]
+       - KeepKeys:
+           keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+   loader:
+     shuffle: False
+     drop_last: False
+-    batch_size_per_card: 256
+-    num_workers: 8
++    batch_size_per_card: 1
++    num_workers: 1
+diff --git a/ppocr/modeling/transforms/tps.py b/ppocr/modeling/transforms/tps.py
+index 9bdab0f8..ba0551d6 100644
+--- a/ppocr/modeling/transforms/tps.py
++++ b/ppocr/modeling/transforms/tps.py
+@@ -19,13 +19,13 @@ https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/modules/t
+ from __future__ import absolute_import
+ from __future__ import division
+ from __future__ import print_function
+-
++import os
+ import math
+ import paddle
+ from paddle import nn, ParamAttr
+ from paddle.nn import functional as F
+ import numpy as np
+-
++import paddle.nn as nn
+ 
+ class ConvBNLayer(nn.Layer):
+     def __init__(self,
+@@ -233,6 +233,7 @@ class GridGenerator(nn.Layer):
+ 
+     def build_inv_delta_C_paddle(self, C):
+         """ Return inv_delta_C which is needed to calculate T """
++        '''
+         F = self.F
+         hat_eye = paddle.eye(F, dtype='float64')  # F x F
+         hat_C = paddle.norm(
+@@ -259,7 +260,8 @@
+                 axis=1)  # 1 x F+3
+         ],
+             axis=0)
+-        inv_delta_C = paddle.inverse(delta_C)
++        '''
++        inv_delta_C = paddle.to_tensor(np.load(os.path.join(os.getcwd(), 'inv_delta_C.npy')))
+         return inv_delta_C  # F+3 x F+3
+ 
+     def build_P_hat_paddle(self, C, P):
+diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py
+index cb1cb10a..d7d7b315 100644
+--- a/ppocr/modeling/transforms/tps_spatial_transformer.py
++++ b/ppocr/modeling/transforms/tps_spatial_transformer.py
+@@ -29,9 +29,27 @@ import itertools
+ 
+ def grid_sample(input, grid, canvas=None):
+     input.stop_gradient = False
+-    output = F.grid_sample(input, grid)
++    input = paddle.transpose(input, perm=[0,1,3,2])
++    grid = paddle.transpose(grid, perm=[0,2,1,3])
++
++    input = paddle.add(input, paddle.zeros((1,3,256,64)))
++    grid = paddle.add(grid, paddle.zeros((1,100,32,2)))
++
++    input = paddle.transpose(input, perm=[0,1,3,2])
++    grid = paddle.transpose(grid, perm=[0,2,1,3])
++
++    my_pad = nn.Pad2D(padding=[1,0,0,0])
++    grid = my_pad(grid)
++    grid = paddle.transpose(grid, perm=[0,3,1,2])
++    grid = grid + input[:,:,:32,:100]
++
++    grid = paddle.transpose(grid, perm=[0,1,3,2])
++    grid = paddle.add(grid, paddle.zeros((1,3,100,32)))
++    grid = paddle.transpose(grid, perm=[0,1,3,2])
++
++
+     if canvas is None:
+-        return output
++        return grid
+     else:
+         input_mask = paddle.ones(shape=input.shape)
+         output_mask = F.grid_sample(input_mask, grid)
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/onnx_fix.py b/ACL_PyTorch/contrib/cv/classfication/SVTR/onnx_fix.py
new file mode 100644
index 0000000000000000000000000000000000000000..24073479a33fabd3ed56af98354525093095ff7e
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/classfication/SVTR/onnx_fix.py
@@ -0,0 +1,25 @@
+import argparse
+from auto_optimizer import OnnxGraph
+def create_grid_sample(onnx_in, onnx_out):
+    g = OnnxGraph.parse(onnx_in)
+    g.remove('p2o.Slice.3')
+    g.remove('p2o.Unsqueeze.0')
+    g.remove('p2o.Pad.0')
+    g.remove('p2o.Squeeze.0')
+    g.remove('p2o.Transpose.4')
+    g.remove('p2o.Add.22')
+    g.add_node('Grid_1', 'GridSample', inputs=['transpose_2.tmp_0', 'transpose_3.tmp_0'], outputs=['p2o.Add.23'], attrs={'padding_mode': b'zeros', 'mode': b'bilinear', 'align_corners': 1})
+    g['p2o.Transpose.5'].inputs = ['p2o.Add.23']
+    g.update_map()
+    g.save(onnx_out)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="optimize onnx")  # task processing parameters
+    parser.add_argument('--in_onnx', type=str, help='path of the input ONNX model')
+    parser.add_argument('--out_onnx', type=str, help='path of the fixed output ONNX model')
+    args = parser.parse_args()
+
+    create_grid_sample(args.in_onnx, args.out_onnx)
+
+    print("[info] Optimize onnx success. result onnx is: {}".format(args.out_onnx))
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/readme.md b/ACL_PyTorch/contrib/cv/classfication/SVTR/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..c2bd86e53f2b9116a917e7d8e739d2d57d3f0d17
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/classfication/SVTR/readme.md
@@ -0,0 +1,236 @@
+# SVTR ONNX Model End-to-End Inference Guide
+
+- [Overview](#overview)
+  - [Input/Output Data](#inputoutput-data)
+- [Inference Environment](#inference-environment)
+- [Quick Start](#quick-start)
+  - [Installation](#installation)
+  - [Prepare the Dataset](#prepare-the-dataset)
+  - [Model Conversion](#model-conversion)
+  - [Inference and Validation](#inference-and-validation)
+- [Performance & Accuracy](#performance--accuracy)
+
+----
+# Overview
+
+Text recognition models usually contain two building blocks: a vision model for feature extraction and a sequence model for text transcription. This hybrid architecture is accurate but complex and relatively inefficient. SVTR proposes a single vision model for scene text recognition within a patch-wise image tokenization framework, dispensing with sequential modeling entirely. It first decomposes an image of text into small patches called character components; hierarchical stages are then carried out by component-level mixing, merging, and/or combining. Global and local mixing blocks are designed to perceive inter-character and intra-character patterns, yielding a multi-granularity perception of character components, so characters can be recognized by a simple linear prediction. Experiments on English and Chinese scene text recognition tasks demonstrate the effectiveness of SVTR: SVTR-L (Large) achieves highly competitive accuracy on English and outperforms existing methods on Chinese by a large margin while running faster, while SVTR-T (Tiny) is an effective and much smaller model with fast inference.
+
++ Paper
+  [SVTR: Scene Text Recognition with a Single Visual Model](https://arxiv.org/abs/2205.00159)
+  Yongkun Du, Zhineng Chen, Caiyan Jia, Xiaoting Yin, Tianlun Zheng, Chenxia Li, Yuning Du, Yu-Gang Jiang
+
++ Reference implementation:
+  https://github.com/PaddlePaddle/PaddleOCR.git
+
+## Input/Output Data
++ Model input
+
+  | input-name | data-type | data-format | input-shape |
+  | ---------- | --------- | ----------- | ----------- |
+  | image | FLOAT32 | NCHW | batch_size x 3 x 64 x 256 |
+
++ Model output
+
+  | output-name | data-type | data-format | output-shape |
+  | ----------- | --------- | ----------- | ------------ |
+  | output1 | FLOAT32 | ND | batch_size x 25 x 37 |
+
+
+----
+# Inference Environment
+
+- Software required for inference with this model:
+
+  | Component | Version | Setup Guide |
+  | --------- | ------- | ----------- |
+  | Firmware & drivers | 1.0.17 | [PyTorch framework inference environment setup](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) |
+  | CANN | 6.0.RC1 | - |
+  | Python | 3.7.5 | - |
+
+  Note: choose the firmware and driver versions that match your inference card model and CANN version.
+
+
+----
+# Quick Start
+
+## Installation
+
+- Install the dependencies required for inference:
+  ```bash
+  pip3 install -r requirements.txt
+  ```
+- Get the source code and apply the patch:
+  ```bash
+  git clone https://github.com/PaddlePaddle/PaddleOCR.git
+  cd PaddleOCR
+  git reset --hard a40f64a70b8d290b74557a41d869c0f9ce4959d5
+  git apply ../SVTR_vd.patch
+  mv ../SVTR_preprocess.py ./
+  mv ../SVTR_postprocess.py ./
+  ```
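+- Generate `inv_delta_C.npy` for the patched TPS module. The patched `ppocr/modeling/transforms/tps.py` loads a precomputed `inv_delta_C.npy` from the working directory instead of calling `paddle.inverse` at run time, so the file must be present before model export (and any other run that builds the TPS module). The sketch below shows one way to generate it; it assumes the default `F = 20` fiducial points and the clovaai-style TPS formulation that PaddleOCR follows — double-check both against `build_inv_delta_C_paddle` in your checkout.
+  ```python
+  # generate_inv_delta_C.py -- helper sketch, not shipped with this PR
+  import numpy as np
+
+  F = 20  # number of fiducial points; must match the STN config
+
+  # fiducial control points C (F x 2) on the top and bottom image edges
+  ctrl_pts_x = np.linspace(-1.0, 1.0, F // 2)
+  ctrl_pts_top = np.stack([ctrl_pts_x, -np.ones(F // 2)], axis=1)
+  ctrl_pts_bottom = np.stack([ctrl_pts_x, np.ones(F // 2)], axis=1)
+  C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
+
+  # TPS kernel matrix: hat_C[i, j] = r^2 * log(r^2), r = ||C_i - C_j||
+  r = np.linalg.norm(C[:, None, :] - C[None, :, :], axis=2)
+  np.fill_diagonal(r, 1)  # avoid log(0); the diagonal terms are zero anyway
+  hat_C = (r ** 2) * np.log(r ** 2)
+
+  # assemble delta_C ((F+3) x (F+3)) and save its inverse
+  delta_C = np.concatenate(
+      [
+          np.concatenate([np.ones((F, 1)), C, hat_C], axis=1),
+          np.concatenate([np.zeros((2, 3)), C.T], axis=1),
+          np.concatenate([np.zeros((1, 3)), np.ones((1, F))], axis=1),
+      ],
+      axis=0)
+  np.save('inv_delta_C.npy', np.linalg.inv(delta_C))
+  ```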
+
+## Prepare the Dataset
+
+1. Get the original dataset.
+   Evaluation uses the English lmdb benchmark data:
+   [English dataset](https://github.com/clovaai/deep-text-recognition-benchmark#download-lmdb-dataset-for-traininig-and-evaluation-from-here)
+   Place the dataset under the PaddleOCR directory.
+
+2. Get the weight file and config file.
+   [Weights and config](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar)
+   Extract the archive under the PaddleOCR directory:
+   ```bash
+   tar -xvf rec_svtr_tiny_none_ctc_en_train.tar
+   ```
+
+3. Preprocess the data.
+   Run the preprocessing script to convert the original data into the .bin files the OM model takes as input:
+   ```bash
+   python3 SVTR_preprocess.py \
+       --config=./rec_svtr_tiny_none_ctc_en_train/rec_svtr_tiny_6local_6global_stn_en.yml \
+       --opt=bin_data=svtr_tiny_bin
+   ```
+   Here `--config` is the model config file and `--opt=bin_data` names the output directory for the preprocessed .bin files; a quick check of one generated file is sketched below.
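+
+   The check assumes the 1 x 3 x 64 x 256 input shape from the model spec and a roughly [-1, 1] normalized value range — both are assumptions to verify against your config:
+   ```python
+   # inspect_bin.py -- hypothetical helper, not part of this PR
+   import numpy as np
+
+   arr = np.fromfile('./svtr_tiny_bin/img_0.bin', dtype=np.float32)
+
+   # each file should hold one normalized image tensor: 1 x 3 x 64 x 256
+   assert arr.size == 1 * 3 * 64 * 256
+   print(arr.reshape(1, 3, 64, 256).shape)
+   print(arr.min(), arr.max())  # expect values roughly in [-1, 1]
+   ```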
+
+
+## Model Conversion
+
+1. Export the Paddle model to ONNX.
+
+   ```bash
+   python3 tools/export_model.py \
+       -c ./rec_svtr_tiny_none_ctc_en_train/rec_svtr_tiny_6local_6global_stn_en.yml \
+       -o Global.pretrained_model=./rec_svtr_tiny_none_ctc_en_train/best_accuracy \
+       Global.save_inference_dir=./inference/rec_svtr_tiny_stn_en
+   ```
+   This generates the inference directory:
+   ```
+   /inference/rec_svtr_tiny_stn_en/
+   ├── inference.pdiparams
+   ├── inference.pdiparams.info
+   └── inference.pdmodel
+   ```
+
+   Export the ONNX file, simplify it, and fix the grid-sample subgraph:
+   ```bash
+   paddle2onnx --model_dir ./inference/rec_svtr_tiny_stn_en \
+       --model_filename inference.pdmodel \
+       --params_filename inference.pdiparams \
+       --save_file ./inference/rec_svtr_tiny_stn_en/svtr_tiny.onnx \
+       --opset_version 16 \
+       --input_shape_dict="{'x':[1,3,64,256]}" \
+       --enable_onnx_checker True
+   python3 -m onnxsim ./inference/rec_svtr_tiny_stn_en/svtr_tiny.onnx ./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim.onnx
+   python3 ../onnx_fix.py \
+       --in_onnx ./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim.onnx \
+       --out_onnx ./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim_fix.onnx
+   ```
+
+   Parameter description:
+   + --model_dir: directory of the exported Paddle inference model
+   + --model_filename: model file name
+   + --params_filename: model weights file name
+   + --save_file: save path of the ONNX file
+   + --opset_version: ONNX opset version; 16 is required for the GridSample op
+   + --input_shape_dict: model input shape
+   + --enable_onnx_checker: enable the ONNX checker
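+
+   Before converting to OM, you can optionally smoke-test the fixed ONNX model on CPU. A minimal sketch, assuming onnxruntime >= 1.12 is installed (earlier versions lack the opset-16 GridSample op):
+   ```python
+   # check_onnx.py -- optional sanity check, not part of the original flow
+   import numpy as np
+   import onnxruntime as ort
+
+   sess = ort.InferenceSession(
+       './inference/rec_svtr_tiny_stn_en/svtr_tiny_sim_fix.onnx',
+       providers=['CPUExecutionProvider'])
+
+   # random input matching the model's static input shape
+   dummy = np.random.rand(1, 3, 64, 256).astype(np.float32)
+   outputs = sess.run(None, {'x': dummy})
+
+   print(outputs[0].shape)  # expect (1, 25, 37)
+   ```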
+
+2. Convert the ONNX model to an OM model.
+
+   Step 1: check the NPU chip name ${chip_name}
+   ```bash
+   npu-smi info
+   ```
+   For example, on a device with 310P3 chips the output looks like:
+   ```
+   +-------------------+-----------------+------------------------------------------------------+
+   | NPU Name          | Health          | Power(W)    Temp(C)            Hugepages-Usage(page) |
+   | Chip Device       | Bus-Id          | AICore(%)   Memory-Usage(MB)                          |
+   +===================+=================+======================================================+
+   | 0    310P3        | OK              | 15.8        42                 0    / 0              |
+   | 0    0            | 0000:82:00.0    | 0           1074 / 21534                              |
+   +===================+=================+======================================================+
+   | 1    310P3        | OK              | 15.4        43                 0    / 0              |
+   | 0    1            | 0000:89:00.0    | 0           1070 / 21534                              |
+   +===================+=================+======================================================+
+   ```
+
+   Step 2: convert the ONNX model to an OM model
+   ```bash
+   # set environment variables
+   source /usr/local/Ascend/ascend-toolkit/set_env.sh
+
+   chip_name=310P3  # set according to the result of step 1
+
+   # run ATC to convert the model
+   atc --framework=5 \
+       --model=./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim_fix.onnx \
+       --output=./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim_fix \
+       --input_format=NCHW \
+       --input_shape="x:1,3,64,256" \
+       --log=debug \
+       --soc_version=Ascend${chip_name}
+   ```
+
+   Parameter description:
+   + --framework: 5 for ONNX models
+   + --model: ONNX model path
+   + --input_shape: shape of the model input data
+   + --input_format: layout of the input data
+   + --output: OM model path, without a suffix
+   + --log: log level
+   + --soc_version: processor model
+
+## Inference and Validation
+
+1. Run inference on the dataset.
+   Install the ais_bench inference tool: visit the [ais_bench inference tool](https://gitee.com/ascend/tools/tree/master/ais-bench_workload/tool/ais_bench) repository and follow its readme. Then run inference on the preprocessed data:
+   ```bash
+   mkdir result
+   python3 -m ais_bench \
+       --model ./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim_fix.om \
+       --input ./svtr_tiny_bin \
+       --output ./result/ \
+       --outfmt BIN \
+       --batchsize ${batch_size}
+   ```
+   Parameter description:
+   + --model: OM model path
+   + --input: directory holding the preprocessed data
+   + --output: parent directory for the inference results
+   + --outfmt: file format of the saved inference results
+   + --batchsize: number of input .bin files fed per inference
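+
+   To eyeball a single prediction before running the full post-processing, you can greedily decode one result file. A rough sketch — the result path is the timestamped directory ais_bench creates, and the label set (index 0 = CTC blank, then digits and lowercase letters) is an assumption to verify against the model's dictionary:
+   ```python
+   # decode_one.py -- hypothetical helper for a quick manual check
+   import numpy as np
+
+   chars = ['<blank>'] + list('0123456789abcdefghijklmnopqrstuvwxyz')
+
+   logits = np.fromfile('./result/2023_02_20-09_29_54/img_0_0.bin',
+                        dtype=np.float32).reshape(25, 37)
+   ids = logits.argmax(axis=1)
+
+   # CTC greedy decode: collapse repeats, then drop blanks
+   text, prev = [], -1
+   for i in ids:
+       if i != prev and i != 0:
+           text.append(chars[i])
+       prev = i
+   print(''.join(text))
+   ```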
+
+2. Performance validation.
+   Keep three points in mind when measuring performance:
+   + Before testing, check the NPU device status with `npu-smi info`; make sure the NPU device is idle during the test.
+   + To avoid interference from overly long runs, measure performance in pure-inference mode (no --input).
+   + Use throughput as the metric, in fps: the number of samples the model processes per second.
+   ```bash
+   python3 -m ais_bench --model ./inference/rec_svtr_tiny_stn_en/svtr_tiny_sim_fix.om --batchsize ${batch_size}
+   ```
+   To test a batch size other than 1, first rebuild the OM model with the matching batch dimension in the ATC `--input_shape`. After the pure-inference run finishes, the program prints performance metrics; find the line starting with **[INFO] throughput** — the number at the end of that line is the OM model's throughput.
+
+3. Accuracy validation.
+
+   Run the post-processing script to compute the OM model's accuracy from the inference results:
+   ```bash
+   python3 ./SVTR_postprocess.py \
+       --config=./rec_svtr_tiny_none_ctc_en_train/rec_svtr_tiny_6local_6global_stn_en.yml \
+       --opt=results=./result/2023_02_20-09_29_54
+   ```
+   Parameter description:
+   + --config: model config file path
+   + --opt=results: inference result path (the timestamped subdirectory created by ais_bench)
+
+   On success, the console prints:
+   ```
+   accuracy: 0.882738
+   norm_edit_dis: 0.9562
+   ```
+
+
+
+----
+# Performance & Accuracy
+
+On the 310P device, the OM model reaches an accuracy of **Top1Acc=83.52%**; performance peaks at batch size 1, reaching 266.8 fps.
+
+| Chip | BatchSize | Dataset | Accuracy | Performance |
+| --------- | --------- | ----------- | --------------- | --------- |
+| Ascend310P3 | 1 | English lmdb eval set | Top1Acc=83.52% | 266.8 fps |
+| Ascend310P3 | 4 | English lmdb eval set | Top1Acc=83.52% | 75.0 fps |
+| Ascend310P3 | 8 | English lmdb eval set | Top1Acc=83.52% | 50.9 fps |
+| Ascend310P3 | 16 | English lmdb eval set | Top1Acc=83.52% | 20.4 fps |
+| Ascend310P3 | 32 | English lmdb eval set | Top1Acc=83.52% | 8.5 fps |
+| Ascend310P3 | 64 | English lmdb eval set | Top1Acc=83.52% | 4.5 fps |
\ No newline at end of file
diff --git a/ACL_PyTorch/contrib/cv/classfication/SVTR/requirements.txt b/ACL_PyTorch/contrib/cv/classfication/SVTR/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fee6fdf3f57e240b931693a4f1b3ed6806156244
--- /dev/null
+++ b/ACL_PyTorch/contrib/cv/classfication/SVTR/requirements.txt
@@ -0,0 +1,20 @@
+paddlepaddle
+shapely
+scikit-image
+imgaug
+pyclipper
+lmdb
+tqdm
+numpy
+visualdl
+rapidfuzz
+opencv-python==4.6.0.66
+opencv-contrib-python==4.6.0.66
+cython
+lxml
+premailer
+openpyxl
+attrdict
+Polygon3
+lanms-neo==1.0.2
+PyMuPDF<1.21.0
\ No newline at end of file