From fd512d64a17a380f81ee7615ab825a2989b10d28 Mon Sep 17 00:00:00 2001 From: Ssayhi_w Date: Sat, 24 Aug 2024 16:30:41 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E5=BC=AF=E6=9B=B2=E6=96=87=E5=AD=97?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=8F=82=E8=80=83=E8=AE=BE=E8=AE=A1=E9=80=82?= =?UTF-8?q?=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contrib/TextSnake/README.md | 412 ++++++++++------------ contrib/TextSnake/detection.py | 325 +++++++++++++++++ contrib/TextSnake/evaluate.py | 390 ++++++++++---------- contrib/TextSnake/main.py | 343 +++++++++--------- contrib/TextSnake/misc.py | 245 +++++++++++++ contrib/TextSnake/util/Deteval.py | 294 +++++++++++++++ contrib/TextSnake/util/__init__.py | 0 contrib/TextSnake/util/augmentation.py | 339 ++++++++++++++++++ contrib/TextSnake/util/config.py | 60 ++++ contrib/TextSnake/util/detection.py | 325 +++++++++++++++++ contrib/TextSnake/util/misc.py | 245 +++++++++++++ contrib/TextSnake/util/option.py | 108 ++++++ contrib/TextSnake/util/polygon_wrapper.py | 155 ++++++++ contrib/TextSnake/util/shedule.py | 28 ++ contrib/TextSnake/util/summary.py | 25 ++ contrib/TextSnake/util/visualize.py | 55 +++ 16 files changed, 2753 insertions(+), 596 deletions(-) create mode 100644 contrib/TextSnake/detection.py create mode 100644 contrib/TextSnake/misc.py create mode 100644 contrib/TextSnake/util/Deteval.py create mode 100644 contrib/TextSnake/util/__init__.py create mode 100644 contrib/TextSnake/util/augmentation.py create mode 100644 contrib/TextSnake/util/config.py create mode 100644 contrib/TextSnake/util/detection.py create mode 100644 contrib/TextSnake/util/misc.py create mode 100644 contrib/TextSnake/util/option.py create mode 100644 contrib/TextSnake/util/polygon_wrapper.py create mode 100644 contrib/TextSnake/util/shedule.py create mode 100644 contrib/TextSnake/util/summary.py create mode 100644 contrib/TextSnake/util/visualize.py diff --git a/contrib/TextSnake/README.md b/contrib/TextSnake/README.md index ecc149c7b..e2f8dfed0 100644 --- a/contrib/TextSnake/README.md +++ b/contrib/TextSnake/README.md @@ -1,229 +1,183 @@ -# TextSnake 弯曲形状文字检测 - -## 1 介绍 -TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任意弯曲形状文字进行检测,将检测得到的不同类的目标用曲线框标记。本方案使用在人工合成数据集SynthText上训练一个epoch,然后在其他数据集上finetune得到的TextSnake_bs1模型检测,数据集中共包含各种各样的弯曲形状文字,可以对各种角度,各种环境下的弯曲形状文字进行检测。 - -### 1.1 支持的产品 - -本项目以昇腾Atlas310卡为主要的硬件平台。 - -### 1.2 支持的版本 - -支持的SDK版本为 2.0.4, CANN 版本为 5.0.4。 - -### 1.3 软件方案介绍 - -本项目流程为用python代码实现对图像的预处理过程,然后将处理好的图片通过 appsrc 插件输入到业务流程中。整体业务流程为:待检测图片通过 appsrc 插件输入,然后使用图像解码插件 mxpi_imagedecoder 对图片进行解码,解码后的图像输入模型推理插件 mxpi_tensorinfer 得到推理结果。最后通过输出插件 appsink 获取检测结果,并在外部进行后处理和可视化,将检测结果标记到原图上,本系统的各模块及功能描述如表1所示: - -表1.1 系统方案各子系统功能描述: - -| 序号 | 子系统 | 功能描述 | -| ---- | ------ | ------------ | -| 1 | 图片输入 | 获取 jpg 格式输入图片 | -| 2 | 检测前处理 | 更改输入图片尺寸并进行归一化 | -| 3 | 模型推理 | 对输入张量进行推理 | -| 4 | 结果输出 | 获取检测结果 | -| 5 | 检测后处理 | 根据检测结果计算检测框位置和形状 | -| 6 | 结果可视化 | 将检测结果标注在输入图片上 | - - - - - -### 1.4 代码目录结构与说明 - -本工程名称为TextSnake,工程目录如下图所示: - -``` -├── main.py //运行工程项目的主函数 -├── evaluate.py //精度计算 -├── t.pipeline //pipeline -├── sdk.png //流程图 -├── pipeline.png //pipeline流程图 -└──README.md -``` - - -### 1.5 技术实现流程图 - -实现流程图如下图所示: - -![流程](./sdk.png) - - -pipeline流程如下图所示: - -![pipeline](./pipeline.png) - - - -### 1.6 特性及适用场景 - -本案例中的TextSnake模型适用于图像中弯曲形状文字的检测。 - -本模型在以下几种情况下检测弯曲形状文字的效果良好:含有目标数量少、目标面积占比图像较大、各目标边界清晰。 - -在以下情况检测弯曲形状文字效果不太好:图片中的弯曲形状文字数目较多且大小较小,此时会出现缺漏的情况。 - - -## 2 环境依赖 - -推荐系统为ubuntu 18.04,环境依赖软件和版本如下表: - -| 软件名称 | 版本 | -| -------- | ------ | -| MindX SDK 
| 2.0.4 | -| Ascend-CANN-toolkit | 5.0.4 | -| ubuntu | 18.04.1 LTS | -| python | 3.9.2 | -| cv2 | 4.1.2 | -| numpy | 1.15.1 | -| onnx | 1.8.0 | -| torch | 1.5.0 | -| torchvision | 0.6.0 | -| scikit_image | 0.16.2 | -| scipy | 1.5.4 | -| easydict | 1.8 | -| tdqm | 4.62.3 | - - - - - - -在编译运行项目前,需要设置环境变量: - - -具体执行命令 - -``` -. ${MX_SDK_HOME}/set_env.sh - -. ${ascend-toolkit-path}/set_env.sh -``` - - -## 模型转换 - -本项目使用的模型是TextSnake模型。 - -选用的模型为 pytorch 模型,可从 Ascend modelzoo 获取模型压缩包,在运行项目之前需要将 pytorch 模型转为 onnx 模型,再由 onnx 模型转为 om 模型。 - -pth 权重文件和 onnx 文件的下载链接 -https://www.hiascend.com/zh/software/modelzoo/models/detail/1/74fab02660d635f86325f2ffb56cff1b - - -具体步骤如下 - -1. 下载上述模型压缩包,获取 TextSnake.onnx 模型文件放置 TextSnake/model 目录下。 - -2. 进入TextSnake/model文件夹下执行命令 - -``` -atc --model=TextSnake.onnx --framework=5 --output=TextSnake_bs1 --input_format=NCHW --input_shape="image:1,3,512,512" --log=info --soc_version=Ascend310 - ``` - -3. 执行该命令会在当前目录下生成项目需要的模型文件TextSnake_bs1.om。执行后终端输出为 - - ``` -ATC start working now, please wait for a moment. -ATC run success, welcome to the next use. -``` - - 表示命令执行成功。 - - - -## 编译与运行 - -**步骤 1** 将任意一张jpg格式的图片存到当前目录下(./TextSnake),命名为test.jpg。如果 pipeline 文件(或测试图片)不在当前目录下(./TestSnake),需要修改 main.py 的pipeline(或测试图片)路径指向到所在目录。此外,需要从 -https://github.com/princewang1994/TextSnake.pytorch/tree/b4ee996d5a4d214ed825350d6b307dd1c31faa07 -下载util文件夹至当前目录(./TextSnake),并修改其中的detection.py,修改方式如下(以下行数均为原代码行数): - -(1)将12行改为: - ``` -def __init__(self, tr_thresh=0.4, tcl_thresh=0.6): - ``` - -并删除该构造函数中与model相关的语句。 - -(2)将38行: - ``` -in_poly = cv2.pointPolygonTest(cont, (xmean, i), False) - ``` -改为 - ``` -in_poly = cv2.pointPolygonTest(cont, (int(xmean), int(i)), False) - ``` -56行改为 - ``` -if cv2.pointPolygonTest(cont, (int(test_pt[0]), int(test_pt[1])), False) > 0 - ``` -67行改为 - ``` -return cv2.pointPolygonTest(cont, (int(x), int(y)), False) > 0 - ``` - -(3)在315行前后分别添加: - ``` -conts = list(conts) -``` -``` -conts = tuple(conts) -``` - -**步骤 2** 按照模型转换获取om模型,放置在 TextSnake/model 路径下。若未从 pytorch 模型自行转换模型,使用的是上述链接提供的 onnx 模型,则无需修改相关文件,否则修改 main.py 中pipeline的相关配置,将 mxpi_tensorinfer0 插件 modelPath 属性值中的 om 模型名改成实际使用的 om 模型名。 - -**步骤 3** 在命令行输入 如下命令运行整个工程 - -``` -python3 main.py -``` - -**步骤 4** 图片检测。运行结束输出result.jpg。 - - -## 5 测试精度 - -**步骤 1** 安装数据集用以测试精度。数据集 TotalText 需要自行下载。 -数据集图片部分: -https://drive.google.com/file/d/1bC68CzsSVTusZVvOkk7imSZSbgD1MqK2/view?usp=sharing totaltext.zip - -数据集ground truth部分: -https://drive.google.com/file/d/19quCaJGePvTc3yPZ7MAGNijjKfy77-ke/view?usp=sharing groundtruth_text.zip - -将下载好的数据集调整成以下路径的形式 -``` -├── main.py //运行工程项目的主函数 -├── evaluate.py //精度计算 -├── t.pipeline //pipeline -├── model //存放模型文件 -├── test.jpg //测试图像 -├── result.jpg //输出结果 -├── sdk.png //流程图 -├── pipeline.png //pipeline流程图 -├── data - ├── total-text - ├── gt - ├── Test - ├── poly_gt_img1.mat //测试集groundtruth - ... - ├── img1.jpg //测试集图片 - ... 
-└──README.md -``` - -**步骤 2** 除先前下载的util文件夹之外,还需要从以下网址中下载Deteval.py与polygon_wrapper.py文件,放入util文件夹中 -https://github.com/princewang1994/TextSnake.pytorch/tree/b4ee996d5a4d214ed825350d6b307dd1c31faa07/dataset/total_text/Evaluation_Protocol/Python_scripts - -**步骤 3** 在命令行输入 如下命令运行精度测试 -``` -python3 evaluate.py -``` -得到精度测试的结果: - -![精度测试结果1](./精度1.png) - -![精度测试结果2](./精度2.png) - -与pytorch实现版本的精度结果相对比,其精度相差在1%以下,精度达标。 \ No newline at end of file +# TextSnake 弯曲形状文字检测 + +## 1 介绍 + +### 1.1 简介 + +TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任意弯曲形状文字进行检测,将检测得到的不同类的目标用曲线框标记。本方案使用在人工合成数据集SynthText上训练一个epoch,然后在其他数据集上finetune得到的TextSnake_bs1模型检测,数据集中共包含各种各样的弯曲形状文字,可以对各种角度,各种环境下的弯曲形状文字进行检测。 +本项目流程为用python代码实现对图像的预处理过程,然后将处理好的图片通过 appsrc 插件输入到业务流程中。整体业务流程为:待检测图片通过 appsrc 插件输入,然后使用图像解码插件 mxpi_imagedecoder 对图片进行解码,解码后的图像输入模型推理插件 mxpi_tensorinfer 得到推理结果。最后通过输出插件 appsink 获取检测结果,并在外部进行后处理和可视化,将检测结果标记到原图上,本系统的各模块及功能描述下表所示: + +| 序号 | 子系统 | 功能描述 | +| ---- | ------ | ------------ | +| 1 | 图片输入 | 获取 jpg 格式输入图片 | +| 2 | 检测前处理 | 更改输入图片尺寸并进行归一化 | +| 3 | 模型推理 | 对输入张量进行推理 | +| 4 | 结果输出 | 获取检测结果 | +| 5 | 检测后处理 | 根据检测结果计算检测框位置和形状 | +| 6 | 结果可视化 | 将检测结果标注在输入图片上 | + + +### 1.2 支持的产品 + +本项目以昇腾Atlas 500 A2为主要的硬件平台。 + +### 1.3 支持的版本 + +| MxVision版本 | CANN版本 | Driver/Firmware版本 | +| --------- | ------------------ | -------------- | +| 5.0.0 | 7.0.0 | 23.0.0 | +| 6.0.RC2 | 8.0.RC2 | 24.1.RC2 | + +### 1.4 三方依赖 + + +| 软件名称 | 版本 | +| -------- |--------| +| cv2 | 4.1.2 | +| numpy | 1.15.1 | +| onnx | 1.8.0 | +| torch | 1.5.0 | +| torchvision | 0.6.0 | +| scikit_image | 0.16.2 | +| scipy | 1.5.4 | +| easydict | 1.13 | +| tdqm | 4.62.3 | +| shapely | 2.0.6 | + +### 1.4 代码目录结构与说明 + +本工程名称为TextSnake,工程目录如下图所示: + +``` +├── main.py //运行工程项目的主函数 +├── evaluate.py //精度计算 +├── t.pipeline //pipeline +├── sdk.png //流程图 +├── pipeline.png //pipeline流程图 +└──README.md +``` + +### 1.5 技术实现流程图 + +实现流程图如下图所示: + +![流程](./sdk.png) + + +pipeline流程如下图所示: + +![pipeline](./pipeline.png) + + +## 2 设置环境变量 + +在编译运行项目前,需要执行一下命令设置环境变量: + +```bash +export PYTHONPATH=${MX_SDK_HOME}/python/:$PYTHONPATH +export install_path=${install_path} +. ${install_path}/set_env.sh +. ${MX_SDK_HOME}/set_env.sh +``` +注:**${MX_SDK_HOME}** 替换为用户自己的MindX_SDK安装路径(例如:"/home/xxx/MindX_SDK/mxVision"); + +**${install_path}** 替换为开发套件包所在路径(例如:/usr/local/Ascend/ascend-toolkit/latest)。 + + +## 3 准备模型 + +本项目使用的模型是TextSnake模型。 + +本项目提供已从pytorch模型转换好的onnx 模型,需要进一步转换为om模型 +pth 权重文件和 onnx 文件的下载链接 +https://mindx.sdk.obs.cn-north-4.myhuaweicloud.com/mindxsdk-referenceapps%20/contrib/TextSnake/ATC%20TextSnake%28FP16%29%20from%20Pytorch%20-%20Ascend310.zip + +该压缩文件中已存在om文件,需删除后重新进行模型转换 +具体步骤如下 + +1. 下载上述模型压缩包,获取 TextSnake.onnx 模型文件放置 TextSnake/model 目录下。 + +2. 进入TextSnake/model文件夹下执行命令 + +``` +atc --model=TextSnake.onnx --framework=5 --output=TextSnake_bs1 --input_format=NCHW --input_shape="image:1,3,512,512" --log=info --soc_version=Ascend310B1 + ``` + +3. 执行该命令会在当前目录下生成项目需要的模型文件TextSnake_bs1.om。执行后终端输出为 + + ``` +ATC start working now, please wait for a moment. +ATC run success, welcome to the next use. 
+``` + +表示命令执行成功。 + + + +## 4 运行 + +**步骤 1** 将任意一张jpg格式的图片存到当前目录下(./TextSnake),命名为test.jpg。如果 pipeline 文件(或测试图片)不在当前目录下(./TestSnake),需要修改 main.py 的pipeline(或测试图片)路径指向到所在目录。此外,需要从 +https://github.com/princewang1994/TextSnake.pytorch/tree/b4ee996d5a4d214ed825350d6b307dd1c31faa07 +下载util文件夹至当前目录(./TextSnake),并将其中的detection.py和misc.py文件替换为./TestSnake文件夹下的detection.py和misc.py文件。 + +**步骤 2** 按照模型转换获取om模型,放置在 TextSnake/model 路径下。若未从 pytorch 模型自行转换模型,使用的是上述链接提供的 onnx 模型,则无需修改相关文件,否则修改 main.py 中pipeline的相关配置,将 mxpi_tensorinfer0 插件 modelPath 属性值中的 om 模型名改成实际使用的 om 模型名。 + +**步骤 3** 在命令行输入 如下命令运行整个工程 + +``` +python3 main.py +``` + +**步骤 4** 图片检测。运行结束输出result.jpg。 + + +## 5 精度验证 + +**步骤 1** 安装数据集用以测试精度。数据集 TotalText和GroundTruth文件 需要自行下载:[下载地址](https://drive.google.com/file/d/1bC68CzsSVTusZVvOkk7imSZSbgD1MqK2/view?usp=sharing), +groundTruth的[下载地址](https://drive.google.com/file/d/19quCaJGePvTc3yPZ7MAGNijjKfy77-ke/view?usp=sharing)。 + +将下载好的数据集和groundTruth文件调整成以下路径的形式(需手动创建相关文件夹) +测试图片位于total-text/Images/Test +Groundtruth位于Groundtruth/Polygon/Test +拷贝两个目录下的所有文件至对应目录 +``` +├── main.py //运行工程项目的主函数 +├── evaluate.py //精度计算 +├── t.pipeline //pipeline +├── model //存放模型文件 +├── test.jpg //测试图像 +├── result.jpg //输出结果 +├── sdk.png //流程图 +├── pipeline.png //pipeline流程图 +├── data + ├── total-text + ├── gt + ├── Test + ├── poly_gt_img1.mat //测试集groundtruth + ... + ├── img1.jpg //测试集图片 + ... +└──README.md +``` + +**步骤 2** 除先前下载的util文件夹之外,还需要从以下网址中下载Deteval.py与polygon_wrapper.py文件,放入util文件夹中 +https://github.com/princewang1994/TextSnake.pytorch/tree/b4ee996d5a4d214ed825350d6b307dd1c31faa07/dataset/total_text/Evaluation_Protocol/Python_scripts + +**步骤 3** 在命令行输入 如下命令运行精度测试 +``` +python3 evaluate.py +``` +得到精度测试的结果: + +![精度测试结果1](./精度1.png) + +![精度测试结果2](./精度2.png) + +与pytorch实现版本的精度结果相对比,其精度相差在1%以下,精度达标。 + +## 常见问题 +本案例中的TextSnake模型适用于图像中弯曲形状文字的检测。 + +本模型在以下几种情况下检测弯曲形状文字的效果良好:含有目标数量少、目标面积占比图像较大、各目标边界清晰。 + +在以下情况检测弯曲形状文字效果不太好:图片中的弯曲形状文字数目较多且大小较小,此时会出现缺漏的情况。 \ No newline at end of file diff --git a/contrib/TextSnake/detection.py b/contrib/TextSnake/detection.py new file mode 100644 index 000000000..3ba6bc316 --- /dev/null +++ b/contrib/TextSnake/detection.py @@ -0,0 +1,325 @@ +import numpy as np +import cv2 +import torch +from util.config import config as cfg +from util.misc import fill_hole, regularize_sin_cos +from util.misc import norm2, vector_cos, vector_sin +from util.misc import disjoint_merge, merge_polygons + + +class TextDetector(object): + + def __init__(self, tr_thresh=0.4, tcl_thresh=0.6): + self.tr_thresh = tr_thresh + self.tcl_thresh = tcl_thresh + + + def find_innerpoint(self, cont): + """ + generate an inner point of input polygon using mean of x coordinate by: + 1. calculate mean of x coordinate(xmean) + 2. calculate maximum and minimum of y coordinate(ymax, ymin) + 3. iterate for each y in range (ymin, ymax), find first segment in the polygon + 4. 
calculate means of segment + :param cont: input polygon + :return: + """ + + xmean = cont[:, 0, 0].mean() + ymin, ymax = cont[:, 0, 1].min(), cont[:, 0, 1].max() + found = False + found_y = [] + # + for i in np.arange(ymin - 1, ymax + 1, 0.5): + # if in_poly > 0, (xmean, i) is in `cont` + in_poly = cv2.pointPolygonTest(cont, (int(xmean), int(i)), False) + if in_poly > 0: + found = True + found_y.append(i) + # first segment found + if in_poly < 0 and found: + break + + if len(found_y) > 0: + return (xmean, np.array(found_y).mean()) + + # if cannot find using above method, try each point's neighbor + else: + for p in range(len(cont)): + point = cont[p, 0] + for i in range(-1, 2, 1): + for j in range(-1, 2, 1): + test_pt = point + [i, j] + if cv2.pointPolygonTest(cont, (int(test_pt[0]), int(test_pt[1])), False) > 0: + return test_pt + + def in_contour(self, cont, point): + """ + utility function for judging whether `point` is in the `contour` + :param cont: cv2.findCountour result + :param point: 2d coordinate (x, y) + :return: + """ + x, y = point + return cv2.pointPolygonTest(cont, (int(x), int(y)), False) > 0 + + def centerlize(self, x, y, H, W, tangent_cos, tangent_sin, tcl_contour, stride=1.): + """ + centralizing (x, y) using tangent line and normal line. + :return: coordinate after centralizing + """ + + # calculate normal sin and cos + normal_cos = -tangent_sin + normal_sin = tangent_cos + + # find upward + _x, _y = x, y + while self.in_contour(tcl_contour, (_x, _y)): + _x = _x + normal_cos * stride + _y = _y + normal_sin * stride + if int(_x) >= W or int(_x) < 0 or int(_y) >= H or int(_y) < 0: + break + end1 = np.array([_x, _y]) + + # find downward + _x, _y = x, y + while self.in_contour(tcl_contour, (_x, _y)): + _x = _x - normal_cos * stride + _y = _y - normal_sin * stride + if int(_x) >= W or int(_x) < 0 or int(_y) >= H or int(_y) < 0: + break + end2 = np.array([_x, _y]) + + # centralizing + center = (end1 + end2) / 2 + + return center + + def mask_to_tcl(self, pred_sin, pred_cos, pred_radii, tcl_contour, init_xy, direct=1): + """ + Iteratively find center line in tcl mask using initial point (x, y) + :param pred_sin: predict sin map + :param pred_cos: predict cos map + :param tcl_contour: predict tcl contour + :param init_xy: initial (x, y) + :param direct: direction [-1|1] + :return: + """ + + H, W = pred_sin.shape + x_shift, y_shift = init_xy + + result = [] + max_attempt = 200 + attempt = 0 + + while self.in_contour(tcl_contour, (x_shift, y_shift)): + + attempt += 1 + + sin = pred_sin[int(y_shift), int(x_shift)] + cos = pred_cos[int(y_shift), int(x_shift)] + x_c, y_c = self.centerlize(x_shift, y_shift, H, W, cos, sin, tcl_contour) + + sin_c = pred_sin[int(y_c), int(x_c)] + cos_c = pred_cos[int(y_c), int(x_c)] + radii_c = pred_radii[int(y_c), int(x_c)] + + result.append(np.array([x_c, y_c, radii_c])) + + # shift stride + for shrink in [1/2., 1/4., 1/8., 1/16., 1/32.]: + t = shrink * radii_c # stride = +/- 0.5 * [sin|cos](theta), if new point is outside, shrink it until shrink < 1/32., hit ends + x_shift_pos = x_c + cos_c * t * direct # positive direction + y_shift_pos = y_c + sin_c * t * direct # positive direction + x_shift_neg = x_c - cos_c * t * direct # negative direction + y_shift_neg = y_c - sin_c * t * direct # negative direction + + # if first point, select positive direction shift + if len(result) == 1: + x_shift, y_shift = x_shift_pos, y_shift_pos + else: + # else select point further with second last point + dist_pos = norm2(result[-2][:2] - (x_shift_pos, 
y_shift_pos)) + dist_neg = norm2(result[-2][:2] - (x_shift_neg, y_shift_neg)) + if dist_pos > dist_neg: + x_shift, y_shift = x_shift_pos, y_shift_pos + else: + x_shift, y_shift = x_shift_neg, y_shift_neg + # if out of bounds, skip + if int(x_shift) >= W or int(x_shift) < 0 or int(y_shift) >= H or int(y_shift) < 0: + continue + # found an inside point + if self.in_contour(tcl_contour, (x_shift, y_shift)): + break + # if out of bounds, break + if int(x_shift) >= W or int(x_shift) < 0 or int(y_shift) >= H or int(y_shift) < 0: + break + if attempt > max_attempt: + break + return np.array(result) + + def build_tcl(self, tcl_pred, sin_pred, cos_pred, radii_pred): + """ + Find TCL's center points and radii of each point + :param tcl_pred: output tcl mask, (512, 512) + :param sin_pred: output sin map, (512, 512) + :param cos_pred: output cos map, (512, 512) + :param radii_pred: output radii map, (512, 512) + :return: (list), tcl array: (n, 3), 3 denotes (x, y, radii) + """ + all_tcls = [] + + # find disjoint regions + tcl_mask = fill_hole(tcl_pred) + tcl_contours, _ = cv2.findContours(tcl_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + for cont in tcl_contours: + + # find an inner point of polygon + init = self.find_innerpoint(cont) + + if init is None: + continue + + x_init, y_init = init + + # find left/right tcl + tcl_left = self.mask_to_tcl(sin_pred, cos_pred, radii_pred, cont, (x_init, y_init), direct=1) + tcl_right = self.mask_to_tcl(sin_pred, cos_pred, radii_pred, cont, (x_init, y_init), direct=-1) + # concat + tcl = np.concatenate([tcl_left[::-1][:-1], tcl_right]) + all_tcls.append(tcl) + + return all_tcls + + def detect_contours(self, image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred): + """ + Input: FCN output, Output: text detection after post-processing + + :param image: (np.array) input image (3, H, W) + :param tr_pred: (np.array), text region prediction, (2, H, W) + :param tcl_pred: (np.array), text center line prediction, (2, H, W) + :param sin_pred: (np.array), sin prediction, (H, W) + :param cos_pred: (np.array), cos line prediction, (H, W) + :param radii_pred: (np.array), radii prediction, (H, W) + + :return: + (list), tcl array: (n, 3), 3 denotes (x, y, radii) + """ + + # thresholding + tr_pred_mask = tr_pred[1] > self.tr_thresh + tcl_pred_mask = tcl_pred[1] > self.tcl_thresh + + # multiply TR and TCL + tcl_mask = tcl_pred_mask * tr_pred_mask + + # regularize + sin_pred, cos_pred = regularize_sin_cos(sin_pred, cos_pred) + + # find tcl in each predicted mask + detect_result = self.build_tcl(tcl_mask, sin_pred, cos_pred, radii_pred) + + return self.postprocessing(image, detect_result, tr_pred_mask) + + def detect(self, image): + """ + + :param image: + :return: + """ + # get model output + output = self.model(image) + image = image[0].data.cpu().numpy() + tr_pred = output[0, 0:2].softmax(dim=0).data.cpu().numpy() + tcl_pred = output[0, 2:4].softmax(dim=0).data.cpu().numpy() + sin_pred = output[0, 4].data.cpu().numpy() + cos_pred = output[0, 5].data.cpu().numpy() + radii_pred = output[0, 6].data.cpu().numpy() + + # find text contours + contours = self.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) # (n_tcl, 3) + + output = { + 'image': image, + 'tr': tr_pred, + 'tcl': tcl_pred, + 'sin': sin_pred, + 'cos': cos_pred, + 'radii': radii_pred + } + return contours, output + + def merge_contours(self, all_contours): + """ Merge overlapped instances to one instance with disjoint find / merge algorithm + :param all_contours: 
(list(np.array)), each with (n_points, 2) + :return: (list(np.array)), each with (n_points, 2) + """ + + def stride(disks, other_contour, left, step=0.3): + if len(disks) < 2: + return False + if left: + last_point, before_point = disks[:2] + else: + before_point, last_point = disks[-2:] + radius = last_point[2] + cos = vector_cos(last_point[:2] - before_point[:2]) + sin = vector_sin(last_point[:2] - before_point[:2]) + new_point = last_point[:2] + radius * step * np.array([cos, sin]) + return self.in_contour(other_contour, new_point) + + def can_merge(disks, other_contour): + return stride(disks, other_contour, left=True) or stride(disks, other_contour, left=False) + + F = list(range(len(all_contours))) + for i in range(len(all_contours)): + cont_i, disk_i = all_contours[i] + for j in range(i + 1, len(all_contours)): + cont_j, disk_j = all_contours[j] + if can_merge(disk_i, cont_j): + disjoint_merge(i, j, F) + + merged_polygons = merge_polygons([cont for cont, disks in all_contours], F) + return merged_polygons + + def postprocessing(self, image, detect_result, tr_pred_mask): + """ convert geometric info(center_x, center_y, radii) into contours + :param image: (np.array), input image + :param result: (list), each with (n, 3), 3 denotes (x, y, radii) + :param tr_pred_mask: (np.array), predicted text area mask, each with shape (H, W) + :return: (np.ndarray list), polygon format contours + """ + + all_conts = [] + for disk in detect_result: + reconstruct_mask = np.zeros(image.shape[1:], dtype=np.uint8) + for x, y, r in disk: + # expand radius for higher recall + if cfg.post_process_expand > 0.0: + r *= (1. + cfg.post_process_expand) + cv2.circle(reconstruct_mask, (int(x), int(y)), max(1, int(r)), 1, -1) + + # according to the paper, at least half of pixels in the reconstructed text area should be classified as TR + if (reconstruct_mask * tr_pred_mask).sum() < reconstruct_mask.sum() * 0.5: + continue + + # filter out too small objects + conts, _ = cv2.findContours(reconstruct_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(conts) > 1: + conts = list(conts) + conts.sort(key=lambda x: cv2.contourArea(x), reverse=True) + conts = tuple(conts) + elif not conts: + continue + all_conts.append((conts[0][:, 0, :], disk)) + + # merge joined instances + if cfg.post_process_merge: + all_conts = self.merge_contours(all_conts) + else: + all_conts = [cont[0] for cont in all_conts] + + return all_conts \ No newline at end of file diff --git a/contrib/TextSnake/evaluate.py b/contrib/TextSnake/evaluate.py index 843b162f1..5d969bf3f 100644 --- a/contrib/TextSnake/evaluate.py +++ b/contrib/TextSnake/evaluate.py @@ -1,196 +1,196 @@ -#!/usr/bin/env python -# coding=utf-8 - -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import json -import os -import math -import stat -import subprocess -import cv2 -import numpy as np -import MxpiDataType_pb2 as MxpiDataType -from PIL import Image -import torch - -from StreamManagerApi import StreamManagerApi, MxProtobufIn, InProtobufVector, StringVector - -from util.misc import fill_hole, regularize_sin_cos -from util.detection import TextDetector -from util.misc import to_device, mkdirs, rescale_result -from util.config import config as cfg -from util.visualize import visualize_detection - - -def zerodimsoftmax(x): - first = x[0, :, :].reshape(512, 512) - second = x[1, :, :].reshape(512, 512) - fexp = np.exp(first) - sexp = np.exp(second) - sumexp = fexp + sexp - fxf = fexp / sumexp - fxs = sexp / sumexp - fx = np.zeros((2, 512, 512)) - fx[0, :, :] = fxf - fx[1, :, :] = fxs - return fx - - - -def norm(image_n, mean, std): - image_n = image_n.astype(np.float32) - image_n /= 255.0 - image_n -= mean - image_n /= std - return image_n - - -def resize(image_r, size): - h, w, _ = image_r.shape - image_r = cv2.resize(image_r, (size, size)) - return image_r - - -def write_to_file(contours_w, file_path): - with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT, MODES), 'w') as f1: - for cont in contours_w: - cont = np.stack([cont[:, 1], cont[:, 0]], 1) - cont = cont.flatten().astype(str).tolist() - cont = ','.join(cont) - f1.write(cont + '\n') - - -if __name__ == '__main__': - try: - steam_manager_api = StreamManagerApi() - - ret = steam_manager_api.InitManager() - if ret != 0: - print("Failed to init Stream manager, ret=%s" % str(ret)) - exit() - - - MODES = stat.S_IWUSR | stat.S_IRUSR - with os.fdopen(os.open("./t.pipeline", os.O_RDONLY, MODES), 'rb') as f: - pipeline_str = f.read() - ret = steam_manager_api.CreateMultipleStreams(pipeline_str) - if ret != 0: - print("Failed to create Stream, ret=%s" % str(ret)) - exit() - - - means = (0.485, 0.456, 0.406) - stds = (0.229, 0.224, 0.225) - FILEPATH = "./data/total-text/" - if os.path.exists(FILEPATH) != 1: - print("The filepath does not exist !") - exit() - for filename in os.listdir(FILEPATH): - image_path = FILEPATH + filename - if image_path.split('.')[-1] != 'jpg': - continue - IMAGE_PATH = image_path - image = Image.open(IMAGE_PATH) - image = np.array(image) - H, W, _ = image.shape - image = resize(image, cfg.input_size) - image = norm(image, np.array(means), np.array(stds)) - image = image.transpose(2, 0, 1) - visionList = MxpiDataType.MxpiVisionList() - visionVec = visionList.visionVec.add() - - visionInfo = visionVec.visionInfo - visionInfo.width = image.shape[1] - visionInfo.height = image.shape[0] - visionInfo.widthAligned = image.shape[1] - visionInfo.heightAligned = image.shape[0] - - visionData = visionVec.visionData - visionData.dataStr = image.tobytes() - visionData.deviceId = 0 - visionData.memType = 0 - visionData.dataSize = len(image) - - KEY0 = b"appsrc0" - - protobufVec = InProtobufVector() - protobuf = MxProtobufIn() - protobuf.key = KEY0 - protobuf.type = b"MxTools.MxpiVisionList" - protobuf.protobuf = visionList.SerializeToString() - protobufVec.push_back(protobuf) - STEAMNAME = b'detection' - INPLUGINID = 0 - uniqueId = steam_manager_api.SendProtobuf(STEAMNAME, INPLUGINID, protobufVec) - if uniqueId < 0: - print("Failed to send data to stream.") - exit() - keys = [b"mxpi_tensorinfer0"] - keyVec = StringVector() - for key in keys: - keyVec.push_back(key) - - - infer = steam_manager_api.GetResult(STEAMNAME, b'appsink0', keyVec) - if(infer.metadataVec.size() == 0): - print("Get no data from stream 
!") - exit() - infer_result = infer.metadataVec[0] - if infer_result.errorCode != 0: - print("GetResult error. errorCode=%d , errMsg=%s" % (infer_result.errorCode, infer_result.errMsg)) - exit() - result = MxpiDataType.MxpiTensorPackageList() - result.ParseFromString(infer_result.serializedMetadata) - pred = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32) - pred_array = pred.reshape(1, 7, 512, 512) - tr_pred = pred_array[:, 0: 2, : , :].reshape(2, 512, 512) - tcl_pred = pred_array[:, 2:4, :, :].reshape(2, 512, 512) - sin_pred = pred_array[:, 4, :, :].reshape(512, 512) - cos_pred = pred_array[:, 5, :, :].reshape(512, 512) - radii_pred = pred_array[:, 6, :, :].reshape(512, 512) - tr_pred = zerodimsoftmax(tr_pred) - tcl_pred = zerodimsoftmax(tcl_pred) - td = TextDetector(cfg.tr_thresh, cfg.tcl_thresh) - contours = td.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) - output = { - 'image': image, - 'tr': tr_pred, - 'tcl': tcl_pred, - 'sin': sin_pred, - 'cos': cos_pred, - 'radii': radii_pred - } - tr_pred, tcl_pred = output['tr'], output['tcl'] - img_show = image.transpose(1, 2, 0) - img_show = ((img_show * stds + means) * 255).astype(np.uint8) - img_show, contours = rescale_result(img_show, contours, H, W) - mkdirs(cfg.output_dir+"/test") - write_to_file(contours, os.path.join(cfg.output_dir+"/test", filename.replace('jpg', 'txt'))) - pred_vis = visualize_detection(img_show, contours) - - - steam_manager_api.DestroyAllStreams() - print('Computing DetEval in {}/{}'.format(cfg.output_dir, "test")) - subprocess.call(['python3.9', './util/Deteval.py', "test", '--tr', '0.7', '--tp', '0.6']) - subprocess.call(['python3.9', './util/Deteval.py', "test", '--tr', '0.8', '--tp', '0.4']) - print('End.') - except KeyError: - print("get result dict failed!") - except ValueError: - print("Tensor shape not match,maybe use wrong image type!") - except FileNotFoundError: +#!/usr/bin/env python +# coding=utf-8 + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import json +import os +import math +import stat +import subprocess +import cv2 +import numpy as np +import MxpiDataType_pb2 as MxpiDataType +from PIL import Image +import torch + +from StreamManagerApi import StreamManagerApi, MxProtobufIn, InProtobufVector, StringVector + +from util.misc import fill_hole, regularize_sin_cos +from util.detection import TextDetector +from util.misc import to_device, mkdirs, rescale_result +from util.config import config as cfg +from util.visualize import visualize_detection + + +def zerodimsoftmax(x): + first = x[0, :, :].reshape(512, 512) + second = x[1, :, :].reshape(512, 512) + fexp = np.exp(first) + sexp = np.exp(second) + sumexp = fexp + sexp + fxf = fexp / sumexp + fxs = sexp / sumexp + fx = np.zeros((2, 512, 512)) + fx[0, :, :] = fxf + fx[1, :, :] = fxs + return fx + + + +def norm(image_n, mean, std): + image_n = image_n.astype(np.float32) + image_n /= 255.0 + image_n -= mean + image_n /= std + return image_n + + +def resize(image_r, size): + h, w, _ = image_r.shape + image_r = cv2.resize(image_r, (size, size)) + return image_r + + +def write_to_file(contours_w, file_path): + with os.fdopen(os.open(file_path, os.O_WRONLY | os.O_CREAT, MODES), 'w') as f1: + for cont in contours_w: + cont = np.stack([cont[:, 1], cont[:, 0]], 1) + cont = cont.flatten().astype(str).tolist() + cont = ','.join(cont) + f1.write(cont + '\n') + + +if __name__ == '__main__': + try: + steam_manager_api = StreamManagerApi() + + ret = steam_manager_api.InitManager() + if ret != 0: + print("Failed to init Stream manager, ret=%s" % str(ret)) + exit() + + + MODES = stat.S_IWUSR | stat.S_IRUSR + with os.fdopen(os.open("./t.pipeline", os.O_RDONLY, MODES), 'rb') as f: + pipeline_str = f.read() + ret = steam_manager_api.CreateMultipleStreams(pipeline_str) + if ret != 0: + print("Failed to create Stream, ret=%s" % str(ret)) + exit() + + + means = (0.485, 0.456, 0.406) + stds = (0.229, 0.224, 0.225) + FILEPATH = "./data/total-text/" + if os.path.exists(FILEPATH) != 1: + print("The filepath does not exist !") + exit() + for filename in os.listdir(FILEPATH): + image_path = FILEPATH + filename + if image_path.split('.')[-1] != 'jpg': + continue + IMAGE_PATH = image_path + image = Image.open(IMAGE_PATH) + image = np.array(image) + H, W, _ = image.shape + image = resize(image, cfg.input_size) + image = norm(image, np.array(means), np.array(stds)) + image = image.transpose(2, 0, 1) + visionList = MxpiDataType.MxpiVisionList() + visionVec = visionList.visionVec.add() + + visionInfo = visionVec.visionInfo + visionInfo.width = image.shape[1] + visionInfo.height = image.shape[0] + visionInfo.widthAligned = image.shape[1] + visionInfo.heightAligned = image.shape[0] + + visionData = visionVec.visionData + visionData.dataStr = image.tobytes() + visionData.deviceId = 0 + visionData.memType = 0 + visionData.dataSize = len(image) + + KEY0 = b"appsrc0" + + protobufVec = InProtobufVector() + protobuf = MxProtobufIn() + protobuf.key = KEY0 + protobuf.type = b"MxTools.MxpiVisionList" + protobuf.protobuf = visionList.SerializeToString() + protobufVec.push_back(protobuf) + STEAMNAME = b'detection' + INPLUGINID = 0 + uniqueId = steam_manager_api.SendProtobuf(STEAMNAME, INPLUGINID, protobufVec) + if uniqueId < 0: + print("Failed to send data to stream.") + exit() + keys = [b"mxpi_tensorinfer0"] + keyVec = StringVector() + for key in keys: + keyVec.push_back(key) + + + infer = steam_manager_api.GetResult(STEAMNAME, b'appsink0', keyVec) + if(infer.metadataVec.size() == 0): + print("Get no data from stream 
!") + exit() + infer_result = infer.metadataVec[0] + if infer_result.errorCode != 0: + print("GetResult error. errorCode=%d , errMsg=%s" % (infer_result.errorCode, infer_result.errMsg)) + exit() + result = MxpiDataType.MxpiTensorPackageList() + result.ParseFromString(infer_result.serializedMetadata) + pred = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32) + pred_array = pred.reshape(1, 7, 512, 512) + tr_pred = pred_array[:, 0: 2, : , :].reshape(2, 512, 512) + tcl_pred = pred_array[:, 2:4, :, :].reshape(2, 512, 512) + sin_pred = pred_array[:, 4, :, :].reshape(512, 512) + cos_pred = pred_array[:, 5, :, :].reshape(512, 512) + radii_pred = pred_array[:, 6, :, :].reshape(512, 512) + tr_pred = zerodimsoftmax(tr_pred) + tcl_pred = zerodimsoftmax(tcl_pred) + td = TextDetector(cfg.tr_thresh, cfg.tcl_thresh) + contours = td.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) + output = { + 'image': image, + 'tr': tr_pred, + 'tcl': tcl_pred, + 'sin': sin_pred, + 'cos': cos_pred, + 'radii': radii_pred + } + tr_pred, tcl_pred = output['tr'], output['tcl'] + img_show = image.transpose(1, 2, 0) + img_show = ((img_show * stds + means) * 255).astype(np.uint8) + img_show, contours = rescale_result(img_show, contours, H, W) + mkdirs(cfg.output_dir+"/test") + write_to_file(contours, os.path.join(cfg.output_dir+"/test", filename.replace('jpg', 'txt'))) + pred_vis = visualize_detection(img_show, contours) + + + steam_manager_api.DestroyAllStreams() + print('Computing DetEval in {}/{}'.format(cfg.output_dir, "test")) + subprocess.call(['python3.9', './util/Deteval.py', "test", '--tr', '0.7', '--tp', '0.6']) + subprocess.call(['python3.9', './util/Deteval.py', "test", '--tr', '0.8', '--tp', '0.4']) + print('End.') + except KeyError: + print("get result dict failed!") + except ValueError: + print("Tensor shape not match,maybe use wrong image type!") + except FileNotFoundError: print("input image not found!") \ No newline at end of file diff --git a/contrib/TextSnake/main.py b/contrib/TextSnake/main.py index a093aac89..75ad92769 100644 --- a/contrib/TextSnake/main.py +++ b/contrib/TextSnake/main.py @@ -1,173 +1,172 @@ -#!/usr/bin/env python -# coding=utf-8 - -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import json -import os -import stat -import cv2 -import numpy as np -from PIL import Image -import MxpiDataType_pb2 as MxpiDataType -from StreamManagerApi import StreamManagerApi, MxProtobufIn, InProtobufVector, StringVector -from util.misc import fill_hole, regularize_sin_cos -from util.detection import TextDetector -from util.misc import to_device, mkdirs, rescale_result -from util.config import config as cfg -from util.visualize import visualize_detection - - -def zerodimsoftmax(x): - first = x[0, :, :].reshape(512, 512) - second = x[1, :, :].reshape(512, 512) - fexp = np.exp(first) - sexp = np.exp(second) - sumexp = fexp + sexp - fxf = fexp / sumexp - fxs = sexp / sumexp - fx = np.zeros((2, 512, 512)) - fx[0, :, :] = fxf - fx[1, :, :] = fxs - return fx - - - -def norm(image_n , mean , std): - image_n = image_n.astype(np.float32) - image_n /= 255.0 - image_n -= mean - image_n /= std - return image_n - - -def resize(image_r , size): - h, w, _ = image_r.shape - image_r = cv2.resize(image_r, (size, size)) - return image_r - - -if __name__ == '__main__': - try: - steam_manager_api = StreamManagerApi() - - ret = steam_manager_api.InitManager() - if ret != 0: - print("Failed to init Stream manager, ret=%s" % str(ret)) - exit() - - - MODES = stat.S_IWUSR | stat.S_IRUSR - with os.fdopen(os.open("./t.pipeline", os.O_RDONLY, MODES), 'rb') as f: - pipeline_str = f.read() - ret = steam_manager_api.CreateMultipleStreams(pipeline_str) - if ret != 0: - print("Failed to create Stream, ret=%s" % str(ret)) - exit() - - means = (0.485, 0.456, 0.406) - stds = (0.229, 0.224, 0.225) - IMAGE_PATH = './test.jpg' - - image = Image.open(IMAGE_PATH) - image = np.array(image) - H, W, _ = image.shape - - image = resize(image, cfg.input_size) - image = norm(image, np.array(means), np.array(stds)) - - image = image.transpose(2, 0, 1) - visionList = MxpiDataType.MxpiVisionList() - visionVec = visionList.visionVec.add() - - visionInfo = visionVec.visionInfo - visionInfo.width = image.shape[1] - visionInfo.height = image.shape[0] - visionInfo.widthAligned = image.shape[1] - visionInfo.heightAligned = image.shape[0] - - visionData = visionVec.visionData - visionData.dataStr = image.tobytes() - visionData.deviceId = 0 - visionData.memType = 0 - visionData.dataSize = len(image) - - KEY0 = b"appsrc0" - - protobufVec = InProtobufVector() - protobuf = MxProtobufIn() - protobuf.key = KEY0 - protobuf.type = b"MxTools.MxpiVisionList" - protobuf.protobuf = visionList.SerializeToString() - protobufVec.push_back(protobuf) - STEAMNAME = b'detection' - INPLUGINID = 0 - uniqueId = steam_manager_api.SendProtobuf(STEAMNAME, INPLUGINID, protobufVec) - if uniqueId < 0: - print("Failed to send data to stream.") - exit() - keys = [b"mxpi_tensorinfer0"] - keyVec = StringVector() - for key in keys: - keyVec.push_back(key) - infer = steam_manager_api.GetResult(STEAMNAME, b'appsink0', keyVec) - if(infer.metadataVec.size() == 0): - print("Get no data from stream !") - exit() - infer_result = infer.metadataVec[0] - if infer_result.errorCode != 0: - print("GetResult error. 
errorCode=%d , errMsg=%s" % (infer_result.errorCode, infer_result.errMsg)) - exit() - result = MxpiDataType.MxpiTensorPackageList() - result.ParseFromString(infer_result.serializedMetadata) - pred = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32) - pred_array = pred.reshape(1, 7, 512, 512) - tr_pred = pred_array[:, 0:2, :, :].reshape(2, 512, 512) - tcl_pred = pred_array[:, 2:4, :, :].reshape(2, 512, 512) - sin_pred = pred_array[:, 4, :, :].reshape(512, 512) - cos_pred = pred_array[:, 5, :, :].reshape(512, 512) - radii_pred = pred_array[:, 6, :, :].reshape(512, 512) - tr_pred = zerodimsoftmax(tr_pred) - tcl_pred = zerodimsoftmax(tcl_pred) - td = TextDetector(cfg.tr_thresh, cfg.tcl_thresh) - contours = td.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) - output = { - 'image': image, - 'tr': tr_pred, - 'tcl': tcl_pred, - 'sin': sin_pred, - 'cos': cos_pred, - 'radii': radii_pred - } - - tr_pred, tcl_pred = output['tr'], output['tcl'] - - img_show = image.transpose(1, 2, 0) - img_show = ((img_show * stds + means) * 255).astype(np.uint8) - img_show, contours = rescale_result(img_show, contours, H, W) - VIS_DIR = "result.jpg" - pred_vis = visualize_detection(img_show, contours) - mkdirs(VIS_DIR) - cv2.imwrite(VIS_DIR, pred_vis) - steam_manager_api.DestroyAllStreams() - - except KeyError: - print("get result dict failed!") - except ValueError: - print("Tensor shape not match,maybe use wrong image type!") - except FileNotFoundError: - print("input image not found!") +#!/usr/bin/env python +# coding=utf-8 + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
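+
+# Single-image demo: ./test.jpg is resized to cfg.input_size, normalized
+# with the ImageNet mean/std below, converted to CHW layout and sent through
+# the 'detection' stream. The 1x7x512x512 output is split into tr/tcl
+# channel pairs (softmax-normalized) plus sin/cos/radii maps, post-processed
+# by TextDetector, rescaled to the original resolution and visualized as
+# result.jpg.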
+ + +import json +import os +import stat +import cv2 +import numpy as np +from PIL import Image +import MxpiDataType_pb2 as MxpiDataType +from StreamManagerApi import StreamManagerApi, MxProtobufIn, InProtobufVector, StringVector +from util.misc import fill_hole, regularize_sin_cos +from util.detection import TextDetector +from util.misc import to_device, mkdirs, rescale_result +from util.config import config as cfg +from util.visualize import visualize_detection + + +def zerodimsoftmax(x): + first = x[0, :, :].reshape(512, 512) + second = x[1, :, :].reshape(512, 512) + fexp = np.exp(first) + sexp = np.exp(second) + sumexp = fexp + sexp + fxf = fexp / sumexp + fxs = sexp / sumexp + fx = np.zeros((2, 512, 512)) + fx[0, :, :] = fxf + fx[1, :, :] = fxs + return fx + + + +def norm(image_n , mean , std): + image_n = image_n.astype(np.float32) + image_n /= 255.0 + image_n -= mean + image_n /= std + return image_n + + +def resize(image_r , size): + h, w, _ = image_r.shape + image_r = cv2.resize(image_r, (size, size)) + return image_r + + +if __name__ == '__main__': + try: + steam_manager_api = StreamManagerApi() + + ret = steam_manager_api.InitManager() + if ret != 0: + print("Failed to init Stream manager, ret=%s" % str(ret)) + exit() + + + MODES = stat.S_IWUSR | stat.S_IRUSR + with os.fdopen(os.open("./t.pipeline", os.O_RDONLY, MODES), 'rb') as f: + pipeline_str = f.read() + ret = steam_manager_api.CreateMultipleStreams(pipeline_str) + if ret != 0: + print("Failed to create Stream, ret=%s" % str(ret)) + exit() + + means = (0.485, 0.456, 0.406) + stds = (0.229, 0.224, 0.225) + IMAGE_PATH = './test.jpg' + + image = Image.open(IMAGE_PATH) + image = np.array(image) + H, W, _ = image.shape + + image = resize(image, cfg.input_size) + image = norm(image, np.array(means), np.array(stds)) + + image = image.transpose(2, 0, 1) + visionList = MxpiDataType.MxpiVisionList() + visionVec = visionList.visionVec.add() + + visionInfo = visionVec.visionInfo + visionInfo.width = image.shape[1] + visionInfo.height = image.shape[0] + visionInfo.widthAligned = image.shape[1] + visionInfo.heightAligned = image.shape[0] + + visionData = visionVec.visionData + visionData.dataStr = image.tobytes() + visionData.deviceId = 0 + visionData.memType = 0 + visionData.dataSize = len(image) + + KEY0 = b"appsrc0" + + protobufVec = InProtobufVector() + protobuf = MxProtobufIn() + protobuf.key = KEY0 + protobuf.type = b"MxTools.MxpiVisionList" + protobuf.protobuf = visionList.SerializeToString() + protobufVec.push_back(protobuf) + STEAMNAME = b'detection' + INPLUGINID = 0 + uniqueId = steam_manager_api.SendProtobuf(STEAMNAME, INPLUGINID, protobufVec) + if uniqueId < 0: + print("Failed to send data to stream.") + exit() + keys = [b"mxpi_tensorinfer0"] + keyVec = StringVector() + for key in keys: + keyVec.push_back(key) + infer = steam_manager_api.GetResult(STEAMNAME, b'appsink0', keyVec) + if(infer.metadataVec.size() == 0): + print("Get no data from stream !") + exit() + infer_result = infer.metadataVec[0] + if infer_result.errorCode != 0: + print("GetResult error. 
errorCode=%d , errMsg=%s" % (infer_result.errorCode, infer_result.errMsg)) + exit() + result = MxpiDataType.MxpiTensorPackageList() + result.ParseFromString(infer_result.serializedMetadata) + pred = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32) + pred_array = pred.reshape(1, 7, 512, 512) + tr_pred = pred_array[:, 0:2, :, :].reshape(2, 512, 512) + tcl_pred = pred_array[:, 2:4, :, :].reshape(2, 512, 512) + sin_pred = pred_array[:, 4, :, :].reshape(512, 512) + cos_pred = pred_array[:, 5, :, :].reshape(512, 512) + radii_pred = pred_array[:, 6, :, :].reshape(512, 512) + tr_pred = zerodimsoftmax(tr_pred) + tcl_pred = zerodimsoftmax(tcl_pred) + td = TextDetector(cfg.tr_thresh, cfg.tcl_thresh) + contours = td.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) + output = { + 'image': image, + 'tr': tr_pred, + 'tcl': tcl_pred, + 'sin': sin_pred, + 'cos': cos_pred, + 'radii': radii_pred + } + + tr_pred, tcl_pred = output['tr'], output['tcl'] + + img_show = image.transpose(1, 2, 0) + img_show = ((img_show * stds + means) * 255).astype(np.uint8) + img_show, contours = rescale_result(img_show, contours, H, W) + VIS_DIR = "result.jpg" + pred_vis = visualize_detection(img_show, contours) + cv2.imwrite(VIS_DIR, pred_vis) + steam_manager_api.DestroyAllStreams() + + except KeyError: + print("get result dict failed!") + except ValueError: + print("Tensor shape not match,maybe use wrong image type!") + except FileNotFoundError: + print("input image not found!") \ No newline at end of file diff --git a/contrib/TextSnake/misc.py b/contrib/TextSnake/misc.py new file mode 100644 index 000000000..c4d2d25db --- /dev/null +++ b/contrib/TextSnake/misc.py @@ -0,0 +1,245 @@ +import numpy as np +import errno +import os +import cv2 +from shapely.geometry import Polygon +from util.config import config as cfg + + +def to_device(*tensors): + if len(tensors) < 2: + return tensors[0].to(cfg.device) + return (t.to(cfg.device) for t in tensors) + + +def mkdirs(newdir): + """ + make directory with parent path + :param newdir: target path + """ + try: + if not os.path.exists(newdir): + os.makedirs(newdir) + except OSError as err: + # Reraise the error unless it's about an already existing directory + if err.errno != errno.EEXIST or not os.path.isdir(newdir): + raise + +def rescale_result(image, contours, H, W): + ori_H, ori_W = image.shape[:2] + image = cv2.resize(image, (W, H)) + for cont in contours: + cont[:, 0] = (cont[:, 0] * W / ori_W).astype(int) + cont[:, 1] = (cont[:, 1] * H / ori_H).astype(int) + return image, contours + + +def fill_hole(input_mask): + h, w = input_mask.shape + canvas = np.zeros((h + 2, w + 2), np.uint8) + canvas[1:h + 1, 1:w + 1] = input_mask.copy() + + mask = np.zeros((h + 4, w + 4), np.uint8) + + cv2.floodFill(canvas, mask, (0, 0), 1) + canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool_) + + return (~canvas | input_mask.astype(np.uint8)) + + +def regularize_sin_cos(sin, cos): + # regularization + scale = np.sqrt(1.0 / (sin ** 2 + cos ** 2)) + return sin * scale, cos * scale + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + +def norm2(x, axis=None): + if axis: + return np.sqrt(np.sum(x ** 2, axis=axis)) + return np.sqrt(np.sum(x ** 2)) + +def cos(p1, p2): + return 
(p1 * p2).sum() / (norm2(p1) * norm2(p2)) + +def vector_sin(v): + assert len(v) == 2 + # sin = y / (sqrt(x^2 + y^2)) + l = np.sqrt(v[0] ** 2 + v[1] ** 2) + return v[1] / l + +def vector_cos(v): + assert len(v) == 2 + # cos = x / (sqrt(x^2 + y^2)) + l = np.sqrt(v[0] ** 2 + v[1] ** 2) + return v[0] / l + +def find_bottom(pts): + + if len(pts) > 4: + e = np.concatenate([pts, pts[:3]]) + candidate = [] + for i in range(1, len(pts) + 1): + v_prev = e[i] - e[i - 1] + v_next = e[i + 2] - e[i + 1] + if cos(v_prev, v_next) < -0.7: + candidate.append((i % len(pts), (i + 1) % len(pts), norm2(e[i] - e[i + 1]))) + + if len(candidate) != 2 or candidate[0][0] == candidate[1][1] or candidate[0][1] == candidate[1][0]: + # if candidate number < 2, or two bottom are joined, select 2 farthest edge + mid_list = [] + for i in range(len(pts)): + mid_point = (e[i] + e[(i + 1) % len(pts)]) / 2 + mid_list.append((i, (i + 1) % len(pts), mid_point)) + + dist_list = [] + for i in range(len(pts)): + for j in range(len(pts)): + s1, e1, mid1 = mid_list[i] + s2, e2, mid2 = mid_list[j] + dist = norm2(mid1 - mid2) + dist_list.append((s1, e1, s2, e2, dist)) + bottom_idx = np.argsort([dist for s1, e1, s2, e2, dist in dist_list])[-2:] + bottoms = [dist_list[bottom_idx[0]][:2], dist_list[bottom_idx[1]][:2]] + else: + bottoms = [candidate[0][:2], candidate[1][:2]] + + else: + d1 = norm2(pts[1] - pts[0]) + norm2(pts[2] - pts[3]) + d2 = norm2(pts[2] - pts[1]) + norm2(pts[0] - pts[3]) + bottoms = [(0, 1), (2, 3)] if d1 < d2 else [(1, 2), (3, 0)] + assert len(bottoms) == 2, 'fewer than 2 bottoms' + return bottoms + + +def split_long_edges(points, bottoms): + """ + Find two long edge sequence of and polygon + """ + b1_start, b1_end = bottoms[0] + b2_start, b2_end = bottoms[1] + n_pts = len(points) + + i = b1_end + 1 + long_edge_1 = [] + while (i % n_pts != b2_end): + long_edge_1.append((i - 1, i)) + i = (i + 1) % n_pts + + i = b2_end + 1 + long_edge_2 = [] + while (i % n_pts != b1_end): + long_edge_2.append((i - 1, i)) + i = (i + 1) % n_pts + return long_edge_1, long_edge_2 + + +def find_long_edges(points, bottoms): + b1_start, b1_end = bottoms[0] + b2_start, b2_end = bottoms[1] + n_pts = len(points) + i = (b1_end + 1) % n_pts + long_edge_1 = [] + + while (i % n_pts != b2_end): + start = (i - 1) % n_pts + end = i % n_pts + long_edge_1.append((start, end)) + i = (i + 1) % n_pts + + i = (b2_end + 1) % n_pts + long_edge_2 = [] + while (i % n_pts != b1_end): + start = (i - 1) % n_pts + end = i % n_pts + long_edge_2.append((start, end)) + i = (i + 1) % n_pts + return long_edge_1, long_edge_2 + + +def split_edge_seqence(points, long_edge, n_parts): + + edge_length = [norm2(points[e1] - points[e2]) for e1, e2 in long_edge] + point_cumsum = np.cumsum([0] + edge_length) + total_length = sum(edge_length) + length_per_part = total_length / n_parts + + cur_node = 0 # first point + splited_result = [] + + for i in range(1, n_parts): + cur_end = i * length_per_part + + while(cur_end > point_cumsum[cur_node + 1]): + cur_node += 1 + + e1, e2 = long_edge[cur_node] + e1, e2 = points[e1], points[e2] + + # start_point = points[long_edge[cur_node]] + end_shift = cur_end - point_cumsum[cur_node] + ratio = end_shift / edge_length[cur_node] + new_point = e1 + ratio * (e2 - e1) + # print(cur_end, point_cumsum[cur_node], end_shift, edge_length[cur_node], '=', new_point) + splited_result.append(new_point) + + # add first and last point + p_first = points[long_edge[0][0]] + p_last = points[long_edge[-1][1]] + splited_result = [p_first] + splited_result + 
[p_last] + return np.stack(splited_result) + +def disjoint_find(x, F): + if F[x] == x: + return x + F[x] = disjoint_find(F[x], F) + return F[x] + +def disjoint_merge(x, y, F): + x = disjoint_find(x, F) + y = disjoint_find(y, F) + if x == y: + return False + F[y] = x + return True + + +def merge_polygons(polygons, merge_map): + + def merge_two_polygon(p1, p2): + p2 = Polygon(p2) + merged = p1.union(p2) + return merged + + merge_map = [disjoint_find(x, merge_map) for x in range(len(merge_map))] + merge_map = np.array(merge_map) + final_polygons = [] + + for i in np.unique(merge_map): + merge_idx = np.where(merge_map == i)[0] + if len(merge_idx) > 0: + merged = Polygon(polygons[merge_idx[0]]) + for j in range(1, len(merge_idx)): + merged = merge_two_polygon(merged, polygons[merge_idx[j]]) + x, y = merged.exterior.coords.xy + final_polygons.append(np.stack([x, y], axis=1).astype(int)) + + return final_polygons + + diff --git a/contrib/TextSnake/util/Deteval.py b/contrib/TextSnake/util/Deteval.py new file mode 100644 index 000000000..ba8b10bf7 --- /dev/null +++ b/contrib/TextSnake/util/Deteval.py @@ -0,0 +1,294 @@ +# modified from https://github.com/cs-chan/Total-Text-Dataset/blob/master/Evaluation_Protocol/Python_scripts/Deteval.py +import numpy as np + +from os import listdir +from scipy import io +from polygon_wrapper import iod +from polygon_wrapper import area_of_intersection +from polygon_wrapper import area +import argparse +from tqdm import tqdm + +parser = argparse.ArgumentParser() + +# basic opts +parser.add_argument('exp_name', type=str, help='Model output directory') +parser.add_argument('--tr', type=float, default=0.7, help='Recall threshold') +parser.add_argument('--tp', type=float, default=0.6, help='Precision threshold') +args = parser.parse_args() + +""" +Input format: y0,x0, ..... yn,xn. 
Each detection is separated by the end of line token ('\n')' +""" + +input_dir = 'output/{}'.format(args.exp_name) +gt_dir = 'data/total-text/gt/Test' +fid_path = 'Python_Pascal_result_last_check.txt' + +allInputs = listdir(input_dir) + + +def input_reading_mod(input_dir, input): + """This helper reads input from txt files""" + with open('%s/%s' % (input_dir, input), 'r') as input_fid: + pred = input_fid.readlines() + det = [x.strip('\n') for x in pred] + return det + + +def gt_reading_mod(gt_dir, gt_id): + """This helper reads groundtruths from mat files""" + gt_id = gt_id.split('.')[0] + gt = io.loadmat('%s/poly_gt_%s.mat' % (gt_dir, gt_id)) + gt = gt['polygt'] + return gt + + +def detection_filtering(detections, groundtruths, threshold=0.5): + for gt_id, gt in enumerate(groundtruths): + if (gt[5] == '#') and (gt[1].shape[1] > 1): + gt_x = list(map(int, np.squeeze(gt[1]))) + gt_y = list(map(int, np.squeeze(gt[3]))) + for det_id, detection in enumerate(detections): + detection = detection.split(',') + detection = list(map(int, detection[0:-1])) + det_y = detection[0::2] + det_x = detection[1::2] + det_gt_iou = iod(det_x, det_y, gt_x, gt_y) + if det_gt_iou > threshold: + detections[det_id] = [] + + detections[:] = [item for item in detections if item != []] + return detections + +def sigma_calculation(det_x, det_y, gt_x, gt_y): + """ + sigma = inter_area / gt_area + """ + return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(gt_x, gt_y)), 2) + +def tau_calculation(det_x, det_y, gt_x, gt_y): + """ + tau = inter_area / det_area + """ + return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / area(det_x, det_y)), 2) + + + + +##############################Initialization################################### +global_tp = 0 +global_fp = 0 +global_fn = 0 +global_sigma = [] +global_tau = [] +tr = args.tr +tp = args.tp +fsc_k = 0.8 +k = 2 +############################################################################### + +for i, input_id in enumerate(tqdm(allInputs)): + if (input_id != '.DS_Store'): + # print(i, input_id) + detections = input_reading_mod(input_dir, input_id) + groundtruths = gt_reading_mod(gt_dir, input_id) + detections = detection_filtering(detections, groundtruths) # filters detections overlapping with DC area + dc_id = np.where(groundtruths[:, 5] == '#') + groundtruths = np.delete(groundtruths, (dc_id), (0)) + + local_sigma_table = np.zeros((groundtruths.shape[0], len(detections))) + local_tau_table = np.zeros((groundtruths.shape[0], len(detections))) + for gt_id, gt in enumerate(groundtruths): + if len(detections) > 0: + for det_id, detection in enumerate(detections): + detection = detection.split(',') + detection = list(map(int, detection[:-2])) + det_y = detection[0::2] + det_x = detection[1::2] + gt_x = list(map(int, np.squeeze(gt[1]))) + gt_y = list(map(int, np.squeeze(gt[3]))) + + local_sigma_table[gt_id, det_id] = sigma_calculation(det_x, det_y, gt_x, gt_y) + local_tau_table[gt_id, det_id] = tau_calculation(det_x, det_y, gt_x, gt_y) + global_sigma.append(local_sigma_table) + global_tau.append(local_tau_table) + +global_accumulative_recall = 0 +global_accumulative_precision = 0 +total_num_gt = 0 +total_num_det = 0 + +def one_to_one(local_sigma_table, local_tau_table, local_accumulative_recall, + local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, + gt_flag, det_flag): + for gt_id in range(num_gt): + qualified_sigma_candidates = np.where(local_sigma_table[gt_id, :] > tr) + num_qualified_sigma_candidates = 
qualified_sigma_candidates[0].shape[0] + qualified_tau_candidates = np.where(local_tau_table[gt_id, :] > tp) + num_qualified_tau_candidates = qualified_tau_candidates[0].shape[0] + + + if (num_qualified_sigma_candidates == 1) and (num_qualified_tau_candidates == 1): + global_accumulative_recall = global_accumulative_recall + 1.0 + global_accumulative_precision = global_accumulative_precision + 1.0 + local_accumulative_recall = local_accumulative_recall + 1.0 + local_accumulative_precision = local_accumulative_precision + 1.0 + + gt_flag[0, gt_id] = 1 + matched_det_id = np.where(local_sigma_table[gt_id, :] > tr) + det_flag[0, matched_det_id] = 1 + return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag + +def one_to_many(local_sigma_table, local_tau_table, local_accumulative_recall, + local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, + gt_flag, det_flag): + for gt_id in range(num_gt): + #skip the following if the groundtruth was matched + if gt_flag[0, gt_id] > 0: + continue + + non_zero_in_sigma = np.where(local_sigma_table[gt_id, :] > 0) + num_non_zero_in_sigma = non_zero_in_sigma[0].shape[0] + + if num_non_zero_in_sigma >= k: + ####search for all detections that overlaps with this groundtruth + qualified_tau_candidates = np.where((local_tau_table[gt_id, :] >= tp) & (det_flag[0, :] == 0)) + num_qualified_tau_candidates = qualified_tau_candidates[0].shape[0] + + if num_qualified_tau_candidates == 1: + if ((local_tau_table[gt_id, qualified_tau_candidates] >= tp) and (local_sigma_table[gt_id, qualified_tau_candidates] >= tr)): + #became an one-to-one case + global_accumulative_recall = global_accumulative_recall + 1.0 + global_accumulative_precision = global_accumulative_precision + 1.0 + local_accumulative_recall = local_accumulative_recall + 1.0 + local_accumulative_precision = local_accumulative_precision + 1.0 + + gt_flag[0, gt_id] = 1 + det_flag[0, qualified_tau_candidates] = 1 + elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates]) >= tr): + gt_flag[0, gt_id] = 1 + det_flag[0, qualified_tau_candidates] = 1 + + global_accumulative_recall = global_accumulative_recall + fsc_k + global_accumulative_precision = global_accumulative_precision + num_qualified_tau_candidates * fsc_k + + local_accumulative_recall = local_accumulative_recall + fsc_k + local_accumulative_precision = local_accumulative_precision + num_qualified_tau_candidates * fsc_k + + return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag + +def many_to_many(local_sigma_table, local_tau_table, local_accumulative_recall, + local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, + gt_flag, det_flag): + for det_id in range(num_det): + # skip the following if the detection was matched + if det_flag[0, det_id] > 0: + continue + + non_zero_in_tau = np.where(local_tau_table[:, det_id] > 0) + num_non_zero_in_tau = non_zero_in_tau[0].shape[0] + + if num_non_zero_in_tau >= k: + ####search for all detections that overlaps with this groundtruth + qualified_sigma_candidates = np.where((local_sigma_table[:, det_id] >= tp) & (gt_flag[0, :] == 0)) + num_qualified_sigma_candidates = qualified_sigma_candidates[0].shape[0] + + if num_qualified_sigma_candidates == 1: + if ((local_tau_table[qualified_sigma_candidates, det_id] >= tp) and (local_sigma_table[qualified_sigma_candidates, det_id] >= tr)): 
+ #became an one-to-one case + global_accumulative_recall = global_accumulative_recall + 1.0 + global_accumulative_precision = global_accumulative_precision + 1.0 + local_accumulative_recall = local_accumulative_recall + 1.0 + local_accumulative_precision = local_accumulative_precision + 1.0 + + gt_flag[0, qualified_sigma_candidates] = 1 + det_flag[0, det_id] = 1 + elif (np.sum(local_tau_table[qualified_sigma_candidates, det_id]) >= tp): + det_flag[0, det_id] = 1 + gt_flag[0, qualified_sigma_candidates] = 1 + + global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k + global_accumulative_precision = global_accumulative_precision + fsc_k + + local_accumulative_recall = local_accumulative_recall + num_qualified_sigma_candidates * fsc_k + local_accumulative_precision = local_accumulative_precision + fsc_k + return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag + +fid = open(fid_path, 'w') + +for idx in range(len(global_sigma)): + + local_sigma_table = global_sigma[idx] + local_tau_table = global_tau[idx] + + num_gt = local_sigma_table.shape[0] + num_det = local_sigma_table.shape[1] + + total_num_gt = total_num_gt + num_gt + total_num_det = total_num_det + num_det + + local_accumulative_recall = 0 + local_accumulative_precision = 0 + gt_flag = np.zeros((1, num_gt)) + det_flag = np.zeros((1, num_det)) + + #######first check for one-to-one case########## + local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ + gt_flag, det_flag = one_to_one(local_sigma_table, local_tau_table, + local_accumulative_recall, local_accumulative_precision, + global_accumulative_recall, global_accumulative_precision, + gt_flag, det_flag) + + #######then check for one-to-many case########## + local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ + gt_flag, det_flag = one_to_many(local_sigma_table, local_tau_table, + local_accumulative_recall, local_accumulative_precision, + global_accumulative_recall, global_accumulative_precision, + gt_flag, det_flag) + + #######then check for many-to-many case########## + local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ + gt_flag, det_flag = many_to_many(local_sigma_table, local_tau_table, + local_accumulative_recall, local_accumulative_precision, + global_accumulative_recall, global_accumulative_precision, + gt_flag, det_flag) + + try: + local_precision = local_accumulative_precision / num_det + except ZeroDivisionError: + local_precision = 0 + + try: + local_recall = local_accumulative_recall / num_gt + except ZeroDivisionError: + local_recall = 0 + + str_write = ('%s: Precision = %.4f - Recall = %.4f\n' % (allInputs[idx], local_precision, local_recall)) + fid.write(str_write) +fid.close() + +try: + recall = global_accumulative_recall / total_num_gt +except ZeroDivisionError: + recall = 0 + +try: + precision = global_accumulative_precision / total_num_det +except ZeroDivisionError: + precision = 0 + +try: + f_score = 2*precision*recall/(precision+recall) +except ZeroDivisionError: + f_score = 0 + +fid = open(fid_path, 'a') +str_write = ('Precision = %.4f - Recall = %.4f - Fscore = %.4f\n' % (precision, recall, f_score)) +fid.write(str_write) +fid.close() + +print('Input: {}'.format(input_dir)) +print('Config: tr: {} - tp: {}'.format(tr, tp)) +print(str_write) 
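+
+# Note on the scoring above: a one-to-one match adds 1.0 to both the recall
+# and precision numerators, while one-to-many and many-to-many matches are
+# discounted by fsc_k per match (fsc_k is defined earlier in this script; the
+# standard DetEval value is 0.8). For example, assuming fsc_k = 0.8, one
+# groundtruth covered by three detections adds 0.8 to the recall numerator and
+# 3 * 0.8 = 2.4 to the precision numerator, before dividing by total_num_gt
+# and total_num_det respectively.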
+print('Done.') \ No newline at end of file diff --git a/contrib/TextSnake/util/__init__.py b/contrib/TextSnake/util/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/contrib/TextSnake/util/augmentation.py b/contrib/TextSnake/util/augmentation.py new file mode 100644 index 000000000..87fefd44c --- /dev/null +++ b/contrib/TextSnake/util/augmentation.py @@ -0,0 +1,339 @@ +import numpy as np +import math +import cv2 +import numpy.random as random + + +class Compose(object): + """Composes several augmentations together. + Args: + transforms (List[Transform]): list of transforms to compose. + Example: + >>> augmentations.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, pts=None): + for t in self.transforms: + img, pts = t(img, pts) + return img, pts + + +class RandomMirror(object): + def __init__(self): + pass + + def __call__(self, image, polygons=None): + if np.random.randint(2): + image = np.ascontiguousarray(image[:, ::-1]) + _, width, _ = image.shape + for polygon in polygons: + polygon.points[:, 0] = width - polygon.points[:, 0] + return image, polygons + + +class AugmentColor(object): + def __init__(self): + self.U = np.array([[-0.56543481, 0.71983482, 0.40240142], + [-0.5989477, -0.02304967, -0.80036049], + [-0.56694071, -0.6935729, 0.44423429]], dtype=np.float32) + self.EV = np.array([1.65513492, 0.48450358, 0.1565086], dtype=np.float32) + self.sigma = 0.1 + self.color_vec = None + + def __call__(self, img, polygons=None): + color_vec = self.color_vec + if self.color_vec is None: + if not self.sigma > 0.0: + color_vec = np.zeros(3, dtype=np.float32) + else: + color_vec = np.random.normal(0.0, self.sigma, 3) + + alpha = color_vec.astype(np.float32) * self.EV + noise = np.dot(self.U, alpha.T) * 255 + return np.clip(img + noise[np.newaxis, np.newaxis, :], 0, 255), polygons + + +class RandomContrast(object): + def __init__(self, lower=0.5, upper=1.5): + self.lower = lower + self.upper = upper + assert self.upper >= self.lower, "contrast upper must be >= lower." + assert self.lower >= 0, "contrast lower must be non-negative." 
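+        # note: __call__ below multiplies pixel intensities by a factor alpha
+        # drawn uniformly from [lower, upper], applied with probability 1/2
+        # (random.randint(2)); e.g. alpha = 1.2 raises contrast while
+        # alpha = 0.8 reduces it.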
+ + # expects float image + def __call__(self, image, polygons=None): + if random.randint(2): + alpha = random.uniform(self.lower, self.upper) + image *= alpha + return np.clip(image, 0, 255), polygons + + +class RandomBrightness(object): + def __init__(self, delta=32): + assert delta >= 0.0 + assert delta <= 255.0 + self.delta = delta + + def __call__(self, image, polygons=None): + image = image.astype(np.float32) + if random.randint(2): + delta = random.uniform(-self.delta, self.delta) + image += delta + return np.clip(image, 0, 255), polygons + + +class Rotate(object): + def __init__(self, up=30): + self.up = up + + def rotate(self, center, pt, theta): # 二维图形学的旋转 + xr, yr = center + yr = -yr + x, y = pt[:, 0], pt[:, 1] + y = -y + + theta = theta / 360 * 2 * math.pi + cos = math.cos(theta) + sin = math.sin(theta) + + _x = xr + (x - xr) * cos - (y - yr) * sin + _y = yr + (x - xr) * sin + (y - yr) * cos + + return _x, -_y + + def __call__(self, img, polygons=None): + if np.random.randint(2): + return img, polygons + angle = np.random.uniform(-self.up, self.up) # + rows, cols = img.shape[0:2] + M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1.0) + img = cv2.warpAffine(img, M, (cols, rows), borderValue=[0, 0, 0]) + center = cols / 2.0, rows / 2.0 + if polygons is not None: + for polygon in polygons: + x, y = self.rotate(center, polygon.points, angle) + pts = np.vstack([x, y]).T + polygon.points = pts + return img, polygons + +class SquarePadding(object): + + def __call__(self, image, pts=None): + + H, W, _ = image.shape + + if H == W: + return image, pts + + padding_size = max(H, W) + expand_image = np.zeros((padding_size, padding_size, 3), dtype=image.dtype) + + if H > W: + y0, x0 = 0, (H - W) // 2 + else: + y0, x0 = (W - H) // 2, 0 + if pts is not None: + pts[:, 0] += x0 + pts[:, 1] += y0 + + expand_image[y0:y0+H, x0:x0+W] = image + image = expand_image + + return image, pts + + +class Padding(object): + + def __init__(self, fill=0): + self.fill = fill + + def __call__(self, image, polygons=None): + if np.random.randint(2): + return image, polygons + + height, width, depth = image.shape + ratio = np.random.uniform(1, 2) + left = np.random.uniform(0, width * ratio - width) + top = np.random.uniform(0, height * ratio - height) + + expand_image = np.zeros( + (int(height * ratio), int(width * ratio), depth), + dtype=image.dtype) + expand_image[:, :, :] = self.fill + expand_image[int(top):int(top + height), + int(left):int(left + width)] = image + image = expand_image + + if polygons is not None: + for polygon in polygons: + polygon.points[:, 0] = polygon.points[:, 0] + left + polygon.points[:, 1] = polygon.points[:, 1] + top + return image, polygons + + +class RandomResizedCrop(object): + def __init__(self, size, scale=(0.3, 1.0), ratio=(3. / 4., 4. / 3.)): + self.size = (size, size) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. 
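+
+        Note: after 10 failed sampling attempts, get_params falls back to a
+        centered square crop of side min(H, W) (the fallback branch below).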
+ """ + for attempt in range(10): + area = img.shape[0] * img.shape[1] + target_area = np.random.uniform(*scale) * area + aspect_ratio = np.random.uniform(*ratio) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if np.random.random() < 0.5: + w, h = h, w + + if h < img.shape[0] and w < img.shape[1]: + j = np.random.randint(0, img.shape[1] - w) + i = np.random.randint(0, img.shape[0] - h) + return i, j, h, w + + # Fallback + w = min(img.shape[0], img.shape[1]) + i = (img.shape[0] - w) // 2 + j = (img.shape[1] - w) // 2 + return i, j, w, w + + def __call__(self, image, pts=None): + i, j, h, w = self.get_params(image, self.scale, self.ratio) + cropped = image[i:i + h, j:j + w, :] + pts = pts.copy() + mask = (pts[:, 1] >= i) * (pts[:, 0] >= j) * (pts[:, 1] < (i+h)) * (pts[:, 0] < (j+w)) + pts[~mask, 2] = -1 + scales = np.array([self.size[0]/w, self.size[1]/h]) + pts[:, :2] -= np.array([j, i]) + pts[:, :2] = (pts[:, :2] * scales) + img = cv2.resize(cropped, self.size) + return img, pts + + +class RandomResizedLimitCrop(object): + def __init__(self, size, scale=(0.3, 1.0), ratio=(3. / 4., 4. / 3.)): + self.size = (size, size) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + for attempt in range(10): + area = img.shape[0] * img.shape[1] + target_area = np.random.uniform(*scale) * area + aspect_ratio = np.random.uniform(*ratio) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + if np.random.random() < 0.5: + w, h = h, w + + if h < img.shape[0] and w < img.shape[1]: + j = np.random.randint(0, img.shape[1] - w) + i = np.random.randint(0, img.shape[0] - h) + return i, j, h, w + + # Fallback + w = min(img.shape[0], img.shape[1]) + i = (img.shape[0] - w) // 2 + j = (img.shape[1] - w) // 2 + return i, j, w, w + + def __call__(self, image, polygons=None): + i, j, h, w = self.get_params(image, self.scale, self.ratio) + + cropped = image[i:i + h, j:j + w, :] + scales = np.array([self.size[0] / w, self.size[1] / h]) + if polygons is not None: + for polygon in polygons: + polygon.points[:, 0] = (polygon.points[:, 0] - j) * scales[0] + polygon.points[:, 1] = (polygon.points[:, 1] - i) * scales[1] + + img = cv2.resize(cropped, self.size) + return img, polygons + + +class Normalize(object): + def __init__(self, mean, std): + self.mean = np.array(mean) + self.std = np.array(std) + + def __call__(self, image, polygons=None): + image = image.astype(np.float32) + image /= 255.0 + image -= self.mean + image /= self.std + return image, polygons + + +class Resize(object): + def __init__(self, size=256): + self.size = size + + def __call__(self, image, polygons=None): + h, w, _ = image.shape + image = cv2.resize(image, (self.size, + self.size)) + scales = np.array([self.size / w, self.size / h]) + + if polygons is not None: + for polygon in polygons: + polygon.points = polygon.points * scales + + return image, polygons + + +class Augmentation(object): + + def __init__(self, size, mean, std): + self.size = size + self.mean = mean + self.std = std + self.augmentation = Compose([ + # Resize(size), + Padding(), + RandomResizedLimitCrop(size=size, scale=(0.24, 1.0), ratio=(0.33, 3)), + # RandomBrightness(), + # RandomContrast(), + RandomMirror(), + Rotate(), + Normalize(mean, std) + ]) + + def __call__(self, image, polygons=None): + return self.augmentation(image, polygons) + + +class BaseTransform(object): + def __init__(self, size, mean, 
std): + self.size = size + self.mean = mean + self.std = std + self.augmentation = Compose([ + Resize(size), + Normalize(mean, std) + ]) + + def __call__(self, image, polygons=None): + return self.augmentation(image, polygons) \ No newline at end of file diff --git a/contrib/TextSnake/util/config.py b/contrib/TextSnake/util/config.py new file mode 100644 index 000000000..7ad54a41f --- /dev/null +++ b/contrib/TextSnake/util/config.py @@ -0,0 +1,60 @@ +from easydict import EasyDict +import torch + +config = EasyDict() + +# dataloader jobs number +config.num_workers = 4 + +# batch_size +config.batch_size = 4 + +# training epoch number +config.max_epoch = 200 + +config.start_epoch = 0 + +# learning rate +config.lr = 1e-4 + +# using GPU +config.cuda = True + +config.n_disk = 15 + +config.output_dir = 'output' + +config.input_size = 512 + +# max polygon per image +config.max_annotation = 200 + +# max point per polygon +config.max_points = 20 + +# use hard examples (annotated as '#') +config.use_hard = True + +# demo tr threshold +config.tr_thresh = 0.6 + +# demo tcl threshold +config.tcl_thresh = 0.4 + +# expand ratio in post processing +config.post_process_expand = 0.3 + +# merge joined text instance when predicting +config.post_process_merge = False + +def update_config(config, extra_config): + for k, v in vars(extra_config).items(): + config[k] = v + config.device = torch.device('cuda') if config.cuda else torch.device('cpu') + + +def print_config(config): + print('==========Options============') + for k, v in config.items(): + print('{}: {}'.format(k, v)) + print('=============End=============') diff --git a/contrib/TextSnake/util/detection.py b/contrib/TextSnake/util/detection.py new file mode 100644 index 000000000..3ba6bc316 --- /dev/null +++ b/contrib/TextSnake/util/detection.py @@ -0,0 +1,325 @@ +import numpy as np +import cv2 +import torch +from util.config import config as cfg +from util.misc import fill_hole, regularize_sin_cos +from util.misc import norm2, vector_cos, vector_sin +from util.misc import disjoint_merge, merge_polygons + + +class TextDetector(object): + + def __init__(self, tr_thresh=0.4, tcl_thresh=0.6): + self.tr_thresh = tr_thresh + self.tcl_thresh = tcl_thresh + + + def find_innerpoint(self, cont): + """ + generate an inner point of input polygon using mean of x coordinate by: + 1. calculate mean of x coordinate(xmean) + 2. calculate maximum and minimum of y coordinate(ymax, ymin) + 3. iterate for each y in range (ymin, ymax), find first segment in the polygon + 4. 
calculate means of segment + :param cont: input polygon + :return: + """ + + xmean = cont[:, 0, 0].mean() + ymin, ymax = cont[:, 0, 1].min(), cont[:, 0, 1].max() + found = False + found_y = [] + # + for i in np.arange(ymin - 1, ymax + 1, 0.5): + # if in_poly > 0, (xmean, i) is in `cont` + in_poly = cv2.pointPolygonTest(cont, (int(xmean), int(i)), False) + if in_poly > 0: + found = True + found_y.append(i) + # first segment found + if in_poly < 0 and found: + break + + if len(found_y) > 0: + return (xmean, np.array(found_y).mean()) + + # if cannot find using above method, try each point's neighbor + else: + for p in range(len(cont)): + point = cont[p, 0] + for i in range(-1, 2, 1): + for j in range(-1, 2, 1): + test_pt = point + [i, j] + if cv2.pointPolygonTest(cont, (int(test_pt[0]), int(test_pt[1])), False) > 0: + return test_pt + + def in_contour(self, cont, point): + """ + utility function for judging whether `point` is in the `contour` + :param cont: cv2.findCountour result + :param point: 2d coordinate (x, y) + :return: + """ + x, y = point + return cv2.pointPolygonTest(cont, (int(x), int(y)), False) > 0 + + def centerlize(self, x, y, H, W, tangent_cos, tangent_sin, tcl_contour, stride=1.): + """ + centralizing (x, y) using tangent line and normal line. + :return: coordinate after centralizing + """ + + # calculate normal sin and cos + normal_cos = -tangent_sin + normal_sin = tangent_cos + + # find upward + _x, _y = x, y + while self.in_contour(tcl_contour, (_x, _y)): + _x = _x + normal_cos * stride + _y = _y + normal_sin * stride + if int(_x) >= W or int(_x) < 0 or int(_y) >= H or int(_y) < 0: + break + end1 = np.array([_x, _y]) + + # find downward + _x, _y = x, y + while self.in_contour(tcl_contour, (_x, _y)): + _x = _x - normal_cos * stride + _y = _y - normal_sin * stride + if int(_x) >= W or int(_x) < 0 or int(_y) >= H or int(_y) < 0: + break + end2 = np.array([_x, _y]) + + # centralizing + center = (end1 + end2) / 2 + + return center + + def mask_to_tcl(self, pred_sin, pred_cos, pred_radii, tcl_contour, init_xy, direct=1): + """ + Iteratively find center line in tcl mask using initial point (x, y) + :param pred_sin: predict sin map + :param pred_cos: predict cos map + :param tcl_contour: predict tcl contour + :param init_xy: initial (x, y) + :param direct: direction [-1|1] + :return: + """ + + H, W = pred_sin.shape + x_shift, y_shift = init_xy + + result = [] + max_attempt = 200 + attempt = 0 + + while self.in_contour(tcl_contour, (x_shift, y_shift)): + + attempt += 1 + + sin = pred_sin[int(y_shift), int(x_shift)] + cos = pred_cos[int(y_shift), int(x_shift)] + x_c, y_c = self.centerlize(x_shift, y_shift, H, W, cos, sin, tcl_contour) + + sin_c = pred_sin[int(y_c), int(x_c)] + cos_c = pred_cos[int(y_c), int(x_c)] + radii_c = pred_radii[int(y_c), int(x_c)] + + result.append(np.array([x_c, y_c, radii_c])) + + # shift stride + for shrink in [1/2., 1/4., 1/8., 1/16., 1/32.]: + t = shrink * radii_c # stride = +/- 0.5 * [sin|cos](theta), if new point is outside, shrink it until shrink < 1/32., hit ends + x_shift_pos = x_c + cos_c * t * direct # positive direction + y_shift_pos = y_c + sin_c * t * direct # positive direction + x_shift_neg = x_c - cos_c * t * direct # negative direction + y_shift_neg = y_c - sin_c * t * direct # negative direction + + # if first point, select positive direction shift + if len(result) == 1: + x_shift, y_shift = x_shift_pos, y_shift_pos + else: + # else select point further with second last point + dist_pos = norm2(result[-2][:2] - (x_shift_pos, 
y_shift_pos)) + dist_neg = norm2(result[-2][:2] - (x_shift_neg, y_shift_neg)) + if dist_pos > dist_neg: + x_shift, y_shift = x_shift_pos, y_shift_pos + else: + x_shift, y_shift = x_shift_neg, y_shift_neg + # if out of bounds, skip + if int(x_shift) >= W or int(x_shift) < 0 or int(y_shift) >= H or int(y_shift) < 0: + continue + # found an inside point + if self.in_contour(tcl_contour, (x_shift, y_shift)): + break + # if out of bounds, break + if int(x_shift) >= W or int(x_shift) < 0 or int(y_shift) >= H or int(y_shift) < 0: + break + if attempt > max_attempt: + break + return np.array(result) + + def build_tcl(self, tcl_pred, sin_pred, cos_pred, radii_pred): + """ + Find TCL's center points and radii of each point + :param tcl_pred: output tcl mask, (512, 512) + :param sin_pred: output sin map, (512, 512) + :param cos_pred: output cos map, (512, 512) + :param radii_pred: output radii map, (512, 512) + :return: (list), tcl array: (n, 3), 3 denotes (x, y, radii) + """ + all_tcls = [] + + # find disjoint regions + tcl_mask = fill_hole(tcl_pred) + tcl_contours, _ = cv2.findContours(tcl_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + for cont in tcl_contours: + + # find an inner point of polygon + init = self.find_innerpoint(cont) + + if init is None: + continue + + x_init, y_init = init + + # find left/right tcl + tcl_left = self.mask_to_tcl(sin_pred, cos_pred, radii_pred, cont, (x_init, y_init), direct=1) + tcl_right = self.mask_to_tcl(sin_pred, cos_pred, radii_pred, cont, (x_init, y_init), direct=-1) + # concat + tcl = np.concatenate([tcl_left[::-1][:-1], tcl_right]) + all_tcls.append(tcl) + + return all_tcls + + def detect_contours(self, image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred): + """ + Input: FCN output, Output: text detection after post-processing + + :param image: (np.array) input image (3, H, W) + :param tr_pred: (np.array), text region prediction, (2, H, W) + :param tcl_pred: (np.array), text center line prediction, (2, H, W) + :param sin_pred: (np.array), sin prediction, (H, W) + :param cos_pred: (np.array), cos line prediction, (H, W) + :param radii_pred: (np.array), radii prediction, (H, W) + + :return: + (list), tcl array: (n, 3), 3 denotes (x, y, radii) + """ + + # thresholding + tr_pred_mask = tr_pred[1] > self.tr_thresh + tcl_pred_mask = tcl_pred[1] > self.tcl_thresh + + # multiply TR and TCL + tcl_mask = tcl_pred_mask * tr_pred_mask + + # regularize + sin_pred, cos_pred = regularize_sin_cos(sin_pred, cos_pred) + + # find tcl in each predicted mask + detect_result = self.build_tcl(tcl_mask, sin_pred, cos_pred, radii_pred) + + return self.postprocessing(image, detect_result, tr_pred_mask) + + def detect(self, image): + """ + + :param image: + :return: + """ + # get model output + output = self.model(image) + image = image[0].data.cpu().numpy() + tr_pred = output[0, 0:2].softmax(dim=0).data.cpu().numpy() + tcl_pred = output[0, 2:4].softmax(dim=0).data.cpu().numpy() + sin_pred = output[0, 4].data.cpu().numpy() + cos_pred = output[0, 5].data.cpu().numpy() + radii_pred = output[0, 6].data.cpu().numpy() + + # find text contours + contours = self.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) # (n_tcl, 3) + + output = { + 'image': image, + 'tr': tr_pred, + 'tcl': tcl_pred, + 'sin': sin_pred, + 'cos': cos_pred, + 'radii': radii_pred + } + return contours, output + + def merge_contours(self, all_contours): + """ Merge overlapped instances to one instance with disjoint find / merge algorithm + :param all_contours: 
(list(np.array)), each with (n_points, 2) + :return: (list(np.array)), each with (n_points, 2) + """ + + def stride(disks, other_contour, left, step=0.3): + if len(disks) < 2: + return False + if left: + last_point, before_point = disks[:2] + else: + before_point, last_point = disks[-2:] + radius = last_point[2] + cos = vector_cos(last_point[:2] - before_point[:2]) + sin = vector_sin(last_point[:2] - before_point[:2]) + new_point = last_point[:2] + radius * step * np.array([cos, sin]) + return self.in_contour(other_contour, new_point) + + def can_merge(disks, other_contour): + return stride(disks, other_contour, left=True) or stride(disks, other_contour, left=False) + + F = list(range(len(all_contours))) + for i in range(len(all_contours)): + cont_i, disk_i = all_contours[i] + for j in range(i + 1, len(all_contours)): + cont_j, disk_j = all_contours[j] + if can_merge(disk_i, cont_j): + disjoint_merge(i, j, F) + + merged_polygons = merge_polygons([cont for cont, disks in all_contours], F) + return merged_polygons + + def postprocessing(self, image, detect_result, tr_pred_mask): + """ convert geometric info(center_x, center_y, radii) into contours + :param image: (np.array), input image + :param result: (list), each with (n, 3), 3 denotes (x, y, radii) + :param tr_pred_mask: (np.array), predicted text area mask, each with shape (H, W) + :return: (np.ndarray list), polygon format contours + """ + + all_conts = [] + for disk in detect_result: + reconstruct_mask = np.zeros(image.shape[1:], dtype=np.uint8) + for x, y, r in disk: + # expand radius for higher recall + if cfg.post_process_expand > 0.0: + r *= (1. + cfg.post_process_expand) + cv2.circle(reconstruct_mask, (int(x), int(y)), max(1, int(r)), 1, -1) + + # according to the paper, at least half of pixels in the reconstructed text area should be classified as TR + if (reconstruct_mask * tr_pred_mask).sum() < reconstruct_mask.sum() * 0.5: + continue + + # filter out too small objects + conts, _ = cv2.findContours(reconstruct_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(conts) > 1: + conts = list(conts) + conts.sort(key=lambda x: cv2.contourArea(x), reverse=True) + conts = tuple(conts) + elif not conts: + continue + all_conts.append((conts[0][:, 0, :], disk)) + + # merge joined instances + if cfg.post_process_merge: + all_conts = self.merge_contours(all_conts) + else: + all_conts = [cont[0] for cont in all_conts] + + return all_conts \ No newline at end of file diff --git a/contrib/TextSnake/util/misc.py b/contrib/TextSnake/util/misc.py new file mode 100644 index 000000000..c4d2d25db --- /dev/null +++ b/contrib/TextSnake/util/misc.py @@ -0,0 +1,245 @@ +import numpy as np +import errno +import os +import cv2 +from shapely.geometry import Polygon +from util.config import config as cfg + + +def to_device(*tensors): + if len(tensors) < 2: + return tensors[0].to(cfg.device) + return (t.to(cfg.device) for t in tensors) + + +def mkdirs(newdir): + """ + make directory with parent path + :param newdir: target path + """ + try: + if not os.path.exists(newdir): + os.makedirs(newdir) + except OSError as err: + # Reraise the error unless it's about an already existing directory + if err.errno != errno.EEXIST or not os.path.isdir(newdir): + raise + +def rescale_result(image, contours, H, W): + ori_H, ori_W = image.shape[:2] + image = cv2.resize(image, (W, H)) + for cont in contours: + cont[:, 0] = (cont[:, 0] * W / ori_W).astype(int) + cont[:, 1] = (cont[:, 1] * H / ori_H).astype(int) + return image, contours + + +def fill_hole(input_mask): + h, 
w = input_mask.shape + canvas = np.zeros((h + 2, w + 2), np.uint8) + canvas[1:h + 1, 1:w + 1] = input_mask.copy() + + mask = np.zeros((h + 4, w + 4), np.uint8) + + cv2.floodFill(canvas, mask, (0, 0), 1) + canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool_) + + return (~canvas | input_mask.astype(np.uint8)) + + +def regularize_sin_cos(sin, cos): + # regularization + scale = np.sqrt(1.0 / (sin ** 2 + cos ** 2)) + return sin * scale, cos * scale + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + +def norm2(x, axis=None): + if axis: + return np.sqrt(np.sum(x ** 2, axis=axis)) + return np.sqrt(np.sum(x ** 2)) + +def cos(p1, p2): + return (p1 * p2).sum() / (norm2(p1) * norm2(p2)) + +def vector_sin(v): + assert len(v) == 2 + # sin = y / (sqrt(x^2 + y^2)) + l = np.sqrt(v[0] ** 2 + v[1] ** 2) + return v[1] / l + +def vector_cos(v): + assert len(v) == 2 + # cos = x / (sqrt(x^2 + y^2)) + l = np.sqrt(v[0] ** 2 + v[1] ** 2) + return v[0] / l + +def find_bottom(pts): + + if len(pts) > 4: + e = np.concatenate([pts, pts[:3]]) + candidate = [] + for i in range(1, len(pts) + 1): + v_prev = e[i] - e[i - 1] + v_next = e[i + 2] - e[i + 1] + if cos(v_prev, v_next) < -0.7: + candidate.append((i % len(pts), (i + 1) % len(pts), norm2(e[i] - e[i + 1]))) + + if len(candidate) != 2 or candidate[0][0] == candidate[1][1] or candidate[0][1] == candidate[1][0]: + # if candidate number < 2, or two bottom are joined, select 2 farthest edge + mid_list = [] + for i in range(len(pts)): + mid_point = (e[i] + e[(i + 1) % len(pts)]) / 2 + mid_list.append((i, (i + 1) % len(pts), mid_point)) + + dist_list = [] + for i in range(len(pts)): + for j in range(len(pts)): + s1, e1, mid1 = mid_list[i] + s2, e2, mid2 = mid_list[j] + dist = norm2(mid1 - mid2) + dist_list.append((s1, e1, s2, e2, dist)) + bottom_idx = np.argsort([dist for s1, e1, s2, e2, dist in dist_list])[-2:] + bottoms = [dist_list[bottom_idx[0]][:2], dist_list[bottom_idx[1]][:2]] + else: + bottoms = [candidate[0][:2], candidate[1][:2]] + + else: + d1 = norm2(pts[1] - pts[0]) + norm2(pts[2] - pts[3]) + d2 = norm2(pts[2] - pts[1]) + norm2(pts[0] - pts[3]) + bottoms = [(0, 1), (2, 3)] if d1 < d2 else [(1, 2), (3, 0)] + assert len(bottoms) == 2, 'fewer than 2 bottoms' + return bottoms + + +def split_long_edges(points, bottoms): + """ + Find two long edge sequence of and polygon + """ + b1_start, b1_end = bottoms[0] + b2_start, b2_end = bottoms[1] + n_pts = len(points) + + i = b1_end + 1 + long_edge_1 = [] + while (i % n_pts != b2_end): + long_edge_1.append((i - 1, i)) + i = (i + 1) % n_pts + + i = b2_end + 1 + long_edge_2 = [] + while (i % n_pts != b1_end): + long_edge_2.append((i - 1, i)) + i = (i + 1) % n_pts + return long_edge_1, long_edge_2 + + +def find_long_edges(points, bottoms): + b1_start, b1_end = bottoms[0] + b2_start, b2_end = bottoms[1] + n_pts = len(points) + i = (b1_end + 1) % n_pts + long_edge_1 = [] + + while (i % n_pts != b2_end): + start = (i - 1) % n_pts + end = i % n_pts + long_edge_1.append((start, end)) + i = (i + 1) % n_pts + + i = (b2_end + 1) % n_pts + long_edge_2 = [] + while (i % n_pts != b1_end): + start = (i - 1) % n_pts + end = i % n_pts + long_edge_2.append((start, end)) + i = (i + 1) % n_pts + return long_edge_1, long_edge_2 + + +def split_edge_seqence(points, 
long_edge, n_parts): + + edge_length = [norm2(points[e1] - points[e2]) for e1, e2 in long_edge] + point_cumsum = np.cumsum([0] + edge_length) + total_length = sum(edge_length) + length_per_part = total_length / n_parts + + cur_node = 0 # first point + splited_result = [] + + for i in range(1, n_parts): + cur_end = i * length_per_part + + while(cur_end > point_cumsum[cur_node + 1]): + cur_node += 1 + + e1, e2 = long_edge[cur_node] + e1, e2 = points[e1], points[e2] + + # start_point = points[long_edge[cur_node]] + end_shift = cur_end - point_cumsum[cur_node] + ratio = end_shift / edge_length[cur_node] + new_point = e1 + ratio * (e2 - e1) + # print(cur_end, point_cumsum[cur_node], end_shift, edge_length[cur_node], '=', new_point) + splited_result.append(new_point) + + # add first and last point + p_first = points[long_edge[0][0]] + p_last = points[long_edge[-1][1]] + splited_result = [p_first] + splited_result + [p_last] + return np.stack(splited_result) + +def disjoint_find(x, F): + if F[x] == x: + return x + F[x] = disjoint_find(F[x], F) + return F[x] + +def disjoint_merge(x, y, F): + x = disjoint_find(x, F) + y = disjoint_find(y, F) + if x == y: + return False + F[y] = x + return True + + +def merge_polygons(polygons, merge_map): + + def merge_two_polygon(p1, p2): + p2 = Polygon(p2) + merged = p1.union(p2) + return merged + + merge_map = [disjoint_find(x, merge_map) for x in range(len(merge_map))] + merge_map = np.array(merge_map) + final_polygons = [] + + for i in np.unique(merge_map): + merge_idx = np.where(merge_map == i)[0] + if len(merge_idx) > 0: + merged = Polygon(polygons[merge_idx[0]]) + for j in range(1, len(merge_idx)): + merged = merge_two_polygon(merged, polygons[merge_idx[j]]) + x, y = merged.exterior.coords.xy + final_polygons.append(np.stack([x, y], axis=1).astype(int)) + + return final_polygons + + diff --git a/contrib/TextSnake/util/option.py b/contrib/TextSnake/util/option.py new file mode 100644 index 000000000..ffbcf1b71 --- /dev/null +++ b/contrib/TextSnake/util/option.py @@ -0,0 +1,108 @@ +import argparse +import torch +import os +import torch.backends.cudnn as cudnn + +from datetime import datetime + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + +def arg2str(args): + args_dict = vars(args) + option_str = datetime.now().strftime('%b%d_%H-%M-%S') + '\n' + + for k, v in sorted(args_dict.items()): + option_str += ('{}: {}\n'.format(str(k), str(v))) + + return option_str + +class BaseOptions(object): + + def __init__(self): + + self.parser = argparse.ArgumentParser() + + # basic opts + self.parser.add_argument('exp_name', type=str, help='Experiment name') + self.parser.add_argument('--net', default='vgg', type=str, choices=['vgg', 'resnet'], help='Network architecture') + self.parser.add_argument('--dataset', default='total-text', type=str, choices=['synth-text', 'total-text'], help='Dataset name') + self.parser.add_argument('--resume', default=None, type=str, help='Path to target resume checkpoint') + self.parser.add_argument('--num_workers', default=8, type=int, help='Number of workers used in dataloading') + self.parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') + self.parser.add_argument('--mgpu', action='store_true', help='Use multi-gpu to train model') + self.parser.add_argument('--save_dir', default='./save/', help='Path to save checkpoint models') + self.parser.add_argument('--vis_dir', default='./vis/', help='Path to save visualization images') + self.parser.add_argument('--log_dir', 
default='./logs/', help='Path to tensorboard log') + self.parser.add_argument('--loss', default='CrossEntropyLoss', type=str, help='Training Loss') + self.parser.add_argument('--input_channel', default=1, type=int, help='number of input channels' ) + self.parser.add_argument('--pretrain', default=False, type=str2bool, help='Pretrained AutoEncoder model') + self.parser.add_argument('--verbose', '-v', default=True, type=str2bool, help='Whether to output debug info') + self.parser.add_argument('--viz', action='store_true', help='Whether to output debug info') + + # train opts + self.parser.add_argument('--start_iter', default=0, type=int, help='Begin counting iterations starting from this value (should be used with resume)') + self.parser.add_argument('--max_epoch', default=200, type=int, help='Max epochs') + self.parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float, help='initial learning rate') + self.parser.add_argument('--lr_adjust', default='fix', choices=['fix', 'poly'], type=str, help='Learning Rate Adjust Strategy') + self.parser.add_argument('--stepvalues', default=[], nargs='+', type=int, help='# of iter to change lr') + self.parser.add_argument('--weight_decay', '--wd', default=0., type=float, help='Weight decay for SGD') + self.parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD lr') + self.parser.add_argument('--momentum', default=0.9, type=float, help='momentum') + self.parser.add_argument('--batch_size', default=4, type=int, help='Batch size for training') + self.parser.add_argument('--optim', default='SGD', type=str, choices=['SGD', 'Adam'], help='Optimizer') + self.parser.add_argument('--display_freq', default=50, type=int, help='display training metrics every # iterations') + self.parser.add_argument('--viz_freq', default=50, type=int, help='visualize training process every # iterations') + self.parser.add_argument('--save_freq', default=10, type=int, help='save weights every # epoch') + self.parser.add_argument('--log_freq', default=100, type=int, help='log to tensorboard every # iterations') + self.parser.add_argument('--val_freq', default=100, type=int, help='do validation every # iterations') + + # data args + self.parser.add_argument('--rescale', type=float, default=255.0, help='rescale factor') + self.parser.add_argument('--means', type=int, default=(0.485, 0.456, 0.406), nargs='+', help='mean') + self.parser.add_argument('--stds', type=int, default=(0.229, 0.224, 0.225), nargs='+', help='std') + self.parser.add_argument('--input_size', default=512, type=int, help='model input size') + + # eval args + self.parser.add_argument('--checkepoch', default=-1, type=int, help='Load checkpoint number') + + # demo args + self.parser.add_argument('--img_root', default=None, type=str, help='Path to deploy images') + + def parse(self, fixed=None): + + if fixed is not None: + args = self.parser.parse_args(fixed) + else: + args = self.parser.parse_args() + + return args + + def initialize(self, fixed=None): + + # Parse options + self.args = self.parse(fixed) + + # Setting default torch Tensor type + if self.args.cuda and torch.cuda.is_available(): + torch.set_default_tensor_type('torch.cuda.FloatTensor') + cudnn.benchmark = True + else: + torch.set_default_tensor_type('torch.FloatTensor') + + # Create weights saving directory + if not os.path.exists(self.args.save_dir): + os.mkdir(self.args.save_dir) + + # Create weights saving directory of target model + model_save_path = os.path.join(self.args.save_dir, self.args.exp_name) + + if 
not os.path.exists(model_save_path):
+            os.mkdir(model_save_path)
+
+        return self.args
+
+    def update(self, args, extra_options):
+
+        for k, v in extra_options.items():
+            setattr(args, k, v)
diff --git a/contrib/TextSnake/util/polygon_wrapper.py b/contrib/TextSnake/util/polygon_wrapper.py
new file mode 100644
index 000000000..c089b8a3d
--- /dev/null
+++ b/contrib/TextSnake/util/polygon_wrapper.py
@@ -0,0 +1,155 @@
+import numpy as np
+from skimage.draw import polygon
+from shapely.geometry import Polygon
+
+"""
+:param det_x: [1, N] Xs of detection's vertices
+:param det_y: [1, N] Ys of detection's vertices
+:param gt_x: [1, N] Xs of groundtruth's vertices
+:param gt_y: [1, N] Ys of groundtruth's vertices
+
+##############
+Polygon areas and intersections are computed with shapely (see shapely_area
+and shapely_area_of_intersection). The iou / iod helpers below still
+calculate 'AREA' the original way:
+1) First generating a binary mask with the polygon area filled up with 1's
+2) Summing up all the 1's
+"""
+
+
+def area(x, y):
+    """
+    This helper calculates the area given x and y vertices.
+    """
+    # the original mask-based computation (rasterize, then sum the 1's) was
+    # superseded by the exact shapely implementation
+    return shapely_area(x, y)
+
+
+def approx_area_of_intersection(det_x, det_y, gt_x, gt_y):
+    """
+    This helper determines whether the two polygons intersect, using an approximation method:
+    the area of intersection is represented by the overlap of the minimum
+    bounding rectangles [xmin, ymin, xmax, ymax].
+    """
+    det_ymax = np.max(det_y)
+    det_xmax = np.max(det_x)
+    det_ymin = np.min(det_y)
+    det_xmin = np.min(det_x)
+
+    gt_ymax = np.max(gt_y)
+    gt_xmax = np.max(gt_x)
+    gt_ymin = np.min(gt_y)
+    gt_xmin = np.min(gt_x)
+
+    all_min_ymax = np.minimum(det_ymax, gt_ymax)
+    all_max_ymin = np.maximum(det_ymin, gt_ymin)
+
+    intersect_heights = np.maximum(0.0, (all_min_ymax - all_max_ymin))
+
+    all_min_xmax = np.minimum(det_xmax, gt_xmax)
+    all_max_xmin = np.maximum(det_xmin, gt_xmin)
+    intersect_widths = np.maximum(0.0, (all_min_xmax - all_max_xmin))
+
+    return intersect_heights * intersect_widths
+
+def shapely_area_of_intersection(det_x, det_y, gt_x, gt_y):
+    p1 = Polygon(np.stack([det_x, det_y], axis=1)).buffer(0)
+    p2 = Polygon(np.stack([gt_x, gt_y], axis=1)).buffer(0)
+    return int(p1.intersection(p2).area)
+
+def shapely_area(x, y):
+    # use a local name that does not shadow the imported skimage.draw.polygon
+    poly = Polygon(np.stack([x, y], axis=1))
+    return int(poly.area)
+
+def area_of_intersection(det_x, det_y, gt_x, gt_y):
+    """
+    This helper calculates the area of intersection.
+    """
+    # the mask-based computation (still visible in iou / iod below) was
+    # superseded by the exact shapely implementation
+    return shapely_area_of_intersection(det_x, det_y, gt_x, gt_y)
+
+
+def iou(det_x, det_y, gt_x, gt_y):
+    """
+    This helper determines the intersection over union of two polygons.
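+
+    Computed below as inter / float(union + 1.0): the + 1.0 guards against a
+    zero denominator, so the result is slightly below the exact
+    area(intersection) / area(union).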
+    """
+
+    if approx_area_of_intersection(det_x, det_y, gt_x, gt_y) > 1: #only proceed if it passes the approximation test
+        ymax = np.maximum(np.max(det_y), np.max(gt_y)) + 1
+        xmax = np.maximum(np.max(det_x), np.max(gt_x)) + 1
+        bin_mask = np.zeros((ymax, xmax))
+        det_bin_mask = np.zeros_like(bin_mask)
+        gt_bin_mask = np.zeros_like(bin_mask)
+
+        rr, cc = polygon(det_y, det_x)
+        det_bin_mask[rr, cc] = 1
+
+        rr, cc = polygon(gt_y, gt_x)
+        gt_bin_mask[rr, cc] = 1
+
+        final_bin_mask = det_bin_mask + gt_bin_mask
+
+        #inter_map = np.zeros_like(final_bin_mask)
+        inter_map = np.where(final_bin_mask == 2, 1, 0)
+        inter = np.sum(inter_map)
+
+        #union_map = np.zeros_like(final_bin_mask)
+        union_map = np.where(final_bin_mask > 0, 1, 0)
+        union = np.sum(union_map)
+        return inter / float(union + 1.0)
+        #return np.round(inter / float(union + 1.0), 2)
+    else:
+        return 0
+
+def iod(det_x, det_y, gt_x, gt_y):
+    """
+    This helper determines the fraction of the intersection area over the detection area.
+    """
+
+    if approx_area_of_intersection(det_x, det_y, gt_x, gt_y) > 1: #only proceed if it passes the approximation test
+        ymax = np.maximum(np.max(det_y), np.max(gt_y)) + 1
+        xmax = np.maximum(np.max(det_x), np.max(gt_x)) + 1
+        bin_mask = np.zeros((ymax, xmax))
+        det_bin_mask = np.zeros_like(bin_mask)
+        gt_bin_mask = np.zeros_like(bin_mask)
+
+        rr, cc = polygon(det_y, det_x)
+        det_bin_mask[rr, cc] = 1
+
+        rr, cc = polygon(gt_y, gt_x)
+        gt_bin_mask[rr, cc] = 1
+
+        final_bin_mask = det_bin_mask + gt_bin_mask
+
+        inter_map = np.where(final_bin_mask == 2, 1, 0)
+        inter = np.round(np.sum(inter_map), 2)
+
+        det = np.round(np.sum(det_bin_mask), 2)
+        return inter / float(det + 1.0)
+        #return np.round(inter / float(det + 1.0), 2)
+    else:
+        return 0
\ No newline at end of file
diff --git a/contrib/TextSnake/util/shedule.py b/contrib/TextSnake/util/shedule.py
new file mode 100644
index 000000000..6b250af33
--- /dev/null
+++ b/contrib/TextSnake/util/shedule.py
@@ -0,0 +1,28 @@
+from torch.optim.lr_scheduler import _LRScheduler
+
+class FixLR(_LRScheduler):
+    """Keeps the learning rate of each parameter group fixed at the
+    initial lr. When last_epoch=-1, sets initial lr as lr.
+
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        last_epoch (int): The index of last epoch. Default: -1.
+
+    Example:
+        >>> # Fixed learning rate
+        >>> scheduler = FixLR(optimizer)
+        >>> for epoch in range(100):
+        >>>     scheduler.step()
+        >>>     train(...)
+        >>>     validate(...)
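+
+        Here get_lr() simply returns the optimizer's base learning rates, so
+        the schedule stays constant for the entire run.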
+ """ + + def __init__(self, optimizer, last_epoch=-1): + super().__init__(optimizer, last_epoch) + + def get_lr(self): + return self.base_lrs \ No newline at end of file diff --git a/contrib/TextSnake/util/summary.py b/contrib/TextSnake/util/summary.py new file mode 100644 index 000000000..2be3a77f0 --- /dev/null +++ b/contrib/TextSnake/util/summary.py @@ -0,0 +1,25 @@ +from tensorboardX import SummaryWriter +from util.misc import mkdirs + +class LogSummary(object): + + def __init__(self, log_path): + + mkdirs(log_path) + self.writer = SummaryWriter(log_path) + + def write_scalars(self, scalar_dict, n_iter, tag=None): + + for name, scalar in scalar_dict.items(): + if tag is not None: + name = '/'.join([tag, name]) + self.writer.add_scalar(name, scalar, n_iter) + + def write_hist_parameters(self, net, n_iter): + for name, param in net.named_parameters(): + self.writer.add_histogram(name, param.clone().cpu().numpy(), n_iter) + + + + + diff --git a/contrib/TextSnake/util/visualize.py b/contrib/TextSnake/util/visualize.py new file mode 100644 index 000000000..5e82c3974 --- /dev/null +++ b/contrib/TextSnake/util/visualize.py @@ -0,0 +1,55 @@ +import torch +import numpy as np +import cv2 +import os +from util.config import config as cfg + + +def visualize_network_output(output, tr_mask, tcl_mask, mode='train'): + + vis_dir = os.path.join(cfg.vis_dir, cfg.exp_name + '_' + mode) + if not os.path.exists(vis_dir): + os.mkdir(vis_dir) + + tr_pred = output[:, :2] + tr_score, tr_predict = tr_pred.max(dim=1) + + tcl_pred = output[:, 2:4] + tcl_score, tcl_predict = tcl_pred.max(dim=1) + + tr_predict = tr_predict.cpu().numpy() + tcl_predict = tcl_predict.cpu().numpy() + + tr_target = tr_mask.cpu().numpy() + tcl_target = tcl_mask.cpu().numpy() + + for i in range(len(tr_pred)): + tr_pred = (tr_predict[i] * 255).astype(np.uint8) + tr_targ = (tr_target[i] * 255).astype(np.uint8) + + tcl_pred = (tcl_predict[i] * 255).astype(np.uint8) + tcl_targ = (tcl_target[i] * 255).astype(np.uint8) + + tr_show = np.concatenate([tr_pred, tr_targ], axis=1) + tcl_show = np.concatenate([tcl_pred, tcl_targ], axis=1) + show = np.concatenate([tr_show, tcl_show], axis=0) + show = cv2.resize(show, (512, 512)) + + path = os.path.join(vis_dir, '{}.png'.format(i)) + cv2.imwrite(path, show) + + +def visualize_detection(image, contours, tr=None, tcl=None): + image_show = image.copy() + image_show = np.ascontiguousarray(image_show[:, :, ::-1]) + image_show = cv2.polylines(image_show, contours, True, (0, 0, 255), 3) + + if (tr is not None) and (tcl is not None): + tr = (tr > cfg.tr_thresh).astype(np.uint8) + tcl = (tcl > cfg.tcl_thresh).astype(np.uint8) + tr = cv2.cvtColor(tr * 255, cv2.COLOR_GRAY2BGR) + tcl = cv2.cvtColor(tcl * 255, cv2.COLOR_GRAY2BGR) + image_show = np.concatenate([image_show, tr, tcl], axis=1) + return image_show + else: + return image_show -- Gitee From df6a424d2ae0a33779d655bfb70f0ef04b7d5dbd Mon Sep 17 00:00:00 2001 From: Ssayhi_w Date: Sat, 24 Aug 2024 16:47:54 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E5=BC=AF=E6=9B=B2=E6=96=87=E5=AD=97?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=8F=82=E8=80=83=E8=AE=BE=E8=AE=A1=E9=80=82?= =?UTF-8?q?=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contrib/TextSnake/{util => }/Deteval.py | 0 contrib/TextSnake/README.md | 16 +- .../TextSnake/{util => }/polygon_wrapper.py | 0 contrib/TextSnake/util/__init__.py | 0 contrib/TextSnake/util/augmentation.py | 339 ------------------ contrib/TextSnake/util/config.py | 60 ---- 
contrib/TextSnake/util/detection.py | 325 ----------------- contrib/TextSnake/util/misc.py | 245 ------------- contrib/TextSnake/util/option.py | 108 ------ contrib/TextSnake/util/shedule.py | 28 -- contrib/TextSnake/util/summary.py | 25 -- contrib/TextSnake/util/visualize.py | 55 --- 12 files changed, 6 insertions(+), 1195 deletions(-) rename contrib/TextSnake/{util => }/Deteval.py (100%) rename contrib/TextSnake/{util => }/polygon_wrapper.py (100%) delete mode 100644 contrib/TextSnake/util/__init__.py delete mode 100644 contrib/TextSnake/util/augmentation.py delete mode 100644 contrib/TextSnake/util/config.py delete mode 100644 contrib/TextSnake/util/detection.py delete mode 100644 contrib/TextSnake/util/misc.py delete mode 100644 contrib/TextSnake/util/option.py delete mode 100644 contrib/TextSnake/util/shedule.py delete mode 100644 contrib/TextSnake/util/summary.py delete mode 100644 contrib/TextSnake/util/visualize.py diff --git a/contrib/TextSnake/util/Deteval.py b/contrib/TextSnake/Deteval.py similarity index 100% rename from contrib/TextSnake/util/Deteval.py rename to contrib/TextSnake/Deteval.py diff --git a/contrib/TextSnake/README.md b/contrib/TextSnake/README.md index e2f8dfed0..3f7978fe6 100644 --- a/contrib/TextSnake/README.md +++ b/contrib/TextSnake/README.md @@ -33,15 +33,10 @@ TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任 | 软件名称 | 版本 | | -------- |--------| -| cv2 | 4.1.2 | -| numpy | 1.15.1 | -| onnx | 1.8.0 | -| torch | 1.5.0 | -| torchvision | 0.6.0 | -| scikit_image | 0.16.2 | -| scipy | 1.5.4 | +| numpy | 1.25.2 | +| scikit_image | 0.24.0 | +| scipy | 1.13.1 | | easydict | 1.13 | -| tdqm | 4.62.3 | | shapely | 2.0.6 | ### 1.4 代码目录结构与说明 @@ -127,7 +122,7 @@ https://github.com/princewang1994/TextSnake.pytorch/tree/b4ee996d5a4d214ed825350 ``` python3 main.py ``` - +注意:运行过程中可能会出现告警,不影响执行结果 **步骤 4** 图片检测。运行结束输出result.jpg。 @@ -160,13 +155,14 @@ Groundtruth位于Groundtruth/Polygon/Test └──README.md ``` -**步骤 2** 除先前下载的util文件夹之外,还需要从以下网址中下载Deteval.py与polygon_wrapper.py文件,放入util文件夹中 +**步骤 2** 除先前下载的util文件夹之外,还需要从以下网址中下载Deteval.py与polygon_wrapper.py文件,放入util文件夹中(本项目已提供在./TestSnake文件夹下) https://github.com/princewang1994/TextSnake.pytorch/tree/b4ee996d5a4d214ed825350d6b307dd1c31faa07/dataset/total_text/Evaluation_Protocol/Python_scripts **步骤 3** 在命令行输入 如下命令运行精度测试 ``` python3 evaluate.py ``` +注意:运行过程中会出现告警,不影响执行结果 得到精度测试的结果: ![精度测试结果1](./精度1.png) diff --git a/contrib/TextSnake/util/polygon_wrapper.py b/contrib/TextSnake/polygon_wrapper.py similarity index 100% rename from contrib/TextSnake/util/polygon_wrapper.py rename to contrib/TextSnake/polygon_wrapper.py diff --git a/contrib/TextSnake/util/__init__.py b/contrib/TextSnake/util/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/contrib/TextSnake/util/augmentation.py b/contrib/TextSnake/util/augmentation.py deleted file mode 100644 index 87fefd44c..000000000 --- a/contrib/TextSnake/util/augmentation.py +++ /dev/null @@ -1,339 +0,0 @@ -import numpy as np -import math -import cv2 -import numpy.random as random - - -class Compose(object): - """Composes several augmentations together. - Args: - transforms (List[Transform]): list of transforms to compose. 
- Example: - >>> augmentations.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.ToTensor(), - >>> ]) - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, pts=None): - for t in self.transforms: - img, pts = t(img, pts) - return img, pts - - -class RandomMirror(object): - def __init__(self): - pass - - def __call__(self, image, polygons=None): - if np.random.randint(2): - image = np.ascontiguousarray(image[:, ::-1]) - _, width, _ = image.shape - for polygon in polygons: - polygon.points[:, 0] = width - polygon.points[:, 0] - return image, polygons - - -class AugmentColor(object): - def __init__(self): - self.U = np.array([[-0.56543481, 0.71983482, 0.40240142], - [-0.5989477, -0.02304967, -0.80036049], - [-0.56694071, -0.6935729, 0.44423429]], dtype=np.float32) - self.EV = np.array([1.65513492, 0.48450358, 0.1565086], dtype=np.float32) - self.sigma = 0.1 - self.color_vec = None - - def __call__(self, img, polygons=None): - color_vec = self.color_vec - if self.color_vec is None: - if not self.sigma > 0.0: - color_vec = np.zeros(3, dtype=np.float32) - else: - color_vec = np.random.normal(0.0, self.sigma, 3) - - alpha = color_vec.astype(np.float32) * self.EV - noise = np.dot(self.U, alpha.T) * 255 - return np.clip(img + noise[np.newaxis, np.newaxis, :], 0, 255), polygons - - -class RandomContrast(object): - def __init__(self, lower=0.5, upper=1.5): - self.lower = lower - self.upper = upper - assert self.upper >= self.lower, "contrast upper must be >= lower." - assert self.lower >= 0, "contrast lower must be non-negative." - - # expects float image - def __call__(self, image, polygons=None): - if random.randint(2): - alpha = random.uniform(self.lower, self.upper) - image *= alpha - return np.clip(image, 0, 255), polygons - - -class RandomBrightness(object): - def __init__(self, delta=32): - assert delta >= 0.0 - assert delta <= 255.0 - self.delta = delta - - def __call__(self, image, polygons=None): - image = image.astype(np.float32) - if random.randint(2): - delta = random.uniform(-self.delta, self.delta) - image += delta - return np.clip(image, 0, 255), polygons - - -class Rotate(object): - def __init__(self, up=30): - self.up = up - - def rotate(self, center, pt, theta): # 二维图形学的旋转 - xr, yr = center - yr = -yr - x, y = pt[:, 0], pt[:, 1] - y = -y - - theta = theta / 360 * 2 * math.pi - cos = math.cos(theta) - sin = math.sin(theta) - - _x = xr + (x - xr) * cos - (y - yr) * sin - _y = yr + (x - xr) * sin + (y - yr) * cos - - return _x, -_y - - def __call__(self, img, polygons=None): - if np.random.randint(2): - return img, polygons - angle = np.random.uniform(-self.up, self.up) # - rows, cols = img.shape[0:2] - M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1.0) - img = cv2.warpAffine(img, M, (cols, rows), borderValue=[0, 0, 0]) - center = cols / 2.0, rows / 2.0 - if polygons is not None: - for polygon in polygons: - x, y = self.rotate(center, polygon.points, angle) - pts = np.vstack([x, y]).T - polygon.points = pts - return img, polygons - -class SquarePadding(object): - - def __call__(self, image, pts=None): - - H, W, _ = image.shape - - if H == W: - return image, pts - - padding_size = max(H, W) - expand_image = np.zeros((padding_size, padding_size, 3), dtype=image.dtype) - - if H > W: - y0, x0 = 0, (H - W) // 2 - else: - y0, x0 = (W - H) // 2, 0 - if pts is not None: - pts[:, 0] += x0 - pts[:, 1] += y0 - - expand_image[y0:y0+H, x0:x0+W] = image - image = expand_image - - return image, pts - - -class Padding(object): - - def 
__init__(self, fill=0): - self.fill = fill - - def __call__(self, image, polygons=None): - if np.random.randint(2): - return image, polygons - - height, width, depth = image.shape - ratio = np.random.uniform(1, 2) - left = np.random.uniform(0, width * ratio - width) - top = np.random.uniform(0, height * ratio - height) - - expand_image = np.zeros( - (int(height * ratio), int(width * ratio), depth), - dtype=image.dtype) - expand_image[:, :, :] = self.fill - expand_image[int(top):int(top + height), - int(left):int(left + width)] = image - image = expand_image - - if polygons is not None: - for polygon in polygons: - polygon.points[:, 0] = polygon.points[:, 0] + left - polygon.points[:, 1] = polygon.points[:, 1] + top - return image, polygons - - -class RandomResizedCrop(object): - def __init__(self, size, scale=(0.3, 1.0), ratio=(3. / 4., 4. / 3.)): - self.size = (size, size) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - """Get parameters for ``crop`` for a random sized crop. - - Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped - ratio (tuple): range of aspect ratio of the origin aspect ratio cropped - - Returns: - tuple: params (i, j, h, w) to be passed to ``crop`` for a random - sized crop. - """ - for attempt in range(10): - area = img.shape[0] * img.shape[1] - target_area = np.random.uniform(*scale) * area - aspect_ratio = np.random.uniform(*ratio) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if np.random.random() < 0.5: - w, h = h, w - - if h < img.shape[0] and w < img.shape[1]: - j = np.random.randint(0, img.shape[1] - w) - i = np.random.randint(0, img.shape[0] - h) - return i, j, h, w - - # Fallback - w = min(img.shape[0], img.shape[1]) - i = (img.shape[0] - w) // 2 - j = (img.shape[1] - w) // 2 - return i, j, w, w - - def __call__(self, image, pts=None): - i, j, h, w = self.get_params(image, self.scale, self.ratio) - cropped = image[i:i + h, j:j + w, :] - pts = pts.copy() - mask = (pts[:, 1] >= i) * (pts[:, 0] >= j) * (pts[:, 1] < (i+h)) * (pts[:, 0] < (j+w)) - pts[~mask, 2] = -1 - scales = np.array([self.size[0]/w, self.size[1]/h]) - pts[:, :2] -= np.array([j, i]) - pts[:, :2] = (pts[:, :2] * scales) - img = cv2.resize(cropped, self.size) - return img, pts - - -class RandomResizedLimitCrop(object): - def __init__(self, size, scale=(0.3, 1.0), ratio=(3. / 4., 4. 
/ 3.)): - self.size = (size, size) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - for attempt in range(10): - area = img.shape[0] * img.shape[1] - target_area = np.random.uniform(*scale) * area - aspect_ratio = np.random.uniform(*ratio) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - if np.random.random() < 0.5: - w, h = h, w - - if h < img.shape[0] and w < img.shape[1]: - j = np.random.randint(0, img.shape[1] - w) - i = np.random.randint(0, img.shape[0] - h) - return i, j, h, w - - # Fallback - w = min(img.shape[0], img.shape[1]) - i = (img.shape[0] - w) // 2 - j = (img.shape[1] - w) // 2 - return i, j, w, w - - def __call__(self, image, polygons=None): - i, j, h, w = self.get_params(image, self.scale, self.ratio) - - cropped = image[i:i + h, j:j + w, :] - scales = np.array([self.size[0] / w, self.size[1] / h]) - if polygons is not None: - for polygon in polygons: - polygon.points[:, 0] = (polygon.points[:, 0] - j) * scales[0] - polygon.points[:, 1] = (polygon.points[:, 1] - i) * scales[1] - - img = cv2.resize(cropped, self.size) - return img, polygons - - -class Normalize(object): - def __init__(self, mean, std): - self.mean = np.array(mean) - self.std = np.array(std) - - def __call__(self, image, polygons=None): - image = image.astype(np.float32) - image /= 255.0 - image -= self.mean - image /= self.std - return image, polygons - - -class Resize(object): - def __init__(self, size=256): - self.size = size - - def __call__(self, image, polygons=None): - h, w, _ = image.shape - image = cv2.resize(image, (self.size, - self.size)) - scales = np.array([self.size / w, self.size / h]) - - if polygons is not None: - for polygon in polygons: - polygon.points = polygon.points * scales - - return image, polygons - - -class Augmentation(object): - - def __init__(self, size, mean, std): - self.size = size - self.mean = mean - self.std = std - self.augmentation = Compose([ - # Resize(size), - Padding(), - RandomResizedLimitCrop(size=size, scale=(0.24, 1.0), ratio=(0.33, 3)), - # RandomBrightness(), - # RandomContrast(), - RandomMirror(), - Rotate(), - Normalize(mean, std) - ]) - - def __call__(self, image, polygons=None): - return self.augmentation(image, polygons) - - -class BaseTransform(object): - def __init__(self, size, mean, std): - self.size = size - self.mean = mean - self.std = std - self.augmentation = Compose([ - Resize(size), - Normalize(mean, std) - ]) - - def __call__(self, image, polygons=None): - return self.augmentation(image, polygons) \ No newline at end of file diff --git a/contrib/TextSnake/util/config.py b/contrib/TextSnake/util/config.py deleted file mode 100644 index 7ad54a41f..000000000 --- a/contrib/TextSnake/util/config.py +++ /dev/null @@ -1,60 +0,0 @@ -from easydict import EasyDict -import torch - -config = EasyDict() - -# dataloader jobs number -config.num_workers = 4 - -# batch_size -config.batch_size = 4 - -# training epoch number -config.max_epoch = 200 - -config.start_epoch = 0 - -# learning rate -config.lr = 1e-4 - -# using GPU -config.cuda = True - -config.n_disk = 15 - -config.output_dir = 'output' - -config.input_size = 512 - -# max polygon per image -config.max_annotation = 200 - -# max point per polygon -config.max_points = 20 - -# use hard examples (annotated as '#') -config.use_hard = True - -# demo tr threshold -config.tr_thresh = 0.6 - -# demo tcl threshold -config.tcl_thresh = 0.4 - -# expand ratio in post processing 
-config.post_process_expand = 0.3 - -# merge joined text instance when predicting -config.post_process_merge = False - -def update_config(config, extra_config): - for k, v in vars(extra_config).items(): - config[k] = v - config.device = torch.device('cuda') if config.cuda else torch.device('cpu') - - -def print_config(config): - print('==========Options============') - for k, v in config.items(): - print('{}: {}'.format(k, v)) - print('=============End=============') diff --git a/contrib/TextSnake/util/detection.py b/contrib/TextSnake/util/detection.py deleted file mode 100644 index 3ba6bc316..000000000 --- a/contrib/TextSnake/util/detection.py +++ /dev/null @@ -1,325 +0,0 @@ -import numpy as np -import cv2 -import torch -from util.config import config as cfg -from util.misc import fill_hole, regularize_sin_cos -from util.misc import norm2, vector_cos, vector_sin -from util.misc import disjoint_merge, merge_polygons - - -class TextDetector(object): - - def __init__(self, tr_thresh=0.4, tcl_thresh=0.6): - self.tr_thresh = tr_thresh - self.tcl_thresh = tcl_thresh - - - def find_innerpoint(self, cont): - """ - generate an inner point of input polygon using mean of x coordinate by: - 1. calculate mean of x coordinate(xmean) - 2. calculate maximum and minimum of y coordinate(ymax, ymin) - 3. iterate for each y in range (ymin, ymax), find first segment in the polygon - 4. calculate means of segment - :param cont: input polygon - :return: - """ - - xmean = cont[:, 0, 0].mean() - ymin, ymax = cont[:, 0, 1].min(), cont[:, 0, 1].max() - found = False - found_y = [] - # - for i in np.arange(ymin - 1, ymax + 1, 0.5): - # if in_poly > 0, (xmean, i) is in `cont` - in_poly = cv2.pointPolygonTest(cont, (int(xmean), int(i)), False) - if in_poly > 0: - found = True - found_y.append(i) - # first segment found - if in_poly < 0 and found: - break - - if len(found_y) > 0: - return (xmean, np.array(found_y).mean()) - - # if cannot find using above method, try each point's neighbor - else: - for p in range(len(cont)): - point = cont[p, 0] - for i in range(-1, 2, 1): - for j in range(-1, 2, 1): - test_pt = point + [i, j] - if cv2.pointPolygonTest(cont, (int(test_pt[0]), int(test_pt[1])), False) > 0: - return test_pt - - def in_contour(self, cont, point): - """ - utility function for judging whether `point` is in the `contour` - :param cont: cv2.findCountour result - :param point: 2d coordinate (x, y) - :return: - """ - x, y = point - return cv2.pointPolygonTest(cont, (int(x), int(y)), False) > 0 - - def centerlize(self, x, y, H, W, tangent_cos, tangent_sin, tcl_contour, stride=1.): - """ - centralizing (x, y) using tangent line and normal line. 
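-        (walks from (x, y) along the normal in both directions to the contour boundary and returns the midpoint of the two exit points)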
- :return: coordinate after centralizing - """ - - # calculate normal sin and cos - normal_cos = -tangent_sin - normal_sin = tangent_cos - - # find upward - _x, _y = x, y - while self.in_contour(tcl_contour, (_x, _y)): - _x = _x + normal_cos * stride - _y = _y + normal_sin * stride - if int(_x) >= W or int(_x) < 0 or int(_y) >= H or int(_y) < 0: - break - end1 = np.array([_x, _y]) - - # find downward - _x, _y = x, y - while self.in_contour(tcl_contour, (_x, _y)): - _x = _x - normal_cos * stride - _y = _y - normal_sin * stride - if int(_x) >= W or int(_x) < 0 or int(_y) >= H or int(_y) < 0: - break - end2 = np.array([_x, _y]) - - # centralizing - center = (end1 + end2) / 2 - - return center - - def mask_to_tcl(self, pred_sin, pred_cos, pred_radii, tcl_contour, init_xy, direct=1): - """ - Iteratively find center line in tcl mask using initial point (x, y) - :param pred_sin: predict sin map - :param pred_cos: predict cos map - :param tcl_contour: predict tcl contour - :param init_xy: initial (x, y) - :param direct: direction [-1|1] - :return: - """ - - H, W = pred_sin.shape - x_shift, y_shift = init_xy - - result = [] - max_attempt = 200 - attempt = 0 - - while self.in_contour(tcl_contour, (x_shift, y_shift)): - - attempt += 1 - - sin = pred_sin[int(y_shift), int(x_shift)] - cos = pred_cos[int(y_shift), int(x_shift)] - x_c, y_c = self.centerlize(x_shift, y_shift, H, W, cos, sin, tcl_contour) - - sin_c = pred_sin[int(y_c), int(x_c)] - cos_c = pred_cos[int(y_c), int(x_c)] - radii_c = pred_radii[int(y_c), int(x_c)] - - result.append(np.array([x_c, y_c, radii_c])) - - # shift stride - for shrink in [1/2., 1/4., 1/8., 1/16., 1/32.]: - t = shrink * radii_c # stride = +/- 0.5 * [sin|cos](theta), if new point is outside, shrink it until shrink < 1/32., hit ends - x_shift_pos = x_c + cos_c * t * direct # positive direction - y_shift_pos = y_c + sin_c * t * direct # positive direction - x_shift_neg = x_c - cos_c * t * direct # negative direction - y_shift_neg = y_c - sin_c * t * direct # negative direction - - # if first point, select positive direction shift - if len(result) == 1: - x_shift, y_shift = x_shift_pos, y_shift_pos - else: - # else select point further with second last point - dist_pos = norm2(result[-2][:2] - (x_shift_pos, y_shift_pos)) - dist_neg = norm2(result[-2][:2] - (x_shift_neg, y_shift_neg)) - if dist_pos > dist_neg: - x_shift, y_shift = x_shift_pos, y_shift_pos - else: - x_shift, y_shift = x_shift_neg, y_shift_neg - # if out of bounds, skip - if int(x_shift) >= W or int(x_shift) < 0 or int(y_shift) >= H or int(y_shift) < 0: - continue - # found an inside point - if self.in_contour(tcl_contour, (x_shift, y_shift)): - break - # if out of bounds, break - if int(x_shift) >= W or int(x_shift) < 0 or int(y_shift) >= H or int(y_shift) < 0: - break - if attempt > max_attempt: - break - return np.array(result) - - def build_tcl(self, tcl_pred, sin_pred, cos_pred, radii_pred): - """ - Find TCL's center points and radii of each point - :param tcl_pred: output tcl mask, (512, 512) - :param sin_pred: output sin map, (512, 512) - :param cos_pred: output cos map, (512, 512) - :param radii_pred: output radii map, (512, 512) - :return: (list), tcl array: (n, 3), 3 denotes (x, y, radii) - """ - all_tcls = [] - - # find disjoint regions - tcl_mask = fill_hole(tcl_pred) - tcl_contours, _ = cv2.findContours(tcl_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - for cont in tcl_contours: - - # find an inner point of polygon - init = self.find_innerpoint(cont) - - if init is None: - 
continue - - x_init, y_init = init - - # find left/right tcl - tcl_left = self.mask_to_tcl(sin_pred, cos_pred, radii_pred, cont, (x_init, y_init), direct=1) - tcl_right = self.mask_to_tcl(sin_pred, cos_pred, radii_pred, cont, (x_init, y_init), direct=-1) - # concat - tcl = np.concatenate([tcl_left[::-1][:-1], tcl_right]) - all_tcls.append(tcl) - - return all_tcls - - def detect_contours(self, image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred): - """ - Input: FCN output, Output: text detection after post-processing - - :param image: (np.array) input image (3, H, W) - :param tr_pred: (np.array), text region prediction, (2, H, W) - :param tcl_pred: (np.array), text center line prediction, (2, H, W) - :param sin_pred: (np.array), sin prediction, (H, W) - :param cos_pred: (np.array), cos line prediction, (H, W) - :param radii_pred: (np.array), radii prediction, (H, W) - - :return: - (list), tcl array: (n, 3), 3 denotes (x, y, radii) - """ - - # thresholding - tr_pred_mask = tr_pred[1] > self.tr_thresh - tcl_pred_mask = tcl_pred[1] > self.tcl_thresh - - # multiply TR and TCL - tcl_mask = tcl_pred_mask * tr_pred_mask - - # regularize - sin_pred, cos_pred = regularize_sin_cos(sin_pred, cos_pred) - - # find tcl in each predicted mask - detect_result = self.build_tcl(tcl_mask, sin_pred, cos_pred, radii_pred) - - return self.postprocessing(image, detect_result, tr_pred_mask) - - def detect(self, image): - """ - - :param image: - :return: - """ - # get model output - output = self.model(image) - image = image[0].data.cpu().numpy() - tr_pred = output[0, 0:2].softmax(dim=0).data.cpu().numpy() - tcl_pred = output[0, 2:4].softmax(dim=0).data.cpu().numpy() - sin_pred = output[0, 4].data.cpu().numpy() - cos_pred = output[0, 5].data.cpu().numpy() - radii_pred = output[0, 6].data.cpu().numpy() - - # find text contours - contours = self.detect_contours(image, tr_pred, tcl_pred, sin_pred, cos_pred, radii_pred) # (n_tcl, 3) - - output = { - 'image': image, - 'tr': tr_pred, - 'tcl': tcl_pred, - 'sin': sin_pred, - 'cos': cos_pred, - 'radii': radii_pred - } - return contours, output - - def merge_contours(self, all_contours): - """ Merge overlapped instances to one instance with disjoint find / merge algorithm - :param all_contours: (list(np.array)), each with (n_points, 2) - :return: (list(np.array)), each with (n_points, 2) - """ - - def stride(disks, other_contour, left, step=0.3): - if len(disks) < 2: - return False - if left: - last_point, before_point = disks[:2] - else: - before_point, last_point = disks[-2:] - radius = last_point[2] - cos = vector_cos(last_point[:2] - before_point[:2]) - sin = vector_sin(last_point[:2] - before_point[:2]) - new_point = last_point[:2] + radius * step * np.array([cos, sin]) - return self.in_contour(other_contour, new_point) - - def can_merge(disks, other_contour): - return stride(disks, other_contour, left=True) or stride(disks, other_contour, left=False) - - F = list(range(len(all_contours))) - for i in range(len(all_contours)): - cont_i, disk_i = all_contours[i] - for j in range(i + 1, len(all_contours)): - cont_j, disk_j = all_contours[j] - if can_merge(disk_i, cont_j): - disjoint_merge(i, j, F) - - merged_polygons = merge_polygons([cont for cont, disks in all_contours], F) - return merged_polygons - - def postprocessing(self, image, detect_result, tr_pred_mask): - """ convert geometric info(center_x, center_y, radii) into contours - :param image: (np.array), input image - :param result: (list), each with (n, 3), 3 denotes (x, y, radii) - :param tr_pred_mask: 
(np.array), predicted text area mask, each with shape (H, W) - :return: (np.ndarray list), polygon format contours - """ - - all_conts = [] - for disk in detect_result: - reconstruct_mask = np.zeros(image.shape[1:], dtype=np.uint8) - for x, y, r in disk: - # expand radius for higher recall - if cfg.post_process_expand > 0.0: - r *= (1. + cfg.post_process_expand) - cv2.circle(reconstruct_mask, (int(x), int(y)), max(1, int(r)), 1, -1) - - # according to the paper, at least half of pixels in the reconstructed text area should be classified as TR - if (reconstruct_mask * tr_pred_mask).sum() < reconstruct_mask.sum() * 0.5: - continue - - # filter out too small objects - conts, _ = cv2.findContours(reconstruct_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if len(conts) > 1: - conts = list(conts) - conts.sort(key=lambda x: cv2.contourArea(x), reverse=True) - conts = tuple(conts) - elif not conts: - continue - all_conts.append((conts[0][:, 0, :], disk)) - - # merge joined instances - if cfg.post_process_merge: - all_conts = self.merge_contours(all_conts) - else: - all_conts = [cont[0] for cont in all_conts] - - return all_conts \ No newline at end of file diff --git a/contrib/TextSnake/util/misc.py b/contrib/TextSnake/util/misc.py deleted file mode 100644 index c4d2d25db..000000000 --- a/contrib/TextSnake/util/misc.py +++ /dev/null @@ -1,245 +0,0 @@ -import numpy as np -import errno -import os -import cv2 -from shapely.geometry import Polygon -from util.config import config as cfg - - -def to_device(*tensors): - if len(tensors) < 2: - return tensors[0].to(cfg.device) - return (t.to(cfg.device) for t in tensors) - - -def mkdirs(newdir): - """ - make directory with parent path - :param newdir: target path - """ - try: - if not os.path.exists(newdir): - os.makedirs(newdir) - except OSError as err: - # Reraise the error unless it's about an already existing directory - if err.errno != errno.EEXIST or not os.path.isdir(newdir): - raise - -def rescale_result(image, contours, H, W): - ori_H, ori_W = image.shape[:2] - image = cv2.resize(image, (W, H)) - for cont in contours: - cont[:, 0] = (cont[:, 0] * W / ori_W).astype(int) - cont[:, 1] = (cont[:, 1] * H / ori_H).astype(int) - return image, contours - - -def fill_hole(input_mask): - h, w = input_mask.shape - canvas = np.zeros((h + 2, w + 2), np.uint8) - canvas[1:h + 1, 1:w + 1] = input_mask.copy() - - mask = np.zeros((h + 4, w + 4), np.uint8) - - cv2.floodFill(canvas, mask, (0, 0), 1) - canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool_) - - return (~canvas | input_mask.astype(np.uint8)) - - -def regularize_sin_cos(sin, cos): - # regularization - scale = np.sqrt(1.0 / (sin ** 2 + cos ** 2)) - return sin * scale, cos * scale - - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - -def norm2(x, axis=None): - if axis: - return np.sqrt(np.sum(x ** 2, axis=axis)) - return np.sqrt(np.sum(x ** 2)) - -def cos(p1, p2): - return (p1 * p2).sum() / (norm2(p1) * norm2(p2)) - -def vector_sin(v): - assert len(v) == 2 - # sin = y / (sqrt(x^2 + y^2)) - l = np.sqrt(v[0] ** 2 + v[1] ** 2) - return v[1] / l - -def vector_cos(v): - assert len(v) == 2 - # cos = x / (sqrt(x^2 + y^2)) - l = np.sqrt(v[0] ** 2 + v[1] ** 2) - return v[0] / l - -def find_bottom(pts): - - if len(pts) > 4: - e = 
np.concatenate([pts, pts[:3]]) - candidate = [] - for i in range(1, len(pts) + 1): - v_prev = e[i] - e[i - 1] - v_next = e[i + 2] - e[i + 1] - if cos(v_prev, v_next) < -0.7: - candidate.append((i % len(pts), (i + 1) % len(pts), norm2(e[i] - e[i + 1]))) - - if len(candidate) != 2 or candidate[0][0] == candidate[1][1] or candidate[0][1] == candidate[1][0]: - # if candidate number < 2, or two bottom are joined, select 2 farthest edge - mid_list = [] - for i in range(len(pts)): - mid_point = (e[i] + e[(i + 1) % len(pts)]) / 2 - mid_list.append((i, (i + 1) % len(pts), mid_point)) - - dist_list = [] - for i in range(len(pts)): - for j in range(len(pts)): - s1, e1, mid1 = mid_list[i] - s2, e2, mid2 = mid_list[j] - dist = norm2(mid1 - mid2) - dist_list.append((s1, e1, s2, e2, dist)) - bottom_idx = np.argsort([dist for s1, e1, s2, e2, dist in dist_list])[-2:] - bottoms = [dist_list[bottom_idx[0]][:2], dist_list[bottom_idx[1]][:2]] - else: - bottoms = [candidate[0][:2], candidate[1][:2]] - - else: - d1 = norm2(pts[1] - pts[0]) + norm2(pts[2] - pts[3]) - d2 = norm2(pts[2] - pts[1]) + norm2(pts[0] - pts[3]) - bottoms = [(0, 1), (2, 3)] if d1 < d2 else [(1, 2), (3, 0)] - assert len(bottoms) == 2, 'fewer than 2 bottoms' - return bottoms - - -def split_long_edges(points, bottoms): - """ - Find two long edge sequence of and polygon - """ - b1_start, b1_end = bottoms[0] - b2_start, b2_end = bottoms[1] - n_pts = len(points) - - i = b1_end + 1 - long_edge_1 = [] - while (i % n_pts != b2_end): - long_edge_1.append((i - 1, i)) - i = (i + 1) % n_pts - - i = b2_end + 1 - long_edge_2 = [] - while (i % n_pts != b1_end): - long_edge_2.append((i - 1, i)) - i = (i + 1) % n_pts - return long_edge_1, long_edge_2 - - -def find_long_edges(points, bottoms): - b1_start, b1_end = bottoms[0] - b2_start, b2_end = bottoms[1] - n_pts = len(points) - i = (b1_end + 1) % n_pts - long_edge_1 = [] - - while (i % n_pts != b2_end): - start = (i - 1) % n_pts - end = i % n_pts - long_edge_1.append((start, end)) - i = (i + 1) % n_pts - - i = (b2_end + 1) % n_pts - long_edge_2 = [] - while (i % n_pts != b1_end): - start = (i - 1) % n_pts - end = i % n_pts - long_edge_2.append((start, end)) - i = (i + 1) % n_pts - return long_edge_1, long_edge_2 - - -def split_edge_seqence(points, long_edge, n_parts): - - edge_length = [norm2(points[e1] - points[e2]) for e1, e2 in long_edge] - point_cumsum = np.cumsum([0] + edge_length) - total_length = sum(edge_length) - length_per_part = total_length / n_parts - - cur_node = 0 # first point - splited_result = [] - - for i in range(1, n_parts): - cur_end = i * length_per_part - - while(cur_end > point_cumsum[cur_node + 1]): - cur_node += 1 - - e1, e2 = long_edge[cur_node] - e1, e2 = points[e1], points[e2] - - # start_point = points[long_edge[cur_node]] - end_shift = cur_end - point_cumsum[cur_node] - ratio = end_shift / edge_length[cur_node] - new_point = e1 + ratio * (e2 - e1) - # print(cur_end, point_cumsum[cur_node], end_shift, edge_length[cur_node], '=', new_point) - splited_result.append(new_point) - - # add first and last point - p_first = points[long_edge[0][0]] - p_last = points[long_edge[-1][1]] - splited_result = [p_first] + splited_result + [p_last] - return np.stack(splited_result) - -def disjoint_find(x, F): - if F[x] == x: - return x - F[x] = disjoint_find(F[x], F) - return F[x] - -def disjoint_merge(x, y, F): - x = disjoint_find(x, F) - y = disjoint_find(y, F) - if x == y: - return False - F[y] = x - return True - - -def merge_polygons(polygons, merge_map): - - def merge_two_polygon(p1, 
p2): - p2 = Polygon(p2) - merged = p1.union(p2) - return merged - - merge_map = [disjoint_find(x, merge_map) for x in range(len(merge_map))] - merge_map = np.array(merge_map) - final_polygons = [] - - for i in np.unique(merge_map): - merge_idx = np.where(merge_map == i)[0] - if len(merge_idx) > 0: - merged = Polygon(polygons[merge_idx[0]]) - for j in range(1, len(merge_idx)): - merged = merge_two_polygon(merged, polygons[merge_idx[j]]) - x, y = merged.exterior.coords.xy - final_polygons.append(np.stack([x, y], axis=1).astype(int)) - - return final_polygons - - diff --git a/contrib/TextSnake/util/option.py b/contrib/TextSnake/util/option.py deleted file mode 100644 index ffbcf1b71..000000000 --- a/contrib/TextSnake/util/option.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import torch -import os -import torch.backends.cudnn as cudnn - -from datetime import datetime - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -def arg2str(args): - args_dict = vars(args) - option_str = datetime.now().strftime('%b%d_%H-%M-%S') + '\n' - - for k, v in sorted(args_dict.items()): - option_str += ('{}: {}\n'.format(str(k), str(v))) - - return option_str - -class BaseOptions(object): - - def __init__(self): - - self.parser = argparse.ArgumentParser() - - # basic opts - self.parser.add_argument('exp_name', type=str, help='Experiment name') - self.parser.add_argument('--net', default='vgg', type=str, choices=['vgg', 'resnet'], help='Network architecture') - self.parser.add_argument('--dataset', default='total-text', type=str, choices=['synth-text', 'total-text'], help='Dataset name') - self.parser.add_argument('--resume', default=None, type=str, help='Path to target resume checkpoint') - self.parser.add_argument('--num_workers', default=8, type=int, help='Number of workers used in dataloading') - self.parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') - self.parser.add_argument('--mgpu', action='store_true', help='Use multi-gpu to train model') - self.parser.add_argument('--save_dir', default='./save/', help='Path to save checkpoint models') - self.parser.add_argument('--vis_dir', default='./vis/', help='Path to save visualization images') - self.parser.add_argument('--log_dir', default='./logs/', help='Path to tensorboard log') - self.parser.add_argument('--loss', default='CrossEntropyLoss', type=str, help='Training Loss') - self.parser.add_argument('--input_channel', default=1, type=int, help='number of input channels' ) - self.parser.add_argument('--pretrain', default=False, type=str2bool, help='Pretrained AutoEncoder model') - self.parser.add_argument('--verbose', '-v', default=True, type=str2bool, help='Whether to output debug info') - self.parser.add_argument('--viz', action='store_true', help='Whether to output debug info') - - # train opts - self.parser.add_argument('--start_iter', default=0, type=int, help='Begin counting iterations starting from this value (should be used with resume)') - self.parser.add_argument('--max_epoch', default=200, type=int, help='Max epochs') - self.parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float, help='initial learning rate') - self.parser.add_argument('--lr_adjust', default='fix', choices=['fix', 'poly'], type=str, help='Learning Rate Adjust Strategy') - self.parser.add_argument('--stepvalues', default=[], nargs='+', type=int, help='# of iter to change lr') - self.parser.add_argument('--weight_decay', '--wd', default=0., type=float, help='Weight decay for SGD') - 
self.parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD lr') - self.parser.add_argument('--momentum', default=0.9, type=float, help='momentum') - self.parser.add_argument('--batch_size', default=4, type=int, help='Batch size for training') - self.parser.add_argument('--optim', default='SGD', type=str, choices=['SGD', 'Adam'], help='Optimizer') - self.parser.add_argument('--display_freq', default=50, type=int, help='display training metrics every # iterations') - self.parser.add_argument('--viz_freq', default=50, type=int, help='visualize training process every # iterations') - self.parser.add_argument('--save_freq', default=10, type=int, help='save weights every # epoch') - self.parser.add_argument('--log_freq', default=100, type=int, help='log to tensorboard every # iterations') - self.parser.add_argument('--val_freq', default=100, type=int, help='do validation every # iterations') - - # data args - self.parser.add_argument('--rescale', type=float, default=255.0, help='rescale factor') - self.parser.add_argument('--means', type=int, default=(0.485, 0.456, 0.406), nargs='+', help='mean') - self.parser.add_argument('--stds', type=int, default=(0.229, 0.224, 0.225), nargs='+', help='std') - self.parser.add_argument('--input_size', default=512, type=int, help='model input size') - - # eval args - self.parser.add_argument('--checkepoch', default=-1, type=int, help='Load checkpoint number') - - # demo args - self.parser.add_argument('--img_root', default=None, type=str, help='Path to deploy images') - - def parse(self, fixed=None): - - if fixed is not None: - args = self.parser.parse_args(fixed) - else: - args = self.parser.parse_args() - - return args - - def initialize(self, fixed=None): - - # Parse options - self.args = self.parse(fixed) - - # Setting default torch Tensor type - if self.args.cuda and torch.cuda.is_available(): - torch.set_default_tensor_type('torch.cuda.FloatTensor') - cudnn.benchmark = True - else: - torch.set_default_tensor_type('torch.FloatTensor') - - # Create weights saving directory - if not os.path.exists(self.args.save_dir): - os.mkdir(self.args.save_dir) - - # Create weights saving directory of target model - model_save_path = os.path.join(self.args.save_dir, self.args.exp_name) - - if not os.path.exists(model_save_path): - os.mkdir(model_save_path) - - return self.args - - def update(self, args, extra_options): - - for k, v in extra_options.items(): - setattr(args, k, v) diff --git a/contrib/TextSnake/util/shedule.py b/contrib/TextSnake/util/shedule.py deleted file mode 100644 index 6b250af33..000000000 --- a/contrib/TextSnake/util/shedule.py +++ /dev/null @@ -1,28 +0,0 @@ -from torch.optim.lr_scheduler import _LRScheduler - -class FixLR(_LRScheduler): - """Sets the learning rate of each parameter group to the initial lr - decayed by gamma every step_size epochs. When last_epoch=-1, sets - initial lr as lr. - - Args: - optimizer (Optimizer): Wrapped optimizer. - step_size (int): Period of learning rate decay. - gamma (float): Multiplicative factor of learning rate decay. - Default: 0.1. - last_epoch (int): The index of last epoch. Default: -1. - - Example: - >>> # Fixed leraning rate - >>> scheduler = FixLR(optimizer, step_size=30, gamma=0.1) - >>> for epoch in range(100): - >>> scheduler.step() - >>> train(...) - >>> validate(...) 
- """ - - def __init__(self, optimizer, last_epoch=-1): - super().__init__(optimizer, last_epoch) - - def get_lr(self): - return self.base_lrs \ No newline at end of file diff --git a/contrib/TextSnake/util/summary.py b/contrib/TextSnake/util/summary.py deleted file mode 100644 index 2be3a77f0..000000000 --- a/contrib/TextSnake/util/summary.py +++ /dev/null @@ -1,25 +0,0 @@ -from tensorboardX import SummaryWriter -from util.misc import mkdirs - -class LogSummary(object): - - def __init__(self, log_path): - - mkdirs(log_path) - self.writer = SummaryWriter(log_path) - - def write_scalars(self, scalar_dict, n_iter, tag=None): - - for name, scalar in scalar_dict.items(): - if tag is not None: - name = '/'.join([tag, name]) - self.writer.add_scalar(name, scalar, n_iter) - - def write_hist_parameters(self, net, n_iter): - for name, param in net.named_parameters(): - self.writer.add_histogram(name, param.clone().cpu().numpy(), n_iter) - - - - - diff --git a/contrib/TextSnake/util/visualize.py b/contrib/TextSnake/util/visualize.py deleted file mode 100644 index 5e82c3974..000000000 --- a/contrib/TextSnake/util/visualize.py +++ /dev/null @@ -1,55 +0,0 @@ -import torch -import numpy as np -import cv2 -import os -from util.config import config as cfg - - -def visualize_network_output(output, tr_mask, tcl_mask, mode='train'): - - vis_dir = os.path.join(cfg.vis_dir, cfg.exp_name + '_' + mode) - if not os.path.exists(vis_dir): - os.mkdir(vis_dir) - - tr_pred = output[:, :2] - tr_score, tr_predict = tr_pred.max(dim=1) - - tcl_pred = output[:, 2:4] - tcl_score, tcl_predict = tcl_pred.max(dim=1) - - tr_predict = tr_predict.cpu().numpy() - tcl_predict = tcl_predict.cpu().numpy() - - tr_target = tr_mask.cpu().numpy() - tcl_target = tcl_mask.cpu().numpy() - - for i in range(len(tr_pred)): - tr_pred = (tr_predict[i] * 255).astype(np.uint8) - tr_targ = (tr_target[i] * 255).astype(np.uint8) - - tcl_pred = (tcl_predict[i] * 255).astype(np.uint8) - tcl_targ = (tcl_target[i] * 255).astype(np.uint8) - - tr_show = np.concatenate([tr_pred, tr_targ], axis=1) - tcl_show = np.concatenate([tcl_pred, tcl_targ], axis=1) - show = np.concatenate([tr_show, tcl_show], axis=0) - show = cv2.resize(show, (512, 512)) - - path = os.path.join(vis_dir, '{}.png'.format(i)) - cv2.imwrite(path, show) - - -def visualize_detection(image, contours, tr=None, tcl=None): - image_show = image.copy() - image_show = np.ascontiguousarray(image_show[:, :, ::-1]) - image_show = cv2.polylines(image_show, contours, True, (0, 0, 255), 3) - - if (tr is not None) and (tcl is not None): - tr = (tr > cfg.tr_thresh).astype(np.uint8) - tcl = (tcl > cfg.tcl_thresh).astype(np.uint8) - tr = cv2.cvtColor(tr * 255, cv2.COLOR_GRAY2BGR) - tcl = cv2.cvtColor(tcl * 255, cv2.COLOR_GRAY2BGR) - image_show = np.concatenate([image_show, tr, tcl], axis=1) - return image_show - else: - return image_show -- Gitee From e9dab08ffac5aac607396d798122cd3757c6d105 Mon Sep 17 00:00:00 2001 From: Ssayhi_w Date: Sat, 24 Aug 2024 16:49:45 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E5=BC=AF=E6=9B=B2=E6=96=87=E5=AD=97?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=8F=82=E8=80=83=E8=AE=BE=E8=AE=A1=E9=80=82?= =?UTF-8?q?=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contrib/TextSnake/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/TextSnake/README.md b/contrib/TextSnake/README.md index 3f7978fe6..631500b4a 100644 --- a/contrib/TextSnake/README.md +++ b/contrib/TextSnake/README.md @@ -49,6 +49,10 @@ TextSnake 弯曲形状文字检测基于 MindX 
SDK 开发,对图片中的任 ├── t.pipeline //pipeline ├── sdk.png //流程图 ├── pipeline.png //pipeline流程图 +├── detection.py +├── misc.py +├── polygon_wrapper.py +├── Deteval.py └──README.md ``` -- Gitee From e4eec118cbec296ee51a509cabe3ee31a1d4c28e Mon Sep 17 00:00:00 2001 From: Ssayhi_w Date: Sat, 24 Aug 2024 16:50:48 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E5=BC=AF=E6=9B=B2=E6=96=87=E5=AD=97?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=8F=82=E8=80=83=E8=AE=BE=E8=AE=A1=E9=80=82?= =?UTF-8?q?=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contrib/TextSnake/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/contrib/TextSnake/README.md b/contrib/TextSnake/README.md index 631500b4a..91fe88900 100644 --- a/contrib/TextSnake/README.md +++ b/contrib/TextSnake/README.md @@ -30,7 +30,6 @@ TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任 ### 1.4 三方依赖 - | 软件名称 | 版本 | | -------- |--------| | numpy | 1.25.2 | @@ -39,7 +38,7 @@ TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任 | easydict | 1.13 | | shapely | 2.0.6 | -### 1.4 代码目录结构与说明 +### 1.5 代码目录结构与说明 本工程名称为TextSnake,工程目录如下图所示: @@ -56,7 +55,7 @@ TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任 └──README.md ``` -### 1.5 技术实现流程图 +### 1.6 技术实现流程图 实现流程图如下图所示: @@ -94,15 +93,15 @@ https://mindx.sdk.obs.cn-north-4.myhuaweicloud.com/mindxsdk-referenceapps%20/con 该压缩文件中已存在om文件,需删除后重新进行模型转换 具体步骤如下 -1. 下载上述模型压缩包,获取 TextSnake.onnx 模型文件放置 TextSnake/model 目录下。 +**步骤1** 下载上述模型压缩包,获取 TextSnake.onnx 模型文件放置 TextSnake/model 目录下。 -2. 进入TextSnake/model文件夹下执行命令 +**步骤2** 进入TextSnake/model文件夹下执行命令 ``` atc --model=TextSnake.onnx --framework=5 --output=TextSnake_bs1 --input_format=NCHW --input_shape="image:1,3,512,512" --log=info --soc_version=Ascend310B1 ``` -3. 执行该命令会在当前目录下生成项目需要的模型文件TextSnake_bs1.om。执行后终端输出为 +**步骤3** 执行该命令会在当前目录下生成项目需要的模型文件TextSnake_bs1.om。执行后终端输出为 ``` ATC start working now, please wait for a moment. -- Gitee From b05747206e408143e411c8a7645d82cd442edef7 Mon Sep 17 00:00:00 2001 From: Ssayhi_w Date: Sat, 24 Aug 2024 16:54:14 +0800 Subject: [PATCH 5/8] =?UTF-8?q?ascendFFmpeg=E4=B8=8Evision=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E5=88=9D=E5=A7=8B=E5=8C=96=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Ascendffmpeg/libavutil/hwcontext_ascend.c | 651 +++++++++--------- 1 file changed, 326 insertions(+), 325 deletions(-) diff --git a/mxVision/Ascendffmpeg/libavutil/hwcontext_ascend.c b/mxVision/Ascendffmpeg/libavutil/hwcontext_ascend.c index 0b843f337..c5cb843da 100644 --- a/mxVision/Ascendffmpeg/libavutil/hwcontext_ascend.c +++ b/mxVision/Ascendffmpeg/libavutil/hwcontext_ascend.c @@ -1,326 +1,327 @@ -/* - * Copyright(c) 2020. Huawei Technologies Co.,Ltd. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except int compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "buffer.h" -#include "common.h" -#include "hwcontext.h" -#include "hwcontext_ascend.h" -#include "hwcontext_internal.h" -#include "mem.h" -#include "pixdesc.h" -#include "imgutils.h" -#include "acl/acl.h" -#include "acl/dvpp/hi_dvpp.h" - -static int init_flag = 0; - -static const enum AVPixelFormat supported_formats[] = { - AV_PIX_FMT_NV12, -}; - -#define ASCEND_FRAME_ALIGNMENT 1 - -typedef struct ASCENDFramesContext { - int width; - int height; -} ASCENDFramesContext; - -static int ascend_device_init(AVHWDeviceContext *ctx) -{ - AVASCENDDeviceContext *hwctx = ctx->hwctx; - if(!hwctx->ascend_ctx) { - hwctx->ascend_ctx = av_mallocz(sizeof(*hwctx->ascend_ctx)); - if (!hwctx->ascend_ctx) { - return AVERROR_UNKNOWN; - } - } - return 0; -} - -static void ascend_device_uninit(AVHWDeviceContext *device_ctx) -{ - AVASCENDDeviceContext *hwctx = device_ctx->hwctx; - - if (hwctx->ascend_ctx) { - av_freep(&hwctx->ascend_ctx); - hwctx->ascend_ctx = NULL; - } -} - -static int ascend_device_create(AVHWDeviceContext *device_ctx, const char *device, AVDictionary *opts, int flags) -{ - AVASCENDDeviceContext *hwctx = device_ctx->hwctx; - AscendContext *ascend_ctx = NULL; - - int ret = 0; - int device_idx = 0; - if (device) { - device_idx = strtol(device, NULL, 0); - } - av_log(device_ctx, AV_LOG_INFO, "device id is: %d.\n", device_idx); - - if (ascend_device_init(device_ctx) < 0) - goto error; - - int device_count = 0; - ret = aclrtGetDeviceCount(&device_count); - if (ret != 0) { - goto error; - } - if (device_idx >= device_count) { - av_log(device_ctx, AV_LOG_ERROR, "device id must less than: %d.\n", device_count); - goto error; - } - - ascend_ctx = hwctx->ascend_ctx; - ascend_ctx->device_id = device_idx; - - if (!init_flag) { - ret = aclInit(NULL); - if (ret != 0) { - av_log(device_ctx, AV_LOG_ERROR, "InitDevices failed, ret = %d.\n", ret); - goto error; - } - init_flag = 1; - } - - - ret = aclrtSetDevice(device_idx); - if (ret != 0) { - av_log(device_ctx, AV_LOG_ERROR, "SetDevice failed, ret = %d.\n", ret); - goto error; - } - - aclrtContext context; - ret = aclrtCreateContext(&context, device_idx); - if(ret != 0) { - av_log(device_ctx, AV_LOG_ERROR, "CreateContext failed, ret = %d.\n", ret); - goto error; - } - - hwctx->ascend_ctx->context = context; - return 0; - error: - ascend_device_uninit(device_ctx); - return AVERROR_UNKNOWN; - -} - -static int ascend_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, - AVHWFramesConstraints *constraints) -{ - int i; - constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1, - sizeof(*constraints->valid_sw_formats)); - if (!constraints->valid_sw_formats) { - return AVERROR_EXTERNAL; - } - - for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { - constraints->valid_sw_formats[i] = supported_formats[i]; - } - - constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE; - - constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats)); - if (!constraints->valid_sw_formats) { - return AVERROR_EXTERNAL; - } - - constraints->valid_hw_formats[0] = AV_PIX_FMT_ASCEND; - constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; - - return 0; - -} - -static void ascend_buffer_free(void * opaque, uint8_t* data) -{ - AVHWFramesContext *ctx = opaque; - if (data) { - aclError ret = hi_mpi_dvpp_free(data); - data = NULL; - if (ret != 0) { - av_log(ctx, AV_LOG_ERROR, "HiMpi free faile: dev addr %p \n", data); - } - } -} - -static AVBufferRef *ascend_pool_alloc(void 
*opaque, int size) -{ - AVHWFramesContext *ctx = opaque; - AVHWDeviceContext *device_ctx = ctx->device_ctx; - AVASCENDDeviceContext *hwctx = device_ctx->hwctx; - AscendContext *ascend_ctx = hwctx->ascend_ctx; - - AVBufferRef *buffer = NULL; - void *data = NULL; - - aclError ret = hi_mpi_dvpp_malloc(ascend_ctx->device_id, (void **)(&data), size); - if (ret != 0) { - av_log(ctx, AV_LOG_ERROR, "HiMpi Malloc failed: dev addr %p, size %d.\n", data, size); - return NULL; - } - buffer = av_buffer_create((uint8_t*)data, size, ascend_buffer_free, ctx, 0); - if (!buffer) { - ret = hi_mpi_dvpp_free(data); - if (ret != 0) { - av_log(ctx, AV_LOG_ERROR, "HiMpi Free failed with no buffer: dev addr %p.\n", data); - } - } - return buffer; -} - -static int ascend_frames_init(AVHWFramesContext *ctx) -{ - ASCENDFramesContext * priv = ctx->internal->priv; - int i; - for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { - if (av_get_pix_fmt_name(ctx->sw_format) == - av_get_pix_fmt_name(supported_formats[i])) { - break; - } - } - - if (i == FF_ARRAY_ELEMS(supported_formats)) { - av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported.\n", - av_get_pix_fmt_name(ctx->sw_format)); - return AVERROR_EXTERNAL; - } - - av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->width, &priv->height); - - if (!ctx->pool) { - int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, - ctx->height, ASCEND_FRAME_ALIGNMENT); - if (size < 0) - return size; - - ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, ascend_pool_alloc, NULL); - if (!ctx->internal->pool_internal) { - av_log(ctx, AV_LOG_DEBUG, "internal pool init failed.\n"); - return AVERROR_EXTERNAL; - } - } - - return 0; -} - -static int ascend_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) -{ - frame->buf[0] = av_buffer_pool_get(ctx->pool); - if (!frame->buf[0]) - return AVERROR_EXTERNAL; - - int ret = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data, - ctx->sw_format, ctx->width, ctx->height, ASCEND_FRAME_ALIGNMENT); - if (ret < 0) - return ret; - - frame->format = AV_PIX_FMT_ASCEND; - frame->width = ctx->width; - frame->height = ctx->height; - - return 0; -} - -static int ascend_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, - enum AVPixelFormat **formats) -{ - enum AVPixelFormat *fmts; - - fmts = av_malloc_array(2, sizeof(*fmts)); - if (!fmts) - return AVERROR_EXTERNAL; - - fmts[0] = ctx->sw_format; - fmts[1] = AV_PIX_FMT_NONE; - - *formats = fmts; - return 0; -} - -static int ascend_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) -{ - AVHWDeviceContext *device_ctx = ctx->device_ctx; - AVASCENDDeviceContext *hwctx = ctx->hwctx; - AscendContext *ascend_ctx = hwctx->ascend_ctx; - - int i; - size_t dstBytes; - size_t srcBytes; - aclError ret; - for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { - dstBytes = src->width * src->height * (i ? 1.0 / 2 : 1); - srcBytes = src->width * src->height * (i ? 
1.0 / 2 : 1); - ret = aclrtMemcpy(dst->data[i], dstBytes, src->data[i], srcBytes, ACL_MEMCPY_HOST_TO_DEVICE); - if (ret != 0) { - av_log(ctx, AV_LOG_ERROR, "Mem copy h2d: host %p wigh %lu -> dev %p with %lu.\n", - src->data[i], srcBytes, dst->data[i], dstBytes); - av_log(ctx, AV_LOG_ERROR, "ascendMemcoy H2D error occur, func: %s, line %d.\n", - __func__, __LINE__); - return -1; - } - } - - return 0; -} - -static int ascend_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) -{ - AVHWDeviceContext *device_ctx = ctx->device_ctx; - AVASCENDDeviceContext *hwctx = ctx->hwctx; - AscendContext *ascend_ctx = hwctx->ascend_ctx; - - int i; - size_t dstBytes; - size_t srcBytes; - aclError ret; - for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { - dstBytes = src->width * src->height * (i ? 1.0 / 2 : 1); - srcBytes = src->width * src->height * (i ? 1.0 / 2 : 1); - ret = aclrtMemcpy(dst->data[i], dstBytes, src->data[i], srcBytes, ACL_MEMCPY_DEVICE_TO_HOST); - if (ret != 0) { - av_log(ctx, AV_LOG_ERROR, "Mem copy d2h: dev %p wigh %lu -> host %p with %lu.\n", - src->data[i], srcBytes, dst->data[i], dstBytes); - av_log(ctx, AV_LOG_ERROR, "ascendMemcoy D2H error occur, func: %s, line %d.\n", - __func__, __LINE__); - return -1; - } - } - - return 0; -} - -const HWContextType ff_hwcontext_type_ascend = { - .type = AV_HWDEVICE_TYPE_ASCEND, - .name = "ASCEND", - - .device_hwctx_size = sizeof(AVASCENDDeviceContext), - .frames_priv_size = sizeof(ASCENDFramesContext), - - .device_create = ascend_device_create, - .device_init = ascend_device_init, - .device_uninit = ascend_device_uninit, - .frames_get_constraints = ascend_frames_get_constraints, - .frames_init = ascend_frames_init, - .frames_get_buffer = ascend_get_buffer, - .transfer_get_formats = ascend_transfer_get_formats, - .transfer_data_to = ascend_transfer_data_to, - .transfer_data_from = ascend_transfer_data_from, - - .pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_ASCEND, AV_PIX_FMT_NONE}, +/* + * Copyright(c) 2020. Huawei Technologies Co.,Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except int compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_ascend.h"
+#include "hwcontext_internal.h"
+#include "mem.h"
+#include "pixdesc.h"
+#include "imgutils.h"
+#include "acl/acl.h"
+#include "acl/dvpp/hi_dvpp.h"
+#include "acl/acl_base.h"
+
+static int init_flag = 0;
+
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_NV12,
+};
+
+#define ASCEND_FRAME_ALIGNMENT 1
+
+typedef struct ASCENDFramesContext {
+    int width;
+    int height;
+} ASCENDFramesContext;
+
+static int ascend_device_init(AVHWDeviceContext *ctx)
+{
+    AVASCENDDeviceContext *hwctx = ctx->hwctx;
+    if(!hwctx->ascend_ctx) {
+        hwctx->ascend_ctx = av_mallocz(sizeof(*hwctx->ascend_ctx));
+        if (!hwctx->ascend_ctx) {
+            return AVERROR_UNKNOWN;
+        }
+    }
+    return 0;
+}
+
+static void ascend_device_uninit(AVHWDeviceContext *device_ctx)
+{
+    AVASCENDDeviceContext *hwctx = device_ctx->hwctx;
+
+    if (hwctx->ascend_ctx) {
+        av_freep(&hwctx->ascend_ctx);
+        hwctx->ascend_ctx = NULL;
+    }
+}
+
+static int ascend_device_create(AVHWDeviceContext *device_ctx, const char *device, AVDictionary *opts, int flags)
+{
+    AVASCENDDeviceContext *hwctx = device_ctx->hwctx;
+    AscendContext *ascend_ctx = NULL;
+
+    int ret = 0;
+    int device_idx = 0;
+    if (device) {
+        device_idx = strtol(device, NULL, 0);
+    }
+    av_log(device_ctx, AV_LOG_INFO, "device id is: %d.\n", device_idx);
+
+    if (ascend_device_init(device_ctx) < 0)
+        goto error;
+
+    int device_count = 0;
+    ret = aclrtGetDeviceCount(&device_count);
+    if (ret != 0) {
+        goto error;
+    }
+    if (device_idx >= device_count) {
+        av_log(device_ctx, AV_LOG_ERROR, "device id must be less than: %d.\n", device_count);
+        goto error;
+    }
+
+    ascend_ctx = hwctx->ascend_ctx;
+    ascend_ctx->device_id = device_idx;
+
+    if (!init_flag) {
+        ret = aclInit(NULL);
+        if (ret != 0 && ret != ACL_ERROR_REPEAT_INITIALIZE) {
+            av_log(device_ctx, AV_LOG_ERROR, "InitDevices failed, ret = %d.\n", ret);
+            goto error;
+        }
+        init_flag = 1;
+    }
+
+
+    ret = aclrtSetDevice(device_idx);
+    if (ret != 0) {
+        av_log(device_ctx, AV_LOG_ERROR, "SetDevice failed, ret = %d.\n", ret);
+        goto error;
+    }
+
+    aclrtContext context;
+    ret = aclrtCreateContext(&context, device_idx);
+    if(ret != 0) {
+        av_log(device_ctx, AV_LOG_ERROR, "CreateContext failed, ret = %d.\n", ret);
+        goto error;
+    }
+
+    hwctx->ascend_ctx->context = context;
+    return 0;
+    error:
+    ascend_device_uninit(device_ctx);
+    return AVERROR_UNKNOWN;
+
+}
+
+static int ascend_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig,
+                                         AVHWFramesConstraints *constraints)
+{
+    int i;
+    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
+                                                    sizeof(*constraints->valid_sw_formats));
+    if (!constraints->valid_sw_formats) {
+        return AVERROR_EXTERNAL;
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+        constraints->valid_sw_formats[i] = supported_formats[i];
+    }
+
+    constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
+
+    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
+    if (!constraints->valid_hw_formats) {
+        return AVERROR_EXTERNAL;
+    }
+
+    constraints->valid_hw_formats[0] = AV_PIX_FMT_ASCEND;
+    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+    return 0;
+
+}
+
+static void ascend_buffer_free(void * opaque, uint8_t* data)
+{
+    AVHWFramesContext *ctx = opaque;
+    if (data) {
+        aclError ret = hi_mpi_dvpp_free(data);
+        if (ret != 0) {
+            av_log(ctx, AV_LOG_ERROR, "HiMpi free failed: dev addr %p \n", 
data); + } + } +} + +static AVBufferRef *ascend_pool_alloc(void *opaque, int size) +{ + AVHWFramesContext *ctx = opaque; + AVHWDeviceContext *device_ctx = ctx->device_ctx; + AVASCENDDeviceContext *hwctx = device_ctx->hwctx; + AscendContext *ascend_ctx = hwctx->ascend_ctx; + + AVBufferRef *buffer = NULL; + void *data = NULL; + + aclError ret = hi_mpi_dvpp_malloc(ascend_ctx->device_id, (void **)(&data), size); + if (ret != 0) { + av_log(ctx, AV_LOG_ERROR, "HiMpi Malloc failed: dev addr %p, size %d.\n", data, size); + return NULL; + } + buffer = av_buffer_create((uint8_t*)data, size, ascend_buffer_free, ctx, 0); + if (!buffer) { + ret = hi_mpi_dvpp_free(data); + if (ret != 0) { + av_log(ctx, AV_LOG_ERROR, "HiMpi Free failed with no buffer: dev addr %p.\n", data); + } + } + return buffer; +} + +static int ascend_frames_init(AVHWFramesContext *ctx) +{ + ASCENDFramesContext * priv = ctx->internal->priv; + int i; + for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { + if (av_get_pix_fmt_name(ctx->sw_format) == + av_get_pix_fmt_name(supported_formats[i])) { + break; + } + } + + if (i == FF_ARRAY_ELEMS(supported_formats)) { + av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported.\n", + av_get_pix_fmt_name(ctx->sw_format)); + return AVERROR_EXTERNAL; + } + + av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->width, &priv->height); + + if (!ctx->pool) { + int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, + ctx->height, ASCEND_FRAME_ALIGNMENT); + if (size < 0) + return size; + + ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, ascend_pool_alloc, NULL); + if (!ctx->internal->pool_internal) { + av_log(ctx, AV_LOG_DEBUG, "internal pool init failed.\n"); + return AVERROR_EXTERNAL; + } + } + + return 0; +} + +static int ascend_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) +{ + frame->buf[0] = av_buffer_pool_get(ctx->pool); + if (!frame->buf[0]) + return AVERROR_EXTERNAL; + + int ret = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data, + ctx->sw_format, ctx->width, ctx->height, ASCEND_FRAME_ALIGNMENT); + if (ret < 0) + return ret; + + frame->format = AV_PIX_FMT_ASCEND; + frame->width = ctx->width; + frame->height = ctx->height; + + return 0; +} + +static int ascend_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) +{ + enum AVPixelFormat *fmts; + + fmts = av_malloc_array(2, sizeof(*fmts)); + if (!fmts) + return AVERROR_EXTERNAL; + + fmts[0] = ctx->sw_format; + fmts[1] = AV_PIX_FMT_NONE; + + *formats = fmts; + return 0; +} + +static int ascend_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) +{ + AVHWDeviceContext *device_ctx = ctx->device_ctx; + AVASCENDDeviceContext *hwctx = ctx->hwctx; + AscendContext *ascend_ctx = hwctx->ascend_ctx; + + int i; + size_t dstBytes; + size_t srcBytes; + aclError ret; + for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { + dstBytes = src->width * src->height * (i ? 1.0 / 2 : 1); + srcBytes = src->width * src->height * (i ? 
1.0 / 2 : 1);
+        ret = aclrtMemcpy(dst->data[i], dstBytes, src->data[i], srcBytes, ACL_MEMCPY_HOST_TO_DEVICE);
+        if (ret != 0) {
+            av_log(ctx, AV_LOG_ERROR, "Mem copy h2d: host %p with %lu -> dev %p with %lu.\n",
+                   src->data[i], srcBytes, dst->data[i], dstBytes);
+            av_log(ctx, AV_LOG_ERROR, "ascendMemcpy H2D error occurred, func: %s, line %d.\n",
+                   __func__, __LINE__);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int ascend_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
+{
+    AVHWDeviceContext *device_ctx = ctx->device_ctx;
+    AVASCENDDeviceContext *hwctx = ctx->hwctx;
+    AscendContext *ascend_ctx = hwctx->ascend_ctx;
+
+    int i;
+    size_t dstBytes;
+    size_t srcBytes;
+    aclError ret;
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        dstBytes = src->width * src->height * (i ? 1.0 / 2 : 1);
+        srcBytes = src->width * src->height * (i ? 1.0 / 2 : 1);
+        ret = aclrtMemcpy(dst->data[i], dstBytes, src->data[i], srcBytes, ACL_MEMCPY_DEVICE_TO_HOST);
+        if (ret != 0) {
+            av_log(ctx, AV_LOG_ERROR, "Mem copy d2h: dev %p with %lu -> host %p with %lu.\n",
+                   src->data[i], srcBytes, dst->data[i], dstBytes);
+            av_log(ctx, AV_LOG_ERROR, "ascendMemcpy D2H error occurred, func: %s, line %d.\n",
+                   __func__, __LINE__);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+const HWContextType ff_hwcontext_type_ascend = {
+    .type = AV_HWDEVICE_TYPE_ASCEND,
+    .name = "ASCEND",
+
+    .device_hwctx_size = sizeof(AVASCENDDeviceContext),
+    .frames_priv_size = sizeof(ASCENDFramesContext),
+
+    .device_create = ascend_device_create,
+    .device_init = ascend_device_init,
+    .device_uninit = ascend_device_uninit,
+    .frames_get_constraints = ascend_frames_get_constraints,
+    .frames_init = ascend_frames_init,
+    .frames_get_buffer = ascend_get_buffer,
+    .transfer_get_formats = ascend_transfer_get_formats,
+    .transfer_data_to = ascend_transfer_data_to,
+    .transfer_data_from = ascend_transfer_data_from,
+
+    .pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_ASCEND, AV_PIX_FMT_NONE},
 };
\ No newline at end of file
--
Gitee

From ff955254b7b16f0b9ada6d0e9b95d0d9b0543c6c Mon Sep 17 00:00:00 2001
From: Ssayhi_w
Date: Sat, 24 Aug 2024 16:57:58 +0800
Subject: [PATCH 6/8] =?UTF-8?q?ascendFFmpeg=E4=B8=8Evision=E9=87=8D?=
 =?UTF-8?q?=E5=A4=8D=E5=88=9D=E5=A7=8B=E5=8C=96=E6=8A=A5=E9=94=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 contrib/TextSnake/README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/contrib/TextSnake/README.md b/contrib/TextSnake/README.md
index 91fe88900..a4168336f 100644
--- a/contrib/TextSnake/README.md
+++ b/contrib/TextSnake/README.md
@@ -176,7 +176,5 @@ python3 evaluate.py
 ## 常见问题
 
 本案例中的TextSnake模型适用于图像中弯曲形状文字的检测。
-
 本模型在以下几种情况下检测弯曲形状文字的效果良好:含有目标数量少、目标面积占比图像较大、各目标边界清晰。
-
 在以下情况检测弯曲形状文字效果不太好:图片中的弯曲形状文字数目较多且大小较小,此时会出现缺漏的情况。
\ No newline at end of file
--
Gitee

From 68e582ae2a24e81c417660fbcb7ad235cb639615 Mon Sep 17 00:00:00 2001
From: Ssayhi_w
Date: Sat, 24 Aug 2024 09:08:49 +0000
Subject: [PATCH 7/8] =?UTF-8?q?=E9=87=8D=E5=91=BD=E5=90=8D=20contrib/FireD?=
 =?UTF-8?q?etection/c++/aipp=5Fyolov5.cfg=20=20=E4=B8=BA=20contrib/FireDet?=
 =?UTF-8?q?ection/c++/aipp=5Fyolov5.cfg?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 contrib/FireDetection/c++/{aipp_yolov5.cfg => aipp_yolov5.cfg} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename contrib/FireDetection/c++/{aipp_yolov5.cfg => aipp_yolov5.cfg} (100%)

diff --git a/contrib/FireDetection/c++/aipp_yolov5.cfg 
b/contrib/FireDetection/c++/aipp_yolov5.cfg similarity index 100% rename from contrib/FireDetection/c++/aipp_yolov5.cfg rename to contrib/FireDetection/c++/aipp_yolov5.cfg -- Gitee From 9b914d61cc28c992ae910b4d9ddf59e40a25483d Mon Sep 17 00:00:00 2001 From: Ssayhi_w Date: Sat, 24 Aug 2024 17:14:56 +0800 Subject: [PATCH 8/8] =?UTF-8?q?=E5=8F=82=E8=80=83=E8=AE=BE=E8=AE=A1?= =?UTF-8?q?=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- contrib/TextSnake/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/TextSnake/README.md b/contrib/TextSnake/README.md index a4168336f..8fadd087c 100644 --- a/contrib/TextSnake/README.md +++ b/contrib/TextSnake/README.md @@ -19,7 +19,7 @@ TextSnake 弯曲形状文字检测基于 MindX SDK 开发,对图片中的任 ### 1.2 支持的产品 -本项目以昇腾Atlas 500 A2为主要的硬件平台。 +本项目以昇腾Atlas 500 A2为主要的硬件平台 ### 1.3 支持的版本 -- Gitee
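
The hwcontext_ascend.c change in [PATCH 5/8] hinges on `aclInit()` being process-global: a second call in the same process returns `ACL_ERROR_REPEAT_INITIALIZE` instead of reinitializing, and `ascend_device_create` now treats that return code as benign so that Ascendffmpeg can coexist with a vision component that has already initialized ACL. Below is a minimal standalone sketch of the same guard — illustrative only, assuming the standard CANN runtime entry points (`aclInit`/`aclFinalize`) and the `ACL_ERROR_REPEAT_INITIALIZE` code from `acl/acl_base.h` already referenced in the patch:

```
/* Illustrative sketch: double-initialization guard, mirroring the
 * ACL_ERROR_REPEAT_INITIALIZE handling added in hwcontext_ascend.c.
 * Assumes the standard CANN runtime headers and entry points. */
#include <stdio.h>
#include "acl/acl.h"
#include "acl/acl_base.h"

int main(void)
{
    /* First initialization of ACL in this process. */
    aclError ret = aclInit(NULL);
    if (ret != ACL_SUCCESS) {
        fprintf(stderr, "first aclInit failed, ret = %d\n", (int)ret);
        return 1;
    }

    /* A second aclInit (e.g. ffmpeg and a vision library each
     * initializing ACL) reports repeat initialization; treat it
     * as benign, exactly like the patched ascend_device_create. */
    ret = aclInit(NULL);
    if (ret != ACL_SUCCESS && ret != ACL_ERROR_REPEAT_INITIALIZE) {
        fprintf(stderr, "second aclInit failed, ret = %d\n", (int)ret);
        return 1;
    }
    printf("repeat aclInit tolerated, ret = %d\n", (int)ret);

    return aclFinalize() == ACL_SUCCESS ? 0 : 1;
}
```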