diff --git a/.gitignore b/.gitignore index e94becb2a485ef24b1db05db2c82080803e34112..4ba26778e1c9c1981ccb5631dada0d05965f533b 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ LZ-*.py # Picture +*.png *.jpg # Zip diff --git a/README.md b/README.md index 12382f995e82bf5794c836e2e86a65713f65e286..e60d2692b2f204cd81657e52904024d5928278ac 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ Lockzhiner Vision Module 拥有丰富的 IO 接口,其接口图片如下图所 * [凌智视觉模块手写数字分类部署指南](./example/special/digit_handwritten_recognition) * [凌智视觉模块猫狗分类部署指南](example/special/cat_and_dog_classification) * [凌智视觉模块花卉分类部署指南](example/special/flower_classfication/) - +* [凌智视觉模块口罩佩戴分类模型部署指南](example/special/maskwear_classfication) ### 👍 目标检测案例 目标检测(Object Detection)是深度学习中计算机视觉领域的重要任务之一,旨在识别图像或视频中所有感兴趣的物体,并准确地定位这些物体的边界框(Bounding Box)。与目标分类不同,目标检测不仅需要预测物体的类别,还需要标注它们在图像中的位置。一般来说,目标检测任务的标注过程比较复杂,适合既需要对目标进行分类,有需要对目标进行定位的场景。 diff --git a/configs/LZ-RetinaFace.yaml b/configs/LZ-RetinaFace.yaml index e8b9a23754229e5806849e481fedf5742fb13fc6..cda3736a68cdd5dbc2931796ea5a2ca50646dee7 100644 --- a/configs/LZ-RetinaFace.yaml +++ b/configs/LZ-RetinaFace.yaml @@ -9,8 +9,8 @@ load_onnx: - 320 outputs: - output0 - - classifications - - 571 + - /Concat_1_output_0 + - 592 config: # RGB @@ -23,6 +23,9 @@ config: - 1 - 1 type: "int8" + quantized_algorithm: normal + quantized_dtype: w16a16i + optimization_level: 0 build: do_quantization: True diff --git a/docs/introductory_tutorial/python_development_environment.md b/docs/introductory_tutorial/python_development_environment.md index 89a1cfef452cd283c1e3649205d6219bdd0bccf5..5aaa28c1c1b29a394027afb5b3daf54df370624c 100644 --- a/docs/introductory_tutorial/python_development_environment.md +++ b/docs/introductory_tutorial/python_development_environment.md @@ -33,7 +33,7 @@ Lockzhiner Vision Module 的 Python 开发不需要像 C++ 一样的交叉编译 ## 2 下载/更新 LockzhinerVisionModule SDK -点击 [Lockzhiner Vision Module SDK 
下载链接](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.3/lockzhiner_vision_module_sdk.zip) 下载 Lockzhiner Vision Module SDK。解压到本地后,请使用解压软件解压 SDK,一般我们推荐使用 Bandzip。 +点击 [Lockzhiner Vision Module SDK 下载链接](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.4/lockzhiner_vision_module_sdk.zip) 下载 Lockzhiner Vision Module SDK。下载到本地后,请使用解压软件解压 SDK,一般我们推荐使用 Bandizip。 ![](images/development_environment/python_update_lockzhiner_vision_module_0.png) diff --git a/docs/models/environment.md b/docs/models/environment.md new file mode 100644 index 0000000000000000000000000000000000000000..6e2aaef846a9ac6c78c8beebff736b8d02d0fc60 --- /dev/null +++ b/docs/models/environment.md @@ -0,0 +1,8 @@ +```bash +conda create -n p2o python=3.10 +conda activate p2o + +pip install rknn-toolkit2==2.2.0 +pip install paddlepaddle==2.6.0 +pip install paddle2onnx +``` \ No newline at end of file diff --git a/docs/models/retinaface.md b/docs/models/retinaface.md index 89f77f5e9f30cc3e1582863bb0d5888e9409f46d..6e95e8491a3c1648eba353a106fe2bd0997a7860 100644 --- a/docs/models/retinaface.md +++ b/docs/models/retinaface.md @@ -1,11 +1,9 @@ ```bash cd /path/to/LockzhinerVisionModule -rm RetinaFace_mobile320.onnx -wget https://ftrg.zbox.filez.com/v2/delivery/data/95f00b0fc900458ba134f8b180b3f7a1/examples/RetinaFace/RetinaFace_mobile320.onnx - -rm LZ-RetinaFace.onnx -mv RetinaFace_mobile320.onnx LZ-RetinaFace.onnx +rm LZ-Retainface* +wget https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.3/LZ-Retainface-2024-1121-1436.onnx +mv LZ-Retainface-2024-1121-1436.onnx LZ-RetinaFace.onnx rm test.jpg # 下载不了去 https://github.com/airockchip/rknn_model_zoo/blob/main/examples/RetinaFace/model/test.jpg 下载 diff --git "a/docs/models/\344\272\272\350\204\270\346\243\200\346\265\213\345\244\215\347\216\260\346\265\201\347\250\213.md" "b/docs/models/\344\272\272\350\204\270\346\243\200\346\265\213\345\244\215\347\216\260\346\265\201\347\250\213.md" 
new file mode 100644 index 0000000000000000000000000000000000000000..b5f7b432f02f9ff694a0a01ccdc9e5d7d8fccaff --- /dev/null +++ "b/docs/models/\344\272\272\350\204\270\346\243\200\346\265\213\345\244\215\347\216\260\346\265\201\347\250\213.md" @@ -0,0 +1,237 @@ +# 1 安装系统环境(ubuntu) +## 1.1 安装显卡驱动 + +- 打开software & updates ->选择**Additional Drivers** 安装一个驱动 +- 使用 nvidia-smi 查看显卡是否正常 +## 1.2 安装CUDA +1.2.1 下载CUDA + - i https://developer.nvidia.com/cuda-toolkit-archive + - 选择对应版本的CUDA + - 选择 -> Linux x86_64 -> Ubuntu-> version -> Runfile (local) +**下面这个是根据版本选择得到的** +wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run +sudo sh cuda_11.8.0_520.61.05_linux.run +等待一段时间,会弹出一个窗口,选择continue 回车 输入 **accept**->进入CUDA Installer界面 选择Driver 点击**空格**取消选择Driver +--> Install +安装完成后,需要进行环境变量配置 +输入 vim ~/.bashrc +添加以下内容: + - export PATH=/usr/local/cuda-11.8/bin${PATH:+:${PATH}} + - export LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64\${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + + - 输入**nvcc -V** 查看是否安装成功 + +## 1.3 安装cudnn +1.3.1 下载cudnn +点击下面的链接,进行对应版本的选择 + - https://developer.nvidia.com/rdp/cudnn-archive + - sudo apt-get install + +## 1.4 安装 Nvidia-docker +进入官网 +https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html +选择 install with apt + - Configure the production repository: + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + - Update the packages list from the repository: + sudo apt-get update + - Install the NVIDIA Container Toolkit packages: + sudo apt-get install -y nvidia-container-toolkit + +# 2 配置训练环境 +## 2.1 
拉取docker镜像 +- modelscope github 仓库地址 +- https://github.com/modelscope/modelscope + +GPU镜像 +# py37 +- registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-1.6.1 + +# py38 +- registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.8.0-py38-torch2.0.1-tf2.13.0-1.9.5 + +我拉取的是 py38 版本 +- docker pull registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.8.0-py38-torch2.0.1-tf2.13.0-1.9.5 +查看镜像 +- docker images +启动镜像 -v /home/ubuntu/:/home/ubuntu/ 本地文件映射到容器内 +- docker run -it --gpus all -v /home/ubuntu/:/home/ubuntu/ registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.8.0-py38-torch2.0.1-tf2.13.0-1.9.5 -n /bin/bash + +## 2.2 拉取训练模型 +- git clone https://github.com/biubug6/Pytorch_Retinaface.git + + +# 3 数据集制作 +## 3.1 数据集标注 +- 数据集标注使用LabelMe进行数据标注 +文件夹样式 +- Dataset + - annotations + - images + - flags.txt +- 目标数据格式 +```markdown + ./data/widerface/ + train/ + images/ + label.txt + val/ + images/ + wider_val.txt +``` +- 标注内容 + - 人脸边界框:上额头到下额头,左右脸边缘 + - 左眼、右眼、鼻子、左嘴角、右嘴角 +- 标签文件转换 +```python +import os +import json +import random # 导入random模块 + +def convert_json_to_format(json_data, folder_name): + # 提取图片路径 + image_path = json_data['imagePath'] + + # 获取图像尺寸 + image_width = json_data['imageWidth'] + image_height = json_data['imageHeight'] + + # 初始化输出字符串 + output = "" + + # 初始化各部位坐标值为 -1 + face_box_x, face_box_y, face_box_w, face_box_h = -1, -1, -1, -1 + left_eye_x, left_eye_y = -1, -1 + right_eye_x, right_eye_y = -1, -1 + nose_x, nose_y = -1, -1 + left_mouse_x, left_mouse_y = -1, -1 + right_mouse_x, right_mouse_y = -1, -1 + + # 处理形状数据 + shapes = json_data['shapes'] + + for shape in shapes: + label = shape['label'] + points = shape['points'] + + if label == 'face' and len(points) == 2: + # 转换矩形框坐标 + x1, y1 = points[0] + x2, y2 = points[1] + face_box_x = min(x1, x2) + face_box_y = min(y1, y2) + face_box_w = abs(x2 - x1) + face_box_h = abs(y2 - y1) 
+ elif label == 'left_eye': + # 添加左眼位置信息 + left_eye_x, left_eye_y = points[0] + elif label == 'right_eye': + # 添加右眼位置信息 + right_eye_x, right_eye_y = points[0] + elif label == 'nose': + # 添加鼻子位置信息 + nose_x, nose_y = points[0] + elif label == 'left_mouse': + # 添加左嘴角位置信息 + left_mouse_x, left_mouse_y = points[0] + elif label == 'right_mouse': + # 添加右嘴角位置信息 + right_mouse_x, right_mouse_y = points[0] + + # 构建输出字符串 + output = f"{int(face_box_x)} {int(face_box_y)} {int(face_box_w)} {int(face_box_h)} " + output += f"{float(left_eye_x):.3f} {float(left_eye_y):.3f} 0.0 " + output += f"{float(right_eye_x):.3f} {float(right_eye_y):.3f} 0.0 " + output += f"{float(nose_x):.3f} {float(nose_y):.3f} 0.0 " + output += f"{float(left_mouse_x):.3f} {float(left_mouse_y):.3f} 0.0 " + output += f"{float(right_mouse_x):.3f} {float(right_mouse_y):.3f} 0.0 " + + # 添加随机置信度值,范围在0.8到0.99之间 + confidence = random.uniform(0.8, 0.99) + output += f"{confidence:.2f}" + + # 返回结果 + return f"# {folder_name}/{os.path.basename(image_path)}\n{output}\n" + +def process_folder(folder_path, output_file): + # 打开输出文件 + with open(output_file, 'w', encoding='utf-8') as out_file: + # 遍历文件夹中的所有文件 + for filename in os.listdir(folder_path): + if filename.endswith('.json'): + file_path = os.path.join(folder_path, filename) + with open(file_path, 'r', encoding='utf-8') as file: + json_data = json.load(file) + # 获取文件夹名 + folder_name = os.path.basename(os.path.dirname(file_path)) + result = convert_json_to_format(json_data, folder_name) + out_file.write(result) + +# 指定文件夹路径和输出文件路径 +folder_path = r'C:\Users\zhb20\Desktop\FaceData\train\images\face' +output_file = r'C:\Users\zhb20\Desktop\FaceData\train\label.txt' + +# 调用函数 +process_folder(folder_path, output_file) +``` +验证集标签文件制作 +```python +import os + +def save_folder_and_image_names(folder_path, output_file): + """ + 遍历指定文件夹中的所有图片,将文件夹名和图片名组合后保存到输出文件中。 + + :param folder_path: 包含图片的文件夹路径 + :param output_file: 输出文件的路径 + """ + # 检查文件夹是否存在 + if not 
os.path.isdir(folder_path): + print(f"错误:{folder_path} 不是一个有效的文件夹路径") + return + + # 图片文件可能的扩展名列表 + image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'] + + with open(output_file, 'w', encoding='utf-8') as file: + for root, dirs, files in os.walk(folder_path): + for name in files: + # 获取文件扩展名 + ext = os.path.splitext(name)[1].lower() + if ext in image_extensions: + # 组合文件夹名和图片名 + folder_name = os.path.basename(root) + full_name = f"{folder_name}/{name}" + file.write(full_name + '\n') + +if __name__ == "__main__": + # 修改这里的文件夹路径和输出文件路径 + folder_path = r'C:\Users\Administrator\Desktop\facedata\val\images\face' + output_file = r'C:\Users\Administrator\Desktop\facedata\val\wider_val.txt' + + save_folder_and_image_names(folder_path, output_file) + print(f"图片信息已保存至 {output_file}") +``` +# 4 windows 环境搭建 +## 4.1 conda安装 +进入网址 +- https://anaconda.org/anaconda/conda + 下载conda 默认步骤安装即可 +- conda create -n 环境名 python==版本号 -y +- conda activate 环境名 +- 配置国内镜像源地址 +- pip install -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple some-package +- 安装 pytorch 2.0.1 +- [pytorch下载网址](https://pytorch.org/get-started/locally/) +- pillow==8.2.0 +- opencv-python==4.5.2.54 +- numpy==1.22.0 +- matplotlib==3.5.1 +- tensorboard==2.5.0 +- 完成以上安装后大概就可以使用了;如果出现版本不兼容,请根据实际情况灵活调整版本 +**注** +- 如果要使用GPU训练,请先安装环境 +- 显卡驱动、CUDA、CUDNN \ No newline at end of file diff --git a/example/special/maskwear_classfication/README.md b/example/special/maskwear_classfication/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c27f6cde136932accf1fe819ef330bcdb92913aa --- /dev/null +++ b/example/special/maskwear_classfication/README.md @@ -0,0 +1,51 @@ +

凌智视觉模块口罩佩戴分类部署指南

+ +发布版本:V0.0.0 + +日期:2024-11-20 + +文件密级:□绝密 □秘密 □内部资料 ■公开 + +--- + +**免责声明** + +本文档按**现状**提供,福州凌睿智捷电子有限公司(以下简称**本公司**)不对本文档中的任何陈述、信息和内容的准确性、可靠性、完整性、适销性、适用性及非侵权性提供任何明示或暗示的声明或保证。本文档仅作为使用指导的参考。 + +由于产品版本升级或其他原因,本文档可能在未经任何通知的情况下不定期更新或修改。 + +**读者对象** + +本教程适用于以下工程师: + +- 技术支持工程师 +- 软件开发工程师 + +**修订记录** + +| **日期** | **版本** | **作者** | **修改说明** | +|:-----------| -------- | -------- | ------------ | +| 2024/11/20 | 0.0.0 | 钟海滨 | 初始版本 | + +## 1 简介 + +口罩佩戴分类有助于识别和管理公共卫生风险,我们基于 [凌智视觉模块分类模型部署指南](../../vision/classification) 训练了凌智视觉模块专用的模型,该模型能够实现未佩戴口罩、佩戴口罩、口罩佩戴不正确的识别。 + + +## 2 运行前的准备 + +- 请确保你已经下载了 [凌智视觉模块口罩佩戴分类模型](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.2/LZ-Maskwear-Classification.rknn) + +## 3 在凌智视觉模块上部署口罩佩戴分类识别案例 + +下载模型后,请参考以下教程使用 Python 在凌智视觉模块上部署分类模型例程: + +- [凌智视觉模块口罩佩戴分类 Python 部署指南](./python/README.md) + +## 4 模型性能指标 + +以下测试数据为模型执行 Predict 函数运行 1000 次耗时的平均时间 + +| 分类模型 | FPS(帧/s) | 精度(%) | +|:------------------------:|:----:|:----:| +|LZ-Maskwear-Classification|35|| diff --git a/example/special/maskwear_classfication/python/README.md b/example/special/maskwear_classfication/python/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dd3a49be88ff9261f6f0087d90ceb15ceb945d98 --- /dev/null +++ b/example/special/maskwear_classfication/python/README.md @@ -0,0 +1,121 @@ +

凌智视觉模块口罩佩戴分类 Python 部署指南

+ +发布版本:V0.0.0 + +日期:2024-11-20 + +文件密级:□绝密 □秘密 □内部资料 ■公开 + +--- + +**免责声明** + +本文档按**现状**提供,福州凌睿智捷电子有限公司(以下简称**本公司**)不对本文档中的任何陈述、信息和内容的准确性、可靠性、完整性、适销性、适用性及非侵权性提供任何明示或暗示的声明或保证。本文档仅作为使用指导的参考。 + +由于产品版本升级或其他原因,本文档可能在未经任何通知的情况下不定期更新或修改。 + +**读者对象** + +本教程适用于以下工程师: + +- 技术支持工程师 +- 软件开发工程师 + +**修订记录** + +| **日期** | **版本** | **作者** | **修改说明** | +|:-----------| -------- | -------- | ------------ | +| 2024/11/20 | 0.0.0 | 钟海滨 | 初始版本 | + +## 1 简介 + +接下来让我们基于 Python 来部署口罩佩戴分类识别案例,在开始本章节前: + +- 请确保你已经参考 [凌智视觉模块口罩佩戴分类部署指南](../README.md) 正确下载了模型。 +- 请确保你已经参考 [凌智视觉模块摄像头部署指南](../../../periphery/capture/README.md) 正确下载了凌智视觉模块图片传输助手。 +- 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/python_development_environment.md) 正确配置了开发环境。 + +## 2 Python API 文档 + +同[分类模型 Python 部署 API 文档](../../../vision/classification/python/README.md) + +## 3 项目介绍 + +为了方便大家入手,我们做了一个简易的口罩佩戴分类识别例程。该程序可以使用摄像头进行端到端推理,并可视化推理结果到凌智视觉模块图片传输助手。 + +```python +from lockzhiner_vision_module.cv2 import VideoCapture +from lockzhiner_vision_module.vision import PaddleClas, visualize +from lockzhiner_vision_module.edit import Edit +import time +import sys + + +labels =['mask_weared_incorrect','without_mask','with_mask'] + +if __name__ == "__main__": + args = sys.argv + if len(args) != 2: + print("Need model path. 
Example: python test_mask_classification.py LZ-Maskwear-Classification.rknn") + exit(1) + edit = Edit() + edit.start_and_accept_connection() + + model = PaddleClas() + if model.initialize(args[1]) is False: + print("Failed to initialize PaddleClas") + exit(1) + + video_capture = VideoCapture() + if video_capture.open(0) is False: + print("Failed to open capture") + exit(1) + + while True: + read_index = 0 + total_time_ms = 0 + for i in range(30): + start_time = time.time() + ret, mat = video_capture.read() + if ret is False: + continue + + result = model.predict(mat) + + end_time = time.time() + total_time_ms += end_time - start_time + read_index += 1 + print(result.label_id, result.score) + vis_mat = visualize(mat, result, labels) + # vis_mat = visualize(mat,result) + edit.print(vis_mat) + print(f"FPS is {1.0 / (total_time_ms / read_index)}") + +``` + +## 4 上传并测试 Python 程序 + +参考 [连接设备指南](../../../../docs/introductory_tutorial/connect_device_using_ssh.md) 正确连接 Lockzhiner Vision Module 设备。 + +![](../../../../docs/introductory_tutorial/images/connect_device_using_ssh/ssh_success.png) + +请使用 Electerm Sftp 依次上传以下两个文件: + +- 进入存放 **test_mask_classification.py** 脚本文件的目录,将 **test_mask_classification.py** 上传到 Lockzhiner Vision Module +- 进入存放 **LZ-Maskwear-Classification.rknn** 模型存放的目录(模型存放在训练模型后下载的 output 文件夹内),将 **LZ-Maskwear-Classification.rknn** 上传到 Lockzhiner Vision Module + +![](images/img_1.png) + +请使用 Electerm Ssh 并在命令行中执行以下命令: + +```bash +python test_mask_classification.py LZ-Maskwear-Classification.rknn +``` + +运行程序后,使用凌智视觉模块图片传输助手连接设备,屏幕上开始打印标签索引和置信度,凌智视觉模块图片传输助手出现可视化的结果 +![](images/img_2.png) +![alt text](images/img.png) + + + + diff --git a/example/special/maskwear_classfication/python/images/img.png b/example/special/maskwear_classfication/python/images/img.png new file mode 100644 index 0000000000000000000000000000000000000000..6b5f8094855075d18758cc0ca04f3fe26a52d1eb Binary files /dev/null and b/example/special/maskwear_classfication/python/images/img.png 
differ diff --git a/example/special/maskwear_classfication/python/images/img_1.png b/example/special/maskwear_classfication/python/images/img_1.png new file mode 100644 index 0000000000000000000000000000000000000000..db5559f5528f5f9039c84067fdd6f588c4532ef3 Binary files /dev/null and b/example/special/maskwear_classfication/python/images/img_1.png differ diff --git a/example/special/maskwear_classfication/python/images/img_2.png b/example/special/maskwear_classfication/python/images/img_2.png new file mode 100644 index 0000000000000000000000000000000000000000..3e6e49c3d34c3da37239efd08742589b6e021286 Binary files /dev/null and b/example/special/maskwear_classfication/python/images/img_2.png differ diff --git a/example/special/maskwear_classfication/python/test_mask_classification.py b/example/special/maskwear_classfication/python/test_mask_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..be03f4b41275e38b01b163ffc1c6784d5c01748c --- /dev/null +++ b/example/special/maskwear_classfication/python/test_mask_classification.py @@ -0,0 +1,46 @@ +from lockzhiner_vision_module.cv2 import VideoCapture +from lockzhiner_vision_module.vision import PaddleClas, visualize +from lockzhiner_vision_module.edit import Edit +import time +import sys + + +labels =['mask_weared_incorrect','without_mask','with_mask'] + +if __name__ == "__main__": + args = sys.argv + if len(args) != 2: + print("Need model path. 
Example: python test_mask_classification.py LZ-Maskwear-Classification-2024-1120-1209") + exit(1) + edit = Edit() + edit.start_and_accept_connection() + + model = PaddleClas() + if model.initialize(args[1]) is False: + print("Failed to initialize PaddleClas") + exit(1) + + video_capture = VideoCapture() + if video_capture.open(0) is False: + print("Failed to open capture") + exit(1) + + while True: + read_index = 0 + total_time_ms = 0 + for i in range(30): + start_time = time.time() + ret, mat = video_capture.read() + if ret is False: + continue + + result = model.predict(mat) + + end_time = time.time() + total_time_ms += end_time - start_time + read_index += 1 + print(result.label_id, result.score) + vis_mat = visualize(mat, result, labels) + # vis_mat = visualize(mat,result) + edit.print(vis_mat) + print(f"FPS is {1.0 / (total_time_ms / read_index)}") diff --git a/example/vision/face_detection/README.md b/example/vision/face_detection/README.md index 784ae33bd4fb8bb94b1cf924d092ce644e8cc0c1..fe24056cd32d5aaf2f32810f5b0c3883d51ec552 100644 --- a/example/vision/face_detection/README.md +++ b/example/vision/face_detection/README.md @@ -2,7 +2,7 @@ 发布版本:V0.0.0 -日期:2024-11-04 +日期:2024-11-25 文件密级:□绝密 □秘密 □内部资料 ■公开 @@ -23,29 +23,30 @@ **修订记录** -| **日期** | **版本** | **作者** | **修改说明** | -| :--------- | -------- | -------- | ------------ | -| 2024/11/04 | 0.0.0 | 郑必城 | 初始版本 | +| **日期** | **版本** | **作者** | **修改说明** | +|:-----------| -------- |--------| ------------ | +| 2024/11/25 | 0.0.0 | 钟海滨 | 初始版本 | ## 1 简介 -在深度学习中,RetinaFace 是一种高效的人脸检测模型,旨在准确识别图像中的人脸及其关键点。该模型结合了特征金字塔网络(FPN)和单阶段检测方法,能够在不同尺度下进行人脸检测,从而提高了对小型和远距离人脸的检测能力。RetinaFace 不仅能够检测人脸位置,还能提供人脸的五个关键点(如眼睛、鼻子和嘴巴)的坐标,适用于多种人脸识别和分析应用。 +在深度学习中,人脸检测是一项重要的计算机视觉任务,旨在在图像或视频中精确定位和识别人脸的位置。这项技术广泛应用于安全监控、身份验证、社交媒体、智能设备等多种场景。 +为了实现高效且准确的人脸检测,我们基于 Lockzhiner Vision Module 的 LZ-Picodet 模型,在飞桨的 AI Studio 平台上训练了一个高性能的人脸检测模型。 ## 2 运行前的准备 -* 请确保你已经下载了 
[凌智视觉模块人脸检测模型](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.0/LZ-RetinaFace.rknn) +* 请确保你已经下载了 [凌智视觉模块人脸检测模型](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.3/LZ-Face.rknn) ## 3 在凌智视觉模块上部署模型 训练完模型后,请参考以下教程在凌智视觉模块上部署检测模型例程: -* [凌智视觉模块人脸检测模型 Python 部署指南](./python) +* [凌智视觉模块人脸检测模型 Python 部署指南](./python/README.md) ## 4 各模型性能指标 以下测试数据为模型执行 Predict 函数运行 1000 次耗时的平均时间 | 人脸检测模型 | FPS(帧/s) | 精度(%) | -|:-------:|:----:|:----:| -|LZ-RetinaFace| 38 | - | \ No newline at end of file +|:------:|:--------:|:----:| +|LZ-Face | 25 | - | \ No newline at end of file diff --git a/example/vision/face_detection/README_BAK.md b/example/vision/face_detection/README_BAK.md new file mode 100644 index 0000000000000000000000000000000000000000..79fb8378bd94e7e20ecea3b6533a294b843911b4 --- /dev/null +++ b/example/vision/face_detection/README_BAK.md @@ -0,0 +1,51 @@ +

凌智视觉模块人脸检测模型部署指南

+ +发布版本:V0.0.0 + +日期:2024-11-04 + +文件密级:□绝密 □秘密 □内部资料 ■公开 + +--- + +**免责声明** + +本文档按**现状**提供,福州凌睿智捷电子有限公司(以下简称**本公司**)不对本文档中的任何陈述、信息和内容的准确性、可靠性、完整性、适销性、适用性及非侵权性提供任何明示或暗示的声明或保证。本文档仅作为使用指导的参考。 + +由于产品版本升级或其他原因,本文档可能在未经任何通知的情况下不定期更新或修改。 + +**读者对象** + +本教程适用于以下工程师: + +- 技术支持工程师 +- 软件开发工程师 + +**修订记录** + +| **日期** | **版本** | **作者** | **修改说明** | +| :--------- | -------- | -------- | ------------ | +| 2024/11/04 | 0.0.0 | 郑必城 | 初始版本 | + +## 1 简介 + +在深度学习中,RetinaFace 是一种高效的人脸检测模型,旨在准确识别图像中的人脸及其关键点。该模型结合了特征金字塔网络(FPN)和单阶段检测方法,能够在不同尺度下进行人脸检测,从而提高了对小型和远距离人脸的检测能力。RetinaFace 不仅能够检测人脸位置,还能提供人脸的五个关键点(如眼睛、鼻子和嘴巴)的坐标,适用于多种人脸识别和分析应用。 + +## 2 运行前的准备 + +* 请确保你已经下载了 [凌智视觉模块人脸检测模型](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.3/LZ-RetinaFace-2024-1121-1627.rknn) + +## 3 在凌智视觉模块上部署模型 + +训练完模型后,请参考以下教程在凌智视觉模块上部署检测模型例程: + + +* [凌智视觉模块人脸检测模型 Python 部署指南](./python/README_BAK.md) + +## 4 各模型性能指标 + +以下测试数据为模型执行 Predict 函数运行 1000 次耗时的平均时间 + +| 人脸检测模型 | FPS(帧/s) | 精度(%) | +|:-------:|:--------:|:----:| +|LZ-RetinaFace| 38 | - | \ No newline at end of file diff --git a/example/vision/face_detection/images/connect.png b/example/vision/face_detection/images/connect.png new file mode 100644 index 0000000000000000000000000000000000000000..4eb7fd6d04f4dc89efcfd36cdb8aa1014152df39 Binary files /dev/null and b/example/vision/face_detection/images/connect.png differ diff --git a/example/vision/face_detection/images/img-1.png b/example/vision/face_detection/images/img-1.png new file mode 100644 index 0000000000000000000000000000000000000000..f224ef199470251b43ef28220c2fd1a7a7448e04 Binary files /dev/null and b/example/vision/face_detection/images/img-1.png differ diff --git a/example/vision/face_detection/images/result1.png b/example/vision/face_detection/images/result1.png new file mode 100644 index 0000000000000000000000000000000000000000..d8f53c8ae3c58da8023125d9ebc3b571d9581a47 Binary files /dev/null and b/example/vision/face_detection/images/result1.png differ diff --git 
a/example/vision/face_detection/images/result2.png b/example/vision/face_detection/images/result2.png new file mode 100644 index 0000000000000000000000000000000000000000..e2b9953cde43bb9b98638f727f5b74226728b04d Binary files /dev/null and b/example/vision/face_detection/images/result2.png differ diff --git a/example/vision/face_detection/images/stfp.png b/example/vision/face_detection/images/stfp.png new file mode 100644 index 0000000000000000000000000000000000000000..5725826af28458ef278828d7e46fe9cc63894d18 Binary files /dev/null and b/example/vision/face_detection/images/stfp.png differ diff --git a/example/vision/face_detection/python/README.md b/example/vision/face_detection/python/README.md index 6495c8d14473e8db3ad10b9637a51eaccc372c66..a16416606f8776cd080b4ec43876b118c4fa6982 100644 --- a/example/vision/face_detection/python/README.md +++ b/example/vision/face_detection/python/README.md @@ -2,7 +2,7 @@ 发布版本:V0.0.0 -日期:2024-11-04 +日期:2024-11-25 文件密级:□绝密 □秘密 □内部资料 ■公开 @@ -23,15 +23,15 @@ **修订记录** -| **日期** | **版本** | **作者** | **修改说明** | -| :--------- | -------- | -------- | ------------ | -| 2024/11/04 | 0.0.0 | 郑必城 | 初始版本 | +| **日期** | **版本** | **作者** | **修改说明** | +|:-----------| -------- |--------| ------------ | +| 2024/11/25 | 0.0.0 | 钟海滨 | 初始版本 | ## 1 简介 -接下来让我们基于 Python 来部署 RetinaFace 人脸检测模型。在开始本章节前: +接下来让我们基于 Python 来部署 PaddleDetection 人脸检测模型。在开始本章节前: -- 请确保你已经参考 [凌智视觉模块人脸检测模型部署指南](../README.md) 正确下载了凌智视觉模块人脸检测模型。 +- 请确保你已经参考 [凌智视觉模块人脸检测模型部署指南](../README.md) 对模型进行了充分训练。 - 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/python_development_environment.md) 正确配置了开发环境。 ## 2 Python API 文档 @@ -84,86 +84,62 @@ class Rect: """ return self.rect.height -class Point: - def __init__(self): - self.point = cv2.Point() - - @property - def x(self): - """ - 获取坐标点的 x 坐标 - - Returns: - int: 坐标点的 x 坐标 - """ - return self.rect.x - - @property - def y(self): - """ - 获取坐标点的 y 坐标 - - Returns: - int: 坐标点的 y 坐标 - """ - return self.rect.y - -class FaceDetectionResult: 
+class DetectionResult: """ - 检测结果类,用于封装和处理人脸检测结果数据。 + 人脸检测结果类,用于封装和处理目标人脸检测结果数据。 - 该类主要提供了一个包装层,用于访问和管理由视觉模块产生的检测结果。 + 该类主要提供了一个包装层,用于访问和管理由视觉模块产生的人脸检测结果。 """ def __init__(self): - self.face_detection_result = vision.FaceDetectionResult() + self.detection_result = vision.DetectionResult() @property def box(self): """ - 获取人脸检测模型检测结果的矩形框信息 + 获取目标人脸检测模型人脸检测结果的矩形框信息 Returns: Rect: 矩形框信息 """ - return self.face_detection_result.box + return self.detection_result.box @property def score(self): """ - 获取人脸检测模型检测结果的得分信息 + 获取目标人脸检测模型人脸检测结果的得分信息 Returns: float: 得分信息 """ - return self.face_detection_result.score + return self.detection_result.score @property - def points(self): + def label_id(self): """ - 获取人脸检测模型检测结果的人脸关键点信息, 一般共 5 个关键点 + 获取目标人脸检测模型人脸检测结果的分类标签信息 Returns: - list(cv2.Points): 关键点列表 + int: 分类标签信息 """ - return self.face_detection_result.points + return self.detection_result.label_id -class RetinaFace: +class PaddleDet: """ - RetinaFace 类 - 用于人脸检测的 RetinaFace 模型封装。 + PaddleDet 类 - 用于目标人脸检测的 PaddlePaddle 模型封装。 - 该类封装了 RetinaFace 框架下的目标检测模型,提供了初始化和预测的方法。 + 该类封装了 PaddleDet 框架下的目标人脸检测模型,提供了初始化和预测的方法。 """ def __init__(self): """ 构造函数 - 初始化 PaddleDet 对象。 """ - self.model = vision.RetinaFace() + self.model = vision.PaddleDet() def initialize(self, model_path): """ - 初始化模型 - 加载预训练的 RetinaFace 模型。 + 初始化模型 - 加载预训练的 PaddlePaddle 模型。 Args: model_path (str): 模型文件的路径。 @@ -175,11 +151,11 @@ class RetinaFace: def set_threshold(self, score_threshold=0.5, nms_threshold=0.3): """ - 设置目标检测阈值 + 设置目标人脸检测阈值 Args: - score_threshold (float): 目标检测得分阈值,默认为 0.5 - nms_threshold (float): 目标检测 NMS 阈值,默认为 0.3 + score_threshold (float): 目标人脸检测得分阈值,默认为 0.5 + nms_threshold (float): 目标人脸检测 NMS 阈值,默认为 0.3 """ self.model.initialize(score_threshold, nms_threshold) @@ -192,18 +168,23 @@ class RetinaFace: input_mat (cv2.Mat): 输入的图像数据,通常是一个 cv2.Mat 变量。 Returns: - list(FaceDetectionResult): 预测结果对象列表,每一个预测结果包含了矩形框、人脸关键点和置信度等信息。 + list(DetectionResult): 预测结果对象列表,每一个预测结果包含了矩形框、标签信息和置信度等信息。 """ return 
self.model.predict(input_mat) + + +class Picodet(PaddleDet): + def __init__(self): + super().__init__() ``` ## 3 项目介绍 -为了方便大家入手,我们做了一个简易的人脸检测例程。该程序可以使用摄像头进行端到端推理。 +为了方便大家入手,我们做了一个简易的目标人脸检测例程。该程序可以使用摄像头进行端到端推理。 ```python from lockzhiner_vision_module.cv2 import VideoCapture -from lockzhiner_vision_module.vision import RetinaFace, visualize +from lockzhiner_vision_module.vision import PaddleDet, visualize from lockzhiner_vision_module.edit import Edit import time import sys @@ -211,15 +192,15 @@ import sys if __name__ == "__main__": args = sys.argv if len(args) != 2: - print("Need model path. Example: python test_retina_face.py LZ-RetinaFace.rknn") + print("Need model path. Example: python test_face_detection.py LZ-Face.rknn") exit(1) edit = Edit() edit.start_and_accept_connection() - model = RetinaFace() + model = PaddleDet() if model.initialize(args[1]) is False: - print("Failed to initialize RetinaFace") + print("Failed to initialize PaddleClas") exit(1) video_capture = VideoCapture() @@ -247,7 +228,7 @@ if __name__ == "__main__": score = result.score label_id = result.label_id print( - f"(x,y,w,h,score): [{box.x},{box.y},{box.width},{box.height},{score}]" + f"(x,y,w,h,score,label_id): [{box.x},{box.y},{box.width},{box.height},{score},{label_id}]" ) vis_mat = visualize(mat, results) edit.print(vis_mat) @@ -262,27 +243,24 @@ if __name__ == "__main__": 请使用 Electerm Sftp 依次上传以下两个文件: -- 进入存放 **test_retina_face.py** 脚本文件的目录,将 **test_retina_face.py** 上传到 Lockzhiner Vision Module -- 进入存放 **LZ-RetinaFace.rknn(也可能是其他模型)** 模型存放的目录(模型存放在训练模型后下载的 output 文件夹内),将 **LZ-RetinaFace.rknn** 上传到 Lockzhiner Vision Module - -![](../images/upload.png) - +- 进入存放 **test_face_detection.py** 脚本文件的目录,将 **test_face_detection.py** 上传到 Lockzhiner Vision Module +- 进入存放 **LZ-Face.rknn(也可能是其他模型)** 模型存放的目录(模型存放在训练模型后下载的 output 文件夹内),将 **LZ-Face.rknn** 上传到 Lockzhiner Vision Module +![](../images/stfp.png) 请使用 Electerm Ssh 并在命令行中执行以下命令: ```bash -python test_retina_face.py LZ-RetinaFace.rknn +python 
test_face_detection.py LZ-Face.rknn ``` -![](../images/start.png) +![](../images/connect.png) 连接凌智视觉模块图片传输助手[凌智视觉模块图片传输助手下载地址](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.0/LockzhinerVisionModuleImageFetcher.exe)后,选择连接设备 -![](../images/img.png) - -运行程序后,屏幕上开始打印矩形框信息和置信度,并在一段时间后输出 FPS 值 +![](../images/result1.png) +运行程序后,屏幕上开始打印矩形框信息,标签信息和置信度,并在一段时间后输出 FPS 值 -![alt text](../images/results.png) +![alt text](../images/result2.png) diff --git a/example/vision/face_detection/python/README_BAK.md b/example/vision/face_detection/python/README_BAK.md new file mode 100644 index 0000000000000000000000000000000000000000..3f526631fdae24acbb9041739bf74e893d2370fd --- /dev/null +++ b/example/vision/face_detection/python/README_BAK.md @@ -0,0 +1,292 @@ +

凌智视觉模块人脸检测模型 Python 部署指南

+ +发布版本:V0.0.0 + +日期:2024-11-04 + +文件密级:□绝密 □秘密 □内部资料 ■公开 + +--- + +**免责声明** + +本文档按**现状**提供,福州凌睿智捷电子有限公司(以下简称**本公司**)不对本文档中的任何陈述、信息和内容的准确性、可靠性、完整性、适销性、适用性及非侵权性提供任何明示或暗示的声明或保证。本文档仅作为使用指导的参考。 + +由于产品版本升级或其他原因,本文档可能在未经任何通知的情况下不定期更新或修改。 + +**读者对象** + +本教程适用于以下工程师: + +- 技术支持工程师 +- 软件开发工程师 + +**修订记录** + +| **日期** | **版本** | **作者** | **修改说明** | +| :--------- | -------- | -------- | ------------ | +| 2024/11/04 | 0.0.0 | 郑必城 | 初始版本 | + +## 1 简介 + +接下来让我们基于 Python 来部署 RetinaFace 人脸检测模型。在开始本章节前: + +- 请确保你已经参考 [凌智视觉模块人脸检测模型部署指南](../README_BAK.md) 正确下载了凌智视觉模块人脸检测模型。 +- 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/python_development_environment.md) 正确配置了开发环境。 + +## 2 Python API 文档 + +```python +class Rect: + def __init__(self): + self.rect = cv2.Rect() + + def empty(self): + return self.rect.empty() + + @property + def x(self): + """ + 获取矩形左上角坐标点的 x 坐标 + + Returns: + int: 获取矩形左上角坐标点的 x 坐标 + """ + return self.rect.x + + @property + def y(self): + """ + 获取矩形左上角坐标点的 y 坐标 + + Returns: + int: 获取矩形左上角坐标点的 y 坐标 + """ + return self.rect.y + + @property + def width(self): + """ + 获取矩形的宽 + + Returns: + int: 获取矩形的宽 + """ + return self.rect.width + + @property + def height(self): + """ + 获取矩形的高 + + Returns: + int: 获取矩形的高 + """ + return self.rect.height + +class Point: + def __init__(self): + self.point = cv2.Point() + + @property + def x(self): + """ + 获取坐标点的 x 坐标 + + Returns: + int: 坐标点的 x 坐标 + """ + return self.rect.x + + @property + def y(self): + """ + 获取坐标点的 y 坐标 + + Returns: + int: 坐标点的 y 坐标 + """ + return self.rect.y + +class FaceDetectionResult: + """ + 检测结果类,用于封装和处理人脸检测结果数据。 + + 该类主要提供了一个包装层,用于访问和管理由视觉模块产生的检测结果。 + """ + + def __init__(self): + self.face_detection_result = vision.FaceDetectionResult() + + @property + def box(self): + """ + 获取人脸检测模型检测结果的矩形框信息 + + Returns: + Rect: 矩形框信息 + """ + return self.face_detection_result.box + + @property + def score(self): + """ + 获取人脸检测模型检测结果的得分信息 + + Returns: + float: 得分信息 + """ + return self.face_detection_result.score + + 
@property + def points(self): + """ + 获取人脸检测模型检测结果的人脸关键点信息, 一般共 5 个关键点 + + Returns: + list(cv2.Points): 关键点列表 + """ + return self.face_detection_result.points + +class RetinaFace: + """ + RetinaFace 类 - 用于人脸检测的 RetinaFace 模型封装。 + + 该类封装了 RetinaFace 框架下的目标检测模型,提供了初始化和预测的方法。 + """ + + def __init__(self): + """ + 构造函数 - 初始化 RetinaFace 对象。 + """ + self.model = vision.RetinaFace() + + def initialize(self, model_path): + """ + 初始化模型 - 加载预训练的 RetinaFace 模型。 + + Args: + model_path (str): 模型文件的路径。 + + Returns: + bool: 初始化是否成功。 + """ + return self.model.initialize(model_path) + + def set_threshold(self, score_threshold=0.5, nms_threshold=0.3): + """ + 设置目标检测阈值 + + Args: + score_threshold (float): 目标检测得分阈值,默认为 0.5 + nms_threshold (float): 目标检测 NMS 阈值,默认为 0.3 + + """ + self.model.set_threshold(score_threshold, nms_threshold) + + def predict(self, input_mat): + """ + 进行预测 - 使用加载的模型对输入图像进行人脸检测。 + + Args: + input_mat (cv2.Mat): 输入的图像数据,通常是一个 cv2.Mat 变量。 + + Returns: + list(FaceDetectionResult): 预测结果对象列表,每一个预测结果包含了矩形框、人脸关键点和置信度等信息。 + """ + return self.model.predict(input_mat) +``` + +## 3 项目介绍 + +为了方便大家入手,我们做了一个简易的人脸检测例程。该程序可以使用摄像头进行端到端推理。 + +```python +from lockzhiner_vision_module.cv2 import VideoCapture +from lockzhiner_vision_module.vision import RetinaFace, visualize +from lockzhiner_vision_module.edit import Edit +import time +import sys + +if __name__ == "__main__": + args = sys.argv + if len(args) != 2: + print("Need model path. 
Example: python test_retina_face.py LZ-RetinaFace.rknn") + exit(1) + + edit = Edit() + edit.start_and_accept_connection() + + model = RetinaFace() + if model.initialize(args[1]) is False: + print("Failed to initialize RetinaFace") + exit(1) + + video_capture = VideoCapture() + if video_capture.open(0) is False: + print("Failed to open capture") + exit(1) + + while True: + read_index = 0 + total_time_ms = 0 + for i in range(30): + ret, mat = video_capture.read() + if ret is False: + continue + + start_time = time.time() + results = model.predict(mat) + end_time = time.time() + total_time_ms += end_time - start_time + read_index += 1 + + print(f"result size is {len(results)}") + for result in results: + box = result.box + score = result.score + label_id = result.label_id + print( + f"(x,y,w,h,score): [{box.x},{box.y},{box.width},{box.height},{score}]" + ) + vis_mat = visualize(mat, results) + edit.print(vis_mat) + print(f"FPS is {1.0 / (total_time_ms/read_index)}") +``` + +## 4 上传并测试 Python 程序 + +参考 [连接设备指南](../../../../docs/introductory_tutorial/connect_device_using_ssh.md) 正确连接 Lockzhiner Vision Module 设备。 + +![](../../../../docs/introductory_tutorial/images/connect_device_using_ssh/ssh_success.png) + +请使用 Electerm Sftp 依次上传以下两个文件: + +- 进入存放 **test_retina_face.py** 脚本文件的目录,将 **test_retina_face.py** 上传到 Lockzhiner Vision Module +- 进入存放 **LZ-RetinaFace.rknn(也可能是其他模型)** 模型存放的目录(模型存放在训练模型后下载的 output 文件夹内),将 **LZ-RetinaFace.rknn** 上传到 Lockzhiner Vision Module + +![](../images/upload.png) + + + +请使用 Electerm Ssh 并在命令行中执行以下命令: + +```bash +python test_retina_face.py LZ-RetinaFace.rknn +``` +![](../images/start.png) + +连接凌智视觉模块图片传输助手[凌智视觉模块图片传输助手下载地址](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.0/LockzhinerVisionModuleImageFetcher.exe)后,选择连接设备 + +![](../images/img-1.png) + + + + +运行程序后,屏幕上开始打印矩形框信息和置信度,并在一段时间后输出 FPS 值 + +![alt text](../images/results.png) + + + + diff --git a/example/vision/face_detection/python/test_face_detection.py 
b/example/vision/face_detection/python/test_face_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..6c35fa9698263a67e3ea6ae9c0c24191aea4abe6 --- /dev/null +++ b/example/vision/face_detection/python/test_face_detection.py @@ -0,0 +1,50 @@ +from lockzhiner_vision_module.cv2 import VideoCapture +from lockzhiner_vision_module.vision import PaddleDet, visualize +from lockzhiner_vision_module.edit import Edit +import time +import sys + +if __name__ == "__main__": + args = sys.argv + if len(args) != 2: + print("Need model path. Example: python test_face_detection.py LZ-Face.rknn") + exit(1) + + edit = Edit() + edit.start_and_accept_connection() + + model = PaddleDet() + if model.initialize(args[1]) is False: + print("Failed to initialize PaddleDet") + exit(1) + + video_capture = VideoCapture() + if video_capture.open(0) is False: + print("Failed to open capture") + exit(1) + + while True: + read_index = 0 + total_time_ms = 0 + for i in range(30): + ret, mat = video_capture.read() + if ret is False: + continue + + start_time = time.time() + results = model.predict(mat) + end_time = time.time() + total_time_ms += end_time - start_time + read_index += 1 + + print(f"result size is {len(results)}") + for result in results: + box = result.box + score = result.score + label_id = result.label_id + print( + f"(x,y,w,h,score,label_id): [{box.x},{box.y},{box.width},{box.height},{score},{label_id}]" + ) + vis_mat = visualize(mat, results) + edit.print(vis_mat) + print(f"FPS is {1.0 / (total_time_ms/read_index)}") \ No newline at end of file diff --git a/example/vision/face_detection/python/test_retina_face_latest.py b/example/vision/face_detection/python/test_retina_face_latest.py new file mode 100644 index 0000000000000000000000000000000000000000..a8865d4766d4febd3eedcbad6dd15d467e830661 --- /dev/null +++ b/example/vision/face_detection/python/test_retina_face_latest.py @@ -0,0 +1,78 @@ +from lockzhiner_vision_module.cv2 import imread, imwrite, 
VideoCapture +from lockzhiner_vision_module.vision import RetinaFace, visualize +from lockzhiner_vision_module.edit import Edit +import time +import sys + + +def predict_video(face_det_model, width, height): + edit = Edit() + edit.start_and_accept_connection() + + video_capture = VideoCapture() + video_capture.set_width(width) + video_capture.set_height(height) + if video_capture.open(0) is False: + print("Failed to open capture") + exit(1) + + while True: + read_index = 0 + total_time_ms = 0 + for i in range(30): + ret, mat = video_capture.read() + if ret is False: + continue + + start_time = time.time() + results = face_det_model.predict(mat) + end_time = time.time() + total_time_ms += end_time - start_time + read_index += 1 + print(f"result size is {len(results)}") + for result in results: + box = result.box + score = result.score + print( + f"(x,y,w,h,score): [{box.x},{box.y},{box.width},{box.height},{score}]" + ) + vis_mat = visualize(mat, results) + edit.print(vis_mat) + print(f"FPS is {1.0 / (total_time_ms/read_index)}") + + +def predict_image(face_det_model, image_path): + image = imread(image_path) + results = face_det_model.predict(image) + vis_mat = visualize(image, results) + print(f"result size is {len(results)}") + for result in results: + box = result.box + score = result.score + print( + f"(x,y,w,h,score): [{box.x},{box.y},{box.width},{box.height},{score}]" + ) + imwrite("face_det.png", vis_mat) + + +if __name__ == "__main__": + args = sys.argv + if len(args) < 2: + print("Need model path. 
Example: python test_retina_face.py LZ-RetinaFace.rknn width height") + exit(1) + + model = RetinaFace() + if model.initialize(args[1]) is False: + print("Failed to initialize RetinaFace") + exit(1) + + video_width = 640 + video_height = 480 + if len(args) >= 4: # optional capture size: the last two arguments are width and height + video_width = int(args[-2]) + video_height = int(args[-1]) + + if len(args) == 3: # exactly one extra argument is an image path + predict_image(model, args[2]) + else: # camera mode, with or without an explicit size + predict_video(model, video_width, video_height) \ No newline at end of file diff --git a/utils/create_classification_dataset.py b/utils/create_classification_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..fd1c3e4ebb4de6bf0fad608ea7b89337f7925103 --- /dev/null +++ b/utils/create_classification_dataset.py @@ -0,0 +1,141 @@ +import os +import json +from PIL import Image +import shutil +from tqdm import tqdm +import matplotlib.pyplot as plt + + +# 指定图片文件夹路径 +image_root_folder = r"E:\face_mask" + +# 指定输出标签文件夹路径 +output_folder = "./Dataset/annotations" + +# 指定的输出图片保存路径 +images_folder = "./Dataset/images" + +# 标签文本保存 +flags_txt = "./Dataset/flags.txt" + +# 确保输出文件夹存在 +os.makedirs(output_folder, exist_ok=True) +os.makedirs(images_folder, exist_ok=True) + +# 动态生成文件夹与标志位的映射关系 +folder_to_flag = {} +# 存储标签名 +flag_names = [] + + +for folder_name in os.listdir(image_root_folder): + folder_path = os.path.join(image_root_folder, folder_name) + if os.path.isdir(folder_path): + folder_to_flag[folder_name] = folder_name + flag_names.append(folder_name) +with open(flags_txt, "w", encoding="utf-8") as f: + for flag_name in flag_names: + f.write(flag_name + "\n") + f.close() + print("标签文件创建成功") +# 动态生成 flags 字典 +flags = {key: False for key in folder_to_flag.values()} + +# 维护每个文件夹的计数器 +folder_counters = {folder_name: 0 for folder_name in folder_to_flag.keys()} + +# 统计每个类别的图片数量 +category_counts = {folder_name: 0 for folder_name in folder_to_flag.keys()} + +# 遍历图片根文件夹中的所有子文件夹 +for folder_name in os.listdir(image_root_folder): + folder_path = 
os.path.join(image_root_folder, folder_name) + + # 确保是一个文件夹 + if os.path.isdir(folder_path): + # 获取文件夹中的所有文件,并计算总数 + files = [ + f + for f in os.listdir(folder_path) + if f.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")) + ] + total_files = len(files) + + # 使用 tqdm 包装迭代器,显示进度条 + for filename in tqdm( + files, desc=f"Processing {folder_name}", total=total_files + ): + # 构建完整的文件路径 + image_path = os.path.join(folder_path, filename) + + # 获取当前文件夹的计数器值 + counter = folder_counters[folder_name] + + # 构建新的文件名 + new_filename = f"{folder_name}_{counter:04d}.jpg" + new_image_path = os.path.join(images_folder, new_filename) + + # 存储文件名构造 + save_path_name = os.path.join("..\\images\\", new_filename) + + # 拷贝图片到 images 文件夹 + shutil.copy(image_path, new_image_path) + + # 打开图片并获取高度和宽度 + with Image.open(image_path) as img: + width, height = img.size + + # 初始化 flags + current_flags = flags.copy() + + # 设置对应的标志位为 True + flag_key = folder_to_flag.get(folder_name, None) + if flag_key is not None: + current_flags[flag_key] = True + + # 创建标签文件的数据结构 + label_data = { + "version": "5.5.0", + "flags": current_flags, + "shapes": [], + "imagePath": save_path_name, + "imageData": None, + "imageHeight": height, + "imageWidth": width, + } + + # 构建标签文件的路径 + label_filename = os.path.splitext(new_filename)[0] + ".json" + label_path = os.path.join(output_folder, label_filename) + + # 将标签数据写入文件 + with open(label_path, "w") as f: + json.dump(label_data, f, indent=4) + + # 增加计数器 + folder_counters[folder_name] += 1 + + # 更新类别计数 + category_counts[folder_name] += 1 + +# print('Label files creation complete.') + +# 输出每个类别的图片数量统计 +# for category, count in category_counts.items(): +# print(f'Category {category} has {count} images.') + +# 绘制柱状图 +categories = list(category_counts.keys()) +counts = list(category_counts.values()) + +plt.figure(figsize=(10, 6)) +plt.bar(categories, counts, color="skyblue") +plt.xlabel("Categories") +plt.ylabel("Number of Images") +plt.title("Image Count by Category") 
+plt.xticks(rotation=45) +plt.tight_layout() + +# 保存柱状图 +plt.savefig("category_counts.png") +# plt.show() diff --git a/utils/export.py b/utils/export.py index c990f0f9b169247f5ea5360a323e8ae676eaeb69..c29215fa5f682d42c64030f532f513e5ee378f62 100644 --- a/utils/export.py +++ b/utils/export.py @@ -9,14 +9,20 @@ import onnx def get_config(): parser = argparse.ArgumentParser() parser.add_argument("--verbose", default="Debug", help="rknntoolkit verbose") - parser.add_argument("--config_path", required=True, help="The path of model config file") - parser.add_argument("--model_load_path", required=True, help="The path of onnx model file") + parser.add_argument( + "--config_path", required=True, help="The path of model config file" + ) + parser.add_argument( + "--model_load_path", required=True, help="The path of onnx model file" + ) parser.add_argument("--target_platform", required=False, help="The target platform") - parser.add_argument("--model_save_path", required=False, help="The path of rknn model save" + parser.add_argument( + "--model_save_path", required=False, help="The path of rknn model save" ) args = parser.parse_args() return args + if __name__ == "__main__": config = get_config() with open(config.config_path) as file: @@ -65,9 +71,32 @@ if __name__ == "__main__": std_values = yaml_config["config"]["std"] else: assert False, f"The type({mean_std_type}) is error, need fp32/int8." 
+ print(f"mean_values is {mean_values}, std_values is {std_values}") + + quantized_dtype = "w8a8" + if "quantized_dtype" in yaml_config["config"]: + quantized_dtype = yaml_config["config"]["quantized_dtype"] + print(f"quantized_dtype is {quantized_dtype}") + + quantized_algorithm = "normal" + if "quantized_algorithm" in yaml_config["config"]: + quantized_algorithm = yaml_config["config"]["quantized_algorithm"] + print(f"quantized_algorithm is {quantized_algorithm}") + + optimization_level = 3 + if "optimization_level" in yaml_config["config"]: + optimization_level = yaml_config["config"]["optimization_level"] + print(f"optimization_level is {optimization_level}") + target_platform = config.target_platform + print(f"target_platform is {target_platform}") model.config( - mean_values=mean_values, std_values=std_values, target_platform=target_platform + mean_values=mean_values, + std_values=std_values, + quantized_dtype=quantized_dtype, + quantized_algorithm=quantized_algorithm, + optimization_level=optimization_level, + target_platform=target_platform, ) print("done") @@ -96,4 +125,4 @@ if __name__ == "__main__": model_save_path = config.model_save_path ret = model.export_rknn(model_save_path) assert ret == 0, "Export rknn model failed!" 
- print("Export OK!") \ No newline at end of file + print("Export OK!") diff --git a/utils/extera_images_from_video.py b/utils/extera_images_from_video.py new file mode 100644 index 0000000000000000000000000000000000000000..48304ac90de6dd7b2cabf5088bb380fab4a444f1 --- /dev/null +++ b/utils/extera_images_from_video.py @@ -0,0 +1,97 @@ +import os +import cv2 +import matplotlib.pyplot as plt + + +def create_directory_structure(source_root, target_root): + """创建与源文件夹结构相同的输出文件夹结构""" + for root, dirs, files in os.walk(source_root): + relative_path = os.path.relpath(root, source_root) + target_path = os.path.join(target_root, relative_path) + os.makedirs(target_path, exist_ok=True) + + +# interval抽帧间隔 + + +def video_capture(video_path, output_root, source_root, interval=10): + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print(f"Error opening video file: {video_path}") + return + frame_count = 0 + saved_frame_count = 0 + relative_video_dir = os.path.dirname(os.path.relpath(video_path, start=source_root)) + output_dir = os.path.join(output_root, relative_video_dir) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + while True: + ret, frame = cap.read() + if not ret: + break + if frame_count % interval == 0: + frame_filename = f"{os.path.splitext(os.path.basename(video_path))[0]}_{frame_count:04d}.jpg" + frame_path = os.path.join(output_dir, frame_filename) + cv2.imwrite(frame_path, frame) + saved_frame_count += 1 + frame_count += 1 + cap.release() + print( + f"Processed and saved {saved_frame_count} out of {frame_count} frames from {video_path}" + ) + + +def count_files_in_directories(root_directory): + """统计每个子目录下的文件数量,忽略根目录""" + counts = {} + for root, dirs, files in os.walk(root_directory): + relative_path = os.path.relpath(root, root_directory) + # 忽略根目录 + if relative_path == ".": + continue + counts[relative_path] = len(files) + return counts + + +def plot_category_counts(category_counts): + """绘制类别数量的柱状图""" + categories = 
list(category_counts.keys()) + counts = list(category_counts.values()) + plt.figure(figsize=(10, 6)) + plt.bar(categories, counts, color="skyblue") + plt.xlabel("Categories") + plt.ylabel("Number of Images") + plt.title("Image Count per Category") + plt.xticks(rotation=45, ha="right") + plt.tight_layout() + plt.show() + + +if __name__ == "__main__": + # 源视频文件夹路径 + video_root = r"C:\Users\Administrator\Desktop\new" + # 输出图片文件夹根路径 + output_root = "face_add" + if not os.path.exists(output_root): + os.mkdir(output_root) + # 创建与视频文件夹结构相同的输出文件夹结构 + create_directory_structure(video_root, output_root) + + # 获取所有视频文件 + video_files = [ + os.path.join(root, file) + for root, _, files in os.walk(video_root) + for file in files + if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")) + ] + + # 处理所有视频文件 + for video_file in video_files: + video_capture(video_file, output_root, video_root) + + # 统计处理后各类别的数量 + category_counts = count_files_in_directories(output_root) + for category, count in category_counts.items(): + print(f"Category '{category}' contains {count} images.") + + # 绘制柱状图 + plot_category_counts(category_counts)