diff --git a/Cpp_example/D10_yolov5/CMakeLists.txt b/Cpp_example/D10_yolov5/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..3cca410735c558341b175dc9c50a2671dde2728c
--- /dev/null
+++ b/Cpp_example/D10_yolov5/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Minimum required CMake version
+cmake_minimum_required(VERSION 3.10)
+
+project(yolov5-detection)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Project root directory
+set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..")
+message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH})
+
+include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake")
+
+# OpenCV SDK path
+set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module")
+set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4")
+find_package(OpenCV REQUIRED)
+set(OPENCV_LIBRARIES "${OpenCV_LIBS}")
+
+# LockzhinerVisionModule SDK path
+set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk")
+set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module")
+find_package(LockzhinerVisionModule REQUIRED)
+
+# RKNPU2 runtime headers shipped inside the SDK
+set(RKNPU2_BACKEND_BASE_DIR "${LockzhinerVisionModule_ROOT_PATH}/include/lockzhiner_vision_module/vision/deep_learning/runtime")
+if(NOT EXISTS ${RKNPU2_BACKEND_BASE_DIR})
+    message(FATAL_ERROR "RKNPU2 backend base dir missing: ${RKNPU2_BACKEND_BASE_DIR}")
+endif()
+
+add_executable(yolov5_main
+    main.cc
+    postprocess.cc
+    yolov5.cc
+    yolov5.h
+    postprocess.h
+)
+target_include_directories(yolov5_main PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS} ${rknpu2_INCLUDE_DIRS} ${RKNPU2_BACKEND_BASE_DIR})
+target_link_libraries(yolov5_main PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES})
+
+install(
+    TARGETS yolov5_main
+    RUNTIME DESTINATION .
+)
\ No newline at end of file
diff --git a/Cpp_example/D10_yolov5/README.md b/Cpp_example/D10_yolov5/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..98ce23290acc789ba0a1b3639bf35300f3e6468c
--- /dev/null
+++ b/Cpp_example/D10_yolov5/README.md
@@ -0,0 +1,333 @@
+# YOLOv5 Object Detection
+This chapter uses the Lockzhiner Vision Module together with a YOLOv5 object detection model to implement real-time object detection.
+## 1. Background
+### 1.1 Object detection
+Object detection is a key task in computer vision: beyond recognizing which objects appear in an image, it must also localize them. Concretely, a detector outputs a bounding box for every detected object together with a class probability or confidence score.
+- Applications: object detection is widely used in security surveillance, autonomous driving, smart retail, medical image analysis, and many other domains.
+### 1.2 YOLOv5
+YOLOv5 is an efficient object-detection model in the YOLO family, developed by Ultralytics in 2020 on top of PyTorch. It predicts object classes and positions in a single forward pass, which makes it well suited to real-time detection. It ships in several sizes (YOLOv5s/m/l/x) to match different hardware budgets. Its architecture combines a CSPDarknet backbone, a feature-fusion neck, and detection heads, supports multi-scale prediction and custom training, and is used across a wide range of detection scenarios.
+
+## 2. API Documentation
+### 2.1 YOLOv5 Model Functions
+#### 2.1.1 Header
+```cpp
+#include "yolov5.h"
+```
+#### 2.1.2 Model initialization
+```cpp
+int init_yolov5_model(const char* model_path, rknn_app_context_t* ctx);
+```
+- Purpose: loads the YOLOv5 RKNN model and initializes the inference context.
+- Parameters:
+  - model_path: path to the RKNN model file
+  - ctx: pointer to the model context
+- Returns:
+  - 0: initialization succeeded
+  - -1: initialization failed
+#### 2.1.3 Model inference
+```cpp
+int inference_yolov5_model(rknn_app_context_t* ctx,
+                           object_detect_result_list* od_results);
+```
+- Purpose: runs inference and collects the detection results.
+- Parameters:
+  - ctx: an initialized model context
+  - od_results: pointer to the structure that receives the detections
+- Returns:
+  - 0: inference succeeded
+  - -1: inference failed
+#### 2.1.4 Model release
+```cpp
+int release_yolov5_model(rknn_app_context_t* ctx);
+```
+- Purpose: frees all resources owned by the model context.
+- Parameters:
+  - ctx: the model context to release
+- Returns:
+  - 0: release succeeded
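+
+Before moving on, here is a minimal lifecycle sketch of these three functions (error handling shortened; it assumes the zero-copy input buffer has been filled with a letterboxed frame, as section 3 shows):
+
+```cpp
+rknn_app_context_t ctx;
+memset(&ctx, 0, sizeof(ctx));
+if (init_yolov5_model("./voc320.rknn", &ctx) != 0) return -1;
+
+object_detect_result_list results;
+// ... memcpy a letterboxed, model-sized image into ctx.input_mems[0]->virt_addr ...
+if (inference_yolov5_model(&ctx, &results) == 0) {
+    for (int i = 0; i < results.count; ++i)
+        printf("%s %.2f\n", coco_cls_to_name(results.results[i].cls_id),
+               results.results[i].prop);
+}
+release_yolov5_model(&ctx);
+```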
+### 2.2 Image Processing Functions
+#### 2.2.1 Letterbox
+```cpp
+cv::Mat letterbox(cv::Mat input);
+```
+- Purpose: scales the image while preserving its aspect ratio and pads the borders to the model input size.
+- Parameters:
+  - input: input image
+- Returns:
+  - the preprocessed image
+#### 2.2.2 Coordinate mapping
+```cpp
+void mapCoordinates(int* x, int* y);
+```
+- Purpose: maps model-output coordinates back to the original image coordinate system.
+- Parameters:
+  - x/y: coordinates in model space (in/out parameters)
+- Returns: none
+### 2.3 Result Handling Functions
+#### 2.3.1 Post-processing initialization
+```cpp
+int init_post_process();
+```
+- Purpose: loads the class label file.
+- Parameters: none
+- Returns: 0 on success, -1 if the label file cannot be loaded
+#### 2.3.2 Drawing detections
+```cpp
+void draw_detections(int count,
+                     object_detect_result* results,
+                     cv::Mat& frame,
+                     void (*mapCoord)(int*, int*));
+```
+- Purpose: draws detection boxes and class labels on the image.
+- Parameters:
+  - count: number of detections
+  - results: array of detection results
+  - frame: target image frame
+  - mapCoord: coordinate-mapping callback
+- Returns: none
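+
+A note on how 2.2.1 and 2.2.2 fit together: `letterbox` records `scale = min(model_w / src_w, model_h / src_h)` along with `leftPadding` and `topPadding`, and `mapCoordinates` inverts that transform as `x = (x - leftPadding) / scale`, `y = (y - topPadding) / scale`. For an assumed 640×480 source and a 320×320 model input, scale = 0.5, leftPadding = 0, topPadding = 40, so the model-space point (160, 200) maps back to (320, 320). In this example the frame is resized to 320×320 before letterboxing, so the mapping degenerates to the identity.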
+## 3. Code Walkthrough
+### 3.1 Flow chart
+
+![](./images/D10_1.png)
+
+### 3.2 Core code
+- Model initialization
+```cpp
+rknn_app_context_t rknn_app_ctx;
+init_yolov5_model("yolov5s.rknn", &rknn_app_ctx);
+```
+- Image preprocessing
+```cpp
+cv::Mat letterboxImage = letterbox(frame); // aspect-preserving scale + padding
+memcpy(rknn_app_ctx.input_mems[0]->virt_addr,
+       letterboxImage.data,
+       model_width * model_height * 3);
+```
+- Model inference
+```cpp
+object_detect_result_list od_results;
+inference_yolov5_model(&rknn_app_ctx, &od_results);
+```
+- Result visualization
+```cpp
+draw_detections(od_results.count,
+                od_results.results,
+                frame,
+                mapCoordinates);
+```
+### 3.3 Complete code
+```cpp
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+#include <chrono>
+#include "yolov5.h"
+#include "opencv2/core/core.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include <lockzhiner_vision_module/edit/edit.h>
+#include <lockzhiner_vision_module/core/logger.h>
+// output image size
+#define DISP_WIDTH 320
+#define DISP_HEIGHT 320
+
+// display size
+int width = DISP_WIDTH;
+int height = DISP_HEIGHT;
+
+// model input size
+int model_width = 320;
+int model_height = 320;
+
+int leftPadding;
+int topPadding;
+
+// class count and label file path (shared with postprocess.cc)
+extern int obj_class_num;
+char *label_path;
+
+int main(int argc, char *argv[])
+{
+    if (argc != 4)
+    {
+        LOGGER_INFO("Usage: %s model_path class_num label_txt_path", argv[0]);
+        return -1;
+    }
+    obj_class_num = atoi(argv[2]);
+    label_path = argv[3];
+
+    // RKNN context structure
+    rknn_app_context_t rknn_app_ctx;
+    object_detect_result_list od_results;
+    const char *model_path = argv[1];
+    memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t));
+
+    // Step 1: load the RKNN model
+    if (init_yolov5_model(model_path, &rknn_app_ctx) != 0)
+    {
+        printf("❌ Failed to load RKNN model!\n");
+        return -1;
+    }
+    printf("✅ RKNN model loaded successfully.\n");
+
+    // Load the label file
+    if (init_post_process() != 0)
+    {
+        release_yolov5_model(&rknn_app_ctx);
+        return -1;
+    }
+
+    // Start the preview connection
+    lockzhiner_vision_module::edit::Edit edit;
+    if (!edit.StartAndAcceptConnection())
+    {
+        std::cerr << "Error: Failed to start and accept connection." << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Open the camera first, then request the capture size
+    cv::VideoCapture cap;
+    cap.open(0);
+    if (!cap.isOpened())
+    {
+        std::cerr << "Error: Could not open camera." << std::endl;
+        return 1;
+    }
+    cap.set(cv::CAP_PROP_FRAME_WIDTH, 640);
+    cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
+
+    cv::Mat frame;
+    // Declare the timing variables outside the loop
+    std::chrono::steady_clock::time_point start, end;
+
+    while (true)
+    {
+        // Record the start time
+        start = std::chrono::steady_clock::now();
+        // Step 2: grab a frame from the camera
+        cap >> frame;
+        if (frame.empty())
+        {
+            LOGGER_INFO("❌ Failed to read frame from camera.");
+            continue;
+        }
+        cv::resize(frame, frame, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
+        cv::Mat letterboxImage = letterbox(frame);
+
+        if (letterboxImage.empty() || letterboxImage.total() * letterboxImage.elemSize() != (size_t)(model_width * model_height * 3))
+        {
+            LOGGER_ERROR("❌ Input image format or size mismatch!");
+            release_yolov5_model(&rknn_app_ctx);
+            return -1;
+        }
+
+        if (rknn_app_ctx.input_mems[0] == nullptr)
+        {
+            LOGGER_ERROR("❌ RKNN input memory not allocated!");
+            release_yolov5_model(&rknn_app_ctx);
+            return -1;
+        }
+
+        memcpy(rknn_app_ctx.input_mems[0]->virt_addr, letterboxImage.data, model_width * model_height * 3);
+
+        if (inference_yolov5_model(&rknn_app_ctx, &od_results) != 0)
+        {
+            LOGGER_ERROR("inference_yolov5_model failed");
+            release_yolov5_model(&rknn_app_ctx);
+            return -1;
+        }
+
+        draw_detections(od_results.count,   // number of detections
+                        od_results.results, // detection array
+                        frame,              // image frame
+                        mapCoordinates);    // coordinate-mapping callback
+
+        edit.Print(frame);
+        // Record the end time
+        end = std::chrono::steady_clock::now();
+        // Elapsed time in seconds
+        double elapsed_time = std::chrono::duration<double>(end - start).count();
+        printf("Frame processed in %.4f seconds\n", elapsed_time);
+    }
+    release_yolov5_model(&rknn_app_ctx);
+    deinit_post_process();
+    cap.release();
+    return 0;
+}
+```
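+
+One caveat: section 2.2.1 describes the letterbox input as RGB, while `cv::VideoCapture` delivers BGR frames and the code above copies them to the NPU unchanged. If your RKNN model was converted for RGB input, a conversion before the `memcpy` may be required — a one-line sketch:
+
+```cpp
+cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);  // only if the model expects RGB input
+```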
+
+## 4. Building
+### 4.1 Build environment
+- Make sure you have set up the development environment as described in the [development environment guide](../../../../docs/introductory_tutorial/cpp_development_environment.md).
+- Also make sure the development board is connected correctly.
+### 4.2 CMake overview
+```cmake
+# Minimum required CMake version
+cmake_minimum_required(VERSION 3.10)
+
+project(yolov5-detection)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Project root directory
+set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..")
+message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH})
+
+include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake")
+
+# OpenCV SDK path
+set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module")
+set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4")
+find_package(OpenCV REQUIRED)
+set(OPENCV_LIBRARIES "${OpenCV_LIBS}")
+
+# LockzhinerVisionModule SDK path
+set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk")
+set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module")
+find_package(LockzhinerVisionModule REQUIRED)
+
+# RKNPU2 runtime headers shipped inside the SDK
+set(RKNPU2_BACKEND_BASE_DIR "${LockzhinerVisionModule_ROOT_PATH}/include/lockzhiner_vision_module/vision/deep_learning/runtime")
+if(NOT EXISTS ${RKNPU2_BACKEND_BASE_DIR})
+    message(FATAL_ERROR "RKNPU2 backend base dir missing: ${RKNPU2_BACKEND_BASE_DIR}")
+endif()
+
+add_executable(yolov5_main
+    main.cc
+    postprocess.cc
+    yolov5.cc
+    yolov5.h
+    postprocess.h
+)
+target_include_directories(yolov5_main PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS} ${rknpu2_INCLUDE_DIRS} ${RKNPU2_BACKEND_BASE_DIR})
+target_link_libraries(yolov5_main PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES})
+
+install(
+    TARGETS yolov5_main
+    RUNTIME DESTINATION .
+)
+```
+### 4.3 Building the project
+Open the LockzhinerVisionModule container in Docker Desktop and run the following commands to build the project:
+```bash
+# Enter the demo directory
+cd /LockzhinerVisionModuleWorkSpace/LockzhinerVisionModule/Cpp_example/D10_yolov5
+# Create the build directory
+rm -rf build && mkdir build && cd build
+# Configure the cross-compilation toolchain
+export TOOLCHAIN_ROOT_PATH="/LockzhinerVisionModuleWorkSpace/arm-rockchip830-linux-uclibcgnueabihf"
+# Configure the project with cmake
+cmake ..
+# Build and install
+make -j8 && make install
+```
+
+After these commands finish, the executable is generated in the build directory.
+
+## 5. Running the Example
+### 5.1 Run
+```shell
+chmod 777 yolov5_main
+# In practice, replace voc320.rknn with the RKNN model you downloaded or converted yourself;
+# the arguments are: model path, number of classes, label file (one class name per line)
+./yolov5_main ./voc320.rknn 20 ./model/label.txt
+```
+### 5.2 Results
+
+- As shown below, multiple object classes are detected and labeled correctly.
+
+![](./images/D10_yolov5.png)
\ No newline at end of file
diff --git a/Cpp_example/D10_yolov5/images/D10_1.png b/Cpp_example/D10_yolov5/images/D10_1.png
new file mode 100755
index 0000000000000000000000000000000000000000..ad1ef2cfb0a3e0699a37ee3e3038e735523ce306
Binary files /dev/null and b/Cpp_example/D10_yolov5/images/D10_1.png differ
diff --git a/Cpp_example/D10_yolov5/images/D10_yolov5.png b/Cpp_example/D10_yolov5/images/D10_yolov5.png
new file mode 100755
index 0000000000000000000000000000000000000000..e1a8fd8cf6f69823c2cca86f3e31da18c6d055af
Binary files /dev/null and b/Cpp_example/D10_yolov5/images/D10_yolov5.png differ
diff --git a/Cpp_example/D10_yolov5/main.cc b/Cpp_example/D10_yolov5/main.cc
new file mode 100755
index 0000000000000000000000000000000000000000..a26a784cbc2141c48cb3d9073895082e74b0031e
--- /dev/null
+++ b/Cpp_example/D10_yolov5/main.cc
@@ -0,0 +1,134 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+#include <chrono>
+#include "yolov5.h"
+#include "opencv2/core/core.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include <lockzhiner_vision_module/edit/edit.h>
+#include <lockzhiner_vision_module/core/logger.h>
+// output image size
+#define DISP_WIDTH 320
+#define DISP_HEIGHT 320
+
+// display size
+int width = DISP_WIDTH;
+int height = DISP_HEIGHT;
+
+// model input size
+int model_width = 320;
+int model_height = 320;
+
+int leftPadding;
+int topPadding;
+
+// class count and label file path (shared with postprocess.cc)
+extern int obj_class_num;
+char *label_path;
+
+int main(int argc, char *argv[])
+{
+    if (argc != 4)
+    {
+        LOGGER_INFO("Usage: %s model_path class_num label_txt_path", argv[0]);
+        return -1;
+    }
+    obj_class_num = atoi(argv[2]);
+    label_path = argv[3];
+
+    // RKNN context structure
+    rknn_app_context_t rknn_app_ctx;
+    object_detect_result_list od_results;
+    const char *model_path = argv[1];
+    memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t));
+
+    // Step 1: load the RKNN model
+    if (init_yolov5_model(model_path, &rknn_app_ctx) != 0)
+    {
+        printf("❌ Failed to load RKNN model!\n");
+        return -1;
+    }
+    printf("✅ RKNN model loaded successfully.\n");
+
+    // Load the label file
+    if (init_post_process() != 0)
+    {
+        release_yolov5_model(&rknn_app_ctx);
+        return -1;
+    }
+
+    // Start the preview connection
+    lockzhiner_vision_module::edit::Edit edit;
+    if (!edit.StartAndAcceptConnection())
+    {
+        std::cerr << "Error: Failed to start and accept connection." << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Open the camera first, then request the capture size
+    cv::VideoCapture cap;
+    cap.open(0);
+    if (!cap.isOpened())
+    {
+        std::cerr << "Error: Could not open camera." << std::endl;
+        return 1;
+    }
+    cap.set(cv::CAP_PROP_FRAME_WIDTH, 640);
+    cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
+
+    cv::Mat frame;
+    // Declare the timing variables outside the loop
+    std::chrono::steady_clock::time_point start, end;
+
+    while (true)
+    {
+        // Record the start time
+        start = std::chrono::steady_clock::now();
+        // Step 2: grab a frame from the camera
+        cap >> frame;
+        if (frame.empty())
+        {
+            LOGGER_INFO("❌ Failed to read frame from camera.");
+            continue;
+        }
+        cv::resize(frame, frame, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
+        cv::Mat letterboxImage = letterbox(frame);
+
+        if (letterboxImage.empty() || letterboxImage.total() * letterboxImage.elemSize() != (size_t)(model_width * model_height * 3))
+        {
+            LOGGER_ERROR("❌ Input image format or size mismatch!");
+            release_yolov5_model(&rknn_app_ctx);
+            return -1;
+        }
+
+        if (rknn_app_ctx.input_mems[0] == nullptr)
+        {
+            LOGGER_ERROR("❌ RKNN input memory not allocated!");
+            release_yolov5_model(&rknn_app_ctx);
+            return -1;
+        }
+
+        memcpy(rknn_app_ctx.input_mems[0]->virt_addr, letterboxImage.data, model_width * model_height * 3);
+
+        if (inference_yolov5_model(&rknn_app_ctx, &od_results) != 0)
+        {
+            LOGGER_ERROR("inference_yolov5_model failed");
+            release_yolov5_model(&rknn_app_ctx);
+            return -1;
+        }
+
+        draw_detections(od_results.count,   // number of detections
+                        od_results.results, // detection array
+                        frame,              // image frame
+                        mapCoordinates);    // coordinate-mapping callback
+
+        edit.Print(frame);
+        // Record the end time
+        end = std::chrono::steady_clock::now();
+        // Elapsed time in seconds
+        double elapsed_time = std::chrono::duration<double>(end - start).count();
+        printf("Frame processed in %.4f seconds\n", elapsed_time);
+    }
+    release_yolov5_model(&rknn_app_ctx);
+    deinit_post_process();
+    cap.release();
+    return 0;
+}
\ No newline at end of file
diff --git a/Cpp_example/D10_yolov5/model/anchors_yolov5.txt b/Cpp_example/D10_yolov5/model/anchors_yolov5.txt
new file mode 100755
index 0000000000000000000000000000000000000000..680a13e3f7c1e79210a7f53a8e985f55057adde5
--- /dev/null
+++ b/Cpp_example/D10_yolov5/model/anchors_yolov5.txt
@@ -0,0 +1,18 @@
+10.0
+13.0
+16.0
+30.0
+33.0
+23.0
+30.0
+61.0
+62.0
+45.0
+59.0
+119.0
+116.0
+90.0
+156.0
+198.0
+373.0
+326.0
diff --git a/Cpp_example/D10_yolov5/model/label.txt b/Cpp_example/D10_yolov5/model/label.txt
new file mode 100755
index 0000000000000000000000000000000000000000..f1c7699934d3168347389030cb142afe4105eade
--- /dev/null
+++ b/Cpp_example/D10_yolov5/model/label.txt
@@ -0,0 +1,20 @@
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
\ No newline at end of file
diff --git a/Cpp_example/D10_yolov5/postprocess.cc b/Cpp_example/D10_yolov5/postprocess.cc
new file mode 100755
index 0000000000000000000000000000000000000000..063e23dc9405f6004a38236d9dd322d191a13ffb
--- /dev/null
+++ b/Cpp_example/D10_yolov5/postprocess.cc
@@ -0,0 +1,594 @@
+// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "yolov5.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include <set>
+#include <vector>
+
+int OBJ_CLASS_NUM = 20;  // default; overwritten from obj_class_num in init_post_process
+int PROP_BOX_SIZE = 5 + OBJ_CLASS_NUM;
+static char **labels = nullptr;  // dynamically allocated label array
+int obj_class_num = 0;
+const int anchor[3][6] = {{10, 13, 16, 30, 33, 23},
+                          {30, 61, 62, 45, 59, 119},
+                          {116, 90, 156, 198, 373, 326}};
+
+inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; }
+extern char *label_path;
+static char *readLine(FILE *fp, char *buffer, int *len)
+{
+    int ch;
+    int i = 0;
+    size_t buff_len = 0;
+
+    buffer = (char *)malloc(buff_len + 1);
+    if (!buffer)
+        return NULL; // Out of memory
+
+    while ((ch = fgetc(fp)) != '\n' && ch != EOF)
+    {
+        buff_len++;
+        void *tmp = realloc(buffer, buff_len + 1);
+        if (tmp == NULL)
+        {
+            free(buffer);
+            return NULL; // Out of memory
+        }
+        buffer = (char *)tmp;
+
+        buffer[i] = (char)ch;
+        i++;
+    }
+    buffer[i] = '\0';
+
+    *len = buff_len;
+
+    // Detect end of file
+    if (ch == EOF && (i == 0 || ferror(fp)))
+    {
+        free(buffer);
+        return NULL;
+    }
+    return buffer;
+}
+
+static int readLines(const char *fileName, char *lines[], int max_line)
+{
+    FILE *file = fopen(fileName, "r");
+    char *s;
+    int i = 0;
+    int n = 0;
+
+    if (file == NULL)
+    {
+        printf("Open %s fail!\n", fileName);
+        return -1;
+    }
+
+    while ((s = readLine(file, s, &n)) != NULL)
+    {
+        lines[i++] = s;
+        if (i >= max_line)
+            break;
+    }
+    fclose(file);
+    return i;
+}
+
+static int loadLabelName(const char *locationFilename, char *label[])
+{
+    printf("load label %s\n", locationFilename);
+    int count = readLines(locationFilename, label, OBJ_CLASS_NUM);
+    if (count < 0)
+    {
+        return -1;
+    }
+    return 0;
+}
+
+static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1,
+                              float ymax1)
+{
+    float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0);
+    float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0);
+    float i = w * h;
+    float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i;
+    return u <= 0.f ? 0.f : (i / u);
+}
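+
+// CalculateOverlap returns IoU = intersection / union; the +1.0 terms treat
+// coordinates as inclusive pixel indices. Worked example with assumed boxes
+// (0,0)-(10,10) and (5,5)-(15,15): intersection = 6*6 = 36, union =
+// 121 + 121 - 36 = 206, so IoU ≈ 0.17 — below NMS_THRESH (0.45), meaning in
+// nms() below neither box would suppress the other.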
+
+static int nms(int validCount, std::vector<float> &outputLocations, std::vector<int> classIds, std::vector<int> &order,
+               int filterId, float threshold)
+{
+    for (int i = 0; i < validCount; ++i)
+    {
+        int n = order[i];
+        if (n == -1 || classIds[n] != filterId)
+        {
+            continue;
+        }
+        for (int j = i + 1; j < validCount; ++j)
+        {
+            int m = order[j];
+            if (m == -1 || classIds[m] != filterId)
+            {
+                continue;
+            }
+            float xmin0 = outputLocations[n * 4 + 0];
+            float ymin0 = outputLocations[n * 4 + 1];
+            float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2];
+            float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3];
+
+            float xmin1 = outputLocations[m * 4 + 0];
+            float ymin1 = outputLocations[m * 4 + 1];
+            float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2];
+            float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3];
+
+            float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1);
+
+            if (iou > threshold)
+            {
+                order[j] = -1;
+            }
+        }
+    }
+    return 0;
+}
+
+static int quick_sort_indice_inverse(std::vector<float> &input, int left, int right, std::vector<int> &indices)
+{
+    float key;
+    int key_index;
+    int low = left;
+    int high = right;
+    if (left < right)
+    {
+        key_index = indices[left];
+        key = input[left];
+        while (low < high)
+        {
+            while (low < high && input[high] <= key)
+            {
+                high--;
+            }
+            input[low] = input[high];
+            indices[low] = indices[high];
+            while (low < high && input[low] >= key)
+            {
+                low++;
+            }
+            input[high] = input[low];
+            indices[high] = indices[low];
+        }
+        input[low] = key;
+        indices[low] = key_index;
+        quick_sort_indice_inverse(input, left, low - 1, indices);
+        quick_sort_indice_inverse(input, low + 1, right, indices);
+    }
+    return low;
+}
+
+static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); }
+
+static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); }
+
+inline static int32_t __clip(float val, float min, float max)
+{
+    float f = val <= min ? min : (val >= max ? max : val);
+    return f;
+}
+
+static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale)
+{
+    float dst_val = (f32 / scale) + zp;
+    int8_t res = (int8_t)__clip(dst_val, -128, 127);
+    return res;
+}
+
+static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; }
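+
+// The quantized outputs use affine (asymmetric) int8 quantization:
+//   quantize:   q = clip(f / scale + zp, -128, 127)
+//   dequantize: f = (q - zp) * scale
+// With illustrative values zp = -14 and scale = 0.087, q = 30 dequantizes to
+// (30 - (-14)) * 0.087 ≈ 3.83. The confidence threshold is quantized once
+// (thres_i8) so the hot loops can compare raw int8 values cheaply.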
+
+static int process_i8(int8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
+                      std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold,
+                      int32_t zp, float scale)
+{
+    int validCount = 0;
+    int grid_len = grid_h * grid_w;
+    int8_t thres_i8 = qnt_f32_to_affine(threshold, zp, scale);
+    for (int a = 0; a < 3; a++)
+    {
+        for (int i = 0; i < grid_h; i++)
+        {
+            for (int j = 0; j < grid_w; j++)
+            {
+                int8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];
+                if (box_confidence >= thres_i8)
+                {
+                    int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;
+                    int8_t *in_ptr = input + offset;
+                    float box_x = (deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5;
+                    float box_y = (deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5;
+                    float box_w = (deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0;
+                    float box_h = (deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0;
+                    box_x = (box_x + j) * (float)stride;
+                    box_y = (box_y + i) * (float)stride;
+                    box_w = box_w * box_w * (float)anchor[a * 2];
+                    box_h = box_h * box_h * (float)anchor[a * 2 + 1];
+                    box_x -= (box_w / 2.0);
+                    box_y -= (box_h / 2.0);
+
+                    int8_t maxClassProbs = in_ptr[5 * grid_len];
+                    int maxClassId = 0;
+                    for (int k = 1; k < OBJ_CLASS_NUM; ++k)
+                    {
+                        int8_t prob = in_ptr[(5 + k) * grid_len];
+                        if (prob > maxClassProbs)
+                        {
+                            maxClassId = k;
+                            maxClassProbs = prob;
+                        }
+                    }
+                    if (maxClassProbs > thres_i8)
+                    {
+                        objProbs.push_back((deqnt_affine_to_f32(maxClassProbs, zp, scale)) * (deqnt_affine_to_f32(box_confidence, zp, scale)));
+                        classId.push_back(maxClassId);
+                        validCount++;
+                        boxes.push_back(box_x);
+                        boxes.push_back(box_y);
+                        boxes.push_back(box_w);
+                        boxes.push_back(box_h);
+                    }
+                }
+            }
+        }
+    }
+    return validCount;
+}
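+
+// On RV1106 the zero-copy outputs are NHWC: each grid cell stores its 3
+// anchors back-to-back, PROP_BOX_SIZE (= 5 + OBJ_CLASS_NUM) values each,
+// laid out as [cx, cy, w, h, box_conf, class scores...]. The hw_offset
+// computation below (h * grid_w * align_c + w * align_c + a * PROP_BOX_SIZE)
+// walks that packing directly; process_i8 above assumes planar NCHW instead.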
+static int process_i8_rv1106(int8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
+                             std::vector<float> &boxes, std::vector<float> &boxScores, std::vector<int> &classId, float threshold,
+                             int32_t zp, float scale)
+{
+    int validCount = 0;
+    int8_t thres_i8 = qnt_f32_to_affine(threshold, zp, scale);
+
+    int anchor_per_branch = 3;
+    int align_c = PROP_BOX_SIZE * anchor_per_branch;
+
+    for (int h = 0; h < grid_h; h++) {
+        for (int w = 0; w < grid_w; w++) {
+            for (int a = 0; a < anchor_per_branch; a++) {
+                int hw_offset = h * grid_w * align_c + w * align_c + a * PROP_BOX_SIZE;
+                int8_t *hw_ptr = input + hw_offset;
+                int8_t box_confidence = hw_ptr[4];
+
+                if (box_confidence >= thres_i8) {
+                    int8_t maxClassProbs = hw_ptr[5];
+                    int maxClassId = 0;
+                    for (int k = 1; k < OBJ_CLASS_NUM; ++k) {
+                        int8_t prob = hw_ptr[5 + k];
+                        if (prob > maxClassProbs) {
+                            maxClassId = k;
+                            maxClassProbs = prob;
+                        }
+                    }
+
+                    float box_conf_f32 = deqnt_affine_to_f32(box_confidence, zp, scale);
+                    float class_prob_f32 = deqnt_affine_to_f32(maxClassProbs, zp, scale);
+                    float limit_score = box_conf_f32 * class_prob_f32;
+
+                    if (limit_score > threshold) {
+                        float box_x, box_y, box_w, box_h;
+
+                        box_x = deqnt_affine_to_f32(hw_ptr[0], zp, scale) * 2.0 - 0.5;
+                        box_y = deqnt_affine_to_f32(hw_ptr[1], zp, scale) * 2.0 - 0.5;
+                        box_w = deqnt_affine_to_f32(hw_ptr[2], zp, scale) * 2.0;
+                        box_h = deqnt_affine_to_f32(hw_ptr[3], zp, scale) * 2.0;
+                        box_w = box_w * box_w;
+                        box_h = box_h * box_h;
+
+                        box_x = (box_x + w) * (float)stride;
+                        box_y = (box_y + h) * (float)stride;
+                        box_w *= (float)anchor[a * 2];
+                        box_h *= (float)anchor[a * 2 + 1];
+
+                        box_x -= (box_w / 2.0);
+                        box_y -= (box_h / 2.0);
+
+                        boxes.push_back(box_x);
+                        boxes.push_back(box_y);
+                        boxes.push_back(box_w);
+                        boxes.push_back(box_h);
+                        boxScores.push_back(limit_score);
+                        classId.push_back(maxClassId);
+                        validCount++;
+                    }
+                }
+            }
+        }
+    }
+    return validCount;
+}
+
+static int process_fp32(float *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride,
+                        std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, float threshold)
+{
+    int validCount = 0;
+    int grid_len = grid_h * grid_w;
+
+    for (int a = 0; a < 3; a++)
+    {
+        for (int i = 0; i < grid_h; i++)
+        {
+            for (int j = 0; j < grid_w; j++)
+            {
+                float box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j];
+                if (box_confidence >= threshold)
+                {
+                    int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;
+                    float *in_ptr = input + offset;
+                    float box_x = *in_ptr * 2.0 - 0.5;
+                    float box_y = in_ptr[grid_len] * 2.0 - 0.5;
+                    float box_w = in_ptr[2 * grid_len] * 2.0;
+                    float box_h = in_ptr[3 * grid_len] * 2.0;
+                    box_x = (box_x + j) * (float)stride;
+                    box_y = (box_y + i) * (float)stride;
+                    box_w = box_w * box_w * (float)anchor[a * 2];
+                    box_h = box_h * box_h * (float)anchor[a * 2 + 1];
+                    box_x -= (box_w / 2.0);
+                    box_y -= (box_h / 2.0);
+
+                    float maxClassProbs = in_ptr[5 * grid_len];
+                    int maxClassId = 0;
+                    for (int k = 1; k < OBJ_CLASS_NUM; ++k)
+                    {
+                        float prob = in_ptr[(5 + k) * grid_len];
+                        if (prob > maxClassProbs)
+                        {
+                            maxClassId = k;
+                            maxClassProbs = prob;
+                        }
+                    }
+                    if (maxClassProbs > threshold)
+                    {
+                        objProbs.push_back(maxClassProbs * box_confidence);
+                        classId.push_back(maxClassId);
+                        validCount++;
+                        boxes.push_back(box_x);
+                        boxes.push_back(box_y);
+                        boxes.push_back(box_w);
+                        boxes.push_back(box_h);
+                    }
+                }
+            }
+        }
+    }
+    return validCount;
+}
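+
+// post_process pipeline: for each of the 3 output branches, decode and
+// confidence-filter candidate boxes (only the i8 path runs on RV1106),
+// sort them by score with quick_sort_indice_inverse, apply per-class NMS,
+// then clamp the survivors to the model input size and emit od_results.
+// Box coordinates are still in letterboxed model space here; main.cc maps
+// them back to the display frame via mapCoordinates when drawing.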
+int post_process(rknn_app_context_t *app_ctx, void *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results)
+{
+    rknn_tensor_mem **_outputs = (rknn_tensor_mem **)outputs;
+    std::vector<float> filterBoxes;
+    std::vector<float> objProbs;
+    std::vector<int> classId;
+    int validCount = 0;
+    int stride = 0;
+    int grid_h = 0;
+    int grid_w = 0;
+    int model_in_w = app_ctx->model_width;
+    int model_in_h = app_ctx->model_height;
+    printf("post process start\n");
+    memset(od_results, 0, sizeof(object_detect_result_list));
+
+    for (int i = 0; i < 3; i++)
+    {
+        grid_h = app_ctx->output_attrs[i].dims[1];
+        grid_w = app_ctx->output_attrs[i].dims[2];
+        stride = model_in_h / grid_h;
+        // RV1106 only supports i8
+        if (app_ctx->is_quant) {
+            validCount += process_i8_rv1106((int8_t *)(_outputs[i]->virt_addr), (int *)anchor[i], grid_h, grid_w, model_in_h, model_in_w, stride,
+                                            filterBoxes, objProbs, classId, conf_threshold,
+                                            app_ctx->output_attrs[i].zp, app_ctx->output_attrs[i].scale);
+        }
+    }
+
+    // no object detected
+    if (validCount <= 0)
+    {
+        return 0;
+    }
+    std::vector<int> indexArray;
+    for (int i = 0; i < validCount; ++i)
+    {
+        indexArray.push_back(i);
+    }
+    quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray);
+
+    std::set<int> class_set(std::begin(classId), std::end(classId));
+
+    for (auto c : class_set)
+    {
+        nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold);
+    }
+
+    int last_count = 0;
+    od_results->count = 0;
+
+    /* collect the boxes that survived NMS */
+    for (int i = 0; i < validCount; ++i)
+    {
+        if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE)
+        {
+            continue;
+        }
+        int n = indexArray[i];
+
+        float x1 = filterBoxes[n * 4 + 0];
+        float y1 = filterBoxes[n * 4 + 1];
+        float x2 = x1 + filterBoxes[n * 4 + 2];
+        float y2 = y1 + filterBoxes[n * 4 + 3];
+        int id = classId[n];
+        float obj_conf = objProbs[i];
+
+        od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w));
+        od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h));
+        od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w));
+        od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h));
+        od_results->results[last_count].prop = obj_conf;
+        od_results->results[last_count].cls_id = id;
+        last_count++;
+    }
+    od_results->count = last_count;
+    return 0;
+}
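+
+// The label file passed on the command line must contain one class name per
+// line, in training order, with exactly obj_class_num lines — see
+// model/label.txt for the 20-class VOC list used by this example.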
+int init_post_process()
+{
+    OBJ_CLASS_NUM = obj_class_num;
+    PROP_BOX_SIZE = 5 + OBJ_CLASS_NUM;
+
+    // Free any previously loaded labels (avoid leaking on re-init)
+    if (labels != nullptr)
+    {
+        deinit_post_process();
+    }
+
+    // Allocate a zero-initialized label array
+    labels = new char *[OBJ_CLASS_NUM]();
+    int ret = loadLabelName(label_path, labels);
+    if (ret < 0)
+    {
+        printf("Load %s failed!\n", label_path);
+        return -1;
+    }
+    return 0;
+}
+
+char *coco_cls_to_name(int cls_id)
+{
+    static char null_str[] = "null";
+
+    if (labels == nullptr || cls_id >= OBJ_CLASS_NUM)
+    {
+        return null_str;
+    }
+
+    if (labels[cls_id])
+    {
+        return labels[cls_id];
+    }
+
+    return null_str;
+}
+
+void deinit_post_process()
+{
+    if (labels == nullptr)
+    {
+        return;
+    }
+    for (int i = 0; i < OBJ_CLASS_NUM; i++)
+    {
+        if (labels[i] != nullptr)
+        {
+            free(labels[i]); // allocated with malloc in readLine
+            labels[i] = nullptr;
+        }
+    }
+    delete[] labels;
+    labels = nullptr;
+}
diff --git a/Cpp_example/D10_yolov5/postprocess.h b/Cpp_example/D10_yolov5/postprocess.h
new file mode 100755
index 0000000000000000000000000000000000000000..59622541acbda820eaa4bb54732f1279caecbb32
--- /dev/null
+++ b/Cpp_example/D10_yolov5/postprocess.h
@@ -0,0 +1,41 @@
+#ifndef _RKNN_YOLOV5_DEMO_POSTPROCESS_H_
+#define _RKNN_YOLOV5_DEMO_POSTPROCESS_H_
+
+#include <stdint.h>
+#include <vector>
+#include "rknpu2_backend/rknpu2_backend.h"
+
+#define OBJ_NAME_MAX_SIZE 64
+#define OBJ_NUMB_MAX_SIZE 128
+// OBJ_CLASS_NUM and PROP_BOX_SIZE are runtime variables now; see postprocess.cc
+#define NMS_THRESH 0.45
+#define BOX_THRESH 0.25
+
+// rknn_app_context_t is defined in yolov5.h, which includes this header after the struct
+typedef struct {
+    int left;
+    int top;
+    int right;
+    int bottom;
+} image_rect_t;
+
+typedef struct {
+    image_rect_t box;
+    float prop;
+    int cls_id;
+} object_detect_result;
+
+typedef struct {
+    int id;
+    int count;
+    object_detect_result results[OBJ_NUMB_MAX_SIZE];
+} object_detect_result_list;
+
+int init_post_process();
+void deinit_post_process();
+char *coco_cls_to_name(int cls_id);
+int post_process(rknn_app_context_t *app_ctx, void *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results);
+
+#endif //_RKNN_YOLOV5_DEMO_POSTPROCESS_H_
diff --git a/Cpp_example/D10_yolov5/yolov5.cc b/Cpp_example/D10_yolov5/yolov5.cc
new file mode 100755
index 0000000000000000000000000000000000000000..1df2811868e101f0e27da38a7b2b57e1388b8167
--- /dev/null
+++ b/Cpp_example/D10_yolov5/yolov5.cc
@@ -0,0 +1,288 @@
+// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <opencv2/opencv.hpp>
+
+#include "yolov5.h"
+
+static void dump_tensor_attr(rknn_tensor_attr *attr)
+{
+    printf("  index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
+           "zp=%d, scale=%f\n",
+           attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
+           attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
+           get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
+}
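+
+// init_yolov5_model sets up the RKNN zero-copy I/O path: rknn_create_mem
+// allocates DMA-backed tensor buffers and rknn_set_io_mem binds them to the
+// model once, so per-frame work reduces to a single memcpy into
+// input_mems[0]->virt_addr followed by rknn_run.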
+int init_yolov5_model(const char *model_path, rknn_app_context_t *app_ctx)
+{
+    int ret;
+    rknn_context ctx = 0;
+
+    ret = rknn_init(&ctx, (char *)model_path, 0, 0, NULL);
+    if (ret < 0)
+    {
+        printf("rknn_init fail! ret=%d\n", ret);
+        return -1;
+    }
+
+    // Query the number of input and output tensors
+    rknn_input_output_num io_num;
+    ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
+    if (ret != RKNN_SUCC)
+    {
+        printf("rknn_query fail! ret=%d\n", ret);
+        return -1;
+    }
+
+    // Query input tensor attributes
+    rknn_tensor_attr input_attrs[io_num.n_input];
+    memset(input_attrs, 0, sizeof(input_attrs));
+    for (uint32_t i = 0; i < io_num.n_input; i++)
+    {
+        input_attrs[i].index = i;
+        ret = rknn_query(ctx, RKNN_QUERY_NATIVE_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
+        if (ret != RKNN_SUCC)
+        {
+            printf("rknn_query fail! ret=%d\n", ret);
+            return -1;
+        }
+        dump_tensor_attr(&(input_attrs[i]));
+    }
+
+    // Query output tensor attributes
+    rknn_tensor_attr output_attrs[io_num.n_output];
+    memset(output_attrs, 0, sizeof(output_attrs));
+    for (uint32_t i = 0; i < io_num.n_output; i++)
+    {
+        output_attrs[i].index = i;
+        // With the zero-copy API, query the native output tensor attributes
+        ret = rknn_query(ctx, RKNN_QUERY_NATIVE_NHWC_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
+        if (ret != RKNN_SUCC)
+        {
+            printf("rknn_query fail! ret=%d\n", ret);
+            return -1;
+        }
+        dump_tensor_attr(&(output_attrs[i]));
+    }
+
+    // The default input type is int8 (normalization and quantization computed outside);
+    // setting uint8 fuses normalization and quantization into the NPU.
+    input_attrs[0].type = RKNN_TENSOR_UINT8;
+    // The default fmt is NHWC; the RV1106 NPU only supports NHWC in zero-copy mode
+    input_attrs[0].fmt = RKNN_TENSOR_NHWC;
+    app_ctx->input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride);
+
+    // Bind the input tensor memory
+    ret = rknn_set_io_mem(ctx, app_ctx->input_mems[0], &input_attrs[0]);
+    if (ret < 0) {
+        printf("input_mems rknn_set_io_mem fail! ret=%d\n", ret);
+        return -1;
+    }
+
+    // Bind the output tensor memory
+    for (uint32_t i = 0; i < io_num.n_output; ++i) {
+        app_ctx->output_mems[i] = rknn_create_mem(ctx, output_attrs[i].size_with_stride);
+        ret = rknn_set_io_mem(ctx, app_ctx->output_mems[i], &output_attrs[i]);
+        if (ret < 0) {
+            printf("output_mems rknn_set_io_mem fail! ret=%d\n", ret);
+            return -1;
+        }
+    }
+
+    // Store everything in the application context
+    app_ctx->rknn_ctx = ctx;
+
+    if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC)
+    {
+        app_ctx->is_quant = true;
+    }
+    else
+    {
+        app_ctx->is_quant = false;
+    }
+
+    app_ctx->io_num = io_num;
+    app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr));
+    memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr));
+    app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr));
+    memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr));
+
+    if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
+    {
+        printf("model is NCHW input fmt\n");
+        app_ctx->model_channel = input_attrs[0].dims[1];
+        app_ctx->model_height = input_attrs[0].dims[2];
+        app_ctx->model_width = input_attrs[0].dims[3];
+    }
+    else
+    {
+        printf("model is NHWC input fmt\n");
+        app_ctx->model_height = input_attrs[0].dims[1];
+        app_ctx->model_width = input_attrs[0].dims[2];
+        app_ctx->model_channel = input_attrs[0].dims[3];
+    }
+
+    printf("model input height=%d, width=%d, channel=%d\n",
+           app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);
+
+    return 0;
+}
+
+int release_yolov5_model(rknn_app_context_t *app_ctx)
+{
+    if (app_ctx->input_attrs != NULL)
+    {
+        free(app_ctx->input_attrs);
+        app_ctx->input_attrs = NULL;
+    }
+    if (app_ctx->output_attrs != NULL)
+    {
+        free(app_ctx->output_attrs);
+        app_ctx->output_attrs = NULL;
+    }
+    for (uint32_t i = 0; i < app_ctx->io_num.n_input; i++) {
+        if (app_ctx->input_mems[i] != NULL) {
+            rknn_destroy_mem(app_ctx->rknn_ctx, app_ctx->input_mems[i]);
+        }
+    }
+    for (uint32_t i = 0; i < app_ctx->io_num.n_output; i++) {
+        if (app_ctx->output_mems[i] != NULL) {
+            rknn_destroy_mem(app_ctx->rknn_ctx, app_ctx->output_mems[i]);
+        }
+    }
+    if (app_ctx->rknn_ctx != 0)
+    {
+        rknn_destroy(app_ctx->rknn_ctx);
+        app_ctx->rknn_ctx = 0;
+    }
+
+    printf("Release success\n");
+    return 0;
+}
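+
+// inference_yolov5_model assumes the caller has already copied a letterboxed,
+// model-sized image into input_mems[0]. Because the buffers were bound at
+// init time, a frame is just rknn_run plus post_process over the persistent
+// output buffers (no rknn_outputs_get/release round trip).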
+int inference_yolov5_model(rknn_app_context_t *app_ctx, object_detect_result_list *od_results)
+{
+    int ret;
+    const float nms_threshold = NMS_THRESH;      // default NMS threshold
+    const float box_conf_threshold = BOX_THRESH; // default confidence threshold
+
+    ret = rknn_run(app_ctx->rknn_ctx, nullptr);
+    if (ret < 0) {
+        printf("rknn_run fail! ret=%d\n", ret);
+        return -1;
+    }
+
+    // Post-process directly on the zero-copy output buffers
+    post_process(app_ctx, app_ctx->output_mems, box_conf_threshold, nms_threshold, od_results);
+    return ret;
+}
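+
+// Letterbox math: scale = min(model_w / src_w, model_h / src_h); the scaled
+// image is centred on a black model-sized canvas with
+// leftPadding = (model_w - scaled_w) / 2 and topPadding likewise. In this
+// demo main.cc resizes frames to 320x320 (= the model size) first, so scale
+// is 1 and both paddings are 0, but the general form handles any source size.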
+
+/*
+ * Preprocessing: scale with preserved aspect ratio, then pad.
+ */
+float scale;
+extern int model_width;
+extern int model_height;
+
+extern int width;
+extern int height;
+
+extern int leftPadding;
+extern int topPadding;
+cv::Mat letterbox(cv::Mat input)
+{
+    float scaleX = (float)model_width / (float)width;
+    float scaleY = (float)model_height / (float)height;
+    scale = scaleX < scaleY ? scaleX : scaleY;
+
+    int inputWidth = (int)((float)width * scale);
+    int inputHeight = (int)((float)height * scale);
+
+    leftPadding = (model_width - inputWidth) / 2;
+    topPadding = (model_height - inputHeight) / 2;
+
+    cv::Mat inputScale;
+    cv::resize(input, inputScale, cv::Size(inputWidth, inputHeight), 0, 0, cv::INTER_LINEAR);
+    // black canvas: rows = model_height, cols = model_width
+    cv::Mat letterboxImage(model_height, model_width, CV_8UC3, cv::Scalar(0, 0, 0));
+    cv::Rect roi(leftPadding, topPadding, inputWidth, inputHeight);
+    inputScale.copyTo(letterboxImage(roi));
+
+    return letterboxImage;
+}
+/*
+ * Map coordinates from the letterboxed model space back to the original image.
+ */
+void mapCoordinates(int *x, int *y)
+{
+    int mx = *x - leftPadding;
+    int my = *y - topPadding;
+
+    *x = (int)((float)mx / scale);
+    *y = (int)((float)my / scale);
+}
+
+void draw_detections(int count,
+                     object_detect_result *results,
+                     cv::Mat &frame,
+                     void (*mapCoord)(int *, int *))
+{
+    char text[128];
+    for (int i = 0; i < count; i++)
+    {
+        object_detect_result *det = &results[i];
+
+        // Box corners in letterboxed model space
+        int sX = (int)det->box.left;
+        int sY = (int)det->box.top;
+        int eX = (int)det->box.right;
+        int eY = (int)det->box.bottom;
+
+        // Map back to the display frame
+        mapCoord(&sX, &sY);
+        mapCoord(&eX, &eY);
+
+        // Draw the detection box
+        cv::rectangle(frame,
+                      cv::Point(sX, sY),
+                      cv::Point(eX, eY),
+                      cv::Scalar(0, 255, 0), 1);
+
+        // Compose the label text
+        snprintf(text, sizeof(text), "%s %.1f%%",
+                 coco_cls_to_name(det->cls_id),
+                 det->prop * 100);
+
+        // Draw the label
+        cv::putText(frame, text,
+                    cv::Point(sX, sY - 8),
+                    cv::FONT_HERSHEY_SIMPLEX, 1,
+                    cv::Scalar(0, 255, 0), 1);
+    }
+}
\ No newline at end of file
diff --git a/Cpp_example/D10_yolov5/yolov5.h b/Cpp_example/D10_yolov5/yolov5.h
new file mode 100755
index 0000000000000000000000000000000000000000..5ba24ddba3222f71c9832bc222a9196de580549f
--- /dev/null
+++ b/Cpp_example/D10_yolov5/yolov5.h
@@ -0,0 +1,61 @@
+// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _RKNN_DEMO_YOLOV5_H_
+#define _RKNN_DEMO_YOLOV5_H_
+
+#include "rknn_api.h"
+#include "opencv2/core/core.hpp"
+
+typedef struct
+{
+    char *dma_buf_virt_addr;
+    int dma_buf_fd;
+    int size;
+} rknn_dma_buf;
+
+typedef struct
+{
+    rknn_context rknn_ctx;
+    rknn_input_output_num io_num;
+    rknn_tensor_attr *input_attrs;
+    rknn_tensor_attr *output_attrs;
+    rknn_tensor_mem *net_mem;
+
+    rknn_tensor_mem *input_mems[1];
+    rknn_tensor_mem *output_mems[3];
+    rknn_dma_buf img_dma_buf;
+
+    int model_channel;
+    int model_width;
+    int model_height;
+    bool is_quant;
+} rknn_app_context_t;
+
+// postprocess.h needs rknn_app_context_t, so include it after the struct definition
+#include "postprocess.h"
+
+int init_yolov5_model(const char *model_path, rknn_app_context_t *app_ctx);
+
+int release_yolov5_model(rknn_app_context_t *app_ctx);
+
+int inference_yolov5_model(rknn_app_context_t *app_ctx, object_detect_result_list *od_results);
+
+cv::Mat letterbox(cv::Mat input);
+void mapCoordinates(int *x, int *y);
+
+void draw_detections(int count,
+                     object_detect_result *results,
+                     cv::Mat &frame,
+                     void (*mapCoord)(int *, int *));
+
+#endif //_RKNN_DEMO_YOLOV5_H_
\ No newline at end of file
diff --git a/README.md b/README.md
index 7ced98bf942edfeeb3b0edcb55745981f1bfa10a..6fa4470123a2c8862d55aa686501bf2bb7a1ad80 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,11 @@ OCR(Optical Character Recognition,光学字符识别)是一种将图像中
 
 * [凌智视觉模块车牌识别](./Cpp_example/D09_plate_recognize/README.md)
 
+### 👍 YOLOv5 Object Detection
+Object detection is one of the core computer-vision tasks in deep learning: it identifies every object of interest in an image or video and localizes each one with a bounding box. Unlike classification, detection must predict both the class and the position of each object, so annotation is more involved; it suits scenarios that need objects both classified and localized. This example uses YOLOv5 for object detection.
+
+* [YOLOv5 Object Detection](./Cpp_example/D10_yolov5/README.md)
+
 ## 🏀 C++ 开发案例
 C++ 开发案例以A、B、C、D进行不同类别进行分类,方便初学者进行使用和二次开发。
 * `A01 - A99`: 基础外设类
@@ -177,7 +182,7 @@
 | D07 | 神经网络类 | OCR Synthesis | [OCR综合示例](./Cpp_example/D07_ocr_synthesis/README.md) |
 | D08 | 神经网络类 | PPHumanSeg | [图像分割](./Cpp_example/D08_pp_humanseg/README.md) |
 | D09 | 神经网络类 | Plate Recognition | [车牌识别](./Cpp_example/D09_plate_recognize/README.md) |
-
+| D10 | 神经网络类 | YOLOv5 | [YOLOv5 Object Detection](./Cpp_example/D10_yolov5/README.md) |
 
 ## 🐛 Bug反馈
 如果您遇到问题,您可以前往 [Lockzhiner Vision Module Issues](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/issues) 并点击已完成按钮查看其他用户反馈且我们已经解决的 Bug。