diff --git a/Cpp_example/D10_yolov5/CMakeLists.txt b/Cpp_example/D10_yolov5/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..3cca410735c558341b175dc9c50a2671dde2728c --- /dev/null +++ b/Cpp_example/D10_yolov5/CMakeLists.txt @@ -0,0 +1,43 @@ +# CMake最低版本要求 +cmake_minimum_required(VERSION 3.10) + +project(test-find-blobs) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +set(RKNPU2_BACKEND_BASE_DIR "${LockzhinerVisionModule_ROOT_PATH}/include/lockzhiner_vision_module/vision/deep_learning/runtime") +if(NOT EXISTS ${RKNPU2_BACKEND_BASE_DIR}) + message(FATAL_ERROR "RKNPU2 backend base dir missing: ${RKNPU2_BACKEND_BASE_DIR}") +endif() + +add_executable(yolov5_main + main.cc + postprocess.cc + yolov5.cc + yolov5.h + postprocess.h +) +target_include_directories(yolov5_main PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS} ${rknpu2_INCLUDE_DIRS} ${RKNPU2_BACKEND_BASE_DIR}) +target_link_libraries(yolov5_main PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS yolov5_main + RUNTIME DESTINATION . +) \ No newline at end of file diff --git a/Cpp_example/D10_yolov5/README.md b/Cpp_example/D10_yolov5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..98ce23290acc789ba0a1b3639bf35300f3e6468c --- /dev/null +++ b/Cpp_example/D10_yolov5/README.md @@ -0,0 +1,333 @@ +# YOLOV5目标检测 +本章节基于 Lockzhiner Vision Module 和 YOLOv5 目标检测模型,实现实时目标检测功能。 +## 1. 基本知识简介 +### 1.1 目标检测简介 +目标检测是计算机视觉领域中的一个关键任务,它不仅需要识别图像中存在哪些对象,还需要定位这些对象的位置。具体来说,目标检测算法会输出每个检测到的对象的边界框(Bounding Box)以及其所属类别的概率或置信度得分。 +- 应用场景:目标检测技术广泛应用于多个领域,包括但不限于安全监控、自动驾驶汽车、智能零售和医疗影像分析。 +### 1.2 YOLOv5简介 +YOLOv5 是 Ultralytics 在 2020 年基于 PyTorch 开发的一种高效目标检测模型,属于 YOLO 系列。它通过一次前向传播即可预测目标的类别和位置,适用于实时检测任务。YOLOv5 提供了多种模型大小(如 YOLOv5s、m、l、x),适应不同硬件条件。其结构包括骨干网络 CSPDarknet、特征融合层和检测头,支持多尺度预测和自定义训练,广泛用于各种检测场景。 + +## 2. 
API 文档 +### 2.1 YOLOv5模型类 +#### 2.1.1 头文件 +```cpp +#include "yolov5.h" +``` +#### 2.1.2 模型初始化函数 +```cpp +int init_yolov5_model(const char* model_path, rknn_app_context_t* ctx); +``` +- 作用:加载YOLOv5 RKNN模型并初始化推理上下文 +- 参数 + - model_path:RKNN模型文件路径 + - ctx:模型上下文指针 +- 返回值: + - 0:初始化成功 + - -1:初始化失败 +#### 2.1.3 模型推理函数 +```cpp +int inference_yolov5_model(rknn_app_context_t* ctx, + object_detect_result_list* od_results); +``` +- 作用:执行模型推理并获取检测结果 +- 参数: + - ctx:已初始化的模型上下文 + - od_results:检测结果存储结构体指针 +- 返回值: + - 0:推理成功 + - -1 :推理失败 +#### 2.1.4 模型释放函数 +```cpp +void release_yolov5_model(rknn_app_context_t* ctx); +``` +- 作用:释放模型相关资源 +- 参数: + - ctx:待释放的模型上下文 +- 返回值:无 +### 2.2 图像处理函数 +#### 2.2.1 Letterbox处理 +```cpp +cv::Mat letterbox(cv::Mat& image); +``` +- 作用:保持图像比例进行缩放,添加灰边填充 +- 参数: + -image:输入图像(RGB格式) +- 返回值: + - 返回预处理图像 +#### 2.2.2 坐标映射函数 +```cpp +void mapCoordinates(float& x, float& y); +``` +- 作用:将模型输出坐标映射回原始图像坐标系 +- 参数: + - x/y:模型输出坐标(输入输出参数) +- 返回值:无 +### 2.3 结果处理函数 +#### 2.3.1 后处理初始化 +```cpp +void init_post_process(); +``` +- 作用:加载类别标签文件 +- 参数:无 +- 返回值:无 +#### 2.3.2 结果绘制函数 +```cpp +void draw_detections(int count, + object_detect_result* results, + cv::Mat& frame, + void (*mapFunc)(float&, float&)); +``` +- 作用:在图像上绘制检测框和标签 +- 参数: + - count:检测结果数量 + - results:检测结果数组 + - frame:目标图像帧 + - mapFunc:坐标映射函数指针 +- 返回值:无 +## 3. 代码解析 +### 3.1 流程图 + + + +### 3.2核心代码解析 +- 模型初始化 +```cpp +rknn_app_context_t rknn_app_ctx; +init_yolov5_model("yolov5s.rknn", &rknn_app_ctx); +``` +- 图像预处理 +```cpp +cv::Mat letterboxImage = letterbox(frame); // 保持比例缩放 +memcpy(rknn_app_ctx.input_mems[0]->virt_addr, + letterboxImage.data, + model_width * model_height * 3); +``` +- 模型推理 +```cpp +object_detect_result_list od_results; +inference_yolov5_model(&rknn_app_ctx, &od_results); +``` +- 结果可视化 +```cpp +draw_detections(od_results.count, + od_results.results, + frame, + mapCoordinates); +``` +### 3.3 完整代码实现 +```cpp +#include +#include +#include +#include "yolov5.h" +#include "opencv2/core/core.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include +#include +// output img size +#define DISP_WIDTH 320 +#define DISP_HEIGHT 320 + +// disp size +int width = DISP_WIDTH; +int height = DISP_HEIGHT; + +// model size +int model_width = 320; +int model_height = 320; + +int leftPadding; +int topPadding; + +// label size +extern int obj_class_num; +char *lable; + +int main(int argc, char *argv[]) +{ + if (argc != 4) + { + LOGGER_INFO("Usage: %s ./yolov5_main model_path ./label size\n ./label_txt"); + } + obj_class_num = atoi(argv[2]); + lable = argv[3]; + // Rknn model + char text[16]; + // rknn上下文结构体 + rknn_app_context_t rknn_app_ctx; + object_detect_result_list od_results; + int ret; + const char *model_path = argv[1]; + memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t)); + + // Step 1: Load RKNN model + if (init_yolov5_model(model_path, &rknn_app_ctx) != 0) + { + printf("❌ Failed to load RKNN model!\n"); + return -1; + } + printf("✅ RKNN model loaded successfully.\n"); + + // 加载标签文件 + init_post_process(); + + // 打开摄像头 + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) + { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + cap.open(0); + + if (!cap.isOpened()) + { + std::cerr << "Error: Could not open camera." 
<< std::endl; + return 1; + } + cv::Mat frame; + // 在 while 循环外声明 start 和 end 变量 + std::chrono::steady_clock::time_point start, end; + + while (true) + { + // 记录开始时间 + start = std::chrono::steady_clock::now(); + // Step 2: Load image from command line + cap >> frame; + if (frame.empty()) + { + LOGGER_INFO("❌ Failed to read frame from camera.\n"); + continue; + } + cv::resize(frame, frame, cv::Size(width, height), 0, 0, cv::INTER_LINEAR); + cv::Mat letterboxImage = letterbox(frame); + + if (letterboxImage.empty() || letterboxImage.total() * letterboxImage.elemSize() != model_width * model_height * 3) + { + + LOGGER_ERROR("❌ Input image format or size mismatch!\n"); + release_yolov5_model(&rknn_app_ctx); + return -1; + } + + if (rknn_app_ctx.input_mems == nullptr || rknn_app_ctx.input_mems[0] == nullptr) + { + + LOGGER_ERROR("❌ RKNN input memory not allocated!\n"); + release_yolov5_model(&rknn_app_ctx); + return -1; + } + + memcpy(rknn_app_ctx.input_mems[0]->virt_addr, letterboxImage.data, model_width * model_height * 3); + + if (inference_yolov5_model(&rknn_app_ctx, &od_results) != 0) + { + LOGGER_ERROR("inference_yolov5_model failed"); + release_yolov5_model(&rknn_app_ctx); + return -1; + } + + draw_detections(od_results.count, // 传入结果数量 + od_results.results, // 传入结果数组 + frame, // 图像帧 + mapCoordinates); // 直接使用现有坐标映射函数 + + edit.Print(frame); + // 记录结束时间 + end = std::chrono::steady_clock::now(); + // 计算耗时(秒) + double elapsed_time = std::chrono::duration(end - start).count(); + printf("Frame processed in %.4f seconds\n", elapsed_time); + } + release_yolov5_model(&rknn_app_ctx); + deinit_post_process(); + cap.release(); + return 0; +} +``` + +## 4. 编译过程 +### 4.1 编译环境搭建 +- 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/cpp_development_environment.md) 正确配置了开发环境。 +- 同时以正确连接开发板。 +### 4.2 Cmake介绍 +```cmake +# CMake最低版本要求 +cmake_minimum_required(VERSION 3.10) + +project(test-find-blobs) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +set(RKNPU2_BACKEND_BASE_DIR "${LockzhinerVisionModule_ROOT_PATH}/include/lockzhiner_vision_module/vision/deep_learning/runtime") +if(NOT EXISTS ${RKNPU2_BACKEND_BASE_DIR}) + message(FATAL_ERROR "RKNPU2 backend base dir missing: ${RKNPU2_BACKEND_BASE_DIR}") +endif() + +add_executable(yolov5_main + main.cc + postprocess.cc + yolov5.cc + yolov5.h + postprocess.h +) +target_include_directories(yolov5_main PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS} ${rknpu2_INCLUDE_DIRS} ${RKNPU2_BACKEND_BASE_DIR}) +target_link_libraries(yolov5_main PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS yolov5_main + RUNTIME DESTINATION . 
+)
+```
+### 4.3 编译项目
+使用 Docker Desktop 打开 LockzhinerVisionModule 容器并执行以下命令来编译项目
+```bash
+# 进入Demo所在目录
+cd /LockzhinerVisionModuleWorkSpace/LockzhinerVisionModule/Cpp_example/D10_yolov5
+# 创建编译目录
+rm -rf build && mkdir build && cd build
+# 配置交叉编译工具链
+export TOOLCHAIN_ROOT_PATH="/LockzhinerVisionModuleWorkSpace/arm-rockchip830-linux-uclibcgnueabihf"
+# 使用cmake配置项目
+cmake ..
+# 执行编译项目
+make -j8 && make install
+```
+
+在执行完上述命令后,会在build目录下生成可执行文件。
+
+## 5. 例程运行示例
+### 5.1 运行
+```shell
+chmod 777 yolov5_main
+# 参数依次为:rknn 模型路径、类别数量、标签文件路径
+# 在实际应用的过程中 voc320.rknn 需要替换为下载的或者你自己训练转换得到的 rknn 模型
+./yolov5_main ./voc320.rknn 20 label
+```
+### 5.2 结果展示
+
+- 可以看到我们可以正确识别多种类别的目标。
+
+![](./images/D10_yolov5.png)
\ No newline at end of file
diff --git a/Cpp_example/D10_yolov5/images/D10_1.png b/Cpp_example/D10_yolov5/images/D10_1.png
new file mode 100755
index 0000000000000000000000000000000000000000..ad1ef2cfb0a3e0699a37ee3e3038e735523ce306
Binary files /dev/null and b/Cpp_example/D10_yolov5/images/D10_1.png differ
diff --git a/Cpp_example/D10_yolov5/images/D10_yolov5.png b/Cpp_example/D10_yolov5/images/D10_yolov5.png
new file mode 100755
index 0000000000000000000000000000000000000000..e1a8fd8cf6f69823c2cca86f3e31da18c6d055af
Binary files /dev/null and b/Cpp_example/D10_yolov5/images/D10_yolov5.png differ
diff --git a/Cpp_example/D10_yolov5/main.cc b/Cpp_example/D10_yolov5/main.cc
new file mode 100755
index 0000000000000000000000000000000000000000..a26a784cbc2141c48cb3d9073895082e74b0031e
--- /dev/null
+++ b/Cpp_example/D10_yolov5/main.cc
@@ -0,0 +1,134 @@
+#include
+#include
+#include
+#include "yolov5.h"
+#include "opencv2/core/core.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include
+#include
+// output img size
+#define DISP_WIDTH 320
+#define DISP_HEIGHT 320
+
+// disp size
+int width = DISP_WIDTH;
+int height = DISP_HEIGHT;
+
+// model size
+int model_width = 320;
+int model_height = 320;
+
+int leftPadding;
+int topPadding;
+
+// label size
+extern int obj_class_num;
+char *lable;
+
+int main(int argc, char *argv[])
+{
+    if (argc != 4)
+    {
+        LOGGER_INFO("Usage: ./yolov5_main model_path class_num label_path\n");
+        return -1;
+    }
+    obj_class_num = atoi(argv[2]);
+    lable = argv[3];
+    // Rknn model
+    char text[16];
+    // rknn上下文结构体
+    rknn_app_context_t rknn_app_ctx;
+    object_detect_result_list od_results;
+    int ret;
+    const char *model_path = argv[1];
+    memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t));
+
+    // Step 1: Load RKNN model
+    if (init_yolov5_model(model_path, &rknn_app_ctx) != 0)
+    {
+        printf("❌ Failed to load RKNN model!\n");
+        return -1;
+    }
+    printf("✅ RKNN model loaded successfully.\n");
+
+    // 加载标签文件
+    init_post_process();
+
+    // 打开摄像头
+    lockzhiner_vision_module::edit::Edit edit;
+    if (!edit.StartAndAcceptConnection())
+    {
+        std::cerr << "Error: Failed to start and accept connection." << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    cv::VideoCapture cap;
+    cap.set(cv::CAP_PROP_FRAME_WIDTH, 640);
+    cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
+    cap.open(0);
+
+    if (!cap.isOpened())
+    {
+        std::cerr << "Error: Could not open camera."
<< std::endl; + return 1; + } + cv::Mat frame; + // 在 while 循环外声明 start 和 end 变量 + std::chrono::steady_clock::time_point start, end; + + while (true) + { + // 记录开始时间 + start = std::chrono::steady_clock::now(); + // Step 2: Load image from command line + cap >> frame; + if (frame.empty()) + { + LOGGER_INFO("❌ Failed to read frame from camera.\n"); + continue; + } + cv::resize(frame, frame, cv::Size(width, height), 0, 0, cv::INTER_LINEAR); + cv::Mat letterboxImage = letterbox(frame); + + if (letterboxImage.empty() || letterboxImage.total() * letterboxImage.elemSize() != model_width * model_height * 3) + { + + LOGGER_ERROR("❌ Input image format or size mismatch!\n"); + release_yolov5_model(&rknn_app_ctx); + return -1; + } + + if (rknn_app_ctx.input_mems == nullptr || rknn_app_ctx.input_mems[0] == nullptr) + { + + LOGGER_ERROR("❌ RKNN input memory not allocated!\n"); + release_yolov5_model(&rknn_app_ctx); + return -1; + } + + memcpy(rknn_app_ctx.input_mems[0]->virt_addr, letterboxImage.data, model_width * model_height * 3); + + if (inference_yolov5_model(&rknn_app_ctx, &od_results) != 0) + { + LOGGER_ERROR("inference_yolov5_model failed"); + release_yolov5_model(&rknn_app_ctx); + return -1; + } + + draw_detections(od_results.count, // 传入结果数量 + od_results.results, // 传入结果数组 + frame, // 图像帧 + mapCoordinates); // 直接使用现有坐标映射函数 + + edit.Print(frame); + // 记录结束时间 + end = std::chrono::steady_clock::now(); + // 计算耗时(秒) + double elapsed_time = std::chrono::duration(end - start).count(); + printf("Frame processed in %.4f seconds\n", elapsed_time); + } + release_yolov5_model(&rknn_app_ctx); + deinit_post_process(); + cap.release(); + return 0; +} \ No newline at end of file diff --git a/Cpp_example/D10_yolov5/model/anchors_yolov5.txt b/Cpp_example/D10_yolov5/model/anchors_yolov5.txt new file mode 100755 index 0000000000000000000000000000000000000000..680a13e3f7c1e79210a7f53a8e985f55057adde5 --- /dev/null +++ b/Cpp_example/D10_yolov5/model/anchors_yolov5.txt @@ -0,0 +1,18 @@ +10.0 +13.0 +16.0 +30.0 +33.0 +23.0 +30.0 +61.0 +62.0 +45.0 +59.0 +119.0 +116.0 +90.0 +156.0 +198.0 +373.0 +326.0 diff --git a/Cpp_example/D10_yolov5/model/label.txt b/Cpp_example/D10_yolov5/model/label.txt new file mode 100755 index 0000000000000000000000000000000000000000..f1c7699934d3168347389030cb142afe4105eade --- /dev/null +++ b/Cpp_example/D10_yolov5/model/label.txt @@ -0,0 +1,20 @@ +aeroplane +bicycle +bird +boat +bottle +bus +car +cat +chair +cow +diningtable +dog +horse +motorbike +person +pottedplant +sheep +sofa +train +tvmonitor \ No newline at end of file diff --git a/Cpp_example/D10_yolov5/postprocess.cc b/Cpp_example/D10_yolov5/postprocess.cc new file mode 100755 index 0000000000000000000000000000000000000000..063e23dc9405f6004a38236d9dd322d191a13ffb --- /dev/null +++ b/Cpp_example/D10_yolov5/postprocess.cc @@ -0,0 +1,594 @@ +// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
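+
+// 本文件实现 YOLOv5 输出的后处理流程,与下方代码对应:
+// 1. 按各输出张量的 zero-point / scale 将 int8 结果反量化;
+// 2. 用置信度阈值(BOX_THRESH)过滤候选框,并结合 anchor 按 YOLOv5 公式解码出框的 x/y/w/h;
+// 3. 对候选框按类别做 NMS(阈值 NMS_THRESH),再裁剪到模型输入尺寸并写入 object_detect_result_list。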
+ +#include "yolov5.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#define LABEL_NALE_TXT_PATH "./model/coco_80_labels_list.txt" + +int OBJ_CLASS_NUM = 20; // 默认值(通过 init_post_process 修改) +int PROP_BOX_SIZE = 5 + OBJ_CLASS_NUM; +static char** labels = nullptr; // 动态分配的标签数组 +int obj_class_num = 0; +const int anchor[3][6] = {{10, 13, 16, 30, 33, 23}, + {30, 61, 62, 45, 59, 119}, + {116, 90, 156, 198, 373, 326}}; + +inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; } +extern char *lable; +static char *readLine(FILE *fp, char *buffer, int *len) +{ + int ch; + int i = 0; + size_t buff_len = 0; + + buffer = (char *)malloc(buff_len + 1); + if (!buffer) + return NULL; // Out of memory + + while ((ch = fgetc(fp)) != '\n' && ch != EOF) + { + buff_len++; + void *tmp = realloc(buffer, buff_len + 1); + if (tmp == NULL) + { + free(buffer); + return NULL; // Out of memory + } + buffer = (char *)tmp; + + buffer[i] = (char)ch; + i++; + } + buffer[i] = '\0'; + + *len = buff_len; + + // Detect end + if (ch == EOF && (i == 0 || ferror(fp))) + { + free(buffer); + return NULL; + } + return buffer; +} + +static int readLines(const char *fileName, char *lines[], int max_line) +{ + FILE *file = fopen(fileName, "r"); + char *s; + int i = 0; + int n = 0; + + if (file == NULL) + { + printf("Open %s fail!\n", fileName); + return -1; + } + + while ((s = readLine(file, s, &n)) != NULL) + { + lines[i++] = s; + if (i >= max_line) + break; + } + fclose(file); + return i; +} + +static int loadLabelName(const char *locationFilename, char *label[]) +{ + printf("load lable %s\n", locationFilename); + readLines(locationFilename, label, OBJ_CLASS_NUM); + return 0; +} + +static float CalculateOverlap(float xmin0, float ymin0, float xmax0, float ymax0, float xmin1, float ymin1, float xmax1, + float ymax1) +{ + float w = fmax(0.f, fmin(xmax0, xmax1) - fmax(xmin0, xmin1) + 1.0); + float h = fmax(0.f, fmin(ymax0, ymax1) - fmax(ymin0, ymin1) + 1.0); + float i = w * h; + float u = (xmax0 - xmin0 + 1.0) * (ymax0 - ymin0 + 1.0) + (xmax1 - xmin1 + 1.0) * (ymax1 - ymin1 + 1.0) - i; + return u <= 0.f ? 
0.f : (i / u); +} + +static int nms(int validCount, std::vector &outputLocations, std::vector classIds, std::vector &order, + int filterId, float threshold) +{ + for (int i = 0; i < validCount; ++i) + { + if (order[i] == -1 || classIds[i] != filterId) + { + continue; + } + int n = order[i]; + for (int j = i + 1; j < validCount; ++j) + { + int m = order[j]; + if (m == -1 || classIds[i] != filterId) + { + continue; + } + float xmin0 = outputLocations[n * 4 + 0]; + float ymin0 = outputLocations[n * 4 + 1]; + float xmax0 = outputLocations[n * 4 + 0] + outputLocations[n * 4 + 2]; + float ymax0 = outputLocations[n * 4 + 1] + outputLocations[n * 4 + 3]; + + float xmin1 = outputLocations[m * 4 + 0]; + float ymin1 = outputLocations[m * 4 + 1]; + float xmax1 = outputLocations[m * 4 + 0] + outputLocations[m * 4 + 2]; + float ymax1 = outputLocations[m * 4 + 1] + outputLocations[m * 4 + 3]; + + float iou = CalculateOverlap(xmin0, ymin0, xmax0, ymax0, xmin1, ymin1, xmax1, ymax1); + + if (iou > threshold) + { + order[j] = -1; + } + } + } + return 0; +} + +static int quick_sort_indice_inverse(std::vector &input, int left, int right, std::vector &indices) +{ + float key; + int key_index; + int low = left; + int high = right; + if (left < right) + { + key_index = indices[left]; + key = input[left]; + while (low < high) + { + while (low < high && input[high] <= key) + { + high--; + } + input[low] = input[high]; + indices[low] = indices[high]; + while (low < high && input[low] >= key) + { + low++; + } + input[high] = input[low]; + indices[high] = indices[low]; + } + input[low] = key; + indices[low] = key_index; + quick_sort_indice_inverse(input, left, low - 1, indices); + quick_sort_indice_inverse(input, low + 1, right, indices); + } + return low; +} + +static float sigmoid(float x) { return 1.0 / (1.0 + expf(-x)); } + +static float unsigmoid(float y) { return -1.0 * logf((1.0 / y) - 1.0); } + +inline static int32_t __clip(float val, float min, float max) +{ + float f = val <= min ? min : (val >= max ? 
max : val); + return f; +} + +static int8_t qnt_f32_to_affine(float f32, int32_t zp, float scale) +{ + float dst_val = (f32 / scale) + zp; + int8_t res = (int8_t)__clip(dst_val, -128, 127); + return res; +} + +static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale) { return ((float)qnt - (float)zp) * scale; } + +static int process_i8(int8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride, + std::vector &boxes, std::vector &objProbs, std::vector &classId, float threshold, + int32_t zp, float scale) +{ + int validCount = 0; + int grid_len = grid_h * grid_w; + int8_t thres_i8 = qnt_f32_to_affine(threshold, zp, scale); + for (int a = 0; a < 3; a++) + { + for (int i = 0; i < grid_h; i++) + { + for (int j = 0; j < grid_w; j++) + { + int8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j]; + if (box_confidence >= thres_i8) + { + int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j; + int8_t *in_ptr = input + offset; + float box_x = (deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5; + float box_y = (deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5; + float box_w = (deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0; + float box_h = (deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0; + box_x = (box_x + j) * (float)stride; + box_y = (box_y + i) * (float)stride; + box_w = box_w * box_w * (float)anchor[a * 2]; + box_h = box_h * box_h * (float)anchor[a * 2 + 1]; + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + + int8_t maxClassProbs = in_ptr[5 * grid_len]; + int maxClassId = 0; + for (int k = 1; k < OBJ_CLASS_NUM; ++k) + { + int8_t prob = in_ptr[(5 + k) * grid_len]; + if (prob > maxClassProbs) + { + maxClassId = k; + maxClassProbs = prob; + } + } + if (maxClassProbs > thres_i8) + { + objProbs.push_back((deqnt_affine_to_f32(maxClassProbs, zp, scale)) * (deqnt_affine_to_f32(box_confidence, zp, scale))); + classId.push_back(maxClassId); + validCount++; + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + } + } + } + } + } + return validCount; +} + +static int process_i8_rv1106(int8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride, + std::vector &boxes, std::vector &boxScores, std::vector &classId, float threshold, + int32_t zp, float scale) { + int validCount = 0; + int8_t thres_i8 = qnt_f32_to_affine(threshold, zp, scale); + + int anchor_per_branch = 3; + int align_c = PROP_BOX_SIZE * anchor_per_branch; + + for (int h = 0; h < grid_h; h++) { + for (int w = 0; w < grid_w; w++) { + for (int a = 0; a < anchor_per_branch; a++) { + int hw_offset = h * grid_w * align_c + w * align_c + a * PROP_BOX_SIZE; + int8_t *hw_ptr = input + hw_offset; + int8_t box_confidence = hw_ptr[4]; + + if (box_confidence >= thres_i8) { + int8_t maxClassProbs = hw_ptr[5]; + int maxClassId = 0; + for (int k = 1; k < OBJ_CLASS_NUM; ++k) { + int8_t prob = hw_ptr[5 + k]; + if (prob > maxClassProbs) { + maxClassId = k; + maxClassProbs = prob; + } + } + + float box_conf_f32 = deqnt_affine_to_f32(box_confidence, zp, scale); + float class_prob_f32 = deqnt_affine_to_f32(maxClassProbs, zp, scale); + float limit_score = box_conf_f32 * class_prob_f32; + + if (limit_score > threshold) { + float box_x, box_y, box_w, box_h; + + box_x = deqnt_affine_to_f32(hw_ptr[0], zp, scale) * 2.0 - 0.5; + box_y = deqnt_affine_to_f32(hw_ptr[1], zp, scale) * 2.0 - 0.5; + box_w = deqnt_affine_to_f32(hw_ptr[2], zp, scale) * 2.0; + box_h = 
deqnt_affine_to_f32(hw_ptr[3], zp, scale) * 2.0; + box_w = box_w * box_w; + box_h = box_h * box_h; + + + box_x = (box_x + w) * (float)stride; + box_y = (box_y + h) * (float)stride; + box_w *= (float)anchor[a * 2]; + box_h *= (float)anchor[a * 2 + 1]; + + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + boxScores.push_back(limit_score); + classId.push_back(maxClassId); + validCount++; + } + } + } + } + } + return validCount; +} + +static int process_fp32(float *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride, + std::vector &boxes, std::vector &objProbs, std::vector &classId, float threshold) +{ + int validCount = 0; + int grid_len = grid_h * grid_w; + + for (int a = 0; a < 3; a++) + { + for (int i = 0; i < grid_h; i++) + { + for (int j = 0; j < grid_w; j++) + { + float box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j]; + if (box_confidence >= threshold) + { + int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j; + float *in_ptr = input + offset; + float box_x = *in_ptr * 2.0 - 0.5; + float box_y = in_ptr[grid_len] * 2.0 - 0.5; + float box_w = in_ptr[2 * grid_len] * 2.0; + float box_h = in_ptr[3 * grid_len] * 2.0; + box_x = (box_x + j) * (float)stride; + box_y = (box_y + i) * (float)stride; + box_w = box_w * box_w * (float)anchor[a * 2]; + box_h = box_h * box_h * (float)anchor[a * 2 + 1]; + box_x -= (box_w / 2.0); + box_y -= (box_h / 2.0); + + float maxClassProbs = in_ptr[5 * grid_len]; + int maxClassId = 0; + for (int k = 1; k < OBJ_CLASS_NUM; ++k) + { + float prob = in_ptr[(5 + k) * grid_len]; + if (prob > maxClassProbs) + { + maxClassId = k; + maxClassProbs = prob; + } + } + if (maxClassProbs > threshold) + { + objProbs.push_back(maxClassProbs * box_confidence); + classId.push_back(maxClassId); + validCount++; + boxes.push_back(box_x); + boxes.push_back(box_y); + boxes.push_back(box_w); + boxes.push_back(box_h); + } + } + } + } + } + return validCount; +} + +int post_process(rknn_app_context_t *app_ctx, void *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results) +{ + rknn_tensor_mem **_outputs = (rknn_tensor_mem **)outputs; + std::vector filterBoxes; + std::vector objProbs; + std::vector classId; + int validCount = 0; + int stride = 0; + int grid_h = 0; + int grid_w = 0; + int model_in_w = app_ctx->model_width; + int model_in_h = app_ctx->model_height; + printf("post process start"); + memset(od_results, 0, sizeof(object_detect_result_list)); + + for (int i = 0; i < 3; i++) + { + grid_h = app_ctx->output_attrs[i].dims[1]; + grid_w = app_ctx->output_attrs[i].dims[2]; + stride = model_in_h / grid_h; + //RV1106 only support i8 + if (app_ctx->is_quant) { + validCount += process_i8_rv1106((int8_t *)(_outputs[i]->virt_addr), (int *)anchor[i], grid_h, grid_w, model_in_h, model_in_w, stride, filterBoxes, objProbs, + classId, conf_threshold, app_ctx->output_attrs[i].zp, app_ctx->output_attrs[i].scale); + } + + } + + // no object detect + if (validCount <= 0) + { + return 0; + } + std::vector indexArray; + for (int i = 0; i < validCount; ++i) + { + indexArray.push_back(i); + } + quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray); + + std::set class_set(std::begin(classId), std::end(classId)); + + for (auto c : class_set) + { + nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold); + } + + int last_count = 0; + od_results->count = 0; + + /* box valid detect target 
*/ + for (int i = 0; i < validCount; ++i) + { + if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE) + { + continue; + } + int n = indexArray[i]; + + float x1 = filterBoxes[n * 4 + 0] ; + float y1 = filterBoxes[n * 4 + 1] ; + float x2 = x1 + filterBoxes[n * 4 + 2]; + float y2 = y1 + filterBoxes[n * 4 + 3]; + int id = classId[n]; + float obj_conf = objProbs[i]; + + od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w)); + od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h)); + od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w)); + od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h)); + od_results->results[last_count].prop = obj_conf; + od_results->results[last_count].cls_id = id; + last_count++; + } + od_results->count = last_count; + return 0; +} + +// int post_process(rknn_app_context_t *app_ctx, void *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results) +// { +// rknn_tensor_mem **_outputs = (rknn_tensor_mem **)outputs; +// std::vector filterBoxes; +// std::vector objProbs; +// std::vector classId; +// int validCount = 0; +// int stride = 0; +// int grid_h = 0; +// int grid_w = 0; +// int model_in_w = app_ctx->model_width; +// int model_in_h = app_ctx->model_height; +// printf("post process start\n"); +// memset(od_results, 0, sizeof(object_detect_result_list)); + +// for (int i = 0; i < 3; i++) +// { +// grid_h = app_ctx->output_attrs[i].dims[2]; +// grid_w = app_ctx->output_attrs[i].dims[1]; +// stride = model_in_h / grid_h; +// // RV1106 only support i8 +// if (app_ctx->is_quant) { +// validCount += process_i8_rv1106((int8_t *)(_outputs[i]->virt_addr), (int *)anchor[i], grid_h, grid_w, model_in_h, model_in_w, stride, filterBoxes, objProbs, +// classId, conf_threshold, app_ctx->output_attrs[i].zp, app_ctx->output_attrs[i].scale); +// } +// // 如果需要支持fp32版本,请取消下面注释并实现process_fp32函数 +// /* +// else { +// validCount += process_fp32((float *)(_outputs[i]->virt_addr), (int *)anchor[i], grid_h, grid_w, model_in_h, model_in_w, stride, filterBoxes, objProbs, +// classId, conf_threshold); +// } +// */ +// } + +// // no object detect +// if (validCount <= 0) +// { +// return 0; +// } +// std::vector indexArray; +// for (int i = 0; i < validCount; ++i) +// { +// indexArray.push_back(i); +// } +// quick_sort_indice_inverse(objProbs, 0, validCount - 1, indexArray); + +// std::set class_set(std::begin(classId), std::end(classId)); + +// for (auto c : class_set) +// { +// nms(validCount, filterBoxes, classId, indexArray, c, nms_threshold); +// } + +// int last_count = 0; +// od_results->count = 0; + +// /* box valid detect target */ +// for (int i = 0; i < validCount; ++i) +// { +// if (indexArray[i] == -1 || last_count >= OBJ_NUMB_MAX_SIZE) +// { +// continue; +// } +// int n = indexArray[i]; + +// float x1 = filterBoxes[n * 4 + 0]; +// float y1 = filterBoxes[n * 4 + 1]; +// float x2 = x1 + filterBoxes[n * 4 + 2]; +// float y2 = y1 + filterBoxes[n * 4 + 3]; +// int id = classId[n]; +// float obj_conf = objProbs[i]; + +// od_results->results[last_count].box.left = (int)(clamp(x1, 0, model_in_w)); +// od_results->results[last_count].box.top = (int)(clamp(y1, 0, model_in_h)); +// od_results->results[last_count].box.right = (int)(clamp(x2, 0, model_in_w)); +// od_results->results[last_count].box.bottom = (int)(clamp(y2, 0, model_in_h)); +// od_results->results[last_count].prop = obj_conf; +// od_results->results[last_count].cls_id = id; +// last_count++; +// } +// 
od_results->count = last_count; +// return 0; +// } + +int init_post_process() +{ + OBJ_CLASS_NUM = obj_class_num; + PROP_BOX_SIZE = 5 + OBJ_CLASS_NUM; + + // 释放旧内存(防止内存泄漏) + if (labels != nullptr) { + deinit_post_process(); + } + + // 分配新内存 + labels = new char*[OBJ_CLASS_NUM]; + int ret = 0; + ret = loadLabelName(lable, labels); + if (ret < 0) + { + printf("Load %s failed!\n", lable); + return -1; + } + return 0; +} + +char *coco_cls_to_name(int cls_id) +{ + static char null_str[] = "null"; + + if (cls_id >= OBJ_CLASS_NUM) + { + return null_str; + } + + if (labels[cls_id]) + { + return labels[cls_id]; + } + + return null_str; +} + +void deinit_post_process() +{ + for (int i = 0; i < OBJ_CLASS_NUM; i++) + { + if (labels[i] != nullptr) + { + free(labels[i]); + labels[i] = nullptr; + } + } +} diff --git a/Cpp_example/D10_yolov5/postprocess.h b/Cpp_example/D10_yolov5/postprocess.h new file mode 100755 index 0000000000000000000000000000000000000000..59622541acbda820eaa4bb54732f1279caecbb32 --- /dev/null +++ b/Cpp_example/D10_yolov5/postprocess.h @@ -0,0 +1,41 @@ +#ifndef _RKNN_YOLOV5_DEMO_POSTPROCESS_H_ +#define _RKNN_YOLOV5_DEMO_POSTPROCESS_H_ + +#include +#include +#include "rknpu2_backend/rknpu2_backend.h" + +#define OBJ_NAME_MAX_SIZE 64 +#define OBJ_NUMB_MAX_SIZE 128 +// #define OBJ_CLASS_NUM 20 +#define NMS_THRESH 0.45 +#define BOX_THRESH 0.25 +// #define PROP_BOX_SIZE (5 + OBJ_CLASS_NUM) + +// class rknn_app_context_t; +typedef struct { + int left; + int top; + int right; + int bottom; +} image_rect_t; + +typedef struct { + image_rect_t box; + float prop; + int cls_id; +} object_detect_result; + +typedef struct { + int id; + int count; + object_detect_result results[OBJ_NUMB_MAX_SIZE]; +} object_detect_result_list; + +int init_post_process(); +void deinit_post_process(); +char *coco_cls_to_name(int cls_id); +int post_process(rknn_app_context_t *app_ctx, void *outputs, float conf_threshold, float nms_threshold, object_detect_result_list *od_results); + +void deinitPostProcess(); +#endif //_RKNN_YOLOV5_DEMO_POSTPROCESS_H_ diff --git a/Cpp_example/D10_yolov5/yolov5.cc b/Cpp_example/D10_yolov5/yolov5.cc new file mode 100755 index 0000000000000000000000000000000000000000..1df2811868e101f0e27da38a7b2b57e1388b8167 --- /dev/null +++ b/Cpp_example/D10_yolov5/yolov5.cc @@ -0,0 +1,288 @@ +// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
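+
+// 本文件封装 RKNN 零拷贝(zero-copy)推理流程:
+// init_yolov5_model:rknn_init 加载模型,查询输入/输出张量属性,rknn_create_mem 申请内存并用 rknn_set_io_mem 绑定;
+// inference_yolov5_model:rknn_run 执行推理后,直接在输出内存上调用 post_process 得到检测结果;
+// release_yolov5_model:释放张量属性与输入/输出内存,并销毁 rknn 上下文;
+// letterbox / mapCoordinates / draw_detections:按比例缩放并填充输入图像,再把检测框坐标映射回原图并绘制。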
+ +#include +#include +#include +#include + +#include "yolov5.h" + +static void dump_tensor_attr(rknn_tensor_attr *attr) +{ + printf(" index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, " + "zp=%d, scale=%f\n", + attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3], + attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type), + get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale); +} + +int init_yolov5_model(const char *model_path, rknn_app_context_t *app_ctx) +{ + int ret; + int model_len = 0; + char *model; + rknn_context ctx = 0; + + ret = rknn_init(&ctx, (char *)model_path, 0, 0, NULL); + if (ret < 0) + { + printf("rknn_init fail! ret=%d\n", ret); + return -1; + } + + // Get Model Input Output Number + rknn_input_output_num io_num; + ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num)); + if (ret != RKNN_SUCC) + { + printf("rknn_query fail! ret=%d\n", ret); + return -1; + } + //printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output); + + // Get Model Input Info + //printf("input tensors:\n"); + rknn_tensor_attr input_attrs[io_num.n_input]; + memset(input_attrs, 0, sizeof(input_attrs)); + for (int i = 0; i < io_num.n_input; i++) + { + input_attrs[i].index = i; + ret = rknn_query(ctx, RKNN_QUERY_NATIVE_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr)); + if (ret != RKNN_SUCC) + { + printf("rknn_query fail! ret=%d\n", ret); + return -1; + } + dump_tensor_attr(&(input_attrs[i])); + } + + // Get Model Output Info + //printf("output tensors:\n"); + rknn_tensor_attr output_attrs[io_num.n_output]; + memset(output_attrs, 0, sizeof(output_attrs)); + for (int i = 0; i < io_num.n_output; i++) + { + output_attrs[i].index = i; + //When using the zero-copy API interface, query the native output tensor attribute + ret = rknn_query(ctx, RKNN_QUERY_NATIVE_NHWC_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr)); + if (ret != RKNN_SUCC) + { + printf("rknn_query fail! ret=%d\n", ret); + return -1; + } + dump_tensor_attr(&(output_attrs[i])); + } + + // default input type is int8 (normalize and quantize need compute in outside) + // if set uint8, will fuse normalize and quantize to npu + input_attrs[0].type = RKNN_TENSOR_UINT8; + // default fmt is NHWC,1106 npu only support NHWC in zero copy mode + input_attrs[0].fmt = RKNN_TENSOR_NHWC; + //printf("input_attrs[0].size_with_stride=%d\n", input_attrs[0].size_with_stride); + app_ctx->input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride); + + // Set input tensor memory + ret = rknn_set_io_mem(ctx, app_ctx->input_mems[0], &input_attrs[0]); + if (ret < 0) { + printf("input_mems rknn_set_io_mem fail! ret=%d\n", ret); + return -1; + } + + // Set output tensor memory + for (uint32_t i = 0; i < io_num.n_output; ++i) { + app_ctx->output_mems[i] = rknn_create_mem(ctx, output_attrs[i].size_with_stride); + ret = rknn_set_io_mem(ctx, app_ctx->output_mems[i], &output_attrs[i]); + if (ret < 0) { + printf("output_mems rknn_set_io_mem fail! 
ret=%d\n", ret); + return -1; + } + } + + // Set to context + app_ctx->rknn_ctx = ctx; + + // TODO + if (output_attrs[0].qnt_type == RKNN_TENSOR_QNT_AFFINE_ASYMMETRIC) + { + app_ctx->is_quant = true; + } + else + { + app_ctx->is_quant = false; + } + + app_ctx->io_num = io_num; + app_ctx->input_attrs = (rknn_tensor_attr *)malloc(io_num.n_input * sizeof(rknn_tensor_attr)); + memcpy(app_ctx->input_attrs, input_attrs, io_num.n_input * sizeof(rknn_tensor_attr)); + app_ctx->output_attrs = (rknn_tensor_attr *)malloc(io_num.n_output * sizeof(rknn_tensor_attr)); + memcpy(app_ctx->output_attrs, output_attrs, io_num.n_output * sizeof(rknn_tensor_attr)); + + if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) + { + printf("model is NCHW input fmt\n"); + app_ctx->model_channel = input_attrs[0].dims[1]; + app_ctx->model_height = input_attrs[0].dims[2]; + app_ctx->model_width = input_attrs[0].dims[3]; + } else + { + printf("model is NHWC input fmt\n"); + app_ctx->model_height = input_attrs[0].dims[1]; + app_ctx->model_width = input_attrs[0].dims[2]; + app_ctx->model_channel = input_attrs[0].dims[3]; + } + + printf("model input height=%d, width=%d, channel=%d\n", + app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel); + + return 0; +} + +int release_yolov5_model(rknn_app_context_t *app_ctx) +{ + if (app_ctx->input_attrs != NULL) + { + free(app_ctx->input_attrs); + app_ctx->input_attrs = NULL; + } + if (app_ctx->output_attrs != NULL) + { + free(app_ctx->output_attrs); + app_ctx->output_attrs = NULL; + } + for (int i = 0; i < app_ctx->io_num.n_input; i++) { + if (app_ctx->input_mems[i] != NULL) { + rknn_destroy_mem(app_ctx->rknn_ctx, app_ctx->input_mems[i]); + } + } + for (int i = 0; i < app_ctx->io_num.n_output; i++) { + if (app_ctx->output_mems[i] != NULL) { + rknn_destroy_mem(app_ctx->rknn_ctx, app_ctx->output_mems[i]); + } + } + if (app_ctx->rknn_ctx != 0) + { + rknn_destroy(app_ctx->rknn_ctx); + + app_ctx->rknn_ctx = 0; + } + + printf("Release success\n"); + return 0; +} + +int inference_yolov5_model(rknn_app_context_t *app_ctx, object_detect_result_list *od_results) +{ + printf("Inference start\n"); + int ret; + const float nms_threshold = NMS_THRESH; // 默认的NMS阈值 + const float box_conf_threshold = BOX_THRESH; // 默认的置信度阈值 + printf("rknn_run\n"); + ret = rknn_run(app_ctx->rknn_ctx, nullptr); + printf("rknn_run end\n"); + printf("rknn_run ret=%d\n", ret); + if (ret < 0) { + printf("rknn_run fail! ret=%d\n", ret); + return -1; + } + printf("post_process"); + // Post Process + post_process(app_ctx, app_ctx->output_mems, box_conf_threshold, nms_threshold, od_results); +out: + return ret; +} + +/* +预处理 +图片填充缩放 +*/ +float scale; +extern int model_width; +extern int model_height; + +extern int width; +extern int height; + +extern int leftPadding; +extern int topPadding; +cv::Mat letterbox(cv::Mat input) +{ + float scaleX = (float)model_width / (float)width; + float scaleY = (float)model_height / (float)height; + scale = scaleX < scaleY ? 
scaleX : scaleY; + + int inputWidth = (int)((float)width * scale); + int inputHeight = (int)((float)height * scale); + + leftPadding = (model_width - inputWidth) / 2; + topPadding = (model_height - inputHeight) / 2; + + cv::Mat inputScale; + cv::resize(input, inputScale, cv::Size(inputWidth, inputHeight), 0, 0, cv::INTER_LINEAR); + cv::Mat letterboxImage(model_width, model_width, CV_8UC3, cv::Scalar(0, 0, 0)); + cv::Rect roi(leftPadding, topPadding, inputWidth, inputHeight); + inputScale.copyTo(letterboxImage(roi)); + + return letterboxImage; +} +/* +将经过 letterbox 处理后的坐标映射回原始图像坐标系 +*/ +void mapCoordinates(int *x, int *y) +{ + int mx = *x - leftPadding; + int my = *y - topPadding; + + *x = (int)((float)mx / scale); + *y = (int)((float)my / scale); +} + +void draw_detections(int count, + object_detect_result *results, + cv::Mat &frame, + void (*mapCoord)(int *, int *)) +{ + char text[128]; + for (int i = 0; i < count; i++) + { + object_detect_result *det = &results[i]; + + // 提取坐标 + int sX = (int)det->box.left; + int sY = (int)det->box.top; + int eX = (int)det->box.right; + int eY = (int)det->box.bottom; + + // 应用坐标映射 + mapCoord(&sX, &sY); + mapCoord(&eX, &eY); + + // 绘制检测框 + cv::rectangle(frame, + cv::Point(sX, sY), + cv::Point(eX, eY), + cv::Scalar(0, 255, 0), 1); + + // 生成标签文本 + sprintf(text, "%s %.1f%%", + coco_cls_to_name(det->cls_id), + det->prop * 100); + + // 绘制文本 + cv::putText(frame, text, + cv::Point(sX, sY - 8), + cv::FONT_HERSHEY_SIMPLEX, 1, + cv::Scalar(0, 255, 0), 1); + } +} \ No newline at end of file diff --git a/Cpp_example/D10_yolov5/yolov5.h b/Cpp_example/D10_yolov5/yolov5.h new file mode 100755 index 0000000000000000000000000000000000000000..5ba24ddba3222f71c9832bc222a9196de580549f --- /dev/null +++ b/Cpp_example/D10_yolov5/yolov5.h @@ -0,0 +1,61 @@ +// Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
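+
+// rknn_app_context_t 保存一次推理所需的全部状态:rknn 上下文、输入/输出张量属性、
+// 零拷贝输入/输出内存、模型输入尺寸与是否量化(is_quant),由 init / inference / release 接口共同使用。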
+ +#ifndef _RKNN_DEMO_YOLOV5_H_ +#define _RKNN_DEMO_YOLOV5_H_ + +#include "rknn_api.h" + +typedef struct +{ + char *dma_buf_virt_addr; + int dma_buf_fd; + int size; +} rknn_dma_buf; + +typedef struct +{ + rknn_context rknn_ctx; + rknn_input_output_num io_num; + rknn_tensor_attr *input_attrs; + rknn_tensor_attr *output_attrs; + rknn_tensor_mem *net_mem; + + rknn_tensor_mem *input_mems[1]; + rknn_tensor_mem *output_mems[3]; + rknn_dma_buf img_dma_buf; + + int model_channel; + int model_width; + int model_height; + bool is_quant; +} rknn_app_context_t; + +#include "postprocess.h" + +int init_yolov5_model(const char *model_path, rknn_app_context_t *app_ctx); + +int release_yolov5_model(rknn_app_context_t *app_ctx); + +int inference_yolov5_model(rknn_app_context_t *app_ctx, object_detect_result_list *od_results); + +#endif //_RKNN_DEMO_YOLOV5_H_ + +cv::Mat letterbox(cv::Mat input); +void mapCoordinates(int *x, int *y); + +void draw_detections(int count, + object_detect_result *results, + cv::Mat &frame, + void (*mapCoord)(int *, int *)); \ No newline at end of file diff --git a/README.md b/README.md index 7ced98bf942edfeeb3b0edcb55745981f1bfa10a..f3084c3a5bfcc5a85c4004336a98550a3456246a 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,11 @@ * [凌智视觉模块 C++ 开发环境搭建指南](./docs/introductory_tutorial/cpp_development_environment.md) * [基于 C++ 编写 Hello World 程序](./example/hello_world) --> +## 📺 入门学习视频教程 + +为了你有一个更加直观的学习体验,凌智电子推出了 Lockzhiner Vision Module 系列教学视频,现在你可以通过下面的链接,学习如何烧录镜像、连接设备、搭建开发环境和如何进行外部通讯。同时你还可以根据自己的需求,根据教程训练属于自己的神经网络模型。 + +* [凌智视觉模块视频专栏连接](https://space.bilibili.com/1360647720/lists/4727318?type=series) ## 🔌 外设控制教程 @@ -142,6 +147,11 @@ OCR(Optical Character Recognition,光学字符识别)是一种将图像中 * [凌智视觉模块车牌识别](./Cpp_example/D09_plate_recognize/README.md) +### 👍 YOLOv5目标检测 +目标检测(Object Detection)是深度学习中计算机视觉领域的重要任务之一,旨在识别图像或视频中所有感兴趣的物体,并准确地定位这些物体的边界框(Bounding Box)。与目标分类不同,目标检测不仅需要预测物体的类别,还需要标注它们在图像中的位置。一般来说,目标检测任务的标注过程比较复杂,适合既需要对目标进行分类,有需要对目标进行定位的场景。该案例使用YOLOv5进行目标检测。 + +* [YOLOv5目标检测](./Cpp_example/D10_yolov5//README.md) + ## 🏀 C++ 开发案例 C++ 开发案例以A、B、C、D进行不同类别进行分类,方便初学者进行使用和二次开发。 * `A01 - A99`: 基础外设类 @@ -177,7 +187,7 @@ C++ 开发案例以A、B、C、D进行不同类别进行分类,方便初学者 | D07 | 神经网络类 | OCR Synthesis | [OCR综合示例](./Cpp_example/D07_ocr_synthesis/README.md) | | D08 | 神经网络类 | PPHumanSeg | [图像分割](./Cpp_example/D08_pp_humanseg/README.md) | | D09 | 神经网络类 | Plate Recognition | [车牌识别](./Cpp_example/D09_plate_recognize/README.md) | - +| D10 | 神经网络类 | YOLOv5 | [YOLOv5目标检测](./Cpp_example/D10_yolov5//README.md) | ## 🐛 Bug反馈 如果您遇到问题,您可以前往 [Lockzhiner Vision Module Issues](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/issues) 并点击已完成按钮查看其他用户反馈且我们已经解决的 Bug。 diff --git a/docs/models/tensorflow_model_convert.md b/docs/models/tensorflow_model_convert.md new file mode 100644 index 0000000000000000000000000000000000000000..820e81daac45c388390549ffacdda0b0fa26436c --- /dev/null +++ b/docs/models/tensorflow_model_convert.md @@ -0,0 +1,615 @@ +# tensorflow 模型训练及转换 + +```markdown +由于 TensorFlow 在 Windows 下编译安装比较麻烦,推荐使用 Docker 容器进行模型训练及转化。 +- **参考网址**: [TensorFlow Docker 安装指南](https://www.tensorflow.org/install/docker?hl=zh-cn) +- **注意**: Docker 容器的拉取安装可以参考本仓库的开发环境搭建文档 +- **注意**: Docker 容器的拉取安装可以参考本仓库的开发环境搭建文档 + +``` + +## 模型训练及转换例子 + +### 训练模型: + +以下代码展示了如何使用 MobileNetV2 进行图像分类模型的训练。 + +- **数据预处理**:利用 `ImageDataGenerator` 对图像进行缩放和分割成训练集与验证集。 +- **模型构建**:基于预训练的 MobileNetV2 构建自己的分类模型,并通过冻结基础模型层来微调模型。 +- **模型训练**:在训练数据上训练模型,并在验证集上评估其性能。 +- **导出模型**:训练完成后,将模型导出以便后续使用或部署。 + +```python +import tensorflow as tf +from tensorflow.keras import 
layers, models, regularizers +from tensorflow.keras.applications import MobileNetV2 +from tensorflow.keras.preprocessing.image import ImageDataGenerator + +# 假设你有两类或更多类别的图像,每类放在单独的文件夹中,如 'dataset/1/', 'dataset/2/' ... + +IMG_SIZE = 224 + +# 数据预处理 +datagen = ImageDataGenerator( + rescale=1./255, # 缩放像素值到0-1之间 + validation_split=0.2 # 分割数据集,这里使用20%作为验证集 +) + +train_generator = datagen.flow_from_directory( + './catdog', # 根目录 + target_size=(IMG_SIZE, IMG_SIZE), + batch_size=32, + class_mode='sparse', + subset='training' # 使用训练集 +) + +validation_generator = datagen.flow_from_directory( + './catdog', + target_size=(IMG_SIZE, IMG_SIZE), + batch_size=32, + class_mode='sparse', + subset='validation' # 使用验证集 +) + +# 构建 MobileNetV2 模型 +base_model = MobileNetV2( + input_shape=(IMG_SIZE, IMG_SIZE, 3), + include_top=False, + weights='imagenet', + alpha=0.75 +) + +base_model.trainable = False + +global_avg_pooling = layers.GlobalAveragePooling2D()(base_model.output) +dense1 = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(global_avg_pooling) +output = layers.Dense(train_generator.num_classes, activation='softmax')(dense1) # 动态获取类别数 + +model = models.Model(inputs=base_model.input, outputs=output) + +# 编译模型 +model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + +# 训练模型 +history = model.fit( + train_generator, + epochs=10, # 示例中仅训练一个epoch,实际应用中应根据情况调整 + validation_data=validation_generator +) + +# 评估模型(可选) +# 注意:ImageDataGenerator生成器不支持evaluate方法,需要手动加载测试集进行评估 + +# 导出模型 +model.export('./cd') +``` + +### 推理预测 + +```python +import tensorflow as tf +from tensorflow.keras.preprocessing.image import ImageDataGenerator +import numpy as np +from sklearn.metrics import classification_report, accuracy_score + +# 参数设置 +IMG_SIZE = 224 # 确保与训练时一致 +BATCH_SIZE = 32 + +# 数据预处理 +datagen = ImageDataGenerator( + rescale=1./255, # 缩放像素值到0-1之间 +) + +# 加载并预处理测试数据 +test_generator = datagen.flow_from_directory( + './catdog', # 替换为你的测试数据目录 + target_size=(IMG_SIZE, IMG_SIZE), + batch_size=BATCH_SIZE, + class_mode='sparse', + shuffle=False # 关闭shuffle确保顺序正确 +) + +# 获取一批数据看看输入形状 +sample_input, _ = next(test_generator) +input_shape = sample_input.shape[1:] # 应该是 (224, 224, 3) + +# 构建包含 TFSMLayer 的 Keras 模型 +model = tf.keras.Sequential([ + tf.keras.Input(shape=input_shape), + tf.keras.layers.TFSMLayer('./cd', call_endpoint='serving_default') +]) + +# 进行预测 +predictions = model.predict(test_generator) + +# 查看 predictions 结构(调试用) +print("Raw predictions type:", type(predictions)) +if isinstance(predictions, dict): + print("Keys in predictions:", predictions.keys()) + predictions = next(iter(predictions.values())) # 提取第一个输出作为 logits + +# 判断 predictions 维度,决定是否需要 argmax +if predictions.ndim == 2: + predicted_classes = np.argmax(predictions, axis=1) +else: + predicted_classes = predictions.astype(int).flatten() + +# 获取真实标签 +true_classes = test_generator.classes + +# 验证预测与真实标签维度是否匹配 +assert len(predicted_classes) == len(true_classes), \ + f"Length mismatch: predicted_classes={len(predicted_classes)}, true_classes={len(true_classes)}" + +# 计算准确率 +accuracy = accuracy_score(true_classes, predicted_classes) +print(f"Test Accuracy: {accuracy * 100:.2f}%") + +# 分类报告 +target_names = list(test_generator.class_indices.keys()) # 动态获取类别名称 +print("\nClassification Report:") +print(classification_report(true_classes, predicted_classes, target_names=target_names)) +``` + +### 模型转换:导出为.pb + +此部分介绍了如何将训练好的 Keras 模型转换为冻结的 `.pb` 
文件格式,适用于需要在其他平台(如 TensorFlow Serving)上部署的情况。 + +- **变量转常量**:将模型中的所有变量转换为常量以确保它们在模型被加载时不会改变。 + +```python +import tensorflow as tf +from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 + +# 输入SavedModel路径和输出.pb文件名 +input_saved_model_dir = "./cd/" +output_node_names = ["output_0"] # 替换为实际输出节点名 +output_graph_filename = 'frozen_graph.pb' + +# 加载 SavedModel +saved_model = tf.saved_model.load(input_saved_model_dir) + +# 获取默认签名函数 +concrete_function = saved_model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + +# 转换变量为常量 +frozen_func = convert_variables_to_constants_v2(concrete_function) + +# 获取 GraphDef 并保存为 .pb 文件 +graph_def = frozen_func.graph.as_graph_def() + +with tf.io.gfile.GFile(output_graph_filename, "wb") as f: + f.write(graph_def.SerializeToString()) + +print(f"Frozen graph saved to {output_graph_filename}") +``` + +导出为 tflite 模型 + +TFLite 是一种轻量级的模型格式,非常适合移动设备和嵌入式系统。该段落展示了如何从 SavedModel 转换到 TFLite 格式。 + +```python +import tensorflow as tf + +# 定义SavedModel的路径 +saved_model_dir = './catdog' + +# 创建转换器对象 +converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir) + +# 如果需要的话,可以设置优化选项等参数 +# converter.optimizations = [tf.lite.Optimize.DEFAULT] + +# 执行转换 +tflite_model = converter.convert() + +# 保存转换后的模型到文件 +with open('digitalhand.tflite', 'wb') as f: + f.write(tflite_model) +``` + +### 导出成onnx模型 + +```bash +python -m tf2onnx.convert --saved-model ./pb_model --output ./test.onnx --opset 13 +``` + +### 固定onnx输入 + +```python +import onnx + +# 加载 ONNX 模型 +model_path = 'mobile.onnx' +model = onnx.load(model_path) + +# 获取输入节点 +input_tensor = model.graph.input[0] + +# 修改输入节点的形状 +batch_size = 1 # 将批次大小固定为 1 +new_shape = [batch_size, 224, 224, 3] # 假设你的输入形状是 [batch_size, 224, 224, 3] + +# 更新输入节点的形状 +dim_proto = input_tensor.type.tensor_type.shape.dim +dim_proto[0].dim_value = batch_size # 固定批次大小 + +# 保存修改后的模型 +output_model_path = 'mobile.onnx' +onnx.save(model, output_model_path) + +print(f"Model with fixed batch size saved to {output_model_path}") +``` + +## rknn 转换测试 + +config文件内容 + +```markdowm +load_onnx: + input_size_list: + - + - 1 + - 224 + - 224 + - 3 + outputs: + - output_0 + +config: + # RGB + mean: + - 0.406 + - 0.456 + - 0.485 + std: + - 0.225 + - 0.224 + - 0.229 + type: "fp32" + quantized_algorithm: mmse + quantized_dtype: w16a16i + optimization_level: 0 +build: + do_quantization: True + dataset: ./dataset.txt +``` + +### pb转rknn + +```python +import os +import yaml +import argparse +from rknn.api import RKNN +import onnx + + +def get_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", default="Debug", help="rknntoolkit verbose") + parser.add_argument( + "--config_path", required=True, help="The path of model config file" + ) + parser.add_argument( + "--model_load_path", required=True, help="The path of onnx model file" + ) + parser.add_argument("--target_platform", required=False, help="The target platform") + parser.add_argument( + "--model_save_path", required=False, help="The path of rknn model save" + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + config = get_config() + with open(config.config_path) as file: + file_data = file.read() + yaml_config = yaml.safe_load(file_data) + print(yaml_config) + + # config_inputs = [ + # str(input_name) for input_name in yaml_config["load_onnx"]["inputs"] + # ] + config_input_size_list = yaml_config["load_onnx"]["input_size_list"] + config_outputs = [ + str(output_name) for output_name in 
yaml_config["load_onnx"]["outputs"] + ] + model = RKNN(config.verbose) + # Config + print("--> Config") + mean_std_type = yaml_config["config"]["type"] + if mean_std_type == "fp32": + mean_values = [255 * mean for mean in yaml_config["config"]["mean"]] + std_values = [255 * std for std in yaml_config["config"]["std"]] + elif mean_std_type == "int8": + mean_values = yaml_config["config"]["mean"] + std_values = yaml_config["config"]["std"] + else: + assert False, f"The type({mean_std_type}) is error, need fp32/int8." + print(f"mean_values is {mean_values}, std_values is {std_values}") + + quantized_dtype = "w8a8" + if "quantized_dtype" in yaml_config["config"]: + quantized_dtype = yaml_config["config"]["quantized_dtype"] + print(f"quantized_dtype is {quantized_dtype}") + + quantized_algorithm = "normal" + if "quantized_algorithm" in yaml_config["config"]: + quantized_algorithm = yaml_config["config"]["quantized_algorithm"] + print(f"quantized_algorithm is {quantized_algorithm}") + + optimization_level = 3 + if "optimization_level" in yaml_config["config"]: + optimization_level = yaml_config["config"]["optimization_level"] + print(f"optimization_level is {optimization_level}") + + target_platform = config.target_platform + print(f"target_platform is {target_platform}") + model.config( + mean_values=mean_values, + std_values=std_values, + quantized_dtype=quantized_dtype, + quantized_algorithm=quantized_algorithm, + optimization_level=optimization_level, + target_platform=target_platform, + ) + print("done") + + # Load ONNX model + print("--> Load ONNX model") + model_path = config.model_load_path + ret = model.load_tensorflow(tf_pb='./frozen_graph.pb',inputs=['input_1'],outputs=['StatefulPartitionedCall/model/dense_1/BiasAdd'],input_size_list=[[1,224,224,3]]) + assert ret == 0, "Load model failed!" + print("done") + + # Build model + print("--> Build model") + do_quantization = yaml_config["build"]["do_quantization"] + dataset = yaml_config["build"]["dataset"] + ret = model.build(do_quantization=do_quantization, dataset=dataset) + assert ret == 0, "Build model failed!" + print("done") + + # Init Runtime + print("--> Init Runtime") + ret = model.init_runtime() + assert ret == 0, "Init runtime environment failed!" + print("done") + + # Export + model_save_path = config.model_save_path + ret = model.export_rknn(model_save_path) + assert ret == 0, "Export rknn model failed!" 
+ print("Export OK!") + + +``` + +### tflite 转rknn + +```python +import os +import yaml +import argparse +from rknn.api import RKNN +import onnx + + +def get_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", default="Debug", help="rknntoolkit verbose") + parser.add_argument( + "--config_path", required=True, help="The path of model config file" + ) + parser.add_argument( + "--model_load_path", required=True, help="The path of onnx model file" + ) + parser.add_argument("--target_platform", required=False, help="The target platform") + parser.add_argument( + "--model_save_path", required=False, help="The path of rknn model save" + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + config = get_config() + with open(config.config_path) as file: + file_data = file.read() + yaml_config = yaml.safe_load(file_data) + print(yaml_config) + + config_inputs = [ + str(input_name) for input_name in yaml_config["load_onnx"]["inputs"] + ] + config_input_size_list = yaml_config["load_onnx"]["input_size_list"] + config_outputs = [ + str(output_name) for output_name in yaml_config["load_onnx"]["outputs"] + ] + model = RKNN(config.verbose) + # Config + print("--> Config") + mean_std_type = yaml_config["config"]["type"] + if mean_std_type == "fp32": + mean_values = [255 * mean for mean in yaml_config["config"]["mean"]] + std_values = [255 * std for std in yaml_config["config"]["std"]] + elif mean_std_type == "int8": + mean_values = yaml_config["config"]["mean"] + std_values = yaml_config["config"]["std"] + else: + assert False, f"The type({mean_std_type}) is error, need fp32/int8." + print(f"mean_values is {mean_values}, std_values is {std_values}") + + quantized_dtype = "w8a8" + if "quantized_dtype" in yaml_config["config"]: + quantized_dtype = yaml_config["config"]["quantized_dtype"] + print(f"quantized_dtype is {quantized_dtype}") + + quantized_algorithm = "normal" + if "quantized_algorithm" in yaml_config["config"]: + quantized_algorithm = yaml_config["config"]["quantized_algorithm"] + print(f"quantized_algorithm is {quantized_algorithm}") + + optimization_level = 3 + if "optimization_level" in yaml_config["config"]: + optimization_level = yaml_config["config"]["optimization_level"] + print(f"optimization_level is {optimization_level}") + + target_platform = config.target_platform + print(f"target_platform is {target_platform}") + model.config( + mean_values=mean_values, + std_values=std_values, + quantized_dtype=quantized_dtype, + quantized_algorithm=quantized_algorithm, + optimization_level=optimization_level, + target_platform=target_platform, + ) + print("done") + + # Load ONNX model + print("--> Load ONNX model") + model_path = config.model_load_path + ret = model.load_tflite(model=model_path) + assert ret == 0, "Load model failed!" + print("done") + + # Build model + print("--> Build model") + do_quantization = yaml_config["build"]["do_quantization"] + dataset = yaml_config["build"]["dataset"] + ret = model.build(do_quantization=do_quantization, dataset=dataset) + assert ret == 0, "Build model failed!" + print("done") + + # Init Runtime + print("--> Init Runtime") + ret = model.init_runtime() + assert ret == 0, "Init runtime environment failed!" + print("done") + + # Export + model_save_path = config.model_save_path + ret = model.export_rknn(model_save_path) + assert ret == 0, "Export rknn model failed!" 
+ print("Export OK!") + + +``` + +**推理测试** + +```cpp +#include +#include +#include +#include +#include +#include +#include +int main(int argc, char **argv) +{ + if (argc != 3) + { + LOGGER_ERROR("Usage: ./mobilenet model img_path"); + return -1; + } + + +const std::string model_path = argv[1]; +lockzhiner_vision_module::vision::RKNPU2Backend backend; + +if (!backend.Initialize(model_path)) +{ + LOGGER_ERROR("Initialize failed"); + return -1; +} + +cv::Mat img = cv::imread(argv[2]); +if (img.empty()) +{ + LOGGER_ERROR("img is empty"); + return -1; +} + +// 转换为 RGB 格式(如果模型输入是 BGR,则删除此行) +// cv::cvtColor(img, img, cv::COLOR_BGR2RGB); + +// 转换为浮点型 [0, 1] +img.convertTo(img, CV_32F, 1.0f / 255.0f); + +// 拆分图像通道 +std::vector channels; +cv::split(img, channels); + +// 定义 mean 和 std(根据你的配置) +std::vector mean = {0.485f, 0.456f, 0.406f}; // 注意顺序是 RGB +std::vector std = {0.229f, 0.224f, 0.225f}; + +// 对每个通道做标准化处理 +for (int c = 0; c < channels.size(); ++c) +{ + channels[c] = (channels[c] - mean[c]) / std[c]; +} + +// 合并回多通道图像 +cv::Mat normalized_img; +cv::merge(channels, normalized_img); + +// 缩放图像到网络输入尺寸 +int input_w = backend.GetInputTensor(0).GetDims()[2]; // Width +int input_h = backend.GetInputTensor(0).GetDims()[1]; // Height +cv::resize(normalized_img, normalized_img, cv::Size(input_w, input_h)); + +// 将数据复制到输入张量 +lockzhiner_vision_module::LockzhinerMemoryCopy( + backend.GetInputTensor(0).GetData(), + normalized_img.data, + backend.GetInputTensor(0).GetElemsBytes()); + +if (!backend.Run()) +{ + LOGGER_ERROR("Inference failed"); + return -1; +} + +// 获取输出张量 +auto output_tensor = backend.GetOutputTensor(0); + +// 反量化准备 +auto output_scale = output_tensor.GetScale(); +auto output_zero_point = output_tensor.GetZp(); + +using OutputDataType = int16_t; // 或 uint8_t,取决于模型输出类型 + +OutputDataType *raw_output_data = static_cast(output_tensor.GetData()); + +std::vector output_data(output_tensor.GetElemsBytes() / sizeof(OutputDataType)); + +// 创建 MaxSoftmax 实例 +using OutputDataType = int16_t; // 根据模型实际输出类型修改 +lockzhiner_vision_module::MaxSoftmax max_softmax(output_zero_point, output_scale); + +float max_value; +size_t max_index; + +// 调用 MaxSoftmax 进行计算 +max_softmax.Run(raw_output_data, output_data.size(), &max_value, &max_index); + +LOGGER_INFO("Inference success"); +std::cerr << "Predicted class index: " << max_index << std::endl; +std::cerr << "Confidence score: " << max_value << std::endl; +return 0; +} + + +``` + +**存在问题** + +模型虽然能够成功导出为 **rknn**格式并推理,但模型的推理结果是错误的,暂时还没有排查到问题, \ No newline at end of file diff --git a/utils/adb.py b/utils/adb.py new file mode 100644 index 0000000000000000000000000000000000000000..981f0db3506f62dd73ec12fc29795bfdc6160a6a --- /dev/null +++ b/utils/adb.py @@ -0,0 +1,95 @@ +# from rknn.api import RKNN +# import cv2 + +# if __name__ == '__main__': +# # 第一步:创建RKNN对象 +# rknn = RKNN(verbose=True) + +# # 第二步:配置RKNN对象参数 +# rknn.config( +# mean_values=[[149.94, 149.94, 149.94]], +# std_values=[[255 * 0.193, 255 * 0.193, 255 * 0.193]], +# target_platform='rv1106', +# ) + +# # 第三步:加载 TFLite 模型(注意:不要重新赋值给 rknn) +# rknn.load_tflite(model='./model.tflite') + +# # 第四步:构建RKNN模型 +# rknn.build( +# do_quantization=True, +# dataset='./dataset.txt', +# ) + +# # 第五步:导出RKNN模型 +# rknn.export_rknn(export_path='./resnet.rknn') + +# # 第六步:进行精度分析 +# img = cv2.imread('../data/0.png') +# img = cv2.resize(img, (96, 96)) +# rknn.accuracy_analysis( +# inputs=[img], +# output_dir='snapshot', +# target='rv1106', +# device_id='10.1.1.144:5555', +# ) + +# # 最后一步:释放RKNN对象 +# rknn.release() +import numpy as np 
+
+**Known issue**
+
+The model can be exported to the **rknn** format and inference runs, but the inference results are incorrect; the root cause has not yet been tracked down.
\ No newline at end of file
diff --git a/utils/adb.py b/utils/adb.py
new file mode 100644
index 0000000000000000000000000000000000000000..981f0db3506f62dd73ec12fc29795bfdc6160a6a
--- /dev/null
+++ b/utils/adb.py
@@ -0,0 +1,95 @@
+# from rknn.api import RKNN
+# import cv2
+
+# if __name__ == '__main__':
+#     # Step 1: create the RKNN object
+#     rknn = RKNN(verbose=True)
+
+#     # Step 2: configure the RKNN object
+#     rknn.config(
+#         mean_values=[[149.94, 149.94, 149.94]],
+#         std_values=[[255 * 0.193, 255 * 0.193, 255 * 0.193]],
+#         target_platform='rv1106',
+#     )
+
+#     # Step 3: load the TFLite model (note: do not reassign the return value to rknn)
+#     rknn.load_tflite(model='./model.tflite')
+
+#     # Step 4: build the RKNN model
+#     rknn.build(
+#         do_quantization=True,
+#         dataset='./dataset.txt',
+#     )
+
+#     # Step 5: export the RKNN model
+#     rknn.export_rknn(export_path='./resnet.rknn')
+
+#     # Step 6: run accuracy analysis
+#     img = cv2.imread('../data/0.png')
+#     img = cv2.resize(img, (96, 96))
+#     rknn.accuracy_analysis(
+#         inputs=[img],
+#         output_dir='snapshot',
+#         target='rv1106',
+#         device_id='10.1.1.144:5555',
+#     )
+
+#     # Final step: release the RKNN object
+#     rknn.release()
+import numpy as np
+from rknn.api import RKNN
+import cv2
+
+if __name__ == '__main__':
+    # Create the RKNN object
+    rknn = RKNN(verbose=True)
+
+    # Configure preprocessing and the target platform
+    rknn.config(
+        mean_values=[[149.94, 149.94, 149.94]],
+        std_values=[[255 * 0.193, 255 * 0.193, 255 * 0.193]],
+        target_platform='rv1106',
+    )
+
+    # Load the TFLite model
+    rknn.load_tflite(model='./model.tflite')
+
+    # Build the model
+    rknn.build(do_quantization=True, dataset='./dataset.txt')
+
+    # Export the model
+    rknn.export_rknn(export_path='./resnet.rknn')
+
+    # Image preprocessing
+    img = cv2.imread('../data/0.png')
+    img = cv2.resize(img, (96, 96))
+
+    # Normalize + quantize (matching the mean_values/std_values configured above)
+    # NOTE: mean/std are already registered via rknn.config above; normalizing again
+    # here may apply them twice. Verify which input format accuracy_analysis expects
+    # for your toolkit version.
+    mean = [149.94, 149.94, 149.94]
+    std = [255 * 0.193, 255 * 0.193, 255 * 0.193]
+
+    # Convert to float32 and normalize
+    img = img.astype(np.float32)
+    img -= mean  # subtract the mean
+    img /= std   # divide by the standard deviation
+
+    # Quantize to uint8
+    img = np.round(img).astype(np.uint8)
+
+    # Reorder channels to CHW and add a batch dimension
+    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
+    img = np.expand_dims(img, axis=0)   # CHW -> NCHW
+
+    # Accuracy analysis
+    rknn.accuracy_analysis(
+        inputs=[img],
+        output_dir='snapshot',
+        target='rv1106',
+        device_id='10.1.1.144:5555',
+    )
+
+    # Release resources
+    rknn.release()
\ No newline at end of file
diff --git a/utils/export_tensorflow.py b/utils/export_tensorflow.py
new file mode 100644
index 0000000000000000000000000000000000000000..a26f48c795f0fcf4f85a0b0276322099004f5615
--- /dev/null
+++ b/utils/export_tensorflow.py
@@ -0,0 +1,105 @@
+import os
+import yaml
+import argparse
+from rknn.api import RKNN
+
+
+def get_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--verbose", default="Debug", help="rknntoolkit verbose")
+    parser.add_argument(
+        "--config_path", required=True, help="The path of model config file"
+    )
+    parser.add_argument(
+        "--model_load_path", required=True, help="The path of the TensorFlow frozen graph (.pb) file"
+    )
+    parser.add_argument("--target_platform", required=False, help="The target platform")
+    parser.add_argument(
+        "--model_save_path", required=False, help="The path of rknn model save"
+    )
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == "__main__":
+    config = get_config()
+    with open(config.config_path) as file:
+        file_data = file.read()
+        yaml_config = yaml.safe_load(file_data)
+    print(yaml_config)
+
+    # config_inputs = [
+    #     str(input_name) for input_name in yaml_config["load_onnx"]["inputs"]
+    # ]
+    config_input_size_list = yaml_config["load_onnx"]["input_size_list"]
+    config_outputs = [
+        str(output_name) for output_name in yaml_config["load_onnx"]["outputs"]
+    ]
+    model = RKNN(config.verbose)
+    # Config
+    print("--> Config")
+    mean_std_type = yaml_config["config"]["type"]
+    if mean_std_type == "fp32":
+        mean_values = [255 * mean for mean in yaml_config["config"]["mean"]]
+        std_values = [255 * std for std in yaml_config["config"]["std"]]
+    elif mean_std_type == "int8":
+        mean_values = yaml_config["config"]["mean"]
+        std_values = yaml_config["config"]["std"]
+    else:
+        assert False, f"The type({mean_std_type}) is invalid, expected fp32/int8."
+ print(f"mean_values is {mean_values}, std_values is {std_values}") + + quantized_dtype = "w8a8" + if "quantized_dtype" in yaml_config["config"]: + quantized_dtype = yaml_config["config"]["quantized_dtype"] + print(f"quantized_dtype is {quantized_dtype}") + + quantized_algorithm = "normal" + if "quantized_algorithm" in yaml_config["config"]: + quantized_algorithm = yaml_config["config"]["quantized_algorithm"] + print(f"quantized_algorithm is {quantized_algorithm}") + + optimization_level = 3 + if "optimization_level" in yaml_config["config"]: + optimization_level = yaml_config["config"]["optimization_level"] + print(f"optimization_level is {optimization_level}") + + target_platform = config.target_platform + print(f"target_platform is {target_platform}") + model.config( + mean_values=mean_values, + std_values=std_values, + quantized_dtype=quantized_dtype, + quantized_algorithm=quantized_algorithm, + optimization_level=optimization_level, + target_platform=target_platform, + ) + print("done") + + # Load ONNX model + print("--> Load ONNX model") + model_path = config.model_load_path + ret = model.load_tensorflow(tf_pb='./frozen_graph.pb',inputs=['input_1'],outputs=['StatefulPartitionedCall/model/dense_1/BiasAdd'],input_size_list=[[1,224,224,3]]) + assert ret == 0, "Load model failed!" + print("done") + + # Build model + print("--> Build model") + do_quantization = yaml_config["build"]["do_quantization"] + dataset = yaml_config["build"]["dataset"] + ret = model.build(do_quantization=do_quantization, dataset=dataset) + assert ret == 0, "Build model failed!" + print("done") + + # Init Runtime + print("--> Init Runtime") + ret = model.init_runtime() + assert ret == 0, "Init runtime environment failed!" + print("done") + + # Export + model_save_path = config.model_save_path + ret = model.export_rknn(model_save_path) + assert ret == 0, "Export rknn model failed!" 
+ print("Export OK!") diff --git a/utils/export_tflite.py b/utils/export_tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..95edbf94e6c57c88262389ea69439cd012ccb778 --- /dev/null +++ b/utils/export_tflite.py @@ -0,0 +1,105 @@ +import os +import yaml +import argparse +from rknn.api import RKNN +import onnx + + +def get_config(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", default="Debug", help="rknntoolkit verbose") + parser.add_argument( + "--config_path", required=True, help="The path of model config file" + ) + parser.add_argument( + "--model_load_path", required=True, help="The path of onnx model file" + ) + parser.add_argument("--target_platform", required=False, help="The target platform") + parser.add_argument( + "--model_save_path", required=False, help="The path of rknn model save" + ) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + config = get_config() + with open(config.config_path) as file: + file_data = file.read() + yaml_config = yaml.safe_load(file_data) + print(yaml_config) + + config_inputs = [ + str(input_name) for input_name in yaml_config["load_onnx"]["inputs"] + ] + config_input_size_list = yaml_config["load_onnx"]["input_size_list"] + config_outputs = [ + str(output_name) for output_name in yaml_config["load_onnx"]["outputs"] + ] + model = RKNN(config.verbose) + # Config + print("--> Config") + mean_std_type = yaml_config["config"]["type"] + if mean_std_type == "fp32": + mean_values = [255 * mean for mean in yaml_config["config"]["mean"]] + std_values = [255 * std for std in yaml_config["config"]["std"]] + elif mean_std_type == "int8": + mean_values = yaml_config["config"]["mean"] + std_values = yaml_config["config"]["std"] + else: + assert False, f"The type({mean_std_type}) is error, need fp32/int8." + print(f"mean_values is {mean_values}, std_values is {std_values}") + + quantized_dtype = "w8a8" + if "quantized_dtype" in yaml_config["config"]: + quantized_dtype = yaml_config["config"]["quantized_dtype"] + print(f"quantized_dtype is {quantized_dtype}") + + quantized_algorithm = "normal" + if "quantized_algorithm" in yaml_config["config"]: + quantized_algorithm = yaml_config["config"]["quantized_algorithm"] + print(f"quantized_algorithm is {quantized_algorithm}") + + optimization_level = 3 + if "optimization_level" in yaml_config["config"]: + optimization_level = yaml_config["config"]["optimization_level"] + print(f"optimization_level is {optimization_level}") + + target_platform = config.target_platform + print(f"target_platform is {target_platform}") + model.config( + mean_values=mean_values, + std_values=std_values, + quantized_dtype=quantized_dtype, + quantized_algorithm=quantized_algorithm, + optimization_level=optimization_level, + target_platform=target_platform, + ) + print("done") + + # Load ONNX model + print("--> Load ONNX model") + model_path = config.model_load_path + ret = model.load_tflite(model=model_path) + assert ret == 0, "Load model failed!" + print("done") + + # Build model + print("--> Build model") + do_quantization = yaml_config["build"]["do_quantization"] + dataset = yaml_config["build"]["dataset"] + ret = model.build(do_quantization=do_quantization, dataset=dataset) + assert ret == 0, "Build model failed!" + print("done") + + # Init Runtime + print("--> Init Runtime") + ret = model.init_runtime() + assert ret == 0, "Init runtime environment failed!" 
+ print("done") + + # Export + model_save_path = config.model_save_path + ret = model.export_rknn(model_save_path) + assert ret == 0, "Export rknn model failed!" + print("Export OK!") diff --git a/utils/quantized_data.py b/utils/quantized_data.py new file mode 100644 index 0000000000000000000000000000000000000000..e2dc1bcdf9a632ab64a8874d251cf195b93501d4 --- /dev/null +++ b/utils/quantized_data.py @@ -0,0 +1,11 @@ +import os +img_path = os.listdir("../data") +print(len(img_path)) +path = [] +for i in range(len(img_path)): + img_path[i] = "../data/" + img_path[i] + path.append(img_path[i]) + print(img_path[i]) +with open("dataset.txt", "w") as f: + for i in range(len(path)): + f.write(path[i] + "\n")