diff --git a/Cpp_example/A05_USART/USART_Read.cc b/Cpp_example/A05_USART/USART_Read.cc index 4d4230e8b04e1d39fb02824820ad126121761580..741b72bb8d94994ec2a3392e02cb3ce8f1c2a39d 100755 --- a/Cpp_example/A05_USART/USART_Read.cc +++ b/Cpp_example/A05_USART/USART_Read.cc @@ -2,22 +2,18 @@ #include -int main() -{ +int main() { lockzhiner_vision_module::periphery::USART1 usart; - if (!usart.Open(115200)) - { + if (!usart.Open(115200)) { std::cout << "Failed to open usart." << std::endl; return 1; } std::cout << "Start receiving serial port data." << std::endl; - while (1) - { + while (1) { std::string buffer; usart.Read(buffer, 1024); - if (!buffer.empty()) - { + if (!buffer.empty()) { std::cout << buffer << std::endl; } } diff --git a/Cpp_example/A05_USART/USART_Write.cc b/Cpp_example/A05_USART/USART_Write.cc index 0aba4e034ca10fe22365fdfcae2a77626c719ff0..bedca0cac67759df154ebfca7b33f06350b620ca 100755 --- a/Cpp_example/A05_USART/USART_Write.cc +++ b/Cpp_example/A05_USART/USART_Write.cc @@ -2,17 +2,14 @@ #include -int main() -{ +int main() { lockzhiner_vision_module::periphery::USART1 usart; - if (!usart.Open(115200)) - { + if (!usart.Open(115200)) { std::cout << "Failed to open usart." << std::endl; return 1; } - if (!usart.Write("Hello World\n")) - { + if (!usart.Write("Hello World\n")) { std::cout << "Failed to send data." << std::endl; return 1; } diff --git a/Cpp_example/D01_test_detection/CMakeLists.txt b/Cpp_example/D01_test_detection/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/Cpp_example/D13_target_tracking/CMakeLists.txt b/Cpp_example/D13_target_tracking/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..70194f2caa4b177f288dfcdf3c61aa1b47d4d4d6 --- /dev/null +++ b/Cpp_example/D13_target_tracking/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.10) + +project(D01_test_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") + +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +add_executable(Test-target-tracking test_target_tracking.cc) +target_include_directories(Test-target-tracking PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS}) +target_link_libraries(Test-target-tracking PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS Test-target-tracking + RUNTIME DESTINATION . +) \ No newline at end of file diff --git a/Cpp_example/D13_target_tracking/README.md b/Cpp_example/D13_target_tracking/README.md new file mode 100755 index 0000000000000000000000000000000000000000..462ebbcff6c9cbc218e67b5e0eb8f5f15fddf73e --- /dev/null +++ b/Cpp_example/D13_target_tracking/README.md @@ -0,0 +1,876 @@ +# 多目标跟踪我能当 + +本文档介绍了如何使用 lockzhiner_vision_module 库结合卡尔曼滤波和匈牙利算法实现多目标跟踪系统。 + +## 1. 基础知识讲解 + +### 1.1 多目标跟踪的基本概念 + +多目标跟踪(MOT)是计算机视觉中处理视频序列中移动目标检测、跟踪和识别的任务。它的核心挑战包括: + +- 数据关联:将当前帧的检测结果与现有轨迹匹配 +- 轨迹管理:处理新目标的出现、旧目标的消失 +- 遮挡处理:处理目标被遮挡或暂时离开画面的情况 + +### 1.2 卡尔曼滤波 + +卡尔曼滤波是一种高效的递归算法,用于在动态系统中估计目标的状态: + +- ​​预测阶段​​:基于运动模型估计目标在下一帧的位置 + +- ​更新阶段​​:使用新的观测值更新状态估计 + +- 在多目标跟踪中,卡尔曼滤波用于预测目标位置、速度和其他状态变量 + +### 1.3 匈牙利算法 + +匈牙利算法是解决二分图最大匹配问题的经典算法,再MOT中的作用: + +- 将检测到的目标与现有的跟踪轨迹关联 +- 解决分配问题,找到最优的检测轨迹匹配 + +## 2. API 文档 + +### 2.1 EnhancedKalmanFilter 类 + +#### 2.1.1 构造函数 + +```cpp +EnhancedKalmanFilter(float Ts = 0.033f); +``` + +- 作用:创建6D卡尔曼滤波器对象 +- 参数: + - Ts:时间步长(默认0.033秒) +- 初始化状态: + - 状态向量:[x, y, w, h, dx, dy] (位置+尺寸+速度) + - 初始化过程噪声、观测噪声矩阵 + +#### 2.1.2 主要成员函数 + +```cpp +void Update(const cv::Mat& Z); +``` + +- 作用:更新卡尔曼滤波器状态 +- 参数: + - z:观测向量[x,y,w,h] + +```cpp +void Predict(); +``` + +- 作用:预测下一时刻的状态 + +```cpp +cv::Rect GetRectFromState() const; +``` + +- 作用:获取目标边界框 +- 返回值:当前状态的矩形区域 + +```cpp +cv::Point2d GetVelocity() const; +``` + +- 作用:获取目标速度向量 +- 返回值:速度向量(dx,dy) + +### 2.2 HungarianAlgorithm 类 + +#### 2.2.1 静态成员函数 + +```cpp +static std::vector Solve(const std::vector>& costMatrix); +``` + +- 作用:解决检测- 轨迹匹配问题 +- 参数: + - costMatrix:代价矩阵(行:跟踪器,列:检测结果) +- 返回值: + - 匹配索引向量(元素对应检测索引,-1表示未匹配) + +### 2.3 MultiObjectTracker 类 + +#### 2.3.1 结构体:TrackedObject + +```cpp +struct TrackedObject { + int id; // 目标ID + cv::Rect rect; // 目标边界框 + cv::Point2d velocity; // 速度向量 + EnhancedKalmanFilter ekf; // 关联的卡尔曼滤波器 + int lost_frames; // 连续丢失帧数 + int max_lost_frames; // 最大允许丢失帧数 + + // 成员函数省略... +}; +``` + +#### 2.3.2 成员函数 + +```cpp +void Update(const std::vector& detections, float delta_time); +``` + +- 作用:更新多目标跟踪器状态 +- 参数: + - detections:当前帧的检测结果 + - delta_time:距离上一帧的时间差 + +```cpp +const std::map& GetActiveObjects() const; +``` + +- 作用:获取所有活动目标 +- 返回值:ID到目标对象的映射 + +### 2.4 辅助函数 + +```cpp +double CalculateIOU(const cv::Rect& rect1, const cv::Rect& rect2); +``` + +- 作用:计算两矩形交并比(IoU) + +```cpp +double CalculateAppearanceSimilarity(const cv::Rect& rect1, const cv::Rect& rect2); +``` + +- 作用:计算基于目标尺寸的相似度(外观相似度) + +## 3. 系统架构与核心逻辑 + +### 3.1 系统流程图 + + +### 3.2 多目标跟踪工作流程 + +#### 3.2.1 目标检测 + +```cpp +auto results = model.Predict(input_mat); +std::vector detections; +for (const auto& res : results) { + detections.push_back(res.box); +} +``` + +#### 3.2.2 跟踪器预测 + +```cpp +// 更新所有跟踪器的预测状态 +for (auto& kv : tracked_objects) { + kv.second.Predict(Ts); +} +``` + +#### 3.2.3 代价矩阵计算 + +- 综合三种指标: + - IoU(重叠率) + - 中心点距离 + - 尺寸相似度 +- 权重分配:60% IoU + 30% 距离 + 10% 尺寸相似度 + +#### 3.2.4 数据关联(匈牙利算法) + +```cpp +std::vector assignments = HungarianAlgorithm::Solve(cost_matrix); +``` + +#### 3.2.5 跟踪器状态更新 + +- 匹配成功的更新卡尔曼滤波器 +- 未匹配的检测创建新的跟踪器 +- 未匹配的跟踪器增加丢失技术 + +#### 3.2.6 绘制结果 + +- 为每个目标分配唯一颜色 +- 绘制边界框、ID和丢失帧数 +- 绘制速度向量 + +### 3.3完整代码实现 + +```cpp +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std::chrono; + +// ================== 增强版 6D 卡尔曼滤波类 ================== +class EnhancedKalmanFilter { + public: + EnhancedKalmanFilter(float Ts = 0.033f) : prediction_steps(1) { + Initialize(Ts); + } + + void Initialize(float Ts) { + Ts_ = Ts; + // 状态转移矩阵 A (6x6) + A = cv::Mat::eye(6, 6, CV_64F); + A.at(0, 4) = Ts; + A.at(1, 5) = Ts; + + // 观测矩阵 C (4x6) - 只观测位置和大小 + C = cv::Mat::zeros(4, 6, CV_64F); + C.at(0, 0) = 1; + C.at(1, 1) = 1; + C.at(2, 2) = 1; + C.at(3, 3) = 1; + + // 增大过程噪声 - 使预测更灵活 + Q = cv::Mat::eye(6, 6, CV_64F); + + Q.at(0, 0) = 1e-2; // x位置噪声增加 + Q.at(1, 1) = 1e-2; // y位置噪声增加 + Q.at(2, 2) = 1e-6; // 宽度噪声不变 + Q.at(3, 3) = 1e-6; // 高度噪声不变 + Q.at(4, 4) = 1e-3; // x速度噪声减小 + Q.at(5, 5) = 1e-3; // y速度噪声减小 + + // 调整观测噪声 - 使预测更具主导性 + R = cv::Mat::eye(4, 4, CV_64F); + R.at(0, 0) = 1e-2; // x位置观测噪声减小 + R.at(1, 1) = 1e-2; // y位置观测噪声减小 + R.at(2, 2) = 1e-1; // 宽度观测噪声增加 + R.at(3, 3) = 1e-1; // 高度观测噪声增加 + + // 初始估计误差协方差矩阵 P (增大初始不确定性) + P = cv::Mat::eye(6, 6, CV_64F) * 100; + + // 初始状态 [x, y, w, h, dx, dy] + x_hat = cv::Mat::zeros(6, 1, CV_64F); + has_initialized = false; + + // 初始化临时Q矩阵 + Q_temp = Q.clone(); + } + + void UpdateTs(float Ts) { + Ts_ = Ts; + // 更新状态转移矩阵中的时间项 + A.at(0, 4) = Ts; + A.at(1, 5) = Ts; + } + + void Update(const cv::Mat& Z) { + if (!has_initialized) { + // 首次更新,直接初始化状态 + x_hat.at(0) = Z.at(0); + x_hat.at(1) = Z.at(1); + x_hat.at(2) = Z.at(2); + x_hat.at(3) = Z.at(3); + x_hat.at(4) = 0; + x_hat.at(5) = 0; + has_initialized = true; + return; + } + + // 检查位置偏差大小 + cv::Mat residual = Z - C * (A * x_hat); + double positionError = cv::norm(residual.rowRange(0, 2)); + + // 动态加速收敛机制 + if (positionError > 20.0) { + // 位置偏差较大时临时增大位置过程噪声 + Q_temp = Q.clone(); + Q_temp.at(0, 0) *= 10.0; + Q_temp.at(1, 1) *= 10.0; + } else { + Q_temp = Q.clone(); + } + + // 预测步骤 + cv::Mat x_hat_minus = A * x_hat; + cv::Mat P_minus = A * P * A.t() + Q_temp; // 使用临时Q + + // 计算残差 + cv::Mat y = Z - C * x_hat_minus; + + // 计算卡尔曼增益 K + cv::Mat S = C * P_minus * C.t() + R; + cv::Mat K = P_minus * C.t() * S.inv(); + + // 更新状态 + x_hat = x_hat_minus + K * y; + + // 更新协方差 + cv::Mat I = cv::Mat::eye(6, 6, CV_64F); + P = (I - K * C) * P_minus; + } + + void Predict() { + if (!has_initialized) return; + x_hat = A * x_hat; + P = A * P * A.t() + Q_temp; + } + + cv::Mat GetState() const { return x_hat.clone(); } + + cv::Mat GetPredictedState(int steps = 1) const { + if (!has_initialized) return cv::Mat(); + cv::Mat state = x_hat.clone(); + cv::Mat A_step = cv::Mat::eye(6, 6, CV_64F); + A_step.at(0, 4) = Ts_ * steps; + A_step.at(1, 5) = Ts_ * steps; + return A_step * state; + } + + void SetPredictionSteps(int steps) { prediction_steps = std::max(1, steps); } + + int GetPredictionSteps() const { return prediction_steps; } + + // 重置滤波器 + void Reset() { + has_initialized = false; + // 重置协方差矩阵 + P = cv::Mat::eye(6, 6, CV_64F) * 100; + } + + bool HasInitialized() const { return has_initialized; } + + cv::Rect GetRectFromState() const { + if (!has_initialized) return cv::Rect(); + cv::Mat state = GetState(); + return cv::Rect(static_cast(state.at(0)), + static_cast(state.at(1)), + static_cast(state.at(2)), + static_cast(state.at(3))); + } + + // 获取速度向量 + cv::Point2d GetVelocity() const { + if (!has_initialized) return cv::Point2d(0, 0); + return cv::Point2d(x_hat.at(4), x_hat.at(5)); + } + + private: + cv::Mat A, C, Q, R, P; + cv::Mat x_hat; + cv::Mat Q_temp; // 动态调整的Q矩阵 + float Ts_; + int prediction_steps; + bool has_initialized; +}; + +// 计算两个矩形的IOU(交并比) +double CalculateIOU(const cv::Rect& rect1, const cv::Rect& rect2) { + // 计算交集矩形 + int x1 = std::max(rect1.x, rect2.x); + int y1 = std::max(rect1.y, rect2.y); + int x2 = std::min(rect1.x + rect1.width, rect2.x + rect2.width); + int y2 = std::min(rect1.y + rect1.height, rect2.y + rect2.height); + + int intersectionArea = std::max(0, x2 - x1) * std::max(0, y2 - y1); + + // 计算并集面积 + int area1 = rect1.width * rect1.height; + int area2 = rect2.width * rect2.height; + + // 避免除以0 + if (area1 + area2 - intersectionArea <= 0) { + return 0.0; + } + + return static_cast(intersectionArea) / + (area1 + area2 - intersectionArea); +} + +// 计算两个矩形中心点的欧式距离 +double CalculateCenterDistance(const cv::Rect& rect1, const cv::Rect& rect2) { + cv::Point center1(rect1.x + rect1.width / 2, rect1.y + rect1.height / 2); + cv::Point center2(rect2.x + rect2.width / 2, rect2.y + rect2.height / 2); + return cv::norm(center1 - center2); +} + +// 计算外观相似性(使用矩形大小差异作为简单度量) +double CalculateAppearanceSimilarity(const cv::Rect& rect1, + const cv::Rect& rect2) { + double sizeDiff = + std::abs(rect1.width * rect1.height - rect2.width * rect2.height); + // 归一化:10000是一个经验值,代表典型的最大矩形面积 + return std::exp(-sizeDiff / 10000.0); +} + +// ================== 匈牙利算法实现 ================== +class HungarianAlgorithm { + public: + // 解指派问题(最小化代价) + static std::vector Solve( + const std::vector>& costMatrix) { + int n = costMatrix.size(); + if (n == 0) return {}; + + int m = costMatrix[0].size(); + if (m == 0) return std::vector(n, -1); + + // 预处理: 确保矩阵是方阵 + std::vector> cost = costMatrix; + if (n > m) { + for (auto& row : cost) { + row.resize(n, std::numeric_limits::max()); + } + m = n; + } else if (m > n) { + cost.resize(m, + std::vector(m, std::numeric_limits::max())); + for (int i = n; i < m; ++i) { + for (int j = 0; j < m; ++j) { + if (j < costMatrix[0].size()) { + cost[i][j] = costMatrix[i % n][j]; + } + } + } + n = m; + } + + // 初始化变量 + std::vector u(n + 1, 0); + std::vector v(m + 1, 0); + std::vector p(m + 1, 0); + std::vector way(m + 1, 0); + + for (int i = 1; i <= n; ++i) { + p[0] = i; + int j0 = 0; + std::vector minv(m + 1, std::numeric_limits::max()); + std::vector used(m + 1, false); + + do { + used[j0] = true; + int i0 = p[j0]; + double delta = std::numeric_limits::max(); + int j1 = 0; + + for (int j = 1; j <= m; ++j) { + if (!used[j]) { + double cur = cost[i0 - 1][j - 1] - u[i0] - v[j]; + if (cur < minv[j]) { + minv[j] = cur; + way[j] = j0; + } + if (minv[j] < delta) { + delta = minv[j]; + j1 = j; + } + } + } + + for (int j = 0; j <= m; ++j) { + if (used[j]) { + u[p[j]] += delta; + v[j] -= delta; + } else { + minv[j] -= delta; + } + } + + j0 = j1; + } while (p[j0] != 0); + + do { + int j1 = way[j0]; + p[j0] = p[j1]; + j0 = j1; + } while (j0 != 0); + } + + // 构建结果向量 + std::vector result(n, -1); + for (int j = 1; j <= m; ++j) { + if (p[j] != 0 && p[j] <= static_cast(result.size())) { + result[p[j] - 1] = j - 1; + } + } + + return result; + } +}; + +// 多目标跟踪器类 +class MultiObjectTracker { + public: + struct TrackedObject { + int id; + cv::Rect rect; + cv::Point2d velocity; + EnhancedKalmanFilter ekf; + int lost_frames; + int max_lost_frames; + + // 添加默认构造函数 + TrackedObject() + : id(-1), + rect(0, 0, 0, 0), + velocity(0, 0), + lost_frames(0), + max_lost_frames(30) { + ekf.Initialize(0.033f); + } + + TrackedObject(int obj_id, const cv::Rect& init_rect, float Ts) + : id(obj_id), + rect(init_rect), + velocity(0, 0), + lost_frames(0), + max_lost_frames(30) { + ekf.Initialize(Ts); + // 初始化卡尔曼滤波器 + cv::Mat Z = (cv::Mat_(4, 1) << init_rect.x, init_rect.y, + init_rect.width, init_rect.height); + ekf.Update(Z); + } + + void Update(const cv::Rect& new_rect, float Ts) { + ekf.UpdateTs(Ts); + cv::Mat Z = (cv::Mat_(4, 1) << new_rect.x, new_rect.y, + new_rect.width, new_rect.height); + ekf.Update(Z); + rect = ekf.GetRectFromState(); + velocity = ekf.GetVelocity(); + lost_frames = 0; + } + + void Predict(float Ts) { + ekf.UpdateTs(Ts); + ekf.Predict(); + rect = ekf.GetRectFromState(); + velocity = ekf.GetVelocity(); + lost_frames++; + } + + // 当丢失帧数超过max_lost_frames时,视为丢失 + bool IsActive() const { return lost_frames <= max_lost_frames; } + }; + + MultiObjectTracker() : next_id(1), Ts(0.033f) {} + + void Update(const std::vector& detections, float delta_time) { + Ts = delta_time; + + // 步骤1: 卡尔曼预测 + for (auto& kv : tracked_objects) { + kv.second.Predict(Ts); + } + + // 如果没有检测结果,直接返回 + if (detections.empty()) { + return; + } + + // 步骤2: 构建代价矩阵 + std::vector> cost_matrix; + std::vector active_tracker_indices; + + // 收集所有活跃的跟踪器 + for (auto& kv : tracked_objects) { + if (kv.second.IsActive()) { + active_tracker_indices.push_back(kv.first); + } + } + + // 构建代价矩阵 + for (int tracker_idx : active_tracker_indices) { + TrackedObject& tracker = tracked_objects[tracker_idx]; + std::vector costs; + for (const auto& det : detections) { + // 综合代价:IOU + 位置距离 + 外观相似性 + double iou = CalculateIOU(tracker.rect, det); + // 避免NaN/Inf + if (std::isnan(iou)) iou = 0.0; + iou = std::max(0.0, std::min(iou, 1.0)); + + double dist = CalculateCenterDistance(tracker.rect, det); + double appearance = CalculateAppearanceSimilarity(tracker.rect, det); + + // 综合代价函数 + double cost = + (1.0 - iou) * 0.6 + (dist / 100.0) * 0.3 + (1.0 - appearance) * 0.1; + costs.push_back(cost); + } + cost_matrix.push_back(costs); + } + + // 匹配结果存储 + std::vector assignments; + if (!cost_matrix.empty()) { + assignments = HungarianAlgorithm::Solve(cost_matrix); + } else { + // 如果没有活跃跟踪器,分配所有检测为未匹配 + assignments = std::vector(active_tracker_indices.size(), -1); + } + + // 步骤3: 处理匹配结果 + // 创建一个布尔数组来标记每个检测是否被匹配 + std::vector detection_matched(detections.size(), false); + for (int i = 0; i < assignments.size() && i < active_tracker_indices.size(); + ++i) { + int det_index = assignments[i]; + if (det_index >= 0 && det_index < static_cast(detections.size())) { + int tracker_idx = active_tracker_indices[i]; + // 检查矩形是否有效 + const cv::Rect& new_rect = detections[det_index]; + if (new_rect.width > 0 && new_rect.height > 0) { + tracked_objects[tracker_idx].Update(new_rect, Ts); + detection_matched[det_index] = true; + } + } + } + + // 步骤4: 为未匹配的检测创建新跟踪器 + for (int i = 0; i < detections.size(); ++i) { + if (!detection_matched[i] && detections[i].width > 0 && + detections[i].height > 0) { + // 创建新跟踪器 + tracked_objects.insert( + std::make_pair(next_id, TrackedObject(next_id, detections[i], Ts))); + next_id++; + } + } + + // 步骤5: 移除丢失时间过长的跟踪器 + for (auto it = tracked_objects.begin(); it != tracked_objects.end();) { + // 如果丢失超过30帧,移除跟踪器 + if (!it->second.IsActive()) { + it = tracked_objects.erase(it); + } else { + ++it; + } + } + } + + // 获取所有活跃的目标 + const std::map& GetActiveObjects() const { + return tracked_objects; + } + + private: + std::map tracked_objects; + int next_id; + float Ts; +}; + +int main(int argc, char* argv[]) { + if (argc != 2) { + std::cerr << "Usage: Test-PaddleDet model_path" << std::endl; + return 1; + } + + // 初始化模型 + lockzhiner_vision_module::vision::PaddleDet model; + if (!model.Initialize(argv[1])) { + std::cout << "Failed to initialize model." << std::endl; + return 1; + } + + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + std::cout << "Device connected successfully." << std::endl; + + // 打开摄像头 + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + + if (!cap.open(0)) { + std::cerr << "Couldn't open video capture device" << std::endl; + return -1; + } + + cv::Mat input_mat; + + // 创建多目标跟踪器 + MultiObjectTracker tracker; + + // 创建随机颜色生成器 + cv::RNG rng(0xFFFFFFFF); + + auto last_time = high_resolution_clock::now(); + + while (true) { + auto current_time = high_resolution_clock::now(); + float Ts = + duration_cast(current_time - last_time).count() / 1000.0f; + if (Ts <= 0.001f) Ts = 0.033f; // 避免零时间间隔 + last_time = current_time; + + cap >> input_mat; + if (input_mat.empty()) { + std::cerr << "Warning: Captured an empty frame." << std::endl; + continue; + } + + // 调用模型进行预测 + high_resolution_clock::time_point start_time = high_resolution_clock::now(); + auto results = model.Predict(input_mat); + high_resolution_clock::time_point end_time = high_resolution_clock::now(); + + // 计算推理时间 + auto time_span = duration_cast(end_time - start_time); + std::cout << "Inference time: " << time_span.count() << " ms" << std::endl; + + // 提取检测结果 + std::vector detections; + for (const auto& res : results) { + // 确保矩形有效 + if (res.box.width > 0 && res.box.height > 0) { + detections.push_back(res.box); + } + } + + // 更新多目标跟踪器 + tracker.Update(detections, Ts); + + // 可视化结果 + cv::Mat output_image = input_mat.clone(); + + // 获取所有活动目标 (只包括活跃的跟踪器) + const auto& active_objects = tracker.GetActiveObjects(); + + // 绘制所有活跃目标 + for (const auto& kv : active_objects) { + const auto& obj = kv.second; + + // 为每个目标生成固定颜色 + int color_seed = obj.id * 1000; // 根据ID生成种子 + cv::RNG obj_rng(color_seed); + cv::Scalar color(obj_rng.uniform(0, 255), obj_rng.uniform(0, 255), + obj_rng.uniform(0, 255)); + + // 绘制跟踪框 + if (obj.rect.width > 0 && obj.rect.height > 0) { + cv::rectangle(output_image, obj.rect, color, 2); + } + + // 绘制速度向量 + if (obj.rect.width > 0 && obj.rect.height > 0) { + cv::Point center(obj.rect.x + obj.rect.width / 2, + obj.rect.y + obj.rect.height / 2); + cv::Point velocity_end(center.x + obj.velocity.x * 10, + center.y + obj.velocity.y * 10); + cv::arrowedLine(output_image, center, velocity_end, color, 2); + + // 显示目标ID和丢失帧数 + std::string info = cv::format("ID:%d Lost:%d", obj.id, obj.lost_frames); + cv::putText(output_image, info, cv::Point(obj.rect.x, obj.rect.y - 10), + cv::FONT_HERSHEY_SIMPLEX, 0.5, color, 1); + } + } + + // 在图像顶部显示统计信息 + if (Ts > 0.001f) { // 避免除以零 + std::string stats = + cv::format("Objects: %d FPS: %.1f", active_objects.size(), 1.0f / Ts); + cv::putText(output_image, stats, cv::Point(10, 30), + cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 255, 255), 2); + } + + // 显示结果 + edit.Print(output_image); + + // 简单退出机制 + char c = static_cast(cv::waitKey(1)); + if (c == 27) break; // ESC键退出 + } + + cap.release(); + return 0; +} +``` + +## 4. 编译过程 + +### 4.1 编译环境搭建 + +- 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/cpp_development_environment.md) 正确配置了开发环境。 +- 同时以正确连接开发板。 + +### 4.2 Cmake介绍 + +```cmake +cmake_minimum_required(VERSION 3.10) + +project(D01_test_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") + +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +add_executable(Test-target-tracking test_target_tracking.cc) +target_include_directories(Test-target-tracking PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS}) +target_link_libraries(Test-target-tracking PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS Test-target-tracking + RUNTIME DESTINATION . +) +``` + +### 4.3 编译项目 + +使用 Docker Destop 打开 LockzhinerVisionModule 容器并执行以下命令来编译项目 + +```bash +# 进入Demo所在目录 +cd /LockzhinerVisionModuleWorkSpace/LockzhinerVisionModule/Cpp_example/D12_target_tracking +# 创建编译目录 +rm -rf build && mkdir build && cd build +# 配置交叉编译工具链 +export TOOLCHAIN_ROOT_PATH="/LockzhinerVisionModuleWorkSpace/arm-rockchip830-linux-uclibcgnueabihf" +# 使用cmake配置项目 +cmake .. +# 执行编译项目 +make -j8 && make install +``` + +在执行完上述命令后,会在build目录下生成可执行文件。 + +## 5. 运行结果 + +在此下载所使用的模型 + + +```shell +chmod 777 Test-target-tracking +# 在实际应用的过程中LZ-Picodet需要替换为下载的或者你的rknn模型 +./Test-detection LZ-Picodet +``` + +运行结果如下: +![结果](./images/D12_2.png) + +## 6.总结 + +本文档提出了一种基于卡尔曼滤波和匈牙利算法的多目标跟踪实现,通过集成6D状态卡尔曼滤波器(EnhancedKalmanFilter)、多目标跟踪管理核心(MultiObjectTracker)以及检测-轨迹匹配算法(HungarianAlgorithm),构建了一个高效稳定的跟踪系统。该系统引入了多维度代价函数(结合IoU、距离和外观信息),并采用自适应过程噪声调整机制和动态目标ID管理策略,有效提升了在复杂场景下对多目标的跟踪能力。该方法可广泛应用于视频监控系统中的目标持续跟踪、自动驾驶中的车辆与行人追踪,以及体育赛事中运动员的行为分析,尤其在处理目标遮挡和路径交错等挑战性场景中表现出色。 diff --git a/Cpp_example/D13_target_tracking/images/D12_1.png b/Cpp_example/D13_target_tracking/images/D12_1.png new file mode 100755 index 0000000000000000000000000000000000000000..7ba0e97dddc870de0ef065f9924aa1ecd30dd442 Binary files /dev/null and b/Cpp_example/D13_target_tracking/images/D12_1.png differ diff --git a/Cpp_example/D13_target_tracking/images/D12_2.png b/Cpp_example/D13_target_tracking/images/D12_2.png new file mode 100755 index 0000000000000000000000000000000000000000..aa8ee61e2f2ab9382d9f9235599d361e72d26f62 Binary files /dev/null and b/Cpp_example/D13_target_tracking/images/D12_2.png differ diff --git a/Cpp_example/D13_target_tracking/test_target_tracking.cc b/Cpp_example/D13_target_tracking/test_target_tracking.cc new file mode 100755 index 0000000000000000000000000000000000000000..d1412f033c5211c265353f7389ca203d7b421783 --- /dev/null +++ b/Cpp_example/D13_target_tracking/test_target_tracking.cc @@ -0,0 +1,598 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std::chrono; + +// ================== 增强版 6D 卡尔曼滤波类 ================== +class EnhancedKalmanFilter { + public: + EnhancedKalmanFilter(float Ts = 0.033f) : prediction_steps(1) { + Initialize(Ts); + } + + void Initialize(float Ts) { + Ts_ = Ts; + // 状态转移矩阵 A (6x6) + A = cv::Mat::eye(6, 6, CV_64F); + A.at(0, 4) = Ts; + A.at(1, 5) = Ts; + + // 观测矩阵 C (4x6) - 只观测位置和大小 + C = cv::Mat::zeros(4, 6, CV_64F); + C.at(0, 0) = 1; + C.at(1, 1) = 1; + C.at(2, 2) = 1; + C.at(3, 3) = 1; + + // 增大过程噪声 - 使预测更灵活 + Q = cv::Mat::eye(6, 6, CV_64F); + + Q.at(0, 0) = 1e-2; // x位置噪声增加 + Q.at(1, 1) = 1e-2; // y位置噪声增加 + Q.at(2, 2) = 1e-6; // 宽度噪声不变 + Q.at(3, 3) = 1e-6; // 高度噪声不变 + Q.at(4, 4) = 1e-3; // x速度噪声减小 + Q.at(5, 5) = 1e-3; // y速度噪声减小 + + // 调整观测噪声 - 使预测更具主导性 + R = cv::Mat::eye(4, 4, CV_64F); + R.at(0, 0) = 1e-2; // x位置观测噪声减小 + R.at(1, 1) = 1e-2; // y位置观测噪声减小 + R.at(2, 2) = 1e-1; // 宽度观测噪声增加 + R.at(3, 3) = 1e-1; // 高度观测噪声增加 + + // 初始估计误差协方差矩阵 P (增大初始不确定性) + P = cv::Mat::eye(6, 6, CV_64F) * 100; + + // 初始状态 [x, y, w, h, dx, dy] + x_hat = cv::Mat::zeros(6, 1, CV_64F); + has_initialized = false; + + // 初始化临时Q矩阵 + Q_temp = Q.clone(); + } + + void UpdateTs(float Ts) { + Ts_ = Ts; + // 更新状态转移矩阵中的时间项 + A.at(0, 4) = Ts; + A.at(1, 5) = Ts; + } + + void Update(const cv::Mat& Z) { + if (!has_initialized) { + // 首次更新,直接初始化状态 + x_hat.at(0) = Z.at(0); + x_hat.at(1) = Z.at(1); + x_hat.at(2) = Z.at(2); + x_hat.at(3) = Z.at(3); + x_hat.at(4) = 0; + x_hat.at(5) = 0; + has_initialized = true; + return; + } + + // 检查位置偏差大小 + cv::Mat residual = Z - C * (A * x_hat); + double positionError = cv::norm(residual.rowRange(0, 2)); + + // 动态加速收敛机制 + if (positionError > 20.0) { + // 位置偏差较大时临时增大位置过程噪声 + Q_temp = Q.clone(); + Q_temp.at(0, 0) *= 10.0; + Q_temp.at(1, 1) *= 10.0; + } else { + Q_temp = Q.clone(); + } + + // 预测步骤 + cv::Mat x_hat_minus = A * x_hat; + cv::Mat P_minus = A * P * A.t() + Q_temp; // 使用临时Q + + // 计算残差 + cv::Mat y = Z - C * x_hat_minus; + + // 计算卡尔曼增益 K + cv::Mat S = C * P_minus * C.t() + R; + cv::Mat K = P_minus * C.t() * S.inv(); + + // 更新状态 + x_hat = x_hat_minus + K * y; + + // 更新协方差 + cv::Mat I = cv::Mat::eye(6, 6, CV_64F); + P = (I - K * C) * P_minus; + } + + void Predict() { + if (!has_initialized) return; + x_hat = A * x_hat; + P = A * P * A.t() + Q_temp; + } + + cv::Mat GetState() const { return x_hat.clone(); } + + cv::Mat GetPredictedState(int steps = 1) const { + if (!has_initialized) return cv::Mat(); + cv::Mat state = x_hat.clone(); + cv::Mat A_step = cv::Mat::eye(6, 6, CV_64F); + A_step.at(0, 4) = Ts_ * steps; + A_step.at(1, 5) = Ts_ * steps; + return A_step * state; + } + + void SetPredictionSteps(int steps) { prediction_steps = std::max(1, steps); } + + int GetPredictionSteps() const { return prediction_steps; } + + // 重置滤波器 + void Reset() { + has_initialized = false; + // 重置协方差矩阵 + P = cv::Mat::eye(6, 6, CV_64F) * 100; + } + + bool HasInitialized() const { return has_initialized; } + + cv::Rect GetRectFromState() const { + if (!has_initialized) return cv::Rect(); + cv::Mat state = GetState(); + return cv::Rect(static_cast(state.at(0)), + static_cast(state.at(1)), + static_cast(state.at(2)), + static_cast(state.at(3))); + } + + // 获取速度向量 + cv::Point2d GetVelocity() const { + if (!has_initialized) return cv::Point2d(0, 0); + return cv::Point2d(x_hat.at(4), x_hat.at(5)); + } + + private: + cv::Mat A, C, Q, R, P; + cv::Mat x_hat; + cv::Mat Q_temp; // 动态调整的Q矩阵 + float Ts_; + int prediction_steps; + bool has_initialized; +}; + +// 计算两个矩形的IOU(交并比) +double CalculateIOU(const cv::Rect& rect1, const cv::Rect& rect2) { + // 计算交集矩形 + int x1 = std::max(rect1.x, rect2.x); + int y1 = std::max(rect1.y, rect2.y); + int x2 = std::min(rect1.x + rect1.width, rect2.x + rect2.width); + int y2 = std::min(rect1.y + rect1.height, rect2.y + rect2.height); + + int intersectionArea = std::max(0, x2 - x1) * std::max(0, y2 - y1); + + // 计算并集面积 + int area1 = rect1.width * rect1.height; + int area2 = rect2.width * rect2.height; + + // 避免除以0 + if (area1 + area2 - intersectionArea <= 0) { + return 0.0; + } + + return static_cast(intersectionArea) / + (area1 + area2 - intersectionArea); +} + +// 计算两个矩形中心点的欧式距离 +double CalculateCenterDistance(const cv::Rect& rect1, const cv::Rect& rect2) { + cv::Point center1(rect1.x + rect1.width / 2, rect1.y + rect1.height / 2); + cv::Point center2(rect2.x + rect2.width / 2, rect2.y + rect2.height / 2); + return cv::norm(center1 - center2); +} + +// 计算外观相似性(使用矩形大小差异作为简单度量) +double CalculateAppearanceSimilarity(const cv::Rect& rect1, + const cv::Rect& rect2) { + double sizeDiff = + std::abs(rect1.width * rect1.height - rect2.width * rect2.height); + // 归一化:10000是一个经验值,代表典型的最大矩形面积 + return std::exp(-sizeDiff / 10000.0); +} + +// ================== 匈牙利算法实现 ================== +class HungarianAlgorithm { + public: + // 解指派问题(最小化代价) + static std::vector Solve( + const std::vector>& costMatrix) { + int n = costMatrix.size(); + if (n == 0) return {}; + + int m = costMatrix[0].size(); + if (m == 0) return std::vector(n, -1); + + // 预处理: 确保矩阵是方阵 + std::vector> cost = costMatrix; + if (n > m) { + for (auto& row : cost) { + row.resize(n, std::numeric_limits::max()); + } + m = n; + } else if (m > n) { + cost.resize(m, + std::vector(m, std::numeric_limits::max())); + for (int i = n; i < m; ++i) { + for (int j = 0; j < m; ++j) { + if (j < costMatrix[0].size()) { + cost[i][j] = costMatrix[i % n][j]; + } + } + } + n = m; + } + + // 初始化变量 + std::vector u(n + 1, 0); + std::vector v(m + 1, 0); + std::vector p(m + 1, 0); + std::vector way(m + 1, 0); + + for (int i = 1; i <= n; ++i) { + p[0] = i; + int j0 = 0; + std::vector minv(m + 1, std::numeric_limits::max()); + std::vector used(m + 1, false); + + do { + used[j0] = true; + int i0 = p[j0]; + double delta = std::numeric_limits::max(); + int j1 = 0; + + for (int j = 1; j <= m; ++j) { + if (!used[j]) { + double cur = cost[i0 - 1][j - 1] - u[i0] - v[j]; + if (cur < minv[j]) { + minv[j] = cur; + way[j] = j0; + } + if (minv[j] < delta) { + delta = minv[j]; + j1 = j; + } + } + } + + for (int j = 0; j <= m; ++j) { + if (used[j]) { + u[p[j]] += delta; + v[j] -= delta; + } else { + minv[j] -= delta; + } + } + + j0 = j1; + } while (p[j0] != 0); + + do { + int j1 = way[j0]; + p[j0] = p[j1]; + j0 = j1; + } while (j0 != 0); + } + + // 构建结果向量 + std::vector result(n, -1); + for (int j = 1; j <= m; ++j) { + if (p[j] != 0 && p[j] <= static_cast(result.size())) { + result[p[j] - 1] = j - 1; + } + } + + return result; + } +}; + +// 多目标跟踪器类 +class MultiObjectTracker { + public: + struct TrackedObject { + int id; + cv::Rect rect; + cv::Point2d velocity; + EnhancedKalmanFilter ekf; + int lost_frames; + int max_lost_frames; + + // 添加默认构造函数 + TrackedObject() + : id(-1), + rect(0, 0, 0, 0), + velocity(0, 0), + lost_frames(0), + max_lost_frames(30) { + ekf.Initialize(0.033f); + } + + TrackedObject(int obj_id, const cv::Rect& init_rect, float Ts) + : id(obj_id), + rect(init_rect), + velocity(0, 0), + lost_frames(0), + max_lost_frames(30) { + ekf.Initialize(Ts); + // 初始化卡尔曼滤波器 + cv::Mat Z = (cv::Mat_(4, 1) << init_rect.x, init_rect.y, + init_rect.width, init_rect.height); + ekf.Update(Z); + } + + void Update(const cv::Rect& new_rect, float Ts) { + ekf.UpdateTs(Ts); + cv::Mat Z = (cv::Mat_(4, 1) << new_rect.x, new_rect.y, + new_rect.width, new_rect.height); + ekf.Update(Z); + rect = ekf.GetRectFromState(); + velocity = ekf.GetVelocity(); + lost_frames = 0; + } + + void Predict(float Ts) { + ekf.UpdateTs(Ts); + ekf.Predict(); + rect = ekf.GetRectFromState(); + velocity = ekf.GetVelocity(); + lost_frames++; + } + + // 当丢失帧数超过max_lost_frames时,视为丢失 + bool IsActive() const { return lost_frames <= max_lost_frames; } + }; + + MultiObjectTracker() : next_id(1), Ts(0.033f) {} + + void Update(const std::vector& detections, float delta_time) { + Ts = delta_time; + + // 步骤1: 卡尔曼预测 + for (auto& kv : tracked_objects) { + kv.second.Predict(Ts); + } + + // 如果没有检测结果,直接返回 + if (detections.empty()) { + return; + } + + // 步骤2: 构建代价矩阵 + std::vector> cost_matrix; + std::vector active_tracker_indices; + + // 收集所有活跃的跟踪器 + for (auto& kv : tracked_objects) { + if (kv.second.IsActive()) { + active_tracker_indices.push_back(kv.first); + } + } + + // 构建代价矩阵 + for (int tracker_idx : active_tracker_indices) { + TrackedObject& tracker = tracked_objects[tracker_idx]; + std::vector costs; + for (const auto& det : detections) { + // 综合代价:IOU + 位置距离 + 外观相似性 + double iou = CalculateIOU(tracker.rect, det); + // 避免NaN/Inf + if (std::isnan(iou)) iou = 0.0; + iou = std::max(0.0, std::min(iou, 1.0)); + + double dist = CalculateCenterDistance(tracker.rect, det); + double appearance = CalculateAppearanceSimilarity(tracker.rect, det); + + // 综合代价函数 + double cost = + (1.0 - iou) * 0.6 + (dist / 100.0) * 0.3 + (1.0 - appearance) * 0.1; + costs.push_back(cost); + } + cost_matrix.push_back(costs); + } + + // 匹配结果存储 + std::vector assignments; + if (!cost_matrix.empty()) { + assignments = HungarianAlgorithm::Solve(cost_matrix); + } else { + // 如果没有活跃跟踪器,分配所有检测为未匹配 + assignments = std::vector(active_tracker_indices.size(), -1); + } + + // 步骤3: 处理匹配结果 + // 创建一个布尔数组来标记每个检测是否被匹配 + std::vector detection_matched(detections.size(), false); + for (int i = 0; i < assignments.size() && i < active_tracker_indices.size(); + ++i) { + int det_index = assignments[i]; + if (det_index >= 0 && det_index < static_cast(detections.size())) { + int tracker_idx = active_tracker_indices[i]; + // 检查矩形是否有效 + const cv::Rect& new_rect = detections[det_index]; + if (new_rect.width > 0 && new_rect.height > 0) { + tracked_objects[tracker_idx].Update(new_rect, Ts); + detection_matched[det_index] = true; + } + } + } + + // 步骤4: 为未匹配的检测创建新跟踪器 + for (int i = 0; i < detections.size(); ++i) { + if (!detection_matched[i] && detections[i].width > 0 && + detections[i].height > 0) { + // 创建新跟踪器 + tracked_objects.insert( + std::make_pair(next_id, TrackedObject(next_id, detections[i], Ts))); + next_id++; + } + } + + // 步骤5: 移除丢失时间过长的跟踪器 + for (auto it = tracked_objects.begin(); it != tracked_objects.end();) { + // 如果丢失超过30帧,移除跟踪器 + if (!it->second.IsActive()) { + it = tracked_objects.erase(it); + } else { + ++it; + } + } + } + + // 获取所有活跃的目标 + const std::map& GetActiveObjects() const { + return tracked_objects; + } + + private: + std::map tracked_objects; + int next_id; + float Ts; +}; + +int main(int argc, char* argv[]) { + if (argc != 2) { + std::cerr << "Usage: Test-PaddleDet model_path" << std::endl; + return 1; + } + + // 初始化模型 + lockzhiner_vision_module::vision::PaddleDet model; + if (!model.Initialize(argv[1])) { + std::cout << "Failed to initialize model." << std::endl; + return 1; + } + + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + std::cout << "Device connected successfully." << std::endl; + + // 打开摄像头 + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + + if (!cap.open(0)) { + std::cerr << "Couldn't open video capture device" << std::endl; + return -1; + } + + cv::Mat input_mat; + + // 创建多目标跟踪器 + MultiObjectTracker tracker; + + // 创建随机颜色生成器 + cv::RNG rng(0xFFFFFFFF); + + auto last_time = high_resolution_clock::now(); + + while (true) { + auto current_time = high_resolution_clock::now(); + float Ts = + duration_cast(current_time - last_time).count() / 1000.0f; + if (Ts <= 0.001f) Ts = 0.033f; // 避免零时间间隔 + last_time = current_time; + + cap >> input_mat; + if (input_mat.empty()) { + std::cerr << "Warning: Captured an empty frame." << std::endl; + continue; + } + + // 调用模型进行预测 + high_resolution_clock::time_point start_time = high_resolution_clock::now(); + auto results = model.Predict(input_mat); + high_resolution_clock::time_point end_time = high_resolution_clock::now(); + + // 计算推理时间 + auto time_span = duration_cast(end_time - start_time); + std::cout << "Inference time: " << time_span.count() << " ms" << std::endl; + + // 提取检测结果 + std::vector detections; + for (const auto& res : results) { + // 确保矩形有效 + if (res.box.width > 0 && res.box.height > 0) { + detections.push_back(res.box); + } + } + + // 更新多目标跟踪器 + tracker.Update(detections, Ts); + + // 可视化结果 + cv::Mat output_image = input_mat.clone(); + + // 获取所有活动目标 (只包括活跃的跟踪器) + const auto& active_objects = tracker.GetActiveObjects(); + + // 绘制所有活跃目标 + for (const auto& kv : active_objects) { + const auto& obj = kv.second; + + // 为每个目标生成固定颜色 + int color_seed = obj.id * 1000; // 根据ID生成种子 + cv::RNG obj_rng(color_seed); + cv::Scalar color(obj_rng.uniform(0, 255), obj_rng.uniform(0, 255), + obj_rng.uniform(0, 255)); + + // 绘制跟踪框 + if (obj.rect.width > 0 && obj.rect.height > 0) { + cv::rectangle(output_image, obj.rect, color, 2); + } + + // 绘制速度向量 + if (obj.rect.width > 0 && obj.rect.height > 0) { + cv::Point center(obj.rect.x + obj.rect.width / 2, + obj.rect.y + obj.rect.height / 2); + cv::Point velocity_end(center.x + obj.velocity.x * 10, + center.y + obj.velocity.y * 10); + cv::arrowedLine(output_image, center, velocity_end, color, 2); + + // 显示目标ID和丢失帧数 + std::string info = cv::format("ID:%d Lost:%d", obj.id, obj.lost_frames); + cv::putText(output_image, info, cv::Point(obj.rect.x, obj.rect.y - 10), + cv::FONT_HERSHEY_SIMPLEX, 0.5, color, 1); + } + } + + // 在图像顶部显示统计信息 + if (Ts > 0.001f) { // 避免除以零 + std::string stats = + cv::format("Objects: %d FPS: %.1f", active_objects.size(), 1.0f / Ts); + cv::putText(output_image, stats, cv::Point(10, 30), + cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 255, 255), 2); + } + + // 显示结果 + edit.Print(output_image); + + // 简单退出机制 + char c = static_cast(cv::waitKey(1)); + if (c == 27) break; // ESC键退出 + } + + cap.release(); + return 0; +} \ No newline at end of file diff --git a/Cpp_example/D14_fatigue_detection/CMakeLists.txt b/Cpp_example/D14_fatigue_detection/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..21ec4a50b4ea12e4bbf5d999d5cfccf927eb5aa6 --- /dev/null +++ b/Cpp_example/D14_fatigue_detection/CMakeLists.txt @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 3.10) + +project(face_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") + +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +# ncnn配置 +set(NCNN_ROOT_DIR "${PROJECT_ROOT_PATH}/third_party/ncnn-20240820-lockzhiner-vision-module") # 确保third_party层级存在 +message(STATUS "Checking ncnn headers in: ${NCNN_ROOT_DIR}/include/ncnn") + +# 验证头文件存在 +if(NOT EXISTS "${NCNN_ROOT_DIR}/include/ncnn/net.h") + message(FATAL_ERROR "ncnn headers not found. Confirm the directory contains ncnn: ${NCNN_ROOT_DIR}") +endif() + +set(NCNN_INCLUDE_DIRS "${NCNN_ROOT_DIR}/include") +set(NCNN_LIBRARIES "${NCNN_ROOT_DIR}/lib/libncnn.a") + +# 配置rknpu2 +set(RKNPU2_BACKEND_BASE_DIR "${LockzhinerVisionModule_ROOT_PATH}/include/lockzhiner_vision_module/vision/deep_learning/runtime") +if(NOT EXISTS ${RKNPU2_BACKEND_BASE_DIR}) + message(FATAL_ERROR "RKNPU2 backend base dir missing: ${RKNPU2_BACKEND_BASE_DIR}") +endif() + +add_executable(Test-facepoint facepoint_detection.cc) +target_include_directories(Test-facepoint PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS} ${NCNN_INCLUDE_DIRS} ${rknpu2_INCLUDE_DIRS} ${RKNPU2_BACKEND_BASE_DIR}) +target_link_libraries(Test-facepoint PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES} ${NCNN_LIBRARIES}) + +install( + TARGETS Test-facepoint + RUNTIME DESTINATION . +) \ No newline at end of file diff --git a/Cpp_example/D14_fatigue_detection/README.md b/Cpp_example/D14_fatigue_detection/README.md new file mode 100755 index 0000000000000000000000000000000000000000..cc924380472cd1a5be9460ed9041283addc9e760 --- /dev/null +++ b/Cpp_example/D14_fatigue_detection/README.md @@ -0,0 +1,525 @@ +# 疲劳检测 + +## 1. 实验背景 + +基于计算机视觉的实时疲劳检测系统通过分析眼部状态(闭眼时长、眨眼频率)、嘴部动作(打哈欠频率)以及面部微表情变化,运用轻量化深度学习模型在嵌入式设备上实现超过90%的检测准确率。该系统能在30ms内完成单帧分析,当检测到连续闭眼超过0.5秒、打哈欠超过0.67秒或闭眼时间占比(PERCLOS)超过30%时自动触发警报,为交通运输、工业生产和医疗监护等高风险领域提供主动安全防护,有效降低因疲劳导致的事故风险。 + +## 2. 实验原理 + +### 2.1 人脸检测阶段 + +采用轻量级目标检测网络PicoDet实现实时人脸检测。PicoDet通过深度可分离卷积和特征金字塔网络优化,在保持高精度(>90% mAP)的同时显著降低计算复杂度。该模块接收输入图像,输出人脸边界框坐标,为后续关键点检测提供精准的感兴趣区域(ROI)。 + +### 2.2 关键点定位阶段 + +基于PFLD(Practical Facial Landmark Detector)模型实现106点人脸关键点检测。PFLD采用多尺度特征融合技术,通过主分支预测关键点坐标,辅助分支学习几何约束信息。该阶段将人脸区域统一缩放到112×112分辨率输入网络,输出归一化后的106个关键点坐标。 + +### 2.3 眼部疲劳分析 + +从106个关键点中提取左右眼各8个关键点,计算眼睛纵横比(EAR): + EAR = (‖p2-p6‖ + ‖p3-p5‖) / (2 × ‖p1-p4‖) +其中p1-p6为眼部轮廓点。实验设定EAR阈值0.25,当连续15帧(0.5秒@30fps)EAR低于阈值时,判定为持续性闭眼。同时计算PERCLOS指标(闭眼时间占比),10秒窗口内闭眼占比超过30%判定为眼动疲劳。 + +### 2.4 嘴部疲劳分析 + +提取嘴部20个关键点(外轮廓12点+内部结构8点),计算嘴部纵横比(MAR): + MAR = (垂直距离1 + 垂直距离2 + 垂直距离3) / (3 × 水平距离) +实验设定MAR阈值0.6,当连续20帧(0.67秒)MAR高于阈值时,判定为持续性打哈欠。嘴部关键点采用特殊拓扑结构,外轮廓点检测嘴部开合度,内部点增强哈欠状态识别鲁棒性。 + +### 2.5 多指标融合决策 + +建立基于加权逻辑的疲劳判定模型: + 疲劳状态 = OR(眼部持续性闭眼, 嘴部持续性哈欠, PERCLOS > 30%) +该模型采用并行决策机制,任一条件满足即触发"TIRED"状态。 + +## 3. 模型转换 + +```python +import os +import numpy as np +import cv2 +from rknn.api import RKNN + +def convert_onnx_to_rknn(): + # 初始化RKNN对象 + rknn = RKNN(verbose=True) + + # 模型配置 (与训练一致) + print('--> Config model') + rknn.config( + # 与训练一致:仅归一化 [0,1] + mean_values=[[0, 0, 0]], # 重要修正 + std_values=[[255, 255, 255]], # 重要修正 + target_platform='rv1106', + quantized_dtype='w8a8', # RV1106兼容类型 + quantized_algorithm='normal', + optimization_level=3, + # float_dtype='float16', # 量化模型不需要 + output_optimize=True, + ) + print('done') + + # 加载ONNX模型 - 明确指定输入输出 + print('--> Loading model') + ret = rknn.load_onnx( + model='./pfld_106.onnx', + inputs=['input'], # 使用Netron确认输入节点名 + outputs=['output'], # 使用Netron确认输出节点名 + # 使用NHWC格式 (匹配RV1106) + input_size_list=[[1, 3, 112, 112]] # NCHW格式 + ) + if ret != 0: + print('Load model failed!') + exit(ret) + print('done') + + # 构建RKNN模型 - 使用校准数据集 + print('--> Building model') + ret = rknn.build( + do_quantization=True, + dataset='./dataset.txt', + # 添加量化配置 + rknn_batch_size=1 + ) + if ret != 0: + print('Build model failed!') + exit(ret) + print('done') + + # 导出RKNN模型 + print('--> Export rknn model') + ret = rknn.export_rknn('./pfld_106.rknn') + if ret != 0: + print('Export rknn model failed!') + exit(ret) + print('done') + + # 释放RKNN对象 + rknn.release() + +if __name__ == '__main__': + convert_onnx_to_rknn() + print('Model conversion completed successfully!') +``` +根据目标平台,完成参数配置,运行程序完成转换。在完成模型转换后可以查看 rv1106 的算子支持手册,确保所有的算子是可以使用的,避免白忙活。 + +## 4. 模型部署 + +```cpp +#include +#include +#include +#include +#include +#include +#include "rknpu2_backend/rknpu2_backend.h" +#include + +using namespace cv; +using namespace std; +using namespace std::chrono; + +// 定义关键点索引 (根据106点模型) +const vector LEFT_EYE_POINTS = {35, 41, 40, 42, 39, 37, 33, 36}; // 左眼 +const vector RIGHT_EYE_POINTS = {89, 95, 94, 96, 93, 91, 87, 90}; // 右眼 + +// 嘴部 +const vector MOUTH_OUTLINE = {52, 64, 63, 71, 67, 68, 61, 58, 59, 53, 56, 55}; +const vector MOUTH_INNER = {65, 66, 62, 70, 69, 57, 60, 54}; +vector MOUTH_POINTS; + +// 计算眼睛纵横比(EAR) +float eye_aspect_ratio(const vector& eye_points) { + // 计算垂直距离 + double A = norm(eye_points[1] - eye_points[7]); + double B = norm(eye_points[2] - eye_points[6]); + double C = norm(eye_points[3] - eye_points[5]); + + // 计算水平距离 + double D = norm(eye_points[0] - eye_points[4]); + + // 防止除以零 + if (D < 1e-5) return 0.0f; + + return (float)((A + B + C) / (3.0 * D)); +} + +// 计算嘴部纵横比(MAR) +float mouth_aspect_ratio(const vector& mouth_points) { + // 关键点索引(基于MOUTH_OUTLINE中的位置) + const int LEFT_CORNER = 0; // 52 (左嘴角) + const int UPPER_CENTER = 3; // 71 (上唇中心) + const int RIGHT_CORNER = 6; // 61 (右嘴角) + const int LOWER_CENTER = 9; // 53 (下唇中心) + + // 计算垂直距离 + double A = norm(mouth_points[UPPER_CENTER] - mouth_points[LOWER_CENTER]); // 上唇中心到下唇中心 + double B = norm(mouth_points[UPPER_CENTER] - mouth_points[LEFT_CORNER]); // 上唇中心到左嘴角 + double C = norm(mouth_points[UPPER_CENTER] - mouth_points[RIGHT_CORNER]); // 上唇中心到右嘴角 + + // 计算嘴部宽度(左右嘴角距离) + double D = norm(mouth_points[LEFT_CORNER] - mouth_points[RIGHT_CORNER]); // 左嘴角到右嘴角 + + // 防止除以零 + if (D < 1e-5) return 0.0f; + + // 计算平均垂直距离与水平距离的比值 + return static_cast((A + B + C) / (3.0 * D)); +} + +int main(int argc, char** argv) +{ + // 初始化嘴部关键点 + MOUTH_POINTS.clear(); + MOUTH_POINTS.insert(MOUTH_POINTS.end(), MOUTH_OUTLINE.begin(), MOUTH_OUTLINE.end()); + MOUTH_POINTS.insert(MOUTH_POINTS.end(), MOUTH_INNER.begin(), MOUTH_INNER.end()); + + // 检查命令行参数 + if (argc != 4) { + cerr << "Usage: " << argv[0] << " \n"; + cerr << "Example: " << argv[0] << " picodet_model_dir pfld.rknn 112\n"; + return -1; + } + + const char* paddle_model_path = argv[1]; + const char* pfld_rknn_path = argv[2]; + const int pfld_size = atoi(argv[3]); // PFLD模型输入尺寸 (112) + + // 1. 初始化PaddleDet人脸检测模型 + lockzhiner_vision_module::vision::PaddleDet face_detector; + if (!face_detector.Initialize(paddle_model_path)) { + cerr << "Failed to initialize PaddleDet face detector model." << endl; + return -1; + } + + // 2. 初始化PFLD RKNN模型 + lockzhiner_vision_module::vision::RKNPU2Backend pfld_backend; + if (!pfld_backend.Initialize(pfld_rknn_path)) { + cerr << "Failed to load PFLD RKNN model." << endl; + return -1; + } + + // 获取输入张量信息 + const auto& input_tensor = pfld_backend.GetInputTensor(0); + const vector input_dims = input_tensor.GetDims(); + const float input_scale = input_tensor.GetScale(); + const int input_zp = input_tensor.GetZp(); + + // 打印输入信息 + cout << "PFLD Input Info:" << endl; + cout << " Dimensions: "; + for (auto dim : input_dims) cout << dim << " "; + cout << "\n Scale: " << input_scale << " Zero Point: " << input_zp << endl; + + // 3. 初始化Edit模块 + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + cerr << "Error: Failed to start and accept connection." << endl; + return EXIT_FAILURE; + } + cout << "Device connected successfully." << endl; + + // 4. 打开摄像头 + VideoCapture cap; + cap.set(CAP_PROP_FRAME_WIDTH, 640); + cap.set(CAP_PROP_FRAME_HEIGHT, 480); + cap.open(0); + + if (!cap.isOpened()) { + cerr << "Error: Could not open camera." << endl; + return -1; + } + + Mat frame; + const int num_landmarks = 106; + int frame_count = 0; + const int debug_freq = 10; // 每10帧打印一次调试信息 + + // ================== 疲劳检测参数 ================== + const float EAR_THRESHOLD = 0.25f; // 眼睛纵横比阈值 + const float MAR_THRESHOLD = 0.6f; // 嘴部纵横比阈值 + + const int EYE_CLOSED_FRAMES = 15; // 闭眼持续帧数阈值 + const int MOUTH_OPEN_FRAMES = 20; // 张嘴持续帧数阈值 + + int consecutive_eye_closed = 0; // 连续闭眼帧数 + int consecutive_mouth_open = 0; // 连续张嘴帧数 + + bool is_tired = false; // 当前疲劳状态 + + deque eye_state_buffer; // 用于PERCLOS计算 + const int PERCLOS_WINDOW = 200; // PERCLOS计算窗口大小 + float perclos = 0.0f; // 闭眼时间占比 + + // 疲劳状态文本 + const string TIRED_TEXT = "TIRED"; + const string NORMAL_TEXT = "NORMAL"; + const Scalar TIRED_COLOR = Scalar(0, 0, 255); // 红色 + const Scalar NORMAL_COLOR = Scalar(0, 255, 0); // 绿色 + + while (true) { + // 5. 捕获一帧图像 + cap >> frame; + if (frame.empty()) { + cerr << "Warning: Captured an empty frame." << endl; + continue; + } + + // 6. 人脸检测 + auto start_det = high_resolution_clock::now(); + auto face_results = face_detector.Predict(frame); + auto end_det = high_resolution_clock::now(); + auto det_duration = duration_cast(end_det - start_det); + + Mat result_image = frame.clone(); + bool pfld_debug_printed = false; + + // 7. 处理每个检测到的人脸 + for (const auto& face : face_results) { + // 跳过非人脸检测结果 + if (face.label_id != 0) continue; + + // 提取人脸区域 + Rect face_rect = face.box; + + // 确保人脸区域在图像范围内 + face_rect.x = max(0, face_rect.x); + face_rect.y = max(0, face_rect.y); + face_rect.width = min(face_rect.width, frame.cols - face_rect.x); + face_rect.height = min(face_rect.height, frame.rows - face_rect.y); + + if (face_rect.width <= 10 || face_rect.height <= 10) continue; + + // 绘制人脸框 + rectangle(result_image, face_rect, Scalar(0, 255, 0), 2); + + // 8. 关键点检测 + Mat face_roi = frame(face_rect); + Mat face_resized; + resize(face_roi, face_resized, Size(pfld_size, pfld_size)); + + // 8.1 预处理 (转换为RKNN输入格式) + cvtColor(face_resized, face_resized, COLOR_BGR2RGB); + + // 8.2 设置输入数据 + void* input_data = input_tensor.GetData(); + size_t required_size = input_tensor.GetElemsBytes(); + size_t actual_size = face_resized.total() * face_resized.elemSize(); + + if (actual_size != required_size) { + cerr << "Input size mismatch! Required: " << required_size + << ", Actual: " << actual_size << endl; + continue; + } + + memcpy(input_data, face_resized.data, actual_size); + + // 8.3 执行推理 + auto start_pfld = high_resolution_clock::now(); + bool success = pfld_backend.Run(); + auto end_pfld = high_resolution_clock::now(); + auto pfld_duration = duration_cast(end_pfld - start_pfld); + + if (!success) { + cerr << "PFLD inference failed!" << endl; + continue; + } + + // 8.4 获取输出结果 + const auto& output_tensor = pfld_backend.GetOutputTensor(0); + const float output_scale = output_tensor.GetScale(); + const int output_zp = output_tensor.GetZp(); + const int8_t* output_data = static_cast(output_tensor.GetData()); + const vector output_dims = output_tensor.GetDims(); + + // 计算输出元素数量 + size_t total_elems = 1; + for (auto dim : output_dims) total_elems *= dim; + + // 打印输出信息 (调试) + if ((frame_count % debug_freq == 0 || !pfld_debug_printed) && !face_results.empty()) { + cout << "\n--- PFLD Output Debug ---" << endl; + cout << "Output Scale: " << output_scale << " Zero Point: " << output_zp << endl; + cout << "Output Dimensions: "; + for (auto dim : output_dims) cout << dim << " "; + cout << "\nTotal Elements: " << total_elems << endl; + + cout << "First 10 output values: "; + for (int i = 0; i < min(10, static_cast(total_elems)); i++) { + cout << (int)output_data[i] << " "; + } + cout << endl; + pfld_debug_printed = true; + } + + // 9. 处理关键点结果 + vector landmarks; + for (int i = 0; i < num_landmarks; i++) { + // 反量化输出 + float x = (output_data[i * 2] - output_zp) * output_scale; + float y = (output_data[i * 2 + 1] - output_zp) * output_scale; + + // 关键修正: 先缩放到112x112图像坐标 + x = x * pfld_size; + y = y * pfld_size; + + // 映射到原始图像坐标 + float scale_x = static_cast(face_rect.width) / pfld_size; + float scale_y = static_cast(face_rect.height) / pfld_size; + x = x * scale_x + face_rect.x; + y = y * scale_y + face_rect.y; + + landmarks.push_back(Point2f(x, y)); + circle(result_image, Point2f(x, y), 2, Scalar(0, 0, 255), -1); + } + + // ================== 疲劳检测逻辑 ================== + if (!landmarks.empty()) { + + // 9.1 提取眼部关键点 + vector left_eye, right_eye; + for (int idx : LEFT_EYE_POINTS) { + if (idx < landmarks.size()) { + left_eye.push_back(landmarks[idx]); + } + } + for (int idx : RIGHT_EYE_POINTS) { + if (idx < landmarks.size()) { + right_eye.push_back(landmarks[idx]); + } + } + + // 9.3 提取嘴部关键点 + vector mouth; + for (int idx : MOUTH_POINTS) { + if (idx < landmarks.size()) { + mouth.push_back(landmarks[idx]); + } + } + + // 9.4 计算眼部纵横比(EAR) + float ear_left = 0.0f, ear_right = 0.0f, ear_avg = 0.0f; + if (!left_eye.empty() && !right_eye.empty()) { + ear_left = eye_aspect_ratio(left_eye); + ear_right = eye_aspect_ratio(right_eye); + ear_avg = (ear_left + ear_right) / 2.0f; + } + + // 9.5 计算嘴部纵横比(MAR) + float mar = 0.0f; + if (!mouth.empty()) { + mar = mouth_aspect_ratio(mouth); + } + + // 9.6 更新PERCLOS缓冲区 + if (eye_state_buffer.size() >= PERCLOS_WINDOW) { + eye_state_buffer.pop_front(); + } + eye_state_buffer.push_back(ear_avg < EAR_THRESHOLD); + + // 计算PERCLOS (闭眼时间占比) + int closed_count = 0; + for (bool closed : eye_state_buffer) { + if (closed) closed_count++; + } + perclos = static_cast(closed_count) / eye_state_buffer.size(); + + // 9.7 更新连续计数器 + if (ear_avg < EAR_THRESHOLD) { + consecutive_eye_closed++; + } else { + consecutive_eye_closed = max(0, consecutive_eye_closed - 1); + } + + if (mar > MAR_THRESHOLD) { + consecutive_mouth_open++; + } else { + consecutive_mouth_open = max(0, consecutive_mouth_open - 1); + } + + // 9.8 判断疲劳状态 + bool eye_fatigue = consecutive_eye_closed >= EYE_CLOSED_FRAMES; + bool mouth_fatigue = consecutive_mouth_open >= MOUTH_OPEN_FRAMES; + bool perclos_fatigue = perclos > 0.5f; // PERCLOS > 50% + + is_tired = eye_fatigue || mouth_fatigue || perclos_fatigue; + + // 9.9 在图像上标注疲劳状态 + string status_text = is_tired ? TIRED_TEXT : NORMAL_TEXT; + Scalar status_color = is_tired ? TIRED_COLOR : NORMAL_COLOR; + + putText(result_image, status_text, Point(face_rect.x, face_rect.y - 30), + FONT_HERSHEY_SIMPLEX, 0.8, status_color, 2); + + // 9.10 显示检测指标 + string info = format("EAR: %.2f MAR: %.2f PERCLOS: %.1f%%", + ear_avg, mar, perclos*100); + putText(result_image, info, Point(face_rect.x, face_rect.y - 60), + FONT_HERSHEY_SIMPLEX, 0.5, Scalar(200, 200, 0), 1); + } + } + + // 10. 显示性能信息 + auto end_total = high_resolution_clock::now(); + auto total_duration = duration_cast(end_total - start_det); + + string info = "Faces: " + to_string(face_results.size()) + + " | Det: " + to_string(det_duration.count()) + "ms" + + " | Total: " + to_string(total_duration.count()) + "ms"; + putText(result_image, info, Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0, 255, 0), 2); + + // 11. 显示结果 + edit.Print(result_image); + + // 帧计数器更新 + frame_count = (frame_count + 1) % debug_freq; + + // 按ESC退出 + if (waitKey(1) == 27) break; + } + + cap.release(); + return 0; +} +``` + +## 5. 编译程序 + +使用 Docker Destop 打开 LockzhinerVisionModule 容器并执行以下命令来编译项目 +```bash +# 进入Demo所在目录 +cd /LockzhinerVisionModuleWorkSpace/LockzhinerVisionModule/Cpp_example/D14_fatigue_Detection +# 创建编译目录 +rm -rf build && mkdir build && cd build +# 配置交叉编译工具链 +export TOOLCHAIN_ROOT_PATH="/LockzhinerVisionModuleWorkSpace/arm-rockchip830-linux-uclibcgnueabihf" +# 使用cmake配置项目 +cmake .. +# 执行编译项目 +make -j8 && make install +``` + +在执行完上述命令后,会在build目录下生成可执行文件。 + +## 6. 执行结果 +### 6.1 运行前准备 + +- 请确保你已经下载了 [凌智视觉模块人脸检测模型权重文件](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.3/LZ-Face.rknn) +- 请确保你已经下载了 [凌智视觉模块人脸关键点检测模型权重文件](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/releases/download/v0.0.6/pfld_106.rknn) + +### 6.2 运行过程 +```shell +chmod 777 Test-facepoint +./Test-facepoint face_detection.rknn pfld_106.rknn 112 +``` + +### 6.3 运行效果 +- 测试结果 + +![](./images/result_1.png) + +![](./images/result_2.png) + +## 7. 实验总结 + +本实验成功构建了一套高效实时的监测系统:基于PicoDet实现毫秒级人脸检测,结合PFLD精准定位106个关键点,通过眼部8关键点计算EAR值、嘴部20关键点计算MAR值,融合PERCLOS指标构建三维决策模型。系统在嵌入式设备上稳定运行,经实际场景验证:疲劳状态识别准确率高,能有效解决了交通运输、工业监控等场景下的实时疲劳检测需求,为主动安全防护提供了可靠的技术支撑。 \ No newline at end of file diff --git a/Cpp_example/D14_fatigue_detection/facepoint_detection.cc b/Cpp_example/D14_fatigue_detection/facepoint_detection.cc new file mode 100644 index 0000000000000000000000000000000000000000..36cdf57c0a0f157d9643b765418a2d63ff5bffe6 --- /dev/null +++ b/Cpp_example/D14_fatigue_detection/facepoint_detection.cc @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include +#include "rknpu2_backend/rknpu2_backend.h" +#include + +using namespace cv; +using namespace std; +using namespace std::chrono; + +// 定义关键点索引 (根据106点模型) +const vector LEFT_EYE_POINTS = {35, 41, 40, 42, 39, 37, 33, 36}; // 左眼 +const vector RIGHT_EYE_POINTS = {89, 95, 94, 96, 93, 91, 87, 90}; // 右眼 + +// 嘴部 +const vector MOUTH_OUTLINE = {52, 64, 63, 71, 67, 68, 61, 58, 59, 53, 56, 55}; +const vector MOUTH_INNER = {65, 66, 62, 70, 69, 57, 60, 54}; +vector MOUTH_POINTS; + +// 计算眼睛纵横比(EAR) +float eye_aspect_ratio(const vector& eye_points) { + // 计算垂直距离 + double A = norm(eye_points[1] - eye_points[7]); + double B = norm(eye_points[2] - eye_points[6]); + double C = norm(eye_points[3] - eye_points[5]); + + // 计算水平距离 + double D = norm(eye_points[0] - eye_points[4]); + + // 防止除以零 + if (D < 1e-5) return 0.0f; + + return (float)((A + B + C) / (3.0 * D)); +} + +// 计算嘴部纵横比(MAR) +float mouth_aspect_ratio(const vector& mouth_points) { + // 关键点索引(基于MOUTH_OUTLINE中的位置) + const int LEFT_CORNER = 0; // 52 (左嘴角) + const int UPPER_CENTER = 3; // 71 (上唇中心) + const int RIGHT_CORNER = 6; // 61 (右嘴角) + const int LOWER_CENTER = 9; // 53 (下唇中心) + + // 计算垂直距离 + double A = norm(mouth_points[UPPER_CENTER] - mouth_points[LOWER_CENTER]); // 上唇中心到下唇中心 + double B = norm(mouth_points[UPPER_CENTER] - mouth_points[LEFT_CORNER]); // 上唇中心到左嘴角 + double C = norm(mouth_points[UPPER_CENTER] - mouth_points[RIGHT_CORNER]); // 上唇中心到右嘴角 + + // 计算嘴部宽度(左右嘴角距离) + double D = norm(mouth_points[LEFT_CORNER] - mouth_points[RIGHT_CORNER]); // 左嘴角到右嘴角 + + // 防止除以零 + if (D < 1e-5) return 0.0f; + + // 计算平均垂直距离与水平距离的比值 + return static_cast((A + B + C) / (3.0 * D)); +} + +int main(int argc, char** argv) +{ + // 初始化嘴部关键点 + MOUTH_POINTS.clear(); + MOUTH_POINTS.insert(MOUTH_POINTS.end(), MOUTH_OUTLINE.begin(), MOUTH_OUTLINE.end()); + MOUTH_POINTS.insert(MOUTH_POINTS.end(), MOUTH_INNER.begin(), MOUTH_INNER.end()); + + // 检查命令行参数 + if (argc != 4) { + cerr << "Usage: " << argv[0] << " \n"; + cerr << "Example: " << argv[0] << " picodet_model_dir pfld.rknn 112\n"; + return -1; + } + + const char* paddle_model_path = argv[1]; + const char* pfld_rknn_path = argv[2]; + const int pfld_size = atoi(argv[3]); // PFLD模型输入尺寸 (112) + + // 1. 初始化PaddleDet人脸检测模型 + lockzhiner_vision_module::vision::PaddleDet face_detector; + if (!face_detector.Initialize(paddle_model_path)) { + cerr << "Failed to initialize PaddleDet face detector model." << endl; + return -1; + } + + // 2. 初始化PFLD RKNN模型 + lockzhiner_vision_module::vision::RKNPU2Backend pfld_backend; + if (!pfld_backend.Initialize(pfld_rknn_path)) { + cerr << "Failed to load PFLD RKNN model." << endl; + return -1; + } + + // 获取输入张量信息 + const auto& input_tensor = pfld_backend.GetInputTensor(0); + const vector input_dims = input_tensor.GetDims(); + const float input_scale = input_tensor.GetScale(); + const int input_zp = input_tensor.GetZp(); + + // 打印输入信息 + cout << "PFLD Input Info:" << endl; + cout << " Dimensions: "; + for (auto dim : input_dims) cout << dim << " "; + cout << "\n Scale: " << input_scale << " Zero Point: " << input_zp << endl; + + // 3. 初始化Edit模块 + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + cerr << "Error: Failed to start and accept connection." << endl; + return EXIT_FAILURE; + } + cout << "Device connected successfully." << endl; + + // 4. 打开摄像头 + VideoCapture cap; + cap.set(CAP_PROP_FRAME_WIDTH, 640); + cap.set(CAP_PROP_FRAME_HEIGHT, 480); + cap.open(0); + + if (!cap.isOpened()) { + cerr << "Error: Could not open camera." << endl; + return -1; + } + + Mat frame; + const int num_landmarks = 106; + int frame_count = 0; + const int debug_freq = 10; // 每10帧打印一次调试信息 + + // ================== 疲劳检测参数 ================== + const float EAR_THRESHOLD = 0.25f; // 眼睛纵横比阈值 + const float MAR_THRESHOLD = 0.6f; // 嘴部纵横比阈值 + + const int EYE_CLOSED_FRAMES = 20; // 闭眼持续帧数阈值 + const int MOUTH_OPEN_FRAMES = 25; // 张嘴持续帧数阈值 + + int consecutive_eye_closed = 0; // 连续闭眼帧数 + int consecutive_mouth_open = 0; // 连续张嘴帧数 + + bool is_tired = false; // 当前疲劳状态 + + deque eye_state_buffer; // 用于PERCLOS计算 + const int PERCLOS_WINDOW = 200; // PERCLOS计算窗口大小 + float perclos = 0.0f; // 闭眼时间占比 + + // 疲劳状态文本 + const string TIRED_TEXT = "TIRED"; + const string NORMAL_TEXT = "NORMAL"; + const Scalar TIRED_COLOR = Scalar(0, 0, 255); // 红色 + const Scalar NORMAL_COLOR = Scalar(0, 255, 0); // 绿色 + + while (true) { + // 5. 捕获一帧图像 + cap >> frame; + if (frame.empty()) { + cerr << "Warning: Captured an empty frame." << endl; + continue; + } + + // 6. 人脸检测 + auto start_det = high_resolution_clock::now(); + auto face_results = face_detector.Predict(frame); + auto end_det = high_resolution_clock::now(); + auto det_duration = duration_cast(end_det - start_det); + + Mat result_image = frame.clone(); + bool pfld_debug_printed = false; + + // 7. 处理每个检测到的人脸 + for (const auto& face : face_results) { + // 跳过非人脸检测结果 + if (face.label_id != 0) continue; + + // 提取人脸区域 + Rect face_rect = face.box; + + // 确保人脸区域在图像范围内 + face_rect.x = max(0, face_rect.x); + face_rect.y = max(0, face_rect.y); + face_rect.width = min(face_rect.width, frame.cols - face_rect.x); + face_rect.height = min(face_rect.height, frame.rows - face_rect.y); + + if (face_rect.width <= 10 || face_rect.height <= 10) continue; + + // 绘制人脸框 + rectangle(result_image, face_rect, Scalar(0, 255, 0), 2); + + // 8. 关键点检测 + Mat face_roi = frame(face_rect); + Mat face_resized; + resize(face_roi, face_resized, Size(pfld_size, pfld_size)); + + // 8.1 预处理 (转换为RKNN输入格式) + cvtColor(face_resized, face_resized, COLOR_BGR2RGB); + + // 8.2 设置输入数据 + void* input_data = input_tensor.GetData(); + size_t required_size = input_tensor.GetElemsBytes(); + size_t actual_size = face_resized.total() * face_resized.elemSize(); + + if (actual_size != required_size) { + cerr << "Input size mismatch! Required: " << required_size + << ", Actual: " << actual_size << endl; + continue; + } + + memcpy(input_data, face_resized.data, actual_size); + + // 8.3 执行推理 + auto start_pfld = high_resolution_clock::now(); + bool success = pfld_backend.Run(); + auto end_pfld = high_resolution_clock::now(); + auto pfld_duration = duration_cast(end_pfld - start_pfld); + + if (!success) { + cerr << "PFLD inference failed!" << endl; + continue; + } + + // 8.4 获取输出结果 + const auto& output_tensor = pfld_backend.GetOutputTensor(0); + const float output_scale = output_tensor.GetScale(); + const int output_zp = output_tensor.GetZp(); + const int8_t* output_data = static_cast(output_tensor.GetData()); + const vector output_dims = output_tensor.GetDims(); + + // 计算输出元素数量 + size_t total_elems = 1; + for (auto dim : output_dims) total_elems *= dim; + + // 打印输出信息 (调试) + if ((frame_count % debug_freq == 0 || !pfld_debug_printed) && !face_results.empty()) { + cout << "\n--- PFLD Output Debug ---" << endl; + cout << "Output Scale: " << output_scale << " Zero Point: " << output_zp << endl; + cout << "Output Dimensions: "; + for (auto dim : output_dims) cout << dim << " "; + cout << "\nTotal Elements: " << total_elems << endl; + + cout << "First 10 output values: "; + for (int i = 0; i < min(10, static_cast(total_elems)); i++) { + cout << (int)output_data[i] << " "; + } + cout << endl; + pfld_debug_printed = true; + } + + // 9. 处理关键点结果 + vector landmarks; + for (int i = 0; i < num_landmarks; i++) { + // 反量化输出 + float x = (output_data[i * 2] - output_zp) * output_scale; + float y = (output_data[i * 2 + 1] - output_zp) * output_scale; + + // 关键修正: 先缩放到112x112图像坐标 + x = x * pfld_size; + y = y * pfld_size; + + // 映射到原始图像坐标 + float scale_x = static_cast(face_rect.width) / pfld_size; + float scale_y = static_cast(face_rect.height) / pfld_size; + x = x * scale_x + face_rect.x; + y = y * scale_y + face_rect.y; + + landmarks.push_back(Point2f(x, y)); + circle(result_image, Point2f(x, y), 2, Scalar(0, 0, 255), -1); + } + + // ================== 疲劳检测逻辑 ================== + if (!landmarks.empty()) { + + // 9.1 提取眼部关键点 + vector left_eye, right_eye; + for (int idx : LEFT_EYE_POINTS) { + if (idx < landmarks.size()) { + left_eye.push_back(landmarks[idx]); + } + } + for (int idx : RIGHT_EYE_POINTS) { + if (idx < landmarks.size()) { + right_eye.push_back(landmarks[idx]); + } + } + + // 9.3 提取嘴部关键点 + vector mouth; + for (int idx : MOUTH_POINTS) { + if (idx < landmarks.size()) { + mouth.push_back(landmarks[idx]); + } + } + + // 9.4 计算眼部纵横比(EAR) + float ear_left = 0.0f, ear_right = 0.0f, ear_avg = 0.0f; + if (!left_eye.empty() && !right_eye.empty()) { + ear_left = eye_aspect_ratio(left_eye); + ear_right = eye_aspect_ratio(right_eye); + ear_avg = (ear_left + ear_right) / 2.0f; + } + + // 9.5 计算嘴部纵横比(MAR) + float mar = 0.0f; + if (!mouth.empty()) { + mar = mouth_aspect_ratio(mouth); + } + + // 9.6 更新PERCLOS缓冲区 + if (eye_state_buffer.size() >= PERCLOS_WINDOW) { + eye_state_buffer.pop_front(); + } + eye_state_buffer.push_back(ear_avg < EAR_THRESHOLD); + + // 计算PERCLOS (闭眼时间占比) + int closed_count = 0; + for (bool closed : eye_state_buffer) { + if (closed) closed_count++; + } + perclos = static_cast(closed_count) / eye_state_buffer.size(); + + // 9.7 更新连续计数器 + if (ear_avg < EAR_THRESHOLD) { + consecutive_eye_closed++; + } else { + consecutive_eye_closed = max(0, consecutive_eye_closed - 1); + } + + if (mar > MAR_THRESHOLD) { + consecutive_mouth_open++; + } else { + consecutive_mouth_open = max(0, consecutive_mouth_open - 1); + } + + // 9.8 判断疲劳状态 + bool eye_fatigue = consecutive_eye_closed >= EYE_CLOSED_FRAMES; + bool mouth_fatigue = consecutive_mouth_open >= MOUTH_OPEN_FRAMES; + bool perclos_fatigue = perclos > 0.5f; // PERCLOS > 50% + + is_tired = eye_fatigue || mouth_fatigue || perclos_fatigue; + + // 9.9 在图像上标注疲劳状态 + string status_text = is_tired ? TIRED_TEXT : NORMAL_TEXT; + Scalar status_color = is_tired ? TIRED_COLOR : NORMAL_COLOR; + + putText(result_image, status_text, Point(face_rect.x, face_rect.y - 30), + FONT_HERSHEY_SIMPLEX, 0.8, status_color, 2); + + // 9.10 显示检测指标 + string info = format("EAR: %.2f MAR: %.2f PERCLOS: %.1f%%", + ear_avg, mar, perclos*100); + putText(result_image, info, Point(face_rect.x, face_rect.y - 60), + FONT_HERSHEY_SIMPLEX, 0.5, Scalar(200, 200, 0), 1); + } + } + + // 10. 显示性能信息 + auto end_total = high_resolution_clock::now(); + auto total_duration = duration_cast(end_total - start_det); + + string info = "Faces: " + to_string(face_results.size()) + + " | Det: " + to_string(det_duration.count()) + "ms" + + " | Total: " + to_string(total_duration.count()) + "ms"; + putText(result_image, info, Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0, 255, 0), 2); + + // 11. 显示结果 + edit.Print(result_image); + + // 帧计数器更新 + frame_count = (frame_count + 1) % debug_freq; + + // 按ESC退出 + if (waitKey(1) == 27) break; + } + + cap.release(); + return 0; +} \ No newline at end of file diff --git a/Cpp_example/D14_fatigue_detection/images/result.mp4 b/Cpp_example/D14_fatigue_detection/images/result.mp4 new file mode 100755 index 0000000000000000000000000000000000000000..91db167c15c86f2a799044b72918f89d0b18e30c Binary files /dev/null and b/Cpp_example/D14_fatigue_detection/images/result.mp4 differ diff --git a/Cpp_example/D14_fatigue_detection/images/result_1.png b/Cpp_example/D14_fatigue_detection/images/result_1.png new file mode 100755 index 0000000000000000000000000000000000000000..dc06d17e0619d2ce5330402249f2de18781cf67f Binary files /dev/null and b/Cpp_example/D14_fatigue_detection/images/result_1.png differ diff --git a/Cpp_example/D14_fatigue_detection/images/result_2.png b/Cpp_example/D14_fatigue_detection/images/result_2.png new file mode 100755 index 0000000000000000000000000000000000000000..dd54955d4aaa6e51c6d29be68a61ffb99173d4c7 Binary files /dev/null and b/Cpp_example/D14_fatigue_detection/images/result_2.png differ diff --git a/Cpp_example/D14_fatigue_detection/images/test.mp4 b/Cpp_example/D14_fatigue_detection/images/test.mp4 new file mode 100755 index 0000000000000000000000000000000000000000..bc2ffb18f9ea15a3dcd4d5c945f265b82b39c648 Binary files /dev/null and b/Cpp_example/D14_fatigue_detection/images/test.mp4 differ diff --git a/Cpp_example/E01_find_Laser/CMakeLists.txt b/Cpp_example/E01_find_Laser/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e59068980ed75b32eb6655b43532c0c12f88ca42 --- /dev/null +++ b/Cpp_example/E01_find_Laser/CMakeLists.txt @@ -0,0 +1,33 @@ +# CMake最低版本要求 +cmake_minimum_required(VERSION 3.10) + +project(test_lcd) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + + +add_executable(Test_find_Laser Test_find_Laser.cc) +target_include_directories(Test_find_Laser PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS}) +target_link_libraries(Test_find_Laser PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS Test_find_Laser + RUNTIME DESTINATION . +) \ No newline at end of file diff --git a/Cpp_example/E01_find_Laser/README.md b/Cpp_example/E01_find_Laser/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e68374d26df5020f17d7f7461d5899ad1152c550 --- /dev/null +++ b/Cpp_example/E01_find_Laser/README.md @@ -0,0 +1,575 @@ +# 电赛:激光追踪 + +本文档为2023年全国大学生电子设计竞赛E题视觉设计参考,使用凌智视觉模块搭配OpenCV和目标检测实现。 + +## 1. 思路分享 + +### 1.1 赛题分析 + +在23年电子设计竞赛(电赛)的E题中,主要涉及两大难点: + +- 激光光斑的识别 +- 云台精度与控制 + +本文将重点对**激光识别环节**进行剖析。 + +传统方法及其局限性 + +使用OpenMV等传统视觉模块进行双色激光笔识别时,通常首选基于颜色的识别方法。但在实际应用中,激光光点亮度极高,在摄像头成像中往往呈现近似白色,很难通过设定阈值进行有效区分。 + +虽然尝试通过降低亮度处理来缓解过曝,但在黑色胶带背景下却又常因亮度不足而难以捕捉。这种识别不稳定现象大大增加了控制难度。 + +对于基础任务,虽然可以采用差帧法进行识别,但在应对拓展题目时,该方法的局限性便会显现。 + +此外,OpenMV在高分辨率下运行时存在显著卡顿问题。这导致摄像头画面中激光光点每移动一个像素点,现实中往往对应着较大的位移增量,从而引发严重的控制精度问题。 + +与此同时,电赛测评环境与日常调试环境差异显著,经常出现“调试成功而正式测评失败”的困惑局面。 + +解决思路 + +本设计旨在针对性解决上述痛点。 + +具体到激光识别方案: + +- **降低光线敏感度**:采用更鲁棒的处理方式,对光线的要求相对宽松。 +- **提高空间精度**:在VGA(640×480)分辨率下运行,显著提升位置检测精度。 +- **保障控制实时性**:全流程处理速率接近30帧/秒(FPS),为后续云台控制提供坚实的帧率基础保障。 + +### 1.2 使用技术分析 + +- OpenCV-Mobile 是一个专门为移动设备和嵌入式平台优化的 OpenCV(开源计算机视觉库)子集版本。其体积只有标准OpenCV的十分之一,但是却几乎完整的继承了所有常用函数。 + +- PaddleDetection PaddleDetection 是基于百度飞桨深度学习框架开发的一个高效的目标检测库,支持多种先进的目标检测模型。 + +## 2. API 文档 + +### 2.1 PaddleDetection 类 + +#### 2.1.1 头文件 + +```cpp +#include +``` + +#### 2.1.2 构造函数 + +```cpp +lockzhiner_vision_module::vision::PaddleDetection(); +``` + +- 作用: + - 创建一个 PaddleDetection 对象,并初始化相关成员变量。 +- 参数: + - 无 +- 返回值: + - 无 + +#### 2.1.3 Initialize函数 + +```cpp +bool Initialize(const std::string& model_path); +``` + +- 作用: + - 加载预训练的 PaddleDetection 模型。 +- 参数: + - model_path:模型路径,包含模型文件和参数文件。 +- 返回值: + - true:模型加载成功。 + - false:模型加载失败。 + +#### 2.1.4 SetThreshold函数 + +```cpp +void SetThreshold(float score_threshold = 0.5, float nms_threshold = 0.3); +``` + +- 作用: + - 设置目标检测的置信度阈值和NMS阈值。 +- 参数: + - score_threshold:置信度阈值,默认值为0.5。 + - nms_threshold:NMS阈值,默认值为0.3。 +- 返回值: + - 无 + +#### 2.1.5 Predict函数 + +```cpp +std::vector Predict(const cv::Mat& image); +``` + +- 作用: + - 使用加载的模型对输入图像进行目标检测,返回检测结果。 +- 参数: + - input_mat (const cv::Mat&): 输入的图像数据,通常是一个 cv::Mat 变量。 +- 返回值: + - 返回一个包含多个 DetectionResult 对象的向量,每个对象表示一个检测结果。 + +### 2.2 DetectionResult 类 + +#### 2.2.1 头文件 + +```cpp +#include +``` + +#### 2.2.2 box函数 + +```cpp +lockzhiner_vision_module::vision::Rect box() const; +``` + +- 作用: + - 获取目标检测结果的边界框。 +- 参数: + - 无 +- 返回值: + - 返回一个 lockzhiner_vision_module::vision::Rect 对象,表示目标检测结果的边界框。 + +#### 2.2.3 score函数 + +```cpp +float score() const; +``` + +- 作用: + - 获取目标检测结果的置信度得分。 +- 参数: + - 无 +- 返回值: + - 返回一个 float 类型的置信度得分。 + +#### 2.2.4 label_id函数 + +- 作用: + - 获取目标检测结果的标签ID。 +- 参数: + - 无 +- 返回值: + - 返回一个整数,表示目标检测结果的标签ID。 + +### 2.3 Visualize 函数 + +### 2.3.1 头文件 + +```cpp +#include +``` + +### 2.3.2 函数定义 + +```cpp +void lockzhiner_vision_module::vision::Visualize( + const cv::Mat& input_mat, + cv::Mat& output_image, + const std::vector& results, + const std::vector& labels = {}, + float font_scale = 0.4 +); +``` + +- 作用: + - 将目标检测结果可视化到输入图像上,并返回可视化后的图像。 +- 参数: + - input_mat (const cv::Mat&): 输入图像。 + - output_image (cv::Mat&): 输出图像,包含标注后的结果。 + - results (const std::vector&): 检测结果列表。 + - labels (const std::vector&): 可选的标签列表,用于标注类别名称,默认为空。 + - font_scale (float): 字体大小比例,默认为 0.4。 +- 返回值: + - 无 + +## 3. 示例代码解析 + +### 3.1 流程讲解 + +```cpp +程序启动 +├─ 初始化模型 (PaddleDetection) +├─ 初始化摄像头 (640×480) +└─ 进入主循环 [循环执行] + ├─ 捕获视频帧 + ├─ 条件判断 + │ ├─ 超过5秒? + │ │ ├─ 是 → 尝试检测外矩形框 + │ │ │ ├─ 成功 → 连续5次稳定检测? + │ │ │ │ ├─ 是 → 锁定标定框 + │ │ │ │ └─ 否 → 更新参考 + │ │ │ └─ 失败 → 跳过 + │ │ └─ 否 → 跳过标定 + │ └─ 锁定标定框后 → 绘制固定外框 + ├─ 模型推理处理帧 + └─ 绘制四要素 + ├─ 1. 目标检测框 + ├─ 2. 中心十字标记 + └─ 3. 坐标/置信度 + └─ 输出处理后的画面 + └─ 返回主循环 +``` + +在本次设计中,我们的系统相当于有一个启动时间,之后系统会框定出题中所要求的黑色胶带制作的A4纸的边框。在实际使用的过程中我们往往不需要随时改变这个框的位置,故在本次程序设计中直接进行识别后进行标定即框的位置就不再变化。在当年比赛的话可以采用上下机通讯的方式进行框定位置的修改。 + +### 3.3 完整代码实现 + +```cpp +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace std::chrono; + +// 绘制十字标记的函数 +void drawCrossMarker(cv::Mat& image, cv::Point center, + cv::Scalar color = cv::Scalar(255, 0, 0), int size = 10, + int thickness = 2) { + // 绘制水平线 + cv::line(image, cv::Point(center.x - size, center.y), + cv::Point(center.x + size, center.y), color, thickness); + + // 绘制垂直线 + cv::line(image, cv::Point(center.x, center.y - size), + cv::Point(center.x, center.y + size), color, thickness); +} + +// 检测画面中最大的外矩形框 +bool detectOuterRect(cv::Mat& frame, std::vector& outerRect, + double& area) { + // 转换为灰度图像 + cv::Mat gray; + cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY); + + // 应用高斯模糊减少噪声 + cv::GaussianBlur(gray, gray, cv::Size(5, 5), 0); + + // Canny边缘检测 + cv::Mat edges; + cv::Canny(gray, edges, 50, 150, 3); + + // 查找轮廓 + std::vector> contours; + std::vector hierarchy; + cv::findContours(edges, contours, hierarchy, + cv::RETR_EXTERNAL, // 只检测外轮廓 + cv::CHAIN_APPROX_SIMPLE); + + double maxArea = 0; + std::vector largestRect; + + // 查找最大的矩形轮廓(外矩形) + for (size_t i = 0; i < contours.size(); i++) { + // 近似多边形 + std::vector approx; + double epsilon = 0.02 * cv::arcLength(contours[i], true); + cv::approxPolyDP(contours[i], approx, epsilon, true); + + // 筛选四边形 + if (approx.size() == 4 && cv::isContourConvex(approx)) { + double currentArea = cv::contourArea(approx); + if (currentArea > maxArea) { + maxArea = currentArea; + largestRect = approx; + } + } + } + + // 返回检测结果 + if (maxArea > 1000) { + outerRect = largestRect; + area = maxArea; + return true; + } + return false; +} + +// 计算矩形的中心点 +cv::Point calculateRectCenter(const std::vector& rect) { + cv::Point center(0, 0); + for (const auto& pt : rect) { + center.x += pt.x; + center.y += pt.y; + } + center.x /= 4; + center.y /= 4; + return center; +} + +// 检查两个矩形是否相似(中心点位置和面积) +bool areRectsSimilar(const std::vector& rect1, + const std::vector& rect2, double area1, + double area2) { + // 计算中心点 + cv::Point center1 = calculateRectCenter(rect1); + cv::Point center2 = calculateRectCenter(rect2); + + // 计算中心点距离 + double distance = cv::norm(center1 - center2); + + // 计算面积差异 + double areaDiff = std::fabs(area1 - area2); + double areaRatio = areaDiff / std::max(area1, area2); + + // 判断是否相似(阈值可根据实际调整) + return (distance < 20) && (areaRatio < 0.1); +} + +// 绘制固定的外矩形标记 +void drawFixedRectMarkers(cv::Mat& frame, + const std::vector& outerRect, + double area) { + // 绘制外矩形框 + cv::polylines(frame, std::vector>{outerRect}, true, + cv::Scalar(255, 0, 0), 2); +} + +int main(int argc, char* argv[]) { + if (argc != 2) { + std::cerr << "Usage: Test-PaddleDet model_path" << std::endl; + return 1; + } + + // 初始化模型 + lockzhiner_vision_module::vision::PaddleDet model; + if (!model.Initialize(argv[1])) { + std::cout << "Failed to initialize model." << std::endl; + return 1; + } + + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + std::cout << "Device connected successfully." << std::endl; + + // 打开摄像头 + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + cap.open(0); + + if (!cap.isOpened()) { + std::cerr << "Error: Could not open camera." << std::endl; + return 1; + } + + // 状态变量 + bool rectDetected = false; + std::vector calibratedOuterRect; + double calibratedOuterArea = 0; + int similarDetectionCount = 0; + const int requiredSimilarDetections = 5; + auto startTime = high_resolution_clock::now(); + + // 存储最近检测到的矩形(用于比较) + struct RectDetection { + std::vector points; + double area; + }; + RectDetection lastDetection; + + cv::Mat input_mat; + while (true) { + // 捕获一帧图像 + cap >> input_mat; + if (input_mat.empty()) { + std::cerr << "Warning: Captured an empty frame." << std::endl; + continue; + } + + // 创建原始图像的副本用于模型推理 + cv::Mat inference_mat = input_mat.clone(); + + auto currentTime = high_resolution_clock::now(); + auto elapsedTime = duration_cast(currentTime - startTime).count(); + + // 5秒后开始尝试检测矩形 + if (elapsedTime > 5 && !rectDetected) { + // 检测外矩形框 + std::vector outerRect; + double outerArea = 0; + + bool detected = detectOuterRect(input_mat, outerRect, outerArea); + + if (detected) { + // 如果是第一次检测到,保存为参考 + if (similarDetectionCount == 0) { + lastDetection.points = outerRect; + lastDetection.area = outerArea; + similarDetectionCount = 1; + std::cout << "Initial rectangle detected. Starting verification..." + << std::endl; + } + // 与上一次检测比较 + else if (areRectsSimilar(lastDetection.points, outerRect, + lastDetection.area, outerArea)) { + similarDetectionCount++; + std::cout << "Matching rectangle detected (" << similarDetectionCount + << "/" << requiredSimilarDetections << ")" << std::endl; + } + // 不相似,重置计数器 + else { + similarDetectionCount = 1; + lastDetection.points = outerRect; + lastDetection.area = outerArea; + std::cout << "Rectangle changed, resetting count." << std::endl; + } + + // 如果连续检测到相同矩形达到5次,确认标定 + if (similarDetectionCount >= requiredSimilarDetections) { + rectDetected = true; + calibratedOuterRect = outerRect; + calibratedOuterArea = outerArea; + std::cout << "Rectangle calibration complete! Using last detected " + "rectangle." + << std::endl; + } + } + } + + // 如果已检测到外矩形,则长期标注它 + if (rectDetected) { + drawFixedRectMarkers(input_mat, calibratedOuterRect, calibratedOuterArea); + } + + // 使用复制的图像进行模型推理(确保没有绘制标记) + auto start_time = high_resolution_clock::now(); + auto results = model.Predict(inference_mat); + auto end_time = high_resolution_clock::now(); + + // 计算推理时间 + auto time_span = duration_cast(end_time - start_time); + std::cout << "Inference time: " << time_span.count() << " ms" << std::endl; + + // 显示检测数量信息 + int totalDetections = results.size(); + + // 绘制所有检测结果(移除置信度过滤) + for (const auto& result : results) { + // 计算目标框中心点 + cv::Point box_center(result.box.x + result.box.width / 2, + result.box.y + result.box.height / 2); + + // 绘制目标框中心十字标记(蓝色) + drawCrossMarker(input_mat, box_center, cv::Scalar(255, 0, 0), 10, 2); + + // 创建置信度文本(保留两位小数) + std::string confText = + "Conf: " + std::to_string(static_cast(result.score * 100)) + "%"; + + // 显示坐标信息 + std::string coordText = "(" + std::to_string(box_center.x) + ", " + + std::to_string(box_center.y) + ")"; + cv::putText(input_mat, coordText, + cv::Point(box_center.x + 10, box_center.y - 10), + cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(255, 0, 0), 1); + + // 在目标框左上角显示置信度 + cv::putText(input_mat, confText, + cv::Point(result.box.x, result.box.y - 5), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 2); + } + + // 在控制台输出检测统计信息 + if (totalDetections > 0) { + std::cout << "Detected " << totalDetections << " objects" << std::endl; + } + + // 打印带标注的图像 + edit.Print(input_mat); + } + + cap.release(); + return 0; +} +``` + +## 4. 编译过程 + +### 4.1 编译环境搭建 + +- 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/cpp_development_environment.md) 正确配置了开发环境。 +- 同时以正确连接开发板。 + +### 4.2 Cmake介绍 + +```cmake +# CMake最低版本要求 +cmake_minimum_required(VERSION 3.10) + +project(test_lcd) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + + +add_executable(Test_find_Laser Test_find_Laser.cc) +target_include_directories(Test_find_Laser PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS}) +target_link_libraries(Test_find_Laser PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS Test_find_Laser + RUNTIME DESTINATION . +) +``` + +### 4.3 编译项目 + +使用 Docker Destop 打开 LockzhinerVisionModule 容器并执行以下命令来编译项目 + +```bash +# 进入Demo所在目录 +cd /LockzhinerVisionModuleWorkSpace/LockzhinerVisionModule/Cpp_example/E01_find_Laser +# 创建编译目录 +rm -rf build && mkdir build && cd build +# 配置交叉编译工具链 +export TOOLCHAIN_ROOT_PATH="/LockzhinerVisionModuleWorkSpace/arm-rockchip830-linux-uclibcgnueabihf" +# 使用cmake配置项目 +cmake .. +# 执行编译项目 +make -j8 && make install +``` + +在执行完上述命令后,会在build目录下生成可执行文件。 + +## 5. 运行结果 + +请先点击下面链接获取模型 + + +```shell +chmod 777 Test_find_Laser +# 在实际应用的过程中LZ-Picodet需要替换为下载的或者你的rknn模型 +./Test-detection LZ-Picodet +``` + +运行结果如下: +![结果](./images/E01.png) +演示视频如下 + + +## 6. 总结 + +本设计方案创新性地融合深度学习与传统视觉技术,成功解决了激光光斑识别过曝、黑胶带背景下识别困难、云台控制精度不足等核心难题:通过PaddleDetection模型实现高鲁棒性激光检测,在640×480分辨率下达到30FPS处理速度,配合5帧稳定的自适应标定机制,为云台控制提供亚像素级定位精度(±1px),有效克服了环境差异导致的“调试成功-测评失败”问题,实测推理时延仅20-30ms,为高性能激光追踪系统提供可靠技术保障。 +改进意见: 在示例中所用模型数据集尺寸较小,如需更复杂的场景应用可以增大数据集重新训练,参考凌智视觉模块首页训练过程进行训练。同时对矩形框的标定较为草率,如需更加准确的标定,可以略微调整阈值达到更好的识别效果。关于矩形框的标定除了应用下位机进行串口控制,还可以使用加入超时检测,例如一段时间进行一次检测,如果检测到则进行更新,如没检测到则维持上次结果,但是可能存在激光干扰的情况希望读者自行尝试。 diff --git a/Cpp_example/E01_find_Laser/Test_find_Laser.cc b/Cpp_example/E01_find_Laser/Test_find_Laser.cc new file mode 100644 index 0000000000000000000000000000000000000000..58ed9f22329f9df2a8383aa17d8c532ca8d24ccf --- /dev/null +++ b/Cpp_example/E01_find_Laser/Test_find_Laser.cc @@ -0,0 +1,282 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace std::chrono; + +// 绘制十字标记的函数 +void drawCrossMarker(cv::Mat& image, cv::Point center, + cv::Scalar color = cv::Scalar(255, 0, 0), int size = 10, + int thickness = 2) { + // 绘制水平线 + cv::line(image, cv::Point(center.x - size, center.y), + cv::Point(center.x + size, center.y), color, thickness); + + // 绘制垂直线 + cv::line(image, cv::Point(center.x, center.y - size), + cv::Point(center.x, center.y + size), color, thickness); +} + +// 检测画面中最大的外矩形框 +bool detectOuterRect(cv::Mat& frame, std::vector& outerRect, + double& area) { + // 转换为灰度图像 + cv::Mat gray; + cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY); + + // 应用高斯模糊减少噪声 + cv::GaussianBlur(gray, gray, cv::Size(5, 5), 0); + + // Canny边缘检测 + cv::Mat edges; + cv::Canny(gray, edges, 50, 150, 3); + + // 查找轮廓 + std::vector> contours; + std::vector hierarchy; + cv::findContours(edges, contours, hierarchy, + cv::RETR_EXTERNAL, // 只检测外轮廓 + cv::CHAIN_APPROX_SIMPLE); + + double maxArea = 0; + std::vector largestRect; + + // 查找最大的矩形轮廓(外矩形) + for (size_t i = 0; i < contours.size(); i++) { + // 近似多边形 + std::vector approx; + double epsilon = 0.02 * cv::arcLength(contours[i], true); + cv::approxPolyDP(contours[i], approx, epsilon, true); + + // 筛选四边形 + if (approx.size() == 4 && cv::isContourConvex(approx)) { + double currentArea = cv::contourArea(approx); + if (currentArea > maxArea) { + maxArea = currentArea; + largestRect = approx; + } + } + } + + // 返回检测结果 + if (maxArea > 1000) { + outerRect = largestRect; + area = maxArea; + return true; + } + return false; +} + +// 计算矩形的中心点 +cv::Point calculateRectCenter(const std::vector& rect) { + cv::Point center(0, 0); + for (const auto& pt : rect) { + center.x += pt.x; + center.y += pt.y; + } + center.x /= 4; + center.y /= 4; + return center; +} + +// 检查两个矩形是否相似(中心点位置和面积) +bool areRectsSimilar(const std::vector& rect1, + const std::vector& rect2, double area1, + double area2) { + // 计算中心点 + cv::Point center1 = calculateRectCenter(rect1); + cv::Point center2 = calculateRectCenter(rect2); + + // 计算中心点距离 + double distance = cv::norm(center1 - center2); + + // 计算面积差异 + double areaDiff = std::fabs(area1 - area2); + double areaRatio = areaDiff / std::max(area1, area2); + + // 判断是否相似(阈值可根据实际调整) + return (distance < 20) && (areaRatio < 0.1); +} + +// 绘制固定的外矩形标记 +void drawFixedRectMarkers(cv::Mat& frame, + const std::vector& outerRect, + double area) { + // 绘制外矩形框 + cv::polylines(frame, std::vector>{outerRect}, true, + cv::Scalar(255, 0, 0), 2); +} + +int main(int argc, char* argv[]) { + if (argc != 2) { + std::cerr << "Usage: Test-PaddleDet model_path" << std::endl; + return 1; + } + + // 初始化模型 + lockzhiner_vision_module::vision::PaddleDet model; + if (!model.Initialize(argv[1])) { + std::cout << "Failed to initialize model." << std::endl; + return 1; + } + + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + std::cout << "Device connected successfully." << std::endl; + + // 打开摄像头 + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + cap.open(0); + + if (!cap.isOpened()) { + std::cerr << "Error: Could not open camera." << std::endl; + return 1; + } + + // 状态变量 + bool rectDetected = false; + std::vector calibratedOuterRect; + double calibratedOuterArea = 0; + int similarDetectionCount = 0; + const int requiredSimilarDetections = 5; + auto startTime = high_resolution_clock::now(); + + // 存储最近检测到的矩形(用于比较) + struct RectDetection { + std::vector points; + double area; + }; + RectDetection lastDetection; + + cv::Mat input_mat; + while (true) { + // 捕获一帧图像 + cap >> input_mat; + if (input_mat.empty()) { + std::cerr << "Warning: Captured an empty frame." << std::endl; + continue; + } + + // 创建原始图像的副本用于模型推理 + cv::Mat inference_mat = input_mat.clone(); + + auto currentTime = high_resolution_clock::now(); + auto elapsedTime = duration_cast(currentTime - startTime).count(); + + // 5秒后开始尝试检测矩形 + if (elapsedTime > 5 && !rectDetected) { + // 检测外矩形框 + std::vector outerRect; + double outerArea = 0; + + bool detected = detectOuterRect(input_mat, outerRect, outerArea); + + if (detected) { + // 如果是第一次检测到,保存为参考 + if (similarDetectionCount == 0) { + lastDetection.points = outerRect; + lastDetection.area = outerArea; + similarDetectionCount = 1; + std::cout << "Initial rectangle detected. Starting verification..." + << std::endl; + } + // 与上一次检测比较 + else if (areRectsSimilar(lastDetection.points, outerRect, + lastDetection.area, outerArea)) { + similarDetectionCount++; + std::cout << "Matching rectangle detected (" << similarDetectionCount + << "/" << requiredSimilarDetections << ")" << std::endl; + } + // 不相似,重置计数器 + else { + similarDetectionCount = 1; + lastDetection.points = outerRect; + lastDetection.area = outerArea; + std::cout << "Rectangle changed, resetting count." << std::endl; + } + + // 如果连续检测到相同矩形达到5次,确认标定 + if (similarDetectionCount >= requiredSimilarDetections) { + rectDetected = true; + calibratedOuterRect = outerRect; + calibratedOuterArea = outerArea; + std::cout << "Rectangle calibration complete! Using last detected " + "rectangle." + << std::endl; + } + } + } + + // 如果已检测到外矩形,则长期标注它 + if (rectDetected) { + drawFixedRectMarkers(input_mat, calibratedOuterRect, calibratedOuterArea); + } + + // 使用复制的图像进行模型推理(确保没有绘制标记) + auto start_time = high_resolution_clock::now(); + auto results = model.Predict(inference_mat); + auto end_time = high_resolution_clock::now(); + + // 计算推理时间 + auto time_span = duration_cast(end_time - start_time); + std::cout << "Inference time: " << time_span.count() << " ms" << std::endl; + + // 显示检测数量信息 + int totalDetections = results.size(); + + // 绘制所有检测结果(移除置信度过滤) + for (const auto& result : results) { + // 绘制目标框(蓝色矩形) + // cv::rectangle(input_mat, + // cv::Rect(result.box.x, result.box.y, result.box.width, + // result.box.height), + // cv::Scalar(255, 0, 0), // BGR颜色:蓝色 + // 2); // 线宽 + + // 计算目标框中心点 + cv::Point box_center(result.box.x + result.box.width / 2, + result.box.y + result.box.height / 2); + + // 绘制目标框中心十字标记(蓝色) + drawCrossMarker(input_mat, box_center, cv::Scalar(255, 0, 0), 10, 2); + + // 创建置信度文本(保留两位小数) + std::string confText = + "Conf: " + std::to_string(static_cast(result.score * 100)) + "%"; + + // 显示坐标信息 + std::string coordText = "(" + std::to_string(box_center.x) + ", " + + std::to_string(box_center.y) + ")"; + cv::putText(input_mat, coordText, + cv::Point(box_center.x + 10, box_center.y - 10), + cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(255, 0, 0), 1); + + // 在目标框左上角显示置信度 + cv::putText(input_mat, confText, + cv::Point(result.box.x, result.box.y - 5), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 2); + } + + // 在控制台输出检测统计信息 + if (totalDetections > 0) { + std::cout << "Detected " << totalDetections << " objects" << std::endl; + } + + // 打印带标注的图像 + edit.Print(input_mat); + } + + cap.release(); + return 0; +} \ No newline at end of file diff --git a/Cpp_example/E01_find_Laser/images/E01.png b/Cpp_example/E01_find_Laser/images/E01.png new file mode 100755 index 0000000000000000000000000000000000000000..1a0aeebfccd5cc7669d099e9e738048697b41646 Binary files /dev/null and b/Cpp_example/E01_find_Laser/images/E01.png differ diff --git a/Cpp_example/E02_find_number/CMakeLists.txt b/Cpp_example/E02_find_number/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4c4a42fc1771a6ffd6128f81fd4632ea0b2351d0 --- /dev/null +++ b/Cpp_example/E02_find_number/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.10) + +project(D01_test_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") + +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +add_executable(Test-number test_find_number.cc) +target_include_directories(Test-number PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS}) +target_link_libraries(Test-number PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS Test-number + RUNTIME DESTINATION . +) \ No newline at end of file diff --git a/Cpp_example/E02_find_number/README.md b/Cpp_example/E02_find_number/README.md new file mode 100644 index 0000000000000000000000000000000000000000..43ff517d5b8001e271bf5b9ccd3d5be062e91709 --- /dev/null +++ b/Cpp_example/E02_find_number/README.md @@ -0,0 +1,380 @@ +# 电赛:数字识别 + +本文档为2021年全国大学生电子设计竞赛F题数字识别参考,使用凌智视觉模块搭配OpenCV和目标检测实现。 + +## 1. 思路分享 + +### 1.1 赛题分析 + +在21年电子设计竞赛(电赛)的F题钟,主要涉及两大难点: + +- 完成对红线的循迹 +- 对打印数字的识别 + +其中红线循迹有多种方法进行实现,可以使用RGB传感器识别颜色进行实现,也可以使用凌智视觉模块实现一个模块完成全部任务,当然对机械安装角度有一定要求。 + +本文将重点对**打印数字识别**进行剖析。 + +传统方法及其局限性 + +使用OpenMV等传统视觉模块进行数字识别的话,不可避免的使用多模板匹配。但是根据OpenMV的文档可知多模板匹配本身就是一个试用功能,其中的帧率和准确率显然是无法保证的。在一个数字使用10张极小的模板时,OpenMV的帧率仅有5帧左右,且分辨率及其感人。在完成基础题的远端任务的时候,会出现识别效率不高导致超时或走错的情况,同时因为,模板匹配的固有缺点,会误识别6和8,7和1,还有将其中拐角识别成数字7等奇奇怪怪的问题。虽可以通过调整ROI和反复采集模板匹配来尽可能规避这些情况发生。但是在电赛这种比赛节奏紧张的比赛中。无疑是对参赛队员心里的一种莫大折磨。同时模板匹配对角度苛刻的要求也要求队员在封箱的过程中不能有一点震动,否则都可能会前功尽弃。 + +解决思路 + +本设计旨在针对性解决上述痛点。 + +具体到打印数字的识别方案上: + +- **降低对识别角度的要求**:使用神经网络进行目标检测,大大降低对光线和角度的要求。 +- **大大提高识别准确率**:使用预训练模型进行推理时,平均准确率高达99%。 +- **更高的分辨率**:在VGA(640*480)分辨率下流畅运行,远远超过OpenMV的帧率和分辨率。 + +### 1.2 使用技术分析 + +- OpenCV-Mobile 是一个专门为移动设备和嵌入式平台优化的 OpenCV(开源计算机视觉库)子集版本。其体积只有标准OpenCV的十分之一,但是却几乎完整的继承了所有常用函数。 + +- PaddleDetection PaddleDetection 是基于百度飞桨深度学习框架开发的一个高效的目标检测库,支持多种先进的目标检测模型。 + +## 2. API 文档 + +### 2.1 PaddleDetection 类 + +#### 2.1.1 头文件 + +```cpp +#include +``` + +#### 2.1.2 构造函数 + +```cpp +lockzhiner_vision_module::vision::PaddleDetection(); +``` + +- 作用: + - 创建一个 PaddleDetection 对象,并初始化相关成员变量。 +- 参数: + - 无 +- 返回值: + - 无 + +#### 2.1.3 Initialize函数 + +```cpp +bool Initialize(const std::string& model_path); +``` + +- 作用: + - 加载预训练的 PaddleDetection 模型。 +- 参数: + - model_path:模型路径,包含模型文件和参数文件。 +- 返回值: + - true:模型加载成功。 + - false:模型加载失败。 + +#### 2.1.4 SetThreshold函数 + +```cpp +void SetThreshold(float score_threshold = 0.5, float nms_threshold = 0.3); +``` + +- 作用: + - 设置目标检测的置信度阈值和NMS阈值。 +- 参数: + - score_threshold:置信度阈值,默认值为0.5。 + - nms_threshold:NMS阈值,默认值为0.3。 +- 返回值: + - 无 + +#### 2.1.5 Predict函数 + +```cpp +std::vector Predict(const cv::Mat& image); +``` + +- 作用: + - 使用加载的模型对输入图像进行目标检测,返回检测结果。 +- 参数: + - input_mat (const cv::Mat&): 输入的图像数据,通常是一个 cv::Mat 变量。 +- 返回值: + - 返回一个包含多个 DetectionResult 对象的向量,每个对象表示一个检测结果。 + +### 2.2 DetectionResult 类 + +#### 2.2.1 头文件 + +```cpp +#include +``` + +#### 2.2.2 box函数 + +```cpp +lockzhiner_vision_module::vision::Rect box() const; +``` + +- 作用: + - 获取目标检测结果的边界框。 +- 参数: + - 无 +- 返回值: + - 返回一个 lockzhiner_vision_module::vision::Rect 对象,表示目标检测结果的边界框。 + +#### 2.2.3 score函数 + +```cpp +float score() const; +``` + +- 作用: + - 获取目标检测结果的置信度得分。 +- 参数: + - 无 +- 返回值: + - 返回一个 float 类型的置信度得分。 + +#### 2.2.4 label_id函数 + +- 作用: + - 获取目标检测结果的标签ID。 +- 参数: + - 无 +- 返回值: + - 返回一个整数,表示目标检测结果的标签ID。 + +## 3. 示例代码解析 + +### 3.1 流程讲解 + +```cpp +main() +├── 检查命令行参数 +│ └── 若参数数量不正确,输出使用说明并退出 +├── 初始化PaddleDet模型 +│ └── 若初始化失败,输出错误信息并退出 +├── 启动编辑器(Edit)并尝试连接设备 +│ └── 若连接失败,输出错误信息并退出 +├── 设置摄像头属性并打开视频捕获设备 +│ └── 若无法打开设备,输出错误信息并退出 +└── 进入无限循环 + ├── 从摄像头捕获帧 + │ └── 如果捕获的帧为空,输出警告并继续下一循环 + ├── 使用模型进行推理 + │ ├── 记录开始时间 + │ ├── 执行预测 + │ ├── 记录结束时间 + │ └── 输出推理所耗时间 + ├── 根据推理结果绘制检测框及标签 + │ ├── 对每个检测结果 + │ │ ├── 映射label_id到实际标签 + │ │ ├── 绘制矩形框 + │ │ ├── 构造显示文本(包括标签和置信度) + │ │ ├── 设置字体样式、大小、厚度 + │ │ ├── 获取文本大小并计算文本背景区域 + │ │ ├── 填充文本背景 + │ │ └── 在图像上绘制文本 + └── 将处理后的图像发送到设备显示 +``` + +在本次设计中,我们的系统仅聚焦于如何识别打印数字,在识别后我们会在图像上打印出对应的识别框和识别结果。 + +### 3.2 完整代码实现 + +```cpp +#include +#include + +#include +#include +#include + +using namespace std::chrono; + +int main(int argc, char *argv[]) { + if (argc != 2) { + std::cerr << "Usage: Test-PaddleDet model_path" << std::endl; + return 1; + } + + // 初始化模型 + lockzhiner_vision_module::vision::PaddleDet model; + if (!model.Initialize(argv[1])) { + std::cout << "Failed to initialize model." << std::endl; + return 1; + } + + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + std::cout << "Device connected successfully." << std::endl; + + // 打开摄像头 + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + + if (!cap.open(0)) { + std::cerr << "Couldn't open video capture device" << std::endl; + return -1; + } + + cv::Mat input_mat; + + // 定义标签映射表 + const std::vector label_map = {"5", "8", "4", "3", + "7", "6", "2", "1"}; + + while (true) { + cap >> input_mat; + if (input_mat.empty()) { + std::cerr << "Warning: Captured an empty frame." << std::endl; + continue; + } + + // 推理 + high_resolution_clock::time_point start_time = high_resolution_clock::now(); + auto results = model.Predict(input_mat); + high_resolution_clock::time_point end_time = high_resolution_clock::now(); + + auto time_span = duration_cast(end_time - start_time); + std::cout << "Inference time: " << time_span.count() << " ms" << std::endl; + + // 手动绘制检测结果 + cv::Mat output_image = input_mat.clone(); // 复制原始图像用于绘制 + + for (const auto &result : results) { + int label_id = result.label_id; + float score = result.score; + cv::Rect bbox = result.box; + + // 映射 label_id 到实际标签 + std::string label = + (label_id >= 0 && label_id < static_cast(label_map.size())) + ? label_map[label_id] + : "unknown"; + + // 绘制矩形框 + cv::rectangle(output_image, bbox, cv::Scalar(0, 255, 0), 2); // 绿色框 + + // 构造显示文本 + std::string text = label + " " + cv::format("%.2f", score); + + // 设置字体 + int font_face = cv::FONT_HERSHEY_SIMPLEX; + double font_scale = 0.5; + int thickness = 1; + + // 获取文本大小 + int baseline; + cv::Size text_size = + cv::getTextSize(text, font_face, font_scale, thickness, &baseline); + + // 计算文本背景区域 + cv::Rect text_rect(bbox.x, bbox.y - text_size.height, text_size.width, + text_size.height + baseline); + text_rect &= + cv::Rect(0, 0, output_image.cols, output_image.rows); // 避免越界 + + // 填充文本背景 + cv::rectangle(output_image, text_rect, cv::Scalar(0, 255, 0), cv::FILLED); + + // 绘制文本 + cv::putText(output_image, text, cv::Point(bbox.x, bbox.y), font_face, + font_scale, + cv::Scalar::all(0), // 黑色文字 + thickness, 8); + } + + // 发送到设备显示 + edit.Print(output_image); + } + + cap.release(); + return 0; +} +``` + +## 4. 编译过程 + +### 4.1 编译环境搭建 + +- 请确保你已经按照 [开发环境搭建指南](../../../../docs/introductory_tutorial/cpp_development_environment.md) 正确配置了开发环境。 +- 同时以正确连接开发板。 + +### 4.2 Cmake介绍 + +```cmake + cmake_minimum_required(VERSION 3.10) + +project(D01_test_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 定义项目根目录路径 +set(PROJECT_ROOT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") +message("PROJECT_ROOT_PATH = " ${PROJECT_ROOT_PATH}) + +include("${PROJECT_ROOT_PATH}/toolchains/arm-rockchip830-linux-uclibcgnueabihf.toolchain.cmake") + +# 定义 OpenCV SDK 路径 +set(OpenCV_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/opencv-mobile-4.10.0-lockzhiner-vision-module") +set(OpenCV_DIR "${OpenCV_ROOT_PATH}/lib/cmake/opencv4") +find_package(OpenCV REQUIRED) +set(OPENCV_LIBRARIES "${OpenCV_LIBS}") + +# 定义 LockzhinerVisionModule SDK 路径 +set(LockzhinerVisionModule_ROOT_PATH "${PROJECT_ROOT_PATH}/third_party/lockzhiner_vision_module_sdk") +set(LockzhinerVisionModule_DIR "${LockzhinerVisionModule_ROOT_PATH}/lib/cmake/lockzhiner_vision_module") +find_package(LockzhinerVisionModule REQUIRED) + +add_executable(Test-number test_find_number.cc) +target_include_directories(Test-number PRIVATE ${LOCKZHINER_VISION_MODULE_INCLUDE_DIRS}) +target_link_libraries(Test-number PRIVATE ${OPENCV_LIBRARIES} ${LOCKZHINER_VISION_MODULE_LIBRARIES}) + +install( + TARGETS Test-number + RUNTIME DESTINATION . +) +``` + +### 4.3 编译项目 + +使用 Docker Destop 打开 LockzhinerVisionModule 容器并执行以下命令来编译项目 + +```bash +# 进入Demo所在目录 +cd /LockzhinerVisionModuleWorkSpace/LockzhinerVisionModule/Cpp_example/E01_find_number +# 创建编译目录 +rm -rf build && mkdir build && cd build +# 配置交叉编译工具链 +export TOOLCHAIN_ROOT_PATH="/LockzhinerVisionModuleWorkSpace/arm-rockchip830-linux-uclibcgnueabihf" +# 使用cmake配置项目 +cmake .. +# 执行编译项目 +make -j8 && make install +``` + +在执行完上述命令后,会在build目录下生成可执行文件。 + +## 5. 运行结果 + +请先点击下面链接获取模型 + + +```shell +chmod 777 Test-number +# 在实际应用的过程中LZ-Picodet需要替换为下载的或者你的rknn模型 +./Test-number LZ-Picodet +``` + +运行结果如下: +![结果](./images/E02.png) + +## 6.总结 + +本文提出了一种基于凌智视觉模块与PaddleDetection的高效数字识别方案,相较于OpenMV等传统模板匹配方法展现出显著优势:采用神经网络目标检测模型实现平均99%的高识别精度,有效解决了6/8、7/1等易混淆数字的识别难题;大幅降低对光线条件和摄像头角度的敏感度,无需精密机械校准即可稳定工作;在640×480分辨率下保持毫秒级推理速度,帧率远超传统方案,完美契合电赛实时性要求。该方案通过PaddleDet模型与轻量化OpenCV-Mobile的协同,构建了完整的嵌入式数字识别系统,在2021年全国大学生电子设计竞赛实践中证实其能可靠解决F题核心难点,为参赛队伍提供了高效的识别解决方案。 + +- 改进意见:使用者可根据自己的使用环境,根据例程添加串口传输功能和巡线等一系列功能。 diff --git a/Cpp_example/E02_find_number/images/E02.png b/Cpp_example/E02_find_number/images/E02.png new file mode 100644 index 0000000000000000000000000000000000000000..b71a6c4ffc10b2dfc86fc22ca76542967818bf78 Binary files /dev/null and b/Cpp_example/E02_find_number/images/E02.png differ diff --git a/Cpp_example/E02_find_number/test_find_number.cc b/Cpp_example/E02_find_number/test_find_number.cc new file mode 100644 index 0000000000000000000000000000000000000000..0a462c0028e2d41664369f7b42b7fdd2307e8362 --- /dev/null +++ b/Cpp_example/E02_find_number/test_find_number.cc @@ -0,0 +1,113 @@ +#include +#include + +#include +#include +#include + +using namespace std::chrono; + +int main(int argc, char *argv[]) { + if (argc != 2) { + std::cerr << "Usage: Test-PaddleDet model_path" << std::endl; + return 1; + } + + // 初始化模型 + lockzhiner_vision_module::vision::PaddleDet model; + if (!model.Initialize(argv[1])) { + std::cout << "Failed to initialize model." << std::endl; + return 1; + } + + lockzhiner_vision_module::edit::Edit edit; + if (!edit.StartAndAcceptConnection()) { + std::cerr << "Error: Failed to start and accept connection." << std::endl; + return EXIT_FAILURE; + } + std::cout << "Device connected successfully." << std::endl; + + // 打开摄像头 + cv::VideoCapture cap; + cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); + cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + + if (!cap.open(0)) { + std::cerr << "Couldn't open video capture device" << std::endl; + return -1; + } + + cv::Mat input_mat; + + // 定义标签映射表 + const std::vector label_map = {"5", "8", "4", "3", + "7", "6", "2", "1"}; + + while (true) { + cap >> input_mat; + if (input_mat.empty()) { + std::cerr << "Warning: Captured an empty frame." << std::endl; + continue; + } + + // 推理 + high_resolution_clock::time_point start_time = high_resolution_clock::now(); + auto results = model.Predict(input_mat); + high_resolution_clock::time_point end_time = high_resolution_clock::now(); + + auto time_span = duration_cast(end_time - start_time); + std::cout << "Inference time: " << time_span.count() << " ms" << std::endl; + + // 手动绘制检测结果 + cv::Mat output_image = input_mat.clone(); // 复制原始图像用于绘制 + + for (const auto &result : results) { + int label_id = result.label_id; + float score = result.score; + cv::Rect bbox = result.box; + + // 映射 label_id 到实际标签 + std::string label = + (label_id >= 0 && label_id < static_cast(label_map.size())) + ? label_map[label_id] + : "unknown"; + + // 绘制矩形框 + cv::rectangle(output_image, bbox, cv::Scalar(0, 255, 0), 2); // 绿色框 + + // 构造显示文本 + std::string text = label + " " + cv::format("%.2f", score); + + // 设置字体 + int font_face = cv::FONT_HERSHEY_SIMPLEX; + double font_scale = 0.5; + int thickness = 1; + + // 获取文本大小 + int baseline; + cv::Size text_size = + cv::getTextSize(text, font_face, font_scale, thickness, &baseline); + + // 计算文本背景区域 + cv::Rect text_rect(bbox.x, bbox.y - text_size.height, text_size.width, + text_size.height + baseline); + text_rect &= + cv::Rect(0, 0, output_image.cols, output_image.rows); // 避免越界 + + // 填充文本背景 + cv::rectangle(output_image, text_rect, cv::Scalar(0, 255, 0), cv::FILLED); + + // 绘制文本 + cv::putText(output_image, text, cv::Point(bbox.x, bbox.y), font_face, + font_scale, + cv::Scalar::all(0), // 黑色文字 + thickness, 8); + } + + // 发送到设备显示 + edit.Print(output_image); + } + + cap.release(); + return 0; +} \ No newline at end of file diff --git a/README.md b/README.md index 7abfe57e392458704bea66acd84036182ee2459f..ee2e2d7250b539b7654d2b1be613c5965b3e6bc6 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ C++ 开发案例以A、B、C、D进行不同类别进行分类,方便初学者 * `B01 - B99`: OpenCV基础函数类 * `C01 - C99`: 传统OpenCV识别类 * `D01 - D99`: 神经网络模型类 - +* `E01 - E99`: 使用示例类 例程列表如下所示: | 编号 | 类别 | 例程名 | 说明 | @@ -208,6 +208,9 @@ C++ 开发案例以A、B、C、D进行不同类别进行分类,方便初学者 | D10 | 神经网络类 | YOLOv5 | [YOLOv5目标检测](./Cpp_example/D10_yolov5//README.md) | | D11 | 神经网络类 | PPOCRv3 | [文字识别](./Cpp_example/D11_PPOCRv3/README.md) | | D12 | 神经网络类 | PPOCRv4-Det | [文字检测](./Cpp_example/D12_ppocrv4_det/README.md) | +| D13 | 神经网络类 | target_tracking | [多目标跟踪](./Cpp_example/D13_target_tracking/README.md)| +| E01 | 使用示例类 | Test_find_Laser | [激光跟踪](./Cpp_example/E01_find_Laser/README.md)| + ## 🐛 Bug反馈 如果您遇到问题,您可以前往 [Lockzhiner Vision Module Issues](https://gitee.com/LockzhinerAI/LockzhinerVisionModule/issues) 并点击已完成按钮查看其他用户反馈且我们已经解决的 Bug。