diff --git a/samples/contribute/ACT/NNN/CMakeLists.txt b/samples/contribute/ACT/NNN/CMakeLists.txt new file mode 100755 index 0000000000000000000000000000000000000000..63f9d9ab8eabb06c7c974f7a82aab40951b8bb2d --- /dev/null +++ b/samples/contribute/ACT/NNN/CMakeLists.txt @@ -0,0 +1,7 @@ +# CMake lowest version requirement +cmake_minimum_required(VERSION 3.5.1) + +# project information +project(ACL_CLASSIFICATION_RESNET50) + +add_subdirectory("./src") diff --git a/samples/contribute/ACT/NNN/README.md b/samples/contribute/ACT/NNN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6d4b39d4f25d6a270c47e8e9ac372808c134a320 --- /dev/null +++ b/samples/contribute/ACT/NNN/README.md @@ -0,0 +1,302 @@ +# 基于ACT网络实现模仿学习 +## 概述 +ACT(Action Chunking with Transformers)是面向机器人学习场景的高性能端到端动作控制模型。相比传统模块化机器人控制模型,ACT采用轻量化Transformer架构作为核心骨干进行动作表征学习,结合多模态感知融合模块和时序动作优化网络,在控制精度和实时响应速度上均有显著提升。 + +- 参考实现: + ``` + https://gitcode.com/openeuler/lerobot_ros2/tree/master/src/lerobot/policies/act + ``` + +- 输入数据 + | 输入数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | ---------------- | ------------ | + | observation.state | FP32 | 1 x 6 | - | + | observation.images.top | RGB_FP32 | 1 x 3 x 240 x 320 | NCHW | + | observation.images.wrist | RGB_FP32 | 1 x 3 x 240 x 320 | NCHW | + +- 输出数据 + | 输出数据 | 数据类型 | 大小 | 数据排布格式 | + | -------- | -------- | ----------- | ----------- | + | feature_map_1 | FP32 | 1x100x6 | [N, action_n, motor_n] | + +## 原理介绍 +本样例涉及的关键功能点如下: +- **初始化** + - 调用`aclInit`接口初始化ACL配置; + - 调用`aclFinalize`接口实现ACL去初始化。 +- **Device管理** + - 调用`aclrtSetDevice`接口指定运算Device; + - 调用`aclrtGetRunMode`接口获取运行模式,按模式差异化处理流程; + - 调用`aclrtResetDevice`接口复位Device,回收资源。 +- **Context管理** + - 调用`aclrtCreateContext`接口创建Context; + - 调用`aclrtDestroyContext`接口销毁Context。 +- **Stream管理** + - 调用`aclrtCreateStream`接口创建Stream; + - 调用`aclrtDestroyStream`接口销毁Stream。 +- **内存管理** + - 调用`aclrtMalloc`接口申请Device内存; + - 调用`aclrtFree`接口释放Device内存。 +- **数据传输** + - 调用`aclrtMemcpy`接口通过内存复制实现数据传输。 +- 
**模型推理** + - 调用`aclmdlLoadFromMem`接口从`*.om`文件加载模型; + - 调用`aclmdlExecute`接口执行同步模型推理; + - 调用`aclmdlUnload`接口卸载模型。 + +## 目录结构 +样例代码结构如下: +``` +├── ACT // 项目根目录(基于ACT网络的机械臂模仿学习) +│ +├── build // 编译构建目录(存放中间文件、可执行程序) +│ +├── data // 输入数据与推理结果目录 +│ ├── observation.images.top_240_320.bin // 顶部摄像头图像数据(二进制) +│ ├── observation.images.wrist_240_320.bin // 腕部摄像头图像数据(二进制) +│ ├── observation.state_240_320.bin // 机械臂状态数据(关节角度、位姿等,二进制) +│ +├── inc // 头文件目录(存放CPP头文件) +├── model // 模型文件目录(存放OM离线模型、配置文件) +├── out // 输出目录(存放推理结果、日志) +│ +├── script // 辅助脚本目录 +│ ├── model_test.py // ACT模型推理测试脚本(验证功能、调试输入输出) +│ +├── src // 核心代码目录(CPP实现推理、数据处理逻辑) +│ ├── acl.json // ACL系统初始化配置文件(指定设备、上下文等) +│ ├── CMakeLists.txt // src目录编译脚本(定义编译规则) +│ ├── main.cpp // 主函数入口(调度:数据加载→模型推理→结果处理) +│ ├── model_process.cpp // 模型处理模块(OM加载、异步推理提交) +│ ├── result.txt // 推理结果临时存储文件(记录机械臂动作输出) +│ ├── sample_process.cpp // 样本处理模块(二进制数据加载、解析、预处理) +│ +├── utils // 工具模块目录(通用辅助功能) +│ ├── CMakeLists.txt // utils目录编译脚本 +│ +├── config.yaml // 全局配置文件(定义数据路径、模型参数、推理参数) +├── README.md // 项目说明文档(环境依赖、编译运行步骤、目录解释) +``` + +## 推理环境准备 +1. 查看芯片名称: + ```bash + cat /proc/umap/sys + # 示例回显(芯片名为SS928V100,需自行替换实际芯片名) + [SYS] Version: [SS928V100XXXXXXXXX] + ``` + +2. 环境版本配套要求: + | 芯片型号 | 算力引擎 | soc_version | 环境准备指导 | CANN包版本 | 编译工具链 | 板端OS | SDK | + | -------- | ------- | ----------- | ------------ | ---------- | ---------- | --- | ---- | + | Hi3403V100 | NNN | SS928V100 | [推理环境准备](https://gitee.com/HiSpark/modelzoo/blob/master/docs/Hi3403V100%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E6%90%AD%E5%BB%BA.md) | [5.20.t6.2.b060] | aarch64-openeuler-linux-gnu-g++ | [openEuler](https://pages.openeuler.openatom.cn/embedded/docs/build/html/master/bsp/arm64/hisilicon/hieulerpi/update.html) | SS928 V100R001C02SPC022 | + + 系统驱动安装,参考:https://gitee.com/HiSpark/ss928v100_gcc/tree/Beta-v0.9.2 + + SDK编译工具链的安装,参考:https://pages.openeuler.openatom.cn/embedded/docs/build/html/master/getting_started/index.html#install-openeuler-embedded-sdk + + +## 快速上手 +### 获取源码 +1. 
克隆参考代码仓: + ```bash + git clone https://gitcode.com/openeuler/lerobot_ros2.git + cd lerobot_ros2 + ``` +2. 安装依赖:参考代码仓README文档完成环境依赖安装。 +3. 训练数据集下载:https://huggingface.co/datasets/lwh2017/grab_banana/tree/main/banana_grasp_100_320x240 + +### 模型转化 +通过PyTorch将包含多个`.safetensors`文件的权重文件夹转为`.onnx`文件,再用ATC工具转为`.om`离线推理模型: +1. 准备权重文件: + 模型下载链接:https://huggingface.co/datasets/lwh2017/grab_banana + ```bash + mkdir model # 创建模型目录 + # 将下载的模型权重文件夹(含.safetensors文件)放入model目录 + ``` +2. 导出ONNX文件: + ```bash + cd lerobot_ros2 + # pretrained_model:权重文件夹路径;act:模型类型 + python src/lerobot/oee/export_onnx.py ../model/pretrained_model/ act + # 移动生成的ONNX模型到model目录 + mv ./model/pretrained_model/act_ros2_simplified.onnx ../model/act_ros2_simplified.onnx + cd .. + ``` + **export_onnx.py参数说明**: + | 参数/位置参数 | 说明 | + |---------------|------| + | 位置参数1(pretrained_model) | 必选,ACT预训练权重文件夹路径(内含.safetensors文件) | + | 位置参数2(model_type) | 必选,模型类型,固定为"act" | +3. ATC工具转OM模型(Hi3403V100 SVP_NNN平台): + ```bash + # 若无数值校准bin文件,需先通过preprocess.py生成;多文件用;分隔 + atc --model="./act_ros2_simplified.onnx" \ + --framework="5" \ + --input_format="NCHW" \ + --save_original_model="true" \ + --output="./act_ros2_simplified" \ + --soc_version=OPTG \ + --release=0 + ``` + 成功后生成`act_ros2_simplified.om`文件,通过以下命令,将模型进行重命名,供main文件加载。 + ``` + mv ./model/act_ros2_simplified.om ./model/act_distill_fp32_for_mindcmd_simp_release.om + ``` + **ATC命令核心参数说明**: + | 参数 | 说明 | + |------|------| + | --model | 必选,待转换的ONNX模型文件路径 | + | --framework | 必选,原始框架类型(5=ONNX) | + | --output | 必选,输出OM模型的路径(无需后缀) | + | --image_list | 必选,量化校准数据路径,格式为“输入名:文件路径;输入名:文件路径” | + | --soc_version | 必选,处理器型号(如SS928V100) | + + 注意:若找不到atc命令,参考“推理环境准备”配置环境。 + +### 模型推理 +#### 步骤1:编译代码 +1. 创建编译目录: + ```bash + mkdir -p build + cd build + ``` +2. 
生成编译文件(交叉编译示例:X86→ARM): + ```bash + cmake ../src -Dtarget=board -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=aarch64-mix210-linux-g++ -DCMAKE_C_COMPILER=/usr/bin/cc -DCMAKE_SKIP_RPATH=TRUE -DCMAKE_CXX_FLAGS="-I/home/Ascend/ascend-toolkit/5.20.t6.2.b060/arm64-lmixlinux200/aarch64-linux/include" -DCMAKE_CXX_LINK_FLAGS="-L/home/Ascend/ascend-toolkit/5.20.t6.2.b060/arm64-lmixlinux200/aarch64-linux/devlib -lascendcl -lpthread -ldl" -DCMAKE_CXX_COMPILER_WORKS=1 + ``` + | 参数 | 说明 | + |------|------| + |-Dtarget=board |必选,指定编译目标为板端运行 | + |-DCMAKE_BUILD_TYPE=Release |可选,编译模式(Release = 生产模式,Debug = 调试模式) | + |-DCMAKE_CXX_COMPILER=aarch64-mix210-linux-g++ | 必选,指定 C++ 交叉编译工具链为 aarch64-mix210-linux-g++ | + |-DCMAKE_C_COMPILER=/usr/bin/cc | 必选,指定 C 语言编译器路径为系统默认的 /usr/bin/cc | + |-DCMAKE_SKIP_RPATH=TRUE |可选,禁用运行时库路径(RPATH)的生成,避免编译产物依赖特定库路径 | + |-DCMAKE_CXX_FLAGS="-I/home/Ascend/ascend-toolkit/5.20.t6.2.b060/arm64-lmixlinux200/aarch64-linux/include" |必选,C++ 编译选项:添加 Ascend(昇腾)工具链的头文件搜索路径 | + |-DCMAKE_CXX_LINK_FLAGS="-L/home/Ascend/ascend-toolkit/5.20.t6.2.b060/arm64-lmixlinux200/aarch64-linux/devlib -lascendcl -lpthread -ldl" | 必选,C++ 链接选项:添加 Ascend(昇腾)工具链的库文件搜索路径,并链接 ascendcl、pthread、dl 库 | + |-DCMAKE_CXX_COMPILER_WORKS=1 |必选,强制指定 C++ 编译器可用 | + +3. 编译生成可执行文件: + ```bash + make # 生成的main在./out目录 + ``` + +#### 步骤2:运行推理应用 +在运行环境(板端)通过`model_test.py`调用C++可执行文件,完成数据预处理、推理、结果解析: +1. 部署文件:将样例目录上传至运行环境(Host),如`$HOME/ACT`; +2. 授权可执行文件: + ```bash + cd $HOME/ACT/out + chmod +x main + ``` +3. 配置Python环境: + ```bash + # 创建并激活conda虚拟环境 + conda create -n act_om python=3.8 -y + conda activate act_om + # 安装依赖 + pip install numpy==1.24.3 + ``` +4. 准备推理数据:将输入`.bin`文件放入`./data`目录; +5. 运行推理脚本: + ```bash + python model_test.py --image_list "bin文件路径1;bin文件路径2;..." + ``` + **model_test.py参数说明**: + | 参数 | 类型 | 必选 | 说明 | + |------|------|------|------| + | --image_list | str | 是 | 以分号分隔的bin文件路径列表,顺序需与模型输入(state/top/wrist)对应 | +6. 推理结果:保存于`./result.txt`。 + +#### 步骤3:验证精度和性能 +1. 精度验证: + - 测试方法 + 1. 
数据准备:选取20个有效测试样本,构建标准化输入批次(batch_path: ../ACT/data/batches.json); + 2. 基准推理:通过PyTorch原生框架加载ACT模型,完成20个样本的推理,保存首个1×6 action向量作为基准值; + 3. OM推理:通过昇腾OM模型加载器执行相同20个样本的推理,提取首个1×6 action向量; + 4. 误差计算:逐样本计算OM输出与PyTorch输出的L1 Loss,统计平均值、极值等关键指标。 + - 开发环境生成目标动作: + ```bash + cd lerobot_ros2 + python ./src/lerobot/oee/ascend/utils/loss_compare.py \ + --device cpu --generate-target \ + --batch_path ../ACT/data/batches.json \ + --target_path ../ACT/data/target.json \ + --policy_path_act model/pretrained_model/ + ``` + - 运行环境对比误差: + ```bash + cd lerobot_ros2 + python ./src/lerobot/oee/ascend/utils/loss_compare.py \ + --device cpu \ + --batch_path ../ACT/data/batches.json \ + --target_path ../ACT/data/target.json \ + --policy_path_act model/pretrained_model/ + ``` + **loss_compare.py核心参数说明**: + | 参数 | 类型 | 必选 | 说明 | + |------|------|------|------| + | --device | str | 是 | 运行设备,可选"cpu"/"cuda"/"npu" | + | --generate-target | 开关 | 否 | 若指定,基于PyTorch模型生成目标动作并保存至target_path | + | --batch_path | str | 是 | 输入batches.json文件路径(机械臂观测数据) | + | --target_path | str | 是 | 目标动作文件(target.json)路径,用于精度对比 | + | --policy_path_act | str | 是 | ACT预训练模型权重文件夹路径 | + + NNN平台精度结果:(根据实际测试补充) + + 1. 测试对象 + - 模型类型:ACT动作预测模型,推理输出维度为`1×100×6`的动作矩阵(对应100个动作步,每个步为6维动作向量); + - 对比维度:取模型输出矩阵中首个`1×6`的action向量作为核心对比对象(实际部署中优先执行该向量); + - 测试样本:共20个独立测试样本,覆盖不同观测输入场景。 + + 2. 测试指标 + 采用**L1 Loss(平均绝对误差)** 作为精度差异量化指标,计算公式为: + $$L1_{loss} = \sum_{i=1}^6 |a_{OM,i} - a_{PyTorch,i}|$$ + 其中,$a_{OM,i}$ 为OM模型输出的1×6向量第$i$维值,$a_{PyTorch,i}$ 为PyTorch原生模型输出的1×6向量第$i$维值。 + + 3. 
测试结果 + - 单样本L1 Loss明细 + + | 样本序号 | L1 Loss值 | 样本序号 | L1 Loss值 | + |----------|-----------------|----------|-----------------| + | 0 | 0.000008265178 | 10 | 0.000013351440 | + | 1 | 0.000010410945 | 11 | 0.000003337860 | + | 2 | 0.000002702077 | 12 | 0.000007390976 | + | 3 | 0.000007867813 | 13 | 0.000007947286 | + | 4 | 0.000006357829 | 14 | 0.000004688899 | + | 5 | 0.000006675720 | 15 | 0.000004847845 | + | 6 | 0.000008344650 | 16 | 0.000008821487 | + | 7 | 0.000011364619 | 17 | 0.000007947286 | + | 8 | 0.000008583069 | 18 | 0.000002940496 | + | 9 | 0.000006198883 | 19 | 0.000000715256 | + + - 统计指标 + + | 统计项 | 数值 | + |--------------|-----------------| + | 平均L1 Loss | 0.000006937981 | + | 最小L1 Loss | 0.000000715256(样本 19)| + | 最大L1 Loss | 0.000013351440(样本 10)| + | 中位数L1 Loss| 0.000007629395 | + | 标准差 | 0.0000028943 | + +2. 性能验证 + ```bash + cd lerobot_ros2 + python ./src/lerobot/oee/ascend/utils/loss_compare.py \ + --device cpu --generate-target \ + --batch_path ../ACT/data/batches.json \ + --target_path ../ACT/data/target.json \ + --policy_path_act model/pretrained_model/ + ``` + + **性能验证说明** + 脚本运行时会输出多维度性能指标和数据规格信息,用于全面评估ACT模型推理性能,核心信息如下: + 1. **输入数据规格**(单次推理输入): + - 共3路输入数据,具体维度/类型/大小: + - 输入0:形状`(1, 6)`,float32类型,元素数6,字节数24; + - 输入1:形状`(1, 3, 240, 320)`(3通道240×320图像),float32类型,元素数230400,字节数921600; + - 输入2:形状`(1, 3, 240, 320)`(3通道240×320图像),float32类型,元素数230400,字节数921600; + 2. **推理耗时指标**: + - 模型核心推理时间:单次推理约**2.69秒**(仅模型前向计算耗时,不含数据传输/解析); + - 端到端推理时间:单次推理约**8秒**(含数据打包、C++进程通信、输出解析、张量转换全流程耗时),可通过流水线设计增加推理吞吐; diff --git a/samples/contribute/ACT/NNN/inc/model_process.h b/samples/contribute/ACT/NNN/inc/model_process.h new file mode 100644 index 0000000000000000000000000000000000000000..3f7ffcc56596e0111c7b0e9404a5368cfe0c62a3 --- /dev/null +++ b/samples/contribute/ACT/NNN/inc/model_process.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2026 Huawei Technologies Co., Ltd + * This file is part of [Hispark/modelzoo]. 
+ * + * [Hispark/modelzoo] is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, version 3 of the License only. + * + * [Hispark/modelzoo] is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with [Hispark/modelzoo]. If not, see . + */ +#ifndef MODEL_PROCESS_H +#define MODEL_PROCESS_H + +#include +#include +#include "utils.h" +#include "acl/acl.h" + +/** Class: ModelProcess. Declares the model-side steps of the sample: loading and unloading a model, creating and destroying the model description and the input/output datasets, running Execute(), and dumping or printing the outputs. Only declarations live in this header; behavior is defined in the implementation file. */ class ModelProcess { +public: + /** + * Function: Class constructor + * Description: Initialize a ModelProcess instance with default resource states + */ + ModelProcess(); + + /** + * Function: Class destructor + * Description: Release all allocated resources and destroy the ModelProcess instance + */ + ~ModelProcess(); + + /** + * Function: Load model file into memory and initialize model resources + * Input: modelPath - full file path of the target model file (including file name and extension) + * Return: Result status code (success if the code indicates normal execution, failure otherwise) + */ + Result LoadModelFromFileWithMem(const std::string& modelPath); + + /** + * Function: Unload loaded model and release associated memory + * Description: Clear model-related resources without returning any status + */ + void Unload(); + + /** + * Function: Initialize input dataset for model inference + * Return: Result status code (success/failure of initialization) + */ + Result InitInput(); + + /** + * Function: Create model description structure + * Description: Build and initialize the model descriptor to store model metadata + * Return: Result status code (success/failure of descriptor creation) + */ + Result CreateDesc(); + + /** + * 
Function: Destroy model description structure + * Description: Release memory occupied by the model descriptor and clear related data + */ + void DestroyDesc(); + + /** + * Function: Create input buffer for model inference + * Input: inputDataBuffer - pointer to the buffer containing input data (whether host or device memory is expected is not visible in this header - TODO confirm) + * Input: bufferSize - total byte size of the inputDataBuffer + * Input: stride - alignment step size for input data processing + * Return: Result status code (success/failure of input buffer creation) + */ + Result CreateInput(void *inputDataBuffer, size_t bufferSize, int stride); + + /** + * Function: Create input buffer from a specified file + * Input: filePath - full path of the file containing input data + * Return: Result status code (success/failure of buffer creation) + */ + Result CreateInputBuf(const std::string& filePath); + + /** + * Function: Create task buffer and work buffer for model execution + * Return: Result status code (success/failure of buffer allocation) + */ + Result CreateTaskBufAndWorkBuf(); + + /** + * Function: Release all resources related to model input + * Description: Free input buffers and clear input dataset configuration + */ + void DestroyInput(); + + /** + * Function: Allocate output buffer to store model inference results + * Return: Result status code (success/failure of output buffer creation) + */ + Result CreateOutput(); + + /** + * Function: Release all resources related to model output + * Description: Free output buffers and clear inference result data + */ + void DestroyOutput(); + + /** + * Function: Execute model inference with prepared input data + * Return: Result status code (success/failure of inference execution) + */ + Result Execute(); + + /** + * Function: Export model inference output results to a file + * Description: Write output buffer data to a local file (read-only operation, no state change) + */ + void DumpModelOutputResult() const; + + /** + * Function: Retrieve and display model inference output 
results + * Description: Extract data from output buffer and present it (read-only operation, no state change) + */ + void OutputModelResult() const; + + /** + * Function: Create dedicated buffer for specified input/output index + * Input: index - target index of the input/output buffer (starting from 0) + * Return: Result status code (success/failure of buffer creation) + */ + Result CreateBuf(int index); + + // GetInputStrideParam is declared once, below, right after GetInputNum. + + /** + * Function: Get stride parameters of specified output index + * Input: index - target index of the output buffer (starting from 0) + * Output: bufSize - byte size of the output buffer (output parameter) + * Output: stride - alignment step size of the output data (output parameter) + * Output: dims - dimension information of the output model I/O (output parameter) + * Return: Result status code (success/failure of parameter retrieval) + */ + Result GetOutputStrideParam(int index, size_t& bufSize, size_t& stride, aclmdlIODims& dims) const; + + /** + * Function: Get data size of specified input index + * Input: index - target index of the input buffer (starting from 0) + * Return: Total byte size of the input buffer at the specified index + */ + size_t GetInputDataSize(int index) const; + + /** + * Function: Get data size of specified output index + * Input: index - target index of the output buffer (starting from 0) + * Return: Total byte size of the output buffer at the specified index + */ + size_t GetOutputDataSize(int index) const; + + /** + * Function: Get the total number of model input nodes + * Return: Count of input nodes (non-negative integer) + */ + size_t GetInputNum() const; + + /** + * Function: Get stride parameters of specified input index + * Input: index - target index of the input buffer (starting from 0) + * Output: buf_size - byte size of the input buffer (output parameter) + * Output: stride - alignment step size of the input data 
(output parameter) + * Output: dims - dimension information of the input model I/O (output parameter) + * Return: Result status code (success/failure of parameter retrieval) + */ + Result GetInputStrideParam(int index, size_t& buf_size, size_t& stride, aclmdlIODims& dims) const; + + /** + * Function: Create model input buffer from raw data + * Input: data - pointer to the raw input data buffer + * Input: data_size - total byte size of the raw data buffer + * Return: Result status code (success/failure of input buffer creation) + */ + Result CreateInputFromData(const void* data, size_t data_size); + + /** + * Function: Create model input buffers from multiple raw data sources + * Input: input_datas - vector of pointers to multiple raw input data buffers + * Input: input_sizes - vector of byte sizes corresponding to each input data buffer + * Return: Result status code (success/failure of multi-input buffer creation) + */ + Result CreateInputFromData(const std::vector& input_datas, + const std::vector& input_sizes); + + /** + * Function: Release all model-related resources + * Description: Unified release of input/output buffers, model descriptor, task/work buffers, etc. 
+ */ + void DestroyResource(); + +private: /* The notes on the private helpers and members below are inferred from names and types only - TODO confirm against the implementation file. */ + void WriteOutput(const std::string& outputFileName, size_t index) const; /* presumably writes the output at position index to outputFileName */ + + Result ClearOutputStrideInvalidBuf(std::vector& buffer, size_t index) const; /* presumably strips stride padding from the output at position index */ + + uint32_t executeNum_ { 0 }; /* presumably a counter of executions; starts at 0 */ + uint32_t modelId_ { 0 }; /* id of the loaded model; 0 until a model is loaded (presumably) */ + size_t modelMemSize_ { 0 }; /* byte size paired with modelMemPtr_ */ + size_t modelWeightSize_ { 0 }; /* byte size paired with modelWeightPtr_ */ + void *modelMemPtr_ { nullptr }; /* model memory buffer; null until allocated */ + void *modelWeightPtr_ { nullptr }; /* model weight buffer; null until allocated */ + bool loadFlag_ { false }; /* presumably true while a model is loaded */ + aclmdlDesc *modelDesc_ { nullptr }; /* model description handle; null until created */ + aclmdlDataset *input_ { nullptr }; /* input dataset handle; null until created */ + aclmdlDataset *output_ { nullptr }; /* output dataset handle; null until created */ +}; + +#endif // MODEL_PROCESS_H diff --git a/samples/contribute/ACT/NNN/inc/sample_process.h b/samples/contribute/ACT/NNN/inc/sample_process.h new file mode 100644 index 0000000000000000000000000000000000000000..fc5b549b59b58d0d22650e241267711afbea35b8 --- /dev/null +++ b/samples/contribute/ACT/NNN/inc/sample_process.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2026 Huawei Technologies Co., Ltd + * This file is part of [Hispark/modelzoo]. + * + * [Hispark/modelzoo] is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, version 3 of the License only. + * + * [Hispark/modelzoo] is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with [Hispark/modelzoo]. If not, see . 
+ */ + +#ifndef SAMPLE_PROCESS_H +#define SAMPLE_PROCESS_H + +#include +#include "utils.h" +#include "acl/acl.h" +#include "model_process.h" + +/** +* Class: SampleProcess +* Description: Core processing class for model inference sample, manages input data, model loading and resource lifecycle +*/ +class SampleProcess { +public: + /** + * Function: Class constructor + * Description: Initialize a SampleProcess instance with default values for all member variables + */ + SampleProcess(); + + /** + * Function: Class destructor + * Description: Release allocated resources and reset member variable states when instance is destroyed + */ + ~SampleProcess(); + + /** + * Function: Initialize system and model inference related resources + * Description: Set up device context, stream and basic runtime environment + * Return: Result status code (success if the code indicates normal execution, failure otherwise) + */ + Result InitResource(); + + /** + * Function: Execute the full process of sample inference + * Description: Orchestrate input data preparation, model loading and inference execution + * Return: Result status code (success/failure of the entire inference process) + */ + Result Process(); + // DestroyResource() is declared once, further down in this class. + + /** + * Function: Set the file path of the input data source + * Input: path - full file path of the input data file (including file name and extension) + */ + void SetInputPath(const std::string& path); + std::string input_path_; // Public member: full file path of the input data; also written directly by the free function set_input_path() + + const char* input_data_ = nullptr; // Public member: pointer to a binary input data buffer (nullptr by default); ownership is not visible in this header + size_t input_data_size_ = 0; // Public member: byte size of the buffer behind input_data_ (0 by default) + + /** + * Function: Set multiple input data sources for multi-input model inference + * Input: input_datas - Vector of pointers to multiple binary input data buffers + * Input: input_sizes - Vector of byte sizes 
corresponding to each input data buffer + */ + void SetInputDatas(const std::vector& input_datas, + const std::vector& input_sizes); + std::vector input_datas_; // Public member: pointers to the multiple input data buffers + std::vector input_sizes_; // Public member: byte sizes matching input_datas_ entry by entry + + /** + * Function: Load pre-configured model file into memory + * Description: Initialize model resources and prepare for inference execution + * Return: Result status code (success/failure of model loading) + */ + Result LoadModel(); + + /** + * Function: Release all allocated resources + * Description: Unified release of device context, stream, model resources and input/output buffers + */ + void DestroyResource(); + +private: + int32_t deviceId_ { 0 }; // ID of the target computing device (default 0) + aclrtContext context_ { nullptr }; // Runtime context handle (nullptr until created) + aclrtStream stream_ { nullptr }; // Runtime stream handle (nullptr until created) + + bool isInited_ = false; // Presumably set once InitResource() has completed - confirm in sample_process.cpp + ModelProcess modelProcess_; // ModelProcess instance held by value as a member + bool isModelLoaded_ = false; // Presumably set once LoadModel() has completed - confirm in sample_process.cpp +}; + +/** +* Function: Free inline helper that sets the input file path of a SampleProcess instance +* Description: Assigns the public member input_path_ directly; it is not declared as a friend of SampleProcess and needs no special access +* Input: sample - Reference to the target SampleProcess instance +* Input: path - Full file path to be set for input data +*/ +inline void set_input_path(SampleProcess& sample, const std::string& path) { + sample.input_path_ = path; // Plain assignment to the public member; no validation is done here +} + 
+#endif // SAMPLE_PROCESS_H diff --git a/samples/contribute/ACT/NNN/inc/utils.h b/samples/contribute/ACT/NNN/inc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ff58c760a46c60549db6263c4c66987188bd3add --- /dev/null +++ b/samples/contribute/ACT/NNN/inc/utils.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2026 Huawei Technologies Co., Ltd + * This file is part of [Hispark/modelzoo]. + * + * [Hispark/modelzoo] is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, version 3 of the License only. + * + * [Hispark/modelzoo] is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with [Hispark/modelzoo]. If not, see . + */ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include "acl/acl.h" +#include "acl/acl_mdl.h" + +#define INFO_LOG(fmt, ...) fprintf(stdout, "[INFO] " fmt "\n", ##__VA_ARGS__) +#define WARN_LOG(fmt, ...) fprintf(stdout, "[WARN] " fmt "\n", ##__VA_ARGS__) +#define ERROR_LOG(fmt, ...) 
fprintf(stdout, "[ERROR] " fmt "\n", ##__VA_ARGS__) /* NOTE(review): ERROR_LOG writes to stdout, the same stream as INFO_LOG and WARN_LOG - confirm this is intended rather than stderr. */ + +#ifdef _WIN32 +#define S_ISREG(m) (((m) & 0170000) == (0100000)) +#endif + +typedef enum Result { /* Status code returned by the sample's functions. */ + SUCCESS = 0, + FAILED = 1 +} Result; + +class Utils { /* Every member declared here is a static helper; the class has no data members. */ +public: + /** + * Function: Allocate and initialize device-side buffer using data from a specified file + * Input: fileName - full path/name of the input file + * Input: dims - dimension information of the model I/O data + * Input: stride - step size for data alignment + * Input: dataSize - total size of the data to be read + * Return: Pointer to the allocated device buffer (nullptr if failed) + */ + static void* GetDeviceBufferOfFile(const std::string& fileName, const aclmdlIODims& dims, + size_t stride, size_t dataSize); + + /** + * Function: Read binary file content into a memory buffer + * Input: fileName - path and name of the binary file to read + * Output: fileSize - total byte size of the read file (output parameter, 32-bit, so sizes above UINT32_MAX cannot be reported) + * Return: Pointer to the host-side buffer containing file data (nullptr if failed) + */ + static void* ReadBinFile(const std::string& fileName, uint32_t& fileSize); + + /** + * Function: Get the total byte size of a specified file + * Input: fileName - path and name of the target file + * Output: fileSize - byte size of the file (output parameter, 32-bit) + * Return: Result code (success/failure status) + */ + static Result GetFileSize(const std::string& fileName, uint32_t& fileSize); + + /** + * Function: Read binary file with stride alignment and fill to specified dimension + * Input: fileName - path and name of the binary file + * Input: dims - dimension configuration of the model I/O + * Input: stride - alignment step size for data processing + * Input: dataSize - expected total size of the output buffer + * Return: Pointer to the aligned data buffer (nullptr if failed) + */ + static void* ReadBinFileWithStride(const std::string& fileName, const aclmdlIODims& dims, + size_t stride, size_t dataSize); + + /** + * Function: Initialize int8_t 
type data buffer with default values (the fill value is defined in the implementation and is not visible in this header) + * Input: data - pointer to the int8_t data buffer to initialize + * Input: dataSize - total size of the data buffer in bytes + * Note: This function will overwrite all existing data in the buffer + */ + static void InitData(int8_t* data, size_t dataSize); +}; + +#endif diff --git a/samples/contribute/ACT/NNN/script/model_test.py b/samples/contribute/ACT/NNN/script/model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..272ad47bd00a2b9580c378be63323f2865ffdc78 --- /dev/null +++ b/samples/contribute/ACT/NNN/script/model_test.py @@ -0,0 +1,137 @@ +""" +ACTWrapper.py + +加载 act om 模型并推理 +""" + +import numpy as np +import subprocess +import os +import re +import struct +import json +import sys +import time +import argparse + + +class ACT3403Policy: + def __init__(self, cpp_executable): + super().__init__() + self.cpp_executable = cpp_executable + self.cpp_dir = os.path.dirname(cpp_executable) # 获取二进制所在目录 + # self.cpp_process = self.model_init() + + def predict(self, batch) -> tuple: + input_arr = batch + start_time = time.perf_counter() + cpp_outputs = self.run_cpp_and_get_float_output(input_arr) + + if cpp_outputs is not None: + data = cpp_outputs.get(2) + if data is not None: + action = [] + for i in range(0, len(data), 8): + action.append(data[i:i+6]) + action = np.array(action, dtype=np.float32) + action = action.reshape([1, 100, 6]) + return action + return None + + def run_cpp_and_get_float_output(self, input_arrays): + # 启动C++进程,通过管道通信 + process = subprocess.Popen( + self.cpp_executable, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=False, # 二进制模式通信 + cwd=self.cpp_dir + ) + + try: + # 发送输入数据 + for i, arr in enumerate(input_arrays): + data_bytes = arr.tobytes() # 获取原始二进制数据 + data_size = struct.pack(' +#include +#include "sample_process.h" +#include "utils.h" +#include +#include +using namespace std; + +int main() { + // 初始化推理环境(只执行一次) + SampleProcess sample; + if 
(sample.InitResource() != SUCCESS) { + cerr << "Init resource failed" << endl; + return -1; + } + + // 加载模型(只执行一次) + if (sample.LoadModel() != SUCCESS) { + cerr << "Load model failed" << endl; + sample.DestroyResource(); + return -1; + } + + // 循环处理多次输入 + while (true) { + vector input_datas; + vector input_sizes; + const int INPUT_COUNT = 3; + + // 读取输入数据(保持原有逻辑) + bool readSuccess = true; + for (int i = 0; i < INPUT_COUNT; ++i) { + uint32_t data_size; + cin.read(reinterpret_cast(&data_size), sizeof(data_size)); + if (!cin.good()) { + cerr << "Read input " << i << " size failed" << endl; + readSuccess = false; + break; + } + + void* data = nullptr; + aclError ret = aclrtMalloc(&data, data_size, ACL_MEM_MALLOC_NORMAL_ONLY); + if (ret != ACL_SUCCESS || data == nullptr) { + cerr << "Malloc buffer for input " << i << " failed" << endl; + readSuccess = false; + break; + } + + cin.read(reinterpret_cast(data), data_size); + if (!cin.good()) { + cerr << "Read input " << i << " data failed" << endl; + aclrtFree(data); + readSuccess = false; + break; + } + + input_datas.push_back(data); + input_sizes.push_back(data_size); + } + + // 检查是否读取失败(比如到达输入末尾) + if (!readSuccess) { + // 释放已分配的内存 + for (auto ptr : input_datas) aclrtFree(const_cast(ptr)); + break; + } + + // 设置输入并执行推理 + sample.SetInputDatas(input_datas, input_sizes); + if (sample.Process() != SUCCESS) { + cerr << "Inference failed" << endl; + } else { + cout << "3-input inference success" << endl; // 注意这里修正了原代码的数字错误(5->3) + } + + // 释放当前批次的输入内存 + for (auto data : input_datas) aclrtFree(const_cast(data)); + } + + // 最后释放所有资源 + sample.DestroyResource(); + return 0; +} diff --git a/samples/contribute/ACT/NNN/src/model_porcess.cpp b/samples/contribute/ACT/NNN/src/model_porcess.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c850fe5785f6cd1f60de1894c42581bff2f490ba --- /dev/null +++ b/samples/contribute/ACT/NNN/src/model_porcess.cpp @@ -0,0 +1,744 @@ +/** +* Copyright (c) Huawei Technologies Co., 
Ltd. 2022-2022. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at + +* http://www.apache.org/licenses/LICENSE-2.0 + +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "model_process.h" + +#include +#include +#include +#include +#include "utils.h" + +using namespace std; + +static const int BYTE_BIT_NUM = 8; // 1 byte = 8 bit +static const size_t FIXED_STRIDE = 256; // 固定stride为256字节 +static const size_t LAST_DIM = 8; // 每个对齐块的有效float数 +static const size_t TOTAL_EFFECTIVE_FLOAT = 800; // 总有效float数 + +ModelProcess::ModelProcess() +{ +} + +// 析构函数:对象销毁时自动调用,释放模型相关所有资源 +ModelProcess::~ModelProcess() +{ + Unload(); // 卸载模型 + DestroyDesc(); // 销毁模型描述信息 + DestroyInput(); // 销毁模型输入数据集 + DestroyOutput(); // 销毁模型输出数据集 +} + +// 手动销毁模型资源接口,功能与析构函数一致 +void ModelProcess::DestroyResource() +{ + Unload(); // 卸载模型 + DestroyDesc(); // 销毁模型描述信息 + DestroyInput(); // 销毁模型输入数据集 + DestroyOutput(); // 销毁模型输出数据集 +} + +// 从文件加载模型到内存,并加载到昇腾AI处理器 +// modelPath:模型文件路径 +Result ModelProcess::LoadModelFromFileWithMem(const std::string& modelPath) +{ + uint32_t fileSize = 0; // 定义变量存储模型文件大小 + modelMemPtr_ = Utils::ReadBinFile(modelPath, fileSize); // 读取二进制模型文件到内存 + modelMemSize_ = fileSize; // 记录模型内存大小 + + // 从内存中加载模型到AI处理器,获取模型ID + aclError ret = aclmdlLoadFromMem(static_cast(modelMemPtr_), modelMemSize_, &modelId_); + if (ret != ACL_SUCCESS) { // 判断模型加载是否失败 + aclrtFree(modelMemPtr_); // 加载失败,释放已申请的模型内存 + ERROR_LOG("load model from file failed, model file is %s", modelPath.c_str()); // 打印错误日志 + return FAILED; // 返回失败状态 + } + + loadFlag_ = true; // 模型加载成功,置位加载标志 + 
INFO_LOG("load model %s success", modelPath.c_str()); // 打印成功日志 + return SUCCESS; // 返回成功状态 +} + +// 创建模型描述信息,用于获取模型输入输出等属性 +Result ModelProcess::CreateDesc() +{ + // 创建模型描述句柄 + modelDesc_ = aclmdlCreateDesc(); + if (modelDesc_ == nullptr) { // 判断创建是否失败 + ERROR_LOG("create model description failed"); // 打印错误日志 + return FAILED; // 返回失败状态 + } + + // 根据模型ID,获取模型详细描述信息 + aclError ret = aclmdlGetDesc(modelDesc_, modelId_); + if (ret != ACL_SUCCESS) { // 判断获取描述信息是否失败 + ERROR_LOG("get model description failed"); // 打印错误日志 + return FAILED; // 返回失败状态 + } + + INFO_LOG("create model description success"); // 打印成功日志 + return SUCCESS; // 返回成功状态 +} + +// 销毁模型描述信息,释放资源 +void ModelProcess::DestroyDesc() +{ + if (modelDesc_ != nullptr) { // 判断模型描述句柄是否存在 + (void)aclmdlDestroyDesc(modelDesc_); // 销毁模型描述 + modelDesc_ = nullptr; // 指针置空,防止野指针 + } +} + +// 初始化模型输入数据集(只创建空数据集,不添加数据) +Result ModelProcess::InitInput() +{ + // 创建ACL数据集,用于存放模型输入 + input_ = aclmdlCreateDataset(); + if (input_ == nullptr) { // 判断创建是否失败 + ERROR_LOG("can't create dataset, create input failed"); // 打印错误日志 + return FAILED; // 返回失败状态 + } + return SUCCESS; // 返回成功状态 +} + +// 为模型输入数据集添加具体数据 +// inputDataBuffer:输入数据内存地址 +// bufferSize:输入数据大小 +Result ModelProcess::CreateInput(void *inputDataBuffer, size_t bufferSize, int) +{ + // 根据输入数据和大小,创建ACL数据缓冲 + aclDataBuffer* inputData = aclCreateDataBuffer(inputDataBuffer, bufferSize); + if (inputData == nullptr) { // 判断创建缓冲是否失败 + ERROR_LOG("can't create data buffer, create input failed"); // 打印错误日志 + return FAILED; // 返回失败状态 + } + + // 将数据缓冲添加到输入数据集 + aclError ret = aclmdlAddDatasetBuffer(input_, inputData); + if (ret != ACL_SUCCESS) { // 判断添加是否失败 + ERROR_LOG("add input dataset buffer failed"); // 打印错误日志 + aclDestroyDataBuffer(inputData); // 销毁已创建的数据缓冲 + inputData = nullptr; // 指针置空 + return FAILED; // 返回失败状态 + } + + return SUCCESS; // 返回成功状态 +} + +// 获取模型指定输入端口的数据类型大小(单位:字节) +// index:输入端口索引 +size_t ModelProcess::GetInputDataSize(int index) const +{ + // 获取模型输入数据类型 + 
aclDataType dataType = aclmdlGetInputDataType(modelDesc_, index); + // 计算数据类型占用字节数并返回 + return aclDataTypeSize(dataType) / BYTE_BIT_NUM; +} + +// 获取模型指定输出端口的数据类型大小(单位:字节) +// index:输出端口索引 +size_t ModelProcess::GetOutputDataSize(int index) const +{ + // 获取模型输出数据类型 + aclDataType dataType = aclmdlGetOutputDataType(modelDesc_, index); + // 计算数据类型占用字节数并返回 + return aclDataTypeSize(dataType) / BYTE_BIT_NUM; +} + +// 获取模型输出的步长、缓冲区大小、维度信息 +// index:输出端口索引 +// bufSize:输出参数,存储输出缓冲区大小 +// stride:输出参数,存储输出步长 +// dims:输出参数,存储输出维度信息 +Result ModelProcess::GetOutputStrideParam(int index, size_t& bufSize, size_t& stride, aclmdlIODims& dims) const +{ + // 获取模型指定输出端口的维度信息 + aclError ret = aclmdlGetOutputDims(modelDesc_, index, &dims); + if (ret != ACL_SUCCESS) { + ERROR_LOG("aclmdlGetOutputDims error!"); + return FAILED; + } + + // 固定stride为256 + stride = FIXED_STRIDE; + + // 获取模型指定输出端口的缓冲区大小 + bufSize = aclmdlGetOutputSizeByIndex(modelDesc_, index); + if (bufSize == 0) { + ERROR_LOG("aclmdlGetOutputSizeByIndex error!"); + return FAILED; + } + return SUCCESS; +} + +// 创建模型输出数据集,并为每个输出分配内存 +Result ModelProcess::CreateOutput() +{ + // 创建输出数据集 + output_ = aclmdlCreateDataset(); + if (output_ == nullptr) { + ERROR_LOG("can't create dataset, create output failed"); + return FAILED; + } + + // 获取模型输出端口数量 + size_t outputSize = aclmdlGetNumOutputs(modelDesc_); + // 遍历所有输出端口,依次创建输出内存和数据缓冲 + for (size_t i = 0; i < outputSize; ++i) { + size_t stride = FIXED_STRIDE; // 使用固定步长 + + if (stride == 0) { + ERROR_LOG("Error, output default stride is %lu.", stride); + return FAILED; + } + + // 获取当前输出端口需要的内存大小 + size_t bufferSize = aclmdlGetOutputSizeByIndex(modelDesc_, i); + if (bufferSize == 0) { + ERROR_LOG("Error, output size is %lu.", bufferSize); + return FAILED; + } + + void *outputBuffer = nullptr; + // 在设备上申请输出内存 + aclError ret = aclrtMalloc(&outputBuffer, bufferSize, ACL_MEM_MALLOC_NORMAL_ONLY); + if (ret != ACL_SUCCESS) { + ERROR_LOG("can't malloc buffer, size is %zu, create output failed", 
bufferSize); + return FAILED; + } + // 初始化输出内存数据 + Utils::InitData(static_cast(outputBuffer), bufferSize); + + // 根据输出内存创建数据缓冲 + aclDataBuffer* outputData = aclCreateDataBuffer(outputBuffer, bufferSize); + if (outputData == nullptr) { + ERROR_LOG("can't create data buffer, create output failed"); + aclrtFree(outputBuffer); // 创建失败,释放已申请内存 + return FAILED; + } + + // 将输出数据缓冲添加到输出数据集 + ret = aclmdlAddDatasetBuffer(output_, outputData); + if (ret != ACL_SUCCESS) { + ERROR_LOG("can't add data buffer, create output failed"); + aclrtFree(outputBuffer); // 释放内存 + aclDestroyDataBuffer(outputData); // 销毁数据缓冲 + return FAILED; + } + } + + INFO_LOG("create model output success"); // 输出创建成功日志 + return SUCCESS; +} + +Result ModelProcess::ClearOutputStrideInvalidBuf(std::vector& buffer, size_t index) const +{ + size_t bufSize = 0; + size_t bufStride = 0; + aclmdlIODims dims; + aclError ret = GetOutputStrideParam(index, bufSize, bufStride, dims); + if (ret != SUCCESS) { + ERROR_LOG("Error, GetOutputStrideParam failed"); + return FAILED; + } + if ((bufSize == 0) || (bufStride == 0)) { + ERROR_LOG("Error, bufSize(%zu) bufStride(%zu) invalid", bufSize, bufStride); + return FAILED; + } + if ((dims.dimCount == 0) || (dims.dims[dims.dimCount - 1] <= 0)) { + ERROR_LOG("Error, dims para invalid"); + return FAILED; + } + int64_t lastDim = dims.dims[dims.dimCount - 1]; + + size_t dataSize = GetOutputDataSize(index); + if (dataSize == 0) { + ERROR_LOG("Error, dataSize == 0 invalid"); + return FAILED; + } + size_t lastDimSize = dataSize * lastDim; + size_t loopNum = bufSize / bufStride; + size_t invalidSize = bufStride - lastDimSize; + if (invalidSize == 0) { + return SUCCESS; + } + + for (size_t i = 0; i < loopNum; ++i) { + size_t offset = bufStride * i + lastDimSize; + int8_t* ptr = &buffer[offset]; + for (size_t idx = 0; idx < invalidSize; idx++) { + ptr[idx] = 0; + } + } + return SUCCESS; +} + +void ModelProcess::WriteOutput(const string& outputFileName, size_t index) const +{ + 
aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, index); + if (dataBuffer == nullptr) { + ERROR_LOG("output[%zu] dataBuffer nullptr invalid", index); + return; + } + int8_t* outData = (int8_t*)aclGetDataBufferAddr(dataBuffer); + size_t outSize = aclGetDataBufferSize(dataBuffer); + if (outData == nullptr || outSize == 0) { + ERROR_LOG("output[%zu] data or size(%zu) invalid", index, outSize); + return; + } + + std::vector effectiveData; + effectiveData.reserve(TOTAL_EFFECTIVE_FLOAT); + float* floatData = reinterpret_cast(outData); + const size_t BLOCK_STEP = FIXED_STRIDE / sizeof(float); // 每个块跳64个float位置 + + size_t validCount = 0; + size_t blockIndex = 0; + while (validCount < TOTAL_EFFECTIVE_FLOAT) { + size_t blockStart = blockIndex * BLOCK_STEP; + for (size_t j = 0; j < LAST_DIM && validCount < TOTAL_EFFECTIVE_FLOAT; ++j) { + effectiveData.push_back(floatData[blockStart + j]); + validCount++; + } + blockIndex++; + } + + ofstream fout(outputFileName, ios::out|ios::binary); + if (fout.good() == false) { + ERROR_LOG("create output file [%s] failed", outputFileName.c_str()); + return; + } + fout.write((char*)&effectiveData[0], effectiveData.size() * sizeof(float)); + fout.close(); + INFO_LOG("Write %zu effective float to %s", effectiveData.size(), outputFileName.c_str()); + return; +} + +// 新增:获取模型输入个数 +size_t ModelProcess::GetInputNum() const { + if (modelDesc_ == nullptr) return 0; + return aclmdlGetNumInputs(modelDesc_); +} + +Result ModelProcess::GetInputStrideParam(int index, size_t& buf_size, size_t& stride, aclmdlIODims& dims) const { + if (modelDesc_ == nullptr || index < 0 || static_cast(index) >= GetInputNum()) { + ERROR_LOG("Invalid input index or model desc"); + return FAILED; + } + + // 获取输入维度 + aclError ret = aclmdlGetInputDims(modelDesc_, index, &dims); + if (ret != ACL_SUCCESS) { + ERROR_LOG("Get input dims failed"); + return FAILED; + } + // 获取指定输入索引对应的输入数据大小(单batch大小) + buf_size = aclmdlGetInputSizeByIndex(modelDesc_, index); + + // 
业务自定义配置:将内存对齐步长固定为256字节 + stride = FIXED_STRIDE; + + // 函数执行成功,返回成功状态 + return SUCCESS; +} + +// 从外部输入数据数组中,创建并填充模型的输入数据集 +// input_datas:外部输入数据的指针数组,每个元素对应一个输入端口的数据地址 +// input_sizes:外部输入数据的大小数组,每个元素对应一个输入端口的数据字节大小 +Result ModelProcess::CreateInputFromData(const std::vector& input_datas, + const std::vector& input_sizes) { + // 若当前已存在输入数据集,先销毁旧的输入资源,避免内存泄漏和重复创建 + if (input_ != nullptr) { DestroyInput(); } + + // 创建ACL框架的模型输入数据集,用于承载所有模型输入数据 + input_ = aclmdlCreateDataset(); + + // 判断输入数据集是否创建失败 + if (input_ == nullptr) { + // 打印错误日志 + ERROR_LOG("Create input dataset failed"); + // 返回失败状态 + return FAILED; + } + // 为每个输入创建缓冲区并绑定数据 + for (size_t i = 0; i < input_datas.size(); ++i) { + size_t buf_size = 0; + size_t stride = 0; + aclmdlIODims dims; + + // 获取当前输入的参数 + if (GetInputStrideParam(i, buf_size, stride, dims) != SUCCESS) { + ERROR_LOG("Get input %zu param failed", i); + return FAILED; + } + + aclDataBuffer* input_buf = aclCreateDataBuffer( + const_cast(input_datas[i]), buf_size); + if (input_buf == nullptr) { + ERROR_LOG("Create input %zu buffer failed", i); + return FAILED; + } + + // 将缓冲区添加到输入数据集 + if (aclmdlAddDatasetBuffer(input_, input_buf) != ACL_SUCCESS) { + ERROR_LOG("Add input %zu buffer to dataset failed", i); + aclDestroyDataBuffer(input_buf); + return FAILED; + } + } + + return SUCCESS; +} + +// 修改:销毁输入(适配多输入缓冲区释放) +void ModelProcess::DestroyInput() { + if (input_ == nullptr) return; + + // 遍历所有输入缓冲区,释放内存并销毁缓冲区对象 + for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) { + aclDataBuffer* buf = aclmdlGetDatasetBuffer(input_, i); + if (buf != nullptr) { + void* data_addr = aclGetDataBufferAddr(buf); + if (data_addr != nullptr) { + aclrtFree(data_addr); // 释放输入数据内存 + } + aclDestroyDataBuffer(buf); // 销毁缓冲区对象 + } + } + + aclmdlDestroyDataset(input_); + input_ = nullptr; +} + +Result ModelProcess::CreateInputFromData(const void* data, size_t data_size) +{ + // 1. 
初始化输入数据集 + if (InitInput() != SUCCESS) { + ERROR_LOG("Init input failed"); + return FAILED; + } + + // 2. 获取模型输入的 stride 和维度信息(复用原逻辑) + size_t bufSize = 0; + size_t stride = 0; + aclmdlIODims dims; + if (GetInputStrideParam(0, bufSize, stride, dims) != SUCCESS) { // 假设单输入 + ERROR_LOG("Get input param failed"); + return FAILED; + } + // 3. 分配设备内存并复制数据 + // 定义设备端内存指针,初始化为空 + void* device_buf = nullptr; + // 调用ACL接口申请设备内存,指定内存大小与分配类型 + aclError ret = aclrtMalloc(&device_buf, bufSize, ACL_MEM_MALLOC_NORMAL_ONLY); + // 判断设备内存申请是否失败 + if (ret != ACL_SUCCESS) { + // 打印错误日志 + ERROR_LOG("Malloc device buffer failed"); + // 返回失败状态 + return FAILED; + } + + // 初始化设备内存数据,将内存置为初始值 + Utils::InitData(static_cast(device_buf), bufSize); + // 将主机端的输入数据拷贝到已申请的设备内存中 + memcpy(device_buf, data, data_size); // 将内存数据复制到设备缓冲区 + + // 调用CreateInput接口,将设备内存封装为模型输入,完成输入创建 + return CreateInput(device_buf, bufSize, 0); +} + +// 转储模型推理输出结果到本地二进制文件 +void ModelProcess::DumpModelOutputResult() const +{ + // 定义字符串流,用于拼接输出文件名 + stringstream ss; + // 获取模型输出数据集的缓冲区数量 + size_t outputNum = aclmdlGetDatasetNumBuffers(output_); + // 遍历所有输出缓冲区 + for (size_t i = 0; i < outputNum; ++i) { + // 拼接输出文件名:output + 推理次数 + 输出索引 + ss << "output" << executeNum_ << "_" << i << ".bin"; + // 将字符串流转换为字符串文件名 + string outputFileName = ss.str(); + // 调用WriteOutput将当前输出数据写入文件 + WriteOutput(outputFileName, i); + // 清空字符串流,准备下一次拼接 + ss.str(""); + } + // 打印数据转储成功日志 + INFO_LOG("dump data success"); +} + +// 读取800个有效float数据并打印 +void ModelProcess::OutputModelResult() const { + // 判断输出数据集是否为空,为空则无法处理 + if (output_ == nullptr) { + // 打印错误日志 + ERROR_LOG("Output dataset is null, cannot output result"); + // 直接返回 + return; + } + + // 从模型描述中获取模型总输出端口数量 + size_t outputNum = aclmdlGetNumOutputs(modelDesc_); + // 打印总输出端口数量日志 + INFO_LOG("Total output count: %zu", outputNum); + + // 遍历模型的每个输出端口(从索引1开始) + for (size_t i = 1; i < outputNum; ++i) { + // 从输出数据集中获取当前索引对应的输出数据缓冲区 + aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i); + // 
判断当前输出缓冲区是否为空 + if (dataBuffer == nullptr) { + // 打印索引对应缓冲区为空的错误日志 + ERROR_LOG("Output[%zu] buffer is null", i); + // 跳过当前无效输出,继续处理下一个 + continue; + } + + // 获取输出数据缓冲区的设备内存地址,并转换为int8_t类型指针 + int8_t* outputData = static_cast(aclGetDataBufferAddr(dataBuffer)); + // 判断输出数据指针是否有效 + if (outputData == nullptr) { + // 打印数据无效错误日志 + ERROR_LOG("Output[%zu] data is invalid", i); + // 跳过当前无效输出,继续处理下一个 + continue; + } + + // 打印当前输出的基本信息 + INFO_LOG("\nOutput[%zu] (only 800 valid float):", i); + INFO_LOG("----------------------------------------"); + + // 转换为float指针 + float* floatData = reinterpret_cast(outputData); + // 每个对齐块跳64个float位置(256字节 ÷ 4字节/float) + const size_t BLOCK_STEP = FIXED_STRIDE / sizeof(float); + size_t validCount = 0; + size_t blockIndex = 0; + std::cout << "FLOAT_OUTPUT_START " << i << " " << TOTAL_EFFECTIVE_FLOAT << std::endl; + + while (validCount < TOTAL_EFFECTIVE_FLOAT) { + // 计算当前块的起始位置 + size_t blockStart = blockIndex * BLOCK_STEP; + // 读取当前块的前8个有效float + for (size_t j = 0; j < LAST_DIM && validCount < TOTAL_EFFECTIVE_FLOAT; ++j) { + std::cout << floatData[blockStart + j] << " "; + validCount++; + } + // 跳到下一个块 + blockIndex++; + } + + std::cout << std::endl << "FLOAT_OUTPUT_END " << i << std::endl; + } +} +// 销毁模型输出相关资源(内存、数据集、数据缓冲) +void ModelProcess::DestroyOutput() +{ + // 如果输出数据集为空,直接返回,无需销毁 + if (output_ == nullptr) { + return; + } + + // 遍历输出数据集中所有的数据缓冲区,逐个释放资源 + for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) { + // 获取当前索引对应的输出数据缓冲区 + aclDataBuffer* dataBuffer = aclmdlGetDatasetBuffer(output_, i); + // 获取数据缓冲区对应的设备内存地址 + void* data = aclGetDataBufferAddr(dataBuffer); + // 释放设备内存(忽略返回值) + (void)aclrtFree(data); + // 销毁数据缓冲区(忽略返回值) + (void)aclDestroyDataBuffer(dataBuffer); + } + + // 销毁整个输出数据集(忽略返回值) + (void)aclmdlDestroyDataset(output_); + // 指针置空,防止野指针 + output_ = nullptr; +} + +// 执行模型推理 +Result ModelProcess::Execute() +{ + // 调用ACL接口执行模型推理,传入模型ID、输入数据集、输出数据集 + aclError ret = aclmdlExecute(modelId_, input_, output_); + // 
判断推理是否执行失败 + if (ret != ACL_SUCCESS) { + ERROR_LOG("execute model failed, modelId is %u", modelId_); + return FAILED; + } + // 推理成功,推理次数自增 + executeNum_++; + INFO_LOG("model execute success"); + return SUCCESS; +} + +// 根据输入索引创建设备内存缓冲区,并封装为模型输入 +// index:模型输入端口索引 +Result ModelProcess::CreateBuf(int index) +{ + // 定义设备内存指针 + void *bufPtr = nullptr; + // 缓冲区大小 + size_t bufSize = 0; + // 内存对齐步长 + size_t bufStride = 0; + // 输入维度信息 + aclmdlIODims inDims; + // 获取输入的步长、大小、维度信息 + aclError ret = GetInputStrideParam(index, bufSize, bufStride, inDims); + if (ret != SUCCESS) { + ERROR_LOG("Error, GetInputStrideParam failed"); + return FAILED; + } + + // 为输入数据申请设备内存 + ret = aclrtMalloc(&bufPtr, bufSize, ACL_MEM_MALLOC_NORMAL_ONLY); + if (ret != ACL_SUCCESS) { + ERROR_LOG("malloc device buffer failed. size is %zu", bufSize); + return FAILED; + } + // 初始化设备内存数据 + Utils::InitData(static_cast(bufPtr), bufSize); + + // 将申请的设备内存封装为模型输入 + ret = CreateInput(bufPtr, bufSize, 0); + if (ret != SUCCESS) { + ERROR_LOG("execute CreateInput failed"); + // 创建输入失败,释放已申请的设备内存 + aclrtFree(bufPtr); + return FAILED; + } + return SUCCESS; +} + +// 根据图片文件路径创建模型输入缓冲区 +// filePath:输入图片文件路径 +Result ModelProcess::CreateInputBuf(const string& filePath) +{ + // 设备内存大小 + size_t devSize = 0; + // 内存对齐步长 + size_t stride = 0; + // 模型输入维度 + aclmdlIODims inputDims; + // 仅支持单输入模型,获取0号输入的参数信息 + Result ret = GetInputStrideParam(0, devSize, stride, inputDims); + if (ret != SUCCESS) { + ERROR_LOG("GetStrideParam error"); + return FAILED; + } + // 获取输入数据类型大小 + size_t dataSize = GetInputDataSize(0); + if (dataSize == 0) { + ERROR_LOG("GetInputDataSize == 0 error"); + return FAILED; + } + // 读取文件并加载到设备内存,返回设备内存指针 + void *picDevBuffer = Utils::GetDeviceBufferOfFile(filePath, inputDims, stride, dataSize); + if (picDevBuffer == nullptr) { + ERROR_LOG("get pic device buffer failed"); + return FAILED; + } + + // 初始化输入数据集 + ret = InitInput(); + if (ret != SUCCESS) { + ERROR_LOG("execute InitInput failed"); + // 初始化失败,释放设备内存 + 
aclrtFree(picDevBuffer); + return FAILED; + } + + // 创建模型输入,stride参数不传入 + ret = CreateInput(picDevBuffer, devSize, 0); + if (ret != SUCCESS) { + ERROR_LOG("execute CreateInput failed"); + // 创建失败,释放设备内存 + aclrtFree(picDevBuffer); + return FAILED; + } + return SUCCESS; +} + +// 创建任务缓冲区和工作缓冲区(模型额外需要的辅助输入) +Result ModelProcess::CreateTaskBufAndWorkBuf() +{ + // 模型输入数量必须大于2,2代表taskbuf和workbuf之外的业务输入 + if (aclmdlGetNumInputs(modelDesc_) <= 2) { + ERROR_LOG("input dataset Num is error."); + return FAILED; + } + // 获取当前已创建的输入缓冲区数量 + size_t datasetSize = aclmdlGetDatasetNumBuffers(input_); + if (datasetSize == 0) { + ERROR_LOG("input dataset Num is 0."); + return FAILED; + } + // 遍历未创建的额外输入,依次创建任务缓冲区和工作缓冲区 + for (size_t loop = datasetSize; loop < aclmdlGetNumInputs(modelDesc_); loop++) { + Result ret = CreateBuf(loop); + if (ret != SUCCESS) { + ERROR_LOG("execute Create taskBuffer and workBuffer failed"); + return FAILED; + } + } + return SUCCESS; +} + +// 卸载模型,释放模型相关所有资源 +void ModelProcess::Unload() +{ + // 如果未加载过模型,直接提示并返回 + if (!loadFlag_) { + WARN_LOG("no model had been loaded, unload failed"); + return; + } + + // 卸载模型 + aclError ret = aclmdlUnload(modelId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("unload model failed, modelId is %u", modelId_); + } + + // 销毁模型描述信息 + if (modelDesc_ != nullptr) { + (void)aclmdlDestroyDesc(modelDesc_); + modelDesc_ = nullptr; + } + + // 释放模型文件内存 + if (modelMemPtr_ != nullptr) { + aclrtFree(modelMemPtr_); + modelMemPtr_ = nullptr; + modelMemSize_ = 0; + } + + // 释放模型权重内存 + if (modelWeightPtr_ != nullptr) { + aclrtFree(modelWeightPtr_); + modelWeightPtr_ = nullptr; + modelWeightSize_ = 0; + } + + // 清除模型加载标志 + loadFlag_ = false; + INFO_LOG("unload model success, modelId is %u", modelId_); +} + diff --git a/samples/contribute/ACT/NNN/src/sample_process.cpp b/samples/contribute/ACT/NNN/src/sample_process.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9a235a6fc9cb3aae3c8536df0593ddb8aee054bf --- /dev/null +++ 
b/samples/contribute/ACT/NNN/src/sample_process.cpp @@ -0,0 +1,220 @@ +/** +* @file sample_process.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include "sample_process.h" +#include "model_process.h" +#include "acl/acl.h" +#include "utils.h" +#include + +using namespace std; + +SampleProcess::SampleProcess() +{ +} + +SampleProcess::~SampleProcess() +{ + // 销毁模型资源 + modelProcess_.DestroyResource(); + DestroyResource(); +} +Result SampleProcess::InitResource() +{ + // ACL init + const char* aclConfigPath = "../src/acl.json"; + INFO_LOG("===== Start InitResource: aclInit with config: %s =====", aclConfigPath); + aclError ret = aclInit(aclConfigPath); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl init failed, ret = %d", ret); + return FAILED; + } + INFO_LOG("acl init success"); + + // set device + INFO_LOG("===== Set device: %d =====", deviceId_); + ret = aclrtSetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl open device %d failed, ret = %d", deviceId_, ret); + return FAILED; + } + INFO_LOG("open device %d success", deviceId_); + // create context (set current) + INFO_LOG("===== Create context for device: %d =====", deviceId_); + ret = aclrtCreateContext(&context_, deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create context failed, ret = %d", ret); + return FAILED; + } + INFO_LOG("create context success"); + + // create stream + INFO_LOG("===== Create stream ====="); + ret = aclrtCreateStream(&stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl create stream failed, ret = %d", ret); + return FAILED; + } + INFO_LOG("create stream success"); + + // get run mode + INFO_LOG("===== Get run mode ====="); + aclrtRunMode runMode; + ret = aclrtGetRunMode(&runMode); + if (ret != ACL_SUCCESS) { + ERROR_LOG("acl get run 
mode failed, ret = %d", ret); + return FAILED; + } + INFO_LOG("run mode get success, runMode = %d (ACL_DEVICE=0, ACL_HOST=1)", runMode); + if (runMode != ACL_DEVICE) { + ERROR_LOG("acl run mode failed, expect ACL_DEVICE(0), got %d", runMode); + return FAILED; + } + isInited_ = true; + INFO_LOG("===== InitResource success ====="); + return SUCCESS; +} + +// 新增模型加载方法,只执行一次 +Result SampleProcess::LoadModel() { + INFO_LOG("===== Start LoadModel, isModelLoaded_ = %d =====", isModelLoaded_); + if (isModelLoaded_) { + INFO_LOG("Model already loaded, skip"); + return SUCCESS; + } + + const string omModelPath = "../model/act_model.om"; + INFO_LOG("Load model from path: %s", omModelPath.c_str()); + Result ret = modelProcess_.LoadModelFromFileWithMem(omModelPath.c_str()); + if (ret != SUCCESS) { + ERROR_LOG("execute LoadModelFromFileWithMem failed, ret = %d (SUCCESS=0, FAILED=1)", ret); + return FAILED; + } + INFO_LOG("LoadModelFromFileWithMem success"); + + INFO_LOG("Create model desc"); + ret = modelProcess_.CreateDesc(); + if (ret != SUCCESS) { + ERROR_LOG("execute CreateDesc failed, ret = %d", ret); + return FAILED; + } + INFO_LOG("CreateDesc success"); + + INFO_LOG("Create model output buffer"); + ret = modelProcess_.CreateOutput(); + if (ret != SUCCESS) { + ERROR_LOG("execute CreateOutput failed, ret = %d", ret); + return FAILED; + } + INFO_LOG("CreateOutput success"); + + isModelLoaded_ = true; + INFO_LOG("===== LoadModel success, isModelLoaded_ = %d =====", isModelLoaded_); + return SUCCESS; +} + +// 修改Process方法,只处理单次推理 +Result SampleProcess::Process() { + if (!isInited_ || !isModelLoaded_) { + ERROR_LOG("Resource or model not initialized"); + return FAILED; + } + + // 创建输入(使用已加载的模型) + Result ret = modelProcess_.CreateInputFromData(input_datas_, input_sizes_); + if (ret != SUCCESS) { + ERROR_LOG("Create multi-input failed"); + return FAILED; + } + + ret = modelProcess_.CreateTaskBufAndWorkBuf(); + if (ret != SUCCESS) { + ERROR_LOG("CreateTaskBufAndWorkBuf failed"); + 
return FAILED; + } + + // 记录推理开始时间 + auto start = std::chrono::high_resolution_clock::now(); + + ret = modelProcess_.Execute(); + if (ret != SUCCESS) { + ERROR_LOG("execute inference failed"); + modelProcess_.DestroyInput(); + return FAILED; + } + + // 记录推理结束时间 + auto end = std::chrono::high_resolution_clock::now(); + double elapsed_ms = std::chrono::duration(end - start).count(); + std::cout << "INFERENCE_TIME:" << elapsed_ms << std::endl; + + // 输出结果 + modelProcess_.OutputModelResult(); + modelProcess_.DumpModelOutputResult(); + + // 释放当前输入缓冲区(保留模型资源) + modelProcess_.DestroyInput(); + + return SUCCESS; +} + +// 新增:保存输入文件路径 +void SampleProcess::SetInputPath(const std::string& path) { + this->input_path_ = path; // 在类中新增私有成员变量input_path_ +} + +void SampleProcess::SetInputDatas(const std::vector& input_datas, + const std::vector& input_sizes) { + input_datas_ = input_datas; + input_sizes_ = input_sizes; +} + +void SampleProcess::DestroyResource() +{ + aclError ret; + // 1. 先销毁流 + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy stream failed"); + } + stream_ = nullptr; + } + INFO_LOG("end to destroy stream"); + + // 2. 再销毁上下文 + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("destroy context failed"); + } + context_ = nullptr; + } + INFO_LOG("end to destroy context"); + + // 3. 重置设备(确保流和上下文已销毁) + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + ERROR_LOG("reset device failed"); + } + INFO_LOG("end to reset device %d", deviceId_); + + // 4. 
最后执行finalize(全局只执行一次) + static bool isFinalized = false; + if (!isFinalized) { + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + ERROR_LOG("finalize acl failed"); + } else { + isFinalized = true; + } + } + INFO_LOG("end to finalize acl"); +} diff --git a/samples/contribute/ACT/NNN/src/utils.cpp b/samples/contribute/ACT/NNN/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d9a460a13f791fe3441bcd7d2c8b2d145af63499 --- /dev/null +++ b/samples/contribute/ACT/NNN/src/utils.cpp @@ -0,0 +1,219 @@ +/** +* Copyright (C) 2020. Huawei Technologies Co.; Ltd. All rights reserved. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at + +* http://www.apache.org/licenses/LICENSE-2.0 + +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/
+
+#include "utils.h"
+
+#include <sys/stat.h>
+
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <string>
+
+#include "acl/acl.h"
+
+// NOTE(review): the header names inside <...> were missing from the reviewed text.
+// <fstream> and <sys/stat.h> are required by the code below; <iostream> and <cstring>
+// are presumed to be the other two original includes - confirm against the repository.
+
+namespace {
+
+/**
+ * @brief Check that fileName exists and refers to a regular file.
+ * @param fileName path to check
+ * @return true when stat() succeeds and the entry is a regular file; otherwise the reason
+ *         is reported through ERROR_LOG and false is returned
+ */
+bool IsRegularFile(const std::string& fileName)
+{
+    struct stat sBuf;
+    if (stat(fileName.c_str(), &sBuf) == -1) {
+        ERROR_LOG("failed to get file %s", fileName.c_str());
+        return false;
+    }
+    if (S_ISREG(sBuf.st_mode) == 0) {
+        ERROR_LOG("%s is not a file, please enter a file", fileName.c_str());
+        return false;
+    }
+    return true;
+}
+
+/**
+ * @brief Open fileName in binary mode and measure its length in bytes.
+ * @param fileName path of the file to open
+ * @param binFile  stream to open; on success it is positioned at the start of the file
+ * @param length   receives the file length in bytes (always > 0 on success)
+ * @return true on success; false (after ERROR_LOG) when the file cannot be opened, its
+ *         size cannot be determined, or it is empty
+ */
+bool OpenAndMeasure(const std::string& fileName, std::ifstream& binFile, std::streamoff& length)
+{
+    binFile.open(fileName, std::ifstream::binary);
+    if (!binFile.is_open()) {
+        ERROR_LOG("open file %s failed", fileName.c_str());
+        return false;
+    }
+    binFile.seekg(0, std::ios::end);
+    // tellg() yields -1 on failure. Keep the full-width offset: narrowing to int would turn
+    // a failure or a >2 GiB file into a bogus size.
+    length = static_cast<std::streamoff>(binFile.tellg());
+    if (length < 0) {
+        ERROR_LOG("get size of file %s failed", fileName.c_str());
+        return false;
+    }
+    if (length == 0) {
+        ERROR_LOG("binfile is empty, filename is %s", fileName.c_str());
+        return false;
+    }
+    binFile.seekg(0, std::ios::beg);
+    return true;
+}
+
+/**
+ * @brief Multiply two sizes, refusing to wrap around.
+ * @return false when lhs * rhs does not fit in size_t; product is then left unchanged
+ */
+bool SafeMul(size_t lhs, size_t rhs, size_t& product)
+{
+    if (lhs != 0 && rhs > std::numeric_limits<size_t>::max() / lhs) {
+        return false;
+    }
+    product = lhs * rhs;
+    return true;
+}
+
+/**
+ * @brief Convert a model dimension to size_t.
+ * @return false for zero/negative values (e.g. an unresolved dynamic dimension) and for
+ *         values that do not fit in size_t
+ */
+bool ToPositiveSize(int64_t dim, size_t& value)
+{
+    if (dim <= 0 || static_cast<uint64_t>(dim) > std::numeric_limits<size_t>::max()) {
+        return false;
+    }
+    value = static_cast<size_t>(dim);
+    return true;
+}
+
+}  // namespace
+
+/**
+ * @brief Zero-fill a memory region.
+ * @param data     start of the region; a null pointer is ignored
+ * @param dataSize number of bytes to clear
+ */
+void Utils::InitData(int8_t* data, size_t dataSize)
+{
+    if (data == nullptr || dataSize == 0) {
+        return;
+    }
+    std::memset(data, 0, dataSize);
+}
+
+/**
+ * @brief Get the size of a binary file.
+ * @param fileName path of the file
+ * @param fileSize receives the file size in bytes; written only on success
+ * @return SUCCESS, or FAILED when the file cannot be opened or measured, is empty, or is
+ *         larger than a uint32_t can describe
+ */
+Result Utils::GetFileSize(const std::string& fileName, uint32_t& fileSize)
+{
+    std::ifstream binFile;
+    std::streamoff length = 0;
+    if (!OpenAndMeasure(fileName, binFile, length)) {
+        return FAILED;
+    }
+    if (static_cast<unsigned long long>(length) > std::numeric_limits<uint32_t>::max()) {
+        ERROR_LOG("file %s is too large, size is %lld", fileName.c_str(), static_cast<long long>(length));
+        return FAILED;
+    }
+    fileSize = static_cast<uint32_t>(length);
+    // binFile is closed by its destructor.
+    return SUCCESS;
+}
+
+/**
+ * @brief Read a whole binary file into a buffer obtained from aclrtMalloc.
+ * @param fileName path of the file
+ * @param fileSize receives the number of bytes read; written only on success
+ * @return buffer holding the file content (to be released with aclrtFree), or nullptr on
+ *         any failure
+ *
+ * NOTE(review): the buffer is filled directly by the CPU (memset + ifstream::read), which
+ * assumes memory returned by aclrtMalloc is CPU-addressable in this sample's run mode -
+ * confirm for the target platform.
+ */
+void* Utils::ReadBinFile(const std::string& fileName, uint32_t &fileSize)
+{
+    if (!IsRegularFile(fileName)) {
+        return nullptr;
+    }
+    std::ifstream binFile;
+    std::streamoff length = 0;
+    if (!OpenAndMeasure(fileName, binFile, length)) {
+        return nullptr;
+    }
+    // The size is reported through a uint32_t, so larger files cannot be described.
+    if (static_cast<unsigned long long>(length) > std::numeric_limits<uint32_t>::max()) {
+        ERROR_LOG("file %s is too large, size is %lld", fileName.c_str(), static_cast<long long>(length));
+        return nullptr;
+    }
+    const size_t bufferSize = static_cast<size_t>(length);
+
+    void* binFileBufferData = nullptr;
+    aclError ret = aclrtMalloc(&binFileBufferData, bufferSize, ACL_MEM_MALLOC_NORMAL_ONLY);
+    if (ret != ACL_SUCCESS) {
+        ERROR_LOG("malloc device buffer failed. size is %zu", bufferSize);
+        return nullptr;
+    }
+    InitData(static_cast<int8_t*>(binFileBufferData), bufferSize);
+
+    binFile.read(static_cast<char*>(binFileBufferData), static_cast<std::streamsize>(bufferSize));
+    if (!binFile) {
+        // A short or failed read would hand truncated data to the caller; release the
+        // buffer instead of leaking it.
+        ERROR_LOG("read file %s failed", fileName.c_str());
+        (void)aclrtFree(binFileBufferData);
+        return nullptr;
+    }
+    fileSize = static_cast<uint32_t>(bufferSize);
+    return binFileBufferData;
+}
+
+/**
+ * @brief Read a densely packed binary file into a row-strided buffer from aclrtMalloc.
+ *
+ * The file is consumed as (product of all dims but the last) rows of
+ * (last dim * dataSize) bytes each. Row i is stored at byte offset i * stride; the gap
+ * between the end of a row and the next stride boundary is zero-filled.
+ *
+ * @param fileName path of the file
+ * @param dims     dimensions of the model input the file feeds
+ * @param stride   distance in bytes between the starts of consecutive rows
+ * @param dataSize size in bytes of one element
+ * @return strided buffer (to be released with aclrtFree), or nullptr when the file is
+ *         unreadable or too short, the shape is empty or non-positive, a row does not fit
+ *         in stride, or the allocation fails
+ *
+ * NOTE(review): as in ReadBinFile, the buffer is written directly by the CPU - confirm the
+ * memory returned by aclrtMalloc is CPU-addressable on the target platform.
+ */
+void* Utils::ReadBinFileWithStride(const std::string& fileName, const aclmdlIODims& dims,
+    size_t stride, size_t dataSize)
+{
+    if (!IsRegularFile(fileName)) {
+        return nullptr;
+    }
+    std::ifstream binFile;
+    std::streamoff fileLen = 0;
+    if (!OpenAndMeasure(fileName, binFile, fileLen)) {
+        return nullptr;
+    }
+
+    // dimCount is unsigned, so "dimCount - 1" would wrap around for an empty shape and
+    // index far outside dims.dims.
+    const size_t maxDimCount = sizeof(dims.dims) / sizeof(dims.dims[0]);
+    if (dims.dimCount == 0 || dims.dimCount > maxDimCount) {
+        ERROR_LOG("invalid dim count %zu for file %s", dims.dimCount, fileName.c_str());
+        return nullptr;
+    }
+
+    // Number of rows: product of every dimension except the last one.
+    const size_t lastDim = dims.dimCount - 1;
+    size_t loopTimes = 1;
+    for (size_t loop = 0; loop < lastDim; loop++) {
+        size_t dim = 0;
+        if (!ToPositiveSize(dims.dims[loop], dim) || !SafeMul(loopTimes, dim, loopTimes)) {
+            ERROR_LOG("invalid dim[%zu] = %lld for file %s", loop,
+                static_cast<long long>(dims.dims[loop]), fileName.c_str());
+            return nullptr;
+        }
+    }
+
+    // Bytes of payload in one row: last dimension times the element size.
+    size_t dimValue = 0;
+    size_t lineSize = 0;
+    if (dataSize == 0 || !ToPositiveSize(dims.dims[lastDim], dimValue) ||
+        !SafeMul(dimValue, dataSize, lineSize)) {
+        ERROR_LOG("invalid line layout for file %s, last dim is %lld, data size is %zu",
+            fileName.c_str(), static_cast<long long>(dims.dims[lastDim]), dataSize);
+        return nullptr;
+    }
+    // A row longer than the stride would overrun the end of the allocation.
+    if (lineSize > stride) {
+        ERROR_LOG("line size %zu exceeds stride %zu for file %s", lineSize, stride, fileName.c_str());
+        return nullptr;
+    }
+
+    size_t bufferSize = 0;
+    if (!SafeMul(loopTimes, stride, bufferSize)) {
+        ERROR_LOG("buffer size overflow for file %s, lines %zu, stride %zu",
+            fileName.c_str(), loopTimes, stride);
+        return nullptr;
+    }
+    // Cannot overflow: lineSize <= stride and loopTimes * stride fits in size_t.
+    const size_t payloadSize = loopTimes * lineSize;
+    if (static_cast<unsigned long long>(fileLen) < payloadSize) {
+        ERROR_LOG("file %s is too small, size is %lld, required %zu",
+            fileName.c_str(), static_cast<long long>(fileLen), payloadSize);
+        return nullptr;
+    }
+
+    void* binFileBufferData = nullptr;
+    aclError ret = aclrtMalloc(&binFileBufferData, bufferSize, ACL_MEM_MALLOC_NORMAL_ONLY);
+    if (ret != ACL_SUCCESS) {
+        // Report the size that was actually requested, not the file length.
+        ERROR_LOG("malloc device buffer failed. size is %zu", bufferSize);
+        return nullptr;
+    }
+    // Zero everything first so the padding between rows is well defined.
+    InitData(static_cast<int8_t*>(binFileBufferData), bufferSize);
+
+    char* const rowBase = static_cast<char*>(binFileBufferData);
+    for (size_t loop = 0; loop < loopTimes; loop++) {
+        binFile.read(rowBase + loop * stride, static_cast<std::streamsize>(lineSize));
+        if (!binFile) {
+            ERROR_LOG("read file %s failed", fileName.c_str());
+            (void)aclrtFree(binFileBufferData);
+            return nullptr;
+        }
+    }
+    return binFileBufferData;
+}
+
+/**
+ * @brief Public entry point: load a file into a row-strided buffer.
+ *
+ * Forwards unchanged to ReadBinFileWithStride.
+ *
+ * @param fileName path of the file
+ * @param dims     dimensions of the model input
+ * @param stride   distance in bytes between the starts of consecutive rows
+ * @param dataSize size in bytes of one element
+ * @return buffer returned by ReadBinFileWithStride, or nullptr on failure
+ */
+void* Utils::GetDeviceBufferOfFile(const std::string& fileName, const aclmdlIODims& dims,
+    size_t stride, size_t dataSize)
+{
+    return Utils::ReadBinFileWithStride(fileName, dims, stride, dataSize);
+}
diff --git a/samples/contribute/ACT/SVP_NNN/README.md b/samples/contribute/ACT/SVP_NNN/README.md index e4c93a8988a624a48ee84434508be1a75f66cfdc..589398cf38a3805821405e6dbea24e994c866d46 100755 --- a/samples/contribute/ACT/SVP_NNN/README.md +++ b/samples/contribute/ACT/SVP_NNN/README.md @@ -110,7 +110,7 @@ ACT(Action Chunking with Transformers)是面向机器人学习场景的高 ### 模型转化 通过PyTorch将多`.safetensors`权重文件夹转为`.onnx`文件,再用ATC工具转为`.om`离线推理模型: 1. 
准备权重文件: - 模型下载链接:https://huggingface.co/datasets/lwh2017/grab_banana(待修改) + 模型下载链接:https://huggingface.co/datasets/lwh2017/grab_banana ```bash mkdir model # 创建模型目录 # 将下载的模型权重文件夹(含.safetensors文件)放入model目录 @@ -297,8 +297,6 @@ ACT(Action Chunking with Transformers)是面向机器人学习场景的高 - 输入0:形状`(1, 6)`,float32类型,元素数6,字节数24; - 输入1:形状`(1, 3, 240, 320)`(3通道240×320图像),float32类型,元素数230400,字节数921600; - 输入2:形状`(1, 3, 240, 320)`(3通道240×320图像),float32类型,元素数230400,字节数921600; - 2. **推理耗时指标**: - - 模型核心推理时间:单次推理约**39.5毫秒**; - - 端到端推理时间:单次推理约**63.0毫秒**; - - 非模型开销:约**23.5毫秒**(含数据准备、IPC通信、后处理); - - 首次请求响应时间:首次推理请求至返回结果的耗时,约**70秒**(含模型加载和端到端推理时间); + 1. **推理耗时指标**: + - 模型核心推理时间:单次推理约**37毫秒**(仅模型前向计算耗时,不含数据传输/解析); + - 端到端推理时间:单次推理约**1.2秒**(含数据打包、C++进程通信、输出解析、张量转换全流程耗时),可通过流水线设计增加推理吞吐;