diff --git a/operator/op_using/BatchMatMulV2/README.md b/operator/op_using/BatchMatMulV2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..048f019c3522e8d892d0a2ad4d753e46831102ac
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/README.md
@@ -0,0 +1,75 @@
+# BatchMatMulV2 operator ACL invocation sample
+
+## Function Description
+
+This sample demonstrates how to run the BatchMatMulV2 operator through the ACL single-operator interface aclopExecuteV2, covering the invocation flow of an operator with inputs, an output, and attributes.
+
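+A minimal sketch of the call flow is shown below (error handling, input-data generation, and buffer cleanup are omitted; the shapes and attribute names mirror model/acl_op.json, and the complete implementation is in src/BatchMatMulV2Dynamic.cpp):
+
+```
+#include <vector>
+#include "acl/acl.h"
+
+int main() {
+    aclInit("");                              // Initialize ACL without a config file
+    aclrtSetDevice(0);
+    aclopSetModelDir("./model");              // Directory containing the om models generated by ATC
+    aclrtStream stream = nullptr;
+    aclrtCreateStream(&stream);
+
+    // Runtime shapes must fall inside the shape_range declared in model/acl_op.json.
+    std::vector<std::vector<int64_t>> inShapes = {{3, 32, 96}, {3, 96, 64}, {64}};
+    std::vector<int64_t> outShape = {3, 32, 64};
+
+    aclTensorDesc *inDescs[3];
+    aclDataBuffer *inBufs[3];
+    for (int i = 0; i < 3; ++i) {
+        inDescs[i] = aclCreateTensorDesc(ACL_FLOAT, inShapes[i].size(), inShapes[i].data(), ACL_FORMAT_ND);
+        void *mem = nullptr;
+        aclrtMalloc(&mem, aclGetTensorDescSize(inDescs[i]), ACL_MEM_MALLOC_NORMAL_ONLY);
+        inBufs[i] = aclCreateDataBuffer(mem, aclGetTensorDescSize(inDescs[i]));
+        // Real input data would be copied into 'mem' here (omitted in this sketch).
+    }
+    aclTensorDesc *outDescs[1] = {aclCreateTensorDesc(ACL_FLOAT, outShape.size(), outShape.data(), ACL_FORMAT_ND)};
+    void *outMem = nullptr;
+    aclrtMalloc(&outMem, aclGetTensorDescSize(outDescs[0]), ACL_MEM_MALLOC_NORMAL_ONLY);
+    aclDataBuffer *outBufs[1] = {aclCreateDataBuffer(outMem, aclGetTensorDescSize(outDescs[0]))};
+
+    // Attributes mirror the single-op description (adj_x1 / adj_x2).
+    aclopAttr *attr = aclopCreateAttr();
+    aclopSetAttrBool(attr, "adj_x1", false);
+    aclopSetAttrBool(attr, "adj_x2", false);
+
+    // Asynchronous execution followed by an explicit wait.
+    aclopExecuteV2("BatchMatMulV2", 3, inDescs, inBufs, 1, outDescs, outBufs, attr, stream);
+    aclrtSynchronizeStream(stream);
+
+    aclrtDestroyStream(stream);
+    aclrtResetDevice(0);
+    aclFinalize();
+    return 0;
+}
+```
+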
+## Directory Structure
+
+```
+|
+├── data                           // Test input data and operator output directory
+├── model                          // Single-operator model directory
+│   ├── acl_op.json                // Single-op description of BatchMatMulV2, used by the ATC tool to generate the om model
+├── scripts                        // Script directory
+│   ├── build_exe.sh               // Sample build script
+│   ├── generate_om.sh             // om generation script
+├── src
+│   ├── BatchMatMulV2Dynamic.cpp   // C++ implementation of the single-operator invocation flow
+├── README.md                      // Sample description (this file)
+```
+
+## Requirements
+
+- OS and architecture: CentOS x86\_64, Ubuntu 18.04 x86\_64, EulerOS x86
+- Compiler:
+  - For an x86 runtime environment, the compiler is g++
+- Python and dependent libraries: Python 3.7.*x* (3.7.0 ~ 3.7.11) or Python 3.8.*x* (3.8.0 ~ 3.8.11)
+- The Ascend AI software stack has been deployed.
+
+
+
+## Configure Environment Variables
+
+- On the development environment
+
+  1. The CANN-Toolkit package provides a process-level environment variable script; source it in your shell to set up the basic CANN environment variables, for example:
+
+     ```
+     . ${HOME}/Ascend/ascend-toolkit/set_env.sh
+     ```
+
+     Replace "$HOME/Ascend" with the actual installation path of the "Ascend-cann-toolkit" package.
+
+
+## Build and Run
+1. From the root of the samples project, run `cd operator/op_using/BatchMatMulV2` to enter the sample directory.
+
+2. Make the scripts executable:
+   ```
+   chmod +x scripts/*
+   ```
+
+3. Generate the om model. The script defaults to the Ascend910A SoC; pass a different SoC version as the first argument (for example `./scripts/generate_om.sh Ascend310P3`):
+   ```
+   ./scripts/generate_om.sh
+   ```
+   The script calls atc on every json file under the model directory, so several json files can be placed there to generate om models with different inputs. See [single-operator description file configuration](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/63RC2alpha001/infacldevg/atctool/atlasatc_16_0036.html) for the json format.
+
+4. Build the executable acl_exe from the sample code (the script also runs it once after linking):
+   ```
+   ./scripts/build_exe.sh
+   ```
+
+5. Run the executable:
+   ```
+   ./acl_exe
+   ```
+   On success the program prints:
+   ```
+   [INFO] ACL run op success!
+   ```
\ No newline at end of file
diff --git a/operator/op_using/.keep b/operator/op_using/BatchMatMulV2/data/.keep
similarity index 100%
rename from operator/op_using/.keep
rename to operator/op_using/BatchMatMulV2/data/.keep
diff --git a/operator/op_using/BatchMatMulV2/model/acl_op.json b/operator/op_using/BatchMatMulV2/model/acl_op.json
new file mode 100644
index 0000000000000000000000000000000000000000..df386bde2f090d8b622a75f639dc0a30604f90f5
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/model/acl_op.json
@@ -0,0 +1,57 @@
+[
+    {
+        "attr": [
+            {
+                "name": "adj_x1",
+                "type": "bool",
+                "value": false
+            },
+            {
+                "name": "adj_x2",
+                "type": "bool",
+                "value": false
+            }
+        ],
+        "input_desc": [
+            {
+                "format": "ND",
+                "name": "x1",
+                "shape": [
+                    -1,-1,-1
+                ],
+                "shape_range": [[3,16],[1,512],[1,1536]],
+                "type": "float32"
+            },
+            {
+                "format": "ND",
+                "name": "x2",
+                "shape": [
+                    -1,-1,-1
+                ],
+                "shape_range": [[3,16],[1,1536],[1,512]],
+                "type": "float32"
+            },
+            {
+                "format": "ND",
+                "name": "bias",
+                "shape": [
+                    -1
+                ],
+                "shape_range": [[3,512]],
+                "type": "float32"
+            }
+        ],
+        "op": "BatchMatMulV2",
+        "output_desc": [
+            {
+                "format": "ND",
+                "name": "y",
+                "shape": [
+                    -1,-1,-1
+                ],
+                "shape_range": [[3,16],[1,512],[1,512]],
+                "type": "float32"
+            }
+        ]
+    }
+]
\ No newline at end of file
diff --git a/operator/op_using/BatchMatMulV2/scripts/build_exe.sh b/operator/op_using/BatchMatMulV2/scripts/build_exe.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7215ac3f6e6a88994b6e6ef682c5ef521faeea35
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/scripts/build_exe.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Copyright (c) 2023-2023 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+source ${HOME}/Ascend/ascend-toolkit/set_env.sh
+rm -rf acl_exe
+g++ src/BatchMatMulV2Dynamic.cpp -I $ASCEND_HOME_PATH/include -L $ASCEND_HOME_PATH/lib64 -lascendcl -lacl_op_compiler -o acl_exe
+./acl_exe
diff --git a/operator/op_using/BatchMatMulV2/scripts/generate_om.sh b/operator/op_using/BatchMatMulV2/scripts/generate_om.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6997c5f7b64bc20764266bae5ac1ba86bfe8b6b0
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/scripts/generate_om.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2023-2023 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+default_version=Ascend910A
+if [ $# -eq 0 ]; then
+    echo "atc run on default Ascend910A"
+else
+    echo "atc run on $1"
+    default_version=$1
+fi
+for file in $(find ./model -name '*.json'); do
+    atc --singleop=$file --output=model --soc_version=$default_version
+done
diff --git a/operator/op_using/BatchMatMulV2/src/BatchMatMulV2Dynamic.cpp b/operator/op_using/BatchMatMulV2/src/BatchMatMulV2Dynamic.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0ce375597c543cb0166a3ce84d137e40a7487459
--- /dev/null
+++ b/operator/op_using/BatchMatMulV2/src/BatchMatMulV2Dynamic.cpp
@@ -0,0 +1,274 @@
+/**
+* @file BatchMatMulV2Dynamic.cpp
+*
+* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <vector>
+#include "acl/acl.h"
+using namespace std;
+
+#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args)
+#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
+// Evaluate the expression once and log the error code if it did not succeed.
+#define CHECK_ACL_RESULT(expr, errMsg) \
+    do { \
+        aclError aclRet = (expr); \
+        if (aclRet != ACL_SUCCESS) { \
+            ERROR_LOG("%s error code = %d", errMsg, aclRet); \
+        } \
+    } while (0)
+
+namespace {
+    bool runOnDevice = true;
+    int32_t deviceId = 0;
+    aclrtStream stream = nullptr;
+    const int32_t inputNums = 3;
+    const int32_t outputNums = 1;
+    aclTensorDesc *inputDescs[inputNums];
+    aclTensorDesc *outputDescs[outputNums];
+    std::vector<int64_t> inputShapeX1 = {3, 32, 96};
+    std::vector<int64_t> inputShapeX2 = {3, 96, 64};
+    std::vector<int64_t> inputShapeBias = {64};
+    std::vector<int64_t> inputShapeOffsetW = {};
+    std::vector<int64_t> outputShapeY = {3, 32, 64};
+    std::vector<std::vector<int64_t>> inputShapes = {inputShapeX1, inputShapeX2, inputShapeBias};
+    std::vector<std::vector<int64_t>> outputShapes = {outputShapeY};
+    std::vector<void *> devMemInputs;
+    std::vector<void *> devMemOutputs;
+    std::vector<void *> hostMemInputs;
+    std::vector<void *> hostMemOutputs;
+    std::vector<aclDataBuffer *> inBuffers;
+    std::vector<aclDataBuffer *> outBuffers;
+    aclopAttr *opAttr = nullptr;
+    int randNum = 1000;
+}
+
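+// Note: the fixed runtime shapes above must fall inside the shape_range declared in
+// model/acl_op.json (for example x1 {3, 32, 96} lies within [[3,16],[1,512],[1,1536]]);
+// otherwise the dynamic-shape om generated by ATC cannot be matched at execution time.
+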
+typedef enum Result {
+    SUCCESS = 0,
+    FAILED = 1
+} Result;
+
+// Fill the buffer with pseudo-random floats in [0, 1).
+void DataProductor(float data[], size_t size)
+{
+    for (size_t i = 0; i < size; i++) {
+        data[i] = static_cast<float>(rand() % (randNum + 1)) / static_cast<float>(randNum + 1);
+    }
+}
+
+bool WriteFile(const std::string &filePath, const void *buffer, size_t size)
+{
+    if (buffer == nullptr) {
+        ERROR_LOG("Write file failed. buffer is nullptr");
+        return false;
+    }
+
+    FILE *outputFile = fopen(filePath.c_str(), "wb");
+    if (outputFile == nullptr) {
+        ERROR_LOG("Open file failed. path = %s", filePath.c_str());
+        return false;
+    }
+
+    fwrite(buffer, sizeof(char), size, outputFile);
+    fclose(outputFile);
+    return true;
+}
+
+Result InitResource()
+{
+    const char *aclConfigPath = "";
+    CHECK_ACL_RESULT(aclInit(aclConfigPath), "aclInit fail");
+    CHECK_ACL_RESULT(aclrtSetDevice(deviceId), "aclrtSetDevice fail");
+    aclrtRunMode runMode;
+    CHECK_ACL_RESULT(aclrtGetRunMode(&runMode), "aclrtGetRunMode fail");
+    runOnDevice = (runMode == ACL_DEVICE);
+    // Tell ACL where to find the single-operator om models generated by ATC.
+    CHECK_ACL_RESULT(aclopSetModelDir("./model"), "aclopSetModelDir fail");
+    return SUCCESS;
+}
+
+Result ProcessInput()
+{
+    // Describe inputs and output with the fixed runtime shapes.
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        inputDescs[i] = aclCreateTensorDesc(ACL_FLOAT, inputShapes[i].size(), inputShapes[i].data(), ACL_FORMAT_ND);
+    }
+
+    for (size_t i = 0; i < outputNums; i++)
+    {
+        outputDescs[i] = aclCreateTensorDesc(ACL_FLOAT, outputShapes[i].size(), outputShapes[i].data(), ACL_FORMAT_ND);
+    }
+
+    // Allocate device memory and wrap it in ACL data buffers.
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        void *devMemInput = nullptr;
+        CHECK_ACL_RESULT(aclrtMalloc(&devMemInput, aclGetTensorDescSize(inputDescs[i]),
+            ACL_MEM_MALLOC_NORMAL_ONLY), "aclrtMalloc devMemInput fail");
+
+        devMemInputs.emplace_back(devMemInput);
+        aclDataBuffer *inputBuffer = aclCreateDataBuffer(devMemInput, aclGetTensorDescSize(inputDescs[i]));
+        inBuffers.emplace_back(inputBuffer);
+    }
+
+    for (size_t i = 0; i < outputNums; i++)
+    {
+        void *devMemOutY = nullptr;
+        CHECK_ACL_RESULT(aclrtMalloc(&devMemOutY, aclGetTensorDescSize(outputDescs[i]),
+            ACL_MEM_MALLOC_NORMAL_ONLY), "aclrtMalloc devMemOutY fail");
+
+        devMemOutputs.emplace_back(devMemOutY);
+        aclDataBuffer *outputBuffer = aclCreateDataBuffer(devMemOutY, aclGetTensorDescSize(outputDescs[i]));
+        outBuffers.push_back(outputBuffer);
+    }
+
+    // Allocate staging memory; when running directly on the device, ordinary
+    // device memory is used instead of pinned host memory.
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        void *hostMemInput = nullptr;
+        if (runOnDevice)
+        {
+            CHECK_ACL_RESULT(aclrtMalloc(&hostMemInput, aclGetTensorDescSize(inputDescs[i]),
+                ACL_MEM_MALLOC_NORMAL_ONLY), "aclrtMalloc hostMemInput fail");
+        }
+        else
+        {
+            CHECK_ACL_RESULT(aclrtMallocHost(&hostMemInput, aclGetTensorDescSize(inputDescs[i])),
+                "aclrtMallocHost hostMemInput fail");
+        }
+        hostMemInputs.emplace_back(hostMemInput);
+    }
+
+    for (size_t i = 0; i < outputNums; i++)
+    {
+        void *hostOutput = nullptr;
+        if (runOnDevice) {
+            CHECK_ACL_RESULT(aclrtMalloc(&hostOutput, aclGetTensorDescSize(outputDescs[i]),
+                ACL_MEM_MALLOC_NORMAL_ONLY), "aclrtMalloc hostOutput fail");
+        } else {
+            CHECK_ACL_RESULT(aclrtMallocHost(&hostOutput, aclGetTensorDescSize(outputDescs[i])),
+                "aclrtMallocHost hostOutput fail");
+        }
+        hostMemOutputs.emplace_back(hostOutput);
+    }
+
+    // Generate random test data directly into the staging input buffers.
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        size_t size = 1;
+        for (size_t j = 0; j < inputShapes[i].size(); j++) {
+            size *= inputShapes[i][j];
+        }
+        DataProductor(static_cast<float *>(hostMemInputs[i]), size);
+    }
+
+    CHECK_ACL_RESULT(aclrtCreateStream(&stream), "aclrtCreateStream fail");
+    // Copy the test data to the device-side input buffers.
+    for (size_t i = 0; i < inputNums; i++)
+    {
+        if (runOnDevice)
+        {
+            CHECK_ACL_RESULT(aclrtMemcpy(devMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+                hostMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+                ACL_MEMCPY_DEVICE_TO_DEVICE), "aclrtMemcpy d2d fail");
+        }
+        else
+        {
+            CHECK_ACL_RESULT(aclrtMemcpy(devMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+                hostMemInputs[i], aclGetTensorDescSize(inputDescs[i]),
+                ACL_MEMCPY_HOST_TO_DEVICE), "aclrtMemcpy h2d fail");
+        }
+    }
+
+    // Attributes must match the values in the single-op description file.
+    opAttr = aclopCreateAttr();
+    CHECK_ACL_RESULT(aclopSetAttrBool(opAttr, "adj_x1", false), "aclopSetAttrBool adj_x1 fail");
+    CHECK_ACL_RESULT(aclopSetAttrBool(opAttr, "adj_x2", false), "aclopSetAttrBool adj_x2 fail");
+    return SUCCESS;
+}
+
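+// aclopExecuteV2 matches the operator type, tensor descriptions and attributes against
+// the single-op models loaded from the directory set by aclopSetModelDir, then launches
+// the kernel asynchronously on the given stream; the stream must be synchronized before
+// the output buffer is read back.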
+Result Inference()
+{
+    CHECK_ACL_RESULT(aclopExecuteV2("BatchMatMulV2", inputNums, inputDescs, inBuffers.data(),
+        outputNums, outputDescs, outBuffers.data(), opAttr, stream), "aclopExecuteV2 fail");
+
+    CHECK_ACL_RESULT(aclrtSynchronizeStream(stream), "aclrtSynchronizeStream fail");
+    CHECK_ACL_RESULT(aclrtDestroyStream(stream), "aclrtDestroyStream fail");
+    return SUCCESS;
+}
+
+Result GetResult()
+{
+    // Copy the result back to the staging buffers and dump it to disk.
+    for (size_t i = 0; i < outputNums; i++)
+    {
+        if (runOnDevice)
+        {
+            CHECK_ACL_RESULT(aclrtMemcpy(hostMemOutputs[i], aclGetTensorDescSize(outputDescs[i]), devMemOutputs[i],
+                aclGetTensorDescSize(outputDescs[i]), ACL_MEMCPY_DEVICE_TO_DEVICE), "aclrtMemcpy d2d fail");
+        }
+        else
+        {
+            CHECK_ACL_RESULT(aclrtMemcpy(hostMemOutputs[i], aclGetTensorDescSize(outputDescs[i]), devMemOutputs[i],
+                aclGetTensorDescSize(outputDescs[i]), ACL_MEMCPY_DEVICE_TO_HOST), "aclrtMemcpy d2h fail");
+        }
+    }
+    WriteFile("./data/output.bin", hostMemOutputs[0], aclGetTensorDescSize(outputDescs[0]));
+    return SUCCESS;
+}
+
+Result ReleaseResource()
+{
+    for (size_t i = 0; i < outBuffers.size(); i++) {
+        CHECK_ACL_RESULT(aclDestroyDataBuffer(outBuffers[i]), "aclDestroyDataBuffer fail");
+    }
+
+    for (size_t i = 0; i < inBuffers.size(); i++) {
+        CHECK_ACL_RESULT(aclDestroyDataBuffer(inBuffers[i]), "aclDestroyDataBuffer fail");
+    }
+
+    for (size_t i = 0; i < inputNums; i++) {
+        aclDestroyTensorDesc(inputDescs[i]);
+    }
+    for (size_t i = 0; i < outputNums; i++) {
+        aclDestroyTensorDesc(outputDescs[i]);
+    }
+    aclopDestroyAttr(opAttr);
+    CHECK_ACL_RESULT(aclrtResetDevice(deviceId), "ResetDevice fail");
+    CHECK_ACL_RESULT(aclFinalize(), "Finalize acl failed");
+    return SUCCESS;
+}
+
+int main(int argc, char const *argv[])
+{
+    Result ret = InitResource();
+    if (ret == FAILED) {
+        ERROR_LOG("Init resource failed");
+        return FAILED;
+    }
+
+    ret = ProcessInput();
+    if (ret == FAILED) {
+        ERROR_LOG("process input failed");
+        return FAILED;
+    }
+
+    ret = Inference();
+    if (ret == FAILED) {
+        ERROR_LOG("model inference failed");
+        return FAILED;
+    }
+
+    ret = GetResult();
+    if (ret == FAILED) {
+        ERROR_LOG("GetResult failed");
+        return FAILED;
+    }
+    ret = ReleaseResource();
+    if (ret == FAILED) {
+        ERROR_LOG("Release Resource failed");
+        return FAILED;
+    }
+    INFO_LOG("ACL run op success!");
+    return SUCCESS;
+}
\ No newline at end of file