diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/CMakeLists.txt b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb8a058bc80da4b55a6c164b859af8ef4d3ef11b --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright 2024 Tencent Inc. All rights reserved. +# +# ============================================================================== +cmake_minimum_required(VERSION 3.14) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +file(GLOB_RECURSE atb_plugin_operations_SRCS "./*.cpp") +set(CUST_PKG_PATH "$ENV{ASCEND_HOME_PATH}/opp/vendors/customize/op_api") +message(STATUS "atb_plugin_operations_SRCS: ${atb_plugin_operations_SRCS}") +include_directories( + $ENV{ATB_HOME_PATH}/include + $ENV{ASCEND_HOME_PATH}/include + ${CUST_PKG_PATH}/include # 添加自定义算子头文件路径 + ${CMAKE_CURRENT_SOURCE_DIR} + ) +link_directories( + $ENV{ATB_HOME_PATH}/lib + $ENV{ASCEND_HOME_PATH}/lib64 + ${CUST_PKG_PATH}/lib # 添加自定义算子so路径 + ) +add_library(atb_plugin_operations STATIC ${atb_plugin_operations_SRCS}) +target_link_libraries(atb_plugin_operations PRIVATE ${ACL_SHARED_LIBS} ascendcl) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/aclnn_add_operation.cpp b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/aclnn_add_operation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..468b84aff8d62a53534f7e9160742b74be9ca352 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/aclnn_add_operation.cpp @@ -0,0 +1,142 @@ +#include "aclnn_add_operation.h" +#include "aclnn_add_custom.h" + +AddOperation::AddOperation(const std::string &name, AddAttrParam 
param){ + attrParam = param; + opName_ = name; +} + +atb::SVector GetCopyTensorStride(atb::Dims &tensorDims) +{ + atb::SVector tmpStrides(tensorDims.dimNum, 1); + if (tensorDims.dimNum > 8) { // 8: tensor最大维度数量 + printf("tensor's dimNum is larger than 8, GetCopyTensorStride failed."); + return tmpStrides; + } + for (int64_t i = static_cast(tensorDims.dimNum) - 2; i >= 0; i--) { + tmpStrides[i] = (tensorDims.dims[i + 1] * tmpStrides[i + 1]); + } + return tmpStrides; +} + +std::shared_ptr AddOperation::CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx) +{ + auto aclnnTensor = std::make_shared(); + aclnnTensor->tensorIdx = static_cast(tensorIdx); + aclnnTensor->needUpdateTensorDataPtr = true; + aclnnTensor->atbTensor = atbTensor; + aclnnTensor->strides = GetCopyTensorStride(atbTensor.desc.shape); + + // 创建Aclnn tensor + aclnnTensor->tensor = aclCreateTensor(atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.desc.dtype, + aclnnTensor->strides.data(), + 0, + atbTensor.desc.format, + atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.deviceData); + return aclnnTensor; +} + +atb::Status AddOperation::UpdateAclnnVariantPack(const atb::VariantPack &variantPack) +{ + // 更新inTensor的device地址 + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + int ret = -1; + if (!aclInTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclInTensors_[i]->atbTensor = variantPack.inTensors.at(i); + ret = aclSetInputTensorAddr(aclExecutor_, + aclInTensors_[i]->tensorIdx, + aclInTensors_[i]->tensor, + aclInTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + printf("set input fail"); + return atb::ERROR_CANN_ERROR; + } + } + + // 更新outTensor的device地址 + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + int ret = -1; + if (!aclOutTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclOutTensors_[i]->atbTensor = variantPack.outTensors.at(i); + ret = aclSetOutputTensorAddr(aclExecutor_, + aclOutTensors_[i]->tensorIdx, + 
aclOutTensors_[i]->tensor, + aclOutTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + printf("set output fail"); + return atb::ERROR_CANN_ERROR; + } + } + return atb::NO_ERROR; +} + +atb::Status AddOperation::Setup(const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) { + + aclInTensors_.resize(GetInputNum()); + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.inTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + printf("creat input tensor %ld fail", i); + return atb::ERROR_INTERNAL_ERROR; + } + aclInTensors_[i] = aclnnTensor; + } + + aclOutTensors_.resize(GetOutputNum()); + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.outTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + printf("creat output tensor %ld fail", i); + return atb::ERROR_INTERNAL_ERROR; + } + aclOutTensors_[i] = aclnnTensor; + } + + + auto ret = aclnnAddCustomGetWorkspaceSize(aclInTensors_.at(0)->tensor, + aclInTensors_.at(1)->tensor, + aclOutTensors_.at(0)->tensor, + &workspaceSize_, + &aclExecutor_); + + workspaceSize = workspaceSize_; + return ret; + +} + +atb::Status AddOperation::Execute(const atb::VariantPack &variantPack, uint8_t *workspace, uint64_t workspaceSize, atb::Context *context) { + + + + aclrtStream stream = context->GetExecuteStream(); + if (!stream) { + printf("get stream fail"); + return atb::ERROR_INVALID_PARAM; + } + // 更新数据传入的地址 + int ret = UpdateAclnnVariantPack(variantPack); + if (ret != 0) { + printf("UpdateAclnnVariantPack fail"); + return atb::ERROR_CANN_ERROR; + } + ret = aclnnAddCustom(workspace, workspaceSize_, aclExecutor_, stream); + + return ret; +} + +atb::Status AddOperation::InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const +{ + outTensorDesc.at(0) = inTensorDesc.at(0); + return atb::NO_ERROR; +} diff --git 
a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/aclnn_add_operation.h b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/aclnn_add_operation.h new file mode 100644 index 0000000000000000000000000000000000000000..8670088d3ac11cb94b3349e550c9df7663d809d8 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/aclnn_add_operation.h @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include "atb/infer_op_params.h" + + +struct AddAttrParam +{ + // add没属性,此处空 +}; + +struct AclnnTensor +{ +public: + atb::Tensor atbTensor; // + aclTensor *tensor = nullptr; + int tensorIdx = -1; // aclTensor在aclExecutor中的index + bool needUpdateTensorDataPtr = false; + atb::SVector strides = {}; +}; + +class AddOperation: public atb::Operation{ +public: + AddOperation(const std::string &name, AddAttrParam param); + atb::Status Setup(const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) override; + atb::Status Execute(const atb::VariantPack &variantPack, uint8_t *workspace, + uint64_t workspaceSize, atb::Context *context) override; + atb::Status InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const; + std::shared_ptr CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx); + atb::Status UpdateAclnnVariantPack(const atb::VariantPack &variantPack); + + uint32_t GetInputNum() const + { + return 2; // 算子入参个数 + } + + uint32_t GetOutputNum() const + { + return 1; // 算子出参个数 + } + std::string GetName() const + { + return opName_; + } + + aclOpExecutor *aclExecutor_ = nullptr; + AddAttrParam attrParam; + std::string opName_; + uint64_t workspaceSize_; + + atb::SVector> aclInTensors_; + atb::SVector> aclOutTensors_; +}; \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/build.sh 
b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..ba28cc1436ceca957cc344ed67dc6fcd2b7b9083 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationATBPlugin/build.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# 定义构建目录 +BUILD_DIR="build" + +# 创建构建目录 +mkdir -p "$BUILD_DIR" +cd "$BUILD_DIR" + +# 运行 CMake 配置和编译 +cmake .. +make + +# 查找生成的 .a 文件 +A_FILE=$(find . -name "*.a" -type f) + +# 检查是否找到了 .a 文件 +if [ -z "$A_FILE" ]; then + echo "未找到 .a 文件,编译可能失败。" + exit 1 +fi + +# 复制头文件到 /usr/include +HEADER_FILES=$(find .. -name "*.h" -type f) +for header in $HEADER_FILES; do + cp "$header" /usr/include/ +done + +# 复制 .a 文件到 /usr/local/lib +cp "$A_FILE" /usr/local/lib/ + +echo "构建完成,头文件和 .a 文件已复制到目标目录。" + diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/CMakeLists.txt b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c2cec465e6a62ca1857e81b2be52c9c49cd0a8 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.16) +project("test_model") +option(USE_CXX11_ABI "USE_CXX11_ABI" OFF) +set(CMAKE_BUILD_TYPE Debug) +if(USE_CXX11_ABI) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + +set(CUST_PKG_PATH "$ENV{ASCEND_HOME_PATH}/opp/vendors/customize/op_api") + +include_directories( + $ENV{ATB_HOME_PATH}/include + $ENV{ASCEND_HOME_PATH}/include + ${CUST_PKG_PATH}/include # 添加自定义算子头文件路径 + ${CMAKE_CURRENT_SOURCE_DIR} + ./src/ + ) + +message(".h path = ${CUST_PKG_PATH}/include") +link_directories( + $ENV{ATB_HOME_PATH}/lib + 
$ENV{ASCEND_HOME_PATH}/lib64 + ${CUST_PKG_PATH}/lib # 添加自定义算子so路径 + ) + + +link_libraries(atb_plugin_operations) +add_executable(test_model ./src/main.cpp) + +# 列出所有的头文件目录 +target_link_libraries(test_model PRIVATE atb ascendcl cust_opapi opapi nnopbase pthread) # 添加自定义算子so及适配so + + + + + diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/gen_data.py b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/gen_data.py new file mode 100644 index 0000000000000000000000000000000000000000..b85e30bc78dd3ef51b5bd968a20b24f253276f76 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/gen_data.py @@ -0,0 +1,14 @@ +import torch +import os +def gen_golden_data_simple(): + input1 = torch.randn(108, 4096, dtype=torch.float16) + input2 = torch.randn(108, 4096, dtype=torch.float16) + + + golden = input1 + input2 + input1.numpy().tofile('./script/input/input0.bin') + input2.numpy().tofile('./script/input/input1.bin') + golden.numpy().tofile("./script/output/golden0.bin") + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/run.sh b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..450b717f2afbc27a3a87cc5d476e7f22f3096f02 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/run.sh @@ -0,0 +1,52 @@ +#!/bin/bash + + +# 步骤1: 运行gen_data.py生成输入bin文件和golden标杆输出数据 +echo "正在生成输入数据和golden标杆数据..." +mkdir -p script/input +mkdir -p script/output +python3 script/gen_data.py +if [ $? -ne 0 ]; then + echo "生成数据失败,脚本终止。" + exit 1 +fi + +# 步骤2: 创建构建目录并进入 +mkdir -p build +cd build +if [ $? -ne 0 ]; then + echo "无法进入构建目录,脚本终止。" + exit 1 +fi + +# 步骤3: 使用CMake配置项目 +echo "正在配置CMake项目..." +cmake .. +if [ $? 
-ne 0 ]; then + echo "CMake配置失败,脚本终止。" + exit 1 +fi + +# 步骤4: 编译代码 +echo "正在编译代码..." +make +if [ $? -ne 0 ]; then + echo "编译失败,脚本终止。" + exit 1 +fi + +mv test_model ../ +cd .. + +# 步骤5: 运行可执行文件生成实际输出文件 +echo "正在运行可执行文件生成实际输出..." +./test_model +if [ $? -ne 0 ]; then + echo "运行可执行文件失败,脚本终止。" + exit 1 +fi + +# 步骤6: 调用verify_result.py进行golden标杆数据和实际输出数据的比对 +echo "正在验证结果..." +python3 script/verify_result.py script/output/output_0.bin script/output/golden0.bin + diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/verify_result.py b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/verify_result.py new file mode 100644 index 0000000000000000000000000000000000000000..d64d11014d7e6881ee3edc0243b5da5b0bcb0cdd --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/script/verify_result.py @@ -0,0 +1,25 @@ +import os +import sys +import numpy + +loss = 1e-3 # 容忍偏差,一般fp16要求绝对误差和相对误差均不超过千分之一 +minimum = 10e-10 + +def verify_result(real_result, golden): + real_result = numpy.fromfile(real_result, dtype=numpy.float16) # 从bin文件读取实际运算结果 + golden = numpy.fromfile(golden, dtype=numpy.float16) # 从bin文件读取预期运算结果 + print("=" * 50, real_result[:5], golden[:5], "=" * 50, sep='\n', end='\n', file=sys.stderr) + result = numpy.abs(real_result - golden) # 计算运算结果和预期结果偏差 + deno = numpy.maximum(numpy.abs(real_result), numpy.abs(golden)) # 获取最大值并组成新数组 + result_atol = numpy.less_equal(result, loss) # 计算绝对误差 + result_rtol = numpy.less_equal(result / numpy.add(deno, minimum), loss) # 计算相对误差 + if not result_rtol.all() and not result_atol.all(): + if numpy.sum(result_rtol == False) > real_result.size * loss and numpy.sum(result_atol == False) > real_result.size * loss: # 误差超出预期时返回打印错误,返回对比失败 + print("[ERROR] result error") + return False + print("test Operation success!") + return True + + +if __name__ == '__main__': + verify_result(sys.argv[1],sys.argv[2]) diff --git 
a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/src/main.cpp b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2500dd95f876ea73db2f7a8819b40dea32e69e1b --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/src/main.cpp @@ -0,0 +1,218 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "main.h" + +bool SetInputData(std::vector &inputData){ + char *xPath = "./script/input/input0.bin"; + char *yPath = "./script/input/input1.bin"; + InputData inputX; + InputData inputY; + inputX.data = ReadBinFile(xPath,inputX.size); + inputY.data = ReadBinFile(yPath,inputY.size); + inputData.push_back(inputX); + inputData.push_back(inputY); + return true; +} + +bool SetOperationInputDesc(atb::SVector &intensorDescs){ + atb::TensorDesc xDesc; + xDesc.dtype = ACL_FLOAT16; + xDesc.format = ACL_FORMAT_ND; + xDesc.shape.dimNum = 2; + xDesc.shape.dims[0] = 108; + xDesc.shape.dims[1] = 4096; + + atb::TensorDesc yDesc; + yDesc.dtype = ACL_FLOAT16; + yDesc.format = ACL_FORMAT_ND; + yDesc.shape.dimNum = 2; + yDesc.shape.dims[0] = 108; + yDesc.shape.dims[1] = 4096; + + intensorDescs.at(0) = xDesc; + intensorDescs.at(1) = yDesc; +} + + + +static void SetCurrentDevice() +{ + const int deviceId = 0; + std::cout << "[INFO]: aclrtSetDevice " << deviceId << std::endl; + int ret = aclrtSetDevice(deviceId); + if (ret != 0) { + std::cout << "[ERROR]: aclrtSetDevice fail, error:" << ret << std::endl; + return; + } + std::cout << "[INFO]: aclrtSetDevice success" << std::endl; +} + + +static void FreeTensor(atb::Tensor &tensor) +{ + if (tensor.deviceData) { + int ret = aclrtFree(tensor.deviceData); + if (ret != 0) { + std::cout << "[ERROR]: aclrtFree fail" << std::endl; + } + tensor.deviceData = nullptr; + tensor.dataSize = 0; + } + if (tensor.hostData) { + int ret = aclrtFreeHost(tensor.hostData); + if (ret != 0) { + std::cout << "[ERROR]: aclrtFreeHost fail, ret = " << ret << std::endl; + } + tensor.hostData = nullptr; + tensor.dataSize = 0; + } +} + +static void FreeTensors(atb::SVector &inTensors, atb::SVector &outTensors) +{ + for (size_t i = 0; i < inTensors.size(); ++i) { + FreeTensor(inTensors.at(i)); + } + for (size_t i = 0; i < outTensors.size(); ++i) { + FreeTensor(outTensors.at(i)); + } +} +bool SaveMemoryToBinFile(void* memoryAddress, size_t memorySize, size_t 
i) { + // 创建 output 目录(如果不存在) + std::filesystem::create_directories("output"); + + // 生成文件名 + std::string filename = "script/output/output_" + std::to_string(i) + ".bin"; + + // 打开文件以二进制写入模式 + std::ofstream file(filename, std::ios::binary); + if (!file) { + std::cerr << "无法打开文件: " << filename << std::endl; + return false; + } + + // 写入数据 + file.write(static_cast(memoryAddress), memorySize); + if (!file) { + std::cerr << "写入文件时出错: " << filename << std::endl; + file.close(); + return false; + } + + // 关闭文件 + file.close(); + std::cout << "数据已成功保存到: " << filename << std::endl; + return true; +} + +int main(int argc, const char *argv[]) +{ + const int deviceId = 0; + std::cout << "[INFO]: aclrtSetDevice " << deviceId << std::endl; + int ret = aclrtSetDevice(deviceId); + if (ret != 0) { + std::cout << "[ERROR]: aclrtSetDevice fail, error:" << ret << std::endl; + return 1; + } + std::cout << "[INFO]: aclrtSetDevice success" << std::endl; + atb::Context *context = nullptr; + ret = atb::CreateContext(&context); + void *stream = nullptr; + ret = aclrtCreateStream(&stream); + if (ret != 0) { + std::cout << "[ERROR]: AsdRtStreamCreate fail, ret:" << ret << std::endl; + return 1; + } + context->SetExecuteStream(stream); + + std::vector input; + SetInputData(input); + + AddAttrParam addAttrParam; + AddOperation *op = new AddOperation("Add",addAttrParam); + std::cout << "[INFO]: complete CreateOp!" 
<< std::endl; + + if(input.size() != op->GetInputNum()) std::cout << "[ERROR]: Operation actual input num is not equal to GetInputNum()"; + + atb::SVector intensorDescs; + atb::SVector outtensorDescs; + intensorDescs.resize(op->GetInputNum()); + outtensorDescs.resize(op->GetOutputNum()); + SetOperationInputDesc(intensorDescs); + atb::Status st = op->InferShape(intensorDescs,outtensorDescs); + if (st != 0) { + std::cout << "[ERROR]: Operation InferShape fail" << std::endl; + return -1; + } + std::cout << "[INFO]: Operation InferShape success" << std::endl; + atb::VariantPack variantPack; + variantPack.inTensors.resize(op->GetInputNum()); + variantPack.outTensors.resize(op->GetOutputNum()); + for(size_t i=0;iGetInputNum();i++){ + variantPack.inTensors.at(i).desc = intensorDescs.at(i); + variantPack.inTensors.at(i).hostData = input[i].data; + variantPack.inTensors.at(i).dataSize = input[i].size; + CheckAcl(aclrtMalloc(&variantPack.inTensors.at(i).deviceData, input[i].size, ACL_MEM_MALLOC_HUGE_FIRST)); + CheckAcl(aclrtMemcpy(variantPack.inTensors.at(i).deviceData, input[i].size, input[i].data, input[i].size, ACL_MEMCPY_HOST_TO_DEVICE)); + } + std::cout << "[INFO]: Operation Input prepare sucess" << std::endl; + for(size_t i=0;iGetOutputNum();i++){ + int64_t *dims = new int64_t[outtensorDescs.at(i).shape.dimNum]; + for(size_t j=0;jSetup(variantPack, workspaceSize, context); + if (st != 0) { + std::cout << "[ERROR]: Operation setup fail" << std::endl; + return -1; + } + std::cout << "[INFO]: Operation setup success" << std::endl; + void *workspace = nullptr; + if (workspaceSize > 0) { + aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + } + + std::cout << "[INFO]: Operation execute start" << std::endl; + st = op->Execute(variantPack, (uint8_t*)workspace, workspaceSize, context); + if (st != 0) { + std::cout << "[ERROR]: Operation execute fail" << std::endl; + return -1; + } + ret = aclrtSynchronizeStream(stream); + std::cout << "[INFO]: Operation 
execute success" << std::endl; + for(size_t i = 0; i < op->GetOutputNum(); i++){ + CheckAcl(aclrtMemcpy(variantPack.outTensors.at(i).hostData, variantPack.outTensors.at(i).dataSize, variantPack.outTensors.at(0).deviceData, + variantPack.outTensors.at(i).dataSize, ACL_MEMCPY_DEVICE_TO_HOST)); + SaveMemoryToBinFile(variantPack.outTensors.at(i).hostData,variantPack.outTensors.at(i).dataSize,i); + } + + FreeTensors(variantPack.inTensors, variantPack.outTensors); + st = atb::DestroyContext(context); + CheckAcl(aclrtDestroyStream(stream)); + CheckAcl(aclrtResetDevice(0)); + CheckAcl(aclFinalize()); + return atb::ErrorType::NO_ERROR; +} diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/src/main.h b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/src/main.h new file mode 100644 index 0000000000000000000000000000000000000000..44a94e9ec418420920eee70ebf270a9df0d4052b --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/AddOperationTest/src/main.h @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include +#include +#include "securec.h" +#include "atb/atb_infer.h" +#include "aclnn_add_operation.h" +#include + +struct InputData{ + void* data; + uint64_t size; +}; +aclError CheckAcl(aclError ret) +{ + if (ret != ACL_ERROR_NONE) { + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << ret << std::endl; + } + return ret; +} +void* ReadBinFile(const char* filename, size_t& size) { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file) { + std::cerr << "无法打开文件: " << filename << std::endl; + return nullptr; + } + + // 获取文件大小 + size = file.tellg(); + file.seekg(0, std::ios::beg); + + // 分配内存 + void* buffer; + int ret = aclrtMallocHost(&buffer,size); + if (!buffer) { + std::cerr << "内存分配失败" << std::endl; + file.close(); + return nullptr; + } + + // 读取文件内容到内存 + file.read(static_cast(buffer), size); + if (!file) { + std::cerr << "读取文件失败" 
<< std::endl; + delete[] static_cast(buffer); + file.close(); + return nullptr; + } + + file.close(); + return buffer; +} \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/readme.md b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..714c8c791ac9d2e8a926a33c70a3eaa99daf7d50 --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation/readme.md @@ -0,0 +1,68 @@ +## 概述 + +本样例基于AscendC自定义[Add](https://gitee.com/ascend/cann-ops/tree/master/src/math/add_custom)算子,开发了ATB插件并进行了插件调用测试. + +## 项目结构介绍 +``` + +├── AddOperationATBPlugin //AddOperation ATB插件代码 + +├── AddOperationTest //AddOperation 测试代码 +``` +## 样例运行 + +### Add AscendC自定义算子部署 + +参照cann-ops仓[add_custom算子](https://gitee.com/ascend/cann-ops/tree/master/src/math/add_custom)" **算子包编译部署** "章节 + +### AddOperation ATB插件部署 + +- 运行编译脚本完成部署(脚本会生成静态库.a文件,同时将头文件拷贝到/usr/include,.a文件拷贝到/usr/local/lib下) + + ``` + cd AddOperationATBPlugin + bash build.sh + ``` + +### AddOperation测试 + +- 运行脚本完成算子测试 + + ```shell + cd AddOperationTest + bash script/run.sh + ``` + +## AddOperation算子介绍 + +### 功能 + +实现两个输入张量相加 + +### 定义 + +``` +struct AddParam { +}; +``` + +### 参数列表 + +该算子参数为空 + +### 输入 + +| **参数** | **维度** | **数据类型** | **格式** | 描述 | +| -------- | -------------------------- | --------------------- | -------- | ---------- | +| x | [dim_0,dim_1,...,dim_n] | float/half/int8/int32 | ND | 输入tensor | +| y | [dim_0,dim_1,...,dim_n] | float/half/int8/int32 | ND | 输入tensor | + +### 输出 + +| **参数** | **维度** | **数据类型** | **格式** | 描述 | +| -------- | -------------------------- | --------------------- | -------- | ---------------------------------------- | +| output | [dim_0,dim_1,...,dim_n] | float/half/int8/int32 | ND | 输出tensor。数据类型和shape与x保持一致。 | + +### 规格约束 + +暂无 \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/24_add_custom/AddCustom/README.md 
b/operator/ascendc/4_best_practices/24_add_custom/AddCustom/README.md new file mode 100644 index 0000000000000000000000000000000000000000..25c5ffc7e5d111a1578ad6b497f756a3c9d7889a --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/AddCustom/README.md @@ -0,0 +1,3 @@ +### 概述 + +具体算子工程,编译部署请见[AddCustom](https://gitee.com/ascend/cann-ops/tree/master/src/math/add_custom) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/24_add_custom/README.md b/operator/ascendc/4_best_practices/24_add_custom/README.md new file mode 100644 index 0000000000000000000000000000000000000000..16780e8fabb685a68e31c351f3411151dff02a8b --- /dev/null +++ b/operator/ascendc/4_best_practices/24_add_custom/README.md @@ -0,0 +1,49 @@ +### 概述 + +本样例介基于AddCustom算子工程,介绍了单算子工程以及ATB插件调用方法 + +### 目录结构介绍 + +``` +├── 24_add_custom // 使用框架调用的方式调用AllGatherMatmul自定义算子 +│ ├── ATBInvocation // 通过ATB调用的方式调用 +│ ├── AddCustom // AddCustom算子工程,引用cann-ops仓的AddCustom工程 +``` + +### 算子描述 + +`AddCustom`算子返回两个数据相加的结果。 + +### 算子规格描述 + +| 算子类型(OpType) | AddCustom | | | | +| ---------------- | ---------- | -------- | --------- | ------ | +| 算子输入 | name | shape | data type | format | +| x | 8 * 2048 | float16 | ND | | +| y | 8 * 2048 | float16 | ND | | +| 算子输出 | z | 8 * 2048 | float16 | ND | +| 核函数名 | add_custom | | | | + +### 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas 训练系列产品 +- Atlas 推理系列产品 +- Atlas A2训练系列产品 +- Atlas 800I A2推理产品 +- Atlas 200I/500 A2推理产品 + +### 算子编译部署 + +- [AddCustom编译部署(需要下载cann-ops仓)](https://gitee.com/ascend/cann-ops/tree/master/src/math/add_custom) + +### 算子ATB插件调用 + +- [AddOperation编译调用](https://gitee.com/youmoxiao/samples/tree/atb_op/operator/ascendc/4_best_practices/24_add_custom/ATBInvocation) + +## 更新说明 + +| 时间 | 更新事项 | +| -------- | -------- | +| 2025/4/1 | 新增样例 | \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/CMakeLists.txt 
b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c89578fcd90a89e47eac5cd21c33f8c0a0c2993e --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright 2024 Tencent Inc. All rights reserved. +# +# ============================================================================== +cmake_minimum_required(VERSION 3.14) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +file(GLOB_RECURSE atb_plugin_operations_SRCS "./*.cpp") +set(CUST_PKG_PATH "$ENV{ASCEND_HOME_PATH}/opp/vendors/customize/op_api") +message(STATUS "atb_plugin_operations_SRCS: ${atb_plugin_operations_SRCS}") +include_directories( + $ENV{ATB_HOME_PATH}/include + $ENV{ASCEND_HOME_PATH}/include + ${CUST_PKG_PATH}/include # 添加自定义算子头文件路径 + ${CMAKE_CURRENT_SOURCE_DIR} + ) +link_directories( + $ENV{ATB_HOME_PATH}/lib + $ENV{ASCEND_HOME_PATH}/lib64 + ${CUST_PKG_PATH}/lib # 添加自定义算子so路径 + ) +add_library(atb_plugin_operations STATIC ${atb_plugin_operations_SRCS}) +target_link_libraries(atb_plugin_operations PRIVATE ${ACL_SHARED_LIBS} ${ACL_SHARED_LIBS} ascendcl) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/aclnn_eye_operation.cpp b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/aclnn_eye_operation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a4f156c4bac53d6b9984cb22a3d0bc81a1266ba9 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/aclnn_eye_operation.cpp @@ -0,0 +1,133 @@ +#include "aclnn_eye_operation.h" +#include "aclnn_eye.h" + +EyeOperation::EyeOperation(const std::string &name, EyeAttrParam param){ + attrParam = param; + opName_ = name; +} + +atb::SVector GetCopyTensorStride(atb::Dims &tensorDims) +{ + atb::SVector 
tmpStrides(tensorDims.dimNum, 1); + if (tensorDims.dimNum > 8) { // 8: tensor最大维度数量 + printf("tensor's dimNum is larger than 8, GetCopyTensorStride failed."); + return tmpStrides; + } + for (int64_t i = static_cast(tensorDims.dimNum) - 2; i >= 0; i--) { + tmpStrides[i] = (tensorDims.dims[i + 1] * tmpStrides[i + 1]); + } + return tmpStrides; +} + +std::shared_ptr EyeOperation::CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx) +{ + auto aclnnTensor = std::make_shared(); + aclnnTensor->tensorIdx = static_cast(tensorIdx); + aclnnTensor->needUpdateTensorDataPtr = true; + aclnnTensor->atbTensor = atbTensor; + aclnnTensor->strides = GetCopyTensorStride(atbTensor.desc.shape); + + // 创建Aclnn tensor + aclnnTensor->tensor = aclCreateTensor(atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.desc.dtype, + aclnnTensor->strides.data(), + 0, + atbTensor.desc.format, + atbTensor.desc.shape.dims, + atbTensor.desc.shape.dimNum, + atbTensor.deviceData); + return aclnnTensor; +} + +atb::Status EyeOperation::UpdateAclnnVariantPack(const atb::VariantPack &variantPack) +{ + // 更新inTensor的device地址 + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + int ret = -1; + if (!aclInTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclInTensors_[i]->atbTensor = variantPack.inTensors.at(i); + ret = aclSetInputTensorAddr(aclExecutor_, + aclInTensors_[i]->tensorIdx, + aclInTensors_[i]->tensor, + aclInTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + printf("set input fail"); + return atb::ERROR_CANN_ERROR; + } + } + + // 更新outTensor的device地址 + for (size_t i = 0; i < aclOutTensors_.size(); ++i) { + int ret = -1; + if (!aclOutTensors_[i]->needUpdateTensorDataPtr) { + continue; + } + aclOutTensors_[i]->atbTensor = variantPack.outTensors.at(i); + ret = aclSetOutputTensorAddr(aclExecutor_, + aclOutTensors_[i]->tensorIdx, + aclOutTensors_[i]->tensor, + aclOutTensors_[i]->atbTensor.deviceData); + + if (ret != 0) { + printf("set output fail"); + return 
atb::ERROR_CANN_ERROR; + } + } + return atb::NO_ERROR; +} + +atb::Status EyeOperation::Setup(const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) { + + aclInTensors_.resize(GetInputNum()); + for (size_t i = 0; i < aclInTensors_.size(); ++i) { + auto aclnnTensor = CreateAclnnTensor(variantPack.inTensors.at(i), i); + if (aclnnTensor->tensor == nullptr) { + printf("creat input tensor %ld fail", i); + return atb::ERROR_INTERNAL_ERROR; + } + aclInTensors_[i] = aclnnTensor; + } + + auto ret = aclnnEyeGetWorkspaceSize(aclInTensors_.at(0)->tensor, + attrParam.num_rows, + attrParam.num_columns, + attrParam.batch_shape, + attrParam.dtype, + &workspaceSize_, + &aclExecutor_); + + workspaceSize = workspaceSize_; + return ret; + +} + +atb::Status EyeOperation::Execute(const atb::VariantPack &variantPack, uint8_t *workspace, uint64_t workspaceSize, atb::Context *context) { + + + + aclrtStream stream = context->GetExecuteStream(); + if (!stream) { + printf("get stream fail"); + return atb::ERROR_INVALID_PARAM; + } + // 更新数据传入的地址 + int ret = UpdateAclnnVariantPack(variantPack); + if (ret != 0) { + printf("UpdateAclnnVariantPack fail"); + return atb::ERROR_CANN_ERROR; + } + ret = aclnnEye(workspace, workspaceSize_, aclExecutor_, stream); + + return ret; +} + +atb::Status EyeOperation::InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const +{ + outTensorDesc.at(0) = inTensorDesc.at(0); + return atb::NO_ERROR; +} diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/aclnn_eye_operation.h b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/aclnn_eye_operation.h new file mode 100644 index 0000000000000000000000000000000000000000..b09697135291359df923108807606ad81e813f41 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/aclnn_eye_operation.h @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include 
"atb/infer_op_params.h" +#include + +struct EyeAttrParam +{ + uint64_t num_rows; + uint64_t num_columns = 0; + std::vector batchShape = {1}; + aclIntArray* batch_shape = aclCreateIntArray(batchShape.data(),batchShape.size()); + uint64_t dtype = 0; +}; + +struct AclnnTensor +{ +public: + atb::Tensor atbTensor; // + aclTensor *tensor = nullptr; + int tensorIdx = -1; // aclTensor在aclExecutor中的index + bool needUpdateTensorDataPtr = false; + atb::SVector strides = {}; +}; + +class EyeOperation: public atb::Operation{ +public: + EyeOperation(const std::string &name, EyeAttrParam param); + atb::Status Setup(const atb::VariantPack &variantPack, uint64_t &workspaceSize, atb::Context *context) override; + atb::Status Execute(const atb::VariantPack &variantPack, uint8_t *workspace, + uint64_t workspaceSize, atb::Context *context) override; + atb::Status InferShape( + const atb::SVector &inTensorDesc, atb::SVector &outTensorDesc) const; + std::shared_ptr CreateAclnnTensor(atb::Tensor atbTensor, size_t tensorIdx); + atb::Status UpdateAclnnVariantPack(const atb::VariantPack &variantPack); + + uint32_t GetInputNum() const + { + return 1; // 算子入参个数 + } + + uint32_t GetOutputNum() const + { + return 1; // 算子出参个数 + } + std::string GetName() const + { + return opName_; + } + + aclOpExecutor *aclExecutor_ = nullptr; + EyeAttrParam attrParam; + std::string opName_; + uint64_t workspaceSize_; + + atb::SVector> aclInTensors_; + atb::SVector> aclOutTensors_; +}; \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/build.sh b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..ba28cc1436ceca957cc344ed67dc6fcd2b7b9083 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationATBPlugin/build.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# 定义构建目录 +BUILD_DIR="build" + +# 创建构建目录 +mkdir -p "$BUILD_DIR" +cd 
"$BUILD_DIR" + +# 运行 CMake 配置和编译 +cmake .. +make + +# 查找生成的 .a 文件 +A_FILE=$(find . -name "*.a" -type f) + +# 检查是否找到了 .a 文件 +if [ -z "$A_FILE" ]; then + echo "未找到 .a 文件,编译可能失败。" + exit 1 +fi + +# 复制头文件到 /usr/include +HEADER_FILES=$(find .. -name "*.h" -type f) +for header in $HEADER_FILES; do + cp "$header" /usr/include/ +done + +# 复制 .a 文件到 /usr/local/lib +cp "$A_FILE" /usr/local/lib/ + +echo "构建完成,头文件和 .a 文件已复制到目标目录。" + diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/CMakeLists.txt b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9c2cec465e6a62ca1857e81b2be52c9c49cd0a8 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.16) +project("test_model") +option(USE_CXX11_ABI "USE_CXX11_ABI" OFF) +set(CMAKE_BUILD_TYPE Debug) +if(USE_CXX11_ABI) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + +set(CUST_PKG_PATH "$ENV{ASCEND_HOME_PATH}/opp/vendors/customize/op_api") + +include_directories( + $ENV{ATB_HOME_PATH}/include + $ENV{ASCEND_HOME_PATH}/include + ${CUST_PKG_PATH}/include # 添加自定义算子头文件路径 + ${CMAKE_CURRENT_SOURCE_DIR} + ./src/ + ) + +message(".h path = ${CUST_PKG_PATH}/include") +link_directories( + $ENV{ATB_HOME_PATH}/lib + $ENV{ASCEND_HOME_PATH}/lib64 + ${CUST_PKG_PATH}/lib # 添加自定义算子so路径 + ) + + +link_libraries(atb_plugin_operations) +add_executable(test_model ./src/main.cpp) + +# 列出所有的头文件目录 +target_link_libraries(test_model PRIVATE atb ascendcl cust_opapi opapi nnopbase pthread) # 添加自定义算子so及适配so + + + + + diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/gen_data.py 
b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/gen_data.py new file mode 100644 index 0000000000000000000000000000000000000000..b0a8ba36f4ee1f21e7fe52a9786bf7f440933d87 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/gen_data.py @@ -0,0 +1,16 @@ +import torch +import os +def gen_golden_data_simple(): + dtype = "float32" + input1 = torch.zeros(3 , 4, 133, 4095, dtype=torch.float) + golden = torch.eye(133,4095, dtype=torch.float) + golden = golden.unsqueeze(0).unsqueeze(0) + golden = golden.repeat(3,4,1,1) + input1.numpy().tofile('./script/input/input0.bin') + golden.numpy().tofile("./script/output/golden0.bin") + + with open("./script/output/meta", "w") as fp: + print(dtype, file=fp) + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/run.sh b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..450b717f2afbc27a3a87cc5d476e7f22f3096f02 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/run.sh @@ -0,0 +1,52 @@ +#!/bin/bash + + +# 步骤1: 运行gen_data.py生成输入bin文件和golden标杆输出数据 +echo "正在生成输入数据和golden标杆数据..." +mkdir -p script/input +mkdir -p script/output +python3 script/gen_data.py +if [ $? -ne 0 ]; then + echo "生成数据失败,脚本终止。" + exit 1 +fi + +# 步骤2: 创建构建目录并进入 +mkdir -p build +cd build +if [ $? -ne 0 ]; then + echo "无法进入构建目录,脚本终止。" + exit 1 +fi + +# 步骤3: 使用CMake配置项目 +echo "正在配置CMake项目..." +cmake .. +if [ $? -ne 0 ]; then + echo "CMake配置失败,脚本终止。" + exit 1 +fi + +# 步骤4: 编译代码 +echo "正在编译代码..." +make +if [ $? -ne 0 ]; then + echo "编译失败,脚本终止。" + exit 1 +fi + +mv test_model ../ +cd .. + +# 步骤5: 运行可执行文件生成实际输出文件 +echo "正在运行可执行文件生成实际输出..." +./test_model +if [ $? 
-ne 0 ]; then + echo "运行可执行文件失败,脚本终止。" + exit 1 +fi + +# 步骤6: 调用verify_result.py进行golden标杆数据和实际输出数据的比对 +echo "正在验证结果..." +python3 script/verify_result.py script/output/output_0.bin script/output/golden0.bin + diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/verify_result.py b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/verify_result.py new file mode 100644 index 0000000000000000000000000000000000000000..a0798d50aa2cbbf7537479774bc090a234a3cece --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/script/verify_result.py @@ -0,0 +1,25 @@ +import os +import sys +import numpy + +loss = 1e-3 # 容忍偏差,一般fp16要求绝对误差和相对误差均不超过千分之一 +minimum = 10e-10 + +def verify_result(real_result, golden): + real_result = numpy.fromfile(real_result, dtype=numpy.float32) # 从bin文件读取实际运算结果 + golden = numpy.fromfile(golden, dtype=numpy.float32) # 从bin文件读取预期运算结果 + print("=" * 50, real_result[:5], golden[:5], "=" * 50, sep='\n', end='\n', file=sys.stderr) + result = numpy.abs(real_result - golden) # 计算运算结果和预期结果偏差 + deno = numpy.maximum(numpy.abs(real_result), numpy.abs(golden)) # 获取最大值并组成新数组 + result_atol = numpy.less_equal(result, loss) # 计算绝对误差 + result_rtol = numpy.less_equal(result / numpy.add(deno, minimum), loss) # 计算相对误差 + if not result_rtol.all() and not result_atol.all(): + if numpy.sum(result_rtol == False) > real_result.size * loss and numpy.sum(result_atol == False) > real_result.size * loss: # 误差超出预期时返回打印错误,返回对比失败 + print("[ERROR] result error") + return False + print("test Operation success!") + return True + + +if __name__ == '__main__': + verify_result(sys.argv[1],sys.argv[2]) diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/src/main.cpp b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9075d32604122e11026d2eeec0aa562517759ccb 
--- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/src/main.cpp @@ -0,0 +1,196 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "main.h" + +bool SetInputData(std::vector &inputData){ + char *xPath = "./script/input/input0.bin"; + InputData inputX; + inputX.data = ReadBinFile(xPath,inputX.size); + inputData.push_back(inputX); + return true; +} + +bool SetOperationInputDesc(atb::SVector &intensorDescs){ + atb::TensorDesc xDesc; + xDesc.dtype = ACL_FLOAT; + xDesc.format = ACL_FORMAT_ND; + xDesc.shape.dimNum = 4; + xDesc.shape.dims[0] = 3; + xDesc.shape.dims[1] = 4; + xDesc.shape.dims[2] = 133; + xDesc.shape.dims[3] = 4095; + intensorDescs.at(0) = xDesc; +} + + + +static void SetCurrentDevice() +{ + const int deviceId = 0; + std::cout << "[INFO]: aclrtSetDevice " << deviceId << std::endl; + int ret = aclrtSetDevice(deviceId); + if (ret != 0) { + std::cout << "[ERROR]: aclrtSetDevice fail, error:" << ret << std::endl; + return; + } + std::cout << "[INFO]: aclrtSetDevice success" << std::endl; +} + + +static void FreeTensor(atb::Tensor &tensor) +{ + if (tensor.deviceData) { + int ret = aclrtFree(tensor.deviceData); + if (ret != 0) { + std::cout << "[ERROR]: aclrtFree fail" << std::endl; + } + tensor.deviceData = nullptr; + tensor.dataSize = 0; + } + if (tensor.hostData) { + int ret = aclrtFreeHost(tensor.hostData); + if (ret != 
0) { + std::cout << "[ERROR]: aclrtFreeHost fail, ret = " << ret << std::endl; + } + tensor.hostData = nullptr; + tensor.dataSize = 0; + } +} + +static void FreeTensors(atb::SVector &inTensors) +{ + for (size_t i = 0; i < inTensors.size(); ++i) { + FreeTensor(inTensors.at(i)); + } +} +bool SaveMemoryToBinFile(void* memoryAddress, size_t memorySize, size_t i) { + // 创建 output 目录(如果不存在) + std::filesystem::create_directories("output"); + + // 生成文件名 + std::string filename = "script/output/output_" + std::to_string(i) + ".bin"; + + // 打开文件以二进制写入模式 + std::ofstream file(filename, std::ios::binary); + if (!file) { + std::cerr << "无法打开文件: " << filename << std::endl; + return false; + } + + // 写入数据 + file.write(static_cast(memoryAddress), memorySize); + if (!file) { + std::cerr << "写入文件时出错: " << filename << std::endl; + file.close(); + return false; + } + + // 关闭文件 + file.close(); + std::cout << "数据已成功保存到: " << filename << std::endl; + return true; +} + +int main(int argc, const char *argv[]) +{ + const int deviceId = 0; + std::cout << "[INFO]: aclrtSetDevice " << deviceId << std::endl; + int ret = aclrtSetDevice(deviceId); + if (ret != 0) { + std::cout << "[ERROR]: aclrtSetDevice fail, error:" << ret << std::endl; + return 1; + } + std::cout << "[INFO]: aclrtSetDevice success" << std::endl; + atb::Context *context = nullptr; + ret = atb::CreateContext(&context); + void *stream = nullptr; + ret = aclrtCreateStream(&stream); + if (ret != 0) { + std::cout << "[ERROR]: AsdRtStreamCreate fail, ret:" << ret << std::endl; + return 1; + } + context->SetExecuteStream(stream); + + std::vector input; + SetInputData(input); + + EyeAttrParam eyeAttrParam; + eyeAttrParam.num_rows = 133; + eyeAttrParam.num_columns = 4095; + + std::vector batchShape = {3,4}; + eyeAttrParam.batch_shape = aclCreateIntArray(batchShape.data(),batchShape.size()); + eyeAttrParam.dtype = 0; + EyeOperation *op = new EyeOperation("Eye",eyeAttrParam); + std::cout << "[INFO]: complete CreateOp!" 
<< std::endl; + + if(input.size() != op->GetInputNum()) std::cout << "[ERROR]: Operation actual input num is not equal to GetInputNum()"; + + atb::SVector intensorDescs; + atb::SVector outtensorDescs; + intensorDescs.resize(op->GetInputNum()); + outtensorDescs.resize(op->GetOutputNum()); + SetOperationInputDesc(intensorDescs); + atb::Status st = op->InferShape(intensorDescs,outtensorDescs); + if (st != 0) { + std::cout << "[ERROR]: Operation InferShape fail" << std::endl; + return -1; + } + std::cout << "[INFO]: Operation InferShape success" << std::endl; + atb::VariantPack variantPack; + variantPack.inTensors.resize(op->GetInputNum()); + for(size_t i=0;iGetInputNum();i++){ + variantPack.inTensors.at(i).desc = intensorDescs.at(i); + variantPack.inTensors.at(i).hostData = input[i].data; + variantPack.inTensors.at(i).dataSize = input[i].size; + CheckAcl(aclrtMalloc(&variantPack.inTensors.at(i).deviceData, input[i].size, ACL_MEM_MALLOC_HUGE_FIRST)); + CheckAcl(aclrtMemcpy(variantPack.inTensors.at(i).deviceData, input[i].size, input[i].data, input[i].size, ACL_MEMCPY_HOST_TO_DEVICE)); + } + std::cout << "[INFO]: Operation Input prepare sucess" << std::endl; + + uint64_t workspaceSize = 0; + st = op->Setup(variantPack, workspaceSize, context); + if (st != 0) { + std::cout << "[ERROR]: Operation setup fail" << std::endl; + return -1; + } + std::cout << "[INFO]: Operation setup success" << std::endl; + void *workspace = nullptr; + if (workspaceSize > 0) { + aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); + } + + std::cout << "[INFO]: Operation execute start" << std::endl; + st = op->Execute(variantPack, (uint8_t*)workspace, workspaceSize, context); + if (st != 0) { + std::cout << "[ERROR]: Operation execute fail" << std::endl; + return -1; + } + ret = aclrtSynchronizeStream(stream); + std::cout << "[INFO]: Operation execute success" << std::endl; + + CheckAcl(aclrtMemcpy(variantPack.inTensors.at(0).hostData, variantPack.inTensors.at(0).dataSize, 
variantPack.inTensors.at(0).deviceData, + variantPack.inTensors.at(0).dataSize, ACL_MEMCPY_DEVICE_TO_HOST)); + SaveMemoryToBinFile(variantPack.inTensors.at(0).hostData,variantPack.inTensors.at(0).dataSize,0); + + + FreeTensors(variantPack.inTensors); + st = atb::DestroyContext(context); + CheckAcl(aclrtDestroyStream(stream)); + CheckAcl(aclrtResetDevice(0)); + CheckAcl(aclFinalize()); + return atb::ErrorType::NO_ERROR; +} diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/src/main.h b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/src/main.h new file mode 100644 index 0000000000000000000000000000000000000000..015c779c3092de6e661462edb5b6d3d7b498e7dd --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/EyeOperationTest/src/main.h @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include +#include +#include "securec.h" +#include "atb/atb_infer.h" +#include "aclnn_eye_operation.h" +#include + +struct InputData{ + void* data; + uint64_t size; +}; +aclError CheckAcl(aclError ret) +{ + if (ret != ACL_ERROR_NONE) { + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << ret << std::endl; + } + return ret; +} +void* ReadBinFile(const char* filename, size_t& size) { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file) { + std::cerr << "无法打开文件: " << filename << std::endl; + return nullptr; + } + + // 获取文件大小 + size = file.tellg(); + file.seekg(0, std::ios::beg); + + // 分配内存 + void* buffer; + int ret = aclrtMallocHost(&buffer,size); + if (!buffer) { + std::cerr << "内存分配失败" << std::endl; + file.close(); + return nullptr; + } + + // 读取文件内容到内存 + file.read(static_cast(buffer), size); + if (!file) { + std::cerr << "读取文件失败" << std::endl; + delete[] static_cast(buffer); + file.close(); + return nullptr; + } + + file.close(); + return buffer; +} \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/26_eye/ATBInvocation/readme.md 
b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..8a9d273f5a4f2523a62e3ef4dd906a7ea1bfdc4f --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/ATBInvocation/readme.md @@ -0,0 +1,81 @@ +## 概述 + +本样例基于AscendC自定义[Eye](https://gitee.com/ascend/cann-ops/tree/master/src/math/eye)算子,开发了ATB插件并进行了插件调用测试. + +## 项目结构介绍 + +``` +├── EyeOperationATBPlugin //EyeOperation ATB插件代码 + +├── EyeOperationTest //EyeOperation 测试代码 +``` + +## 样例运行 + +### Eye AscendC自定义算子部署 + +参照cann-ops仓[eye算子](https://gitee.com/ascend/cann-ops/tree/master/src/math/eye)" **算子包编译部署** "章节 + +### EyeOperation ATB插件部署 + +- 运行编译脚本完成部署(脚本会生成静态库.a文件,同时将头文件拷贝到/usr/include,.a文件拷贝到/usr/local/lib下) + + ``` + cd EyeOperationATBPlugin + bash build.sh + ``` + +### EyeOperation测试 + +- 运行脚本完成算子测试 + + ```shell + cd EyeOperationTest + bash run.sh + ``` + +## EyeOperation算子介绍 + +### 功能 + +根据num_rows、num_columns与batch_shape属性,在输出张量中生成对角元素为1、其余元素为0的单位矩阵 + +### 定义 + +``` +struct EyeAttrParam +{ + uint64_t num_rows; + uint64_t num_columns = 0; + std::vector<int64_t> batchShape = {1}; + aclIntArray* batch_shape = aclCreateIntArray(batchShape.data(),batchShape.size()); + uint64_t dtype = 0; +}; +``` + +### 参数列表 + +| **成员名称** | 类型 | 默认值 | 取值范围 | **描述** | 是否必选 | +| ------------ | ------------ | ------ | -------- | ------------------------- | -------- | +| num_rows | uint64_t | / | / | 生成的矩阵的行数 | 是 | +| num_columns | uint64_t | 0 | / | 生成的矩阵的列数 | 是 | +| batch_shape | aclIntArray* | {1} | - | | 是 | +| dtype | uint64_t | 0 | 0,1 | 0表示float32,1表示float16 | 是 | + + + +### 输入 + +| **参数** | **维度** | **数据类型** | **格式** | 描述 | +| -------- | -------------------------- | --------------- | -------- | ---------------------------------------- | +| y | [dim_0,dim_1,...,dim_n] | float16/float32 | ND | 输出tensor。数据类型和shape与x保持一致。 | + +### 输出 + +| **参数** | **维度** | **数据类型** | **格式** | 描述 | +| -------- | -------------------------- | --------------- | -------- | ---------------------------------------- | +| y |
[dim_0,dim_1,...,dim_n] | float16/float32 | ND | 输出tensor。数据类型和shape与x保持一致。 | + +### 规格约束 + +暂无 \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/26_eye/EyeCustom/README.md b/operator/ascendc/4_best_practices/26_eye/EyeCustom/README.md new file mode 100644 index 0000000000000000000000000000000000000000..140123bcbaee5020618c1bb59904d3177c68e0d4 --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/EyeCustom/README.md @@ -0,0 +1,3 @@ +### 概述 + +具体算子工程,编译部署请见[Eye](https://gitee.com/ascend/cann-ops/tree/master/src/math/eye) \ No newline at end of file diff --git a/operator/ascendc/4_best_practices/26_eye/README.md b/operator/ascendc/4_best_practices/26_eye/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30ef01ac2d543a751c5bf02cd4f4dbfa59b89f6f --- /dev/null +++ b/operator/ascendc/4_best_practices/26_eye/README.md @@ -0,0 +1,70 @@ +### 概述 + +本样例介基于AddCustom算子工程,介绍了单算子工程以及ATB插件调用方法 + +### 目录结构介绍 + +``` +├── 24_eye // 使用框架调用的方式调用AllGatherMatmul自定义算子 +│ ├── ATBInvocation // 通过ATB调用的方式调用 +│ ├── Eye // AddCustom算子工程,引用cann-ops仓的AddCustom工程 +``` + +### 算子描述 + +创建一个二维矩阵 m×nm×n ,对角元素全为1,其它元素都为0 + +### 算子规格描述 + + + + + + + + + + + + + + + + + + + + + + + + + + + +
算子类型(OpType)Eye
nameshapedata typeformat默认值
算子输入y-float32, float16, int32ND\
算子输出y-float32, float16, int32ND\
attr属性num_rows\int\\
num_columns\int\0
batch_shape\list_int\{1}
dtype\int\0
核函数名eye
+ + +### 支持的产品型号 + +本样例支持如下产品型号: + +- Atlas 训练系列产品 +- Atlas 推理系列产品 +- Atlas A2训练系列产品 +- Atlas 800I A2推理产品 +- Atlas 200I/500 A2推理产品 + +### 算子编译部署 + +- [Eye编译部署(需要下载cann-ops仓)](https://gitee.com/ascend/cann-ops/tree/master/src/math/eye) + +### 算子ATB插件调用 + +- [EyeOperation编译调用](https://gitee.com/youmoxiao/samples/tree/atb_op/operator/ascendc/4_best_practices/26_eye/ATBInvocation) + +## 更新说明 + +| 时间 | 更新事项 | | -------- | -------- | | 2025/4/1 | 新增样例 | \ No newline at end of file