From 7af31c0b89d2647b014e382ba41b9b2b087cd193 Mon Sep 17 00:00:00 2001 From: Hanglei Zhang Date: Sat, 23 Nov 2024 10:19:18 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=87=8F=E5=8C=96?= =?UTF-8?q?=E5=8F=8D=E9=87=8F=E5=8C=96main=E5=87=BD=E6=95=B0bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quantization/dequant/README.md | 68 ++++ .../host_tiling/dequant_custom_tiling.h | 41 +++ .../dequant/kernel_impl/dequant_custom.h | 96 ++++++ .../CMakeLists.txt | 72 ++++ .../kernel_launch_method_by_direct/README.md | 52 +++ .../cmake/cpu_lib.cmake | 26 ++ .../cmake/npu_lib.cmake | 19 ++ .../dequant_custom.cpp | 43 +++ .../dequant_custom_tiling.cpp | 30 ++ .../kernel_launch_method_by_direct/main.cpp | 173 ++++++++++ .../kernel_launch_method_by_direct/run.sh | 52 +++ .../scripts/gen_data.py | 33 ++ .../CMakeLists.txt | 69 ++++ .../CMakePresets.json | 63 ++++ .../README.md | 81 +++++ .../build.sh | 76 +++++ .../cmake/config.cmake | 25 ++ .../cmake/func.cmake | 192 +++++++++++ .../cmake/intf.cmake | 26 ++ .../cmake/makeself.cmake | 17 + .../op_host/CMakeLists.txt | 82 +++++ .../op_host/dequant_custom.cpp | 75 +++++ .../op_host/dequant_custom_tiling.h | 41 +++ .../op_kernel/CMakeLists.txt | 69 ++++ .../op_kernel/dequant_custom.cpp | 26 ++ .../scripts/gen_data.py | 33 ++ .../scripts/help.info | 1 + .../scripts/install.sh | 318 ++++++++++++++++++ .../scripts/upgrade.sh | 151 +++++++++ .../testcases/CMakeLists.txt | 2 + .../testcases/cmake/fun.cmake | 53 +++ .../testcases/npu/CMakeLists.txt | 10 + .../testcases/npu/dequant_custom_main.cpp | 187 ++++++++++ examples/quantization/quant/README.md | 69 ++++ .../quant/host_tiling/quant_custom_tiling.h | 37 ++ .../quant/kernel_impl/quant_custom.h | 75 +++++ .../CMakeLists.txt | 72 ++++ .../kernel_launch_method_by_direct/README.md | 52 +++ .../cmake/cpu_lib.cmake | 26 ++ .../cmake/npu_lib.cmake | 19 ++ .../kernel_launch_method_by_direct/main.cpp | 159 +++++++++ .../quant_custom.cpp | 40 +++ .../quant_custom_tiling.cpp | 30 ++ .../kernel_launch_method_by_direct/run.sh | 52 +++ .../scripts/gen_data.py | 29 ++ .../CMakeLists.txt | 69 ++++ .../CMakePresets.json | 63 ++++ .../README.md | 81 +++++ .../build.sh | 76 +++++ .../cmake/config.cmake | 25 ++ .../cmake/func.cmake | 192 +++++++++++ .../cmake/intf.cmake | 26 ++ .../cmake/makeself.cmake | 17 + .../op_host/CMakeLists.txt | 82 +++++ .../op_host/quant_custom.cpp | 69 ++++ .../op_host/quant_custom_tiling.h | 37 ++ .../op_kernel/CMakeLists.txt | 69 ++++ .../op_kernel/quant_custom.cpp | 26 ++ .../scripts/gen_data.py | 29 ++ .../scripts/help.info | 1 + .../scripts/install.sh | 318 ++++++++++++++++++ .../scripts/upgrade.sh | 151 +++++++++ .../testcases/CMakeLists.txt | 2 + .../testcases/cmake/fun.cmake | 53 +++ .../testcases/npu/CMakeLists.txt | 10 + .../testcases/npu/quant_custom_main.cpp | 175 ++++++++++ 66 files changed, 4533 insertions(+) create mode 100644 examples/quantization/dequant/README.md create mode 100644 examples/quantization/dequant/host_tiling/dequant_custom_tiling.h create mode 100644 examples/quantization/dequant/kernel_impl/dequant_custom.h create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/README.md create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/run.sh create mode 100644 examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/README.md create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/build.sh create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt create mode 100644 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp create mode 100644 examples/quantization/quant/README.md create mode 100644 examples/quantization/quant/host_tiling/quant_custom_tiling.h create mode 100644 examples/quantization/quant/kernel_impl/quant_custom.h create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/README.md create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/main.cpp create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/run.sh create mode 100644 examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/README.md create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/build.sh create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt create mode 100644 examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp diff --git a/examples/quantization/dequant/README.md b/examples/quantization/dequant/README.md new file mode 100644 index 00000000..ba4c0a9d --- /dev/null +++ b/examples/quantization/dequant/README.md @@ -0,0 +1,68 @@ + + +## 概述 + +本样例介绍了调用AscendDequant高阶API实现反量化dequant单算子,并按照不同的算子调用方式分别给出了对应的端到端实现。 + +- 直调:使用核函数直调dequant自定义算子。 + + 核函数的基础调用方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。 + +- 框架调用:使用框架调用dequant自定义算子。 + + 按照工程创建->算子实现->编译部署>算子调用的流程完成算子开发。整个过程都依赖于算子工程:基于工程代码框架完成算子核函数的开发和Tiling实现,通过工程编译脚本完成算子的编译部署,继而实现单算子调用或第三方框架中的算子调用。 + +本样例中包含如下调用方式: + +| 调用方式 | 目录 | **描述** | +| --------- | ------------------------------------------------------------ | ---------------------------------------------------------- | +| 直调 | [kernel_launch_method_by_direct](./kernel_launch_method_by_direct) | host侧的核函数调用程序,包含CPU侧、NPU侧、仿真侧三种运行验证方法。 | +| 框架调用 | [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用dequant算子。 | + +## 样例支持的产品型号为: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 +- Atlas推理系列产品(Ascend 310P处理器)AI Core + +## 目录结构 + +| 目录 | 描述 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| [kernel_launch_method_by_direct](./kernel_launch_method_by_direct) | 通过kernel直调的方式调用自定义算子工程样例目录 | +| [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用自定义算子工程样例目录 | +| [host_tiling](./host_tiling) | 本样例tiling代码实现 | +| [kernel_impl](./kernel_impl) | 本样例kernel侧代码实现 | + +## 算子描述 + +dequant单算子,对输入tensor做反量化计算。 + +dequant算子规格: + + + + + + + + + + + + + + +
算子类型(OpType)DequantCustom
算子输入
nameshapedata typeformat
src4*8int_32ND
deq_scale8floatND
算子输出
dst4*8floatND
核函数名dequant_custom
+ +## 算子实现介绍 + +本样例实现了dequant算子,其中固定shape输入为src[4,8], deq_scale[8],输出为dst[4,8]。 + +- kernel实现 + + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendDequant高阶API接口完成dequant计算,得到最终结果,再搬出到外部存储上。 + + dequant算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm与deq_scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行dequant计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 + +- tiling实现 + + dequant算子的tiling实现流程如下:首先获取AscendDequant接口能完成计算所需最大/最小临时空间大小,根据该范围结合实际的内存使用情况设置合适的空间大小,然后根据输入长度dataLength确定所需tiling参数。 \ No newline at end of file diff --git a/examples/quantization/dequant/host_tiling/dequant_custom_tiling.h b/examples/quantization/dequant/host_tiling/dequant_custom_tiling.h new file mode 100644 index 00000000..f1cde965 --- /dev/null +++ b/examples/quantization/dequant/host_tiling/dequant_custom_tiling.h @@ -0,0 +1,41 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_TILING_H +#define EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_TILING_H +#include "register/tilingdata_base.h" +#include "tiling/tiling_api.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(DequantCustomTilingData) + TILING_DATA_FIELD_DEF(uint32_t, m); + TILING_DATA_FIELD_DEF(uint32_t, n); + TILING_DATA_FIELD_DEF(uint32_t, calCount); + TILING_DATA_FIELD_DEF(uint32_t, sharedTmpBufferSize); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(DequantCustom, DequantCustomTilingData) +} // namespace optiling + +void ComputeTiling(const uint32_t m, const uint32_t n, const uint32_t calCount, optiling::DequantCustomTilingData &tiling){ + std::vector shapeVec = {m, n}; + ge::Shape srcShape(shapeVec); + uint32_t typeSize = 4; + uint32_t maxTmpSize; + uint32_t minTmpSize; + AscendC::GetAscendDequantMaxMinTmpSize(srcShape, typeSize, maxTmpSize, minTmpSize); + uint32_t localWorkspaceSize = minTmpSize; + tiling.set_m(m); + tiling.set_n(n); + tiling.set_calCount(calCount); + tiling.set_sharedTmpBufferSize(localWorkspaceSize); +} + +#endif // EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_TILING_H diff --git a/examples/quantization/dequant/kernel_impl/dequant_custom.h b/examples/quantization/dequant/kernel_impl/dequant_custom.h new file mode 100644 index 00000000..66eb781e --- /dev/null +++ b/examples/quantization/dequant/kernel_impl/dequant_custom.h @@ -0,0 +1,96 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_H +#define EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_H +#include "kernel_operator.h" + +#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args) + +namespace MyCustomKernel { +struct VecTiling{ + uint32_t m; + uint32_t n; + uint32_t calCount; + uint32_t sharedTmpBufferSize; +}; + +template class KernelDequant { +public: + __aicore__ inline KernelDequant() {} + __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR deqScaleGm, uint32_t m, uint32_t n, uint32_t calCount) + { + rowLen = m; + colLen = n; + dataSize = m*n; + scaleSize = calCount; + + srcGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ int32_t *>(srcGm), dataSize); + deqScaleGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ scaleT *>(deqScaleGm), scaleSize); + dstGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ dstT *>(dstGm), dataSize); + + pipe.InitBuffer(inQueueX, 1, dataSize * sizeof(int32_t)); + pipe.InitBuffer(inQueueDeqScale, 1, scaleSize * sizeof(scaleT)); + pipe.InitBuffer(outQueue, 1, dataSize * sizeof(dstT)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor srcLocal = inQueueX.AllocTensor(); + AscendC::DataCopy(srcLocal, srcGlobal, dataSize); + inQueueX.EnQue(srcLocal); + AscendC::LocalTensor deqScaleLocal = inQueueDeqScale.AllocTensor(); + AscendC::DataCopy(deqScaleLocal, deqScaleGlobal, scaleSize); + inQueueDeqScale.EnQue(deqScaleLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor dstLocal = outQueue.AllocTensor(); + AscendC::LocalTensor srcLocal = inQueueX.DeQue(); + AscendC::LocalTensor deqScaleLocal = inQueueDeqScale.DeQue(); + AscendC::AscendDequant(dstLocal, srcLocal, deqScaleLocal, {rowLen, colLen, deqScaleLocal.GetSize()}); + outQueue.EnQue(dstLocal); + inQueueX.FreeTensor(srcLocal); + inQueueDeqScale.FreeTensor(deqScaleLocal); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor dstLocal = outQueue.DeQue(); + AscendC::DataCopy(dstGlobal, dstLocal, dataSize); + outQueue.FreeTensor(dstLocal); + } + +private: + AscendC::GlobalTensor srcGlobal; + AscendC::GlobalTensor deqScaleGlobal; + AscendC::GlobalTensor dstGlobal; + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue inQueueDeqScale; + AscendC::TQue outQueue; + uint32_t dataSize = 0; + uint32_t scaleSize = 0; + uint32_t rowLen = 0; + uint32_t colLen = 0; +}; + +} + +#endif // EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_H + + + diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt new file mode 100644 index 00000000..31c63a8a --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt @@ -0,0 +1,72 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) +if(${RUN_MODE}) + set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +endif() +if (${SOC_VERSION}) + set(SOC_VERSION "Ascend910" CACHE STRING "system on chip type") +endif() + +set(ASCEND_CANN_PACKAGE_PATH "/home/ma-user/work/Ascend/ascend-toolkit/latest" CACHE STRING "ASCEND CANN package installation directory") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() + +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/dequant_custom.cpp +) +set(CUSTOM_ASCEND310P_LIST "Ascend310P1" "Ascend310P3") + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() + +add_executable(dequant_direct_kernel_op + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dequant_custom_tiling.cpp +) + +target_compile_options(dequant_direct_kernel_op PRIVATE + $:-g>> + -O2 + -std=c++17 + -D_GLIBCXX_USE_CXX11_ABI=0 +) + +target_compile_definitions(dequant_direct_kernel_op PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_include_directories(dequant_direct_kernel_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + $:${ASCEND_CANN_PACKAGE_PATH}/include>> + $:${ASCEND_CANN_PACKAGE_PATH}/runtime/include>> +) + +target_link_libraries(dequant_direct_kernel_op PRIVATE + $,$>:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + $:ascendcl>> + $:c_sec>> + ascendc_kernels_${RUN_MODE} + tiling_api + register + platform + ascendalog + dl + graph_base +) + +install(TARGETS dequant_direct_kernel_op + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md new file mode 100644 index 00000000..75bc9271 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md @@ -0,0 +1,52 @@ + + +## 概述 + +本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为src[4,8], deq_scale[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 + +## 目录结构介绍 +| 目录及文件 | 描述 | +|---------------------|----------------------| +| [cmake](./cmake) | 编译工程文件 | +| [scripts](./scripts) | 包含输入数据和真值数据生成脚本文件 | +| main.cpp | 主函数,调用算子的应用程序,含CPU域及NPU域调用 | +| dequant_custom.cpp | 算子kernel实现 | +| dequant_custom_tiling.cpp | 算子tiling实现 | +| run.sh | 编译执行脚本 | +| CMakeLists.txt | 编译工程文件 | + + +## 编译运行样例 + + - 配置环境变量 + + 这里的\$ASCEND_CANN_PACKAGE_PATH需要替换为CANN包的存储路径。例如:/usr/local/Ascend/ascend-toolkit/latest + ``` + export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH + ``` + 若执行sim仿真,可自行配置仿真日志文件目录,默认仿真日志会在build目录下生成。若需要详细了解sim仿真相关内容,请参考[《AscendC算子调测工具》](https://hiascend.com/document/redirect/CannCommunityToolAscendebug)中的 调测功能说明 > Simulator性能仿真功能 > CAModel性能仿真 章节。 + ``` + # 设置仿真模式日志生成目录(可选),需要自行确保设置的目录已存在。若设置为相对路径下的目录,则以程序执行时的目录作为当前目录。例如,执行如下设置时,需要确保./目录下存在xxx目录 + export CAMODEL_LOG_PATH=./xxx + ``` + + - 生成输入和真值 + ``` + python3 scripts/gen_data.py + ``` + + - 编译执行 + + ``` + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + 其中cmake参数说明如下: + - RUN_MODE :编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu / sim/ npu] + - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下参数取值(xxx请替换为具体取值): + - Atlas 推理系列产品(Ascend 310P处理器)参数值:Ascend310P1、Ascend310P3 + - Atlas A2训练系列产品参数值:AscendxxxB1、AscendxxxB2、AscendxxxB3、AscendxxxB4 + + 示例如下: + ``` + bash run.sh -r cpu -v Ascend310P1 + ``` diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake b/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake new file mode 100644 index 00000000..693f15ac --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake @@ -0,0 +1,26 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED + ${KERNEL_FILES} +) + +target_link_libraries(ascendc_kernels_${RUN_MODE} PRIVATE + tikicpulib::${SOC_VERSION} +) + +target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE + -g + -O0 + -std=c++17 +) + +install(TARGETS ascendc_kernels_${RUN_MODE} +DESTINATION ${CMAKE_INSTALL_LIBDIR} +) \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake b/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake new file mode 100644 index 00000000..afdb61f5 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake @@ -0,0 +1,19 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_library(ascendc_kernels_${RUN_MODE} STATIC + ${KERNEL_FILES} +) + +ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> + -DASCENDC_DUMP + -DHAVE_WORKSPACE + -DHAVE_TILING + ) \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp b/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp new file mode 100644 index 00000000..4d5786a3 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "kernel_operator.h" +#include "../kernel_impl/dequant_custom.h" + + +__aicore__ inline void CopyTiling(MyCustomKernel::VecTiling* tiling, GM_ADDR tilingGM) +{ + uint32_t* ptr = reinterpret_cast(tiling); + auto tiling32 = reinterpret_cast<__gm__ uint32_t*>(tilingGM); + + for (uint32_t i = 0; i < sizeof(MyCustomKernel::VecTiling) / sizeof(uint32_t); i++, ptr++) { + *ptr = *(tiling32 + i); + } + + return; +} + +extern "C" __global__ __aicore__ void dequant_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR deqScaleGm, GM_ADDR workspace, GM_ADDR tiling) +{ + MyCustomKernel::KernelDequant op; + MyCustomKernel::VecTiling tilingData; + CopyTiling(&tilingData, tiling); + op.Init(srcGm, dstGm, deqScaleGm, tilingData.m, tilingData.n, tilingData.calCount); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +// call of kernel function +void dequant_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *srcGm, uint8_t *dstGm, uint8_t *deqScaleGm, + uint8_t *workspace, uint8_t *tiling) +{ + dequant_custom<<>>(srcGm, dstGm, deqScaleGm, workspace, tiling); +} +#endif \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp b/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp new file mode 100644 index 00000000..1ea63939 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp @@ -0,0 +1,30 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include +#include +#include +#include +#include +#include "tiling/tiling_api.h" +#include "../host_tiling/dequant_custom_tiling.h" + +uint8_t *GetTilingBuf(optiling::DequantCustomTilingData *tilingData) { + uint32_t tilingSize = sizeof(optiling::DequantCustomTilingData); + uint8_t *buf = (uint8_t *)malloc(tilingSize); + tilingData->SaveToBuffer(buf, tilingSize); + return buf; +} + +uint8_t* GenerateTiling(uint32_t m, uint32_t n, uint32_t scaleSize){ + optiling::DequantCustomTilingData tiling; + ComputeTiling(m, n, scaleSize, tiling); + return GetTilingBuf(&tiling); +} \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp b/examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp new file mode 100644 index 00000000..cad80480 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp @@ -0,0 +1,173 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "../../../common/data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void dequant_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *srcGm, uint8_t *dstGm, + uint8_t *deqScaleGm, uint8_t *workspace, uint8_t *tiling); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void dequant_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR deqScaleGm, + GM_ADDR workspace, GM_ADDR tiling); +#endif + +constexpr uint32_t BLOCK_DIM = 1; +constexpr uint32_t M = 4; +constexpr uint32_t N = 8; +constexpr uint32_t CAL_COUNT = 8; +constexpr uint32_t SCALE_SIZE = 8; +constexpr uint32_t TILINGDATA_SIZE = 4; +constexpr uint32_t WORKSPACE_SIZE = 16*512*512; + +extern uint8_t *GenerateTiling(uint32_t m, uint32_t n, uint32_t calCount); + +static bool CompareResult(const void *outputData, int64_t outSize) +{ + void *goldenData; +#ifdef ASCENDC_CPU_DEBUG + goldenData = (uint8_t *)AscendC::GmAlloc(outSize); +#else + CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); +#endif + size_t goldenSize = outSize; + bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + if (ret) { + printf("ReadFile golden.bin success!\n"); + } else { + printf("test failed!\n"); + return false; + } + constexpr float EPS = 1e-5; + int64_t wrongNum = 0; + + for (int i = 0; i < outSize / sizeof(float); i++) { + float a = (reinterpret_cast(outputData))[i]; + float b = (reinterpret_cast(goldenData))[i]; + float ae = std::abs(a - b); + float re = ae / abs(b); + if (ae > EPS && re > EPS) { + printf("CompareResult golden.bin failed. Output[%d] is %lf, golden is %lf\n", i, a, b); + wrongNum++; + } + } +#ifdef ASCENDC_CPU_DEBUG + AscendC::GmFree((void *)goldenData); +#else + CHECK_ACL(aclrtFreeHost(goldenData)); +#endif + if (wrongNum != 0) { + return false; + } else { + printf("CompareResult golden.bin success!\n"); + return true; + } +} + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = BLOCK_DIM; + size_t m = M; + size_t n = N; + size_t calCount = CAL_COUNT; + size_t scaleSize = SCALE_SIZE * sizeof(float); + size_t inpSize = M * N * sizeof(int32_t); + size_t outSize = M * N * sizeof(float); + size_t tilingFileSize = TILINGDATA_SIZE * sizeof(uint32_t); + size_t workspaceSize = WORKSPACE_SIZE; + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *input = (uint8_t *)AscendC::GmAlloc(inpSize); + uint8_t *scale = (uint8_t *)AscendC::GmAlloc(scaleSize); + uint8_t *output = (uint8_t *)AscendC::GmAlloc(outSize); + uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingFileSize); + uint8_t *workspace = (uint8_t *)AscendC::GmAlloc(workspaceSize); + + ReadFile("../input/input.bin", inpSize, input, inpSize); + ReadFile("../input/scale.bin", scaleSize, scale, scaleSize); + memcpy_s(tiling, tilingFileSize, GenerateTiling(m,n,calCount), tilingFileSize); + AscendC::SetKernelMode(KernelMode::AIV_MODE); + + ICPU_RUN_KF(dequant_custom, blockDim, input, output, scale, workspace, tiling); // use this macro for cpu debug + + WriteFile("../output/output.bin", output, outSize); + bool goldenResult = CompareResult(output, outSize); + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + + AscendC::GmFree((void *)input); + AscendC::GmFree((void *)scale); + AscendC::GmFree((void *)output); + AscendC::GmFree((void *)tiling); + AscendC::GmFree((void *)workspace); +#else + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *inputHost, *outputHost, *scaleHost, *workspaceHost, *tilingHost; + uint8_t *inputDevice, *outputDevice, *scaleDevice, *workspaceDevice, *tilingDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&inputHost), inpSize)); + CHECK_ACL(aclrtMallocHost((void **)(&outputHost), outSize)); + CHECK_ACL(aclrtMallocHost((void **)(&scaleHost), scaleSize)); + CHECK_ACL(aclrtMallocHost((void **)(&workspaceHost), workspaceSize)); + CHECK_ACL(aclrtMallocHost((void **)(&tilingHost), tilingFileSize)); + CHECK_ACL(aclrtMalloc((void **)&inputDevice, inpSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&outputDevice, outSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&scaleDevice, scaleSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&workspaceDevice, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&tilingDevice, tilingFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("../input/input.bin", inpSize, inputHost, inpSize); + ReadFile("../input/scale.bin", scaleSize, scaleHost, scaleSize); + CHECK_ACL(aclrtMemcpy(tilingDevice, tilingFileSize, GenerateTiling(m, n, calCount), + tilingFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(inputDevice, inpSize, inputHost, inpSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(scaleDevice, scaleSize, scaleHost, scaleSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + dequant_custom_do(blockDim, nullptr, stream, inputDevice, outputDevice, scaleDevice, workspaceDevice, tilingDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + CHECK_ACL(aclrtMemcpy(outputHost, outSize, outputDevice, outSize, ACL_MEMCPY_DEVICE_TO_HOST)); + + WriteFile("../output/output.bin", outputHost, outSize); + + bool goldenResult = CompareResult(outputHost, outSize); + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + + CHECK_ACL(aclrtFree(inputDevice)); + CHECK_ACL(aclrtFree(scaleDevice)); + CHECK_ACL(aclrtFree(outputDevice)); + CHECK_ACL(aclrtFree(tilingDevice)); + CHECK_ACL(aclrtFreeHost(inputHost)); + CHECK_ACL(aclrtFreeHost(scaleHost)); + CHECK_ACL(aclrtFreeHost(outputHost)); + CHECK_ACL(aclrtFreeHost(tilingHost)); + CHECK_ACL(aclrtFree(workspaceDevice)); + CHECK_ACL(aclrtFreeHost(workspaceHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/run.sh b/examples/quantization/dequant/kernel_launch_method_by_direct/run.sh new file mode 100644 index 00000000..1567588e --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/run.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +SHORT=r:,v:, +LONG=run-mode:,soc-version:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + (-r | --run-mode ) + RUN_MODE="$2" + shift 2;; + (-v | --soc-version ) + SOC_VERSION="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +rm -rf build +mkdir build +cd build + +# in case of running op in simulator, use stub so instead +if [ "${RUN_MODE}" = "sim" ]; then + export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | sed 's/\/.*\/runtime\/lib64://g') + export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/runtime/lib64/stub:$LD_LIBRARY_PATH + + if [ ! $CAMODEL_LOG_PATH ]; then + export CAMODEL_LOG_PATH=./ # default log save in build dir + else + export CAMODEL_LOG_PATH=../$CAMODEL_LOG_PATH + rm -rf $CAMODEL_LOG_PATH + mkdir -p $CAMODEL_LOG_PATH + fi +fi + +if [ "${RUN_MODE}" = "cpu" ]; then + export CAMODEL_LOG_PATH=./ # cpu run mode set fixed log path +fi + +source $ASCEND_HOME_DIR/bin/setenv.bash +export LD_LIBRARY_PATH=${ASCEND_HOME_DIR}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + +cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. +make -j16 +./dequant_direct_kernel_op \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py b/examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py new file mode 100644 index 00000000..a19dd2b2 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py @@ -0,0 +1,33 @@ +#!/usr/bin/python3 +# coding=utf-8 + +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +import numpy as np +import os + +def gen_golden_data_simple(): + shape=[4,8] + scale_size = [8] + inp = np.random.randint(low=-10, high=10, size=shape).astype(np.int32) + scale = np.random.uniform(low=-100, high=100, size=scale_size).astype(np.float32) + golden = np.zeros(shape) + for i in range(shape[1]): + for j in range(shape[0]): + golden[j][i]=inp[j][i]*scale[i] + golden = golden.astype(np.float32) + os.system("mkdir -p input") + os.system("mkdir -p output") + inp.tofile("./input/input.bin") + scale.tofile("./input/scale.bin") + golden.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt new file mode 100644 index 00000000..584132d8 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt @@ -0,0 +1,69 @@ +cmake_minimum_required(VERSION 3.16.0) +project(opp) +if(ENABLE_CROSS_COMPILE) + if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) + set(CROSS_COMPILE_PLATFORM aarch64) + else() + set(CROSS_COMPILE_PLATFORM x86_64) + endif() + set(PLATFORM ${CMAKE_SYSTEM_PROCESSOR}) + set(CMAKE_COMPILE_COMPILER_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/linux/${CROSS_COMPILE_PLATFORM}/) + set(CMAKE_COMPILE_RUNTIME_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/${CROSS_COMPILE_PLATFORM}/) + set(CMAKE_SYSTEM_PROCESSOR ${CROSS_COMPILE_PLATFORM}) + set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER ${CMAKE_CROSS_PLATFORM_COMPILER}) +else() + set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) +endif() + +include(cmake/config.cmake) +include(cmake/func.cmake) +include(cmake/intf.cmake) + +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) + add_subdirectory(framework) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +# modify vendor_name in install.sh and upgrade.sh +add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh + COMMAND mkdir -p ${CMAKE_BINARY_DIR}/scripts + COMMAND cp -r ${CMAKE_SOURCE_DIR}/scripts/* ${CMAKE_BINARY_DIR}/scripts/ + COMMAND sed -i "s/vendor_name=customize/vendor_name=${vendor_name}/g" ${CMAKE_BINARY_DIR}/scripts/* +) +add_custom_target(modify_vendor ALL DEPENDS ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh) +install(DIRECTORY ${CMAKE_BINARY_DIR}/scripts/ DESTINATION . FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_READ) + +install(FILES ${CMAKE_SOURCE_DIR}/custom.proto DESTINATION packages OPTIONAL) + +get_system_info(SYSTEM_INFO) + +# gen version.info +add_custom_target(gen_version_info ALL + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh ${ASCEND_CANN_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR} +) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.info + DESTINATION packages/vendors/${vendor_name}/) + +# CPack config +set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) +set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) +set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") +set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) +set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") +set(CPACK_GENERATOR External) +set(CPACK_CMAKE_GENERATOR "Unix Makefiles") +set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) +set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) +set(CPACK_EXTERNAL_BUILT_PACKAGES ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME}) +include(CPack) diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json new file mode 100644 index 00000000..dc47e02c --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json @@ -0,0 +1,63 @@ +{ + "version": 1, + "cmakeMinimumRequired": { + "major": 3, + "minor": 19, + "patch": 0 + }, + "configurePresets": [ + { + "name": "default", + "displayName": "Default Config", + "description": "Default build using Unix Makefiles generator", + "generator": "Unix Makefiles", + "binaryDir": "${sourceDir}/build_out", + "cacheVariables": { + "CMAKE_BUILD_TYPE": { + "type": "STRING", + "value": "Release" + }, + "ENABLE_SOURCE_PACKAGE": { + "type": "BOOL", + "value": "True" + }, + "ENABLE_BINARY_PACKAGE": { + "type": "BOOL", + "value": "True" + }, + "ASCEND_COMPUTE_UNIT": { + "type": "STRING", + "value": "ascend310p;ascend910b" + }, + "ENABLE_TEST": { + "type": "BOOL", + "value": "True" + }, + "vendor_name": { + "type": "STRING", + "value": "customize" + }, + "ASCEND_CANN_PACKAGE_PATH": { + "type": "PATH", + "value": "/home/ma-user/work/Ascend/ascend-toolkit/latest" + }, + "ASCEND_PYTHON_EXECUTABLE": { + "type": "STRING", + "value": "python3" + }, + "CMAKE_INSTALL_PREFIX": { + "type": "PATH", + "value": "${sourceDir}/build_out" + }, + "ENABLE_CROSS_COMPILE": { + "type": "BOOL", + "value": "False" + }, + "CMAKE_CROSS_PLATFORM_COMPILER": { + "type": "PATH", + "value": "/usr/bin/aarch64-linux-gnu-g++" + } + } + } + ] +} \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md new file mode 100644 index 00000000..d3034029 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md @@ -0,0 +1,81 @@ + + +## 概述 + +本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为src[4,8], deq_scale[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 + +## 样例支持的产品型号为: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 目录结构 +| 目录 | 描述 | +|---------------------|----------------------| +| [cmake](./cmake) | 编译工程文件 | +| [op_host](./op_host) | host侧实现文件 | +| [op_kernel](./op_kernel) | kernel侧实现文件 | +| [scripts](./scripts) | 包含输入数据和真值数据生成脚本文件 | +| [testcases](./testcases) | 包含cpu域以及npu域的用例主函数,以及真值校验函数 | +| build.sh | 编译运行算子的脚本 | +| CMakeLists.txt | 编译工程文件 | +| CMakePresets.json | 编译工程配置文件 | + +## 编译运行样例 + +## 1.配置环境变量 + + 这里的\$ASCEND_CANN_PACKAGE_PATH需要替换为CANN包的存储路径。例如:/usr/local/Ascend/ascend-toolkit/latest + ``` + export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH + source $ASCEND_HOME_DIR/../set_env.sh + ``` +### 2.生成输入和真值 + ``` + python3 scripts/gen_data.py + ``` + +### 3.编译算子工程 + + - 修改CMakePresets.json中ASCEND_CANN_PACKAGE_PATH为CANN软件包安装后的实际路径。 + + + ``` + { + …… + "configurePresets": [ + { + …… + "ASCEND_CANN_PACKAGE_PATH": { + "type": "PATH", + "value": "~/Ascend/ascend-toolkit/latest" //请替换为CANN软件包安装后的实际路径。eg:/home/HwHiAiUser/Ascend/ascend-toolkit/latest + }, + …… + } + ] + } + ``` + - 在当前算子工程目录下执行如下命令,进行算子工程编译。 + + ``` + bash build.sh + ``` + 编译成功后,会在当前目录下创建build_out目录,并在build_out目录下生成自定义算子安装包custom_opp_\_\.run,例如“custom_opp_ubuntu_x86_64.run”。 + + +### 4.部署算子包 + + - 执行如下命令,在自定义算子安装包所在路径下,安装自定义算子包。 + + ``` + cd build_out + ./custom_opp__.run + ``` + + 命令执行成功后,自定义算子包中的相关文件将部署至当前环境的OPP算子库的vendors/customize目录中。 +### 5.执行样例 + - 在build_out目录下执行如下命令 + + ``` + ./dequant_custom_npu + ``` +### 注意事项 +本样例工程会自动识别执行的硬件平台,无需单独设置SOC_VERSION \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/build.sh b/examples/quantization/dequant/kernel_launch_method_by_framework/build.sh new file mode 100644 index 00000000..6f3ab962 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/build.sh @@ -0,0 +1,76 @@ +#!/bin/bash +script_path=$(realpath $(dirname $0)) + +source $ASCEND_HOME_DIR/bin/setenv.bash +cp -rf ../host_tiling/* op_host/ +ln -s $ASCEND_HOME_DIR/tools/op_project_templates/ascendc/customize/cmake/util/ ./cmake/util +mkdir -p build_out +rm -rf build_out/* +cd build_out + +opts=$(python3 $script_path/cmake/util/preset_parse.py $script_path/CMakePresets.json) +ENABLE_CROSS="-DENABLE_CROSS_COMPILE=True" +ENABLE_BINARY="-DENABLE_BINARY_PACKAGE=True" +cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') + +cmake_run_package() +{ + target=$1 + cmake --build . --target $target -j16 + if [ $? -ne 0 ]; then exit 1; fi + + if [ $target = "package" ]; then + if test -d ./op_kernel/binary ; then + ./cust*.run + if [ $? -ne 0 ]; then exit 1; fi + cmake --build . --target binary -j16 + if [ $? -ne 0 ]; then exit 1; fi + cmake --build . --target $target -j16 + fi + fi +} + +if [[ $opts =~ $ENABLE_CROSS ]] && [[ $opts =~ $ENABLE_BINARY ]] +then + target=package + if [ "$1"x != ""x ]; then target=$1; fi + if [ "$cmake_version" \< "3.19.0" ] ; then + cmake .. $opts -DENABLE_CROSS_COMPILE=0 + else + cmake .. --preset=default -DENABLE_CROSS_COMPILE=0 + fi + cmake_run_package $target + cp -r kernel ../ + rm -rf * + if [ "$cmake_version" \< "3.19.0" ] ; then + cmake .. $opts + else + cmake .. --preset=default + fi + + cmake --build . --target $target -j16 + if [ $? -ne 0 ]; then exit 1; fi + if [ $target = "package" ]; then + if test -d ./op_kernel/binary ; then + ./cust*.run + fi + fi + rm -rf ../kernel + +else + target=package + if [ "$1"x != ""x ]; then target=$1; fi + if [ "$cmake_version" \< "3.19.0" ] ; then + cmake .. $opts + else + cmake .. --preset=default + fi + cmake_run_package $target +fi + + +# for debug +# cd build_out +# make +# cpack +# verbose append -v \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake new file mode 100644 index 00000000..886119da --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake @@ -0,0 +1,25 @@ + +set(CMAKE_CXX_FLAGS_DEBUG "") +set(CMAKE_CXX_FLAGS_RELEASE "") + +if (NOT DEFINED vendor_name) + set(vendor_name customize CACHE STRING "") +endif() +if (NOT DEFINED ASCEND_CANN_PACKAGE_PATH) + set(ASCEND_CANN_PACKAGE_PATH /usr/local/Ascend/latest CACHE PATH "") +endif() +if (NOT DEFINED ASCEND_PYTHON_EXECUTABLE) + set(ASCEND_PYTHON_EXECUTABLE python3 CACHE STRING "") +endif() +if (NOT DEFINED ASCEND_COMPUTE_UNIT) + message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! +") +endif() +set(ASCEND_TENSOR_COMPILER_PATH ${ASCEND_CANN_PACKAGE_PATH}/compiler) +set(ASCEND_CCEC_COMPILER_PATH ${ASCEND_TENSOR_COMPILER_PATH}/ccec_compiler/bin) +set(ASCEND_AUTOGEN_PATH ${CMAKE_BINARY_DIR}/autogen) +set(ASCEND_FRAMEWORK_TYPE tensorflow) +file(MAKE_DIRECTORY ${ASCEND_AUTOGEN_PATH}) +set(CUSTOM_COMPILE_OPTIONS "custom_compile_options.ini") +execute_process(COMMAND rm -rf ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} + COMMAND touch ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS}) diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake new file mode 100644 index 00000000..4179dfd2 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake @@ -0,0 +1,192 @@ + +function(get_system_info SYSTEM_INFO) + if (UNIX) + execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) + string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) + set(${SYSTEM_INFO} ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} PARENT_SCOPE) + elseif (WIN32) + message(STATUS "System is Windows. Only for pre-build.") + else () + message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") + endif () +endfunction() + +function(opbuild) + message(STATUS "Opbuild generating sources") + cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" "OPS_SRC" ${ARGN}) + execute_process(COMMAND ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} -D_GLIBCXX_USE_CXX11_ABI=0 + -I ${ASCEND_CANN_PACKAGE_PATH}/include -L ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api + -o ${OPBUILD_OUT_DIR}/libascend_all_ops.so + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("build ops lib info: ${EXEC_INFO}") + message("build ops lib error: ${EXEC_ERROR}") + message(FATAL_ERROR "opbuild run failed!") + endif() + set(proj_env "") + set(prefix_env "") + if (NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") + set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") + endif() + if (NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") + set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") + endif() + execute_process(COMMAND ${proj_env} ${prefix_env} ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build + ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("opbuild ops info: ${EXEC_INFO}") + message("opbuild ops error: ${EXEC_ERROR}") + endif() + message(STATUS "Opbuild generating sources - done") +endfunction() + +function(add_ops_info_target) + cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" ${ARGN}) + get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) + add_custom_command(OUTPUT ${OPINFO_OUTPUT} + COMMAND mkdir -p ${opinfo_file_path} + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py + ${OPINFO_OPS_INFO} ${OPINFO_OUTPUT} + ) + add_custom_target(${OPINFO_TARGET} ALL + DEPENDS ${OPINFO_OUTPUT} + ) + install(FILES ${OPINFO_OUTPUT} + DESTINATION ${OPINFO_INSTALL_DIR} + ) +endfunction() + +function(add_ops_compile_options OP_TYPE) + cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" ${ARGN}) + file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} + "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") +endfunction() + +function(add_ops_impl_target) + cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) + add_custom_command(OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py + ${OPIMPL_OPS_INFO} + \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" + ${OPIMPL_IMPL_DIR} + ${OPIMPL_OUT_DIR}/dynamic + ${ASCEND_AUTOGEN_PATH} + + COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp + DEPENDS ${OPIMPL_OPS_INFO} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py + ) + add_custom_target(${OPIMPL_TARGET} ALL + DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) + if (${ENABLE_SOURCE_PACKAGE}) + install(DIRECTORY ${OPIMPL_OUT_DIR}/dynamic + DESTINATION ${OPIMPL_INSTALL_DIR} + ) + endif() +endfunction() + +function(add_npu_support_target) + cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) + add_custom_command(OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json + COMMAND mkdir -p ${NPUSUP_OUT_DIR} + COMMAND ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh + ${NPUSUP_OPS_INFO_DIR} + ${NPUSUP_OUT_DIR} + ) + add_custom_target(npu_supported_ops ALL + DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json + ) + install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json + DESTINATION ${NPUSUP_INSTALL_DIR} + ) +endfunction() + +function(add_bin_compile_target) + cmake_parse_arguments(BINCMP "" "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) + execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py + ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("ops binary compile scripts gen info: ${EXEC_INFO}") + message("ops binary compile scripts gen error: ${EXEC_ERROR}") + message(FATAL_ERROR "ops binary compile scripts gen failed!") + endif() + if (NOT TARGET binary) + add_custom_target(binary) + endif() + add_custom_target(${BINCMP_TARGET} + COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* ${BINCMP_OUT_DIR}/src + ) + add_custom_target(${BINCMP_TARGET}_gen_ops_config + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p ${BINCMP_OUT_DIR}/bin + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p ${BINCMP_OUT_DIR}/bin + -s ${BINCMP_COMPUTE_UNIT} + ) + add_dependencies(binary ${BINCMP_TARGET}_gen_ops_config) + file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) + foreach(bin_script ${bin_scripts}) + get_filename_component(bin_file ${bin_script} NAME_WE) + string(REPLACE "-" ";" bin_sep ${bin_file}) + list(GET bin_sep 0 op_type) + list(GET bin_sep 1 op_file) + list(GET bin_sep 2 op_index) + if (NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file}) + add_custom_target(${BINCMP_TARGET}_${op_file}_copy + COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py ${BINCMP_OUT_DIR}/src/${op_type}.py + ) + install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} + DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT}/ OPTIONAL + ) + endif() + add_custom_target(${BINCMP_TARGET}_${op_file}_${op_index} + COMMAND export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py ${BINCMP_OUT_DIR}/bin/${op_file} && echo $(MAKE) + WORKING_DIRECTORY ${BINCMP_OUT_DIR} + ) + add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} ${BINCMP_TARGET}_${op_file}_copy) + add_dependencies(${BINCMP_TARGET}_gen_ops_config ${BINCMP_TARGET}_${op_file}_${op_index}) + endforeach() + install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + + install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/../build_out/kernel/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/../build_out/kernel/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/../build_out/kernel/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + +endfunction() + +function(add_cross_compile_target) + cmake_parse_arguments(CROSSMP "" "TARGET;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + add_custom_target(${CROSSMP_TARGET} ALL + DEPENDS ${CROSSMP_OUT_DIR} + ) + install(DIRECTORY ${CROSSMP_OUT_DIR} + DESTINATION ${CROSSMP_INSTALL_DIR} + ) +endfunction() diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake new file mode 100644 index 00000000..2f362c39 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake @@ -0,0 +1,26 @@ + +add_library(intf_pub INTERFACE) +target_compile_options(intf_pub INTERFACE + -fPIC + -fvisibility=hidden + -fvisibility-inlines-hidden + $<$:-O2> + $<$:-O0 -g> + $<$:-std=c++11> + $<$,$>:-ftrapv -fstack-check> + $<$:-pthread -Wfloat-equal -Wshadow -Wformat=2 -Wno-deprecated -Wextra> + $,-fstack-protector-strong,-fstack-protector-all> +) +target_compile_definitions(intf_pub INTERFACE + _GLIBCXX_USE_CXX11_ABI=0 + $<$:_FORTIFY_SOURCE=2> +) +target_include_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include) +target_link_options(intf_pub INTERFACE + $<$,EXECUTABLE>:-pie> + $<$:-s> + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack +) +target_link_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/lib64) diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake new file mode 100644 index 00000000..48c565bf --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake @@ -0,0 +1,17 @@ +execute_process(COMMAND chmod +x ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) +execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh + --header ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh + --help-header ./help.info + --gzip --complevel 4 --nomd5 --sha256 + ./ ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} + RESULT_VARIABLE EXEC_RESULT + ERROR_VARIABLE EXEC_ERROR +) +if (NOT "${EXEC_RESULT}x" STREQUAL "0x") + message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") +endif() +execute_process(COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ + COMMAND echo "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} +) diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt new file mode 100644 index 00000000..40dd51cf --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt @@ -0,0 +1,82 @@ + +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs) + +opbuild(OPS_SRC ${ops_srcs} + OUT_DIR ${ASCEND_AUTOGEN_PATH} +) + +add_library(cust_op_proto SHARED ${ops_srcs} ${ASCEND_AUTOGEN_PATH}/op_proto.cc) +target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) +target_compile_options(cust_op_proto PRIVATE + -fvisibility=hidden +) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_op_proto PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_op_proto PRIVATE + intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive +) +set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME + cust_opsproto_rt2.0 +) +add_library(cust_optiling SHARED ${ops_srcs}) +target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) +target_compile_options(cust_optiling PRIVATE + -fvisibility=hidden +) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_optiling PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_optiling PRIVATE + intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive +) +set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME + cust_opmaster_rt2.0 +) + +file(GLOB aclnn_src ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) +file(GLOB aclnn_inc ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) +add_library(cust_opapi SHARED ${aclnn_src}) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_opapi PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) + +add_custom_target(optiling_compat ALL + COMMAND ln -sf lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ + ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so +) + +install(TARGETS cust_op_proto + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install(FILES ${ASCEND_AUTOGEN_PATH}/op_proto.h + DESTINATION packages/vendors/${vendor_name}/op_proto/inc) +install(TARGETS cust_optiling + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) +install(TARGETS cust_opapi + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_api/lib) +install(FILES ${aclnn_inc} + DESTINATION packages/vendors/${vendor_name}/op_api/include) diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp new file mode 100644 index 00000000..58200cea --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include "dequant_custom_tiling.h" +#include "register/op_def_registry.h" + +namespace optiling { +constexpr uint32_t BLOCK_DIM = 48; + +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + DequantCustomTilingData tiling; + const gert::StorageShape *x1Shape = context->GetInputShape(0); + const gert::StorageShape *scaleShape = context->GetInputShape(1); + const gert::Shape shape = x1Shape->GetStorageShape(); + const gert::Shape scaleLen = scaleShape->GetStorageShape(); + ComputeTiling(shape[0],shape[1],scaleLen[0], tiling); + + context->SetBlockDim(BLOCK_DIM); + context->SetTilingKey(1); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + return ge::GRAPH_SUCCESS; +} +} + + +namespace ge { +static ge::graphStatus InferShape(gert::InferShapeContext *context) +{ + const gert::Shape *x1Shape = context->GetInputShape(0); + gert::Shape *yShape = context->GetOutputShape(0); + *yShape = *x1Shape; + return GRAPH_SUCCESS; +} +} + + +namespace ops { +class DequantCustom : public OpDef { +public: + explicit DequantCustom(const char *name) : OpDef(name) + { + this->Input("inputGm") + .ParamType(REQUIRED) + .DataType({ ge::DT_INT32 }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); + this->Input("scaleGm") + .ParamType(REQUIRED) + .DataType({ ge::DT_FLOAT }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); + this->Output("outputGm") + .ParamType(REQUIRED) + .DataType({ ge::DT_FLOAT }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); + + this->SetInferShape(ge::InferShape); + + this->AICore().SetTiling(optiling::TilingFunc); + this->AICore().AddConfig("ascend910b"); + this->AICore().AddConfig("ascend310p"); + } +}; + +OP_ADD(DequantCustom); +} diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h new file mode 100644 index 00000000..f1cde965 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h @@ -0,0 +1,41 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_TILING_H +#define EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_TILING_H +#include "register/tilingdata_base.h" +#include "tiling/tiling_api.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(DequantCustomTilingData) + TILING_DATA_FIELD_DEF(uint32_t, m); + TILING_DATA_FIELD_DEF(uint32_t, n); + TILING_DATA_FIELD_DEF(uint32_t, calCount); + TILING_DATA_FIELD_DEF(uint32_t, sharedTmpBufferSize); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(DequantCustom, DequantCustomTilingData) +} // namespace optiling + +void ComputeTiling(const uint32_t m, const uint32_t n, const uint32_t calCount, optiling::DequantCustomTilingData &tiling){ + std::vector shapeVec = {m, n}; + ge::Shape srcShape(shapeVec); + uint32_t typeSize = 4; + uint32_t maxTmpSize; + uint32_t minTmpSize; + AscendC::GetAscendDequantMaxMinTmpSize(srcShape, typeSize, maxTmpSize, minTmpSize); + uint32_t localWorkspaceSize = minTmpSize; + tiling.set_m(m); + tiling.set_n(n); + tiling.set_calCount(calCount); + tiling.set_sharedTmpBufferSize(localWorkspaceSize); +} + +#endif // EXAMPLES_QUANTIZATION_DEQUANT_CUSTOM_TILING_H diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt new file mode 100644 index 00000000..c50a409a --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt @@ -0,0 +1,69 @@ +# set custom compile options +if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") + add_ops_compile_options(ALL OPTIONS -g -O0) +endif() +add_ops_compile_options(ALL OPTIONS -mllvm -cce-aicore-jump-expand=true) + +foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) + + # generate aic-${compute_unit}-ops-info.json + add_ops_info_target(TARGET ops_info_gen_${compute_unit} + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core/${compute_unit}/aic-${compute_unit}-ops-info.json + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit} + ) + + # generate ascendc impl py once + if (NOT TARGET ascendc_impl_gen) + add_ops_impl_target(TARGET ascendc_impl_gen + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl + ) + endif() + + # dynamic shape binary compile + if (${ENABLE_BINARY_PACKAGE} AND NOT ${ENABLE_CROSS_COMPILE}) + add_bin_compile_target(TARGET ascendc_bin_${compute_unit} + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} + ADP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/dynamic + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel + COMPUTE_UNIT ${compute_unit} + ) + add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) + endif() + + if (${ENABLE_CROSS_COMPILE} AND ${ENABLE_BINARY_PACKAGE}) + add_cross_compile_target( + TARGET bin_${compute_unit} + OUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../kernel + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/ + ) + endif() +endforeach() + +# generate npu_supported_ops.json +add_npu_support_target(TARGET npu_supported_ops + OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core + INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE} +) + +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +# install kernel file +if (${ENABLE_SOURCE_PACKAGE}) + file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/*.h + ${CMAKE_CURRENT_SOURCE_DIR}/*.py + ) + install(FILES ${KERNEL_FILES} + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic + ) +endif() diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp b/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp new file mode 100644 index 00000000..50c1cb4f --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "../../../../../../kernel_impl/dequant_custom.h" + +extern "C" __global__ __aicore__ void dequant_custom(GM_ADDR srcGm, GM_ADDR deqScaleGm, GM_ADDR dstGm, GM_ADDR workspace, + GM_ADDR tiling) +{ + if ASCEND_IS_AIC { + return; + } + GET_TILING_DATA(tilingData, tiling); + MyCustomKernel::VecTiling vecTiling = *reinterpret_cast(&tilingData); + if (TILING_KEY_IS(1)) { + MyCustomKernel::KernelDequant op; + op.Init(srcGm, dstGm, deqScaleGm, vecTiling.m, vecTiling.n, vecTiling.calCount); + op.Process(); + } +} \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py new file mode 100644 index 00000000..a19dd2b2 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py @@ -0,0 +1,33 @@ +#!/usr/bin/python3 +# coding=utf-8 + +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +import numpy as np +import os + +def gen_golden_data_simple(): + shape=[4,8] + scale_size = [8] + inp = np.random.randint(low=-10, high=10, size=shape).astype(np.int32) + scale = np.random.uniform(low=-100, high=100, size=scale_size).astype(np.float32) + golden = np.zeros(shape) + for i in range(shape[1]): + for j in range(shape[0]): + golden[j][i]=inp[j][i]*scale[i] + golden = golden.astype(np.float32) + os.system("mkdir -p input") + os.system("mkdir -p output") + inp.tofile("./input/input.bin") + scale.tofile("./input/scale.bin") + golden.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info new file mode 100644 index 00000000..f4b28d57 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info @@ -0,0 +1 @@ + --install-path Install operator package to specific dir path \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh new file mode 100644 index 00000000..8468c5a2 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh @@ -0,0 +1,318 @@ +#!/bin/bash +vendor_name=customize +targetdir=/usr/local/Ascend/opp +target_custom=0 + +sourcedir=$PWD/packages +vendordir=vendors/$vendor_name + +QUIET="y" + +while true +do + case $1 in + --quiet) + QUIET="y" + shift + ;; + --install-path=*) + INSTALL_PATH=$(echo $1 | cut -d"=" -f2-) + INSTALL_PATH=${INSTALL_PATH%*/} + shift + ;; + --*) + shift + ;; + *) + break + ;; + esac +done + +log() { + cur_date=`date +"%Y-%m-%d %H:%M:%S"` + echo "[runtime] [$cur_date] "$1 +} + +if [ -n "${INSTALL_PATH}" ]; then + if [[ ! "${INSTALL_PATH}" = /* ]]; then + log "[ERROR] use absolute path for --install-path argument" + exit 1 + fi + if [ ! -d ${INSTALL_PATH} ]; then + mkdir ${INSTALL_PATH} >> /dev/null 2>&1 + if [ $? -ne 0 ]; then + log "[ERROR] create ${INSTALL_PATH} failed" + exit 1 + fi + fi + targetdir=${INSTALL_PATH} +elif [ -n "${ASCEND_CUSTOM_OPP_PATH}" ]; then + if [ ! -d ${ASCEND_CUSTOM_OPP_PATH} ]; then + mkdir -p ${ASCEND_CUSTOM_OPP_PATH} >> /dev/null 2>&1 + if [ $? -ne 0 ]; then + log "[ERROR] create ${ASCEND_CUSTOM_OPP_PATH} failed" + fi + fi + targetdir=${ASCEND_CUSTOM_OPP_PATH} +else + if [ "x${ASCEND_OPP_PATH}" == "x" ]; then + log "[ERROR] env ASCEND_OPP_PATH no exist" + exit 1 + fi + targetdir="${ASCEND_OPP_PATH}" +fi + +if [ ! -d $targetdir ];then + log "[ERROR] $targetdir no exist" + exit 1 +fi + +upgrade() +{ + if [ ! -d ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 files" + return 0 + fi + + if [ ! -d ${targetdir}/$vendordir/$1 ];then + log "[INFO] create ${targetdir}/$vendordir/$1." + mkdir -p ${targetdir}/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/$vendordir/$1 failed" + return 1 + fi + else + has_same_file=-1 + for file_a in ${sourcedir}/$vendordir/$1/*; do + file_b=${file_a##*/}; + if [ "ls ${targetdir}/$vendordir/$1" = "" ]; then + log "[INFO] ${targetdir}/$vendordir/$1 is empty !!" + return 1 + fi + grep -q $file_b <<<`ls ${targetdir}/$vendordir/$1`; + if [[ $? -eq 0 ]]; then + echo -n "${file_b} " + has_same_file=0 + fi + done + if [ 0 -eq $has_same_file ]; then + if test $QUIET = "n"; then + echo "[INFO]: has old version in ${targetdir}/$vendordir/$1, \ + you want to Overlay Installation , please enter:[o]; \ + or replace directory installation , please enter: [r]; \ + or not install , please enter:[n]." + + while true + do + read orn + if [ "$orn" = n ]; then + return 0 + elif [ "$orn" = m ]; then + break; + elif [ "$0rn" = r ]; then + [ -n "${targetdir}/$vendordir/$1/" ] && rm -rf "${targetdir}/$vendordir/$1"/* + break; + else + echo "[ERROR] input error, please input again!" + fi + done + fi + fi + log "[INFO] replace or merge old ops $1 files .g....." + fi + + log "copy new ops $1 files ......" + if [ -d ${targetdir}/$vendordir/$1/ ]; then + chmod -R +w "$targetdir/$vendordir/$1/" >/dev/null 2>&1 + fi + cp -rf ${sourcedir}/$vendordir/$1/* $targetdir/$vendordir/$1/ + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 files failed" + return 1 + fi + + return 0 +} +upgrade_proto() +{ + if [ ! -f ${sourcedir}/$vendordir/custom.proto ]; then + log "[INFO] no need to upgrade custom.proto files" + return 0 + fi + if [ ! -d ${targetdir}/$vendordir/framework/caffe ];then + log "[INFO] create ${targetdir}/$vendordir/framework/caffe." + mkdir -p ${targetdir}/$vendordir/framework/caffe + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/$vendordir/framework/caffe failed" + return 1 + fi + else + if [ -f ${targetdir}/$vendordir/framework/caffe/custom.proto ]; then + # 有老版本,判断是否要覆盖式安装 + if test $QUIET = "n"; then + echo "[INFO] ${targetdir}/$vendordir/framework/caffe has old version"\ + "custom.proto file. Do you want to replace? [y/n] " + + while true + do + read yn + if [ "$yn" = n ]; then + return 0 + elif [ "$yn" = y ]; then + break; + else + echo "[ERROR] input error, please input again!" + fi + done + fi + fi + log "[INFO] replace old caffe.proto files ......" + fi + chmod -R +w "$targetdir/$vendordir/framework/caffe/" >/dev/null 2>&1 + cp -rf ${sourcedir}/$vendordir/custom.proto ${targetdir}/$vendordir/framework/caffe/ + if [ $? -ne 0 ];then + log "[ERROR] copy new custom.proto failed" + return 1 + fi + log "[INFO] copy custom.proto success" + + return 0 +} + +upgrade_file() +{ + if [ ! -e ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 file" + return 0 + fi + + log "copy new $1 files ......" + cp -f ${sourcedir}/$vendordir/$1 $targetdir/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 file failed" + return 1 + fi + + return 0 +} + +delete_optiling_file() +{ + if [ ! -d ${targetdir}/vendors ];then + log "[INFO] $1 not exist, no need to uninstall" + return 0 + fi + sys_info=$(uname -m) + if [ ! -d ${sourcedir}/$vendordir/$1/ai_core/tbe/op_tiling/lib/linux/${sys_info} ];then + rm -rf ${sourcedir}/$vendordir/$1/ai_core/tbe/op_tiling/liboptiling.so + fi + return 0 +} + +log "[INFO] copy uninstall sh success" + +if [ ! -d ${targetdir}/vendors ];then + log "[INFO] create ${targetdir}/vendors." + mkdir -p ${targetdir}/vendors + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/vendors failed" + return 1 + fi +fi +chmod u+w ${targetdir}/vendors + +echo "[ops_custom]upgrade framework" +upgrade framework +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op proto" +upgrade op_proto +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade version.info" +upgrade_file version.info +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op impl" +delete_optiling_file op_impl +upgrade op_impl +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op api" +upgrade op_api +if [ $? -ne 0 ];then + exit 1 +fi + +upgrade_proto +if [ $? -ne 0 ];then + exit 1 +fi + +# set the set_env.bash +if [ -n "${INSTALL_PATH}" ] && [ -d ${INSTALL_PATH} ]; then + _ASCEND_CUSTOM_OPP_PATH=${targetdir}/${vendordir} + bin_path="${_ASCEND_CUSTOM_OPP_PATH}/bin" + set_env_variable="#!/bin/bash\nexport ASCEND_CUSTOM_OPP_PATH=${_ASCEND_CUSTOM_OPP_PATH}:\${ASCEND_CUSTOM_OPP_PATH}" + if [ ! -d ${bin_path} ]; then + mkdir -p ${bin_path} >> /dev/null 2>&1 + if [ $? -ne 0 ]; then + log "[ERROR] create ${bin_path} failed" + exit 1 + fi + fi + echo -e ${set_env_variable} > ${bin_path}/set_env.bash + if [ $? -ne 0 ]; then + log "[ERROR] write ASCEND_CUSTOM_OPP_PATH to set_env.bash failed" + exit 1 + else + log "[INFO] using requirements: when custom module install finished or before you run the custom module, \ + execute the command [ source ${bin_path}/set_env.bash ] to set the environment path" + fi +else + config_file=${targetdir}/vendors/config.ini + if [ ! -f ${config_file} ]; then + touch ${config_file} + chmod 640 ${config_file} + echo "load_priority=$vendor_name" > ${config_file} + if [ $? -ne 0 ];then + echo "echo load_priority failed" + exit 1 + fi + else + found_vendors="$(grep -w "load_priority" "$config_file" | cut --only-delimited -d"=" -f2-)" + found_vendor=$(echo $found_vendors | sed "s/$vendor_name//g" | tr ',' ' ') + vendor=$(echo $found_vendor | tr -s ' ' ',') + if [ "$vendor" != "" ]; then + sed -i "/load_priority=$found_vendors/s@load_priority=$found_vendors@load_priority=$vendor_name,$vendor@g" "$config_file" + fi + fi +fi + +chmod u-w ${targetdir}/vendors + +if [ -d ${targetdir}/$vendordir/op_impl/cpu/aicpu_kernel/impl/ ]; then + chmod -R 440 ${targetdir}/$vendordir/op_impl/cpu/aicpu_kernel/impl/* >/dev/null 2>&1 +fi +if [ -f ${targetdir}/ascend_install.info ]; then + chmod -R 440 ${targetdir}/ascend_install.info +fi +if [ -f ${targetdir}/scene.info ]; then + chmod -R 440 ${targetdir}/scene.info +fi +if [ -f ${targetdir}/version.info ]; then + chmod -R 440 ${targetdir}/version.info +fi + +echo "SUCCESS" +exit 0 + diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh new file mode 100644 index 00000000..e0917348 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh @@ -0,0 +1,151 @@ +#!/bin/bash +vendor_name=customize +targetdir=/usr/local/Ascend/opp +target_custom=0 + +sourcedir=$PWD/packages +vendordir=vendors/$vendor_name + +log() { + cur_date=`date +"%Y-%m-%d %H:%M:%S"` + echo "[runtime] [$cur_date] "$1 +} + +if [[ "x${ASCEND_OPP_PATH}" == "x" ]];then + log "[ERROR] env ASCEND_OPP_PATH no exist" + exit 1 +fi + +targetdir=${ASCEND_OPP_PATH} + +if [ ! -d $targetdir ];then + log "[ERROR] $targetdir no exist" + exit 1 +fi + +upgrade() +{ + if [ ! -d ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 files" + return 0 + fi + + if [ ! -d ${targetdir}/$vendordir/$1 ];then + log "[INFO] create ${targetdir}/$vendordir/$1." + mkdir -p ${targetdir}/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/$vendordir/$1 failed" + return 1 + fi + else + vendor_installed_dir=$(ls "$targetdir/vendors" 2> /dev/null) + for i in $vendor_installed_dir;do + vendor_installed_file=$(ls "$vendor_installed_dir/$vendor_name/$i" 2> /dev/null) + if [ "$i" = "$vendor_name" ] && [ "$vendor_installed_file" != "" ]; then + echo "[INFO]: $vendor_name custom opp package has been installed on the path $vendor_installed_dir, \ + you want to Overlay Installation , please enter:[o]; \ + or replace directory installation , please enter: [r]; \ + or not install , please enter:[n]." + fi + while true + do + read mrn + if [ "$mrn" = m ]; then + break + elif [ "$mrn" = r ]; then + [ -n "$vendor_installed_file"] && rm -rf "$vendor_installed_file" + break + elif [ "$mrn" = n ]; then + return 0 + else + echo "[WARNING]: Input error, please input m or r or n to choose!" + fi + done + done + log "[INFO] replace old ops $1 files ......" + fi + + log "copy new ops $1 files ......" + cp -rf ${sourcedir}/$vendordir/$1/* $targetdir/$vendordir/$1/ + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 files failed" + return 1 + fi + + return 0 +} + +upgrade_file() +{ + if [ ! -e ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 file" + return 0 + fi + + log "copy new $1 files ......" + cp -f ${sourcedir}/$vendordir/$1 $targetdir/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 file failed" + return 1 + fi + + return 0 +} + +log "[INFO] copy uninstall sh success" + +echo "[ops_custom]upgrade framework" +upgrade framework +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op proto" +upgrade op_proto +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op impl" +upgrade op_impl +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op api" +upgrade op_api +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade version.info" +upgrade_file version.info +if [ $? -ne 0 ];then + exit 1 +fi + +config_file=${targetdir}/vendors/config.ini +found_vendors="$(grep -w "load_priority" "$config_file" | cut --only-delimited -d"=" -f2-)" +found_vendor=$(echo $found_vendors | sed "s/$vendor_name//g" | tr ',' ' ') +vendor=$(echo $found_vendor | tr -s ' ' ',') +if [ "$vendor" != "" ]; then + sed -i "/load_priority=$found_vendors/s@load_priority=$found_vendors@load_priority=$vendor_name,$vendor@g" "$config_file" +fi + +changemode() +{ + if [ -d ${targetdir} ];then + chmod -R 550 ${targetdir}>/dev/null 2>&1 + fi + + return 0 +} +echo "[ops_custom]changemode..." +#changemode +if [ $? -ne 0 ];then + exit 1 +fi + +echo "SUCCESS" +exit 0 + diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt new file mode 100644 index 00000000..8d2d11c9 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt @@ -0,0 +1,2 @@ +include(cmake/fun.cmake) +add_subdirectory(npu) \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake new file mode 100644 index 00000000..024e2630 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake @@ -0,0 +1,53 @@ + +set(UPER_CHARS A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) +function(string_to_snake str_in snake_out) + set(str_cam ${str_in}) + foreach(uper_char ${UPER_CHARS}) + string(TOLOWER "${uper_char}" lower_char) + string(REPLACE ${uper_char} "_${lower_char}" str_cam ${str_cam}) + endforeach() + string(SUBSTRING ${str_cam} 1 -1 str_cam) + set(${snake_out} "${str_cam}" PARENT_SCOPE) +endfunction() + +function(add_cpu_target) + cmake_parse_arguments(CPU_TEST "" "OP" "SRC" ${ARGN}) + string_to_snake("${CPU_TEST_OP}" op_snake) + add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h + COMMAND python3 ${CMAKE_SOURCE_DIR}/cmake/util/tiling_data_def_build.py + ${CMAKE_SOURCE_DIR}/op_host/${op_snake}_tiling.h + ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h + DEPENDS ${CMAKE_SOURCE_DIR}/op_host/${op_snake}_tiling.h + ) + add_custom_target(gen_${op_snake}_tiling_header + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h + ) + + add_executable(${op_snake}_cpu ${CPU_TEST_SRC}) + add_dependencies(${op_snake}_cpu gen_${op_snake}_tiling_header) + target_compile_options(${op_snake}_cpu PRIVATE -g -include ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h) + target_link_libraries(${op_snake}_cpu PRIVATE tikicpulib::ascend910B1) + set_target_properties(${op_snake}_cpu PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + ) +endfunction() + +function(add_npu_target) + cmake_parse_arguments(NPU_TEST "" "OP" "SRC" ${ARGN}) + string_to_snake("${NPU_TEST_OP}" op_snake) + add_executable(${op_snake}_npu ${NPU_TEST_SRC}) + target_compile_options(${op_snake}_npu PRIVATE -g) + target_include_directories(${op_snake}_npu PRIVATE + ${ASCEND_CANN_PACKAGE_PATH}/include/acl + ${ASCEND_AUTOGEN_PATH} + ) + target_link_libraries(${op_snake}_npu PRIVATE + intf_pub + cust_opapi + ascendcl + nnopbase + ) + set_target_properties(${op_snake}_npu PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + ) +endfunction() diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt new file mode 100644 index 00000000..c19064f6 --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt @@ -0,0 +1,10 @@ +add_npu_target(OP DequantCustom SRC dequant_custom_main.cpp) + +add_custom_target(run_npu_test + COMMAND echo "===============================================================================" + COMMAND echo " Run NPU test at ${CMAKE_CURRENT_BINARY_DIR}" + COMMAND echo "===============================================================================" + COMMAND $ + COMMAND echo "===============================================================================" + ) +add_dependencies(run_npu_test dequant_custom_npu) \ No newline at end of file diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp new file mode 100644 index 00000000..c301440c --- /dev/null +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp @@ -0,0 +1,187 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include "aclnn_dequant_custom.h" +#include "acl/acl_rt.h" +#include "acl/acl.h" +#include +#include +#include "../../../../../common/data_utils.h" + +aclrtStream CreateStream(int device) +{ + if (aclInit(NULL) != ACL_SUCCESS) { + printf("acl init failed\n"); + return NULL; + } + if (aclrtSetDevice(device) != ACL_SUCCESS) { + printf("Set device failed\n"); + (void)aclFinalize(); + return NULL; + } + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + printf("Create stream failed\n"); + return NULL; + } + return stream; +} + +void DestroyStream(aclrtStream stream, int device) +{ + (void)aclrtDestroyStream(stream); + if (aclrtResetDevice(device) != ACL_SUCCESS) { + printf("Reset device failed\n"); + } + if (aclFinalize() != ACL_SUCCESS) { + printf("Finalize acl failed\n"); + } +} + +struct tensorInfo { + int64_t *dims; + int64_t dimCnt; + aclDataType dtype; + aclFormat fmt; +}; + +int64_t GetDataSize(struct tensorInfo *desc) +{ + if (!desc->dims) + return 0; + int64_t size = 1; + for (auto i = 0; i < desc->dimCnt; i++) { + size *= desc->dims[i]; + } + return size *aclDataTypeSize(desc->dtype); +} + +static bool CompareResult(const void *outputData, int64_t outSize) +{ + void *goldenData; + CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); + size_t goldenSize = outSize; + bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + if (ret) { + printf("ReadFile golden.bin success!\n"); + } else { + printf("test failed!\n"); + return false; + } + constexpr float EPS = 1e-6; + int64_t wrongNum = 0; + + for (int i = 0; i < outSize / sizeof(float); i++) { + float a = (reinterpret_cast(outputData))[i]; + float b = (reinterpret_cast(goldenData))[i]; + float ae = std::abs(a - b); + float re = ae / abs(b); + if (ae > EPS && re > EPS) { + printf("CompareResult golden.bin failed output is %lf, golden is %lf\n", a, b); + wrongNum++; + } + } + CHECK_ACL(aclrtFreeHost(goldenData)); + + if (wrongNum != 0) { + return false; + } else { + printf("CompareResult golden.bin success\n"); + return true; + } +} + +int main(void) +{ + aclrtStream stream; + int64_t input[] = {4,8}; + int64_t scale[] = {8}; + int64_t output[] = {4,8}; + struct tensorInfo tensorDesc[] = {{input, 2, ACL_INT32, ACL_FORMAT_ND}, + {scale, 1, ACL_FLOAT, ACL_FORMAT_ND}, + {output, 2, ACL_FLOAT, ACL_FORMAT_ND}, + }; + stream = CreateStream(0); + aclTensor *tensors[sizeof(tensorDesc) / sizeof(struct tensorInfo)]; + void *devMem[sizeof(tensorDesc) / sizeof(struct tensorInfo)]; + for (auto i = 0; i < sizeof(tensorDesc) / sizeof(struct tensorInfo); i++) { + void *data; + struct tensorInfo *info = &(tensorDesc[i]); + int64_t size = GetDataSize(info); + if (size == 0) { + tensors[i] = NULL; + devMem[i] = NULL; + continue; + } + CHECK_ACL(aclrtMalloc(&data, size, ACL_MEM_MALLOC_HUGE_FIRST)); + // read input + if (i == 0) { + size_t inputSize = size; + void *dataHost; + CHECK_ACL(aclrtMallocHost((void **)(&dataHost), inputSize)); + ReadFile("../input/input.bin", inputSize, dataHost, inputSize); + CHECK_ACL(aclrtMemcpy(data, size, dataHost, size, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtFreeHost(dataHost)); + } + if (i == 1) { + size_t inputSize = size; + void *dataHost; + CHECK_ACL(aclrtMallocHost((void **)(&dataHost), inputSize)); + ReadFile("../input/scale.bin", inputSize, dataHost, inputSize); + CHECK_ACL(aclrtMemcpy(data, size, dataHost, size, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtFreeHost(dataHost)); + } + + devMem[i] = data; + + tensors[i] = aclCreateTensor(info->dims, info->dimCnt, info->dtype, NULL, 0, info->fmt, info->dims, info->dimCnt, data); + } + + size_t workspaceSize = 0; + aclOpExecutor *handle; + int32_t ret; + ret = aclnnDequantCustomGetWorkspaceSize(tensors[0], tensors[1],tensors[2], &workspaceSize, &handle); + printf("aclnnDequantCustomGetWorkspaceSize ret %u workspace size %lu\n", ret, workspaceSize); + void *workspace = NULL; + if (workspaceSize != 0) { + CHECK_ACL(aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST)); + } + ret = aclnnDequantCustom(workspace, workspaceSize, handle, stream); + printf("aclnnDequantCustom ret %u\n", ret); + if (aclrtSynchronizeStreamWithTimeout(stream, 5000) != ACL_SUCCESS) { + printf("Synchronize stream failed\n"); + } + + uint8_t *outputHost; + int64_t outputHostSize = GetDataSize(&(tensorDesc[2])); + + CHECK_ACL(aclrtMallocHost((void **)(&outputHost), outputHostSize)); + CHECK_ACL(aclrtMemcpy(outputHost, outputHostSize, devMem[2], outputHostSize, ACL_MEMCPY_DEVICE_TO_HOST)); + + WriteFile("../output/output.bin", outputHost, outputHostSize); + bool goldenResult = CompareResult(outputHost, outputHostSize); + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + + CHECK_ACL(aclrtFreeHost(outputHost)); + + for (auto i = 0; i < sizeof(tensorDesc) / sizeof(struct tensorInfo); i++) { + if (!tensors[i]) + continue; + if (devMem[i]) { + CHECK_ACL(aclrtFree(devMem[i])); + } + aclDestroyTensor(tensors[i]); + } + DestroyStream(stream, 0); + return 0; +} diff --git a/examples/quantization/quant/README.md b/examples/quantization/quant/README.md new file mode 100644 index 00000000..4e4bdca8 --- /dev/null +++ b/examples/quantization/quant/README.md @@ -0,0 +1,69 @@ + + +## 概述 + +本样例介绍了调用AscendQuant高阶API实现量化算子,并按照不同的算子调用方式分别给出了对应的端到端实现。 + +- 直调:使用核函数直调quant自定义算子。 + + 核函数的基础调用方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。 + +- 框架调用:使用框架调用quant自定义算子。 + + 按照工程创建->算子实现->编译部署>算子调用的流程完成算子开发。整个过程都依赖于算子工程:基于工程代码框架完成算子核函数的开发和Tiling实现,通过工程编译脚本完成算子的编译部署,继而实现单算子调用或第三方框架中的算子调用。 + +本样例中包含如下调用方式: + +| 调用方式 | 目录 | **描述** | +| --------- | ------------------------------------------------------------ | ---------------------------------------------------------- | +| 直调 | [kernel_launch_method_by_direct](./kernel_launch_method_by_direct) | host侧的核函数调用程序,包含CPU侧、NPU侧、仿真侧三种运行验证方法。 | +| 框架调用 | [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用AscendQuant算子。 | + +## 样例支持的产品型号为: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 +- Atlas推理系列产品(Ascend 310P处理器)AI Core + +## 目录结构 + +| 目录 | 描述 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| [kernel_launch_method_by_direct](./kernel_launch_method_by_direct) | 通过kernel直调的方式调用自定义算子工程样例目录 | +| [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用自定义算子工程样例目录 | +| [host_tiling](./host_tiling) | 本样例tiling代码实现 | +| [kernel_impl](./kernel_impl) | 本样例kernel侧代码实现 | + +## 算子描述 + +QuantCustom单算子,对输入tensor按元素进行per_tensor量化计算。 + +QuantCustom算子规格: + + + + + + + + + + + + + + + +
算子类型(OpType)QuantCustom
算子输入
nameshapedata typeformat
src1024floatND
算子输出
dst1024int_8ND
核函数名quant_custom
+ +## 算子实现介绍 + +本样例实现了quant算子,其中输入src为固定shape[1024]。 + +- kernel实现 + + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendQuant高阶API接口完成ascendquant计算,得到最终结果,再搬出到外部存储上。 + + ascendquant算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm存储在srcLocal中,Compute任务负责对srcLocal执行ascendquant计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 + +- tiling实现 + + quant算子的tiling实现流程如下:首先获取AscendQuant接口能完成计算所需最大/最小临时空间大小,根据该范围结合实际的内存使用情况设置合适的空间大小,然后根据输入长度dataLength确定所需tiling参数。 \ No newline at end of file diff --git a/examples/quantization/quant/host_tiling/quant_custom_tiling.h b/examples/quantization/quant/host_tiling/quant_custom_tiling.h new file mode 100644 index 00000000..962fb876 --- /dev/null +++ b/examples/quantization/quant/host_tiling/quant_custom_tiling.h @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_QUANTIZATION_QUANT_CUSTOM_TILING_H +#define EXAMPLES_QUANTIZATION_QUANT_CUSTOM_TILING_H +#include "register/tilingdata_base.h" +#include "tiling/tiling_api.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(QuantCustomTilingData) + TILING_DATA_FIELD_DEF(uint32_t, dataLength); + TILING_DATA_FIELD_DEF(uint32_t, sharedTmpBufferSize); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(QuantCustom, QuantCustomTilingData) +} // namespace optiling + +void ComputeTiling(const uint32_t dataLength, optiling::QuantCustomTilingData &tiling){ + std::vector shapeVec = {dataLength}; + ge::Shape srcShape(shapeVec); + uint32_t typeSize = sizeof(float); + uint32_t maxTmpSize; + uint32_t minTmpSize; + AscendC::GetAscendQuantMaxMinTmpSize(srcShape, typeSize, maxTmpSize, minTmpSize); + uint32_t localWorkspaceSize = minTmpSize; + tiling.set_dataLength(dataLength); + tiling.set_sharedTmpBufferSize(localWorkspaceSize); +} + +#endif // EXAMPLES_QUANTIZATION_QUANT_CUSTOM_TILING_H diff --git a/examples/quantization/quant/kernel_impl/quant_custom.h b/examples/quantization/quant/kernel_impl/quant_custom.h new file mode 100644 index 00000000..a035da04 --- /dev/null +++ b/examples/quantization/quant/kernel_impl/quant_custom.h @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_QUANTIZATION_QUANT_CUSTOM_H +#define EXAMPLES_QUANTIZATION_QUANT_CUSTOM_H +#include "kernel_operator.h" + +namespace MyCustomKernel { +struct VecTiling{ + uint32_t dataLength; + uint32_t sharedTmpBufferSize; +}; + +template class KernelQuant { +public: + __aicore__ inline KernelQuant() {} + __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstGm, uint32_t inputSize) + { + dataSize = inputSize; + + srcGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ srcType*>(srcGm), dataSize); + dstGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ int8_t*>(dstGm), dataSize); + + pipe.InitBuffer(inQueueX, 1, dataSize * sizeof(srcType)); + pipe.InitBuffer(outQueue, 1, dataSize * sizeof(int8_t)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor srcLocal = inQueueX.AllocTensor(); + AscendC::DataCopy(srcLocal, srcGlobal, dataSize); + inQueueX.EnQue(srcLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor dstLocal = outQueue.AllocTensor(); + AscendC::LocalTensor srcLocal = inQueueX.DeQue(); + AscendC::AscendQuant(dstLocal, srcLocal, 2.0f, 0.9f, dataSize); + //scale=2.0, offset=0.9 + outQueue.EnQue(dstLocal); + inQueueX.FreeTensor(srcLocal); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor dstLocal = outQueue.DeQue(); + AscendC::DataCopy(dstGlobal, dstLocal, dataSize); + outQueue.FreeTensor(dstLocal); + } + +private: + AscendC::GlobalTensor srcGlobal; + AscendC::GlobalTensor dstGlobal; + AscendC::TPipe pipe; + AscendC::TQue inQueueX; + AscendC::TQue outQueue; + uint32_t dataSize = 0; +}; + +} + +#endif // EXAMPLES_QUANTIZATION_QUANT_CUSTOM_H \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt new file mode 100644 index 00000000..6cf54e17 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt @@ -0,0 +1,72 @@ +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) +if(${RUN_MODE}) + set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +endif() +if (${SOC_VERSION}) + set(SOC_VERSION "Ascend910" CACHE STRING "system on chip type") +endif() + +set(ASCEND_CANN_PACKAGE_PATH "/home/ma-user/work/Ascend/ascend-toolkit/latest" CACHE STRING "ASCEND CANN package installation directory") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() + +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/quant_custom.cpp +) +set(CUSTOM_ASCEND310P_LIST "Ascend310P1" "Ascend310P3") + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() + +add_executable(quant_direct_kernel_op + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/quant_custom_tiling.cpp +) + +target_compile_options(quant_direct_kernel_op PRIVATE + $:-g>> + -O2 + -std=c++17 + -D_GLIBCXX_USE_CXX11_ABI=0 +) + +target_compile_definitions(quant_direct_kernel_op PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_include_directories(quant_direct_kernel_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + $:${ASCEND_CANN_PACKAGE_PATH}/include>> + $:${ASCEND_CANN_PACKAGE_PATH}/runtime/include>> +) + +target_link_libraries(quant_direct_kernel_op PRIVATE + $,$>:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + $:ascendcl>> + $:c_sec>> + ascendc_kernels_${RUN_MODE} + tiling_api + register + platform + ascendalog + dl + graph_base +) + +install(TARGETS quant_direct_kernel_op + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/README.md b/examples/quantization/quant/kernel_launch_method_by_direct/README.md new file mode 100644 index 00000000..bf2ec349 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/README.md @@ -0,0 +1,52 @@ + + +## 概述 + +本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入src为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 + +## 目录结构介绍 +| 目录及文件 | 描述 | +|---------------------|----------------------| +| [cmake](./cmake) | 编译工程文件 | +| [scripts](./scripts) | 包含输入数据和真值数据生成脚本文件 | +| main.cpp | 主函数,调用算子的应用程序,含CPU域及NPU域调用 | +| quant_custom.cpp | 算子kernel实现 | +| quant_custom_tiling.cpp | 算子tiling实现 | +| run.sh | 编译执行脚本 | +| CMakeLists.txt | 编译工程文件 | + + +## 编译运行样例 + + - 配置环境变量 + + 这里的\$ASCEND_CANN_PACKAGE_PATH需要替换为CANN包的存储路径。例如:/usr/local/Ascend/ascend-toolkit/latest + ``` + export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH + ``` + 若执行sim仿真,可自行配置仿真日志文件目录,默认仿真日志会在build目录下生成。若需要详细了解sim仿真相关内容,请参考[《AscendC算子调测工具》](https://hiascend.com/document/redirect/CannCommunityToolAscendebug)中的 调测功能说明 > Simulator性能仿真功能 > CAModel性能仿真 章节。 + ``` + # 设置仿真模式日志生成目录(可选),需要自行确保设置的目录已存在。若设置为相对路径下的目录,则以程序执行时的目录作为当前目录。例如,执行如下设置时,需要确保./目录下存在xxx目录 + export CAMODEL_LOG_PATH=./xxx + ``` + + - 生成输入和真值 + ``` + python3 scripts/gen_data.py + ``` + + - 编译执行 + + ``` + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + 其中cmake参数说明如下: + - RUN_MODE :编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu / sim/ npu] + - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下参数取值(xxx请替换为具体取值): + - Atlas 推理系列产品(Ascend 310P处理器)参数值:Ascend310P1、Ascend310P3 + - Atlas A2训练系列产品参数值:AscendxxxB1、AscendxxxB2、AscendxxxB3、AscendxxxB4 + + 示例如下: + ``` + bash run.sh -r cpu -v Ascend310P1 + ``` diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake b/examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake new file mode 100644 index 00000000..693f15ac --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake @@ -0,0 +1,26 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED + ${KERNEL_FILES} +) + +target_link_libraries(ascendc_kernels_${RUN_MODE} PRIVATE + tikicpulib::${SOC_VERSION} +) + +target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE + -g + -O0 + -std=c++17 +) + +install(TARGETS ascendc_kernels_${RUN_MODE} +DESTINATION ${CMAKE_INSTALL_LIBDIR} +) \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake b/examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake new file mode 100644 index 00000000..afdb61f5 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake @@ -0,0 +1,19 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_library(ascendc_kernels_${RUN_MODE} STATIC + ${KERNEL_FILES} +) + +ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> + -DASCENDC_DUMP + -DHAVE_WORKSPACE + -DHAVE_TILING + ) \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/main.cpp b/examples/quantization/quant/kernel_launch_method_by_direct/main.cpp new file mode 100644 index 00000000..d860cab6 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/main.cpp @@ -0,0 +1,159 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "../../../common/data_utils.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +extern void quant_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *srcGm, uint8_t *dstGm, + uint8_t *workspace, uint8_t *tiling); +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void quant_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR workspace, + GM_ADDR tiling); +#endif + +constexpr uint32_t BLOCK_DIM = 1; +constexpr uint32_t DATALENGTH = 1024; +constexpr uint32_t TILINGDATA_SIZE = 2; +constexpr uint32_t WORKSPACE_SIZE = 16*1024*1024; + +extern uint8_t *GenerateTiling(uint32_t dataSize); + +static bool CompareResult(const void *outputData, int64_t outSize) +{ + void *goldenData; +#ifdef ASCENDC_CPU_DEBUG + goldenData = (uint8_t *)AscendC::GmAlloc(outSize); +#else + CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); +#endif + size_t goldenSize = outSize; + bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + if (ret) { + printf("ReadFile golden.bin success!\n"); + } else { + printf("test failed!\n"); + return false; + } + constexpr float EPS = 1e-5; + int64_t wrongNum = 0; + + for (int i = 0; i < outSize / sizeof(int8_t); i++) { + float a = (reinterpret_cast(outputData))[i]; + float b = (reinterpret_cast(goldenData))[i]; + float ae = std::abs(a - b); + float re = ae / abs(b); + if (ae > EPS && re > EPS) { + printf("CompareResult golden.bin failed. Output[%d] is %lf, golden is %lf\n", i, a, b); + wrongNum++; + } + } +#ifdef ASCENDC_CPU_DEBUG + AscendC::GmFree((void *)goldenData); +#else + CHECK_ACL(aclrtFreeHost(goldenData)); +#endif + if (wrongNum != 0) { + return false; + } else { + printf("CompareResult golden.bin success!\n"); + return true; + } +} + +int32_t main(int32_t argc, char *argv[]) +{ + uint32_t blockDim = BLOCK_DIM; + size_t dataLength = DATALENGTH; + size_t inpSize = DATALENGTH * sizeof(float); + size_t outSize = DATALENGTH * sizeof(int8_t); + size_t tilingFileSize = TILINGDATA_SIZE * sizeof(uint32_t); + size_t workspaceSize = WORKSPACE_SIZE; + +#ifdef ASCENDC_CPU_DEBUG + uint8_t *input = (uint8_t *)AscendC::GmAlloc(inpSize); + uint8_t *output = (uint8_t *)AscendC::GmAlloc(outSize); + uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingFileSize); + uint8_t *workspace = (uint8_t *)AscendC::GmAlloc(workspaceSize); + + ReadFile("../input/input.bin", inpSize, input, inpSize); + memcpy_s(tiling, tilingFileSize, GenerateTiling(dataLength), tilingFileSize); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); + ICPU_RUN_KF(quant_custom, blockDim, input, output, workspace, tiling); // use this macro for cpu debug + + WriteFile("../output/output.bin", output, outSize); + bool goldenResult = CompareResult(output, outSize); + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + + AscendC::GmFree((void *)input); + AscendC::GmFree((void *)output); + AscendC::GmFree((void *)tiling); + AscendC::GmFree((void *)workspace); +#else + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *inputHost, *outputHost, *workspaceHost, *tilingHost; + uint8_t *inputDevice, *outputDevice, *workspaceDevice, *tilingDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&inputHost), inpSize)); + CHECK_ACL(aclrtMallocHost((void **)(&outputHost), outSize)); + CHECK_ACL(aclrtMallocHost((void **)(&workspaceHost), workspaceSize)); + CHECK_ACL(aclrtMallocHost((void **)(&tilingHost), tilingFileSize)); + CHECK_ACL(aclrtMalloc((void **)&inputDevice, inpSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&outputDevice, outSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&workspaceDevice, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&tilingDevice, tilingFileSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("../input/input.bin", inpSize, inputHost, inpSize); + + CHECK_ACL(aclrtMemcpy(tilingDevice, tilingFileSize, GenerateTiling(dataLength), + tilingFileSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(inputDevice, inpSize, inputHost, inpSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + quant_custom_do(blockDim, nullptr, stream, inputDevice, outputDevice, workspaceDevice, tilingDevice); + CHECK_ACL(aclrtSynchronizeStream(stream)); + CHECK_ACL(aclrtMemcpy(outputHost, outSize, outputDevice, outSize, ACL_MEMCPY_DEVICE_TO_HOST)); + + WriteFile("../output/output.bin", outputHost, outSize); + + bool goldenResult = CompareResult(outputHost, outSize); + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + + CHECK_ACL(aclrtFree(inputDevice)); + CHECK_ACL(aclrtFree(outputDevice)); + CHECK_ACL(aclrtFree(tilingDevice)); + CHECK_ACL(aclrtFreeHost(inputHost)); + CHECK_ACL(aclrtFreeHost(outputHost)); + CHECK_ACL(aclrtFreeHost(tilingHost)); + CHECK_ACL(aclrtFree(workspaceDevice)); + CHECK_ACL(aclrtFreeHost(workspaceHost)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + return 0; +} diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp b/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp new file mode 100644 index 00000000..a4f73cde --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp @@ -0,0 +1,40 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include "kernel_operator.h" +#include "../kernel_impl/quant_custom.h" + +__aicore__ inline void CopyTiling(MyCustomKernel::VecTiling* tiling, GM_ADDR tilingGM) +{ + uint32_t* ptr = reinterpret_cast(tiling); + auto tiling32 = reinterpret_cast<__gm__ uint32_t*>(tilingGM); + + for (uint32_t i = 0; i < sizeof(MyCustomKernel::VecTiling) / sizeof(uint32_t); i++, ptr++) { + *ptr = *(tiling32 + i); + } + return; +} + +extern "C" __global__ __aicore__ void quant_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR workspace, GM_ADDR tiling) +{ + MyCustomKernel::KernelQuant op; + MyCustomKernel::VecTiling tilingData; + CopyTiling(&tilingData, tiling); + op.Init(srcGm, dstGm, tilingData.dataLength); + op.Process(); +} + +#ifndef ASCENDC_CPU_DEBUG +// call of kernel function +void quant_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *srcGm, uint8_t *dstGm, + uint8_t *workspace, uint8_t *tiling) +{ + quant_custom<<>>(srcGm, dstGm, workspace, tiling); +} +#endif \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp b/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp new file mode 100644 index 00000000..342ebb91 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp @@ -0,0 +1,30 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include +#include +#include +#include +#include +#include "tiling/tiling_api.h" +#include "../host_tiling/quant_custom_tiling.h" + +uint8_t *GetTilingBuf(optiling::QuantCustomTilingData *tilingData) { + uint32_t tilingSize = sizeof(optiling::QuantCustomTilingData); + uint8_t *buf = (uint8_t *)malloc(tilingSize); + tilingData->SaveToBuffer(buf, tilingSize); + return buf; +} + +uint8_t* GenerateTiling(uint32_t dataLength){ + optiling::QuantCustomTilingData tiling; + ComputeTiling(dataLength, tiling); + return GetTilingBuf(&tiling); +} \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/run.sh b/examples/quantization/quant/kernel_launch_method_by_direct/run.sh new file mode 100644 index 00000000..f9e15533 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/run.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +SHORT=r:,v:, +LONG=run-mode:,soc-version:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + (-r | --run-mode ) + RUN_MODE="$2" + shift 2;; + (-v | --soc-version ) + SOC_VERSION="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +rm -rf build +mkdir build +cd build + +# in case of running op in simulator, use stub so instead +if [ "${RUN_MODE}" = "sim" ]; then + export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | sed 's/\/.*\/runtime\/lib64://g') + export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/runtime/lib64/stub:$LD_LIBRARY_PATH + + if [ ! $CAMODEL_LOG_PATH ]; then + export CAMODEL_LOG_PATH=./ # default log save in build dir + else + export CAMODEL_LOG_PATH=../$CAMODEL_LOG_PATH + rm -rf $CAMODEL_LOG_PATH + mkdir -p $CAMODEL_LOG_PATH + fi +fi + +if [ "${RUN_MODE}" = "cpu" ]; then + export CAMODEL_LOG_PATH=./ # cpu run mode set fixed log path +fi + +source $ASCEND_HOME_DIR/bin/setenv.bash +export LD_LIBRARY_PATH=${ASCEND_HOME_DIR}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + +cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. +make -j16 +./quant_direct_kernel_op \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py b/examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py new file mode 100644 index 00000000..4a47bade --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py @@ -0,0 +1,29 @@ +#!/usr/bin/python3 +# coding=utf-8 + +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +import numpy as np +import os + +def gen_golden_data_simple(): + shape=[1024] + + src = np.random.uniform(low=-4, high=4, size=shape).astype(np.float16) + # half conversion is important otherwise test failed + golden = np.round(src*2.0 + 0.9).astype(np.int8) + src = src.astype(np.float32) + os.system("mkdir -p input") + os.system("mkdir -p output") + src.tofile("./input/input.bin") + golden.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt new file mode 100644 index 00000000..584132d8 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt @@ -0,0 +1,69 @@ +cmake_minimum_required(VERSION 3.16.0) +project(opp) +if(ENABLE_CROSS_COMPILE) + if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) + set(CROSS_COMPILE_PLATFORM aarch64) + else() + set(CROSS_COMPILE_PLATFORM x86_64) + endif() + set(PLATFORM ${CMAKE_SYSTEM_PROCESSOR}) + set(CMAKE_COMPILE_COMPILER_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/linux/${CROSS_COMPILE_PLATFORM}/) + set(CMAKE_COMPILE_RUNTIME_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/${CROSS_COMPILE_PLATFORM}/) + set(CMAKE_SYSTEM_PROCESSOR ${CROSS_COMPILE_PLATFORM}) + set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER ${CMAKE_CROSS_PLATFORM_COMPILER}) +else() + set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) +endif() + +include(cmake/config.cmake) +include(cmake/func.cmake) +include(cmake/intf.cmake) + +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) + add_subdirectory(framework) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) + add_subdirectory(op_host) +endif() +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) + add_subdirectory(op_kernel) +endif() +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +# modify vendor_name in install.sh and upgrade.sh +add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh + COMMAND mkdir -p ${CMAKE_BINARY_DIR}/scripts + COMMAND cp -r ${CMAKE_SOURCE_DIR}/scripts/* ${CMAKE_BINARY_DIR}/scripts/ + COMMAND sed -i "s/vendor_name=customize/vendor_name=${vendor_name}/g" ${CMAKE_BINARY_DIR}/scripts/* +) +add_custom_target(modify_vendor ALL DEPENDS ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh) +install(DIRECTORY ${CMAKE_BINARY_DIR}/scripts/ DESTINATION . FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_READ) + +install(FILES ${CMAKE_SOURCE_DIR}/custom.proto DESTINATION packages OPTIONAL) + +get_system_info(SYSTEM_INFO) + +# gen version.info +add_custom_target(gen_version_info ALL + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh ${ASCEND_CANN_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR} +) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.info + DESTINATION packages/vendors/${vendor_name}/) + +# CPack config +set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) +set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) +set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") +set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) +set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") +set(CPACK_GENERATOR External) +set(CPACK_CMAKE_GENERATOR "Unix Makefiles") +set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) +set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) +set(CPACK_EXTERNAL_BUILT_PACKAGES ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME}) +include(CPack) diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json b/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json new file mode 100644 index 00000000..dc47e02c --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json @@ -0,0 +1,63 @@ +{ + "version": 1, + "cmakeMinimumRequired": { + "major": 3, + "minor": 19, + "patch": 0 + }, + "configurePresets": [ + { + "name": "default", + "displayName": "Default Config", + "description": "Default build using Unix Makefiles generator", + "generator": "Unix Makefiles", + "binaryDir": "${sourceDir}/build_out", + "cacheVariables": { + "CMAKE_BUILD_TYPE": { + "type": "STRING", + "value": "Release" + }, + "ENABLE_SOURCE_PACKAGE": { + "type": "BOOL", + "value": "True" + }, + "ENABLE_BINARY_PACKAGE": { + "type": "BOOL", + "value": "True" + }, + "ASCEND_COMPUTE_UNIT": { + "type": "STRING", + "value": "ascend310p;ascend910b" + }, + "ENABLE_TEST": { + "type": "BOOL", + "value": "True" + }, + "vendor_name": { + "type": "STRING", + "value": "customize" + }, + "ASCEND_CANN_PACKAGE_PATH": { + "type": "PATH", + "value": "/home/ma-user/work/Ascend/ascend-toolkit/latest" + }, + "ASCEND_PYTHON_EXECUTABLE": { + "type": "STRING", + "value": "python3" + }, + "CMAKE_INSTALL_PREFIX": { + "type": "PATH", + "value": "${sourceDir}/build_out" + }, + "ENABLE_CROSS_COMPILE": { + "type": "BOOL", + "value": "False" + }, + "CMAKE_CROSS_PLATFORM_COMPILER": { + "type": "PATH", + "value": "/usr/bin/aarch64-linux-gnu-g++" + } + } + } + ] +} \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/README.md b/examples/quantization/quant/kernel_launch_method_by_framework/README.md new file mode 100644 index 00000000..76a5d354 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/README.md @@ -0,0 +1,81 @@ + + +## 概述 + +本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入src为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 + +## 样例支持的产品型号为: +- Atlas A2训练系列产品/Atlas 800I A2推理产品 + +## 目录结构 +| 目录 | 描述 | +|---------------------|----------------------| +| [cmake](./cmake) | 编译工程文件 | +| [op_host](./op_host) | host侧实现文件 | +| [op_kernel](./op_kernel) | kernel侧实现文件 | +| [scripts](./scripts) | 包含输入数据和真值数据生成脚本文件 | +| [testcases](./testcases) | 包含cpu域以及npu域的用例主函数,以及真值校验函数 | +| build.sh | 编译运行算子的脚本 | +| CMakeLists.txt | 编译工程文件 | +| CMakePresets.json | 编译工程配置文件 | + +## 编译运行样例 + +## 1.配置环境变量 + + 这里的\$ASCEND_CANN_PACKAGE_PATH需要替换为CANN包的存储路径。例如:/usr/local/Ascend/ascend-toolkit/latest + ``` + export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH + source $ASCEND_HOME_DIR/../set_env.sh + ``` +### 2.生成输入和真值 + ``` + python3 scripts/gen_data.py + ``` + +### 3.编译算子工程 + + - 修改CMakePresets.json中ASCEND_CANN_PACKAGE_PATH为CANN软件包安装后的实际路径。 + + + ``` + { + …… + "configurePresets": [ + { + …… + "ASCEND_CANN_PACKAGE_PATH": { + "type": "PATH", + "value": "~/Ascend/ascend-toolkit/latest" //请替换为CANN软件包安装后的实际路径。eg:/home/HwHiAiUser/Ascend/ascend-toolkit/latest + }, + …… + } + ] + } + ``` + - 在当前算子工程目录下执行如下命令,进行算子工程编译。 + + ``` + bash build.sh + ``` + 编译成功后,会在当前目录下创建build_out目录,并在build_out目录下生成自定义算子安装包custom_opp_\_\.run,例如“custom_opp_ubuntu_x86_64.run”。 + + +### 4.部署算子包 + + - 执行如下命令,在自定义算子安装包所在路径下,安装自定义算子包。 + + ``` + cd build_out + ./custom_opp__.run + ``` + + 命令执行成功后,自定义算子包中的相关文件将部署至当前环境的OPP算子库的vendors/customize目录中。 +### 5.执行样例 + - 在build_out目录下执行如下命令 + + ``` + ./quant_custom_npu + ``` +### 注意事项 +本样例工程会自动识别执行的硬件平台,无需单独设置SOC_VERSION \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/build.sh b/examples/quantization/quant/kernel_launch_method_by_framework/build.sh new file mode 100644 index 00000000..6f3ab962 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/build.sh @@ -0,0 +1,76 @@ +#!/bin/bash +script_path=$(realpath $(dirname $0)) + +source $ASCEND_HOME_DIR/bin/setenv.bash +cp -rf ../host_tiling/* op_host/ +ln -s $ASCEND_HOME_DIR/tools/op_project_templates/ascendc/customize/cmake/util/ ./cmake/util +mkdir -p build_out +rm -rf build_out/* +cd build_out + +opts=$(python3 $script_path/cmake/util/preset_parse.py $script_path/CMakePresets.json) +ENABLE_CROSS="-DENABLE_CROSS_COMPILE=True" +ENABLE_BINARY="-DENABLE_BINARY_PACKAGE=True" +cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') + +cmake_run_package() +{ + target=$1 + cmake --build . --target $target -j16 + if [ $? -ne 0 ]; then exit 1; fi + + if [ $target = "package" ]; then + if test -d ./op_kernel/binary ; then + ./cust*.run + if [ $? -ne 0 ]; then exit 1; fi + cmake --build . --target binary -j16 + if [ $? -ne 0 ]; then exit 1; fi + cmake --build . --target $target -j16 + fi + fi +} + +if [[ $opts =~ $ENABLE_CROSS ]] && [[ $opts =~ $ENABLE_BINARY ]] +then + target=package + if [ "$1"x != ""x ]; then target=$1; fi + if [ "$cmake_version" \< "3.19.0" ] ; then + cmake .. $opts -DENABLE_CROSS_COMPILE=0 + else + cmake .. --preset=default -DENABLE_CROSS_COMPILE=0 + fi + cmake_run_package $target + cp -r kernel ../ + rm -rf * + if [ "$cmake_version" \< "3.19.0" ] ; then + cmake .. $opts + else + cmake .. --preset=default + fi + + cmake --build . --target $target -j16 + if [ $? -ne 0 ]; then exit 1; fi + if [ $target = "package" ]; then + if test -d ./op_kernel/binary ; then + ./cust*.run + fi + fi + rm -rf ../kernel + +else + target=package + if [ "$1"x != ""x ]; then target=$1; fi + if [ "$cmake_version" \< "3.19.0" ] ; then + cmake .. $opts + else + cmake .. --preset=default + fi + cmake_run_package $target +fi + + +# for debug +# cd build_out +# make +# cpack +# verbose append -v \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake new file mode 100644 index 00000000..886119da --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake @@ -0,0 +1,25 @@ + +set(CMAKE_CXX_FLAGS_DEBUG "") +set(CMAKE_CXX_FLAGS_RELEASE "") + +if (NOT DEFINED vendor_name) + set(vendor_name customize CACHE STRING "") +endif() +if (NOT DEFINED ASCEND_CANN_PACKAGE_PATH) + set(ASCEND_CANN_PACKAGE_PATH /usr/local/Ascend/latest CACHE PATH "") +endif() +if (NOT DEFINED ASCEND_PYTHON_EXECUTABLE) + set(ASCEND_PYTHON_EXECUTABLE python3 CACHE STRING "") +endif() +if (NOT DEFINED ASCEND_COMPUTE_UNIT) + message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! +") +endif() +set(ASCEND_TENSOR_COMPILER_PATH ${ASCEND_CANN_PACKAGE_PATH}/compiler) +set(ASCEND_CCEC_COMPILER_PATH ${ASCEND_TENSOR_COMPILER_PATH}/ccec_compiler/bin) +set(ASCEND_AUTOGEN_PATH ${CMAKE_BINARY_DIR}/autogen) +set(ASCEND_FRAMEWORK_TYPE tensorflow) +file(MAKE_DIRECTORY ${ASCEND_AUTOGEN_PATH}) +set(CUSTOM_COMPILE_OPTIONS "custom_compile_options.ini") +execute_process(COMMAND rm -rf ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} + COMMAND touch ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS}) diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake new file mode 100644 index 00000000..4179dfd2 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake @@ -0,0 +1,192 @@ + +function(get_system_info SYSTEM_INFO) + if (UNIX) + execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) + string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) + set(${SYSTEM_INFO} ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} PARENT_SCOPE) + elseif (WIN32) + message(STATUS "System is Windows. Only for pre-build.") + else () + message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") + endif () +endfunction() + +function(opbuild) + message(STATUS "Opbuild generating sources") + cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" "OPS_SRC" ${ARGN}) + execute_process(COMMAND ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} -D_GLIBCXX_USE_CXX11_ABI=0 + -I ${ASCEND_CANN_PACKAGE_PATH}/include -L ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api + -o ${OPBUILD_OUT_DIR}/libascend_all_ops.so + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("build ops lib info: ${EXEC_INFO}") + message("build ops lib error: ${EXEC_ERROR}") + message(FATAL_ERROR "opbuild run failed!") + endif() + set(proj_env "") + set(prefix_env "") + if (NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") + set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") + endif() + if (NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") + set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") + endif() + execute_process(COMMAND ${proj_env} ${prefix_env} ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build + ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("opbuild ops info: ${EXEC_INFO}") + message("opbuild ops error: ${EXEC_ERROR}") + endif() + message(STATUS "Opbuild generating sources - done") +endfunction() + +function(add_ops_info_target) + cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" ${ARGN}) + get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) + add_custom_command(OUTPUT ${OPINFO_OUTPUT} + COMMAND mkdir -p ${opinfo_file_path} + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py + ${OPINFO_OPS_INFO} ${OPINFO_OUTPUT} + ) + add_custom_target(${OPINFO_TARGET} ALL + DEPENDS ${OPINFO_OUTPUT} + ) + install(FILES ${OPINFO_OUTPUT} + DESTINATION ${OPINFO_INSTALL_DIR} + ) +endfunction() + +function(add_ops_compile_options OP_TYPE) + cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" ${ARGN}) + file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} + "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") +endfunction() + +function(add_ops_impl_target) + cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) + add_custom_command(OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py + ${OPIMPL_OPS_INFO} + \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" + ${OPIMPL_IMPL_DIR} + ${OPIMPL_OUT_DIR}/dynamic + ${ASCEND_AUTOGEN_PATH} + + COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp + COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp + DEPENDS ${OPIMPL_OPS_INFO} + ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py + ) + add_custom_target(${OPIMPL_TARGET} ALL + DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) + if (${ENABLE_SOURCE_PACKAGE}) + install(DIRECTORY ${OPIMPL_OUT_DIR}/dynamic + DESTINATION ${OPIMPL_INSTALL_DIR} + ) + endif() +endfunction() + +function(add_npu_support_target) + cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) + add_custom_command(OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json + COMMAND mkdir -p ${NPUSUP_OUT_DIR} + COMMAND ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh + ${NPUSUP_OPS_INFO_DIR} + ${NPUSUP_OUT_DIR} + ) + add_custom_target(npu_supported_ops ALL + DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json + ) + install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json + DESTINATION ${NPUSUP_INSTALL_DIR} + ) +endfunction() + +function(add_bin_compile_target) + cmake_parse_arguments(BINCMP "" "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) + execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py + ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} + RESULT_VARIABLE EXEC_RESULT + OUTPUT_VARIABLE EXEC_INFO + ERROR_VARIABLE EXEC_ERROR + ) + if (${EXEC_RESULT}) + message("ops binary compile scripts gen info: ${EXEC_INFO}") + message("ops binary compile scripts gen error: ${EXEC_ERROR}") + message(FATAL_ERROR "ops binary compile scripts gen failed!") + endif() + if (NOT TARGET binary) + add_custom_target(binary) + endif() + add_custom_target(${BINCMP_TARGET} + COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* ${BINCMP_OUT_DIR}/src + ) + add_custom_target(${BINCMP_TARGET}_gen_ops_config + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p ${BINCMP_OUT_DIR}/bin + COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p ${BINCMP_OUT_DIR}/bin + -s ${BINCMP_COMPUTE_UNIT} + ) + add_dependencies(binary ${BINCMP_TARGET}_gen_ops_config) + file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) + foreach(bin_script ${bin_scripts}) + get_filename_component(bin_file ${bin_script} NAME_WE) + string(REPLACE "-" ";" bin_sep ${bin_file}) + list(GET bin_sep 0 op_type) + list(GET bin_sep 1 op_file) + list(GET bin_sep 2 op_index) + if (NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) + file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file}) + add_custom_target(${BINCMP_TARGET}_${op_file}_copy + COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py ${BINCMP_OUT_DIR}/src/${op_type}.py + ) + install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} + DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT}/ OPTIONAL + ) + endif() + add_custom_target(${BINCMP_TARGET}_${op_file}_${op_index} + COMMAND export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py ${BINCMP_OUT_DIR}/bin/${op_file} && echo $(MAKE) + WORKING_DIRECTORY ${BINCMP_OUT_DIR} + ) + add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} ${BINCMP_TARGET}_${op_file}_copy) + add_dependencies(${BINCMP_TARGET}_gen_ops_config ${BINCMP_TARGET}_${op_file}_${op_index}) + endforeach() + install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json + DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + + install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/../build_out/kernel/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/../build_out/kernel/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/../build_out/kernel/config/${BINCMP_COMPUTE_UNIT} OPTIONAL + ) + +endfunction() + +function(add_cross_compile_target) + cmake_parse_arguments(CROSSMP "" "TARGET;OUT_DIR;INSTALL_DIR" "" ${ARGN}) + add_custom_target(${CROSSMP_TARGET} ALL + DEPENDS ${CROSSMP_OUT_DIR} + ) + install(DIRECTORY ${CROSSMP_OUT_DIR} + DESTINATION ${CROSSMP_INSTALL_DIR} + ) +endfunction() diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake new file mode 100644 index 00000000..2f362c39 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake @@ -0,0 +1,26 @@ + +add_library(intf_pub INTERFACE) +target_compile_options(intf_pub INTERFACE + -fPIC + -fvisibility=hidden + -fvisibility-inlines-hidden + $<$:-O2> + $<$:-O0 -g> + $<$:-std=c++11> + $<$,$>:-ftrapv -fstack-check> + $<$:-pthread -Wfloat-equal -Wshadow -Wformat=2 -Wno-deprecated -Wextra> + $,-fstack-protector-strong,-fstack-protector-all> +) +target_compile_definitions(intf_pub INTERFACE + _GLIBCXX_USE_CXX11_ABI=0 + $<$:_FORTIFY_SOURCE=2> +) +target_include_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include) +target_link_options(intf_pub INTERFACE + $<$,EXECUTABLE>:-pie> + $<$:-s> + -Wl,-z,relro + -Wl,-z,now + -Wl,-z,noexecstack +) +target_link_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/lib64) diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake new file mode 100644 index 00000000..48c565bf --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake @@ -0,0 +1,17 @@ +execute_process(COMMAND chmod +x ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) +execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh + --header ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh + --help-header ./help.info + --gzip --complevel 4 --nomd5 --sha256 + ./ ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} + RESULT_VARIABLE EXEC_RESULT + ERROR_VARIABLE EXEC_ERROR +) +if (NOT "${EXEC_RESULT}x" STREQUAL "0x") + message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") +endif() +execute_process(COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ + COMMAND echo "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" + WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} +) diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt new file mode 100644 index 00000000..40dd51cf --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt @@ -0,0 +1,82 @@ + +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs) + +opbuild(OPS_SRC ${ops_srcs} + OUT_DIR ${ASCEND_AUTOGEN_PATH} +) + +add_library(cust_op_proto SHARED ${ops_srcs} ${ASCEND_AUTOGEN_PATH}/op_proto.cc) +target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) +target_compile_options(cust_op_proto PRIVATE + -fvisibility=hidden +) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_op_proto PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_op_proto PRIVATE + intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive +) +set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME + cust_opsproto_rt2.0 +) +add_library(cust_optiling SHARED ${ops_srcs}) +target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) +target_compile_options(cust_optiling PRIVATE + -fvisibility=hidden +) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_optiling PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_optiling PRIVATE + intf_pub + exe_graph + register + tiling_api + -Wl,--whole-archive + rt2_registry + -Wl,--no-whole-archive +) +set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME + cust_opmaster_rt2.0 +) + +file(GLOB aclnn_src ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) +file(GLOB aclnn_inc ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) +add_library(cust_opapi SHARED ${aclnn_src}) +if(ENABLE_CROSS_COMPILE) + target_link_directories(cust_opapi PRIVATE + ${CMAKE_COMPILE_COMPILER_LIBRARY} + ${CMAKE_COMPILE_RUNTIME_LIBRARY} + ) +endif() +target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) + +add_custom_target(optiling_compat ALL + COMMAND ln -sf lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ + ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so +) + +install(TARGETS cust_op_proto + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install(FILES ${ASCEND_AUTOGEN_PATH}/op_proto.h + DESTINATION packages/vendors/${vendor_name}/op_proto/inc) +install(TARGETS cust_optiling + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) +install(TARGETS cust_opapi + LIBRARY DESTINATION packages/vendors/${vendor_name}/op_api/lib) +install(FILES ${aclnn_inc} + DESTINATION packages/vendors/${vendor_name}/op_api/include) diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp new file mode 100644 index 00000000..692c503b --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp @@ -0,0 +1,69 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include "quant_custom_tiling.h" +#include "register/op_def_registry.h" + +namespace optiling { +constexpr uint32_t BLOCK_DIM = 48; + +static ge::graphStatus TilingFunc(gert::TilingContext *context) +{ + QuantCustomTilingData tiling; + const gert::StorageShape *x1Shape = context->GetInputShape(0); + const gert::Shape shape = x1Shape->GetStorageShape(); + + ComputeTiling(shape[0], tiling); + + context->SetBlockDim(BLOCK_DIM); + context->SetTilingKey(1); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + return ge::GRAPH_SUCCESS; +} +} + + +namespace ge { +static ge::graphStatus InferShape(gert::InferShapeContext *context) +{ + const gert::Shape *x1Shape = context->GetInputShape(0); + gert::Shape *yShape = context->GetOutputShape(0); + *yShape = *x1Shape; + return GRAPH_SUCCESS; +} +} + + +namespace ops { +class QuantCustom : public OpDef { +public: + explicit QuantCustom(const char *name) : OpDef(name) + { + this->Input("inputGm") + .ParamType(REQUIRED) + .DataType({ ge::DT_FLOAT }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); + this->Output("outputGm") + .ParamType(REQUIRED) + .DataType({ ge::DT_INT8 }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); + + this->SetInferShape(ge::InferShape); + + this->AICore().SetTiling(optiling::TilingFunc); + this->AICore().AddConfig("ascend910b"); + this->AICore().AddConfig("ascend310p"); + } +}; + +OP_ADD(QuantCustom); +} diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h new file mode 100644 index 00000000..962fb876 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_QUANTIZATION_QUANT_CUSTOM_TILING_H +#define EXAMPLES_QUANTIZATION_QUANT_CUSTOM_TILING_H +#include "register/tilingdata_base.h" +#include "tiling/tiling_api.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(QuantCustomTilingData) + TILING_DATA_FIELD_DEF(uint32_t, dataLength); + TILING_DATA_FIELD_DEF(uint32_t, sharedTmpBufferSize); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(QuantCustom, QuantCustomTilingData) +} // namespace optiling + +void ComputeTiling(const uint32_t dataLength, optiling::QuantCustomTilingData &tiling){ + std::vector shapeVec = {dataLength}; + ge::Shape srcShape(shapeVec); + uint32_t typeSize = sizeof(float); + uint32_t maxTmpSize; + uint32_t minTmpSize; + AscendC::GetAscendQuantMaxMinTmpSize(srcShape, typeSize, maxTmpSize, minTmpSize); + uint32_t localWorkspaceSize = minTmpSize; + tiling.set_dataLength(dataLength); + tiling.set_sharedTmpBufferSize(localWorkspaceSize); +} + +#endif // EXAMPLES_QUANTIZATION_QUANT_CUSTOM_TILING_H diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt new file mode 100644 index 00000000..c50a409a --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt @@ -0,0 +1,69 @@ +# set custom compile options +if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") + add_ops_compile_options(ALL OPTIONS -g -O0) +endif() +add_ops_compile_options(ALL OPTIONS -mllvm -cce-aicore-jump-expand=true) + +foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) + + # generate aic-${compute_unit}-ops-info.json + add_ops_info_target(TARGET ops_info_gen_${compute_unit} + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core/${compute_unit}/aic-${compute_unit}-ops-info.json + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit} + ) + + # generate ascendc impl py once + if (NOT TARGET ascendc_impl_gen) + add_ops_impl_target(TARGET ascendc_impl_gen + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl + ) + endif() + + # dynamic shape binary compile + if (${ENABLE_BINARY_PACKAGE} AND NOT ${ENABLE_CROSS_COMPILE}) + add_bin_compile_target(TARGET ascendc_bin_${compute_unit} + OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini + IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} + ADP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/dynamic + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel + COMPUTE_UNIT ${compute_unit} + ) + add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) + endif() + + if (${ENABLE_CROSS_COMPILE} AND ${ENABLE_BINARY_PACKAGE}) + add_cross_compile_target( + TARGET bin_${compute_unit} + OUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../kernel + INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/ + ) + endif() +endforeach() + +# generate npu_supported_ops.json +add_npu_support_target(TARGET npu_supported_ops + OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} + OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core + INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE} +) + +if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) + add_subdirectory(testcases) +endif() + +# install kernel file +if (${ENABLE_SOURCE_PACKAGE}) + file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/*.h + ${CMAKE_CURRENT_SOURCE_DIR}/*.py + ) + install(FILES ${KERNEL_FILES} + DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic + ) +endif() diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp b/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp new file mode 100644 index 00000000..8f44584f --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "../../../../../../kernel_impl/quant_custom.h" + +extern "C" __global__ __aicore__ void quant_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR workspace, + GM_ADDR tiling) +{ + if ASCEND_IS_AIC { + return; + } + GET_TILING_DATA(tilingData, tiling); + MyCustomKernel::VecTiling vecTiling = *reinterpret_cast(&tilingData); + if (TILING_KEY_IS(1)) { + MyCustomKernel::KernelQuant op; + op.Init(srcGm, dstGm, vecTiling.dataLength); + op.Process(); + } +} \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py new file mode 100644 index 00000000..4a47bade --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py @@ -0,0 +1,29 @@ +#!/usr/bin/python3 +# coding=utf-8 + +# Copyright (c) 2024 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +import numpy as np +import os + +def gen_golden_data_simple(): + shape=[1024] + + src = np.random.uniform(low=-4, high=4, size=shape).astype(np.float16) + # half conversion is important otherwise test failed + golden = np.round(src*2.0 + 0.9).astype(np.int8) + src = src.astype(np.float32) + os.system("mkdir -p input") + os.system("mkdir -p output") + src.tofile("./input/input.bin") + golden.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info new file mode 100644 index 00000000..f4b28d57 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info @@ -0,0 +1 @@ + --install-path Install operator package to specific dir path \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh new file mode 100644 index 00000000..8468c5a2 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh @@ -0,0 +1,318 @@ +#!/bin/bash +vendor_name=customize +targetdir=/usr/local/Ascend/opp +target_custom=0 + +sourcedir=$PWD/packages +vendordir=vendors/$vendor_name + +QUIET="y" + +while true +do + case $1 in + --quiet) + QUIET="y" + shift + ;; + --install-path=*) + INSTALL_PATH=$(echo $1 | cut -d"=" -f2-) + INSTALL_PATH=${INSTALL_PATH%*/} + shift + ;; + --*) + shift + ;; + *) + break + ;; + esac +done + +log() { + cur_date=`date +"%Y-%m-%d %H:%M:%S"` + echo "[runtime] [$cur_date] "$1 +} + +if [ -n "${INSTALL_PATH}" ]; then + if [[ ! "${INSTALL_PATH}" = /* ]]; then + log "[ERROR] use absolute path for --install-path argument" + exit 1 + fi + if [ ! -d ${INSTALL_PATH} ]; then + mkdir ${INSTALL_PATH} >> /dev/null 2>&1 + if [ $? -ne 0 ]; then + log "[ERROR] create ${INSTALL_PATH} failed" + exit 1 + fi + fi + targetdir=${INSTALL_PATH} +elif [ -n "${ASCEND_CUSTOM_OPP_PATH}" ]; then + if [ ! -d ${ASCEND_CUSTOM_OPP_PATH} ]; then + mkdir -p ${ASCEND_CUSTOM_OPP_PATH} >> /dev/null 2>&1 + if [ $? -ne 0 ]; then + log "[ERROR] create ${ASCEND_CUSTOM_OPP_PATH} failed" + fi + fi + targetdir=${ASCEND_CUSTOM_OPP_PATH} +else + if [ "x${ASCEND_OPP_PATH}" == "x" ]; then + log "[ERROR] env ASCEND_OPP_PATH no exist" + exit 1 + fi + targetdir="${ASCEND_OPP_PATH}" +fi + +if [ ! -d $targetdir ];then + log "[ERROR] $targetdir no exist" + exit 1 +fi + +upgrade() +{ + if [ ! -d ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 files" + return 0 + fi + + if [ ! -d ${targetdir}/$vendordir/$1 ];then + log "[INFO] create ${targetdir}/$vendordir/$1." + mkdir -p ${targetdir}/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/$vendordir/$1 failed" + return 1 + fi + else + has_same_file=-1 + for file_a in ${sourcedir}/$vendordir/$1/*; do + file_b=${file_a##*/}; + if [ "ls ${targetdir}/$vendordir/$1" = "" ]; then + log "[INFO] ${targetdir}/$vendordir/$1 is empty !!" + return 1 + fi + grep -q $file_b <<<`ls ${targetdir}/$vendordir/$1`; + if [[ $? -eq 0 ]]; then + echo -n "${file_b} " + has_same_file=0 + fi + done + if [ 0 -eq $has_same_file ]; then + if test $QUIET = "n"; then + echo "[INFO]: has old version in ${targetdir}/$vendordir/$1, \ + you want to Overlay Installation , please enter:[o]; \ + or replace directory installation , please enter: [r]; \ + or not install , please enter:[n]." + + while true + do + read orn + if [ "$orn" = n ]; then + return 0 + elif [ "$orn" = m ]; then + break; + elif [ "$0rn" = r ]; then + [ -n "${targetdir}/$vendordir/$1/" ] && rm -rf "${targetdir}/$vendordir/$1"/* + break; + else + echo "[ERROR] input error, please input again!" + fi + done + fi + fi + log "[INFO] replace or merge old ops $1 files .g....." + fi + + log "copy new ops $1 files ......" + if [ -d ${targetdir}/$vendordir/$1/ ]; then + chmod -R +w "$targetdir/$vendordir/$1/" >/dev/null 2>&1 + fi + cp -rf ${sourcedir}/$vendordir/$1/* $targetdir/$vendordir/$1/ + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 files failed" + return 1 + fi + + return 0 +} +upgrade_proto() +{ + if [ ! -f ${sourcedir}/$vendordir/custom.proto ]; then + log "[INFO] no need to upgrade custom.proto files" + return 0 + fi + if [ ! -d ${targetdir}/$vendordir/framework/caffe ];then + log "[INFO] create ${targetdir}/$vendordir/framework/caffe." + mkdir -p ${targetdir}/$vendordir/framework/caffe + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/$vendordir/framework/caffe failed" + return 1 + fi + else + if [ -f ${targetdir}/$vendordir/framework/caffe/custom.proto ]; then + # 有老版本,判断是否要覆盖式安装 + if test $QUIET = "n"; then + echo "[INFO] ${targetdir}/$vendordir/framework/caffe has old version"\ + "custom.proto file. Do you want to replace? [y/n] " + + while true + do + read yn + if [ "$yn" = n ]; then + return 0 + elif [ "$yn" = y ]; then + break; + else + echo "[ERROR] input error, please input again!" + fi + done + fi + fi + log "[INFO] replace old caffe.proto files ......" + fi + chmod -R +w "$targetdir/$vendordir/framework/caffe/" >/dev/null 2>&1 + cp -rf ${sourcedir}/$vendordir/custom.proto ${targetdir}/$vendordir/framework/caffe/ + if [ $? -ne 0 ];then + log "[ERROR] copy new custom.proto failed" + return 1 + fi + log "[INFO] copy custom.proto success" + + return 0 +} + +upgrade_file() +{ + if [ ! -e ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 file" + return 0 + fi + + log "copy new $1 files ......" + cp -f ${sourcedir}/$vendordir/$1 $targetdir/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 file failed" + return 1 + fi + + return 0 +} + +delete_optiling_file() +{ + if [ ! -d ${targetdir}/vendors ];then + log "[INFO] $1 not exist, no need to uninstall" + return 0 + fi + sys_info=$(uname -m) + if [ ! -d ${sourcedir}/$vendordir/$1/ai_core/tbe/op_tiling/lib/linux/${sys_info} ];then + rm -rf ${sourcedir}/$vendordir/$1/ai_core/tbe/op_tiling/liboptiling.so + fi + return 0 +} + +log "[INFO] copy uninstall sh success" + +if [ ! -d ${targetdir}/vendors ];then + log "[INFO] create ${targetdir}/vendors." + mkdir -p ${targetdir}/vendors + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/vendors failed" + return 1 + fi +fi +chmod u+w ${targetdir}/vendors + +echo "[ops_custom]upgrade framework" +upgrade framework +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op proto" +upgrade op_proto +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade version.info" +upgrade_file version.info +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op impl" +delete_optiling_file op_impl +upgrade op_impl +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op api" +upgrade op_api +if [ $? -ne 0 ];then + exit 1 +fi + +upgrade_proto +if [ $? -ne 0 ];then + exit 1 +fi + +# set the set_env.bash +if [ -n "${INSTALL_PATH}" ] && [ -d ${INSTALL_PATH} ]; then + _ASCEND_CUSTOM_OPP_PATH=${targetdir}/${vendordir} + bin_path="${_ASCEND_CUSTOM_OPP_PATH}/bin" + set_env_variable="#!/bin/bash\nexport ASCEND_CUSTOM_OPP_PATH=${_ASCEND_CUSTOM_OPP_PATH}:\${ASCEND_CUSTOM_OPP_PATH}" + if [ ! -d ${bin_path} ]; then + mkdir -p ${bin_path} >> /dev/null 2>&1 + if [ $? -ne 0 ]; then + log "[ERROR] create ${bin_path} failed" + exit 1 + fi + fi + echo -e ${set_env_variable} > ${bin_path}/set_env.bash + if [ $? -ne 0 ]; then + log "[ERROR] write ASCEND_CUSTOM_OPP_PATH to set_env.bash failed" + exit 1 + else + log "[INFO] using requirements: when custom module install finished or before you run the custom module, \ + execute the command [ source ${bin_path}/set_env.bash ] to set the environment path" + fi +else + config_file=${targetdir}/vendors/config.ini + if [ ! -f ${config_file} ]; then + touch ${config_file} + chmod 640 ${config_file} + echo "load_priority=$vendor_name" > ${config_file} + if [ $? -ne 0 ];then + echo "echo load_priority failed" + exit 1 + fi + else + found_vendors="$(grep -w "load_priority" "$config_file" | cut --only-delimited -d"=" -f2-)" + found_vendor=$(echo $found_vendors | sed "s/$vendor_name//g" | tr ',' ' ') + vendor=$(echo $found_vendor | tr -s ' ' ',') + if [ "$vendor" != "" ]; then + sed -i "/load_priority=$found_vendors/s@load_priority=$found_vendors@load_priority=$vendor_name,$vendor@g" "$config_file" + fi + fi +fi + +chmod u-w ${targetdir}/vendors + +if [ -d ${targetdir}/$vendordir/op_impl/cpu/aicpu_kernel/impl/ ]; then + chmod -R 440 ${targetdir}/$vendordir/op_impl/cpu/aicpu_kernel/impl/* >/dev/null 2>&1 +fi +if [ -f ${targetdir}/ascend_install.info ]; then + chmod -R 440 ${targetdir}/ascend_install.info +fi +if [ -f ${targetdir}/scene.info ]; then + chmod -R 440 ${targetdir}/scene.info +fi +if [ -f ${targetdir}/version.info ]; then + chmod -R 440 ${targetdir}/version.info +fi + +echo "SUCCESS" +exit 0 + diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh new file mode 100644 index 00000000..e0917348 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh @@ -0,0 +1,151 @@ +#!/bin/bash +vendor_name=customize +targetdir=/usr/local/Ascend/opp +target_custom=0 + +sourcedir=$PWD/packages +vendordir=vendors/$vendor_name + +log() { + cur_date=`date +"%Y-%m-%d %H:%M:%S"` + echo "[runtime] [$cur_date] "$1 +} + +if [[ "x${ASCEND_OPP_PATH}" == "x" ]];then + log "[ERROR] env ASCEND_OPP_PATH no exist" + exit 1 +fi + +targetdir=${ASCEND_OPP_PATH} + +if [ ! -d $targetdir ];then + log "[ERROR] $targetdir no exist" + exit 1 +fi + +upgrade() +{ + if [ ! -d ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 files" + return 0 + fi + + if [ ! -d ${targetdir}/$vendordir/$1 ];then + log "[INFO] create ${targetdir}/$vendordir/$1." + mkdir -p ${targetdir}/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] create ${targetdir}/$vendordir/$1 failed" + return 1 + fi + else + vendor_installed_dir=$(ls "$targetdir/vendors" 2> /dev/null) + for i in $vendor_installed_dir;do + vendor_installed_file=$(ls "$vendor_installed_dir/$vendor_name/$i" 2> /dev/null) + if [ "$i" = "$vendor_name" ] && [ "$vendor_installed_file" != "" ]; then + echo "[INFO]: $vendor_name custom opp package has been installed on the path $vendor_installed_dir, \ + you want to Overlay Installation , please enter:[o]; \ + or replace directory installation , please enter: [r]; \ + or not install , please enter:[n]." + fi + while true + do + read mrn + if [ "$mrn" = m ]; then + break + elif [ "$mrn" = r ]; then + [ -n "$vendor_installed_file"] && rm -rf "$vendor_installed_file" + break + elif [ "$mrn" = n ]; then + return 0 + else + echo "[WARNING]: Input error, please input m or r or n to choose!" + fi + done + done + log "[INFO] replace old ops $1 files ......" + fi + + log "copy new ops $1 files ......" + cp -rf ${sourcedir}/$vendordir/$1/* $targetdir/$vendordir/$1/ + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 files failed" + return 1 + fi + + return 0 +} + +upgrade_file() +{ + if [ ! -e ${sourcedir}/$vendordir/$1 ]; then + log "[INFO] no need to upgrade ops $1 file" + return 0 + fi + + log "copy new $1 files ......" + cp -f ${sourcedir}/$vendordir/$1 $targetdir/$vendordir/$1 + if [ $? -ne 0 ];then + log "[ERROR] copy new $1 file failed" + return 1 + fi + + return 0 +} + +log "[INFO] copy uninstall sh success" + +echo "[ops_custom]upgrade framework" +upgrade framework +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op proto" +upgrade op_proto +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op impl" +upgrade op_impl +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade op api" +upgrade op_api +if [ $? -ne 0 ];then + exit 1 +fi + +echo "[ops_custom]upgrade version.info" +upgrade_file version.info +if [ $? -ne 0 ];then + exit 1 +fi + +config_file=${targetdir}/vendors/config.ini +found_vendors="$(grep -w "load_priority" "$config_file" | cut --only-delimited -d"=" -f2-)" +found_vendor=$(echo $found_vendors | sed "s/$vendor_name//g" | tr ',' ' ') +vendor=$(echo $found_vendor | tr -s ' ' ',') +if [ "$vendor" != "" ]; then + sed -i "/load_priority=$found_vendors/s@load_priority=$found_vendors@load_priority=$vendor_name,$vendor@g" "$config_file" +fi + +changemode() +{ + if [ -d ${targetdir} ];then + chmod -R 550 ${targetdir}>/dev/null 2>&1 + fi + + return 0 +} +echo "[ops_custom]changemode..." +#changemode +if [ $? -ne 0 ];then + exit 1 +fi + +echo "SUCCESS" +exit 0 + diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt new file mode 100644 index 00000000..8d2d11c9 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt @@ -0,0 +1,2 @@ +include(cmake/fun.cmake) +add_subdirectory(npu) \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake new file mode 100644 index 00000000..024e2630 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake @@ -0,0 +1,53 @@ + +set(UPER_CHARS A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) +function(string_to_snake str_in snake_out) + set(str_cam ${str_in}) + foreach(uper_char ${UPER_CHARS}) + string(TOLOWER "${uper_char}" lower_char) + string(REPLACE ${uper_char} "_${lower_char}" str_cam ${str_cam}) + endforeach() + string(SUBSTRING ${str_cam} 1 -1 str_cam) + set(${snake_out} "${str_cam}" PARENT_SCOPE) +endfunction() + +function(add_cpu_target) + cmake_parse_arguments(CPU_TEST "" "OP" "SRC" ${ARGN}) + string_to_snake("${CPU_TEST_OP}" op_snake) + add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h + COMMAND python3 ${CMAKE_SOURCE_DIR}/cmake/util/tiling_data_def_build.py + ${CMAKE_SOURCE_DIR}/op_host/${op_snake}_tiling.h + ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h + DEPENDS ${CMAKE_SOURCE_DIR}/op_host/${op_snake}_tiling.h + ) + add_custom_target(gen_${op_snake}_tiling_header + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h + ) + + add_executable(${op_snake}_cpu ${CPU_TEST_SRC}) + add_dependencies(${op_snake}_cpu gen_${op_snake}_tiling_header) + target_compile_options(${op_snake}_cpu PRIVATE -g -include ${CMAKE_CURRENT_SOURCE_DIR}/${op_snake}_tiling.h) + target_link_libraries(${op_snake}_cpu PRIVATE tikicpulib::ascend910B1) + set_target_properties(${op_snake}_cpu PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + ) +endfunction() + +function(add_npu_target) + cmake_parse_arguments(NPU_TEST "" "OP" "SRC" ${ARGN}) + string_to_snake("${NPU_TEST_OP}" op_snake) + add_executable(${op_snake}_npu ${NPU_TEST_SRC}) + target_compile_options(${op_snake}_npu PRIVATE -g) + target_include_directories(${op_snake}_npu PRIVATE + ${ASCEND_CANN_PACKAGE_PATH}/include/acl + ${ASCEND_AUTOGEN_PATH} + ) + target_link_libraries(${op_snake}_npu PRIVATE + intf_pub + cust_opapi + ascendcl + nnopbase + ) + set_target_properties(${op_snake}_npu PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR} + ) +endfunction() diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt new file mode 100644 index 00000000..02f2b726 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt @@ -0,0 +1,10 @@ +add_npu_target(OP QuantCustom SRC quant_custom_main.cpp) + +add_custom_target(run_npu_test + COMMAND echo "===============================================================================" + COMMAND echo " Run NPU test at ${CMAKE_CURRENT_BINARY_DIR}" + COMMAND echo "===============================================================================" + COMMAND $ + COMMAND echo "===============================================================================" + ) +add_dependencies(run_npu_test quant_custom_npu) \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp new file mode 100644 index 00000000..6729b949 --- /dev/null +++ b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp @@ -0,0 +1,175 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include "aclnn_quant_custom.h" +#include "acl/acl_rt.h" +#include "acl/acl.h" +#include +#include +#include "../../../../../common/data_utils.h" + +aclrtStream CreateStream(int device) +{ + if (aclInit(NULL) != ACL_SUCCESS) { + printf("acl init failed\n"); + return NULL; + } + if (aclrtSetDevice(device) != ACL_SUCCESS) { + printf("Set device failed\n"); + (void)aclFinalize(); + return NULL; + } + aclrtStream stream = nullptr; + if (aclrtCreateStream(&stream) != ACL_SUCCESS) { + printf("Create stream failed\n"); + return NULL; + } + return stream; +} + +void DestroyStream(aclrtStream stream, int device) +{ + (void)aclrtDestroyStream(stream); + if (aclrtResetDevice(device) != ACL_SUCCESS) { + printf("Reset device failed\n"); + } + if (aclFinalize() != ACL_SUCCESS) { + printf("Finalize acl failed\n"); + } +} + +struct tensorInfo { + int64_t *dims; + int64_t dimCnt; + aclDataType dtype; + aclFormat fmt; +}; + +int64_t GetDataSize(struct tensorInfo *desc) +{ + if (!desc->dims) + return 0; + int64_t size = 1; + for (auto i = 0; i < desc->dimCnt; i++) { + size *= desc->dims[i]; + } + return size *aclDataTypeSize(desc->dtype); +} + +static bool CompareResult(const void *outputData, int64_t outSize) +{ + void *goldenData; + CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); + size_t goldenSize = outSize; + bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + if (ret) { + printf("ReadFile golden.bin success!\n"); + } else { + printf("test failed!\n"); + return false; + } + constexpr float EPS = 1e-6; + int64_t wrongNum = 0; + + for (int i = 0; i < outSize / sizeof(int8_t); i++) { + float a = (reinterpret_cast(outputData))[i]; + float b = (reinterpret_cast(goldenData))[i]; + float ae = std::abs(a - b); + float re = ae / abs(b); + if (ae > EPS && re > EPS) { + printf("CompareResult golden.bin failed output is %lf, golden is %lf\n", a, b); + wrongNum++; + } + } + CHECK_ACL(aclrtFreeHost(goldenData)); + + if (wrongNum != 0) { + return false; + } else { + printf("CompareResult golden.bin success\n"); + return true; + } +} + +int main(void) +{ + aclrtStream stream; + int64_t input[] = {1024}; + int64_t output[] = {1024}; + struct tensorInfo tensorDesc[] = {{input, 1, ACL_FLOAT, ACL_FORMAT_ND}, + {output, 1, ACL_INT8, ACL_FORMAT_ND}, + }; + stream = CreateStream(0); + aclTensor *tensors[sizeof(tensorDesc) / sizeof(struct tensorInfo)]; + void *devMem[sizeof(tensorDesc) / sizeof(struct tensorInfo)]; + for (auto i = 0; i < sizeof(tensorDesc) / sizeof(struct tensorInfo); i++) { + void *data; + struct tensorInfo *info = &(tensorDesc[i]); + int64_t size = GetDataSize(info); + if (size == 0) { + tensors[i] = NULL; + devMem[i] = NULL; + continue; + } + CHECK_ACL(aclrtMalloc(&data, size, ACL_MEM_MALLOC_HUGE_FIRST)); + // read input + if (i == 0) { + size_t inputSize = size; + void *dataHost; + CHECK_ACL(aclrtMallocHost((void **)(&dataHost), inputSize)); + ReadFile("../input/input.bin", inputSize, dataHost, inputSize); + CHECK_ACL(aclrtMemcpy(data, size, dataHost, size, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtFreeHost(dataHost)); + } + devMem[i] = data; + tensors[i] = + aclCreateTensor(info->dims, info->dimCnt, info->dtype, NULL, 0, info->fmt, info->dims, info->dimCnt, data); + } + + size_t workspaceSize = 0; + aclOpExecutor *handle; + int32_t ret; + ret = aclnnQuantCustomGetWorkspaceSize(tensors[0], tensors[1], &workspaceSize, &handle); + printf("aclnnQuantCustomGetWorkspaceSize ret %u workspace size %lu\n", ret, workspaceSize); + void *workspace = NULL; + if (workspaceSize != 0) { + CHECK_ACL(aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST)); + } + ret = aclnnQuantCustom(workspace, workspaceSize, handle, stream); + printf("aclnnQuantCustom ret %u\n", ret); + if (aclrtSynchronizeStreamWithTimeout(stream, 5000) != ACL_SUCCESS) { + printf("Synchronize stream failed\n"); + } + + uint8_t *outputHost; + int64_t outputHostSize = GetDataSize(&(tensorDesc[1])); + + CHECK_ACL(aclrtMallocHost((void **)(&outputHost), outputHostSize)); + CHECK_ACL(aclrtMemcpy(outputHost, outputHostSize, devMem[1], outputHostSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("../output/output.bin", outputHost, outputHostSize); + bool goldenResult = CompareResult(outputHost, outputHostSize); + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + + CHECK_ACL(aclrtFreeHost(outputHost)); + + for (auto i = 0; i < sizeof(tensorDesc) / sizeof(struct tensorInfo); i++) { + if (!tensors[i]) + continue; + if (devMem[i]) { + CHECK_ACL(aclrtFree(devMem[i])); + } + aclDestroyTensor(tensors[i]); + } + DestroyStream(stream, 0); + return 0; +} -- Gitee From 79e532e363cca07da69d0de7b5596e8cfe2bedf1 Mon Sep 17 00:00:00 2001 From: Hanglei Zhang Date: Sat, 23 Nov 2024 11:29:17 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E8=A1=A5=E5=85=85=E9=87=8F=E5=8C=96?= =?UTF-8?q?=E5=8F=8D=E9=87=8F=E5=8C=96readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quantization/dequant/README.md | 18 +++++------ .../kernel_launch_method_by_direct/README.md | 14 ++++---- .../README.md | 31 ++++++++++++++++-- examples/quantization/quant/README.md | 17 +++++----- .../kernel_launch_method_by_direct/README.md | 16 +++++----- .../README.md | 32 ++++++++++++++++--- examples/readme.md | 11 +++++++ 7 files changed, 100 insertions(+), 39 deletions(-) diff --git a/examples/quantization/dequant/README.md b/examples/quantization/dequant/README.md index ba4c0a9d..fb6f9888 100644 --- a/examples/quantization/dequant/README.md +++ b/examples/quantization/dequant/README.md @@ -2,13 +2,13 @@ ## 概述 -本样例介绍了调用AscendDequant高阶API实现反量化dequant单算子,并按照不同的算子调用方式分别给出了对应的端到端实现。 +本样例介绍了调用AscendDequant高阶API实现DequantCustom单算子,并按照不同的算子调用方式分别给出了对应的端到端实现。 -- 直调:使用核函数直调dequant自定义算子。 +- 直调:使用核函数直调DequantCustom自定义算子。 核函数的基础调用方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。 -- 框架调用:使用框架调用dequant自定义算子。 +- 框架调用:使用框架调用DequantCustom自定义算子。 按照工程创建->算子实现->编译部署>算子调用的流程完成算子开发。整个过程都依赖于算子工程:基于工程代码框架完成算子核函数的开发和Tiling实现,通过工程编译脚本完成算子的编译部署,继而实现单算子调用或第三方框架中的算子调用。 @@ -21,7 +21,7 @@ ## 样例支持的产品型号为: - Atlas A2训练系列产品/Atlas 800I A2推理产品 -- Atlas推理系列产品(Ascend 310P处理器)AI Core +- Atlas推理系列产品AI Core ## 目录结构 @@ -34,20 +34,20 @@ ## 算子描述 -dequant单算子,对输入tensor做反量化计算。 +DequantCustom单算子,对输入tensor按元素做反量化计算,将int32_t数据类型反量化为half/float等数据类型。(本样例实现了反量化到float的情况) -dequant算子规格: +DequantCustom算子规格: - - + + - + diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md index 75bc9271..1e17894d 100644 --- a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为src[4,8], deq_scale[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 +本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 ## 目录结构介绍 | 目录及文件 | 描述 | @@ -24,7 +24,7 @@ ``` export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH ``` - 若执行sim仿真,可自行配置仿真日志文件目录,默认仿真日志会在build目录下生成。若需要详细了解sim仿真相关内容,请参考[《AscendC算子调测工具》](https://hiascend.com/document/redirect/CannCommunityToolAscendebug)中的 调测功能说明 > Simulator性能仿真功能 > CAModel性能仿真 章节。 + 若执行sim仿真,可自行配置仿真日志文件目录,默认仿真日志会在build目录下生成。若需要详细了解sim仿真相关内容,请参考[《算子开发工具msProf》](https://hiascend.com/document/redirect/CannCommunityToolMsProf)中的 工具使用 章节。 ``` # 设置仿真模式日志生成目录(可选),需要自行确保设置的目录已存在。若设置为相对路径下的目录,则以程序执行时的目录作为当前目录。例如,执行如下设置时,需要确保./目录下存在xxx目录 export CAMODEL_LOG_PATH=./xxx @@ -42,11 +42,11 @@ ``` 其中cmake参数说明如下: - RUN_MODE :编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu / sim/ npu] - - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下参数取值(xxx请替换为具体取值): - - Atlas 推理系列产品(Ascend 310P处理器)参数值:Ascend310P1、Ascend310P3 - - Atlas A2训练系列产品参数值:AscendxxxB1、AscendxxxB2、AscendxxxB3、AscendxxxB4 + - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 推理系列产品 + - Atlas A2训练系列产品/Atlas 800I A2推理产品 - 示例如下: + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 ``` - bash run.sh -r cpu -v Ascend310P1 + bash run.sh -r cpu -v Ascendxxxyy ``` diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md index d3034029..340873ad 100644 --- a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md @@ -2,10 +2,8 @@ ## 概述 -本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为src[4,8], deq_scale[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 +本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 -## 样例支持的产品型号为: -- Atlas A2训练系列产品/Atlas 800I A2推理产品 ## 目录结构 | 目录 | 描述 | @@ -77,5 +75,32 @@ ``` ./dequant_custom_npu ``` + + +### 6.sim仿真模式运行(可选) +若要执行sim仿真,在build_out目录下执行如下命令: +``` +export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +msprof op simulator --application=./dequant_custom_npu +``` +其中SOC_VERSION参数说明如下: +- SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 推理系列产品 + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + +### 7.不同环境上的编译与运行(可选) +若想在不同的环境上分别进行编译和执行,请在执行环境中进行如下操作,确保该环境上能够正确执行样例. +注意,以下方法仅支持编译环境与运行环境是相同的物理硬件架构,比如编译环境和执行环境均为x86硬件架构;若硬件架构不一致,必须重新编译算子工程,再安装部署和运行样例。 + - 参考步骤1,配置环境变量。 + - 参考步骤2,生成输入和真值数据,或者将编译环境下生成的input和output目录复制到执行环境。 + - 将编译环境下编译生成的自定义算子包和可执行程序,复制到执行环境。 + - 参考步骤4,在执行环境,安装部署自定义算子包。 + - 设置如下环境变量: + ``` + export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH + ``` + - 在input/output的同级目录中创建一个临时目录,将可执行程序放入临时目录,进入临时目录参考步骤5,执行可执行程序,即可运行样例。 + + ### 注意事项 本样例工程会自动识别执行的硬件平台,无需单独设置SOC_VERSION \ No newline at end of file diff --git a/examples/quantization/quant/README.md b/examples/quantization/quant/README.md index 4e4bdca8..0332690e 100644 --- a/examples/quantization/quant/README.md +++ b/examples/quantization/quant/README.md @@ -2,13 +2,13 @@ ## 概述 -本样例介绍了调用AscendQuant高阶API实现量化算子,并按照不同的算子调用方式分别给出了对应的端到端实现。 +本样例介绍了调用AscendQuant高阶API实现QuantCustom单算子,并按照不同的算子调用方式分别给出了对应的端到端实现。 -- 直调:使用核函数直调quant自定义算子。 +- 直调:使用核函数直调QuantCustom自定义算子。 核函数的基础调用方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。 -- 框架调用:使用框架调用quant自定义算子。 +- 框架调用:使用框架调用QuantCustom自定义算子。 按照工程创建->算子实现->编译部署>算子调用的流程完成算子开发。整个过程都依赖于算子工程:基于工程代码框架完成算子核函数的开发和Tiling实现,通过工程编译脚本完成算子的编译部署,继而实现单算子调用或第三方框架中的算子调用。 @@ -21,7 +21,7 @@ ## 样例支持的产品型号为: - Atlas A2训练系列产品/Atlas 800I A2推理产品 -- Atlas推理系列产品(Ascend 310P处理器)AI Core +- Atlas推理系列产品AI Core ## 目录结构 @@ -34,7 +34,8 @@ ## 算子描述 -QuantCustom单算子,对输入tensor按元素进行per_tensor量化计算。 +QuantCustom单算子,对输入tensor按元素做量化计算,将half/float数据类型量化为int8_t数据类型。(本样例实现了对float类型量化的情况) + QuantCustom算子规格: @@ -43,12 +44,12 @@ QuantCustom算子规格: - + - + @@ -56,7 +57,7 @@ QuantCustom算子规格: ## 算子实现介绍 -本样例实现了quant算子,其中输入src为固定shape[1024]。 +本样例实现了QuantCustom算子,其中输入src为固定shape[1024]。 - kernel实现 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/README.md b/examples/quantization/quant/kernel_launch_method_by_direct/README.md index bf2ec349..50cf350f 100644 --- a/examples/quantization/quant/kernel_launch_method_by_direct/README.md +++ b/examples/quantization/quant/kernel_launch_method_by_direct/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入src为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 +本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 ## 目录结构介绍 | 目录及文件 | 描述 | @@ -24,7 +24,7 @@ ``` export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH ``` - 若执行sim仿真,可自行配置仿真日志文件目录,默认仿真日志会在build目录下生成。若需要详细了解sim仿真相关内容,请参考[《AscendC算子调测工具》](https://hiascend.com/document/redirect/CannCommunityToolAscendebug)中的 调测功能说明 > Simulator性能仿真功能 > CAModel性能仿真 章节。 + 若执行sim仿真,可自行配置仿真日志文件目录,默认仿真日志会在build目录下生成。若需要详细了解sim仿真相关内容,请参考[《算子开发工具msProf》](https://hiascend.com/document/redirect/CannCommunityToolMsProf)中的 工具使用 章节。 ``` # 设置仿真模式日志生成目录(可选),需要自行确保设置的目录已存在。若设置为相对路径下的目录,则以程序执行时的目录作为当前目录。例如,执行如下设置时,需要确保./目录下存在xxx目录 export CAMODEL_LOG_PATH=./xxx @@ -42,11 +42,11 @@ ``` 其中cmake参数说明如下: - RUN_MODE :编译方式,可选择CPU调试,NPU仿真,NPU上板。支持参数为[cpu / sim/ npu] - - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下参数取值(xxx请替换为具体取值): - - Atlas 推理系列产品(Ascend 310P处理器)参数值:Ascend310P1、Ascend310P3 - - Atlas A2训练系列产品参数值:AscendxxxB1、AscendxxxB2、AscendxxxB3、AscendxxxB4 + - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 推理系列产品 + - Atlas A2训练系列产品/Atlas 800I A2推理产品 - 示例如下: - ``` - bash run.sh -r cpu -v Ascend310P1 + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 ``` + bash run.sh -r cpu -v Ascendxxxyy + ``` \ No newline at end of file diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/README.md b/examples/quantization/quant/kernel_launch_method_by_framework/README.md index 76a5d354..2547fe66 100644 --- a/examples/quantization/quant/kernel_launch_method_by_framework/README.md +++ b/examples/quantization/quant/kernel_launch_method_by_framework/README.md @@ -2,10 +2,7 @@ ## 概述 -本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入src为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 - -## 样例支持的产品型号为: -- Atlas A2训练系列产品/Atlas 800I A2推理产品 +本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 ## 目录结构 | 目录 | 描述 | @@ -77,5 +74,32 @@ ``` ./quant_custom_npu ``` + + +### 6.sim仿真模式运行(可选) +若要执行sim仿真,在build_out目录下执行如下命令: +``` +export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH +msprof op simulator --application=./quant_custom_npu +``` +其中SOC_VERSION参数说明如下: +- SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas 推理系列产品 + - Atlas A2训练系列产品/Atlas 800I A2推理产品 + +### 7.不同环境上的编译与运行(可选) +若想在不同的环境上分别进行编译和执行,请在执行环境中进行如下操作,确保该环境上能够正确执行样例. +注意,以下方法仅支持编译环境与运行环境是相同的物理硬件架构,比如编译环境和执行环境均为x86硬件架构;若硬件架构不一致,必须重新编译算子工程,再安装部署和运行样例。 + - 参考步骤1,配置环境变量。 + - 参考步骤2,生成输入和真值数据,或者将编译环境下生成的input和output目录复制到执行环境。 + - 将编译环境下编译生成的自定义算子包和可执行程序,复制到执行环境。 + - 参考步骤4,在执行环境,安装部署自定义算子包。 + - 设置如下环境变量: + ``` + export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH + ``` + - 在input/output的同级目录中创建一个临时目录,将可执行程序放入临时目录,进入临时目录参考步骤5,执行可执行程序,即可运行样例。 + + ### 注意事项 本样例工程会自动识别执行的硬件平台,无需单独设置SOC_VERSION \ No newline at end of file diff --git a/examples/readme.md b/examples/readme.md index 47d60a48..348684bb 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -58,6 +58,17 @@ + + + + + + + + +
算子类型(OpType)DequantCustom
算子输入
nameshapedata typeformat
src4*8int_32ND
deq_scale8floatND
srcGm4*8int32_tND
deqScaleGm8floatND
算子输出
dst4*8floatND
dstGm4*8floatND
核函数名dequant_custom
算子输入
nameshapedata typeformat
src1024floatND
srcGm1024floatND
算子输出
dst1024int_8ND
dstGm1024int8_tND
核函数名quant_custom
broadcast 对输入tensor的shape进行广播。
quantization quant 对输入tensor按元素做量化计算,将half/float数据类型量化为int8_t数据类型,样例实现float输入。 +
dequant 对输入tensor按元素做反量化计算,将int32_t数据类型反量化为half/float数据类型,样例实现float输出。 +
-- Gitee From a77e911addd441397c620eb7f0892b2cf1481754 Mon Sep 17 00:00:00 2001 From: Hanglei Zhang Date: Mon, 25 Nov 2024 10:10:23 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E8=A1=A5=E5=85=85=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?=E9=87=8F=E5=8C=96=E5=8F=8D=E9=87=8F=E5=8C=96readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quantization/dequant/README.md | 8 ++++---- .../dequant/host_tiling/dequant_custom_tiling.h | 0 .../quantization/dequant/kernel_impl/dequant_custom.h | 0 .../dequant/kernel_launch_method_by_direct/CMakeLists.txt | 0 .../dequant/kernel_launch_method_by_direct/README.md | 2 +- .../kernel_launch_method_by_direct/cmake/cpu_lib.cmake | 0 .../kernel_launch_method_by_direct/cmake/npu_lib.cmake | 0 .../kernel_launch_method_by_direct/dequant_custom.cpp | 0 .../dequant_custom_tiling.cpp | 0 .../dequant/kernel_launch_method_by_direct/main.cpp | 0 .../dequant/kernel_launch_method_by_direct/run.sh | 0 .../kernel_launch_method_by_direct/scripts/gen_data.py | 0 .../kernel_launch_method_by_framework/CMakeLists.txt | 0 .../kernel_launch_method_by_framework/CMakePresets.json | 0 .../dequant/kernel_launch_method_by_framework/README.md | 2 +- .../dequant/kernel_launch_method_by_framework/build.sh | 0 .../kernel_launch_method_by_framework/cmake/config.cmake | 0 .../kernel_launch_method_by_framework/cmake/func.cmake | 0 .../kernel_launch_method_by_framework/cmake/intf.cmake | 0 .../cmake/makeself.cmake | 0 .../op_host/CMakeLists.txt | 0 .../op_host/dequant_custom.cpp | 0 .../op_host/dequant_custom_tiling.h | 0 .../op_kernel/CMakeLists.txt | 0 .../op_kernel/dequant_custom.cpp | 0 .../kernel_launch_method_by_framework/scripts/gen_data.py | 0 .../kernel_launch_method_by_framework/scripts/help.info | 0 .../kernel_launch_method_by_framework/scripts/install.sh | 0 .../kernel_launch_method_by_framework/scripts/upgrade.sh | 0 .../testcases/CMakeLists.txt | 0 .../testcases/cmake/fun.cmake | 0 .../testcases/npu/CMakeLists.txt | 0 .../testcases/npu/dequant_custom_main.cpp | 0 examples/quantization/quant/README.md | 8 ++++---- .../quantization/quant/host_tiling/quant_custom_tiling.h | 0 examples/quantization/quant/kernel_impl/quant_custom.h | 0 .../quant/kernel_launch_method_by_direct/CMakeLists.txt | 0 .../quant/kernel_launch_method_by_direct/README.md | 2 +- .../kernel_launch_method_by_direct/cmake/cpu_lib.cmake | 0 .../kernel_launch_method_by_direct/cmake/npu_lib.cmake | 0 .../quant/kernel_launch_method_by_direct/main.cpp | 0 .../quant/kernel_launch_method_by_direct/quant_custom.cpp | 0 .../quant_custom_tiling.cpp | 0 .../quant/kernel_launch_method_by_direct/run.sh | 0 .../kernel_launch_method_by_direct/scripts/gen_data.py | 0 .../kernel_launch_method_by_framework/CMakeLists.txt | 0 .../kernel_launch_method_by_framework/CMakePresets.json | 0 .../quant/kernel_launch_method_by_framework/README.md | 2 +- .../quant/kernel_launch_method_by_framework/build.sh | 0 .../kernel_launch_method_by_framework/cmake/config.cmake | 0 .../kernel_launch_method_by_framework/cmake/func.cmake | 0 .../kernel_launch_method_by_framework/cmake/intf.cmake | 0 .../cmake/makeself.cmake | 0 .../op_host/CMakeLists.txt | 0 .../op_host/quant_custom.cpp | 0 .../op_host/quant_custom_tiling.h | 0 .../op_kernel/CMakeLists.txt | 0 .../op_kernel/quant_custom.cpp | 0 .../kernel_launch_method_by_framework/scripts/gen_data.py | 0 .../kernel_launch_method_by_framework/scripts/help.info | 0 .../kernel_launch_method_by_framework/scripts/install.sh | 0 .../kernel_launch_method_by_framework/scripts/upgrade.sh | 0 .../testcases/CMakeLists.txt | 0 .../testcases/cmake/fun.cmake | 0 .../testcases/npu/CMakeLists.txt | 0 .../testcases/npu/quant_custom_main.cpp | 0 66 files changed, 12 insertions(+), 12 deletions(-) mode change 100644 => 100755 examples/quantization/dequant/README.md mode change 100644 => 100755 examples/quantization/dequant/host_tiling/dequant_custom_tiling.h mode change 100644 => 100755 examples/quantization/dequant/kernel_impl/dequant_custom.h mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/README.md mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/run.sh mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/README.md mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/build.sh mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt mode change 100644 => 100755 examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp mode change 100644 => 100755 examples/quantization/quant/README.md mode change 100644 => 100755 examples/quantization/quant/host_tiling/quant_custom_tiling.h mode change 100644 => 100755 examples/quantization/quant/kernel_impl/quant_custom.h mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/README.md mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/main.cpp mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/run.sh mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/README.md mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/build.sh mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt mode change 100644 => 100755 examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp diff --git a/examples/quantization/dequant/README.md b/examples/quantization/dequant/README.md old mode 100644 new mode 100755 index fb6f9888..899db24b --- a/examples/quantization/dequant/README.md +++ b/examples/quantization/dequant/README.md @@ -55,14 +55,14 @@ DequantCustom算子规格: ## 算子实现介绍 -本样例实现了dequant算子,其中固定shape输入为src[4,8], deq_scale[8],输出为dst[4,8]。 +本样例实现了DequantCustom算子,其中固定shape输入为srcGm[4,8], deqScaleGm[8],输出为dstGm[4,8]。 - kernel实现 - 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendDequant高阶API接口完成dequant计算,得到最终结果,再搬出到外部存储上。 + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendDequant高阶API接口完成反量化计算,得到最终结果,再搬出到外部存储上。 - dequant算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm与deq_scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行dequant计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 + DequantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm与deq_scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行反量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 - tiling实现 - dequant算子的tiling实现流程如下:首先获取AscendDequant接口能完成计算所需最大/最小临时空间大小,根据该范围结合实际的内存使用情况设置合适的空间大小,然后根据输入长度dataLength确定所需tiling参数。 \ No newline at end of file + DequantCustom算子的tiling实现流程如下:首先获取AscendDequant接口能完成计算所需最大/最小临时空间大小,根据该范围结合实际的内存使用情况设置合适的空间大小,然后根据输入长度dataLength确定所需tiling参数。 \ No newline at end of file diff --git a/examples/quantization/dequant/host_tiling/dequant_custom_tiling.h b/examples/quantization/dequant/host_tiling/dequant_custom_tiling.h old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_impl/dequant_custom.h b/examples/quantization/dequant/kernel_impl/dequant_custom.h old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_direct/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md old mode 100644 new mode 100755 index 1e17894d..dc826001 --- a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 +本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现DequantCustom单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 ## 目录结构介绍 | 目录及文件 | 描述 | diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake b/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake b/examples/quantization/dequant/kernel_launch_method_by_direct/cmake/npu_lib.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp b/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp b/examples/quantization/dequant/kernel_launch_method_by_direct/dequant_custom_tiling.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp b/examples/quantization/dequant/kernel_launch_method_by_direct/main.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/run.sh b/examples/quantization/dequant/kernel_launch_method_by_direct/run.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py b/examples/quantization/dequant/kernel_launch_method_by_direct/scripts/gen_data.py old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md old mode 100644 new mode 100755 index 340873ad..395b9c99 --- a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现dequant单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 +本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现DequantCustom单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 ## 目录结构 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/build.sh b/examples/quantization/dequant/kernel_launch_method_by_framework/build.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/config.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/func.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/intf.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/cmake/makeself.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h b/examples/quantization/dequant/kernel_launch_method_by_framework/op_host/dequant_custom_tiling.h old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp b/examples/quantization/dequant/kernel_launch_method_by_framework/op_kernel/dequant_custom.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/gen_data.py old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/help.info old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/install.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh b/examples/quantization/dequant/kernel_launch_method_by_framework/scripts/upgrade.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp b/examples/quantization/dequant/kernel_launch_method_by_framework/testcases/npu/dequant_custom_main.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/README.md b/examples/quantization/quant/README.md old mode 100644 new mode 100755 index 0332690e..83bb90eb --- a/examples/quantization/quant/README.md +++ b/examples/quantization/quant/README.md @@ -57,14 +57,14 @@ QuantCustom算子规格: ## 算子实现介绍 -本样例实现了QuantCustom算子,其中输入src为固定shape[1024]。 +本样例实现了QuantCustom算子,其中输入srcGm为固定shape[1024]。 - kernel实现 - 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendQuant高阶API接口完成ascendquant计算,得到最终结果,再搬出到外部存储上。 + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendQuant高阶API接口完成量化计算,得到最终结果,再搬出到外部存储上。 - ascendquant算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm存储在srcLocal中,Compute任务负责对srcLocal执行ascendquant计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 + QuantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm存储在srcLocal中,Compute任务负责对srcLocal执行量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 - tiling实现 - quant算子的tiling实现流程如下:首先获取AscendQuant接口能完成计算所需最大/最小临时空间大小,根据该范围结合实际的内存使用情况设置合适的空间大小,然后根据输入长度dataLength确定所需tiling参数。 \ No newline at end of file + QuantCustom算子的tiling实现流程如下:首先获取AscendQuant接口能完成计算所需最大/最小临时空间大小,根据该范围结合实际的内存使用情况设置合适的空间大小,然后根据输入长度dataLength确定所需tiling参数。 \ No newline at end of file diff --git a/examples/quantization/quant/host_tiling/quant_custom_tiling.h b/examples/quantization/quant/host_tiling/quant_custom_tiling.h old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_impl/quant_custom.h b/examples/quantization/quant/kernel_impl/quant_custom.h old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_direct/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/README.md b/examples/quantization/quant/kernel_launch_method_by_direct/README.md old mode 100644 new mode 100755 index 50cf350f..85fec8b9 --- a/examples/quantization/quant/kernel_launch_method_by_direct/README.md +++ b/examples/quantization/quant/kernel_launch_method_by_direct/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 +本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现QuantCustom单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 ## 目录结构介绍 | 目录及文件 | 描述 | diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake b/examples/quantization/quant/kernel_launch_method_by_direct/cmake/cpu_lib.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake b/examples/quantization/quant/kernel_launch_method_by_direct/cmake/npu_lib.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/main.cpp b/examples/quantization/quant/kernel_launch_method_by_direct/main.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp b/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp b/examples/quantization/quant/kernel_launch_method_by_direct/quant_custom_tiling.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/run.sh b/examples/quantization/quant/kernel_launch_method_by_direct/run.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py b/examples/quantization/quant/kernel_launch_method_by_direct/scripts/gen_data.py old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json b/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/README.md b/examples/quantization/quant/kernel_launch_method_by_framework/README.md old mode 100644 new mode 100755 index 2547fe66..2dcaa269 --- a/examples/quantization/quant/kernel_launch_method_by_framework/README.md +++ b/examples/quantization/quant/kernel_launch_method_by_framework/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现quant单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 +本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现QuantCustom单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 ## 目录结构 | 目录 | 描述 | diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/build.sh b/examples/quantization/quant/kernel_launch_method_by_framework/build.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/config.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/func.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/intf.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/cmake/makeself.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h b/examples/quantization/quant/kernel_launch_method_by_framework/op_host/quant_custom_tiling.h old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp b/examples/quantization/quant/kernel_launch_method_by_framework/op_kernel/quant_custom.cpp old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/gen_data.py old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/help.info old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/install.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh b/examples/quantization/quant/kernel_launch_method_by_framework/scripts/upgrade.sh old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/cmake/fun.cmake old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp b/examples/quantization/quant/kernel_launch_method_by_framework/testcases/npu/quant_custom_main.cpp old mode 100644 new mode 100755 -- Gitee From 48d8c41e8081b79da78a7e8d336ebc72a0a55ec1 Mon Sep 17 00:00:00 2001 From: Hanglei Zhang Date: Mon, 25 Nov 2024 14:25:41 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E9=87=8F=E5=8C=96=E5=8F=8D=E9=87=8F?= =?UTF-8?q?=E5=8C=96readme=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quantization/dequant/README.md | 12 ++++++------ .../dequant/kernel_launch_method_by_direct/README.md | 2 +- .../kernel_launch_method_by_framework/README.md | 2 +- examples/quantization/quant/README.md | 10 +++++----- .../quant/kernel_launch_method_by_direct/README.md | 2 +- .../kernel_launch_method_by_framework/README.md | 2 +- examples/readme.md | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/quantization/dequant/README.md b/examples/quantization/dequant/README.md index 899db24b..7da33cf7 100755 --- a/examples/quantization/dequant/README.md +++ b/examples/quantization/dequant/README.md @@ -17,7 +17,7 @@ | 调用方式 | 目录 | **描述** | | --------- | ------------------------------------------------------------ | ---------------------------------------------------------- | | 直调 | [kernel_launch_method_by_direct](./kernel_launch_method_by_direct) | host侧的核函数调用程序,包含CPU侧、NPU侧、仿真侧三种运行验证方法。 | -| 框架调用 | [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用dequant算子。 | +| 框架调用 | [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用DequantCustom算子。 | ## 样例支持的产品型号为: - Atlas A2训练系列产品/Atlas 800I A2推理产品 @@ -43,11 +43,11 @@ DequantCustom算子规格: 算子输入 nameshapedata typeformat -srcGm4*8int32_tND -deqScaleGm8floatND +inputGm4*8int32_tND +scaleGm8floatND 算子输出 -dstGm4*8floatND +outputGm4*8floatND 核函数名dequant_custom @@ -55,13 +55,13 @@ DequantCustom算子规格: ## 算子实现介绍 -本样例实现了DequantCustom算子,其中固定shape输入为srcGm[4,8], deqScaleGm[8],输出为dstGm[4,8]。 +本样例实现了DequantCustom算子,其中固定shape输入为inputGm[4,8], scaleGm[8],输出为outputGm[4,8]。 - kernel实现 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendDequant高阶API接口完成反量化计算,得到最终结果,再搬出到外部存储上。 - DequantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm与deq_scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行反量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 + DequantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor inputGm与deq_scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行反量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor outputGm。 - tiling实现 diff --git a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md index dc826001..75f01d66 100755 --- a/examples/quantization/dequant/kernel_launch_method_by_direct/README.md +++ b/examples/quantization/dequant/kernel_launch_method_by_direct/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现DequantCustom单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 +本样例基于Kernel直调算子工程,介绍了调用AscendDequant高阶API实现DequantCustom单算子,本样例固定shape输入为inputGm[4,8], scaleGm[8],主要演示AscendDequant高阶API在Kernel直调工程中的调用。 ## 目录结构介绍 | 目录及文件 | 描述 | diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md index 395b9c99..09813ae2 100755 --- a/examples/quantization/dequant/kernel_launch_method_by_framework/README.md +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现DequantCustom单算子,本样例固定shape输入为srcGm[4,8], deqScaleGm[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 +本样例基于自定义算子工程,介绍了调用AscendDequant高阶API实现DequantCustom单算子,本样例固定shape输入为inputGm[4,8], scaleGm[8],主要演示AscendDequant高阶API在自定义算子工程中的调用。 ## 目录结构 diff --git a/examples/quantization/quant/README.md b/examples/quantization/quant/README.md index 83bb90eb..80ef978e 100755 --- a/examples/quantization/quant/README.md +++ b/examples/quantization/quant/README.md @@ -17,7 +17,7 @@ | 调用方式 | 目录 | **描述** | | --------- | ------------------------------------------------------------ | ---------------------------------------------------------- | | 直调 | [kernel_launch_method_by_direct](./kernel_launch_method_by_direct) | host侧的核函数调用程序,包含CPU侧、NPU侧、仿真侧三种运行验证方法。 | -| 框架调用 | [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用AscendQuant算子。 | +| 框架调用 | [kernel_launch_method_by_framework](./kernel_launch_method_by_framework) | 通过aclnn调用的方式调用QuantCustom算子。 | ## 样例支持的产品型号为: - Atlas A2训练系列产品/Atlas 800I A2推理产品 @@ -44,12 +44,12 @@ QuantCustom算子规格: 算子输入 nameshapedata typeformat -srcGm1024floatND +inputGm1024floatND 算子输出 -dstGm1024int8_tND +outputGm1024int8_tND 核函数名quant_custom @@ -57,13 +57,13 @@ QuantCustom算子规格: ## 算子实现介绍 -本样例实现了QuantCustom算子,其中输入srcGm为固定shape[1024]。 +本样例实现了QuantCustom算子,其中输入inputGm为固定shape[1024]。 - kernel实现 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendQuant高阶API接口完成量化计算,得到最终结果,再搬出到外部存储上。 - QuantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGm存储在srcLocal中,Compute任务负责对srcLocal执行量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGm。 + QuantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor inputGm存储在srcLocal中,Compute任务负责对srcLocal执行量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor outputGm。 - tiling实现 diff --git a/examples/quantization/quant/kernel_launch_method_by_direct/README.md b/examples/quantization/quant/kernel_launch_method_by_direct/README.md index 85fec8b9..201316dd 100755 --- a/examples/quantization/quant/kernel_launch_method_by_direct/README.md +++ b/examples/quantization/quant/kernel_launch_method_by_direct/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现QuantCustom单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 +本样例基于Kernel直调算子工程,介绍了调用AscendQuant高阶API实现QuantCustom单算子,本样例输入inputGm为固定shape[1024],主要演示AscendQuant高阶API在Kernel直调工程中的调用。 ## 目录结构介绍 | 目录及文件 | 描述 | diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/README.md b/examples/quantization/quant/kernel_launch_method_by_framework/README.md index 2dcaa269..bc418833 100755 --- a/examples/quantization/quant/kernel_launch_method_by_framework/README.md +++ b/examples/quantization/quant/kernel_launch_method_by_framework/README.md @@ -2,7 +2,7 @@ ## 概述 -本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现QuantCustom单算子,本样例输入srcGm为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 +本样例基于自定义算子工程,介绍了调用AscendQuant高阶API实现QuantCustom单算子,本样例输入inputGm为固定shape[1024],主要演示AscendQuant高阶API在自定义算子工程中的调用。 ## 目录结构 | 目录 | 描述 | diff --git a/examples/readme.md b/examples/readme.md index 348684bb..b87070f4 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -61,12 +61,12 @@ quantization quant - 对输入tensor按元素做量化计算,将half/float数据类型量化为int8_t数据类型,样例实现float输入。 + 对输入tensor按元素做量化计算,将float数据类型量化为int8_t数据类型。 dequant - 对输入tensor按元素做反量化计算,将int32_t数据类型反量化为half/float数据类型,样例实现float输出。 + 对输入tensor按元素做反量化计算,将int32_t数据类型反量化为float数据类型。 -- Gitee From 955e7f35aad45f46fb99950dee0e4833a5cff767 Mon Sep 17 00:00:00 2001 From: Hanglei Zhang Date: Mon, 25 Nov 2024 14:28:03 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E9=87=8F=E5=8C=96=E5=8F=8D=E9=87=8F?= =?UTF-8?q?=E5=8C=96readme=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quantization/dequant/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quantization/dequant/README.md b/examples/quantization/dequant/README.md index 7da33cf7..ce317ae2 100755 --- a/examples/quantization/dequant/README.md +++ b/examples/quantization/dequant/README.md @@ -61,7 +61,7 @@ DequantCustom算子规格: 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AscendDequant高阶API接口完成反量化计算,得到最终结果,再搬出到外部存储上。 - DequantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor inputGm与deq_scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行反量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor outputGm。 + DequantCustom算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor inputGm与scaleGM存储在srcLocal中,Compute任务负责对srcLocal执行反量化计算,计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor outputGm。 - tiling实现 -- Gitee From 9e2642ed28623a5b5b99efc4429a6408eed6d605 Mon Sep 17 00:00:00 2001 From: Hanglei Zhang Date: Mon, 2 Dec 2024 10:55:05 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=87=8F=E5=8C=96?= =?UTF-8?q?=E5=8F=8D=E9=87=8F=E5=8C=96cmake=E8=B7=AF=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dequant/kernel_launch_method_by_framework/CMakePresets.json | 2 +- .../quant/kernel_launch_method_by_framework/CMakePresets.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json index dc47e02c..e56e9011 100755 --- a/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json +++ b/examples/quantization/dequant/kernel_launch_method_by_framework/CMakePresets.json @@ -39,7 +39,7 @@ }, "ASCEND_CANN_PACKAGE_PATH": { "type": "PATH", - "value": "/home/ma-user/work/Ascend/ascend-toolkit/latest" + "value": "~/Ascend/ascend-toolkit/latest" }, "ASCEND_PYTHON_EXECUTABLE": { "type": "STRING", diff --git a/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json b/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json index dc47e02c..e56e9011 100755 --- a/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json +++ b/examples/quantization/quant/kernel_launch_method_by_framework/CMakePresets.json @@ -39,7 +39,7 @@ }, "ASCEND_CANN_PACKAGE_PATH": { "type": "PATH", - "value": "/home/ma-user/work/Ascend/ascend-toolkit/latest" + "value": "~/Ascend/ascend-toolkit/latest" }, "ASCEND_PYTHON_EXECUTABLE": { "type": "STRING", -- Gitee