diff --git a/examples/activation/adjustsoftmaxres/CMakeLists.txt b/examples/activation/adjustsoftmaxres/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..be5170fc8b26f661cb9b0670a063f92b6f2ba742 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/CMakeLists.txt @@ -0,0 +1,82 @@ +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +cmake_minimum_required(VERSION 3.16) +project(Ascend_c) +if(${RUN_MODE}) + set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu") +endif() +if (${SOC_VERSION}) + set(SOC_VERSION "Ascend910" CACHE STRING "system on chip type") +endif() + +set(ASCEND_CANN_PACKAGE_PATH "~/Ascend/ascend-toolkit/latest" CACHE STRING "ASCEND CANN package installation directory") + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type Release/Debug (default Debug)" FORCE) +endif() + +if(CMAKE_INSTALL_PREFIX STREQUAL /usr/local) + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE) +endif() + +file(GLOB KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel/adjust_softmax_res_custom.cpp +) +set(CUSTOM_ASCEND310P_LIST "Ascend310P1" "Ascend310P3") + +if("${RUN_MODE}" STREQUAL "cpu") + include(cmake/cpu_lib.cmake) +elseif("${RUN_MODE}" STREQUAL "sim" OR "${RUN_MODE}" STREQUAL "npu") + include(cmake/npu_lib.cmake) +else() + message("invalid RUN_MODE: ${RUN_MODE}") +endif() + +add_executable(adjust_softmax_res_direct_kernel_op + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/op_host/adjust_softmax_res_custom_tiling.cpp +) + +target_compile_options(adjust_softmax_res_direct_kernel_op PRIVATE + $:-g>> + -O2 + -std=c++17 + -D_GLIBCXX_USE_CXX11_ABI=0 +) + +target_compile_definitions(adjust_softmax_res_direct_kernel_op PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_include_directories(adjust_softmax_res_direct_kernel_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + $:${ASCEND_CANN_PACKAGE_PATH}/include>> + $:${ASCEND_CANN_PACKAGE_PATH}/runtime/include>> +) + +target_link_libraries(adjust_softmax_res_direct_kernel_op PRIVATE + $,$>:host_intf_pub>> + $:tikicpulib::${SOC_VERSION}>> + $:ascendcl>> + $:c_sec>> + ascendc_kernels_${RUN_MODE} + tiling_api + register + platform + ascendalog + dl + graph_base +) + +install(TARGETS adjust_softmax_res_direct_kernel_op + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/README.md b/examples/activation/adjustsoftmaxres/README.md new file mode 100644 index 0000000000000000000000000000000000000000..370c12a55115f69403dfb3615e8abcd79d9e8c69 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/README.md @@ -0,0 +1,93 @@ + + +## 概述 + +本样例介绍了调用AdjustSoftMaxRes高阶API实现AdjustSoftMaxRes算子,并按照核函数直调的方式分别给出了对应的端到端实现。 + +本样例以直调的方式调用算子核函数。 + +直调:核函数的基础调用方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。包含CPU侧、NPU侧、仿真侧三种运行验证方法。 + +## 样例支持的产品型号为: +- Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件 +- Atlas 推理系列产品AI Core + +## 目录结构 + +| 目录 | 描述 | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| [cmake](./cmake) | 编译工程文件 | +| [op_host](./op_host) | 本样例tiling代码实现 | +| [op_kernel](./op_kernel) | 本样例kernel侧代码实现 | +| [scripts](./scripts) | 包含输入数据和真值数据生成脚本文件 | +| CMakeLists.txt | 编译工程文件 | +| main.cpp | 主函数,调用算子的应用程序,含CPU域及NPU域调用 | +| run.sh | 编译执行脚本 | + +## 算子描述 +- 算子功能: + + AdjustSoftMaxResCustom单算子,用于对SoftMax相关计算结果做后处理。当输入的max中存在指定的值,调整x中对应位置的数据为自定义的值。 + +- 算子规格: + + + + + + + + + + + + +
算子类型(OpType)AdjustSoftMaxResCustom
算子输入
nameshapedata typeformat
x32*32floatND
max32*8floatND
算子输出
y32*32floatND
核函数名adjust_softmax_res_custom
+ +## 算子实现介绍 + +本样例中实现的是固定shape为输入x[32, 32],输出y[32, 32]的AdjustSoftMaxResCustom算子。算子中需要判断的max中的值为0xFF7FFFFF,需要向y的结果数据中填充的值为0,将这两个值分别定义为两个全局常量,用于算子实现。 + +- Kernel实现 + + 计算逻辑是:Ascend C提供的矢量计算接口的操作元素都为LocalTensor,输入数据需要先搬运进片上存储,然后使用AdjustSoftMax高阶API接口完成计算,得到最终结果,再搬出到外部存储上。 + + AdjustSoftMaxRes算子的实现流程分为3个基本任务:CopyIn,Compute,CopyOut。CopyIn任务负责将Global Memory上的输入Tensor srcGlobal和maxLocal分别存储在srcLocal和maxLocal中,Compute任务负责对maxLocal中的数据进行判断,并更改srcLocal数据。计算结果存储在dstLocal中,CopyOut任务负责将输出数据从dstLocal搬运至Global Memory上的输出Tensor dstGlobal。 + +- Tiling实现 + + 根据输入Tensor的长度和宽度确定所需tiling参数height、width、srcSize、dstSize。 + +## 编译运行样例 + + - 配置环境变量 + + 这里的\$ASCEND_CANN_PACKAGE_PATH需要替换为CANN开发套件包安装后文件存储路径。例如:/usr/local/Ascend/ascend-toolkit/latest + ``` + export ASCEND_HOME_DIR=$ASCEND_CANN_PACKAGE_PATH + source $ASCEND_HOME_DIR/../set_env.sh + ``` + + - 生成输入和真值 + + 执行如下命令后,当前目录生成input和output目录存放输入数据和真值数据。 + ``` + python3 scripts/gen_data.py + ``` + + - 编译执行 + + ``` + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] + ``` + + 其中脚本参数说明如下: + - RUN_MODE :编译执行方式,可选择CPU调试,NPU仿真,NPU上板,对应参数分别为[cpu / sim / npu]。若需要详细了解NPU仿真相关内容,请参考[《算子开发工具msProf》](https://hiascend.com/document/redirect/CannCommunityToolMsProf)中的“工具使用”章节。 + - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: + - Atlas A2 训练系列产品/Atlas 800I A2 推理产品/A200I A2 Box 异构组件 + - Atlas 推理系列产品AI Core + + 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 + ``` + bash run.sh -r cpu -v Ascendxxxyy + ``` \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/cmake/cpu_lib.cmake b/examples/activation/adjustsoftmaxres/cmake/cpu_lib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..693f15ac115d655aacd3218bc5b14060c0a3de2f --- /dev/null +++ b/examples/activation/adjustsoftmaxres/cmake/cpu_lib.cmake @@ -0,0 +1,26 @@ +if(NOT DEFINED ENV{CMAKE_PREFIX_PATH}) + set(CMAKE_PREFIX_PATH ${ASCEND_CANN_PACKAGE_PATH}/tools/tikicpulib/lib/cmake) +endif() +find_package(tikicpulib REQUIRED) + +add_library(ascendc_kernels_${RUN_MODE} SHARED + ${KERNEL_FILES} +) + +target_link_libraries(ascendc_kernels_${RUN_MODE} PRIVATE + tikicpulib::${SOC_VERSION} +) + +target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$>:CUSTOM_ASCEND310P> +) + +target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE + -g + -O0 + -std=c++17 +) + +install(TARGETS ascendc_kernels_${RUN_MODE} +DESTINATION ${CMAKE_INSTALL_LIBDIR} +) \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/cmake/npu_lib.cmake b/examples/activation/adjustsoftmaxres/cmake/npu_lib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8ad136f38b80bea109925ab797fbde0871874964 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/cmake/npu_lib.cmake @@ -0,0 +1,12 @@ +if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake) +elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) + set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake) +else() + message(FATAL_ERROR "ascendc_kernel_cmake does not exist ,please check whether the cann package is installed") +endif() +include(${ASCENDC_CMAKE_DIR}/ascendc.cmake) + +ascendc_library(ascendc_kernels_${RUN_MODE} STATIC + ${KERNEL_FILES} +) \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/main.cpp b/examples/activation/adjustsoftmaxres/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9ca6580060f80ecc556fd713c4ae8d1690efd785 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/main.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "../../common/data_utils.h" +#include "./op_host/adjust_softmax_res_custom_tiling.h" +#ifndef ASCENDC_CPU_DEBUG +#include "acl/acl.h" +#include "aclrtlaunch_adjust_softmax_res_custom.h" +#include "tiling/platform/platform_ascendc.h" +#else +#include "tikicpulib.h" +extern "C" __global__ __aicore__ void adjust_softmax_res_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR max, + AdjustSoftMaxResTilingData tiling); +#endif + +namespace { +constexpr uint32_t USED_CORE_NUM = 1; +constexpr uint32_t TILING_SIZE = 4; +constexpr uint32_t SRC_HEIGHT = 32; +constexpr uint32_t SRC_WIDTH = 32; +} + +extern void GenerateTiling(uint32_t height, uint32_t width, uint8_t* tilingBuf); + +static bool CompareResult(const void *outputData, int64_t outSize) { + void *goldenData; +#ifdef ASCENDC_CPU_DEBUG + goldenData = (uint8_t *)AscendC::GmAlloc(outSize); +#else + CHECK_ACL(aclrtMallocHost((void **)(&goldenData), outSize)); +#endif + size_t goldenSize = outSize; + bool ret = ReadFile("../output/golden.bin", goldenSize, goldenData, goldenSize); + if (ret) { + printf("ReadFile golden.bin success!\n"); + } else { + printf("test failed!\n"); + return false; + } + constexpr float EPS = 1e-4; + int64_t wrongNum = 0; + + for (int i = 0; i < outSize / sizeof(float); i++) { + float a = (reinterpret_cast(outputData))[i]; + float b = (reinterpret_cast(goldenData))[i]; + float ae = std::abs(a - b); + float re = ae / abs(b); + if (ae > EPS && re > EPS) { + wrongNum++; + } + } +#ifdef ASCENDC_CPU_DEBUG + AscendC::GmFree((void *)goldenData); +#else + CHECK_ACL(aclrtFreeHost(goldenData)); +#endif + if (wrongNum != 0) { + printf("wrongNum: %ld\n", wrongNum); + return false; + } else { + printf("CompareResult golden.bin success!\n"); + return true; + } +} + +int32_t main(int32_t argc, char *argv[]) { + size_t tilingSize = TILING_SIZE * sizeof(uint32_t); + size_t inputSize = SRC_HEIGHT * SRC_WIDTH * sizeof(uint32_t); + size_t inputMaxSize = SRC_HEIGHT * 8 * sizeof(uint32_t); + size_t outputSize = SRC_HEIGHT * SRC_WIDTH * sizeof(uint32_t); +#ifdef ASCENDC_CPU_DEBUG + uint8_t *x = (uint8_t *)AscendC::GmAlloc(inputSize); + uint8_t *y = (uint8_t *)AscendC::GmAlloc(outputSize); + uint8_t *max = (uint8_t *)AscendC::GmAlloc(inputMaxSize); + uint8_t *tiling = (uint8_t *)AscendC::GmAlloc(tilingSize); + + ReadFile("../input/input_softmax.bin", inputSize, x, inputSize); + ReadFile("../input/input_max.bin", inputMaxSize, max, inputMaxSize); + + GenerateTiling(SRC_HEIGHT, SRC_WIDTH, tiling); + + AscendC::SetKernelMode(KernelMode::AIV_MODE); // run in aiv mode + + ICPU_RUN_KF(adjust_softmax_res_custom, 1, x, y, max, + *reinterpret_cast(tiling)); // use this macro for cpu debug + + WriteFile("../output/output.bin", y, outputSize); + + bool goldenResult = true; + goldenResult = CompareResult(y, outputSize); + + AscendC::GmFree((void *)x); + AscendC::GmFree((void *)y); + AscendC::GmFree((void *)tiling); +#else + CHECK_ACL(aclInit(nullptr)); + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *xHost; + uint8_t *yHost; + uint8_t *maxHost; + uint8_t *tiling; + uint8_t *xDevice; + uint8_t *yDevice; + uint8_t *maxDevice; + + CHECK_ACL(aclrtMallocHost((void **)(&xHost), inputSize)); + CHECK_ACL(aclrtMallocHost((void **)(&yHost), outputSize)); + CHECK_ACL(aclrtMallocHost((void **)(&maxHost), inputMaxSize)); + CHECK_ACL(aclrtMallocHost((void **)(&tiling), tilingSize)) + + CHECK_ACL(aclrtMalloc((void **)&xDevice, inputSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&yDevice, outputSize, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMalloc((void **)&maxDevice, inputMaxSize, ACL_MEM_MALLOC_HUGE_FIRST)); + + ReadFile("../input/input_softmax.bin", inputSize, xHost, inputSize); + ReadFile("../input/input_max.bin", inputMaxSize, maxHost, inputMaxSize); + + GenerateTiling(SRC_HEIGHT, SRC_WIDTH, tiling); + + // Copy host memory to device memory + CHECK_ACL(aclrtMemcpy(xDevice, inputSize, xHost, inputSize, ACL_MEMCPY_HOST_TO_DEVICE)); + CHECK_ACL(aclrtMemcpy(maxDevice, inputMaxSize, maxHost, inputMaxSize, ACL_MEMCPY_HOST_TO_DEVICE)); + + // Execute the kernel + ACLRT_LAUNCH_KERNEL(adjust_softmax_res_custom) + (USED_CORE_NUM, stream, xDevice, yDevice, maxDevice, reinterpret_cast(tiling)); + + // Wait for the stop event to complete + CHECK_ACL(aclrtSynchronizeStream(stream)); + + // Copy result to host memory and write to output file + CHECK_ACL(aclrtMemcpy(yHost, outputSize, yDevice, outputSize, ACL_MEMCPY_DEVICE_TO_HOST)); + WriteFile("../output/output.bin", yHost, outputSize); + + // Compare the result with the golden result + bool goldenResult = true; + goldenResult = CompareResult(yHost, outputSize); + + // Clean up memory + CHECK_ACL(aclrtFree(xDevice)); + CHECK_ACL(aclrtFree(yDevice)); + CHECK_ACL(aclrtFree(maxDevice)); + CHECK_ACL(aclrtFreeHost(xHost)); + CHECK_ACL(aclrtFreeHost(yHost)); + CHECK_ACL(aclrtFreeHost(maxHost)); + CHECK_ACL(aclrtFreeHost(tiling)); + + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); +#endif + + if (goldenResult) { + printf("test pass!\n"); + } else { + printf("test failed!\n"); + } + return 0; +} \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/op_host/adjust_softmax_res_custom_tiling.cpp b/examples/activation/adjustsoftmaxres/op_host/adjust_softmax_res_custom_tiling.cpp new file mode 100644 index 0000000000000000000000000000000000000000..75b8cbf61dcb7e602c57c1494ee12d5ff02b9f72 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/op_host/adjust_softmax_res_custom_tiling.cpp @@ -0,0 +1,22 @@ +/** + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "tiling/tiling_api.h" +#include "adjust_softmax_res_custom_tiling.h" + + +void GenerateTiling(uint32_t height, uint32_t width, uint8_t* tilingBuf) +{ + AdjustSoftMaxResTilingData *tiling = reinterpret_cast(tilingBuf); + tiling->height = height; + tiling->width = width; + tiling->srcSize = height*width; + tiling->dstSize = height*width; +} \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/op_host/adjust_softmax_res_custom_tiling.h b/examples/activation/adjustsoftmaxres/op_host/adjust_softmax_res_custom_tiling.h new file mode 100644 index 0000000000000000000000000000000000000000..146743b7a50f8d9e8b7e514b91781d7ff85d6b2e --- /dev/null +++ b/examples/activation/adjustsoftmaxres/op_host/adjust_softmax_res_custom_tiling.h @@ -0,0 +1,21 @@ +/** + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_ACTIVATION_ADJUST_SOFTMAX_RES_CUSTOM_TILING_H +#define EXAMPLES_ACTIVATION_ADJUST_SOFTMAX_RES_CUSTOM_TILING_H +#include + +struct AdjustSoftMaxResTilingData { + uint32_t height; + uint32_t width; + uint32_t srcSize; + uint32_t dstSize; +}; +#endif \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/op_kernel/adjust_softmax_res_custom.cpp b/examples/activation/adjustsoftmaxres/op_kernel/adjust_softmax_res_custom.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cca4e64270b4e794669e14f30aa298bf168926a9 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/op_kernel/adjust_softmax_res_custom.cpp @@ -0,0 +1,20 @@ +/** + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#include "./adjust_softmax_res_custom.h" +#include "kernel_operator.h" + +extern "C" __global__ __aicore__ void adjust_softmax_res_custom(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR maxGM, AdjustSoftMaxResTilingData tiling) +{ + AscendC::TPipe pipe; + MyCustomKernel::KernelSoftmax op; + op.Init(srcGm, dstGm, maxGM, tiling, &pipe); + op.Process(); +} \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/op_kernel/adjust_softmax_res_custom.h b/examples/activation/adjustsoftmaxres/op_kernel/adjust_softmax_res_custom.h new file mode 100644 index 0000000000000000000000000000000000000000..409d5f5102a7569f3f763a85b91f9986a9375807 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/op_kernel/adjust_softmax_res_custom.h @@ -0,0 +1,96 @@ +/** + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +#ifndef EXAMPLES_ACTIVATION_ADJUST_SOFTMAX_RES_CUSTOM_H +#define EXAMPLES_ACTIVATION_ADJUST_SOFTMAX_RES_CUSTOM_H +#include "../op_host/adjust_softmax_res_custom_tiling.h" +#include "kernel_operator.h" + +namespace MyCustomKernel { +constexpr int32_t BUFFER_NUM = 1; +constexpr int32_t HEIGHT = 32; +constexpr int32_t WIDTH = 32; +constexpr uint32_t FROM = 0xFF7FFFFF; +constexpr float TO = 0.0; + +template +class KernelSoftmax { +public: + __aicore__ inline KernelSoftmax() + {} + __aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstGm, GM_ADDR maxGM, AdjustSoftMaxResTilingData tiling, AscendC::TPipe* pipeIn) + { + pipe = pipeIn; + elementNumPerBlk = HEIGHT / sizeof(T); + height = tiling.height; + width = tiling.width; + srcGlobal.SetGlobalBuffer((__gm__ T *)srcGm); + maxGlobal.SetGlobalBuffer((__gm__ T *)maxGM); + dstGlobal.SetGlobalBuffer((__gm__ T *)dstGm); + pipe->InitBuffer(inQueueSrc, BUFFER_NUM, height * width * sizeof(T)); + pipe->InitBuffer(inQueueMax, BUFFER_NUM, height * elementNumPerBlk * sizeof(T)); + pipe->InitBuffer(outQueueDst, BUFFER_NUM, height * width * sizeof(T)); + pipe->InitBuffer(calcBuf, height * width * sizeof(T)); + } + __aicore__ inline void Process() + { + CopyIn(); + Compute(); + CopyOut(); + } + +private: + __aicore__ inline void CopyIn() + { + AscendC::LocalTensor srcLocal = inQueueSrc.AllocTensor(); + AscendC::LocalTensor maxLocal = inQueueMax.AllocTensor(); + AscendC::DataCopy(srcLocal, srcGlobal, height * width); + AscendC::DataCopy(maxLocal, maxGlobal, height * elementNumPerBlk); + inQueueSrc.EnQue(srcLocal); + inQueueMax.EnQue(maxLocal); + } + __aicore__ inline void Compute() + { + AscendC::LocalTensor srcLocal = inQueueSrc.DeQue(); + AscendC::LocalTensor maxLocal = inQueueMax.DeQue(); + AscendC::LocalTensor dstLocal = outQueueDst.AllocTensor(); + AscendC::LocalTensor tmpTensor = calcBuf.Get(); + AscendC::SoftMaxShapeInfo srcShape = {height, width, height, width}; + AscendC::AdjustSoftMaxRes(srcLocal, maxLocal, FROM, TO, srcShape); + AscendC::DataCopy(tmpTensor, srcLocal, height * width); + AscendC::DataCopy(dstLocal, tmpTensor, height * width); + + outQueueDst.EnQue(dstLocal); + inQueueMax.FreeTensor(maxLocal); + inQueueSrc.FreeTensor(srcLocal); + calcBuf.FreeTensor(tmpTensor); + } + __aicore__ inline void CopyOut() + { + AscendC::LocalTensor dstLocal = outQueueDst.DeQue(); + AscendC::DataCopy(dstGlobal, dstLocal, height * width); + outQueueDst.FreeTensor(dstLocal); + } + +private: + AscendC::TPipe* pipe; + AscendC::TQue inQueueSrc; + AscendC::TQue inQueueMax; + AscendC::TQue outQueueDst; + AscendC::TBuf calcBuf; + AscendC::GlobalTensor srcGlobal; + AscendC::GlobalTensor maxGlobal; + AscendC::GlobalTensor dstGlobal; + uint32_t elementNumPerBlk = 0; + uint32_t height = 0; + uint32_t width = 0; +}; +} +#endif // EXAMPLES_ACTIVATION_ADJUST_SOFTMAX_RES_CUSTOM_H \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/run.sh b/examples/activation/adjustsoftmaxres/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..0588d58679cbca97360347a0ab4514b3440b9bd3 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/run.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== + +SHORT=r:,v:, +LONG=run-mode:,soc-version:, +OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") +eval set -- "$OPTS" +while : +do + case "$1" in + (-r | --run-mode ) + RUN_MODE="$2" + shift 2;; + (-v | --soc-version ) + SOC_VERSION="$2" + shift 2;; + (--) + shift; + break;; + (*) + echo "[ERROR] Unexpected option: $1"; + break;; + esac +done + +rm -rf build +mkdir build +cd build + +# in case of running op in simulator, use stub so instead +if [ "${RUN_MODE}" = "sim" ]; then + export LD_LIBRARY_PATH=$(echo $LD_LIBRARY_PATH | sed 's/\/.*\/runtime\/lib64://g') + export LD_LIBRARY_PATH=$ASCEND_HOME_DIR/runtime/lib64/stub:$LD_LIBRARY_PATH +fi + +source $ASCEND_HOME_DIR/bin/setenv.bash +export LD_LIBRARY_PATH=${ASCEND_HOME_DIR}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH + +cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. +make -j16 + +if [ "${RUN_MODE}" = "npu" ]; then + ./adjust_softmax_res_direct_kernel_op +elif [ "${RUN_MODE}" = "sim" ]; then + export ASCEND_TOOLKIT_HOME=${ASCEND_HOME_DIR} + export ASCEND_HOME_PATH=${ASCEND_HOME_DIR} + msprof op simulator --application=./adjust_softmax_res_direct_kernel_op +elif [ "${RUN_MODE}" = "cpu" ]; then + ./adjust_softmax_res_direct_kernel_op +fi \ No newline at end of file diff --git a/examples/activation/adjustsoftmaxres/scripts/gen_data.py b/examples/activation/adjustsoftmaxres/scripts/gen_data.py new file mode 100644 index 0000000000000000000000000000000000000000..d1cbe12376ccac2376806e74369f9ea388dcd708 --- /dev/null +++ b/examples/activation/adjustsoftmaxres/scripts/gen_data.py @@ -0,0 +1,60 @@ +#!/usr/bin/python3 +# coding=utf-8 + +# Copyright (c) 2025 Huawei Technologies Co., Ltd. +# This file is a part of the CANN Open Software. +# Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). +# Please refer to the License for details. You may not use this file except in compliance with the License. +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +# See LICENSE in the root of the software repository for the full text of the License. +# ====================================================================================================================== +import os +import numpy as np + + +def adjust_softmax_res(res, max_val, res_shape): + target = 0xFF7FFFFF + to = 0.0 + for i in range(res_shape[0]): + if max_val[i][0] == target: + for j in range(res_shape[1]): + res[i][j] = to + return + + +def softmax_py_float(x): + orig_shape = x.shape + x_max = np.max(x, axis=-1) + x_max = np.reshape(x_max, [orig_shape[0], 1]) + + x_sub = x - x_max + x_exp = np.exp(x_sub) + + x_exp1 = np.reshape(x_exp, [orig_shape[0], orig_shape[1]]) + x_sum = np.sum(x_exp1, axis=-1) + x_sum = np.reshape(x_sum, [orig_shape[0], 1]) + x_div = x_exp / x_sum + out = np.reshape(x_div, [orig_shape[0], orig_shape[1]]) + return out, x_max, x_sum + + +def gen_golden_data_simple(): + x_shape = (32, 32) + x = np.random.uniform(-1, 1, x_shape).astype(np.float32) + + output_max = np.zeros([x_shape[0], 8], dtype=np.float32) + + softmax_out, max_val, sum_val = softmax_py_float(x) + os.system("mkdir -p input") + os.system("mkdir -p output") + softmax_out.tofile("./input/input_softmax.bin") + + adjust_softmax_res(softmax_out, max_val, softmax_out.shape) + output_max = output_max + max_val + + output_max.tofile("./input/input_max.bin") + softmax_out.tofile("./output/golden.bin") + +if __name__ == "__main__": + gen_golden_data_simple() diff --git a/impl/sort/topk/topk_common_utils.h b/impl/sort/topk/topk_common_utils.h index e999ed4113860394fb5a562f9554cef4d361769b..2d5a1aba25812149c3c67dacc5760f8628ec6fbe 100644 --- a/impl/sort/topk/topk_common_utils.h +++ b/impl/sort/topk/topk_common_utils.h @@ -53,10 +53,13 @@ struct TopKInfo { int32_t n; // actual length of the tensor }; +#ifndef ASCC_ENUM_TOPKMODE +#define ASCC_ENUM_TOPKMODE enum class TopKMode { TOPK_NORMAL, TOPK_NSMALL, }; +#endif } #endif diff --git a/lib/matmul/matmul_tiling_base.h b/lib/matmul/matmul_tiling_base.h index b6708ce5ee31cc1af0bdb5de6e443f9f44166940..05828030f0468bca986efa0504ee0d3644a5bee5 100644 --- a/lib/matmul/matmul_tiling_base.h +++ b/lib/matmul/matmul_tiling_base.h @@ -21,7 +21,9 @@ #include "tiling/platform/platform_ascendc.h" namespace matmul_tiling { +#ifndef __ASCC_DEVICE__ using half = double; +#endif constexpr int32_t UINT8_BYTES = 1; constexpr int32_t INT8_BYTES = 1; constexpr int32_t FP32_BYTES = 4; @@ -66,12 +68,14 @@ enum class DataType : int32_t { DT_MAX = 34 // Mark the boundaries of data types }; +#ifndef __ASCC_DEVICE__ const std::map DTYPE_BYTE_TAB = { {DataType::DT_FLOAT, 4}, {DataType::DT_FLOAT16, 2}, {DataType::DT_INT8, 1}, {DataType::DT_INT16, 2}, {DataType::DT_UINT16, 2}, {DataType::DT_UINT8, 1}, {DataType::DT_INT32, 4}, {DataType::DT_INT64, 8}, {DataType::DT_UINT32, 4}, {DataType::DT_UINT64, 8}, {DataType::DT_BF16, 2}, {DataType::DT_BFLOAT16, 2}, {DataType::DT_INT4, 1} }; +#endif // __ASCC_DEVICE__ const std::map DTYPE_BIT_TAB = { {DataType::DT_FLOAT, 32}, {DataType::DT_FLOAT16, 16}, {DataType::DT_INT8, 8}, {DataType::DT_INT16, 16}, diff --git a/lib/sort/topk_tiling.h b/lib/sort/topk_tiling.h index 07e36dbbfde70517691d0f9b7a7b41a3f483d62e..b224737b93179578823d40759955220d7d3a4fb1 100644 --- a/lib/sort/topk_tiling.h +++ b/lib/sort/topk_tiling.h @@ -13,12 +13,16 @@ #include "tiling/platform/platform_ascendc.h" namespace AscendC { +#ifndef ASCC_ENUM_TOPKMODE +#define ASCC_ENUM_TOPKMODE enum class TopKMode { TOPK_NORMAL, TOPK_NSMALL, }; +#endif + /* * @ingroup GetTopKMaxMinTmpSize * @brief Get TopK api calculate need max and min temporary local space size. diff --git a/lib/transpose/confusion_transpose_tiling.h b/lib/transpose/confusion_transpose_tiling.h index d91742ee8d0b1b5659f35c1e94ceeb7c92db3c33..ae4a59bc21d1a944037c33c300bef47c59d1cb40 100644 --- a/lib/transpose/confusion_transpose_tiling.h +++ b/lib/transpose/confusion_transpose_tiling.h @@ -18,9 +18,42 @@ #include "confusion_transpose_tilingdata.h" namespace AscendC { constexpr uint32_t TWO_TIMES = 2; +#ifndef __ASCC_DEVICE__ + +#ifndef ASCC_PARAM_BLOCK_CUBE + +#define ASCC_PARAM_BLOCK_CUBE constexpr uint32_t BLOCK_CUBE = 16; +#endif + +#ifndef ASCC_PARAM_ONE_BLK_SIZE +#define ASCC_PARAM_ONE_BLK_SIZE constexpr uint32_t ONE_BLK_SIZE = 32; +#endif + +#ifndef ASCC_PARAM_CUBE_MAX_SIZE +#define ASCC_PARAM_CUBE_MAX_SIZE constexpr int32_t CUBE_MAX_SIZE = 256; +#endif + +#else // #ifdef __ASCC_DEVICE__ + +#ifndef ASCC_PARAM_BLOCK_CUBE +#define ASCC_PARAM_BLOCK_CUBE +const int32_t BLOCK_CUBE = 16; +#endif + +#ifndef ASCC_PARAM_ONE_BLK_SIZE +#define ASCC_PARAM_ONE_BLK_SIZE +const uint16_t ONE_BLK_SIZE = 32; +#endif + +#ifndef ASCC_PARAM_CUBE_MAX_SIZE +#define ASCC_PARAM_CUBE_MAX_SIZE +const int32_t CUBE_MAX_SIZE = 256; +#endif + +#endif // __ASCC_DEVICE__ /*! * \brief calculate max and min tmp buffer size for ConfusionTranspose interface. tmp buffer size is a input for GetConfusionTransposeTilingInfo