From 285208f4d351325fdec89c2262a33af31819568e Mon Sep 17 00:00:00 2001 From: sheinilya Date: Thu, 21 Nov 2024 19:12:07 +0800 Subject: [PATCH 1/4] Add cumprod --- cmake/kernel_headers.cmake | 7 + cmake/tiling_headers.cmake | 7 + impl/math/cumprod/cumprod_impl.h | 408 +++++++++++++++++++++++++++ impl/math/cumprod/cumprod_tiling.cpp | 50 ++++ lib/math/cumprod.h | 147 ++++++++++ lib/math/cumprod_tiling.h | 33 +++ lib/math/cumprod_tiling_intf.h | 23 ++ tests/math/test_operator_cumprod.cpp | 95 +++++++ tests/tiling/test_tiling.cpp | 23 ++ 9 files changed, 793 insertions(+) create mode 100644 impl/math/cumprod/cumprod_impl.h create mode 100644 impl/math/cumprod/cumprod_tiling.cpp create mode 100644 lib/math/cumprod.h create mode 100644 lib/math/cumprod_tiling.h create mode 100644 lib/math/cumprod_tiling_intf.h create mode 100644 tests/math/test_operator_cumprod.cpp diff --git a/cmake/kernel_headers.cmake b/cmake/kernel_headers.cmake index 5c5a2566..22010a38 100644 --- a/cmake/kernel_headers.cmake +++ b/cmake/kernel_headers.cmake @@ -196,3 +196,10 @@ file(CREATE_LINK ../activation/geglu_tiling_intf.h ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/math/geglu_tiling_intf.h SYMBOLIC) file(CREATE_LINK ../activation/geglu_tiling.h ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/math/geglu_tiling.h SYMBOLIC) + +# cumprod +file(MAKE_DIRECTORY ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod) +file(CREATE_LINK ../scan/kernel_operator_cumprod_intf.h + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod/kernel_operator_cumprod_intf.h SYMBOLIC) +file(CREATE_LINK ../sacn/cumprod.h + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod/cumprod.h SYMBOLIC) diff --git a/cmake/tiling_headers.cmake b/cmake/tiling_headers.cmake index 6e9ee95d..b2579768 100644 --- a/cmake/tiling_headers.cmake +++ b/cmake/tiling_headers.cmake @@ -233,3 +233,10 @@ file(CREATE_LINK ../../lib/math/xor_tiling.h 
file(CREATE_LINK ../lib/tiling_api.h ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/tiling_api.h SYMBOLIC) + +# cumprod +file(MAKE_DIRECTORY ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/cumprod) +file(CREATE_LINK ../../lib/math/cumprod_tiling.h + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/math/cumprod_tiling.h SYMBOLIC) +file(CREATE_LINK ../../lib/math/cumprod_tiling_intf.h + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/math/cumprod_tiling_intf.h SYMBOLIC) diff --git a/impl/math/cumprod/cumprod_impl.h b/impl/math/cumprod/cumprod_impl.h new file mode 100644 index 00000000..313ac1e5 --- /dev/null +++ b/impl/math/cumprod/cumprod_impl.h @@ -0,0 +1,408 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the + * "License"). Please refer to the License for details. You may not use this + * file except in compliance with the License. THIS SOFTWARE IS PROVIDED ON AN + * "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS + * FOR A PARTICULAR PURPOSE. See LICENSE in the root of the software repository + * for the full text of the License. + */ + +/* ! 
+ * \file cumprod_impl.h + * \brief + */ +#ifndef IMPL_MATH_CUMPROD_CUMPROD_IMPL_H +#define IMPL_MATH_CUMPROD_CUMPROD_IMPL_H + +#include "kernel_tensor.h" +#include "kernel_operator_intf.h" +#if __CCE_AICORE__ >= 200 + +namespace AscendC { +struct CumProdInfo { + uint32_t outter{0}; + uint32_t inner{0}; // 32-byte alignment +}; + +struct CumProdConfig { + bool isLastAxis{true}; + bool isReuseSource{false}; + bool outputLastRow{false}; +}; + +template +__aicore__ inline void CumProdLastDim(const LocalTensor &dstTensor, const LocalTensor &srcTensor, + LocalTensor tempBuffer, const cumProdInfo &cumProdInfo) { + constexpr uint32_t oneBlockElementNum = ONE_BLK_SIZE / sizeof(T); + uint16_t alignOutter = (cumProdInfo.outter + NCHW_CONV_ADDR_LIST_SIZE - 1) / + NCHW_CONV_ADDR_LIST_SIZE * NCHW_CONV_ADDR_LIST_SIZE; + uint64_t transDataTo5HDDstLocalList[NCHW_CONV_ADDR_LIST_SIZE]; + uint64_t transDataTo5HDSrcLocalList[NCHW_CONV_ADDR_LIST_SIZE]; + uint8_t repeatTimes = 1; + uint16_t dstRepStride = 0; + uint16_t srcRepStride = 0; + if (cumProdInfo.outter == alignOutter && alignOutter > cumProdInfo.inner) { + repeatTimes = alignOutter / NCHW_CONV_ADDR_LIST_SIZE; + if (repeatTimes > 1) { + dstRepStride = 1; + srcRepStride = cumProdInfo.inner; + } + TransDataTo5HDParams params(false, false, repeatTimes, dstRepStride, + srcRepStride); + for (int32_t i = 0; i < cumProdInfo.inner / oneBlockElementNum; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)srcTensor[i * oneBlockElementNum + n * cumProdInfo.inner] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n] = + (uint64_t)tempBuffer[i * oneBlockElementNum * alignOutter + + alignOutter * n] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, transDataTo5HDSrcLocalList, + params); + } + } else { + repeatTimes = cumProdInfo.inner / oneBlockElementNum; + if (repeatTimes > 1) { + dstRepStride = alignOutter; + srcRepStride = 1; + } + TransDataTo5HDParams params(false, 
false, repeatTimes, dstRepStride, + srcRepStride); + for (int32_t i = 0; i < alignOutter / NCHW_CONV_ADDR_LIST_SIZE; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)srcTensor[((i * NCHW_CONV_ADDR_LIST_SIZE + + n % (cumProdInfo.outter - + i * NCHW_CONV_ADDR_LIST_SIZE)) * + cumProdInfo.inner)] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n] = + (uint64_t)tempBuffer[i * NCHW_CONV_ADDR_LIST_SIZE + alignOutter * n] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, transDataTo5HDSrcLocalList, + params); + } + } + PipeBarrier(); + SetMaskCount(); + SetVectorMask(alignOutter * cumProdInfo.inner); + LocalTensor floatTempBuffer = + tempBuffer[alignOutter * cumProdInfo.inner] + .template ReinterpretCast(); + Cast( + floatTempBuffer, tempBuffer, RoundMode::CAST_NONE, MASK_PLACEHOLDER, 1, + {1, 1, DEFAULT_REPEAT_STRIDE, HALF_DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + + SetVectorMask(0, alignOutter); + const BinaryRepeatParams binaryParams; + for (uint32_t row = 1; row < cumProdInfo.inner; ++row) { + Mul(floatTempBuffer[row * alignOutter], + floatTempBuffer[(row - 1) * alignOutter], + floatTempBuffer[row * alignOutter], MASK_PLACEHOLDER, 1, + binaryParams); + PipeBarrier(); + } + + SetVectorMask(alignOutter * cumProdInfo.inner); + Cast( + tempBuffer, floatTempBuffer, RoundMode::CAST_NONE, MASK_PLACEHOLDER, 1, + {1, 1, HALF_DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + SetMaskNorm(); + ResetMask(); + auto tempBuffer2 = tempBuffer[alignOutter * cumProdInfo.inner]; + if (alignOutter > cumProdInfo.inner) { + repeatTimes = alignOutter / oneBlockElementNum; + if (repeatTimes > 1) { + dstRepStride = cumProdInfo.inner; + srcRepStride = 1; + } else { + dstRepStride = 0; + srcRepStride = 0; + } + TransDataTo5HDParams paramsBack(false, false, repeatTimes, dstRepStride, + srcRepStride); + for (int32_t i = 0; i < cumProdInfo.inner / NCHW_CONV_ADDR_LIST_SIZE; i++) { + for (int32_t n = 0; n < 
NCHW_CONV_ADDR_LIST_SIZE; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t) + tempBuffer[(i * NCHW_CONV_ADDR_LIST_SIZE + n) * alignOutter] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n] = + (uint64_t)tempBuffer2[i * NCHW_CONV_ADDR_LIST_SIZE + + n * cumProdInfo.inner] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, transDataTo5HDSrcLocalList, + paramsBack); + } + } else { + repeatTimes = cumProdInfo.inner / oneBlockElementNum; + if (repeatTimes > 1) { + dstRepStride = alignOutter; + srcRepStride = 1; + } else { + dstRepStride = 0; + srcRepStride = 0; + } + TransDataTo5HDParams paramsBack(false, false, repeatTimes, srcRepStride, + dstRepStride); + for (int32_t i = 0; i < alignOutter / NCHW_CONV_ADDR_LIST_SIZE; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)tempBuffer[i * NCHW_CONV_ADDR_LIST_SIZE + alignOutter * n] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n] = + (uint64_t)tempBuffer2[(i * NCHW_CONV_ADDR_LIST_SIZE + n) * + cumProdInfo.inner] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, transDataTo5HDSrcLocalList, + paramsBack); + } + } + PipeBarrier(); + SetMaskCount(); + SetVectorMask(0, cumProdInfo.outter * cumProdInfo.inner); + Muls(dstTensor, tempBuffer2, 1, MASK_PLACEHOLDER, 1, + {1, 1, DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + SetMaskNorm(); + ResetMask(); +} + +template <> +__aicore__ inline void CumProdLastDim(const LocalTensor &dstTensor, + const LocalTensor &srcTensor, + LocalTensor tempBuffer, + const cumProdInfo &cumProdInfo) { + constexpr uint32_t oneBlockElementNum = ONE_BLK_SIZE / sizeof(float); + uint8_t repeatTimes = 1; + uint16_t dstRepStride = 0; + uint16_t srcRepStride = 0; + uint16_t alignOutter = (cumProdInfo.outter + NCHW_CONV_ADDR_LIST_SIZE - 1) / + NCHW_CONV_ADDR_LIST_SIZE * NCHW_CONV_ADDR_LIST_SIZE; + uint64_t transDataTo5HDDstLocalList[NCHW_CONV_ADDR_LIST_SIZE]; + uint64_t 
transDataTo5HDSrcLocalList[NCHW_CONV_ADDR_LIST_SIZE]; + if (cumProdInfo.outter == alignOutter && alignOutter > cumProdInfo.inner) { + repeatTimes = alignOutter / NCHW_CONV_ADDR_LIST_SIZE; + if (repeatTimes > 1) { + dstRepStride = 2; + srcRepStride = cumProdInfo.inner * 2; + } + TransDataTo5HDParams params(false, false, repeatTimes, dstRepStride, + srcRepStride); + for (int32_t i = 0; i < cumProdInfo.inner / oneBlockElementNum; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)srcTensor[i * oneBlockElementNum + n * cumProdInfo.inner] + .GetPhyAddr(); + } + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE / 2; + n++) { + transDataTo5HDDstLocalList[n * 2] = + (uint64_t)tempBuffer[(i * oneBlockElementNum + n) * alignOutter] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n * 2 + 1] = + (uint64_t)tempBuffer[(i * oneBlockElementNum + n) * alignOutter + + oneBlockElementNum] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, + transDataTo5HDSrcLocalList, params); + } + } else { + repeatTimes = cumProdInfo.inner / oneBlockElementNum; + if (repeatTimes > 1) { + dstRepStride = alignOutter; + srcRepStride = 1; + } + TransDataTo5HDParams params(false, false, repeatTimes, dstRepStride, + srcRepStride); + for (int32_t i = 0; i < alignOutter / NCHW_CONV_ADDR_LIST_SIZE; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)srcTensor[((i * NCHW_CONV_ADDR_LIST_SIZE + + n % (cumProdInfo.outter - + i * NCHW_CONV_ADDR_LIST_SIZE)) * + cumProdInfo.inner)] + .GetPhyAddr(); + } + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE / 2; n++) { + transDataTo5HDDstLocalList[n * 2] = + (uint64_t)tempBuffer[i * NCHW_CONV_ADDR_LIST_SIZE + n * alignOutter] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n * 2 + 1] = + (uint64_t)tempBuffer[i * NCHW_CONV_ADDR_LIST_SIZE + + n * alignOutter + oneBlockElementNum] + .GetPhyAddr(); + } + 
TransDataTo5HD(transDataTo5HDDstLocalList, + transDataTo5HDSrcLocalList, params); + } + } + PipeBarrier(); + SetMaskCount(); + SetVectorMask(0, alignOutter); + const BinaryRepeatParams binaryParams; + uint32_t addOffset = alignOutter; + for (uint32_t row = 1; row < cumProdInfo.inner; ++row) { + Mul(tempBuffer[addOffset], + tempBuffer[addOffset - alignOutter], + tempBuffer[addOffset], MASK_PLACEHOLDER, 1, binaryParams); + addOffset += alignOutter; + PipeBarrier(); + } + SetMaskNorm(); + ResetMask(); + + auto tempBuffer2 = tempBuffer[alignOutter * cumProdInfo.inner]; + if (alignOutter > cumProdInfo.inner) { + repeatTimes = alignOutter / NCHW_CONV_ADDR_LIST_SIZE; + if (repeatTimes > 1) { + dstRepStride = cumProdInfo.inner * 2; + srcRepStride = 2; + } else { + dstRepStride = 0; + srcRepStride = 0; + } + TransDataTo5HDParams paramsBack(false, false, repeatTimes, dstRepStride, + srcRepStride); + for (int32_t i = 0; i < cumProdInfo.inner / oneBlockElementNum; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE / 2; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)tempBuffer[i * oneBlockElementNum * alignOutter + + n * alignOutter] + .GetPhyAddr(); + transDataTo5HDSrcLocalList[n + NCHW_CONV_ADDR_LIST_SIZE / 2] = + (uint64_t)tempBuffer[i * oneBlockElementNum * alignOutter + + n * alignOutter + oneBlockElementNum] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n * 2] = + (uint64_t) + tempBuffer2[i * oneBlockElementNum + n * cumProdInfo.inner] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n * 2 + 1] = + (uint64_t)tempBuffer2[i * oneBlockElementNum + + (n + oneBlockElementNum) * cumProdInfo.inner] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, + transDataTo5HDSrcLocalList, paramsBack); + } + + } else { + repeatTimes = cumProdInfo.inner / oneBlockElementNum; + if (repeatTimes > 1) { + dstRepStride = alignOutter; + srcRepStride = 1; + } else { + dstRepStride = 0; + srcRepStride = 0; + } + TransDataTo5HDParams paramsBack(false, false, repeatTimes, 
srcRepStride, + dstRepStride); + for (int32_t i = 0; i < alignOutter / NCHW_CONV_ADDR_LIST_SIZE; i++) { + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE / 2; n++) { + transDataTo5HDSrcLocalList[n] = + (uint64_t)tempBuffer[i * NCHW_CONV_ADDR_LIST_SIZE + n * alignOutter] + .GetPhyAddr(); + transDataTo5HDSrcLocalList[n + NCHW_CONV_ADDR_LIST_SIZE / 2] = + (uint64_t)tempBuffer[i * NCHW_CONV_ADDR_LIST_SIZE + + n * alignOutter + oneBlockElementNum] + .GetPhyAddr(); + } + for (int32_t n = 0; n < NCHW_CONV_ADDR_LIST_SIZE / 2; n++) { + transDataTo5HDDstLocalList[n * 2] = + (uint64_t)tempBuffer2[(i * NCHW_CONV_ADDR_LIST_SIZE + n) * + cumProdInfo.inner] + .GetPhyAddr(); + transDataTo5HDDstLocalList[n * 2 + 1] = + (uint64_t)tempBuffer2[(i * NCHW_CONV_ADDR_LIST_SIZE + + (n + NCHW_CONV_ADDR_LIST_SIZE / 2)) * + cumProdInfo.inner] + .GetPhyAddr(); + } + TransDataTo5HD(transDataTo5HDDstLocalList, + transDataTo5HDSrcLocalList, paramsBack); + } + } + PipeBarrier(); + SetMaskCount(); + SetVectorMask(0, cumProdInfo.outter * cumProdInfo.inner); + Muls(dstTensor, tempBuffer2, 1, MASK_PLACEHOLDER, 1, + {1, 1, DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + SetMaskNorm(); + ResetMask(); +} + +template +__aicore__ inline void CumProdFirstDim(const LocalTensor &dstTensor, + const LocalTensor &srcTensor, + LocalTensor &sharedTmpBuffer, + const cumProdInfo &cumProdInfo) { + if constexpr (sizeof(T) == 2) { + const uint32_t minTmpBufferSize = + cumProdInfo.outter * cumProdInfo.inner * sizeof(float); + const uint32_t tmpBufferSize = sharedTmpBuffer.GetSize(); +#if ASCENDC_CPU_DEBUG + ASCENDC_ASSERT((tmpBufferSize >= minTmpBufferSize), { + KERNEL_LOG(KERNEL_ERROR, + "tmpBufferSize can't smaller than minTmpBufferSize, tmpBufferSize is %u, minTmpBufferSize is %u!", + tmpBufferSize, + minTmpBufferSize); + }); +#endif + SetMaskCount(); + SetVectorMask(cumProdInfo.outter * + cumProdInfo.inner); + LocalTensor tmpBuffer = sharedTmpBuffer.ReinterpretCast(); + Cast( + tmpBuffer, 
srcTensor, RoundMode::CAST_NONE, MASK_PLACEHOLDER, 1, + {1, 1, DEFAULT_REPEAT_STRIDE, HALF_DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + + SetVectorMask(0, cumProdInfo.inner); + const BinaryRepeatParams binaryParams; + for (uint32_t row = 1; row < cumProdInfo.outter; ++row) { + Mul(tmpBuffer[row * cumProdInfo.inner], + tmpBuffer[(row - 1) * cumProdInfo.inner], + tmpBuffer[row * cumProdInfo.inner], MASK_PLACEHOLDER, 1, + binaryParams); + PipeBarrier(); + } + + SetVectorMask(cumProdInfo.outter * cumProdInfo.inner); + Cast( + dstTensor, tmpBuffer, RoundMode::CAST_NONE, MASK_PLACEHOLDER, 1, + {1, 1, HALF_DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + + } else { + SetMaskCount(); + SetVectorMask(0, cumProdInfo.inner); + Muls(dstTensor, srcTensor, 1, MASK_PLACEHOLDER, 1, + {1, 1, DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + const BinaryRepeatParams binaryParams; + for (uint32_t row = 1; row < cumProdInfo.outter; ++row) { + Mul(dstTensor[row * cumProdInfo.inner], + dstTensor[(row - 1) * cumProdInfo.inner], + srcTensor[row * cumProdInfo.inner], MASK_PLACEHOLDER, 1, + binaryParams); + PipeBarrier(); + } + SetMaskNorm(); + ResetMask(); + } +} +} // namespace AscendC +#endif // IMPL_MATH_CUMPROD_IMPL_H \ No newline at end of file diff --git a/impl/math/cumprod/cumprod_tiling.cpp b/impl/math/cumprod/cumprod_tiling.cpp new file mode 100644 index 00000000..ba14dfa4 --- /dev/null +++ b/impl/math/cumprod/cumprod_tiling.cpp @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. 
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/* !
+ * \file cumprod_tiling.cpp
+ * \brief
+ */
+#include "lib/math/cumprod_tiling.h"
+
+#include <algorithm>
+
+#include "graph/tensor.h"
+#include "impl/host_log.h"
+namespace AscendC {
+namespace {
+constexpr uint32_t CUMPROD_HALF_TMP_SIZE = 6;
+constexpr uint32_t CUMPROD_FLOAT_TMP_SIZE = 0;
+constexpr uint32_t CUMPROD_ONE_REPEAT_BYTE_SIZE = 256;
+
+inline uint32_t GetCumProdMaxTmpSize(const uint32_t inputSize, const uint32_t typeSize)
+{
+    const uint32_t calcPro = (typeSize == sizeof(float)) ? CUMPROD_FLOAT_TMP_SIZE : CUMPROD_HALF_TMP_SIZE;
+    return calcPro * std::max(inputSize * typeSize, CUMPROD_ONE_REPEAT_BYTE_SIZE);
+}
+
+inline uint32_t GetCumProdMinTmpSize(const uint32_t typeSize)
+{
+    const uint32_t calcPro = (typeSize == sizeof(float)) ? CUMPROD_FLOAT_TMP_SIZE : CUMPROD_HALF_TMP_SIZE;
+    return calcPro * CUMPROD_ONE_REPEAT_BYTE_SIZE;
+}
+} // namespace
+
+void GetCumProdMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t typeSize, const bool isLastAxis,
+    const bool isReuseSource, uint32_t &maxValue, uint32_t &minValue)
+{
+    (void)isLastAxis; (void)isReuseSource;
+    const uint32_t inputSize = srcShape.GetShapeSize();
+    ASCENDC_HOST_ASSERT(inputSize > 0, return, "Input Shape size must be greater than 0.");
+
+    minValue = GetCumProdMinTmpSize(typeSize);
+    maxValue = GetCumProdMaxTmpSize(inputSize, typeSize);
+}
+}// namespace AscendC
\ No newline at end of file
diff --git a/lib/math/cumprod.h b/lib/math/cumprod.h
new file mode 100644
index 00000000..5d452952
--- /dev/null
+++ b/lib/math/cumprod.h
@@ -0,0 +1,147 @@
+/**
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/* ! + * \file cumprod.h + * \brief + */ +#ifndef LIB_MATH_CUMPROD_H +#define LIB_MATH_CUMPROD_H + +#include "kernel_tensor.h" +#include "kernel_operator_intf.h" +#include "../../impl/math/cumprod/cumprod_impl.h" +#if ASCENDC_CPU_DEBUG +#include "kernel_log.h" +#endif +#if __CCE_AICORE__ >= 200 + +namespace AscendC { +#pragma begin_pipe(V) + +constexpr CumProdConfig defaultCumProdConfig = {true, false, true}; + +/* ! + * \brief This function calculates the average based on the orientation of the last axis or fist axis. 
+ * For details about the interface description, see + * https://pytorch.org/docs/stable/generated/torch.cumprod.html + * + * \note support data type: half and float + * + * \param [out] dstTensor, output LocalTensor + * \param [out] lastRowTensor, the last row of the output LocalTensor + * \param [in] srcTensor, input LocalTensor + * \param [in] sharedTmpBuffer, input local temporary Tensor + * \param [in] cumProdInfo, shape information of srcTensor + */ + +template +__aicore__ inline void CumProd(LocalTensor &dstTensor, LocalTensor &lastRowTensor, const LocalTensor &srcTensor, + LocalTensor &sharedTmpBuffer, const CumProdInfo &cumProdInfo) +{ + if ASCEND_IS_AIC { + return; + } + +#if ASCENDC_CPU_DEBUG + bool ans = cumProdInfo.inner > 0 && (cumProdInfo.inner * sizeof(T) % ONE_BLK_SIZE == 0); + ASCENDC_ASSERT(ans, { KERNEL_LOG(KERNEL_ERROR, "inner is %u, is not 32B aligned.", cumProdInfo.inner); }); + ans = srcTensor.GetSize() >= (cumProdInfo.inner * cumProdInfo.outter); + ASCENDC_ASSERT(ans, { KERNEL_LOG(KERNEL_ERROR, "srcTensor size isn't enough!."); }); + ans = dstTensor.GetSize() >= (cumProdInfo.inner * cumProdInfo.outter); + ASCENDC_ASSERT(ans, { KERNEL_LOG(KERNEL_ERROR, "dstTensor size isn't enough!."); }); + if (config.outputLastRow) { + ans = lastRowTensor.GetSize() >= cumProdInfo.inner; + ASCENDC_ASSERT(ans, { KERNEL_LOG(KERNEL_ERROR, "outputLastRow size isn't enough!."); }); + } +#endif + + if constexpr (config.isLastAxis) { + uint32_t minCastTempBufferSize = 0; + if constexpr (sizeof(T) == 2) { + minCastTempBufferSize = cumProdInfo.inner * NCHW_CONV_ADDR_LIST_SIZE * sizeof(half); + } + const uint32_t minTmpBufferSize = minCastTempBufferSize + NCHW_CONV_ADDR_LIST_SIZE * cumProdInfo.inner * + sizeof(T) * 2; + const uint32_t tmpBufferSize = sharedTmpBuffer.GetSize(); +#if ASCENDC_CPU_DEBUG + ASCENDC_ASSERT((tmpBufferSize >= minTmpBufferSize), { + KERNEL_LOG(KERNEL_ERROR, + "tmpBufferSize can't smaller than minTmpBufferSize, tmpBufferSize is %u, 
minTmpBufferSize is %u!", + tmpBufferSize, + minTmpBufferSize); + }); +#endif + const uint32_t oneRepeateSize = tmpBufferSize / minTmpBufferSize * NCHW_CONV_ADDR_LIST_SIZE; + const uint32_t rangeM = cumProdInfo.outter / oneRepeateSize; + const uint32_t tailM = cumProdInfo.outter - oneRepeateSize * rangeM; + uint32_t dstLocalOffset = 0; + uint32_t srcLocalOffset = 0; + LocalTensor tmpBuffer = sharedTmpBuffer.ReinterpretCast(); + for (uint32_t i = 0; i < rangeM; i++) { + CumProdLastDim( + dstTensor[dstLocalOffset], srcTensor[srcLocalOffset], tmpBuffer, {oneRepeateSize, cumProdInfo.inner}); + dstLocalOffset += cumProdInfo.inner * oneRepeateSize; + srcLocalOffset += cumProdInfo.inner * oneRepeateSize; + } + + if (tailM != 0) { + CumProdLastDim( + dstTensor[dstLocalOffset], srcTensor[srcLocalOffset], tmpBuffer, {tailM, cumProdInfo.inner}); + } + } else { + CumProdFirstDim(dstTensor, srcTensor, sharedTmpBuffer, cumProdInfo); + } + + if constexpr (config.outputLastRow) { + SetMaskCount(); + SetVectorMask(0, cumProdInfo.inner); + Adds(lastRowTensor, + dstTensor[(cumProdInfo.outter - 1) * cumProdInfo.inner], + 0, + MASK_PLACEHOLDER, + 1, + {1, 1, DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); + PipeBarrier(); + SetMaskNorm(); + ResetMask(); + } +} + +/* ! + * \brief This function calculates the average based on the orientation of the last axis or fist axis. 
+ * For details about the interface description, see
+ * https://pytorch.org/docs/stable/generated/torch.cumprod.html
+ *
+ * \note support data type: half and float
+ *
+ * \param [out] dstTensor, output LocalTensor
+ * \param [out] lastRowTensor, the last row of the output LocalTensor
+ * \param [in] srcTensor, input LocalTensor
+ * \param [in] cumProdInfo, shape information of srcTensor
+ */
+
+template <typename T, const CumProdConfig &config = defaultCumProdConfig>
+__aicore__ inline void CumProd(LocalTensor<T> &dstTensor, LocalTensor<T> &lastRowTensor, const LocalTensor<T> &srcTensor,
+    const CumProdInfo &cumProdInfo)
+{
+    if ASCEND_IS_AIC {
+        return;
+    }
+    LocalTensor<uint8_t> sharedTmpBuffer;
+    bool ans = PopStackBuffer(sharedTmpBuffer);
+    ASCENDC_ASSERT((ans), { KERNEL_LOG(KERNEL_ERROR, "PopStackBuffer Error!"); });
+    CumProd<T, config>(dstTensor, lastRowTensor, srcTensor, sharedTmpBuffer, cumProdInfo);
+}
+
+#pragma end_pipe
+} // namespace AscendC
+
+#endif
\ No newline at end of file
diff --git a/lib/math/cumprod_tiling.h b/lib/math/cumprod_tiling.h
new file mode 100644
index 00000000..d607fd18
--- /dev/null
+++ b/lib/math/cumprod_tiling.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/* !
+ * \file cumprod_tiling.h + * \brief + */ +#ifndef LIB_MATH_CUMPROD_TILING_H +#define LIB_MATH_CUMPROD_TILING_H +#include + +#include "graph/tensor.h" +namespace AscendC { +/* + * @ingroup GetCumProdMaxMinTmpSize + * @brief get cumprod api calculate need max and min temporary local space size + * @param [in] srcShape : src tensor shape + * @param [in] typeSize : src tensor dtype size + * @param [in] isLastAxis : whether to operate along the last axis + * @param [in] isReuseSource : whether to reuse the src Tensor + * @return max temporary local space size + * @return min temporary local space size + */ +void GetCumProdMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t typeSize, const bool isLastAxis, + const bool isReuseSource, uint32_t &maxValue, uint32_t &minValue); +} // namespace AscendC \ No newline at end of file diff --git a/lib/math/cumprod_tiling_intf.h b/lib/math/cumprod_tiling_intf.h new file mode 100644 index 00000000..72515c7c --- /dev/null +++ b/lib/math/cumprod_tiling_intf.h @@ -0,0 +1,23 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/* ! 
+ * \file cumprod_tiling_intf.h + * \brief + */ +#ifndef LIB_MATH_CUMPROD_TILING_INTF_H +#define LIB_MATH_CUMPROD_TILING_INTF_H + +#include "cumprod_tiling.h" +namespace AscendC { +[[deprecated(__FILE__ " is deprecated, please use cumprod_tiling.h instead!")]] +typedef void CumprodTilingDeprecatedHeader; +using LibCumprodTilingInterface = CumprodTilingDeprecatedHeader; +} // namespace AscendC \ No newline at end of file diff --git a/tests/math/test_operator_cumprod.cpp b/tests/math/test_operator_cumprod.cpp new file mode 100644 index 00000000..1f4d29c1 --- /dev/null +++ b/tests/math/test_operator_cumprod.cpp @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! 
+ * \file test_operator_cumprod.cpp + * \brief + */ +#include +#include "kernel_operator.h" + +using namespace std; +using namespace AscendC; + +class TEST_CUMPROD : public testing::Test { +protected: + void SetUp() + { + AscendC::SetGCoreType(2); + } + void TearDown() + { + AscendC::SetGCoreType(0); + } +}; + +template +void main_vec_cumprod_demo(__gm__ uint8_t* __restrict__ dstGm, __gm__ uint8_t* __restrict__ lastRawGm, __gm__ uint8_t* __restrict__ srcGm, uint32_t dataSize) +{ + TPipe tpipe; + GlobalTensor input0Global; + GlobalTensor input1Global; + GlobalTensor output0Global; + GlobalTensor output1Global; + input0Global.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(srcGm), dataSize); + output0Global.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(dstGm), dataSize); + output1Global.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(lastRawGm), dataSize); + + TBuf tbuf1; + tpipe.InitBuffer(tbuf1, dataSize * sizeof(T)); + LocalTensor input0Local = tbuf1.Get(); + + TBuf tbuf2; + tpipe.InitBuffer(tbuf2, dataSize * sizeof(T)); + LocalTensor input1Local = tbuf2.Get(); + + LocalTensor tmpLocal; + + TBuf tbuf3; + tpipe.InitBuffer(tbuf3, dataSize * sizeof(T)); + LocalTensor output0Local = tbuf3.Get(); + + TBuf tbuf4; + tpipe.InitBuffer(tbuf4, dataSize * sizeof(T)); + LocalTensor output1Local = tbuf4.Get(); + + DataCopy(input0Local, input0Global, dataSize); + + set_flag(PIPE_MTE2, PIPE_V, EVENT_ID0); + wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID0); + + CumProd(output0Local, output1Local, input0Local, CumProdInfo{1, dataSize}); + + set_flag(PIPE_V, PIPE_MTE3, EVENT_ID0); + wait_flag(PIPE_V, PIPE_MTE3, EVENT_ID0); + + DataCopy(output0Global, output0Local, dataSize); + DataCopy(output1Global, output1Local, dataSize); + + pipe_barrier(PIPE_ALL); +} +#define CUMPROD_TESTCASE(DATA_TYPE) \ + TEST_F(TEST_CUMPROD, CUMPROD##DATA_TYPE##Case) \ + { \ + uint32_t dataSize = 256; \ + uint32_t sel_mask_size = dataSize / AscendCUtils::GetBitSize(sizeof(uint8_t)); \ + uint8_t input0Gm[dataSize * 
sizeof(DATA_TYPE)]; \
+        uint8_t outputGm[dataSize * sizeof(DATA_TYPE)]; \
+        uint8_t lastRawGm[dataSize * sizeof(DATA_TYPE)]; \
+ \
+        main_vec_cumprod_demo<DATA_TYPE>(outputGm, lastRawGm, input0Gm, dataSize); \
+ \
+        for (uint32_t i = 0; i < dataSize; i++) { \
+            EXPECT_EQ(outputGm[i], 0x00); \
+            EXPECT_EQ(lastRawGm[i], 0x00); \
+        } \
+    }
+CUMPROD_TESTCASE(half);
+CUMPROD_TESTCASE(float);
\ No newline at end of file
diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp
index c72d121b..26a39b7a 100644
--- a/tests/tiling/test_tiling.cpp
+++ b/tests/tiling/test_tiling.cpp
@@ -3094,3 +3094,26 @@ TEST_F(TestTiling, tiling_compute_error)
     ret = bmm_tiling.Compute();
     EXPECT_EQ(ret, -1);
 }
+
+TEST_F(TestTiling, TestCumProdTilingHalf)
+{
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    auto CumProdShape = ge::Shape(shapeDims);
+    uint32_t CumProdNeedMaxSize;
+    uint32_t CumProdNeedMinSize;
+    GetCumProdMaxMinTmpSize(CumProdShape, 2, true, false, CumProdNeedMaxSize, CumProdNeedMinSize);
+    EXPECT_EQ(CumProdNeedMaxSize, 131072);
+    EXPECT_EQ(CumProdNeedMinSize, 1024);
+    GetCumProdMaxMinTmpSize(CumProdShape, 2, true, false, CumProdNeedMaxSize, CumProdNeedMinSize);
+}
+
+TEST_F(TestTiling, TestCumProdTilingFloat)
+{
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    auto CumProdShape = ge::Shape(shapeDims);
+    uint32_t CumProdNeedMaxSize;
+    uint32_t CumProdNeedMinSize;
+    GetCumProdMaxMinTmpSize(CumProdShape, 4, true, false, CumProdNeedMaxSize, CumProdNeedMinSize);
+    EXPECT_EQ(CumProdNeedMaxSize, 0);
+    EXPECT_EQ(CumProdNeedMinSize, 0);
+}
-- 
Gitee

From c76f9f357c5161c22a46e93f62fa5ab39e514db3 Mon Sep 17 00:00:00 2001
From: sheinilya
Date: Thu, 21 Nov 2024 20:10:17 +0800
Subject: [PATCH 2/4] Add empty line

---
 impl/math/cumprod/cumprod_impl.h     | 2 +-
 impl/math/cumprod/cumprod_tiling.cpp | 2 +-
 lib/math/cumprod.h                   | 2 +-
 lib/math/cumprod_tiling.h            | 2 +-
 lib/math/cumprod_tiling_intf.h       | 2 +-
 tests/math/test_operator_cumprod.cpp | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git 
a/impl/math/cumprod/cumprod_impl.h b/impl/math/cumprod/cumprod_impl.h index 313ac1e5..34515324 100644 --- a/impl/math/cumprod/cumprod_impl.h +++ b/impl/math/cumprod/cumprod_impl.h @@ -405,4 +405,4 @@ __aicore__ inline void CumProdFirstDim(const LocalTensor &dstTensor, } } } // namespace AscendC -#endif // IMPL_MATH_CUMPROD_IMPL_H \ No newline at end of file +#endif // IMPL_MATH_CUMPROD_IMPL_H diff --git a/impl/math/cumprod/cumprod_tiling.cpp b/impl/math/cumprod/cumprod_tiling.cpp index ba14dfa4..a8f009cf 100644 --- a/impl/math/cumprod/cumprod_tiling.cpp +++ b/impl/math/cumprod/cumprod_tiling.cpp @@ -47,4 +47,4 @@ void GetCumPodMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t typeSize, minValue = GetCumProdMinTmpSize(typeSize); maxValue = GetCumProdMaxTmpSize(inputSize, typeSize); } -}// namespace AscendC \ No newline at end of file +}// namespace AscendC diff --git a/lib/math/cumprod.h b/lib/math/cumprod.h index 5d452952..98fb54ea 100644 --- a/lib/math/cumprod.h +++ b/lib/math/cumprod.h @@ -144,4 +144,4 @@ __aicore__ inline void CumProd(LocalTensor &dstTensor, LocalTensor &lastRo #pragma end_pipe } // namespace AscendC -#endif \ No newline at end of file +#endif diff --git a/lib/math/cumprod_tiling.h b/lib/math/cumprod_tiling.h index d607fd18..e623c552 100644 --- a/lib/math/cumprod_tiling.h +++ b/lib/math/cumprod_tiling.h @@ -30,4 +30,4 @@ namespace AscendC { */ void GetCumProdMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t typeSize, const bool isLastAxis, const bool isReuseSource, uint32_t &maxValue, uint32_t &minValue); -} // namespace AscendC \ No newline at end of file +} // namespace AscendC diff --git a/lib/math/cumprod_tiling_intf.h b/lib/math/cumprod_tiling_intf.h index 72515c7c..6ce8c05d 100644 --- a/lib/math/cumprod_tiling_intf.h +++ b/lib/math/cumprod_tiling_intf.h @@ -20,4 +20,4 @@ namespace AscendC { [[deprecated(__FILE__ " is deprecated, please use cumprod_tiling.h instead!")]] typedef void CumprodTilingDeprecatedHeader; using 
LibCumprodTilingInterface = CumprodTilingDeprecatedHeader; -} // namespace AscendC \ No newline at end of file +} // namespace AscendC diff --git a/tests/math/test_operator_cumprod.cpp b/tests/math/test_operator_cumprod.cpp index 1f4d29c1..4598c100 100644 --- a/tests/math/test_operator_cumprod.cpp +++ b/tests/math/test_operator_cumprod.cpp @@ -92,4 +92,4 @@ void main_vec_cumprod_demo(__gm__ uint8_t* __restrict__ dstGm, __gm__ uint8_t* _ } \ } CUMPROD_TESTCASE(half); -CUMPROD_TESTCASE(float); \ No newline at end of file +CUMPROD_TESTCASE(float); -- Gitee From 1dd9fdbcb33bbfde3121781f1fae97802b402c6a Mon Sep 17 00:00:00 2001 From: sheinilya Date: Thu, 21 Nov 2024 20:38:31 +0800 Subject: [PATCH 3/4] Add kernel operator --- cmake/kernel_headers.cmake | 8 ++++++-- cmake/tiling_headers.cmake | 4 ++-- lib/math/kernel_operator_cumprod_intf.h | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 lib/math/kernel_operator_cumprod_intf.h diff --git a/cmake/kernel_headers.cmake b/cmake/kernel_headers.cmake index 22010a38..0a9724ac 100644 --- a/cmake/kernel_headers.cmake +++ b/cmake/kernel_headers.cmake @@ -199,7 +199,11 @@ file(CREATE_LINK ../activation/geglu_tiling.h # cumprod file(MAKE_DIRECTORY ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod) -file(CREATE_LINK ../scan/kernel_operator_cumprod_intf.h +file(CREATE_LINK ../math/kernel_operator_cumprod_intf.h ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod/kernel_operator_cumprod_intf.h SYMBOLIC) -file(CREATE_LINK ../sacn/cumprod.h +file(CREATE_LINK ../math/cumprod.h ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod/cumprod.h SYMBOLIC) +file(CREATE_LINK ../math/cumprod_tiling_intf.h + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod/cumprod_tiling_intf.h SYMBOLIC) +file(CREATE_LINK ../math/cumprod_tiling.h + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/lib/cumprod/cumprod_tiling.h 
SYMBOLIC) diff --git a/cmake/tiling_headers.cmake b/cmake/tiling_headers.cmake index b2579768..698741da 100644 --- a/cmake/tiling_headers.cmake +++ b/cmake/tiling_headers.cmake @@ -237,6 +237,6 @@ file(CREATE_LINK ../lib/tiling_api.h # cumprod file(MAKE_DIRECTORY ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/cumprod) file(CREATE_LINK ../../lib/math/cumprod_tiling.h - ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/math/cumprod_tiling.h SYMBOLIC) + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/cumprod/cumprod_tiling.h SYMBOLIC) file(CREATE_LINK ../../lib/math/cumprod_tiling_intf.h - ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/math/cumprod_tiling_intf.h SYMBOLIC) + ${ASCENDC_INSTALL_BASE_PATH}/ascendc/include/highlevel_api/tiling/cumprod/cumprod_tiling_intf.h SYMBOLIC) diff --git a/lib/math/kernel_operator_cumprod_intf.h b/lib/math/kernel_operator_cumprod_intf.h new file mode 100644 index 00000000..e8925e00 --- /dev/null +++ b/lib/math/kernel_operator_cumprod_intf.h @@ -0,0 +1,24 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! 
+ * \file kernel_operator_cumprod_intf.h + * \brief + */ +#ifndef ASCENDC_MODULE_OPERATOR_CUMPROD_INTERFACE_H +#define ASCENDC_MODULE_OPERATOR_CUMPROD_INTERFACE_H + +#include "cumprod.h" + +namespace AscendC { +[[deprecated(__FILE__ " is deprecated, please use cumprod.h instead!")]] typedef void using_deprecated_header_h; +using ASCENDC_MODULE_OPERATOR_CUMPROD_INTERFACE = using_deprecated_header_h; +} // namespace AscendC +#endif // ASCENDC_MODULE_OPERATOR_CUMPROD_INTERFACE_H -- Gitee From f5157d55796a4c2ccb300d52526255093e38ced4 Mon Sep 17 00:00:00 2001 From: sheinilya Date: Fri, 22 Nov 2024 15:17:22 +0800 Subject: [PATCH 4/4] small fix --- impl/math/cumprod/cumprod_impl.h | 4 ++-- impl/math/cumprod/cumprod_tiling.cpp | 2 +- lib/math/cumprod.h | 6 +++--- lib/math/cumprod_tiling.h | 2 +- lib/math/cumprod_tiling_intf.h | 2 +- lib/math/kernel_operator_cumprod_intf.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/impl/math/cumprod/cumprod_impl.h b/impl/math/cumprod/cumprod_impl.h index 34515324..e9861454 100644 --- a/impl/math/cumprod/cumprod_impl.h +++ b/impl/math/cumprod/cumprod_impl.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the @@ -405,4 +405,4 @@ __aicore__ inline void CumProdFirstDim(const LocalTensor &dstTensor, } } } // namespace AscendC -#endif // IMPL_MATH_CUMPROD_IMPL_H +#endif // IMPL_MATH_CUMPROD_CUMPROD_IMPL_H diff --git a/impl/math/cumprod/cumprod_tiling.cpp b/impl/math/cumprod/cumprod_tiling.cpp index a8f009cf..f2e693de 100644 --- a/impl/math/cumprod/cumprod_tiling.cpp +++ b/impl/math/cumprod/cumprod_tiling.cpp @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). 
diff --git a/lib/math/cumprod.h b/lib/math/cumprod.h index 98fb54ea..09d2e68a 100644 --- a/lib/math/cumprod.h +++ b/lib/math/cumprod.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). @@ -103,9 +103,9 @@ __aicore__ inline void CumProd(LocalTensor &dstTensor, LocalTensor &lastRo if constexpr (config.outputLastRow) { SetMaskCount(); SetVectorMask(0, cumProdInfo.inner); - Adds(lastRowTensor, + Muls(lastRowTensor, dstTensor[(cumProdInfo.outter - 1) * cumProdInfo.inner], - 0, + 1, MASK_PLACEHOLDER, 1, {1, 1, DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE}); diff --git a/lib/math/cumprod_tiling.h b/lib/math/cumprod_tiling.h index e623c552..23b06272 100644 --- a/lib/math/cumprod_tiling.h +++ b/lib/math/cumprod_tiling.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). diff --git a/lib/math/cumprod_tiling_intf.h b/lib/math/cumprod_tiling_intf.h index 6ce8c05d..4409b4a1 100644 --- a/lib/math/cumprod_tiling_intf.h +++ b/lib/math/cumprod_tiling_intf.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). diff --git a/lib/math/kernel_operator_cumprod_intf.h b/lib/math/kernel_operator_cumprod_intf.h index e8925e00..db041a9f 100644 --- a/lib/math/kernel_operator_cumprod_intf.h +++ b/lib/math/kernel_operator_cumprod_intf.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2024 Huawei Technologies Co., Ltd. * This file is a part of the CANN Open Software. * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). -- Gitee