From 82e41b7f20770cc9e31f017ba8a9a0f8a64660cc Mon Sep 17 00:00:00 2001 From: PengC Date: Wed, 19 Feb 2025 11:12:09 +0800 Subject: [PATCH] =?UTF-8?q?=E9=AB=98=E9=98=B6api=E6=B7=BB=E5=8A=A0DFX?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/README.md | 9 +++++ impl/common/check.h | 54 +++++++++++++++++++++++++++++ impl/math/fmod/fmod_common_impl.h | 21 ++++++++--- impl/math/trunc/trunc_common_impl.h | 16 +++++++-- tests/CMakeLists.txt | 2 ++ 5 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 impl/common/check.h diff --git a/docs/README.md b/docs/README.md index 0e9c4ad8..3e268582 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,6 +8,15 @@ 类别 API 描述 + + 数学库 + Fmod + 按元素计算两个浮点数相除后的余数。 + + + Trunc + 按元素做浮点数截断操作,即向零取整操作。 + 量化反量化 AscendAntiQuant diff --git a/impl/common/check.h b/impl/common/check.h new file mode 100644 index 00000000..a095d86e --- /dev/null +++ b/impl/common/check.h @@ -0,0 +1,54 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. 
+ */
+#ifndef IMPL_COMMON_CHECK_H
+#define IMPL_COMMON_CHECK_H
+
+namespace AscendC {
+// Debug-only (ASCENDC_CPU_DEBUG): asserts checkTensor is in VECIN/VECOUT/VECCALC; supportPosInfo is log text only.
+template <typename T>
+__aicore__ inline void CheckTensorPosition(const LocalTensor<T> &checkTensor, __gm__ const char* tensorInfo,
+    __gm__ const char* supportPosInfo)
+{
+#if ASCENDC_CPU_DEBUG
+    ASCENDC_ASSERT(((TPosition)checkTensor.GetPosition() == TPosition::VECIN ||
+        (TPosition)checkTensor.GetPosition() == TPosition::VECOUT ||
+        (TPosition)checkTensor.GetPosition() == TPosition::VECCALC), {
+        KERNEL_LOG(KERNEL_ERROR,
+            "Failed to check tensor position of %s, current api support positions are %s, current position is %s.",
+            tensorInfo, supportPosInfo,
+            ConstDefiner::Instance().logicNameMap.at(static_cast<uint8_t>(checkTensor.GetPosition())).c_str());
+    });
+#endif
+}
+// Debug-only (ASCENDC_CPU_DEBUG): asserts calCount <= checkTensor.GetSize(); *Info strings only label the log.
+template <typename T>
+__aicore__ inline void CheckCalCount(const uint32_t calCount, __gm__ const char* calCountInfo,
+    const LocalTensor<T> &checkTensor, __gm__ const char* tensorInfo, __gm__ const char* apiInfo)
+{
+#if ASCENDC_CPU_DEBUG
+    ASCENDC_ASSERT((calCount <= checkTensor.GetSize()), {
+        KERNEL_LOG(KERNEL_ERROR,
+            "The %s parameter cannot be %u, should not be larger than %s size %u in %s.", calCountInfo, calCount,
+            tensorInfo, checkTensor.GetSize(), apiInfo);
+    });
+#endif
+}
+// Asserts checkBufferSize > compBufferSize; tmpBufferSize is only logged. Not guarded by ASCENDC_CPU_DEBUG.
+__aicore__ inline void CheckTmpBufferSize(const uint32_t checkBufferSize, const uint32_t compBufferSize,
+    const uint32_t tmpBufferSize)
+{
+    ASCENDC_ASSERT((checkBufferSize > compBufferSize), {
+        KERNEL_LOG(KERNEL_ERROR, "Insufficient temporary space, current operation is not enough, "
+            "but only %u units are available, please check the host tiling.", tmpBufferSize);
+    });
+}
+
+} // namespace AscendC
+#endif // IMPL_COMMON_CHECK_H
\ No newline at end of file
diff --git a/impl/math/fmod/fmod_common_impl.h b/impl/math/fmod/fmod_common_impl.h
index 8cbdf2a5..264afdeb 100644
--- a/impl/math/fmod/fmod_common_impl.h
+++ b/impl/math/fmod/fmod_common_impl.h
@@ -15,6 +15,7 @@
 #ifndef IMPL_MATH_FMOD_FMOD_COMMON_IMPL_H
 #define IMPL_MATH_FMOD_FMOD_COMMON_IMPL_H
#include "kernel_tensor.h" +#include "../../common/check.h" namespace AscendC { namespace { @@ -73,11 +74,20 @@ __aicore__ inline void FmodImpl(const LocalTensor &dstTensor, const LocalTens return; } + CheckTensorPosition(dstTensor, "dstTensor", "VECIN, VECOUT, VECCALC"); + CheckTensorPosition(src0Tensor, "src0Tensor", "VECIN, VECOUT, VECCALC"); + CheckTensorPosition(src1Tensor, "src1Tensor", "VECIN, VECOUT, VECCALC"); + CheckTensorPosition(sharedTmpBuffer, "sharedTmpBuffer", "VECIN, VECOUT, VECCALC"); + + CheckCalCount(calCount, "calCount", src0Tensor, "src0Tensor", "Fmod"); + CheckCalCount(calCount, "calCount", src1Tensor, "src1Tensor", "Fmod"); + CheckCalCount(calCount, "calCount", dstTensor, "dstTensor", "Fmod"); + + ASCENDC_ASSERT((std::is_same::value || std::is_same::value), { + KERNEL_LOG( KERNEL_ERROR, "Failed to check the data types, current api support data types are half/float."); }); + ASCENDC_ASSERT((src0Tensor.GetSize() == src1Tensor.GetSize()), { KERNEL_LOG(KERNEL_ERROR, "Input params.GetSize must be equal with each other!"); }); - ASCENDC_ASSERT((src0Tensor.GetSize() >= calCount && dstTensor.GetSize() >= calCount), { - KERNEL_LOG(KERNEL_ERROR, "Size of calCount should be less than or equal to size of Input and Output!"); - }); if constexpr (sizeof(T) == sizeof(float)) { FmodCompute(dstTensor, src0Tensor, src1Tensor, sharedTmpBuffer, src0Tensor.GetSize(), calCount); @@ -85,9 +95,10 @@ __aicore__ inline void FmodImpl(const LocalTensor &dstTensor, const LocalTens } constexpr uint32_t maxLiveNodeCnt = 8; // The corresponding maxLiveNodeCnt for half is 8, extra is 3 * 2 + trunc 2. 
+ uint32_t bufferSize = sharedTmpBuffer.GetSize(); uint32_t stackSize = - sharedTmpBuffer.GetSize() / sizeof(T) / maxLiveNodeCnt / ONE_BLK_SIZE * ONE_BLK_SIZE; // divided by how many counts - ASCENDC_ASSERT((stackSize > 0), { KERNEL_LOG(KERNEL_ERROR, "stackSize must > 0!"); }); + bufferSize / sizeof(T) / maxLiveNodeCnt / ONE_BLK_SIZE * ONE_BLK_SIZE; // divided by how many counts + CheckTmpBufferSize(stackSize, 0, bufferSize); ASCENDC_ASSERT((src0Tensor.GetSize() > 0), { KERNEL_LOG(KERNEL_ERROR, "src0Tensor size must > 0!"); }); stackSize = stackSize > src0Tensor.GetSize() ? src0Tensor.GetSize() : stackSize; // No more than localTensor diff --git a/impl/math/trunc/trunc_common_impl.h b/impl/math/trunc/trunc_common_impl.h index f05e83ff..6c5682ca 100644 --- a/impl/math/trunc/trunc_common_impl.h +++ b/impl/math/trunc/trunc_common_impl.h @@ -10,6 +10,7 @@ #ifndef IMPL_MATH_TRUNC_TRUNC_COMMON_IMPL_H #define IMPL_MATH_TRUNC_TRUNC_COMMON_IMPL_H #include "kernel_tensor.h" +#include "../../common/check.h" #if __CCE_AICORE__ == 200 #include "trunc_v200_impl.h" #elif __CCE_AICORE__ == 220 @@ -50,14 +51,25 @@ __aicore__ inline void TruncImpl(const LocalTensor& dstTensor, const LocalTen return; } - uint32_t splitCount = sharedTmpBuffer.GetSize() / sizeof(T); + CheckTensorPosition(dstTensor, "dstTensor", "VECIN, VECOUT, VECCALC"); + CheckTensorPosition(srcTensor, "srcTensor", "VECIN, VECOUT, VECCALC"); + CheckTensorPosition(sharedTmpBuffer, "sharedTmpBuffer", "VECIN, VECOUT, VECCALC"); + + CheckCalCount(calCount, "calCount", srcTensor, "srcTensor", "Trunc"); + CheckCalCount(calCount, "calCount", dstTensor, "dstTensor", "Trunc"); + + ASCENDC_ASSERT((std::is_same::value || std::is_same::value), { + KERNEL_LOG( KERNEL_ERROR, "Failed to check the data types, current api support data types are half/float."); }); + + uint32_t tmpBufferSize = sharedTmpBuffer.GetSize(); + uint32_t splitCount = tmpBufferSize / sizeof(T); constexpr uint32_t TRUNC_HALF_CALC_PROCEDURE = 2; if constexpr 
(sizeof(T) == sizeof(half)) { splitCount = splitCount / TRUNC_HALF_CALC_PROCEDURE / ONE_BLK_SIZE * ONE_BLK_SIZE; } else { splitCount = splitCount / ONE_BLK_SIZE * ONE_BLK_SIZE; } - ASCENDC_ASSERT((splitCount > 0), { KERNEL_LOG(KERNEL_ERROR, "splitCount must > 0!"); }); + CheckTmpBufferSize(splitCount, 0, tmpBufferSize); uint32_t loopCount = calCount / splitCount; uint32_t calcTail = calCount % splitCount; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 32a937e9..7fdfe0e5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -175,6 +175,8 @@ foreach(product_type ${PRODUCT_TYPE_LIST}) ${ASCENDC_TEST_${product_type}_CASE_SRC_FILES} ) + add_dependencies(ascendc_utest_${product_type} gen_kernel_tiling) + # add soc version flags if(${product_type} STREQUAL "ascend610") target_compile_definitions(ascendc_utest_${product_type} PRIVATE -- Gitee