diff --git a/docs/README.md b/docs/README.md
index 0e9c4ad855c195cd90a58f1b80b8004b8aa43177..3e2685829f7c1aff31324ea0b345802400d5036a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,6 +8,15 @@
类别 |
API |
描述 |
+
+ 数学库 |
+ Fmod |
+ 按元素计算两个浮点数相除后的余数。 |
+
+
+ Trunc |
+ 按元素做浮点数截断操作,即向零取整操作。 |
+
量化反量化 |
AscendAntiQuant |
diff --git a/impl/common/check.h b/impl/common/check.h
new file mode 100644
index 0000000000000000000000000000000000000000..a095d86ed1acb598beeedaca58ef299a80d634c1
--- /dev/null
+++ b/impl/common/check.h
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#ifndef IMPL_COMMON_CHECK_H
+#define IMPL_COMMON_CHECK_H
+
+namespace AscendC {
+
+template <typename T> // T is the element type of checkTensor
+__aicore__ inline void CheckTensorPosition(const LocalTensor<T> &checkTensor, __gm__ const char* tensorInfo,
+ __gm__ const char* supportPosInfo) // tensorInfo / supportPosInfo are used only as %s arguments of the error log
+{ // Asserts that checkTensor is at VECIN, VECOUT or VECCALC; the accepted set is hard-coded in the condition below.
+#if ASCENDC_CPU_DEBUG // the check is compiled only when ASCENDC_CPU_DEBUG evaluates to non-zero
+ ASCENDC_ASSERT(((TPosition)checkTensor.GetPosition() == TPosition::VECIN ||
+ (TPosition)checkTensor.GetPosition() == TPosition::VECOUT ||
+ (TPosition)checkTensor.GetPosition() == TPosition::VECCALC), {
+ KERNEL_LOG(KERNEL_ERROR,
+ "Failed to check tensor position of %s, current api support positions are %s, current position is %s.",
+ tensorInfo, supportPosInfo, // NOTE(review): key type of logicNameMap assumed to be uint8_t - confirm
+ ConstDefiner::Instance().logicNameMap.at(static_cast<uint8_t>(checkTensor.GetPosition())).c_str());
+ });
+#endif
+}
+
+template <typename T> // T is the element type of checkTensor
+__aicore__ inline void CheckCalCount(const uint32_t calCount, __gm__ const char* calCountInfo,
+ const LocalTensor<T> &checkTensor, __gm__ const char* tensorInfo, __gm__ const char* apiInfo)
+{ // Asserts calCount <= checkTensor.GetSize(); the three *Info strings only fill in the error message.
+#if ASCENDC_CPU_DEBUG // the check is compiled only when ASCENDC_CPU_DEBUG evaluates to non-zero
+ ASCENDC_ASSERT((calCount <= checkTensor.GetSize()), {
+ KERNEL_LOG(KERNEL_ERROR,
+ "The %s parameter cannot be %u, should not be larger than %s size %u in %s.", calCountInfo, calCount,
+ tensorInfo, checkTensor.GetSize(), apiInfo);
+ });
+#endif
+}
+
+__aicore__ inline void CheckTmpBufferSize(const uint32_t checkBufferSize, const uint32_t compBufferSize,
+ const uint32_t tmpBufferSize) // tmpBufferSize is only reported in the log; it takes no part in the comparison
+{ // Asserts checkBufferSize > compBufferSize; on failure logs tmpBufferSize as the available temporary space.
+ ASCENDC_ASSERT((checkBufferSize > compBufferSize), {
+ KERNEL_LOG(KERNEL_ERROR, "Insufficient temporary space for the current operation, "
+ "only %u units are available, please check the host tiling.", tmpBufferSize);
+ });
+}
+
+} // namespace AscendC
+#endif // IMPL_COMMON_CHECK_H
\ No newline at end of file
diff --git a/impl/math/fmod/fmod_common_impl.h b/impl/math/fmod/fmod_common_impl.h
index 8cbdf2a53267c6bbed6617d031b2176ecbaeeb19..264afdebbff4c93ec9bb5a49ceac2bee4cbb8fcf 100644
--- a/impl/math/fmod/fmod_common_impl.h
+++ b/impl/math/fmod/fmod_common_impl.h
@@ -15,6 +15,7 @@
#ifndef IMPL_MATH_FMOD_FMOD_COMMON_IMPL_H
#define IMPL_MATH_FMOD_FMOD_COMMON_IMPL_H
#include "kernel_tensor.h"
+#include "../../common/check.h"
namespace AscendC {
namespace {
@@ -73,11 +74,20 @@ __aicore__ inline void FmodImpl(const LocalTensor &dstTensor, const LocalTens
return;
}
+ CheckTensorPosition(dstTensor, "dstTensor", "VECIN, VECOUT, VECCALC");
+ CheckTensorPosition(src0Tensor, "src0Tensor", "VECIN, VECOUT, VECCALC");
+ CheckTensorPosition(src1Tensor, "src1Tensor", "VECIN, VECOUT, VECCALC");
+ CheckTensorPosition(sharedTmpBuffer, "sharedTmpBuffer", "VECIN, VECOUT, VECCALC");
+
+ CheckCalCount(calCount, "calCount", src0Tensor, "src0Tensor", "Fmod");
+ CheckCalCount(calCount, "calCount", src1Tensor, "src1Tensor", "Fmod");
+ CheckCalCount(calCount, "calCount", dstTensor, "dstTensor", "Fmod");
+
+ ASCENDC_ASSERT((std::is_same::value || std::is_same::value), {
+ KERNEL_LOG( KERNEL_ERROR, "Failed to check the data types, current api support data types are half/float."); });
+
ASCENDC_ASSERT((src0Tensor.GetSize() == src1Tensor.GetSize()),
{ KERNEL_LOG(KERNEL_ERROR, "Input params.GetSize must be equal with each other!"); });
- ASCENDC_ASSERT((src0Tensor.GetSize() >= calCount && dstTensor.GetSize() >= calCount), {
- KERNEL_LOG(KERNEL_ERROR, "Size of calCount should be less than or equal to size of Input and Output!");
- });
if constexpr (sizeof(T) == sizeof(float)) {
FmodCompute(dstTensor, src0Tensor, src1Tensor, sharedTmpBuffer, src0Tensor.GetSize(), calCount);
@@ -85,9 +95,10 @@ __aicore__ inline void FmodImpl(const LocalTensor &dstTensor, const LocalTens
}
constexpr uint32_t maxLiveNodeCnt = 8; // The corresponding maxLiveNodeCnt for half is 8, extra is 3 * 2 + trunc 2.
+ uint32_t bufferSize = sharedTmpBuffer.GetSize();
uint32_t stackSize =
- sharedTmpBuffer.GetSize() / sizeof(T) / maxLiveNodeCnt / ONE_BLK_SIZE * ONE_BLK_SIZE; // divided by how many counts
- ASCENDC_ASSERT((stackSize > 0), { KERNEL_LOG(KERNEL_ERROR, "stackSize must > 0!"); });
+ bufferSize / sizeof(T) / maxLiveNodeCnt / ONE_BLK_SIZE * ONE_BLK_SIZE; // divided by how many counts
+ CheckTmpBufferSize(stackSize, 0, bufferSize);
ASCENDC_ASSERT((src0Tensor.GetSize() > 0), { KERNEL_LOG(KERNEL_ERROR, "src0Tensor size must > 0!"); });
stackSize = stackSize > src0Tensor.GetSize() ? src0Tensor.GetSize() : stackSize; // No more than localTensor
diff --git a/impl/math/trunc/trunc_common_impl.h b/impl/math/trunc/trunc_common_impl.h
index f05e83ff5a487f442c1be274c592887b81b1fd68..6c5682ca4eb80ea67a71d36b0a2c2d2e748c3de5 100644
--- a/impl/math/trunc/trunc_common_impl.h
+++ b/impl/math/trunc/trunc_common_impl.h
@@ -10,6 +10,7 @@
#ifndef IMPL_MATH_TRUNC_TRUNC_COMMON_IMPL_H
#define IMPL_MATH_TRUNC_TRUNC_COMMON_IMPL_H
#include "kernel_tensor.h"
+#include "../../common/check.h"
#if __CCE_AICORE__ == 200
#include "trunc_v200_impl.h"
#elif __CCE_AICORE__ == 220
@@ -50,14 +51,25 @@ __aicore__ inline void TruncImpl(const LocalTensor& dstTensor, const LocalTen
return;
}
- uint32_t splitCount = sharedTmpBuffer.GetSize() / sizeof(T);
+ CheckTensorPosition(dstTensor, "dstTensor", "VECIN, VECOUT, VECCALC");
+ CheckTensorPosition(srcTensor, "srcTensor", "VECIN, VECOUT, VECCALC");
+ CheckTensorPosition(sharedTmpBuffer, "sharedTmpBuffer", "VECIN, VECOUT, VECCALC");
+
+ CheckCalCount(calCount, "calCount", srcTensor, "srcTensor", "Trunc");
+ CheckCalCount(calCount, "calCount", dstTensor, "dstTensor", "Trunc");
+
+ ASCENDC_ASSERT((std::is_same::value || std::is_same::value), {
+ KERNEL_LOG( KERNEL_ERROR, "Failed to check the data types, current api support data types are half/float."); });
+
+ uint32_t tmpBufferSize = sharedTmpBuffer.GetSize();
+ uint32_t splitCount = tmpBufferSize / sizeof(T);
constexpr uint32_t TRUNC_HALF_CALC_PROCEDURE = 2;
if constexpr (sizeof(T) == sizeof(half)) {
splitCount = splitCount / TRUNC_HALF_CALC_PROCEDURE / ONE_BLK_SIZE * ONE_BLK_SIZE;
} else {
splitCount = splitCount / ONE_BLK_SIZE * ONE_BLK_SIZE;
}
- ASCENDC_ASSERT((splitCount > 0), { KERNEL_LOG(KERNEL_ERROR, "splitCount must > 0!"); });
+ CheckTmpBufferSize(splitCount, 0, tmpBufferSize);
uint32_t loopCount = calCount / splitCount;
uint32_t calcTail = calCount % splitCount;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 32a937e9812c59bbfb15c8ab1f29ce16e683c8da..7fdfe0e51dacc7f62ca2c28d13553ef387f78d38 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -175,6 +175,8 @@ foreach(product_type ${PRODUCT_TYPE_LIST})
${ASCENDC_TEST_${product_type}_CASE_SRC_FILES}
)
+ add_dependencies(ascendc_utest_${product_type} gen_kernel_tiling)
+
# add soc version flags
if(${product_type} STREQUAL "ascend610")
target_compile_definitions(ascendc_utest_${product_type} PRIVATE