From 82e41b7f20770cc9e31f017ba8a9a0f8a64660cc Mon Sep 17 00:00:00 2001
From: PengC <chupeng5@huawei.com>
Date: Wed, 19 Feb 2025 11:12:09 +0800
Subject: [PATCH] =?UTF-8?q?=E9=AB=98=E9=98=B6api=E6=B7=BB=E5=8A=A0DFX?=
 =?UTF-8?q?=E4=BF=A1=E6=81=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/README.md                      |  9 +++++
 impl/common/check.h                 | 54 +++++++++++++++++++++++++++++
 impl/math/fmod/fmod_common_impl.h   | 21 ++++++++---
 impl/math/trunc/trunc_common_impl.h | 16 +++++++--
 tests/CMakeLists.txt                |  2 ++
 5 files changed, 95 insertions(+), 7 deletions(-)
 create mode 100644 impl/common/check.h
diff --git a/docs/README.md b/docs/README.md
index 0e9c4ad8..3e268582 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,6 +8,15 @@
     <td> 类别 </td>
     <td> API </td>
     <td> 描述 </td>
+    <tr>
+        <th rowspan="2"> 数学库 </th>
+        <td> Fmod </td>
+        <td> 按元素计算两个浮点数相除后的余数。 </td>
+    </tr>
+    <tr>
+        <td> Trunc </td>
+        <td> 按元素做浮点数截断操作，即向零取整操作。 </td>
+    </tr>
     <tr>
         <th rowspan="3"> 量化反量化 </th>
         <td> AscendAntiQuant </td>
diff --git a/impl/common/check.h b/impl/common/check.h
new file mode 100644
index 00000000..a095d86e
--- /dev/null
+++ b/impl/common/check.h
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#ifndef IMPL_COMMON_CHECK_H
+#define IMPL_COMMON_CHECK_H
+
+namespace AscendC {
+
+// Debug-only check that checkTensor is in VECIN, VECOUT or VECCALC; supportPosInfo is used for the log text only.
+template <typename T>
+__aicore__ inline void CheckTensorPosition(const LocalTensor<T> &checkTensor, __gm__ const char* tensorInfo,
+    __gm__ const char* supportPosInfo)
+{
+#if ASCENDC_CPU_DEBUG
+    const auto pos = static_cast<TPosition>(checkTensor.GetPosition()); // query once, reuse for check and log
+    ASCENDC_ASSERT((pos == TPosition::VECIN || pos == TPosition::VECOUT || pos == TPosition::VECCALC), {
+        KERNEL_LOG(KERNEL_ERROR,
+            "Failed to check tensor position of %s, current api support positions are %s, current position is %s.",
+            tensorInfo, supportPosInfo,
+            ConstDefiner::Instance().logicNameMap.at(static_cast<uint8_t>(pos)).c_str());
+    });
+#endif
+}
+
+// With ASCENDC_CPU_DEBUG set, asserts calCount <= checkTensor.GetSize(); the *Info strings only label the log.
+template <typename T>
+__aicore__ inline void CheckCalCount(const uint32_t calCount, __gm__ const char* calCountInfo,
+    const LocalTensor<T> &checkTensor, __gm__ const char* tensorInfo, __gm__ const char* apiInfo)
+{
+#if ASCENDC_CPU_DEBUG
+    ASCENDC_ASSERT((checkTensor.GetSize() >= calCount), {
+        KERNEL_LOG(KERNEL_ERROR, "The %s parameter cannot be %u, should not be larger than %s size %u in %s.",
+            calCountInfo, calCount, tensorInfo, checkTensor.GetSize(), apiInfo);
+    });
+#endif // ASCENDC_CPU_DEBUG
+}
+
+// Asserts checkBufferSize > compBufferSize; tmpBufferSize is only reported in the log as the available space.
+__aicore__ inline void CheckTmpBufferSize(const uint32_t checkBufferSize, const uint32_t compBufferSize,
+    const uint32_t tmpBufferSize)
+{
+    ASCENDC_ASSERT((checkBufferSize > compBufferSize), { KERNEL_LOG(KERNEL_ERROR,
+        "Insufficient temporary space for the current operation, only %u units are available, "
+        "please check the host tiling.", tmpBufferSize); });
+}
+
+} // namespace AscendC
+#endif // IMPL_COMMON_CHECK_H
diff --git a/impl/math/fmod/fmod_common_impl.h b/impl/math/fmod/fmod_common_impl.h
index 8cbdf2a5..264afdeb 100644
--- a/impl/math/fmod/fmod_common_impl.h
+++ b/impl/math/fmod/fmod_common_impl.h
@@ -15,6 +15,7 @@
 #ifndef IMPL_MATH_FMOD_FMOD_COMMON_IMPL_H
 #define IMPL_MATH_FMOD_FMOD_COMMON_IMPL_H
 #include "kernel_tensor.h"
+#include "../../common/check.h"
 
 namespace AscendC {
 namespace {
@@ -73,11 +74,20 @@ __aicore__ inline void FmodImpl(const LocalTensor<T> &dstTensor, const LocalTens
         return;
     }
 
+    CheckTensorPosition(dstTensor, "dstTensor", "VECIN, VECOUT, VECCALC");
+    CheckTensorPosition(src0Tensor, "src0Tensor", "VECIN, VECOUT, VECCALC");
+    CheckTensorPosition(src1Tensor, "src1Tensor", "VECIN, VECOUT, VECCALC");
+    CheckTensorPosition(sharedTmpBuffer, "sharedTmpBuffer", "VECIN, VECOUT, VECCALC");
+
+    CheckCalCount(calCount, "calCount", src0Tensor, "src0Tensor", "Fmod");
+    CheckCalCount(calCount, "calCount", src1Tensor, "src1Tensor", "Fmod");
+    CheckCalCount(calCount, "calCount", dstTensor, "dstTensor", "Fmod");
+
+    ASCENDC_ASSERT((std::is_same<T, float>::value || std::is_same<T, half>::value), {
+        KERNEL_LOG( KERNEL_ERROR, "Failed to check the data types, current api support data types are half/float."); });
+
     ASCENDC_ASSERT((src0Tensor.GetSize() == src1Tensor.GetSize()),
                    { KERNEL_LOG(KERNEL_ERROR, "Input params.GetSize must be equal with each other!"); });
-    ASCENDC_ASSERT((src0Tensor.GetSize() >= calCount && dstTensor.GetSize() >= calCount), {
-        KERNEL_LOG(KERNEL_ERROR, "Size of calCount should be less than or equal to size of Input and Output!");
-    });
 
     if constexpr (sizeof(T) == sizeof(float)) {
         FmodCompute(dstTensor, src0Tensor, src1Tensor, sharedTmpBuffer, src0Tensor.GetSize(), calCount);
@@ -85,9 +95,10 @@ __aicore__ inline void FmodImpl(const LocalTensor<T> &dstTensor, const LocalTens
     }
 
     constexpr uint32_t maxLiveNodeCnt = 8; // The corresponding maxLiveNodeCnt for half is 8, extra is 3 * 2 + trunc 2.
+    uint32_t bufferSize = sharedTmpBuffer.GetSize();
     uint32_t stackSize =
-        sharedTmpBuffer.GetSize() / sizeof(T) / maxLiveNodeCnt / ONE_BLK_SIZE * ONE_BLK_SIZE; // divided by how many counts
-    ASCENDC_ASSERT((stackSize > 0), { KERNEL_LOG(KERNEL_ERROR, "stackSize must > 0!"); });
+        bufferSize / sizeof(T) / maxLiveNodeCnt / ONE_BLK_SIZE * ONE_BLK_SIZE; // divided by how many counts
+    CheckTmpBufferSize(stackSize, 0, bufferSize);
     ASCENDC_ASSERT((src0Tensor.GetSize() > 0), { KERNEL_LOG(KERNEL_ERROR, "src0Tensor size must > 0!"); });
     stackSize = stackSize > src0Tensor.GetSize() ? src0Tensor.GetSize() : stackSize; // No more than localTensor
 
diff --git a/impl/math/trunc/trunc_common_impl.h b/impl/math/trunc/trunc_common_impl.h
index f05e83ff..6c5682ca 100644
--- a/impl/math/trunc/trunc_common_impl.h
+++ b/impl/math/trunc/trunc_common_impl.h
@@ -10,6 +10,7 @@
 #ifndef IMPL_MATH_TRUNC_TRUNC_COMMON_IMPL_H
 #define IMPL_MATH_TRUNC_TRUNC_COMMON_IMPL_H
 #include "kernel_tensor.h"
+#include "../../common/check.h"
 #if __CCE_AICORE__ == 200
 #include "trunc_v200_impl.h"
 #elif __CCE_AICORE__ == 220
@@ -50,14 +51,25 @@ __aicore__ inline void TruncImpl(const LocalTensor<T>& dstTensor, const LocalTen
         return;
     }
 
-    uint32_t splitCount = sharedTmpBuffer.GetSize() / sizeof(T);
+    CheckTensorPosition(dstTensor, "dstTensor", "VECIN, VECOUT, VECCALC");
+    CheckTensorPosition(srcTensor, "srcTensor", "VECIN, VECOUT, VECCALC");
+    CheckTensorPosition(sharedTmpBuffer, "sharedTmpBuffer", "VECIN, VECOUT, VECCALC");
+
+    CheckCalCount(calCount, "calCount", srcTensor, "srcTensor", "Trunc");
+    CheckCalCount(calCount, "calCount", dstTensor, "dstTensor", "Trunc");
+
+    ASCENDC_ASSERT((std::is_same<T, float>::value || std::is_same<T, half>::value), {
+        KERNEL_LOG( KERNEL_ERROR, "Failed to check the data types, current api support data types are half/float."); });
+
+    uint32_t tmpBufferSize = sharedTmpBuffer.GetSize();
+    uint32_t splitCount = tmpBufferSize / sizeof(T);
     constexpr uint32_t TRUNC_HALF_CALC_PROCEDURE = 2;
     if constexpr (sizeof(T) == sizeof(half)) {
         splitCount = splitCount / TRUNC_HALF_CALC_PROCEDURE / ONE_BLK_SIZE * ONE_BLK_SIZE;
     } else {
         splitCount = splitCount / ONE_BLK_SIZE * ONE_BLK_SIZE;
     }
-    ASCENDC_ASSERT((splitCount > 0), { KERNEL_LOG(KERNEL_ERROR, "splitCount must > 0!"); });
+    CheckTmpBufferSize(splitCount, 0, tmpBufferSize);
 
     uint32_t loopCount = calCount / splitCount;
     uint32_t calcTail = calCount % splitCount;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 32a937e9..7fdfe0e5 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -175,6 +175,8 @@ foreach(product_type ${PRODUCT_TYPE_LIST})
         ${ASCENDC_TEST_${product_type}_CASE_SRC_FILES}
     )
 
+    add_dependencies(ascendc_utest_${product_type} gen_kernel_tiling)
+
     # add soc version flags
     if(${product_type} STREQUAL "ascend610")
         target_compile_definitions(ascendc_utest_${product_type} PRIVATE
-- 
Gitee