From b95a044cdbff14d882c0750a2caa87cdb981a7e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=9F=E4=BF=8A=E6=88=90?= <jiangjuncheng2@huawei.com>
Date: Wed, 21 May 2025 09:47:55 +0800
Subject: [PATCH 1/5] add new reduce host check

---
 impl/reduce/mean/mean_tiling.cpp              |  23 ++-
 impl/reduce/reduce_all/reduce_all_v220_impl.h |   4 +-
 impl/reduce/reduce_any/reduce_any_v220_impl.h |   4 +-
 impl/reduce/reduce_sum/reduce_sum_v220_impl.h |   2 +
 impl/reduce/reduce_tiling.cpp                 | 176 ++++++++++--------
 .../reduce_xor_sum/reduce_xor_sum_tiling.cpp  |  11 +-
 impl/reduce/sum/sum_tiling.cpp                |  10 +-
 lib/reduce/reduce_tiling.h                    |   1 +
 lib/sort/topk_tiling.h                        |   1 +
 9 files changed, 151 insertions(+), 81 deletions(-)
diff --git a/impl/reduce/mean/mean_tiling.cpp b/impl/reduce/mean/mean_tiling.cpp
index c22cacec..6838fb09 100644
--- a/impl/reduce/mean/mean_tiling.cpp
+++ b/impl/reduce/mean/mean_tiling.cpp
@@ -10,16 +10,37 @@
 
 #include "lib/reduce/mean_tiling.h"
 #include "register/tilingdata_base.h"
+#include "impl/host_log.h"
+
 namespace AscendC {
 constexpr uint32_t MEAN_CALC_PROC = 1;
 const uint32_t MEAN_ONE_BLK_SIZE = 32;
 const uint32_t MEAN_ONE_REPEAT_BYTE_SIZE = 256;
 const uint32_t HALF_TYPE_SIZE = 2;
 const uint32_t FLOAT_TYPE_SIZE = 4;
+
+// Host-side argument check for GetMeanMaxMinTmpSize. Accepted (srcTypeSize, accTypeSize) pairs are
+// (2, 2), (4, 4) and (2, 4); n must be non-zero; isReuseSource == true only produces a warning.
+inline void CheckMeanHostParams(const uint32_t n, const uint32_t srcTypeSize,
+    const uint32_t accTypeSize, const bool isReuseSource)
+{
+    ASCENDC_HOST_ASSERT(((srcTypeSize == 2U && accTypeSize == 2U) ||
+         (srcTypeSize == 4U && accTypeSize == 4U) ||
+         (srcTypeSize == 2U && accTypeSize == 4U)),  // (2, 4): 2-byte source with a 4-byte accumulator
+         return,
+         "[Mean][GetMeanMaxMinTmpSize] The parameter (srcTypeSize, accTypeSize) is (%u, %u), expected is (2, 2)/(4, 4)/(2, 4).",
+         srcTypeSize, accTypeSize);
+    ASCENDC_HOST_ASSERT(n > 0,
+         return, "[Mean][GetMeanMaxMinTmpSize] The parameter n is %u, expected is greater than 0!", n);
+    if (isReuseSource) {
+        TILING_LOG_WARNING("[Mean][GetMeanMaxMinTmpSize] The parameter isReuseSource is true, which is not effective!");
+    }
+}
+
 void GetMeanMaxMinTmpSize(const uint32_t n, const uint32_t srcTypeSize,
     const uint32_t accTypeSize, const bool isReuseSource, uint32_t& maxSize, uint32_t& minSize)
 {
-    (void)isReuseSource;
+    CheckMeanHostParams(n, srcTypeSize, accTypeSize, isReuseSource);
     if (srcTypeSize == 0) {
         return;
     }
diff --git a/impl/reduce/reduce_all/reduce_all_v220_impl.h b/impl/reduce/reduce_all/reduce_all_v220_impl.h
index 223639d8..aa5386bb 100644
--- a/impl/reduce/reduce_all/reduce_all_v220_impl.h
+++ b/impl/reduce/reduce_all/reduce_all_v220_impl.h
@@ -64,9 +64,9 @@ __aicore__ inline void ReduceAllImpl(const LocalTensor<T>& dstTensor, const Loca
             BinaryReduceByFirstAxis<T, isReuseSource, Min<T, false>>(
                 dstTensor, srcTensor, tmpTensor, first, last, padLast);
         }
-        SetMaskNorm();
-        ResetMask();
     }
+    SetMaskNorm();
+    ResetMask();
 }
 } // namespace Internal
 } // namespace AscendC
diff --git a/impl/reduce/reduce_any/reduce_any_v220_impl.h b/impl/reduce/reduce_any/reduce_any_v220_impl.h
index b8d7f59d..5c3bd252 100644
--- a/impl/reduce/reduce_any/reduce_any_v220_impl.h
+++ b/impl/reduce/reduce_any/reduce_any_v220_impl.h
@@ -64,9 +64,9 @@ __aicore__ inline void ReduceAnyImpl(const LocalTensor<T>& dstTensor, const Loca
             BinaryReduceByFirstAxis<T, isReuseSource, Max<T, false>>(
                 dstTensor, srcTensor, tmpTensor, first, last, padLast);
         }
-        SetMaskNorm();
-        ResetMask();
     }
+    SetMaskNorm();
+    ResetMask();
 }
 } // namespace Internal
 } // namespace AscendC
diff --git a/impl/reduce/reduce_sum/reduce_sum_v220_impl.h b/impl/reduce/reduce_sum/reduce_sum_v220_impl.h
index 75890b67..f2921187 100644
--- a/impl/reduce/reduce_sum/reduce_sum_v220_impl.h
+++ b/impl/reduce/reduce_sum/reduce_sum_v220_impl.h
@@ -268,6 +268,8 @@ __aicore__ inline void ReduceSumImpl(const LocalTensor<T>& dstTensor, const Loca
         BinaryReduceByFirstAxis<T, isReuseSource, Add<T, false>>(
                 dstTensor, srcTensor, tmpBuf, first, last, padLast);
     }
+    SetMaskNorm();
+    ResetMask();
 }
 } // namespace Internal
 } // namespace AscendC
diff --git a/impl/reduce/reduce_tiling.cpp b/impl/reduce/reduce_tiling.cpp
index b427d5b4..d6efe31f 100644
--- a/impl/reduce/reduce_tiling.cpp
+++ b/impl/reduce/reduce_tiling.cpp
@@ -10,6 +10,7 @@
 
 #include "lib/reduce/reduce_tiling.h"
 
+#include <string>
 #include <cstdint>
 #include <algorithm>
 
@@ -44,27 +45,31 @@ uint32_t FindK(uint32_t n) {
 }
 
 inline void CheckParams(std::vector<int64_t> shapeDims, bool isSrcInnerPad, ReducePattern pattern,
-    uint32_t first, uint32_t last)
+    uint32_t first, uint32_t last, std::string apiName, std::string funcName)
 {
-    ASCENDC_HOST_ASSERT(shapeDims.size() == ALLOWED_SHAPE_DIM, return, "srcShape dims must be 2.");
-    ASCENDC_HOST_ASSERT(isSrcInnerPad, return, "isSrcInnerPad must be true on this platform.");
+    ASCENDC_HOST_ASSERT(shapeDims.size() == ALLOWED_SHAPE_DIM, return,
+        "[%s][%s] srcShape dims must be 2.", apiName.c_str(), funcName.c_str());
+    ASCENDC_HOST_ASSERT(isSrcInnerPad, return,
+        "[%s][%s] isSrcInnerPad must be true on this platform.", apiName.c_str(), funcName.c_str());
     ASCENDC_HOST_ASSERT(pattern == ReducePattern::AR || pattern == ReducePattern::RA,
         return,
-        "Currently only support AR and RA pattern.");
-    ASCENDC_HOST_ASSERT(first > 0 && last > 0, return, "both first and last axis must be greater than 0.");
+        "[%s][%s] Currently only support AR and RA pattern.", apiName.c_str(), funcName.c_str());
+    ASCENDC_HOST_ASSERT(first > 0 && last > 0, return,
+        "[%s][%s] both first and last axis must be greater than 0.", apiName.c_str(), funcName.c_str());
 }
 } // namespace
 
 void GetReduceCommonMaxMinTmpSize(const ge::Shape &srcShape,
                                 const ge::DataType dataType,
                                 ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
-                                uint32_t &maxValue, uint32_t &minValue, bool isBinaryAdd) 
+                                uint32_t &maxValue, uint32_t &minValue, bool isBinaryAdd,
+                                std::string apiName, std::string funcName)
 {
     std::vector<int64_t> shapeDims = srcShape.GetDims();
 
     const uint32_t first = static_cast<uint32_t>(shapeDims[0]);
     const uint32_t last = static_cast<uint32_t>(shapeDims[1]);
-    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last);
+    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last, apiName, funcName);
     if (isReuseSource) {
         maxValue = minValue = 0U;
         return;
@@ -96,18 +101,79 @@ void GetReduceCommonMaxMinTmpSize(const ge::Shape &srcShape,
     maxValue = minValue = k * ((last * GetTypeSize(dataType) + ONE_BLK_SIZE - 1u) / ONE_BLK_SIZE * ONE_BLK_SIZE);
 }
 
+// Shared temp-buffer sizing for GetReduceSumMaxMinTmpSize / GetReduceMeanMaxMinTmpSize; apiName and funcName
+// only tag the CheckParams log messages. maxValue/minValue are left untouched when srcShape has fewer than two dims.
+inline void GetReduceSumMeanCommonTmpSize(const ge::Shape &srcShape, const ge::DataType dataType,
+                               ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
+                               uint32_t &maxValue, uint32_t &minValue, std::string apiName, std::string funcName)
+{
+    std::vector<int64_t> shapeDims = srcShape.GetDims();
+    if (shapeDims.size() < ALLOWED_SHAPE_DIM) {  // report the bad rank, never index dims that do not exist
+        CheckParams(shapeDims, isSrcInnerPad, pattern, 0U, 0U, apiName, funcName);
+        return;
+    }
+    const uint32_t first = static_cast<uint32_t>(shapeDims[0]);
+    const uint32_t last = static_cast<uint32_t>(shapeDims[1]);
+    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last, apiName, funcName);
+    if (isReuseSource) {
+        maxValue = minValue = 0U;
+        return;
+    }
+    uint32_t elePerBlk = ONE_BLK_SIZE / FLOAT_TYPE_SIZE;
+    if (pattern == ReducePattern::AR) {
+        uint32_t k = FindK(last);
+        if (k == last && first > 1U) {
+            k >>= 1U;
+        }
+        maxValue = minValue = (last <= B32_ELEM_NUM_PER_REPEAT) ? 0U : (first * k) * FLOAT_TYPE_SIZE;
+    } else {
+        uint32_t k = FindK(first);
+        uint32_t padLast = (last + elePerBlk - 1U) / elePerBlk * elePerBlk;
+        if (first == k && first > 1U) {
+            k >>= 1U;
+        }
+        maxValue = minValue = (k * padLast) * FLOAT_TYPE_SIZE;
+    }
+}
+
+// Shared temp-buffer sizing used by GetReduceAnyMaxMinTmpSize / GetReduceAllMaxMinTmpSize for DT_UINT8 input;
+// apiName/funcName only tag the CheckParams logs. Outputs are left untouched when srcShape has fewer than two dims.
+inline void GetReduceAnyAllCommonTmpSize(const ge::Shape &srcShape, const ge::DataType dataType,
+                                ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
+                                uint32_t &maxValue, uint32_t &minValue, std::string apiName, std::string funcName)
+{
+    std::vector<int64_t> shapeDims = srcShape.GetDims();
+    if (shapeDims.size() < ALLOWED_SHAPE_DIM) {  // report the bad rank, never index dims that do not exist
+        CheckParams(shapeDims, isSrcInnerPad, pattern, 0U, 0U, apiName, funcName);
+        return;
+    }
+    const uint32_t first = static_cast<uint32_t>(shapeDims[0]);
+    const uint32_t last = static_cast<uint32_t>(shapeDims[1]);
+    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last, apiName, funcName);
+    if (pattern == ReducePattern::AR) {
+        uint32_t elePerBlk = static_cast<uint32_t>(ONE_BLK_SIZE / sizeof(uint8_t));
+        uint32_t padLast = (last + elePerBlk - 1U) / elePerBlk * elePerBlk;
+        minValue = maxValue = static_cast<uint32_t>(padLast * sizeof(uint16_t)) + (first * elePerBlk);
+    } else if (isReuseSource) {
+        maxValue = minValue = 0U;
+    } else {
+        const uint32_t pow2 = FindK(first);
+        const uint32_t k = (pow2 == first && first > 1U) ? (pow2 >> 1U) : pow2;
+        maxValue = minValue = k * ((last + ONE_BLK_SIZE - 1U) / ONE_BLK_SIZE * ONE_BLK_SIZE);
+    }
+}
+
 void GetReduceProdMaxMinTmpSize(const ge::Shape &srcShape,
                                 const ge::DataType dataType,
                                 ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
                                 uint32_t &maxValue, uint32_t &minValue) 
 {
-    ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return, "it only supports float type on this platform.");
-
+    ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return,
+        "[ReduceProd][GetReduceProdMaxMinTmpSize] it only supports float type on this platform.");
     std::vector<int64_t> shapeDims = srcShape.GetDims();
-
     const uint32_t first = static_cast<uint32_t>(shapeDims[0]);
     const uint32_t last = static_cast<uint32_t>(shapeDims[1]);
-    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last);
+    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last, "ReduceProd", "GetReduceProdMaxMinTmpSize");
     if (isReuseSource) {
         minValue = pattern == ReducePattern::AR ? ONE_REPEAT_BYTE_SIZE : 0U;
         maxValue = minValue;
@@ -137,8 +203,9 @@ void GetReduceMaxMaxMinTmpSize(const ge::Shape &srcShape,
 {
     ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT || dataType == ge::DT_FLOAT16,
         return,
-        "it only supports float and half type on this platform.");
-    GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue, false);
+        "[ReduceMax][GetReduceMaxMaxMinTmpSize] it only supports float and half type on this platform.");
+    GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue, false,
+        "ReduceMax", "GetReduceMaxMaxMinTmpSize");
 }
 
 void GetReduceMinMaxMinTmpSize(const ge::Shape &srcShape,
@@ -148,8 +215,9 @@ void GetReduceMinMaxMinTmpSize(const ge::Shape &srcShape,
 {
     ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT || dataType == ge::DT_FLOAT16,
         return,
-        "it only supports float and half type on this platform.");
-    GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue, false);
+        "[ReduceMin][GetReduceMinMaxMinTmpSize] it only supports float and half type on this platform.");
+    GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue, false,
+        "ReduceMin", "GetReduceMinMaxMinTmpSize");
 }
 
 void GetReduceAnyMaxMinTmpSize(const ge::Shape &srcShape,
@@ -157,13 +225,15 @@ void GetReduceAnyMaxMinTmpSize(const ge::Shape &srcShape,
                                 ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
                                 uint32_t &maxValue, uint32_t &minValue) 
 {
+    ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT || dataType == ge::DT_UINT8,
+        return,
+        "[ReduceAny][GetReduceAnyMaxMinTmpSize] it only supports float and uint8_t type on this platform.");
     if (dataType == ge::DT_UINT8) {
-        GetReduceAllMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue);
+        GetReduceAnyAllCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+            "ReduceAny", "GetReduceAnyMaxMinTmpSize");
     } else {
-        ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT || dataType == ge::DT_UINT8,
-            return,
-            "it only supports float and uint8_t type on this platform.");
-        GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue, false);
+        GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+            false, "ReduceAny", "GetReduceAnyMaxMinTmpSize");
     }
 }
 
@@ -173,31 +243,13 @@ void GetReduceAllMaxMinTmpSize(const ge::Shape &srcShape,
                                uint32_t &maxValue, uint32_t &minValue)
 {
     ASCENDC_HOST_ASSERT((dataType == ge::DT_FLOAT || dataType == ge::DT_UINT8), return,
-        "it only supports float uint8 type on this platform.");
-    std::vector<int64_t> shapeDims = srcShape.GetDims();
-    const uint32_t first = static_cast<uint32_t>(shapeDims[0]);
-    const uint32_t last = static_cast<uint32_t>(shapeDims[1]);
-    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last);
+        "[ReduceAll][GetReduceAllMaxMinTmpSize] it only supports float and uint8 type on this platform.");
     if (dataType == ge::DT_UINT8) {
-        if (pattern == ReducePattern::AR) {
-            uint32_t elePerBlk = static_cast<uint32_t>(ONE_BLK_SIZE / sizeof(uint8_t));
-            uint32_t padLast = (last + elePerBlk - 1U) / elePerBlk * elePerBlk;
-            minValue = maxValue = static_cast<uint32_t>(padLast * sizeof(uint16_t)) + (first * elePerBlk);
-        } else {
-            if (isReuseSource) {
-                maxValue = minValue = 0U;
-                return;
-            }
-            uint32_t k = FindK(first);
-            if (k == first && first > 1U) {
-                k >>= 1U;
-            }
-            maxValue = minValue = k * ((last + ONE_BLK_SIZE - 1U) / ONE_BLK_SIZE * ONE_BLK_SIZE);
-        }
-        return;
+        GetReduceAnyAllCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+            "ReduceAll", "GetReduceAllMaxMinTmpSize");
     } else {
-        GetReduceCommonMaxMinTmpSize(
-            srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue, false);
+        GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+            false, "ReduceAll", "GetReduceAllMaxMinTmpSize");
     }
 }
 
@@ -206,35 +258,10 @@ void GetReduceSumMaxMinTmpSize(const ge::Shape &srcShape,
                                ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
                                uint32_t &maxValue, uint32_t &minValue)
 {
-    ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return, "it only supports float type on this platform.");
-    std::vector<int64_t> shapeDims = srcShape.GetDims();
-    const uint32_t first = static_cast<uint32_t>(shapeDims[0]);
-    const uint32_t last = static_cast<uint32_t>(shapeDims[1]);
-    CheckParams(shapeDims, isSrcInnerPad, pattern, first, last);
-    if (isReuseSource) {
-        maxValue = minValue = 0U;
-        return;
-    }
-    uint32_t elePerBlk = ONE_BLK_SIZE / FLOAT_TYPE_SIZE;
-    if (pattern == ReducePattern::AR) {
-        uint32_t k = FindK(last);
-        if (k == last && first > 1U) {
-            k >>= 1U;
-        }
-        if (last <= B32_ELEM_NUM_PER_REPEAT) {
-            maxValue = minValue = 0U;
-        } else {
-            maxValue = minValue = (first * k) * FLOAT_TYPE_SIZE;
-        }
-    } else {
-        uint32_t k = FindK(first);
-        uint32_t padLast = (last + elePerBlk - 1U) / elePerBlk * elePerBlk;
-        if (first == k && first > 1U) {
-            k >>= 1U;
-        }
-        maxValue = minValue = (k * padLast) * FLOAT_TYPE_SIZE;
-    }
-    return;
+    ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return,
+        "[ReduceSum][GetReduceSumMaxMinTmpSize] it only supports float type on this platform.");
+    GetReduceSumMeanCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+        "ReduceSum", "GetReduceSumMaxMinTmpSize");
 }
 
 void GetReduceMeanMaxMinTmpSize(const ge::Shape &srcShape,
@@ -242,6 +269,9 @@ void GetReduceMeanMaxMinTmpSize(const ge::Shape &srcShape,
                                 ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
                                 uint32_t &maxValue, uint32_t &minValue)
 {
-    GetReduceSumMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue);
+    ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return,
+        "[ReduceMean][GetReduceMeanMaxMinTmpSize] it only supports float type on this platform.");
+    GetReduceSumMeanCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+        "ReduceMean", "GetReduceMeanMaxMinTmpSize");
 }
 }  // namespace AscendC
diff --git a/impl/reduce/reduce_xor_sum/reduce_xor_sum_tiling.cpp b/impl/reduce/reduce_xor_sum/reduce_xor_sum_tiling.cpp
index d705e532..24445609 100644
--- a/impl/reduce/reduce_xor_sum/reduce_xor_sum_tiling.cpp
+++ b/impl/reduce/reduce_xor_sum/reduce_xor_sum_tiling.cpp
@@ -32,7 +32,16 @@ void GetReduceXorSumMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t type
                                   uint32_t &maxValue, uint32_t &minValue)
 {
     const uint32_t inputSize = srcShape.GetShapeSize();
-    ASCENDC_HOST_ASSERT(inputSize > 0, return, "ReduceXorSum input Shape size must be greater than 0.");
+
+    ASCENDC_HOST_ASSERT(inputSize > 0U, return,
+        "[ReduceXorSum][GetReduceXorSumMaxMinTmpSize] The parameter srcShape size is %u, expected is greater than 0!",
+         inputSize);
+    std::vector<int64_t> shapeDims = srcShape.GetDims();
+    ASCENDC_HOST_ASSERT(shapeDims.size() > 0UL, return,
+        "[ReduceXorSum][GetReduceXorSumMaxMinTmpSize] The parameter srcShape dimension number is %lli, expected is greater than 0!",
+         static_cast<long long>(shapeDims.size()));  // size() is size_t; cast so the argument matches %lli
+    ASCENDC_HOST_ASSERT(typeSize == 2U, return,
+        "[ReduceXorSum][GetReduceXorSumMaxMinTmpSize] The parameter typeSize is %u, expected is 2!", typeSize);
 
     maxValue = GetTmpSize(inputSize, typeSize, isReuseSource);
     minValue = maxValue;
diff --git a/impl/reduce/sum/sum_tiling.cpp b/impl/reduce/sum/sum_tiling.cpp
index 95e087ac..817031cd 100644
--- a/impl/reduce/sum/sum_tiling.cpp
+++ b/impl/reduce/sum/sum_tiling.cpp
@@ -19,8 +19,14 @@ namespace AscendC {
 void GetSumMaxMinTmpSize(
     const uint32_t n, const uint32_t typeSize, const bool isReuseSource, uint32_t &maxSize, uint32_t &minSize)
 {
-    (void)isReuseSource;
-    ASCENDC_HOST_ASSERT(typeSize > 0, return, "typeSize must be greater than 0.");
+    if (isReuseSource) {
+        TILING_LOG_WARNING("[Sum][GetSumMaxMinTmpSize] The parameter isReuseSource is true, which is not effective!");
+    }
+    ASCENDC_HOST_ASSERT(typeSize > 0, return,
+        "[Sum][GetSumMaxMinTmpSize] The parameter typeSize is %u, expected is 2 or 4!", typeSize);
+    ASCENDC_HOST_ASSERT(n > 0,
+         return, "[Sum][GetSumMaxMinTmpSize] The parameter n is %u, expected is greater than 0!", n);
+
     constexpr uint32_t sumOneBlkSize = 32;
     constexpr uint32_t sumOneRepeatByteSize = 256;
 
diff --git a/lib/reduce/reduce_tiling.h b/lib/reduce/reduce_tiling.h
index a43f6f83..c27591e4 100644
--- a/lib/reduce/reduce_tiling.h
+++ b/lib/reduce/reduce_tiling.h
@@ -15,6 +15,7 @@
 #ifndef LIB_REDUCE_REDUCE_TILING_H
 #define LIB_REDUCE_REDUCE_TILING_H
 #include <cstdint>
+#include "graph/types.h"
 #include "graph/tensor.h"
 
 namespace AscendC {
diff --git a/lib/sort/topk_tiling.h b/lib/sort/topk_tiling.h
index 07e36dbb..e08d1af4 100644
--- a/lib/sort/topk_tiling.h
+++ b/lib/sort/topk_tiling.h
@@ -11,6 +11,7 @@
 #define LIB_SORT_TOPK_TILING_H
 #include "topk_tilingdata.h"
 #include "tiling/platform/platform_ascendc.h"
+#include "graph/types.h"
 
 namespace AscendC {
 
-- 
Gitee


From 65b4a40f6d68166c4fbe46aaa7ebbe9060a9c721 Mon Sep 17 00:00:00 2001
From: chen-yiyuan <chenyiyuan5@huawei.com>
Date: Fri, 23 May 2025 11:34:45 +0800
Subject: [PATCH 2/5] update

---
 impl/CMakeLists.txt                 |   31 +
 impl/transdata/transdata_impl.h     |  318 +++++++
 impl/transdata/transdata_tiling.cpp |  140 +++
 lib/kernel_api.h                    |  159 ++++
 lib/transdata/transdata.h           |   47 +
 lib/transdata/transdata_tiling.h    |   67 ++
 tests/CMakeLists.txt                |    8 +
 tests/tiling/test_tiling.cpp        | 1369 +++++++++++++++++++++++++++
 8 files changed, 2139 insertions(+)
 create mode 100644 impl/transdata/transdata_impl.h
 create mode 100644 impl/transdata/transdata_tiling.cpp
 create mode 100644 lib/kernel_api.h
 create mode 100644 lib/transdata/transdata.h
 create mode 100644 lib/transdata/transdata_tiling.h

diff --git a/impl/CMakeLists.txt b/impl/CMakeLists.txt
index c29d95d3..8de6e974 100644
--- a/impl/CMakeLists.txt
+++ b/impl/CMakeLists.txt
@@ -92,6 +92,37 @@ add_library(tiling_api STATIC
     ${CMAKE_CURRENT_SOURCE_DIR}/math/axpy/axpy_tiling_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/ceil/ceil_tiling_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/floor/floor_tiling_impl.cpp
+<<<<<<< HEAD
+=======
+    ${CMAKE_CURRENT_SOURCE_DIR}/activation/softmax/softmax_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/activation/softmax/logsoftmax_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/rmsnorm/rmsnorm_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/batchnorm/batchnorm_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/sort/sort/sort_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/sort/topk/topk_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/deepnorm/deepnorm_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/select/selectwithbytesmask/selectwithbytesmask_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/layernorm/layernorm_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/normalize/normalize_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/layernormgrad/layernorm_grad_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/layernormgrad/layernorm_grad_beta_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/groupnorm/groupnorm_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/welfordfinalize/welfordfinalize_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/pad/pad/pad_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/transpose/confusion_transpose/confusion_transpose_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/pad/broadcast/broadcast_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/pad/broadcast/broadcast_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/math/xor/xor_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/math/cumsum/cumsum_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/mean/mean_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/math/sign/sign_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/activation/reglu/reglu_tiling_impl.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/reduce_xor_sum/reduce_xor_sum_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/sum/sum_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/reduce_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/transdata/transdata_tiling.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/index/arithprogression/arithprogression_tiling_impl.cpp
+>>>>>>> 4a0a42bb (update)
     ${CMAKE_CURRENT_SOURCE_DIR}/math/fmod/fmod_tiling_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/trunc/trunc_tiling_impl.cpp
     $<$<BOOL:${BUILD_OPEN_PROJECT}>:$<TARGET_OBJECTS:${ASCENDC_API_ADV_OBJ}>>
diff --git a/impl/transdata/transdata_impl.h b/impl/transdata/transdata_impl.h
new file mode 100644
index 00000000..37966c8a
--- /dev/null
+++ b/impl/transdata/transdata_impl.h
@@ -0,0 +1,318 @@
+/**
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#ifndef IMPL_TRANSDATA_TRANSDATA_IMPL_H
+#define IMPL_TRANSDATA_TRANSDATA_IMPL_H
+
+#include "kernel_tensor.h"
+#include "kernel_operator_intf.h"
+#include "kernel_tiling/kernel_tiling.h"
+
+namespace AscendC {
+
+template <typename T, typename U>
+struct TransDataParams {
+    T srcLayout;
+    U dstLayout;
+};
+
+#ifndef ASCC_PARAM_TRANSDATACONFIG
+#define ASCC_PARAM_TRANSDATACONFIG
+struct TransDataConfig {
+    DataFormat srcFormat;
+    DataFormat dstFormat;
+};
+#endif // ASCC_PARAM_TRANSDATACONFIG
+
+namespace Internal {
+struct TransDataTmpParams {  // axis extents read by TransDataImplMode1 / TransDataImplMode2 (both take NCDHW input)
+    int32_t n;  // extent of the N axis
+    int32_t c;  // extent of the C axis; the implementations split it into groups of c0 = 16
+    int32_t d;  // extent of the D axis
+    int32_t h;  // extent of the H axis
+    int32_t w;  // extent of the W axis; the implementations align h * w up to a whole block
+};
+
+constexpr int32_t DEFAULT_TRANSDATA_5HD_LIST = 16;
+
+// Transdata NCDHW -> FRACTAL_Z_3D
+template <typename T>
+__aicore__ inline void TransDataImplMode1(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
+    const TransDataTmpParams& param)
+{
+    constexpr int32_t elePerBlk = ONE_BLK_SIZE / sizeof(T);
+    const int32_t n = param.n, c = param.c, d = param.d, h = param.h, w = param.w;
+    constexpr int32_t c0 = 16;
+    constexpr int32_t n0 = 16;
+    const int32_t c1 = DivCeil(c, c0);
+    const int32_t n1 = DivCeil(n, n0);
+    int32_t padHw = AlignUp(h * w, elePerBlk);
+    int32_t currAxis = c * d * padHw;
+    Duplicate<T>(tmpBuffer.ReinterpretCast<T>(), static_cast<T>(0), currAxis);
+    PipeBarrier<PIPE_V>();
+    auto tmpDstTensor = tmpBuffer[currAxis * sizeof(T)].ReinterpretCast<T>();
+    uint64_t dstLocalList[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t srcLocalList[DEFAULT_TRANSDATA_5HD_LIST];
+
+    uint64_t dstTensorAddr = (uint64_t)dst.GetPhyAddr();
+    uint64_t srcTensorAddr = (uint64_t)src.GetPhyAddr();
+    uint64_t tmpDstTensorAddr = (uint64_t)tmpDstTensor.GetPhyAddr();
+    uint64_t tmpBufferAddr = (uint64_t)tmpBuffer.GetPhyAddr();
+    // step1, NCDHW -> CDHW, N1, N0
+    // Do n1 times Transpose to split axis N, and fill with 0 on padding data.
+    TransDataTo5HDParams transDataParams;
+    transDataParams.dstHighHalf = false;
+    transDataParams.srcHighHalf = false;
+    transDataParams.repeatTimes = currAxis / elePerBlk;
+    // If repeat == 1, the start offset is auto-incremented by the stride.
+    transDataParams.dstRepStride = transDataParams.repeatTimes == 1 ? 0 : n1 * n0;
+    transDataParams.srcRepStride = transDataParams.repeatTimes == 1 ? 0 : 1;
+
+    bool isPadded = padHw != h * w;
+    // dst tensor is unable to fill all padded data.
+    auto tmpIfPadAddr = isPadded ? tmpDstTensorAddr : dstTensorAddr;
+    for (int j = 0; j < n1; j++) {
+        uint64_t currDstAddr = tmpIfPadAddr + j * n0 * sizeof(T);
+        uint64_t currSrcAddr = srcTensorAddr + j * currAxis * n0 * sizeof(T);
+        // Handle the last chunk when N is not evenly divisible by n0.
+        int remain = j == n1 - 1 ? n - j * n0 : n0;
+        for (int32_t i = 0; i < n0; i++) {
+            dstLocalList[i] = currDstAddr +  (i * n1 * n0) * sizeof(T);
+        }
+        for (int32_t i = 0; i < remain; i++) {
+            srcLocalList[i] = currSrcAddr + i * currAxis * sizeof(T);
+        }
+        for (int32_t i = remain; i < n0; i++) {
+            srcLocalList[i] = tmpBufferAddr;
+        }
+        TransDataTo5HD<half>(dstLocalList, srcLocalList, transDataParams);
+    }
+    PipeBarrier<PIPE_V>();
+    // step1.5 collapse padded H,W axis for CDHW, N1N0
+    DataCopyParams copyParams;
+    if (isPadded) {
+        currAxis = h * w * n1 * n0;
+        copyParams.blockCount = c * d;
+        copyParams.blockLen = currAxis / elePerBlk;
+        // Merge axis by skipping padded H,W.
+        copyParams.srcStride = (padHw - h * w) * n1 * n0 / elePerBlk;
+        copyParams.dstStride = 0;
+        DataCopy(dst, tmpDstTensor, copyParams);
+    }
+    PipeBarrier<PIPE_V>();
+
+    // step2, CDHWN1N0 -> C1DHW, N1N0, C0
+    currAxis = d * h * w * n1 * n0;
+    transDataParams.repeatTimes = currAxis / elePerBlk;
+    transDataParams.dstRepStride = transDataParams.repeatTimes == 1 ? 0 : c0;
+    transDataParams.srcRepStride = transDataParams.repeatTimes == 1 ? 0 : 1;
+    for (int32_t j = 0; j < c1; j++) {
+        uint64_t currDstAddr = tmpDstTensorAddr + j * currAxis * c0 * sizeof(T);
+        uint64_t currSrcAddr = dstTensorAddr + j * currAxis * c0 * sizeof(T);
+        int remain = j == c1 - 1 ? c - j * c0 : c0;
+        for (int32_t i = 0; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+            dstLocalList[i] = currDstAddr + i * c0 * sizeof(T);
+        }
+        for (int32_t i = 0; i < remain; i++) {
+            srcLocalList[i] = currSrcAddr + i * currAxis * sizeof(T);
+        }
+        for (int32_t i = remain; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+            srcLocalList[i] = tmpBufferAddr;
+        }
+        TransDataTo5HD<half>(dstLocalList, srcLocalList, transDataParams);
+    }
+    PipeBarrier<PIPE_V>();
+    // step3, C1DHW, N1N0, C0 -> DC1HW, N1N0, C0
+    currAxis = c0 * h * w * n1 * n0;
+    copyParams.blockCount = d;
+    copyParams.blockLen = currAxis / elePerBlk;
+    // Merge axis by skipping padding padHW -> h, w
+    copyParams.srcStride = 0;
+    copyParams.dstStride = (c1 - 1) * currAxis / elePerBlk;
+    for (int32_t i = 0; i < c1; i++) {
+        DataCopy(dst[i * currAxis], tmpDstTensor[i * d * currAxis], copyParams);
+    }
+    PipeBarrier<PIPE_V>();
+}
+
+// Transdata NCDHW -> NDC1HWC0: reads src, stages the channel interleave in tmpBuffer and writes the result to dst.
+template <typename T>
+__aicore__ inline void TransDataImplMode2(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
+    const TransDataTmpParams& param)
+{
+    constexpr int32_t c0 = 16; // channels per group; c is split into c1 groups of c0
+    constexpr int32_t elePerBlk = ONE_BLK_SIZE / sizeof(T); // elements of T per ONE_BLK_SIZE bytes
+    const int32_t n = param.n, c = param.c, d = param.d, h = param.h, w = param.w;
+    const int32_t c1 = DivCeil(c, c0); // number of channel groups; the last one may be partial
+    const int32_t padHw = AlignUp(h * w, elePerBlk); // h * w rounded up to a multiple of elePerBlk
+    int32_t currAxis = d * padHw; // NOTE(review): never read in this function
+    // Element counts of the axis products used for addressing; the "Pad" variants use padHw in place of h * w.
+    int32_t axisHwd = h * w * d; // NOTE(review): never read in this function
+    int32_t axisHwc0 = h * w * c0;
+    int32_t axisC1hwc0 = axisHwc0 * c1;
+    int32_t axisC1hwdc0 = axisC1hwc0 * d;
+    int32_t axisPadHwd = padHw * d;
+    int32_t axisPadHwc0 = padHw * c0; // NOTE(review): never read in this function
+    int32_t axisPadHwdc0 = padHw * c0 * d;
+    Duplicate<T>(tmpBuffer.ReinterpretCast<T>(), static_cast<T>(0), axisPadHwd); // zero the first axisPadHwd elements
+    PipeBarrier<PIPE_V>();
+    // The zeroed region is used as the source row for channels that are missing from a partial group (see step1).
+    // The staging tensor starts right behind that zeroed region of axisPadHwd elements.
+    auto tmpDstTensor = tmpBuffer[axisPadHwd * sizeof(T)].ReinterpretCast<T>();
+    // Raw addresses used to fill the TransDataTo5HD address lists.
+    uint64_t dstTensorAddr = (uint64_t)dst.GetPhyAddr(); // NOTE(review): never read in this function
+    uint64_t srcTensorAddr = (uint64_t)src.GetPhyAddr();
+    uint64_t tmpDstTensorAddr = (uint64_t)tmpDstTensor.GetPhyAddr();
+    uint64_t tmpBufferAddr = (uint64_t)tmpBuffer.GetPhyAddr(); // start of the zeroed region
+    uint64_t dstLocalList[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t srcLocalList[DEFAULT_TRANSDATA_5HD_LIST];
+    TransDataTo5HDParams transDataParams;
+    transDataParams.dstHighHalf = false;
+    transDataParams.srcHighHalf = false;
+    transDataParams.repeatTimes = axisPadHwd / elePerBlk; // padded d * h * w axis divided into elePerBlk chunks
+    transDataParams.dstRepStride = transDataParams.repeatTimes == 1 ? 0 : c0; // strides are 0 for a single repeat
+    transDataParams.srcRepStride = transDataParams.repeatTimes == 1 ? 0 : 1;
+    // Copy descriptor for step2: d bursts of h * w * c0 elements for each channel group.
+    DataCopyParams copyParams;
+    copyParams.blockCount = d;
+    copyParams.blockLen = axisHwc0 / elePerBlk;
+    copyParams.srcStride = (padHw - h * w) * c0 / elePerBlk; // presumably the padding gap behind each burst — confirm
+    copyParams.dstStride = (c1 - 1) * axisHwc0 / elePerBlk; // presumably the room for the other c1 - 1 groups — confirm
+    // One iteration per batch k: CDHW -> C1DHWC0 (step1) followed by C1DHWC0 -> DC1HWC0 (step2).
+    for (int32_t k = 0; k < n; k++) {
+        int32_t currSrcStart = k * axisPadHwd * c; // src is addressed with padHw rather than h * w
+        int32_t currDstStart = k * axisC1hwdc0;
+        // The repeat count is assumed never to exceed the 255-repeat maximum because of the total memory size.
+        // step1, CDHW -> C1DHWC0 with pad data
+        for (int32_t j = 0; j < c1; j++) {
+            uint64_t currDstAddr = tmpDstTensorAddr + j * axisPadHwdc0 * sizeof(T);
+            uint64_t currSrcAddr = srcTensorAddr + (currSrcStart + j * axisPadHwdc0) * sizeof(T);
+            int remain = j == c1 - 1 ? c - j * c0 : c0; // channels actually present in group j
+            for (int32_t i = 0; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+                dstLocalList[i] = currDstAddr + i * c0 * sizeof(T); // entry i starts c0 elements after entry i - 1
+            }
+            for (int32_t i = 0; i < remain; i++) {
+                srcLocalList[i] = currSrcAddr + i * axisPadHwd * sizeof(T); // one row per present channel
+            }
+            for (int32_t i = remain; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+                srcLocalList[i] = tmpBufferAddr; // remaining entries read from the zeroed region
+            }
+            TransDataTo5HD<half>(dstLocalList, srcLocalList, transDataParams); // NOTE(review): <half> is used for every T — confirm
+        }
+        PipeBarrier<PIPE_V>();
+        // step2, C1DHWC0 -> DC1HWC0 (the padding added to h * w is dropped by the copy strides)
+        for (int32_t i = 0; i < c1; i++) {
+            DataCopy(dst[currDstStart + i * axisHwc0], tmpDstTensor[i * axisPadHwdc0], copyParams); // channel group i
+        }
+        PipeBarrier<PIPE_V>();
+    }
+}
+
+// Transdata NDC1HWC0 -> NCDHW: regroups src by channel group in tmpBuffer, then de-interleaves the channels into dst.
+template <typename T>
+__aicore__ inline void TransDataImplMode3(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
+    const TransDataTmpParams& param)
+{
+    const int32_t n = param.n, c = param.c, d = param.d, h = param.h, w = param.w;
+    constexpr int32_t c0 = 16; // channels per group; c is split into c1 groups of c0
+    constexpr int32_t elePerBlk = ONE_BLK_SIZE / sizeof(T); // elements of T per ONE_BLK_SIZE bytes
+    const int32_t c1 = DivCeil(c, c0); // number of channel groups; the last one may be partial
+    const int32_t padHw = AlignUp(h * w, elePerBlk); // h * w rounded up to a multiple of elePerBlk
+    constexpr int32_t reservedDummy = 512; // bytes skipped at the start of tmpBuffer (target of the redundant rows)
+    auto tmpDstTensor = tmpBuffer[reservedDummy].template ReinterpretCast<T>(); // staging tensor behind those bytes
+    uint64_t dstLocalList[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t srcLocalList[DEFAULT_TRANSDATA_5HD_LIST];
+    // Raw addresses used to fill the TransDataTo5HD address lists.
+    uint64_t dstTensorAddr = (uint64_t)dst.GetPhyAddr();
+    uint64_t tmpDstTensorAddr = (uint64_t)tmpDstTensor.GetPhyAddr();
+    uint64_t tmpBufferAddr = (uint64_t)tmpBuffer.GetPhyAddr(); // start of the reserved bytes
+    // Element counts of the axis products used for addressing; the "Pad" variants use padHw in place of h * w.
+    int32_t axisHwd = h * w * d; // NOTE(review): never read in this function
+    int32_t axisHwc0 = h * w * c0;
+    int32_t axisC1hwc0 = axisHwc0 * c1;
+    int32_t axisC1hwdc0 = axisC1hwc0 * d;
+    int32_t axisPadHwd = padHw * d;
+    int32_t axisPadHwc0 = padHw * c0;
+    int32_t axisPadHwdc0 = padHw * c0 * d;
+    TransDataTo5HDParams transDataParams;
+    transDataParams.dstHighHalf = false;
+    transDataParams.srcHighHalf = false;
+    transDataParams.repeatTimes = padHw * d / elePerBlk; // padded d * h * w axis divided into elePerBlk chunks
+    transDataParams.srcRepStride = transDataParams.repeatTimes == 1 ? 0 : c0; // strides are 0 for a single repeat
+    transDataParams.dstRepStride = transDataParams.repeatTimes == 1 ? 0 : 1;
+    // Copy descriptor for step1: c1 bursts of h * w * c0 elements for each depth slice.
+    DataCopyParams copyParams;
+    copyParams.blockCount = c1;
+    copyParams.blockLen = h * w * c0 / elePerBlk;
+    copyParams.srcStride = 0;
+    copyParams.dstStride = (d * padHw - h * w) * c0 / elePerBlk; // presumably spaces the groups d * padHw * c0 apart — confirm
+    // One iteration per batch k: DC1HWC0 -> C1DHWC0 (step1) followed by C1DHWC0 -> CDHW (step2).
+    for (int32_t k = 0; k < n; k++) {
+        // step1 DC1HWC0 -> C1DHWC0
+        int32_t currSrcStart = k * axisC1hwdc0;
+        int32_t currDstStart = k * axisPadHwd * c; // dst is addressed with padHw rather than h * w
+        for (int32_t i = 0; i < d; i++) {
+            DataCopy(tmpDstTensor[i * axisPadHwc0], src[currSrcStart + i * axisC1hwc0], copyParams); // depth slice i
+        }
+        PipeBarrier<PIPE_V>();
+        // step2, C1DHWC0 -> C1C0DHW
+        // The repeat count is assumed never to exceed the 255-repeat maximum because of the total memory size.
+        for (int32_t j = 0; j < c1; j++) {
+            int32_t remain = j == c1 - 1 ? c - j * c0 : c0; // channels actually present in group j
+            uint64_t currDstAddr = dstTensorAddr + (currDstStart + j * axisPadHwdc0) * sizeof(T);
+            uint64_t currSrcAddr = tmpDstTensorAddr + j * axisPadHwdc0 * sizeof(T);
+            for (int32_t i = 0; i < remain; i++) {
+                dstLocalList[i] = currDstAddr + i * axisPadHwd * sizeof(T); // one dst row per present channel
+            }
+            for (int32_t i = remain; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+                // Rows of channels that do not exist go to the start of tmpBuffer instead of dst (TODO confirm they fit in reservedDummy).
+                dstLocalList[i] = tmpBufferAddr + i * ONE_BLK_SIZE;
+            }
+            for (int32_t i = 0; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+                srcLocalList[i] = currSrcAddr + i * c0 * sizeof(T); // entry i starts c0 elements after entry i - 1
+            }
+            TransDataTo5HD<half>(dstLocalList, srcLocalList, transDataParams); // NOTE(review): <half> is used for every T — confirm
+        }
+        PipeBarrier<PIPE_V>();
+    }
+}
+
+// Dispatches a TransData request to the routine implementing config's (srcFormat, dstFormat) pair.
+// The (n, c, d, h, w) extents are always read from the layout that is in NCDHW format.
+template <const TransDataConfig& config, typename T, typename U, typename S>
+__aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const LocalTensor<T>& srcTensor,
+    const LocalTensor<uint8_t>& sharedTmpBuffer, const TransDataParams<U, S>& params)
+{
+    // Compile-time contract: half or bfloat16_t elements, layout arguments, tuple-like shapes.
+    static_assert(SupportType<T, half, bfloat16_t>(), "Currents only supports half/bfloat16_t types.");
+    static_assert(is_layout_v<U>, "srcLayout must be a layout");
+    static_assert(is_layout_v<S>, "dstLayout must be a layout");
+    static_assert(Std::is_tuple_v<Std::remove_cvref_t<decltype(params.srcLayout.GetShape())>>, "it must be a shape.");
+    static_assert(Std::is_tuple_v<Std::remove_cvref_t<decltype(params.dstLayout.GetShape())>>, "it must be a shape.");
+
+    constexpr bool srcIsNcdhw = config.srcFormat == DataFormat::NCDHW;
+    constexpr bool dstIsNcdhw = config.dstFormat == DataFormat::NCDHW;
+    auto logicalShape = srcIsNcdhw ? params.srcLayout.GetShape() : params.dstLayout.GetShape();
+    TransDataTmpParams extents = {
+        Std::get<0>(logicalShape), Std::get<1>(logicalShape), Std::get<2>(logicalShape),
+        Std::get<3>(logicalShape), Std::get<4>(logicalShape)
+    };
+
+    if constexpr (srcIsNcdhw && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
+        TransDataImplMode1(dstTensor, srcTensor, sharedTmpBuffer, extents);
+    } else if constexpr (srcIsNcdhw && config.dstFormat == DataFormat::NDC1HWC0) {
+        TransDataImplMode2(dstTensor, srcTensor, sharedTmpBuffer, extents);
+    } else if constexpr (dstIsNcdhw && config.srcFormat == DataFormat::NDC1HWC0) {
+        TransDataImplMode3(dstTensor, srcTensor, sharedTmpBuffer, extents);
+    }
+}
+
+} // namespace Internal
+} // namespace AscendC
+#endif // IMPL_TRANSDATA_TRANSDATA_IMPL_H
\ No newline at end of file
diff --git a/impl/transdata/transdata_tiling.cpp b/impl/transdata/transdata_tiling.cpp
new file mode 100644
index 00000000..dfb216cc
--- /dev/null
+++ b/impl/transdata/transdata_tiling.cpp
@@ -0,0 +1,140 @@
+/**
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+#include "lib/transdata/transdata_tiling.h"
+
+#include <cstdint>
+#include <algorithm>
+
+#include "graph/tensor.h"
+#include "impl/host_log.h"
+#include "tiling/platform/platform_ascendc.h"
+namespace AscendC {
+namespace {
+constexpr int32_t PAD_ELE_FOR_HALF = 16; // presumably the element count h * w is padded to for 2-byte types — TODO confirm
+constexpr int32_t N_INDEX = 0; // positions of n, c, d, h, w inside a 5-D NCDHW dims vector
+constexpr int32_t C_INDEX = 1;
+constexpr int32_t D_INDEX = 2;
+constexpr int32_t H_INDEX = 3;
+constexpr int32_t W_INDEX = 4;
+
+struct TmpTransDataParams { // NCDHW extents filled in by GenerateShapeInfo; every field defaults to 0
+    int32_t n = 0; // dims[N_INDEX] of the NCDHW shape
+    int32_t c = 0; // dims[C_INDEX] of the NCDHW shape
+    int32_t d = 0; // dims[D_INDEX] of the NCDHW shape
+    int32_t h = 0; // dims[H_INDEX] of the NCDHW shape
+    int32_t w = 0; // dims[W_INDEX] of the NCDHW shape
+};
+
+// Ceiling division for positive operands; a zero divisor returns a unchanged instead of dividing.
+int32_t DivCeil(int32_t a, int32_t b)
+{
+    return (b != 0) ? (a + b - 1) / b : a;
+}
+
+// Rounds a up to a multiple of b by scaling the block count back up; the result is 0 when b is 0.
+int32_t AlignUp(int32_t a, int32_t b)
+{
+    const int32_t blockCount = DivCeil(a, b);
+    return blockCount * b;
+}
+
+// Checks srcShape/dstShape against the format pair in config and stores the NCDHW extents in param.
+// Supported pairs are NCDHW->NDC1HWC0, NCDHW->FRACTAL_Z_3D and NDC1HWC0->NCDHW; any other pair returns false
+// without logging. Returns true only when every check passes; param may be partially written on failure.
+// The type argument is currently unused.
+bool GenerateShapeInfo(const TransDataConfig &config, const ge::Shape &srcShape, const ge::Shape &dstShape, ge::DataType type,
+    TmpTransDataParams &param)
+{
+    (void)type;
+    constexpr int32_t c0 = 16, n0 = 16;
+    const bool srcIsNcdhw = config.srcFormat == DataFormat::NCDHW;
+    const bool toNdc1hwc0 = srcIsNcdhw && config.dstFormat == DataFormat::NDC1HWC0;
+    const bool toFractalZ3d = srcIsNcdhw && config.dstFormat == DataFormat::FRACTAL_Z_3D;
+    const bool fromNdc1hwc0 = config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW;
+    if (!toNdc1hwc0 && !toFractalZ3d && !fromNdc1hwc0) {
+        return false;
+    }
+
+    // "plain" is the 5-D NCDHW side and "blocked" the 6-D/7-D side, whichever direction the conversion runs.
+    const std::vector<int64_t> plain = srcIsNcdhw ? srcShape.GetDims() : dstShape.GetDims();
+    const std::vector<int64_t> blocked = srcIsNcdhw ? dstShape.GetDims() : srcShape.GetDims();
+    const size_t blockedRank = toFractalZ3d ? 7 : 6;
+    ASCENDC_HOST_ASSERT(plain.size() == 5 && blocked.size() == blockedRank, return false, "input shapes are not matched with DataFormat.");
+    // The extents are stored as int32_t, so reject negative or oversized dims instead of silently truncating them.
+    for (const int64_t dim : plain) {
+        ASCENDC_HOST_ASSERT(dim >= 0 && dim <= INT32_MAX, return false, "shape dim is out of the supported int32 range.");
+    }
+    param.n = static_cast<int32_t>(plain[N_INDEX]);
+    param.c = static_cast<int32_t>(plain[C_INDEX]);
+    param.d = static_cast<int32_t>(plain[D_INDEX]);
+    param.h = static_cast<int32_t>(plain[H_INDEX]);
+    param.w = static_cast<int32_t>(plain[W_INDEX]);
+
+    if (toFractalZ3d) {
+        // blocked dims are (D, C1, H, W, N1, N0, C0); n is checked through N1 * N0 below, so only d, h, w are
+        // compared directly here.
+        ASCENDC_HOST_ASSERT(param.d == blocked[0] && param.h == blocked[2] && param.w == blocked[3], return false, "shapeInfo d,h,w is not matched.");
+        ASCENDC_HOST_ASSERT(blocked[6] == c0 && blocked[1] * c0 == AlignUp(param.c, c0), return false, "dst c0, c1 is not able to be converted to c.");
+        ASCENDC_HOST_ASSERT(blocked[5] == n0 && blocked[4] * n0 == AlignUp(param.n, n0), return false, "dst n0, n1 is not able to be converted to n.");
+        return true;
+    }
+    // blocked dims are (N, D, C1, H, W, C0); the message names the blocked side as "dst" or "src".
+    ASCENDC_HOST_ASSERT(param.n == blocked[0] && param.d == blocked[1] && param.h == blocked[3] && param.w == blocked[4], return false, "shapeInfo n,d,h,w is not matched.");
+    ASCENDC_HOST_ASSERT(blocked[5] == c0 && blocked[2] * c0 == AlignUp(param.c, c0), return false,
+        "%s c0, c1 is not able to be converted to c.", srcIsNcdhw ? "dst" : "src");
+    return true;
+}
+
+// Returns the temporary buffer size in bytes that the TransData kernel needs for the format pair in config,
+// or 0 when the pair is not supported. Every term assumes 2-byte elements (dataSize).
+int32_t GetTmpBufferSize(const TransDataConfig &config, const TmpTransDataParams &param)
+{
+    constexpr int64_t dataSize = 2, redundantDataBuffer = 512;
+    constexpr int32_t c0 = 16, n0 = 16;
+    // Accumulate in 64 bits: a product of several extents can exceed int32_t, which is undefined behaviour.
+    const int64_t c = param.c, d = param.d;
+    const int64_t c1 = DivCeil(param.c, c0), n1 = DivCeil(param.n, n0);
+    const int64_t padHw = AlignUp(param.h * param.w, PAD_ELE_FOR_HALF); // h * w padded to a multiple of 16 elements
+    const int64_t paddedBytes = d * c1 * c0 * padHw * dataSize; // one batch with c padded up to c1 * c0
+    int64_t bytes = 0;
+    if (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) {
+        bytes = d * padHw * dataSize + paddedBytes;
+    } else if (config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW) {
+        bytes = paddedBytes + redundantDataBuffer;
+    } else if (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
+        bytes = c * d * padHw * dataSize + n1 * n0 * paddedBytes;
+    }
+    // Saturate instead of wrapping when the requirement does not fit the int32_t return type.
+    return static_cast<int32_t>(std::min<int64_t>(bytes, INT32_MAX));
+}
+
+} // namespace
+
+bool GetTransDataMaxMinTmpSize(const platform_ascendc::PlatformAscendC &platform,
+                                const ge::Shape &srcShape,
+                                const ge::Shape &dstShape,
+                                const ge::DataType dataType,
+                                const TransDataConfig &config,
+                                uint32_t &maxValue, uint32_t &minValue) // outputs; assigned only on the success path
+{ // Host-side query: stores the TransData temporary buffer size in maxValue/minValue, or returns false if a check fails.
+    ASCENDC_HOST_ASSERT(dataType == ge::DataType::DT_FLOAT16 || dataType == ge::DataType::DT_BF16, return false, "it only supports DT_FLOAT16/DT_BF16 data type");
+    platform_ascendc::SocVersion socVersion = platform.GetSocVersion();
+    ASCENDC_HOST_ASSERT(socVersion == platform_ascendc::SocVersion::ASCEND910B, return false,
+                        "Unsupported SocVersion for TransData API.");
+    // Validate the shapes against config and collect the NCDHW extents needed for the size computation.
+    TmpTransDataParams tmpParam;
+    // GenerateShapeInfo returns false for unsupported format pairs and for shapes that do not match the formats.
+    ASCENDC_HOST_ASSERT(GenerateShapeInfo(config, srcShape, dstShape, dataType, tmpParam), return false, "failed to validate inputs informations.");
+    maxValue = GetTmpBufferSize(config, tmpParam); // int32_t result converted to uint32_t
+    minValue = maxValue; // a single size is reported: min and max are always equal
+    return true;
+}
+} // namespace AscendC
diff --git a/lib/kernel_api.h b/lib/kernel_api.h
new file mode 100644
index 00000000..b6f7a069
--- /dev/null
+++ b/lib/kernel_api.h
@@ -0,0 +1,159 @@
+/**
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/*!
+ * \file kernel_api.h
+ * \brief
+ */
+#ifndef LIB_KERNEL_API_H
+#define LIB_KERNEL_API_H
+
+#if defined(__DAV_C310__) || defined(__DAV_310R6__)
+#include "hccl/hccl.h"
+#include "index/arithprogression.h"
+#include "activation/sigmoid.h"
+#include "activation/softmax.h"
+#include "activation/simplesoftmax.h"
+#include "activation/softmaxflashv2.h"
+#include "activation/softmaxgrad.h"
+#include "activation/gelu.h"
+#include "filter/dropout.h"
+#include "math/tan.h"
+#include "math/tanh.h"
+#include "math/floor.h"
+#include "math/lgamma.h"
+#include "math/log.h"
+#include "math/sin.h"
+#include "math/atanh.h"
+#include "math/asinh.h"
+#include "math/acosh.h"
+#include "math/trunc.h"
+#include "math/cos.h"
+#include "math/fmod.h"
+#include "math/hypot.h"
+#include "math/power.h"
+#include "math/frac.h"
+#include "math/cumsum.h"
+#include "math/erf.h"
+#include "math/erfc.h"
+#include "math/atan.h"
+#include "math/is_finite.h"
+#include "math/philox.h"
+#include "math/sinh.h"
+#include "math/cosh.h"
+#include "math/sign.h"
+#include "math/asin.h"
+#include "math/acos.h"
+#include "math/exp.h"
+#include "math/xor.h"
+#include "normalization/layernorm.h"
+#include "normalization/welfordfinalize.h"
+#include "normalization/normalize.h"
+#include "pad/broadcast.h"
+#include "quantization/ascend_quant.h"
+#include "quantization/ascend_dequant.h"
+#include "quantization/ascend_antiquant.h"
+#include "quantization/quantize.h"
+#include "quantization/dequantize.h"
+#include "quantization/antiquantize.h"
+#include "utils/init_global_memory.h"
+#include "sort/sort.h"
+#include "sort/topk.h"
+#include "transpose/confusion_transpose.h"
+#include "select/selectwithbytesmask.h"
+#include "reduce/reduce.h"
+#include "math/clamp.h"
+#include "math/round.h"
+#include "math/ceil.h"
+#endif // defined(__DAV_C310__) || defined(__DAV_310R6__)
+
+#if defined(__CCE_AICORE__) && (__CCE_AICORE__ != 310)
+#include "filter/dropout.h"
+#include "activation/sigmoid.h"
+#include "activation/softmax.h"
+#include "activation/simplesoftmax.h"
+#include "activation/softmaxflashv2.h"
+#include "activation/softmaxflashv3.h"
+#include "activation/softmaxgrad.h"
+#include "math/xor.h"
+#include "math/floor.h"
+#include "sort/sort.h"
+#endif
+
+#include "std/tuple.h"
+#include "std/type_traits.h"
+#include "std/utility.h"
+#include "std/algorithm.h"
+
+#if defined(__CCE_AICORE__) && (__CCE_AICORE__ < 300)
+#include "index/arithprogression.h"
+#include "normalization/layernormgrad.h"
+#include "normalization/layernormgradbeta.h"
+#include "pad/pad.h"
+#include "hccl/hccl.h"
+#include "math/frac.h"
+#include "math/power.h"
+#include "math/log.h"
+#include "math/sin.h"
+#include "math/cos.h"
+#include "math/asin.h"
+#include "math/acos.h"
+#include "math/asinh.h"
+#include "math/acosh.h"
+#include "math/atan.h"
+#include "math/cosh.h"
+#include "math/erf.h"
+#include "math/erfc.h"
+#include "math/clamp.h"
+#include "normalization/rmsnorm.h"
+#include "normalization/batchnorm.h"
+#include "math/tanh.h"
+#include "math/atanh.h"
+#include "normalization/deepnorm.h"
+#include "math/exp.h"
+#include "normalization/layernorm.h"
+#include "normalization/welfordfinalize.h"
+#include "normalization/normalize.h"
+#include "reduce/sum.h"
+#include "activation/silu.h"
+#include "activation/gelu.h"
+#include "quantization/ascend_quant.h"
+#include "quantization/ascend_dequant.h"
+#include "quantization/ascend_antiquant.h"
+#include "activation/logsoftmax.h"
+#include "activation/softmaxflash.h"
+#include "transpose/confusion_transpose.h"
+#include "select/selectwithbytesmask.h"
+#include "math/sinh.h"
+#include "activation/swiglu.h"
+#include "activation/reglu.h"
+#include "math/tan.h"
+#include "math/round.h"
+#include "math/trunc.h"
+#include "activation/swish.h"
+#include "sort/topk.h"
+#include "activation/geglu.h"
+#include "math/lgamma.h"
+#include "math/digamma.h"
+#include "math/sign.h"
+#include "reduce/mean.h"
+#include "math/axpy.h"
+#include "math/ceil.h"
+#include "pad/broadcast.h"
+#include "reduce/reduce_xor_sum.h"
+#include "reduce/reduce.h"
+#include "transdata/transdata.h"
+#include "math/cumsum.h"
+#include "math/fmod.h"
+#include "normalization/groupnorm.h"
+#include "utils/init_global_memory.h"
+#endif // __CCE_AICORE__ < 300
+
+#endif // LIB_KERNEL_API_H
diff --git a/lib/transdata/transdata.h b/lib/transdata/transdata.h
new file mode 100644
index 00000000..795c9a03
--- /dev/null
+++ b/lib/transdata/transdata.h
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#ifndef LIB_TRANSDATA_TRANSDATA_H
+#define LIB_TRANSDATA_TRANSDATA_H
+#if __CCE_AICORE__ == 220
+#include "kernel_tensor.h"
+#include "kernel_operator_intf.h"
+#include "kernel_pop_stack_buffer.h"
+#include "../../impl/transdata/transdata_impl.h"
+#if ASCENDC_CPU_DEBUG
+#include "kernel_log.h"
+#include <type_traits>
+#endif
+
+namespace AscendC {
+
+template <const TransDataConfig& config, typename T, typename U, typename S> // overload taking a caller-provided scratch buffer
+__aicore__ inline void TransData(const LocalTensor<T>& dstTensor, const LocalTensor<T>& srcTensor,
+    const LocalTensor<uint8_t>& sharedTmpBuffer, const TransDataParams<U, S>& params)
+{ // NOTE(review): unlike the overload below there is no ASCEND_IS_AIC early return here — confirm that is intended
+    Internal::TransDataImpl<config, T, U, S>(dstTensor, srcTensor, sharedTmpBuffer, params); // plain forwarding
+}
+// Overload without a scratch argument: obtains the temporary buffer via PopStackBuffer, then forwards to the one above.
+template <const TransDataConfig& config, typename T, typename U, typename S>
+__aicore__ inline void TransData(const LocalTensor<T>& dstTensor, const LocalTensor<T>& srcTensor,
+    const TransDataParams<U, S>& params)
+{
+    // Only for AI Vector Core: return immediately when ASCEND_IS_AIC holds.
+    if ASCEND_IS_AIC {
+        return;
+    }
+    LocalTensor<uint8_t> tmp;
+    const bool ret = PopStackBuffer<uint8_t, TPosition::LCM>(tmp);
+    ASCENDC_ASSERT((ret), { KERNEL_LOG(KERNEL_ERROR, "PopStackBuffer Error!"); });
+    // ret is only checked by the assertion above; unless that stops execution the call below runs anyway — TODO confirm.
+    TransData<config, T, U, S>(dstTensor, srcTensor, tmp, params);
+}
+} // namespace AscendC
+#endif
+#endif // LIB_TRANSDATA_TRANSDATA_H
\ No newline at end of file
diff --git a/lib/transdata/transdata_tiling.h b/lib/transdata/transdata_tiling.h
new file mode 100644
index 00000000..87559a51
--- /dev/null
+++ b/lib/transdata/transdata_tiling.h
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/*!
+ * \file transdata_tiling.h
+ * \brief
+ */
+#ifndef LIB_TRANSDATA_TRANSDATA_TILING_H
+#define LIB_TRANSDATA_TRANSDATA_TILING_H
+#include <cstdint>
+#include "graph/tensor.h"
+#include "tiling/platform/platform_ascendc.h"
+
+namespace AscendC {
+/*
+ * @brief Data layouts that TransDataConfig can name as the source or destination format.
+*/
+#ifndef ASCC_ENUM_DATAFORMAT
+#define ASCC_ENUM_DATAFORMAT
+enum class DataFormat : uint8_t {
+    ND = 0,
+    NZ,
+    NCHW,
+    NC1HWC0,
+    NHWC,
+    NCDHW, // 5-D dims in the order (N, C, D, H, W)
+    NDC1HWC0, // 6-D dims in the order (N, D, C1, H, W, C0)
+    FRACTAL_Z_3D, // 7-D dims in the order (D, C1, H, W, N1, N0, C0)
+};
+#endif // ASCC_ENUM_DATAFORMAT
+
+#ifndef ASCC_PARAM_TRANSDATACONFIG
+#define ASCC_PARAM_TRANSDATACONFIG
+struct TransDataConfig { // names the conversion that TransData and GetTransDataMaxMinTmpSize operate on
+    DataFormat srcFormat; // layout of the source tensor
+    DataFormat dstFormat; // layout of the destination tensor
+};
+#endif // ASCC_PARAM_TRANSDATACONFIG
+
+/*!
+ * \brief Computes the maximum and minimum temporary buffer size, in bytes, that TransData needs for the given
+ * shapes and format pair. The developer picks a size within this range as the tiling parameter.
+ * The current implementation reports the same value for both bounds.
+ * \param [in] platform, targeted platform information (only SocVersion ASCEND910B is accepted)
+ * \param [in] srcShape, src tensor shape
+ * \param [in] dstShape, dst tensor shape
+ * \param [in] dataType, actual data type of the input (DT_FLOAT16 or DT_BF16)
+ * \param [in] config, source and destination data formats of the conversion
+ * \param [out] maxValue, maximum temporary space required
+ * \param [out] minValue, minimum temporary space required
+ * \return true on success; false when the data type, platform or shapes are rejected (outputs are then not written)
+ */
+bool GetTransDataMaxMinTmpSize(const platform_ascendc::PlatformAscendC &platform,
+                               const ge::Shape &srcShape,
+                               const ge::Shape &dstShape,
+                               const ge::DataType dataType,
+                               const TransDataConfig &config,
+                               uint32_t &maxValue, uint32_t &minValue);
+} // AscendC
+#endif // LIB_TRANSDATA_TRANSDATA_TILING_H
\ No newline at end of file
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e1f9cd37..6e91de48 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -404,7 +404,11 @@ file(GLOB ASCENDC_TILING_SRC_FILES
     ${ASCENDC_API_DIR}/impl/quantization/quant/*.cpp
     ${ASCENDC_API_DIR}/impl/sort/topk/*.cpp
     ${ASCENDC_API_DIR}/impl/reduce/reduce_tiling.cpp
+    # normalization (layernormV2) tiling sources
     ${ASCENDC_API_DIR}/impl/normalization/layernormV2/*.cpp
+    # transdata tiling source
+    ${ASCENDC_API_DIR}/impl/transdata/transdata_tiling.cpp
+
 )
 
 # ascendc_tiling_utest
@@ -460,11 +464,15 @@ foreach(product_type ${PRODUCT_TYPE_LIST})
         ${ASCENDC_API_DIR}/lib/reduce/
         ${ASCENDC_API_DIR}/lib/select/
         ${ASCENDC_API_DIR}/lib/transpose/
+        # include directories of the other API families
         ${ASCENDC_API_DIR}/lib/matmul/
         ${ASCENDC_API_DIR}/lib/math/
         ${ASCENDC_API_DIR}/lib/normalization/
         ${ASCENDC_API_DIR}/lib/quantization/
         ${ASCENDC_API_DIR}/lib/sort/
+        # include directory of the transdata API
+        ${ASCENDC_API_DIR}/lib/transdata/
+
         ${ASCENDC_TESTS_DIR}/common/
     )
 
diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp
index 50d4faa0..cc0a7cec 100644
--- a/tests/tiling/test_tiling.cpp
+++ b/tests/tiling/test_tiling.cpp
@@ -13,6 +13,7 @@
 #define private public
 #define protected public
 #include "lib/activation/softmax_tiling.h"
+#include "lib/transdata/transdata_tiling.h" // temp for upload code
 #include "tiling_api.h"
 #include "platform_stub.h"
 #include "impl/matmul/tiling/math_util.h"
@@ -33,6 +34,1310 @@ protected:
 };
 
 
+<<<<<<< HEAD
+=======
+#if defined(__DAV_C310__) || defined(__DAV_310R6__)
+TEST_F(TestTiling, TestSoftMaxTiling) // Pins tmp-size queries and reduceM tiling results of the SoftMax / SoftMaxFlash / SoftMaxGrad helpers for a { 128, 128 } shape.
+{
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    optiling::SoftMaxTiling tilingData;
+    auto softmaxShape = ge::Shape(shapeDims);
+    uint32_t softmaxTmpSize = 100 * 1024 * 4;
+    uint32_t softmaxNeedMinSize = GetSoftMaxMinTmpSize(ascendcPlatform, softmaxShape, 2, true);
+    EXPECT_EQ(softmaxNeedMinSize, 128 * (16 + 128) * 4);
+    uint32_t softmaxFlashNeedMinSize = GetSoftMaxFlashMinTmpSize(softmaxShape, 2, true, true);
+    EXPECT_EQ(softmaxFlashNeedMinSize, (16 * 4 + 128 * 2) * 4);
+    softmaxFlashNeedMinSize = GetSoftMaxFlashMinTmpSize(softmaxShape, 4, true, true);
+    EXPECT_EQ(softmaxFlashNeedMinSize, (8 * 4 + 128 * 2) * 4);
+    softmaxFlashNeedMinSize = GetSoftMaxFlashMinTmpSize(softmaxShape, 4, false, true);
+    EXPECT_EQ(softmaxFlashNeedMinSize, (8 + 128 + 64) * 4);
+    uint32_t softmaxGradNeedMinSize = GetSoftMaxGradMinTmpSize(softmaxShape, 2, true, true);
+    EXPECT_EQ(softmaxGradNeedMinSize, (16 * 2 + 128 * 3 + 64) * 4);
+    softmaxGradNeedMinSize = GetSoftMaxGradMinTmpSize(softmaxShape, 4, true, true);
+    EXPECT_EQ(softmaxGradNeedMinSize, (8 + 128 + 64) * 4);
+    softmaxGradNeedMinSize = GetSoftMaxGradMinTmpSize(softmaxShape, 0, true, true); // second argument 0 (presumably an unsupported data-type size) must yield 0
+    EXPECT_EQ(softmaxGradNeedMinSize, 0);
+
+    uint32_t softmaxNeedMaxSize = GetSoftMaxMaxTmpSize(ascendcPlatform, softmaxShape, 2, true);
+    EXPECT_EQ(softmaxNeedMaxSize, 128 * (16 + 128 + 64) * 4);
+    softmaxNeedMaxSize = GetSoftMaxMaxTmpSize(ascendcPlatform, softmaxShape, 1, true); // value 1 in the same position must yield 0
+    EXPECT_EQ(softmaxNeedMaxSize, 0);
+    uint32_t softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 2, true, true);
+    EXPECT_EQ(softmaxFlashNeedMaxSize, 128 * (16 * 4 + 128 * 2) * 4);
+    softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 4, false, true);
+    EXPECT_EQ(softmaxFlashNeedMaxSize, 128 * (8 + 128 + 64) * 4);
+    softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 4, true, true);
+    EXPECT_EQ(softmaxFlashNeedMaxSize, 128 * (8 * 4 + 128 * 2) * 4);
+    softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 1, true, true); // value 1 must yield 0
+    EXPECT_EQ(softmaxFlashNeedMaxSize, 0);
+    uint32_t softmaxGradNeedMaxSize = GetSoftMaxGradMaxTmpSize(softmaxShape, 2, true, true);
+    EXPECT_EQ(softmaxGradNeedMaxSize, 128 * (16 * 2 + 128 * 3 + 64) * 4);
+    softmaxGradNeedMaxSize = GetSoftMaxGradMaxTmpSize(softmaxShape, 4, true, true);
+    EXPECT_EQ(softmaxGradNeedMaxSize, 128 * (8 + 128 + 64) * 4);
+    softmaxGradNeedMaxSize = GetSoftMaxGradMaxTmpSize(softmaxShape, 1, true, true); // value 1 must yield 0
+    EXPECT_EQ(softmaxGradNeedMaxSize, 0);
+    SoftMaxTilingFunc(softmaxShape, 2, softmaxTmpSize, tilingData); // the same tilingData object is overwritten by every *TilingFunc call below
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+    bool flag = IsBasicBlockInSoftMax(tilingData);
+    EXPECT_EQ(flag, true);
+    SoftMaxFlashTilingFunc(softmaxShape, 2, 77952, tilingData, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 32);
+    SoftMaxFlashTilingFunc(softmaxShape, 2, 77952, tilingData, false);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+    SoftMaxGradTilingFunc(softmaxShape, 2, softmaxTmpSize, tilingData, false);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+    SoftMaxGradTilingFunc(softmaxShape, 4, softmaxTmpSize, tilingData, false);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+    SoftMaxGradTilingFunc(softmaxShape, 2, 133120, tilingData, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+}
+
+TEST_F(TestTiling, TestSoftMaxFlashV2TilingMaxMinTmpSize) // Table-style check of GetSoftMaxFlashV2{Min,Max}TmpSize over many shapes, followed by zero-result cases.
+{
+    uint32_t softmaxflashV2NeedMinLength = 0;
+    uint32_t softmaxflashV2NeedMaxLength = 0;
+
+    std::vector<int64_t> shapeDims = { 3, 3, 448 }; // case 1
+    auto softmaxShape = ge::Shape(shapeDims);
+    uint32_t dataTypeSize1 = 2;
+    uint32_t dataTypeSize2 = 2;
+    uint32_t isUpdate = 0; // NOTE(review): flags are declared uint32_t; other tests pass bool literals in these positions - presumably converted at the call, confirm
+    uint32_t isBasicBlock = 0;
+    uint32_t isFlashOutputBrc = 1;
+
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 17504);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 19008);
+
+    shapeDims = {7, 1072}; // case 2
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 31296);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 32256);
+
+    shapeDims = {1, 2, 3, 1, 2, 1, 16}; // case 3: 7-dimensional shape
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 2240);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 4608);
+
+    shapeDims = {2, 6, 1, 16}; // case 4: same expected values as case 3
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 2240);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 4608);
+
+    shapeDims = {6, 1664}; // case 5
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 41184);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 41856);
+
+    shapeDims = {2, 1760 }; // case 6: min and max are expected to be equal
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 15200);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 15200);
+
+    shapeDims = {1, 5536 }; // case 7: min and max are expected to be equal
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 23232);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 23232);
+
+    shapeDims = {2, 2, 2352}; // case 8
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 38816);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 38912);
+
+    shapeDims = {2, 2, 2, 480 }; // case 9
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 0;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 16672);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 17920);
+
+    shapeDims = {2, 3632}; // case 10: first case with isUpdate = 1
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 1;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 29440);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 29824);
+
+    shapeDims = {2, 4, 96}; // case 11: isUpdate = 1; this shape is reused by the zero-result checks below
+    softmaxShape = ge::Shape(shapeDims);
+    dataTypeSize1 = 2;
+    dataTypeSize2 = 2;
+    isUpdate = 1;
+    isBasicBlock = 0;
+    isFlashOutputBrc = 1;
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 3840);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 6144);
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, 1, isUpdate, isBasicBlock, isFlashOutputBrc); // literal 1 replaces the second type-size argument: expected result 0
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 0);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, 1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc); // literal 1 replaces the first type-size argument: expected result 0
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 0);
+
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, 1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 0);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, 1, isUpdate, isBasicBlock, isFlashOutputBrc);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 0);
+}
+
+TEST_F(TestTiling, TestSoftMaxFlashV2Tiling) // Checks SoftMaxFlashV2 min/max tmp sizes and reduceM for a { 128, 128 } shape across type-size and flag combinations.
+{
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    optiling::SoftMaxTiling tilingData;
+    auto softmaxShape = ge::Shape(shapeDims);
+    uint32_t maxSumTypeSize = 2;
+    uint32_t inputTypeSize = 2;
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    uint32_t softmaxflashV2NeedMinLength =
+        GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, false);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 128 * (16 + 128) * 4);
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, (128 * 2 + 128 * (128 + 16)) * 4);
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 128 * (128 + 16) * 4);
+
+    uint32_t softmaxflashV2NeedMaxLength =
+        GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, false);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 16) * 4);
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 16 * 2) * 4);
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 16) * 4);
+
+    maxSumTypeSize = 4; // second phase: maxSumTypeSize 4 with inputTypeSize still 2
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, (128 * 2 + 128 * (128 + 16 + 8)) * 4);
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, 128 * (128 + 8) * 4);
+
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 8 * 2) * 4);
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 8) * 4);
+
+    uint32_t workLength = 100 * 1024;
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, false, false); // the four calls cover every combination of the two trailing bool flags
+    EXPECT_EQ(tilingData.get_reduceM(), 120);
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, false, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, false);
+    EXPECT_EQ(tilingData.get_reduceM(), 120);
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+
+    inputTypeSize = 4; // third phase: both type sizes are 4
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, (128 * 2 + 128 * (16)) * 4);
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (64 + 8) * 4);
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 64);
+}
+
+TEST_F(TestTiling, TestSoftMaxFlashV2TilingBasicBlock) // Checks SoftMaxFlashV2 sizes and reduceM for an { 8, 1024 } shape with both trailing flags true, for inputTypeSize 4 and then 2.
+{
+    std::vector<int64_t> shapeDims = { 8, 1024 };
+    optiling::SoftMaxTiling tilingData;
+    auto softmaxShape = ge::Shape(shapeDims);
+    uint32_t maxSumTypeSize = 4;
+    uint32_t inputTypeSize = 4;
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    uint32_t softmaxflashV2NeedMinLength =
+        GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, (64 + 8 * (16)) * 4);
+    uint32_t softmaxflashV2NeedMaxLength =
+        GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 8*(8 + 64) * 4);
+
+    uint32_t workLength = 32 * 1024;
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 8);
+
+    inputTypeSize = 2; // second phase: inputTypeSize 2 and a larger work buffer
+    workLength = 64 * 1024;
+    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
+    EXPECT_EQ(softmaxflashV2NeedMinLength, (64 + 8 * (16 + 1024 + 8)) * 4);
+    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
+    EXPECT_EQ(softmaxflashV2NeedMaxLength, 8 * (8 + 1024 + 64) * 4);
+    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
+    EXPECT_EQ(tilingData.get_reduceM(), 8);
+}
+
+TEST_F(TestTiling, TestWelfordUpdateTiling) // For a {1, 128} shape with half/float type sizes, GetWelfordUpdateMaxMinTmpSize is expected to report max == min == 0.
+{
+    std::vector<int64_t> shapeDims1d = {1, 128};
+    auto shape1d = ge::Shape(shapeDims1d);
+    uint32_t maxSize = 0; // NOTE(review): initial value equals the expected value, so a callee that never writes the outputs would also pass
+    uint32_t minSize = 0;
+    uint32_t dtypeTSize = sizeof(half);
+    uint32_t dtypeUSize = sizeof(float);
+    bool isReuseSource = false;
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    GetWelfordUpdateMaxMinTmpSize(shape1d, dtypeTSize, dtypeUSize, isReuseSource, false, ascendcPlatform, maxSize, minSize);
+    EXPECT_EQ(minSize, 0);
+    EXPECT_EQ(maxSize, 0);
+}
+
+TEST_F(TestTiling, TestWelfordFinalizeTiling) // For a {64} float shape without source reuse, GetWelfordFinalizeMaxMinTmpSize is expected to report max == min == 768.
+{
+    std::vector<int64_t> shapeDims1d = {64};
+    auto shape1d = ge::Shape(shapeDims1d);
+    uint32_t maxSize = 0;
+    uint32_t minSize = 0;
+    uint32_t dtypeSize = sizeof(float);
+    bool isReuseSource = false;
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    GetWelfordFinalizeMaxMinTmpSize(shape1d, dtypeSize, isReuseSource, ascendcPlatform, maxSize, minSize);
+    EXPECT_EQ(minSize, 768);
+    EXPECT_EQ(maxSize, 768);
+}
+
+TEST_F(TestTiling, TestLayerNormRstdTiling) // Checks LayerNorm and Normalize tmp-size queries plus the ND tiling (rLength, rHeadLength) for a {128, 88} float shape with isComputeRstd = true.
+{
+    const uint32_t stackBufferSize = 100 * 1024;
+    const uint32_t typeSize = sizeof(float);
+    std::vector<int64_t> shapeDims = {128, 88};
+    auto layernormShape = ge::Shape(shapeDims);
+    bool isReuseSource = false;
+    bool isComputeRstd = true;
+    bool isOnlyOutput = false;
+    optiling::LayerNormSeparateTiling tiling;
+    gert::TilingContext* context = fe::GetFakeTilingContext();
+    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
+    uint32_t minValue = 0;
+    uint32_t maxValue = 0;
+    GetLayerNormMaxMinTmpSize(layernormShape, typeSize, isReuseSource, isComputeRstd, isOnlyOutput, ascendcPlatform, maxValue, minValue);
+    EXPECT_EQ(maxValue, 128 * typeSize + 128 * typeSize);
+    EXPECT_EQ(minValue, 128 * typeSize + 128 * typeSize);
+    GetNormalizeMaxMinTmpSize(layernormShape, typeSize, typeSize, isReuseSource, isComputeRstd, isOnlyOutput, ascendcPlatform, maxValue, minValue); // reuses maxValue/minValue; both are expected to be overwritten with 0
+    EXPECT_EQ(maxValue, 0);
+    EXPECT_EQ(minValue, 0);
+    GetLayerNormNDTilingInfo(layernormShape, stackBufferSize, typeSize, isReuseSource, isComputeRstd, ascendcPlatform, tiling);
+    EXPECT_EQ(tiling.get_rLength(), 88);
+    EXPECT_EQ(tiling.get_rHeadLength(), 64); 
+}
+
+TEST_F(TestTiling, TestAntiquantTilingNoTransposeFP4) // DT_FLOAT4_E2M1 -> DT_FLOAT16, isTranspose = false: GetAscendAntiQuantMaxMinTmpSize is expected to report 0 for both bounds.
+{
+    std::vector<int64_t> srcDims = { 640, 5120 };
+    auto srcShape = ge::Shape(srcDims);
+    std::vector<int64_t> offsetDSms = { 1, 5120 };
+    auto offsetShape = ge::Shape(offsetDSms);
+    bool isTranspose = false;
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+    GetAscendAntiQuantMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_FLOAT4_E2M1, ge::DT_FLOAT16, maxValue, minValue);
+    EXPECT_EQ(minValue, 0);
+    EXPECT_EQ(maxValue, 0);
+}
+
+TEST_F(TestTiling, TestAntiquantTilingTransposeFP4) // DT_FLOAT4_E2M1 -> DT_FLOAT16, isTranspose = true: GetAscendAntiQuantMaxMinTmpSize is expected to report 10240 for both bounds.
+{
+    std::vector<int64_t> srcDims = { 640, 5120 };
+    auto srcShape = ge::Shape(srcDims);
+    std::vector<int64_t> offsetDSms = { 1, 5120 };
+    auto offsetShape = ge::Shape(offsetDSms);
+    bool isTranspose = true;
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+    GetAscendAntiQuantMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_FLOAT4_E2M1, ge::DT_FLOAT16, maxValue, minValue);
+    EXPECT_EQ(minValue, 10240);
+    EXPECT_EQ(maxValue, 10240);
+}
+
+TEST_F(TestTiling, TestAntiquantizeTilingNoTransposeFP4) // Same inputs as the AscendAntiQuant FP4 no-transpose case, but through GetAntiQuantizeMaxMinTmpSize; expects 0 for both bounds.
+{
+    std::vector<int64_t> srcDims = { 640, 5120 };
+    auto srcShape = ge::Shape(srcDims);
+    std::vector<int64_t> offsetDSms = { 1, 5120 };
+    auto offsetShape = ge::Shape(offsetDSms);
+    bool isTranspose = false;
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_FLOAT4_E2M1, ge::DT_FLOAT16, maxValue, minValue);
+    EXPECT_EQ(minValue, 0);
+    EXPECT_EQ(maxValue, 0);
+}
+
+TEST_F(TestTiling, TestDequantizeTiling) // Checks GetDequantizeMaxMinTmpSize with second argument 2 for a 2-D {10, 32} shape and a 1-D {320} shape.
+{
+    // 2d input shape
+    std::vector<int64_t> shape_dims = {10, 32};
+    auto shape = ge::Shape(shape_dims);
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+
+    GetDequantizeMaxMinTmpSize(shape, 2, maxValue, minValue);
+    EXPECT_EQ(minValue, 4 * (64 + 32 + 40));
+    EXPECT_EQ(maxValue, 4 * (64 + 32 * 10 + 40));
+
+    // 1d input shape
+    std::vector<int64_t> shape_dims_1d = {320};
+    auto shape_1d = ge::Shape(shape_dims_1d);
+
+    GetDequantizeMaxMinTmpSize(shape_1d, 2, maxValue, minValue); // for the 1-D shape min and max are expected to be equal
+    EXPECT_EQ(minValue, 4 * (64 + 1 * 320 + 328));
+    EXPECT_EQ(maxValue, 4 * (64 + 1 * 320 + 328));
+}
+
+TEST_F(TestTiling, TestAntiquantizeTilingNoTransposePerChannelHalf) // DT_INT8 -> DT_FLOAT16 with a {1, 5120} offset shape, no transpose: expects 0 for both bounds.
+{
+    std::vector<int64_t> srcDims = { 640, 5120 };
+    auto srcShape = ge::Shape(srcDims);
+    std::vector<int64_t> offsetDSms = { 1, 5120 };
+    auto offsetShape = ge::Shape(offsetDSms);
+    bool isTranspose = false;
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_INT8, ge::DT_FLOAT16, maxValue, minValue);
+    EXPECT_EQ(minValue, 0);
+    EXPECT_EQ(maxValue, 0);
+}
+
+TEST_F(TestTiling, TestAntiquantizeTilingNoTransposePerChannel) // DT_INT8 -> DT_BF16 with a {1, 5120} offset shape, no transpose: min and max are expected to equal the same computed value.
+{
+    std::vector<int64_t> srcDims = { 640, 5120 };
+    auto srcShape = ge::Shape(srcDims);
+    std::vector<int64_t> offsetDSms = { 1, 5120 };
+    auto offsetShape = ge::Shape(offsetDSms);
+    bool isTranspose = false;
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_INT8, ge::DT_BF16, maxValue, minValue);
+    uint32_t expectValue = 5120 * 2 * sizeof(float) + 64 * 640 * sizeof(float);
+    EXPECT_EQ(minValue, expectValue);
+    EXPECT_EQ(maxValue, expectValue);
+}
+
+TEST_F(TestTiling, TestAntiquantizeTilingNoTransposePerTensor) // DT_INT8 -> DT_BF16 with a single-element {1} offset shape: expects min 1024 and max 640 * 5120 * sizeof(float).
+{
+    std::vector<int64_t> srcDims = { 640, 5120 };
+    auto srcShape = ge::Shape(srcDims);
+    std::vector<int64_t> offsetDSms = { 1 };
+    auto offsetShape = ge::Shape(offsetDSms);
+    bool isTranspose = false;
+    uint32_t maxValue; // NOTE(review): uninitialized; the assertions below rely on the callee always writing both outputs
+    uint32_t minValue;
+    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_INT8, ge::DT_BF16, maxValue, minValue);
+    EXPECT_EQ(minValue, 1024);
+    EXPECT_EQ(maxValue, 640 * 5120 * sizeof(float));
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutput) // RADIX_SORT, { 32, 32 } DT_INT16 values, no src/dst index, no source reuse: expects max == min == 9728.
+{
+    std::vector<int64_t> shapeDims = { 32, 32 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_INT16;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = false;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 9728);
+    EXPECT_EQ(minValue, 9728);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutputB8) // RADIX_SORT, { 32, 32 } DT_UINT8 values, no src/dst index, no source reuse: expects max == min == 7680.
+{
+    std::vector<int64_t> shapeDims = { 32, 32 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_UINT8;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = false;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 7680);
+    EXPECT_EQ(minValue, 7680);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutputB64) // RADIX_SORT, { 32, 32 } DT_INT64 values, no src/dst index, no source reuse: expects max == min == 15872.
+{
+    std::vector<int64_t> shapeDims = { 32, 32 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_INT64;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = false;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 15872);
+    EXPECT_EQ(minValue, 15872);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingDescendOrder) // RADIX_SORT descending, { 1023 } DT_UINT32 values, no src/dst index, no source reuse: expects max == min == 11776.
+{
+    std::vector<int64_t> shapeDims = { 1023 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_UINT32;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = true;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = false;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 11776);
+    EXPECT_EQ(minValue, 11776);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingWithExtraDstIndex) // RADIX_SORT, { 300 } DT_FLOAT values, hasDstIndex only, no source reuse: expects max == min == 5312.
+{
+    std::vector<int64_t> shapeDims = { 300 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_FLOAT;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = true;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 5312);
+    EXPECT_EQ(minValue, 5312);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingWithExtraDstIndexForB8) // RADIX_SORT, { 300 } DT_UINT8 values, hasDstIndex only, no source reuse: expects max == min == 2112.
+{
+    std::vector<int64_t> shapeDims = { 300 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_UINT8;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = true;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 2112);
+    EXPECT_EQ(minValue, 2112);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingWithBothSrcDstIndex) // RADIX_SORT, { 4096 } DT_UINT16 values with DT_UINT64 indices, both src and dst index, no source reuse: expects max == min == 70144.
+{
+    std::vector<int64_t> shapeDims = { 4096 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_UINT16;
+    ge::DataType indexType = ge::DT_UINT64;
+    bool isDescend = false;
+    bool hasSrcIndex = true;
+    bool hasDstIndex = true;
+    bool isReuseSource = false;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 70144);
+    EXPECT_EQ(minValue, 70144);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutputReuseSource) // RADIX_SORT, { 32, 32 } DT_INT16 values, no src/dst index, isReuseSource = true: expects max == min == 7680.
+{
+    std::vector<int64_t> shapeDims = { 32, 32 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_INT16;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = false;
+    bool isReuseSource = true;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 7680);
+    EXPECT_EQ(minValue, 7680);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingDescendOrderReuseSource) // RADIX_SORT descending, { 1023 } DT_UINT32 values, no src/dst index, isReuseSource = true: expects max == min == 7680.
+{
+    std::vector<int64_t> shapeDims = { 1023 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_UINT32;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = true;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = false;
+    bool isReuseSource = true;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 7680);
+    EXPECT_EQ(minValue, 7680);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingWithExtraDstIndexReuseSource) // RADIX_SORT, { 32, 32 } DT_INT32 values, hasDstIndex only, isReuseSource = true: expects max == min == 11776.
+{
+    std::vector<int64_t> shapeDims = { 32, 32 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_INT32;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = false;
+    bool hasDstIndex = true;
+    bool isReuseSource = true;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 11776);
+    EXPECT_EQ(minValue, 11776);
+}
+
+TEST_F(TestTiling, testAdvanceSortTilingWithBothSrcDstIndexReuseSource) // RADIX_SORT, { 32, 32 } DT_INT16 values, both src and dst index, isReuseSource = true: expects max == min == 7680.
+{
+    std::vector<int64_t> shapeDims = { 32, 32 };
+    auto srcShape = ge::Shape(shapeDims);
+    ge::DataType valueType = ge::DT_INT16;
+    ge::DataType indexType = ge::DT_UINT32;
+    bool isDescend = false;
+    bool hasSrcIndex = true;
+    bool hasDstIndex = true;
+    bool isReuseSource = true;
+    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
+
+    EXPECT_EQ(maxValue, 7680);
+    EXPECT_EQ(minValue, 7680);
+}
+
+extern void platfrom_stub_set_chip_version(const char *num); // defined outside this file; not called by the test immediately below. NOTE(review): "platfrom" spelling must match the definition - do not rename here alone
+TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexTrue_Float_Inner64) // TOPK_NORMAL, isInitIndex = true, dataTypeSize 4, inner 64, outter 1, k 10: pins every TopkTiling field read below plus the tmp-size bounds.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    bool isInitIndex = true;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 4;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed platform info, unlike the tests above that use fe::GetFakeTilingContext()
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 256);
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_innerDataSize(), 128);
+    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
+    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
+    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
+    EXPECT_EQ(tilingData.get_maskOffset(), 16);
+    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
+    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue); // passes literal 4 rather than dataTypeSize (same value here)
+    EXPECT_EQ(maxValue, 1024);
+    EXPECT_EQ(minValue, 1024);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexFalse_Float_Inner64) // TOPK_NORMAL, isInitIndex=false, 4-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 4;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 320);
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_innerDataSize(), 128);
+    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
+    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
+    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
+    EXPECT_EQ(tilingData.get_maskOffset(), 16);
+    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
+    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
+    EXPECT_EQ(tilingData.get_srcIndexOffset(), 256); // only asserted in the isInitIndex=false variants
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue); // NOTE(review): flag after topkMode is false here but true in TopKTilingFunc above - confirm intended
+    EXPECT_EQ(maxValue, 1280);
+    EXPECT_EQ(minValue, 1280);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexTrue_Half_Inner64) // TOPK_NORMAL, isInitIndex=true, 2-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    bool isInitIndex = true;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 2;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 512);
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_innerDataSize(), 256);
+    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
+    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
+    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
+    EXPECT_EQ(tilingData.get_maskOffset(), 16);
+    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
+    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue); // NOTE(review): literal 4 is passed although dataTypeSize is 2 in this Half case - confirm intended
+    EXPECT_EQ(maxValue, 1024);
+    EXPECT_EQ(minValue, 1024);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexFalse_Half_Inner64) // TOPK_NORMAL, isInitIndex=false, 2-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 2;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 640);
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_innerDataSize(), 256);
+    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
+    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
+    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
+    EXPECT_EQ(tilingData.get_maskOffset(), 16);
+    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
+    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
+    EXPECT_EQ(tilingData.get_srcIndexOffset(), 512); // only asserted in the isInitIndex=false variants
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue); // NOTE(review): literal 4 vs dataTypeSize 2, and flag false vs true above - confirm intended
+    EXPECT_EQ(maxValue, 1280);
+    EXPECT_EQ(minValue, 1280);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexTrue_Float_Inner64) // TOPK_NSMALL, isInitIndex=true, 4-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = true;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 4;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, false, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 128);
+    EXPECT_EQ(tilingData.get_maskOffset(), 10); // equals k in this mode's tests
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue); // NOTE(review): flag after topkMode is true here but false in TopKTilingFunc above - confirm intended
+    EXPECT_EQ(maxValue, 512);
+    EXPECT_EQ(minValue, 512);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Float_Inner64) // TOPK_NSMALL, isInitIndex=false, 4-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 4;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_maskOffset(), 10); // equals k in this mode's tests
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 192);
+
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue); // NOTE(review): flag after topkMode is false here but true in TopKTilingFunc above - confirm intended
+    EXPECT_EQ(maxValue, 768);
+    EXPECT_EQ(minValue, 768);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexTrue_Half_Inner64) // TOPK_NSMALL, isInitIndex=true, 2-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = true;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 2;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 256);
+    EXPECT_EQ(tilingData.get_maskOffset(), 10); // equals k in this mode's tests
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue); // NOTE(review): literal 4 vs dataTypeSize 2, and flag false vs true above - confirm intended
+    EXPECT_EQ(maxValue, 512);
+    EXPECT_EQ(minValue, 512);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Half_Inner64) // TOPK_NSMALL, isInitIndex=false, 2-byte elements, inner=64.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 2;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, false, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_maskOffset(), 10); // equals k in this mode's tests
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 384);
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue); // NOTE(review): literal 4 vs dataTypeSize 2, and flag true vs false above - confirm intended
+    EXPECT_EQ(maxValue, 768);
+    EXPECT_EQ(minValue, 768);
+}
+
+TEST_F(TestTiling, TestTopkTiling_DataTypeSize0_FAILED) // Negative case: TopKTilingFunc is expected to return false when dataTypeSize is 0.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10;
+    uint32_t dataTypeSize = 0; // the invalid input under test
+    bool isReuseSource = true; // unused in this test
+    uint32_t maxValue = 0; // unused in this test
+    uint32_t minValue = 0; // unused in this test
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo;
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    auto res = TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, false, tilingData);
+    EXPECT_EQ(res, false);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Half_k) // TOPK_NSMALL, isInitIndex=false, 2-byte elements, k=13.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    int32_t k = 13; // non-const here, unlike the sibling tests; it is not modified below
+    uint32_t dataTypeSize = 2;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_maskOffset(), 13); // equals k
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 384);
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue); // NOTE(review): literal 4 is passed although dataTypeSize is 2 - confirm intended
+    EXPECT_EQ(maxValue, 768);
+    EXPECT_EQ(minValue, 768);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Float_k32) // TOPK_NSMALL, isInitIndex=false, 4-byte elements, k=32.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 32;
+    uint32_t dataTypeSize = 4;
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData;
+    fe::PlatFormInfos platformInfo; // default-constructed; no platform fields are set in this test
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData); // return value is not checked here
+    EXPECT_EQ(tilingData.get_allDataSize(), 64);
+    EXPECT_EQ(tilingData.get_maskOffset(), 32); // equals k
+    EXPECT_EQ(tilingData.get_tmpLocalSize(), 192);
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue); // literal 4 matches dataTypeSize above
+    EXPECT_EQ(maxValue, 768);
+    EXPECT_EQ(minValue, 768);
+}
+
+TEST_F(TestTiling, TestTopkTiling_RadixTopKModeSmall_isInitIndexFalse) // RADIX_SELECT tmp-size query: TOPK_NSMALL, int16 values, no source reuse, isInitIndex=false.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
+    const int32_t outter = 1;
+    const int32_t inner = 32;
+    const int32_t k = 10;
+    ge::DataType valueType = ge::DT_INT16;
+    bool isReuseSource = false;
+    bool isInitIndex = false;
+    TopKConfig config = { TopKAlgo::RADIX_SELECT, TopKOrder::UNSET, true};
+
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    GetTopKMaxMinTmpSize(inner, outter, k, isReuseSource, isInitIndex, topkMode, // overload without a platform argument; takes k, a ge::DataType and a TopKConfig
+        true, valueType, config, maxValue, minValue);
+    EXPECT_EQ(maxValue, 1696); // same expectation as the TOPK_NORMAL variant of this configuration
+    EXPECT_EQ(minValue, 1696);
+}
+
+TEST_F(TestTiling, TestTopkTiling_RadixTopKModeNormal_isInitIndexFalse) // RADIX_SELECT tmp-size query: TOPK_NORMAL, int16 values, no source reuse, isInitIndex=false.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    const int32_t outter = 1;
+    const int32_t inner = 32;
+    const int32_t k = 10;
+    ge::DataType valueType = ge::DT_INT16;
+    bool isReuseSource = false;
+    bool isInitIndex = false;
+    TopKConfig config = { TopKAlgo::RADIX_SELECT, TopKOrder::UNSET, true};
+
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    GetTopKMaxMinTmpSize(inner, outter, k, isReuseSource, isInitIndex, topkMode, // overload without a platform argument; takes k, a ge::DataType and a TopKConfig
+        true, valueType, config, maxValue, minValue);
+    EXPECT_EQ(maxValue, 1696); // same expectation as the TOPK_NSMALL variant of this configuration
+    EXPECT_EQ(minValue, 1696);
+}
+
+TEST_F(TestTiling, TestTopkTiling_RadixTopKModeNormal_isInitIndexTrue) // RADIX_SELECT tmp-size query: TOPK_NORMAL, int16 values, source reuse on, isInitIndex=true.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    const int32_t outter = 1;
+    const int32_t inner = 32;
+    const int32_t k = 10;
+    ge::DataType valueType = ge::DT_INT16;
+    bool isReuseSource = true;
+    bool isInitIndex = true;
+    TopKConfig config = { TopKAlgo::RADIX_SELECT, TopKOrder::UNSET, true};
+
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    GetTopKMaxMinTmpSize(inner, outter, k, isReuseSource, isInitIndex, topkMode, // overload without a platform argument; takes k, a ge::DataType and a TopKConfig
+        true, valueType, config, maxValue, minValue);
+    EXPECT_EQ(maxValue, 1504); // lower than the 1696 expected when both isReuseSource and isInitIndex are false
+    EXPECT_EQ(minValue, 1504);
+}
+
+TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexFalse) // Tmp-size query only: TOPK_NORMAL, isInitIndex=false, inner=64; TopKTilingFunc is not called.
+{
+    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
+    bool isInitIndex = false;
+    const int32_t outter = 1;
+    const int32_t inner = 64;
+    const int32_t k = 10; // unused in this test
+    uint32_t dataTypeSize = 4; // unused in this test; the call below passes the literal 4
+    bool isReuseSource = true;
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    optiling::TopkTiling tilingData; // unused in this test
+    fe::PlatFormInfos platformInfo;
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+
+    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue);
+    EXPECT_EQ(maxValue, 1280);
+    EXPECT_EQ(minValue, 1280);
+}
+
+TEST_F(TestTiling, TestPowerTiling) // Checks GetPowerMaxMinTmpSize for tensor/tensor and tensor/scalar shape pairs with 4- and 2-byte elements.
+{
+    std::vector<int64_t> shapeDims = { 1, 512 };
+    auto powerShape = ge::Shape(shapeDims);
+    uint32_t maxVal = 0; // initialised so a query that writes nothing cannot leave the assertions reading an indeterminate value
+    uint32_t minVal = 0;
+
+    GetPowerMaxMinTmpSize(powerShape, powerShape, false, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 512 * 4 * 2);
+    EXPECT_EQ(minVal, 512 * 4 * 2); // was a second maxVal check; expected min assumed equal to max as in the sibling calls - TODO confirm
+    GetPowerMaxMinTmpSize(powerShape, powerShape, true, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 0);
+    EXPECT_EQ(minVal, 0);
+    GetPowerMaxMinTmpSize(powerShape, powerShape, false, 2, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 512 * 4 * 2);
+    EXPECT_EQ(minVal, 512 * 4 * 2);
+
+    std::vector<int64_t> scalar_shape = { 1 };
+    auto scalarShape = ge::Shape(scalar_shape);
+    GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 2, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 512 * 4 * 2);
+    EXPECT_EQ(minVal, 512 * 4 * 2); // was a second maxVal check; expected min assumed equal to max as in the sibling calls - TODO confirm
+    GetPowerMaxMinTmpSize(powerShape, scalarShape, true, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 0);
+    EXPECT_EQ(minVal, 0);
+    GetPowerMaxMinTmpSize(scalarShape, powerShape, false, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 512 * 4 * 2);
+    EXPECT_EQ(minVal, 512 * 4 * 2);
+}
+
+TEST_F(TestTiling, TestPowerTilingFactorSize) // Checks the maxLiveNodeCnt/extraBuf pair written by GetPowerTmpBufferFactorSize for three argument sets.
+{
+    uint32_t maxLiveNodeCnt = 0xffff; // sentinel values: every assertion below would fail if the call left them untouched
+    uint32_t extraBuf = 0xffff;
+
+    GetPowerTmpBufferFactorSize(false, true, false, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 2);
+    EXPECT_EQ(extraBuf, 0);
+
+    GetPowerTmpBufferFactorSize(false, true, false, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 4);
+    EXPECT_EQ(extraBuf, 0);
+
+    GetPowerTmpBufferFactorSize(false, true, true, 2, maxLiveNodeCnt, extraBuf); // only the third boolean differs from the previous call
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+}
+
+TEST_F(TestTiling, TestPowerTilingWithConfig) // Checks the platform/config overload of GetPowerMaxMinTmpSize for INTRINSIC and DOUBLE_FLOAT_TECH.
+{
+    std::vector<int64_t> shapeDims = { 1, 512 };
+    auto powerShape = ge::Shape(shapeDims);
+    uint32_t maxVal = 0; // initialised so a query that writes nothing cannot leave the assertions reading an indeterminate value
+    uint32_t minVal = 0;
+
+    AscendC::PowerConfig intrinsicConfig = { AscendC::PowerAlgo::INTRINSIC };
+    AscendC::PowerConfig doubleFloatTechConfig = { AscendC::PowerAlgo::DOUBLE_FLOAT_TECH };
+    fe::PlatFormInfos platformInfo;
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+    GetPowerMaxMinTmpSize(plat, intrinsicConfig, powerShape, powerShape, false, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 0);
+    EXPECT_EQ(minVal, 0); // was a second maxVal check; expected min assumed equal to max as in the sibling calls - TODO confirm
+    GetPowerMaxMinTmpSize(plat, intrinsicConfig, powerShape, powerShape, true, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 0);
+    EXPECT_EQ(minVal, 0);
+
+    std::vector<int64_t> scalar_shape = { 1 };
+    auto scalarShape = ge::Shape(scalar_shape);
+    GetPowerMaxMinTmpSize(plat, doubleFloatTechConfig, powerShape, scalarShape, false, 2, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 512 * 4 * 2);
+    EXPECT_EQ(minVal, 512 * 4 * 2); // was a second maxVal check; expected min assumed equal to max as in the sibling calls - TODO confirm
+    GetPowerMaxMinTmpSize(plat, doubleFloatTechConfig, powerShape, scalarShape, true, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 0);
+    EXPECT_EQ(minVal, 0);
+    GetPowerMaxMinTmpSize(plat, doubleFloatTechConfig, scalarShape, powerShape, false, 4, false, maxVal, minVal);
+    EXPECT_EQ(maxVal, 512 * 4 * 2);
+    EXPECT_EQ(minVal, 512 * 4 * 2);
+}
+
+TEST_F(TestTiling, TestPowerTilingFactorSizeWithConfig) // Checks the platform/config overload of GetPowerTmpBufferFactorSize for INTRINSIC and DOUBLE_FLOAT_TECH.
+{
+    uint32_t maxLiveNodeCnt = 0xffff; // sentinel values: every assertion below would fail if the call left them untouched
+    uint32_t extraBuf = 0xffff;
+
+    AscendC::PowerConfig intrinsicConfig = { AscendC::PowerAlgo::INTRINSIC };
+    AscendC::PowerConfig doubleFloatTechConfig = { AscendC::PowerAlgo::DOUBLE_FLOAT_TECH };
+    fe::PlatFormInfos platformInfo;
+    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
+
+    GetPowerTmpBufferFactorSize(plat, intrinsicConfig, false, true, false, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+
+    GetPowerTmpBufferFactorSize(plat, intrinsicConfig, false, true, false, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+
+    GetPowerTmpBufferFactorSize(plat, doubleFloatTechConfig, false, true, false, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 2);
+    EXPECT_EQ(extraBuf, 0);
+
+    GetPowerTmpBufferFactorSize(plat, doubleFloatTechConfig, false, true, false, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 4);
+    EXPECT_EQ(extraBuf, 0);
+
+    GetPowerTmpBufferFactorSize(plat, doubleFloatTechConfig, false, true, true, 2, maxLiveNodeCnt, extraBuf); // NOTE(review): expects 4 here, while TestPowerTilingFactorSize expects 0 from the config-less overload with the same flags - confirm
+    EXPECT_EQ(maxLiveNodeCnt, 4);
+    EXPECT_EQ(extraBuf, 0);
+}
+
+TEST_F(TestTiling, TestCosTilingFloatWithConfig) // Cos tmp-size and factor-size queries for 4-byte elements under both CosAlgo settings.
+{
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    auto cosShape = ge::Shape(shapeDims);
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+
+    AscendC::CosConfig polyConfig = { AscendC::CosAlgo::POLYNOMIAL_APPROXIMATION };
+    AscendC::CosConfig radinConfig = { AscendC::CosAlgo::RADIAN_REDUCTION };
+
+    AscendC::GetCosMaxMinTmpSize(polyConfig, cosShape, 4, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 0);
+    EXPECT_EQ(minValue, 0);
+    AscendC::GetCosMaxMinTmpSize(radinConfig, cosShape, 4, true, maxValue, minValue);
+    EXPECT_EQ(maxValue, 128 * 128 * 2 * 4 + 32); // minValue is not asserted for this call
+
+    uint32_t maxLiveNodeCnt = 0;
+    uint32_t extraBuf = 0;
+    GetCosTmpBufferFactorSize(polyConfig, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+    GetCosTmpBufferFactorSize(radinConfig, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 2);
+    EXPECT_EQ(extraBuf, 32);
+}
+
+TEST_F(TestTiling, TestCosTilingHalfWithConfig) // Cos tmp-size and factor-size queries for 2-byte elements under both CosAlgo settings.
+{
+    std::vector<int64_t> shapeDims = { 512 };
+    auto cosShape = ge::Shape(shapeDims);
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+
+    AscendC::CosConfig polyConfig = { AscendC::CosAlgo::POLYNOMIAL_APPROXIMATION };
+    AscendC::CosConfig radinConfig = { AscendC::CosAlgo::RADIAN_REDUCTION };
+
+    AscendC::GetCosMaxMinTmpSize(polyConfig, cosShape, 2, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 0);
+    EXPECT_EQ(minValue, 0);
+
+    AscendC::GetCosMaxMinTmpSize(radinConfig, cosShape, 2, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 512 * 2 * 4 + 32);
+    EXPECT_EQ(minValue, 512 * 2 * 4 + 32);
+
+    uint32_t maxLiveNodeCnt = 0;
+    uint32_t extraBuf = 0;
+    GetCosTmpBufferFactorSize(polyConfig, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+    GetCosTmpBufferFactorSize(radinConfig, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 4);
+    EXPECT_EQ(extraBuf, 0); // NOTE(review): the tmp-size expectation above adds 32 and the float test expects extraBuf 32 - confirm 0 is intended here
+}
+
+TEST_F(TestTiling, TestHypotTilingHalf) // Hypot tmp-size and factor-size queries with typeSize 2; every output is expected to be 0.
+{
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    auto atanShape = ge::Shape(shapeDims); // named atanShape although this test exercises the Hypot API
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+    GetHypotMaxMinTmpSize(atanShape, 2, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 0); // outputs were initialised to 0, so these checks cannot tell "wrote 0" from "wrote nothing"
+    EXPECT_EQ(minValue, 0);
+    uint32_t maxLiveNodeCnt = 0;
+    uint32_t extraBuf = 0;
+    GetHypotTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+}
+
+TEST_F(TestTiling, TestSinTilingFloatWithConfig) // Sin tmp-size and factor-size queries for 4-byte elements under both SinAlgo settings.
+{
+    std::vector<int64_t> shapeDims = { 128, 128 };
+    auto sinShape = ge::Shape(shapeDims);
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+
+    AscendC::SinConfig polyConfig = { AscendC::SinAlgo::POLYNOMIAL_APPROXIMATION };
+    AscendC::SinConfig radinConfig = { AscendC::SinAlgo::RADIAN_REDUCTION };
+
+    AscendC::GetSinMaxMinTmpSize(polyConfig, sinShape, 4, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 0);
+    EXPECT_EQ(minValue, 0);
+    AscendC::GetSinMaxMinTmpSize(radinConfig, sinShape, 4, true, maxValue, minValue);
+    EXPECT_EQ(maxValue, 128 * 128 * 2 * 4 + 32); // minValue is not asserted for this call
+
+    uint32_t maxLiveNodeCnt = 0;
+    uint32_t extraBuf = 0;
+    GetSinTmpBufferFactorSize(polyConfig, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+    GetSinTmpBufferFactorSize(radinConfig, 4, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 2);
+    EXPECT_EQ(extraBuf, 32);
+}
+
+TEST_F(TestTiling, TestSinTilingHalfWithConfig) // Sin tmp-size and factor-size queries for 2-byte elements under both SinAlgo settings.
+{
+    std::vector<int64_t> shapeDims = { 512 };
+    auto sinShape = ge::Shape(shapeDims);
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+
+    AscendC::SinConfig polyConfig = { AscendC::SinAlgo::POLYNOMIAL_APPROXIMATION };
+    AscendC::SinConfig radinConfig = { AscendC::SinAlgo::RADIAN_REDUCTION };
+
+    AscendC::GetSinMaxMinTmpSize(polyConfig, sinShape, 2, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 0);
+    EXPECT_EQ(minValue, 0);
+
+    AscendC::GetSinMaxMinTmpSize(radinConfig, sinShape, 2, false, maxValue, minValue);
+    EXPECT_EQ(maxValue, 512 * 2 * 4 + 32);
+    EXPECT_EQ(minValue, 512 * 2 * 4 + 32);
+
+    uint32_t maxLiveNodeCnt = 0;
+    uint32_t extraBuf = 0;
+    GetSinTmpBufferFactorSize(polyConfig, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 0);
+    EXPECT_EQ(extraBuf, 0);
+    GetSinTmpBufferFactorSize(radinConfig, 2, maxLiveNodeCnt, extraBuf);
+    EXPECT_EQ(maxLiveNodeCnt, 4);
+    EXPECT_EQ(extraBuf, 0); // NOTE(review): the tmp-size expectation above adds 32 and the float test expects extraBuf 32 - confirm 0 is intended here
+}
+
+TEST_F(TestTiling, TestConfusionTransposeTiling) // Exercises the ConfusionTranspose tmp-size and tiling-info queries with fourth-argument values 13, 14 and 15.
+{
+    const uint32_t stackBufferSize = 0;
+    const uint32_t typeSize = 4;
+
+    std::vector<int64_t> shapeDims = { 32, 64, 128 };
+    auto srcShape = ge::Shape(shapeDims);
+    uint32_t maxValue = 0;
+    uint32_t minValue = 0;
+
+    AscendC::GetConfusionTransposeMaxMinTmpSize(srcShape, typeSize, 13, maxValue, minValue);
+    AscendC::GetConfusionTransposeMaxMinTmpSize(srcShape, typeSize, 14, maxValue, minValue);
+    AscendC::GetConfusionTransposeMaxMinTmpSize(srcShape, typeSize, 15, maxValue, minValue);
+    EXPECT_EQ(maxValue, 0); // asserted once, after all three calls; the outputs are not checked after the 13 and 14 calls individually
+    EXPECT_EQ(minValue, 0);
+
+    optiling::ConfusionTransposeTiling tiling;
+    AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 13, tiling); // no assertions follow: these calls only check that the queries run
+    AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 14, tiling);
+    AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 15, tiling);
+}
+
+#else
+extern void platfrom_stub_set_chip_version(const char *num);
+
 TEST_F(TestTiling, MultiCoreSmallMN)
 {
     matmul_tiling::MultiCoreMatmulTiling rnnMatmul3,rnnMatmul4,rnnMatmul5;
@@ -5400,6 +6705,70 @@ TEST_F(TestTiling, TestOneElementBroadCast200)
 }
 #endif
 
+TEST_F(TestTiling, testTransDataTilingUnalignedHw) // TransData tmp-size queries on Ascend910B for a 3x3 h/w extent across three format pairs.
+{
+    platfrom_stub_set_chip_version("Ascend910B");
+    uint32_t maxSize = 0; // initialised so a failed query cannot leave the non-fatal assertions reading an indeterminate value
+    uint32_t minSize = 0;
+    auto ncdhwShape = ge::Shape({ 16, 16, 3, 3, 3 });
+    auto ndc1hwc0Shape = ge::Shape({ 16, 3, 1, 3, 3, 16});
+    auto fractalzShape = ge::Shape({ 3, 1, 3, 3, 1, 16, 16});
+    fe::PlatFormInfos platform_info;
+    auto plat = platform_ascendc::PlatformAscendC(&platform_info);
+    TransDataConfig config = {DataFormat::NCDHW, DataFormat::NDC1HWC0};
+    bool ret = GetTransDataMaxMinTmpSize(plat, ncdhwShape, ndc1hwc0Shape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, 1632);
+    EXPECT_EQ(minSize, 1632);
+
+    config = {DataFormat::NDC1HWC0, DataFormat::NCDHW}; // reverse direction of the first query
+    ret = GetTransDataMaxMinTmpSize(plat, ndc1hwc0Shape, ncdhwShape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, 2048);
+    EXPECT_EQ(minSize, 2048);
+
+    config = {DataFormat::NCDHW, DataFormat::FRACTAL_Z_3D};
+    ret = GetTransDataMaxMinTmpSize(plat, ncdhwShape, fractalzShape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, 26112);
+    EXPECT_EQ(minSize, 26112);
+}
+
+TEST_F(TestTiling, testTransDataTilingAlignedHw) // TransData tmp-size queries on Ascend910B for a 4x8 h/w extent across three format pairs.
+{
+    platfrom_stub_set_chip_version("Ascend910B");
+    uint32_t maxSize = 0; // initialised so a failed query cannot leave the non-fatal assertions reading an indeterminate value
+    uint32_t minSize = 0;
+    auto ncdhwShape = ge::Shape({ 5, 30, 2, 4, 8 });
+    auto ndc1hwc0Shape = ge::Shape({ 5, 2, 2, 4, 8, 16});
+    auto fractalzShape = ge::Shape({ 2, 2, 4, 8, 1, 16, 16});
+    fe::PlatFormInfos platform_info;
+    auto plat = platform_ascendc::PlatformAscendC(&platform_info);
+    TransDataConfig config = {DataFormat::NCDHW, DataFormat::NDC1HWC0};
+    bool ret = GetTransDataMaxMinTmpSize(plat, ncdhwShape, ndc1hwc0Shape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, 4224);
+    EXPECT_EQ(minSize, 4224);
+
+    config = {DataFormat::NDC1HWC0, DataFormat::NCDHW}; // reverse direction of the first query
+    ret = GetTransDataMaxMinTmpSize(plat, ndc1hwc0Shape, ncdhwShape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, 4608);
+    EXPECT_EQ(minSize, 4608);
+
+    config = {DataFormat::NCDHW, DataFormat::FRACTAL_Z_3D};
+    ret = GetTransDataMaxMinTmpSize(plat, ncdhwShape, fractalzShape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, 69376);
+    EXPECT_EQ(minSize, 69376);
+}
+
 TEST_F(TestTiling, TestReduceXorSumTilingInt16)
 {
     std::vector<int64_t> shapeDims = { 128, 128 };
-- 
Gitee


From 28a54fa25ada7088ad91c44eb58675df6cd20aff Mon Sep 17 00:00:00 2001
From: chen-yiyuan <chenyiyuan5@huawei.com>
Date: Tue, 20 May 2025 11:17:54 +0800
Subject: [PATCH 3/5] add transdata high api fractal_z_3d to ncdhw

---
 impl/transdata/transdata_common_impl.h      |  29 +++
 impl/transdata/transdata_impl.h             | 231 +++++++++++++++--
 impl/transdata/transdata_tiling.cpp         |  90 +++++--
 lib/tiling_api.h                            |  90 +++++++
 tests/CMakeLists.txt                        |  88 +++++++
 tests/tiling/test_tiling.cpp                |  49 +++-
 tests/transdata/test_operator_transdata.cpp | 267 ++++++++++++++++++++
 tiling/tiling_api.h                         |  90 +++++++
 8 files changed, 882 insertions(+), 52 deletions(-)
 create mode 100644 impl/transdata/transdata_common_impl.h
 create mode 100644 lib/tiling_api.h
 create mode 100644 tests/transdata/test_operator_transdata.cpp
 create mode 100644 tiling/tiling_api.h

diff --git a/impl/transdata/transdata_common_impl.h b/impl/transdata/transdata_common_impl.h
new file mode 100644
index 00000000..b1289dc7
--- /dev/null
+++ b/impl/transdata/transdata_common_impl.h
@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#ifndef IMPL_TRANSDATA_TRANSDATA_COMMON_IMPL_H
+#define IMPL_TRANSDATA_TRANSDATA_COMMON_IMPL_H
+
+namespace AscendC { // Shared TransData parameter types, split out of transdata_impl.h by this patch.
+template <typename T, typename U>
+struct TransDataParams { // Pairs a source layout object with a destination layout object; T and U are unconstrained here.
+    T srcLayout;
+    U dstLayout;
+};
+
+#ifndef ASCC_PARAM_TRANSDATACONFIG // guard lets another definition of TransDataConfig (under the same macro) coexist with this header
+#define ASCC_PARAM_TRANSDATACONFIG
+struct TransDataConfig { // Source/destination format pair selecting the conversion direction.
+    DataFormat srcFormat; // NOTE(review): DataFormat is neither declared nor included in this header; presumably supplied by the including file - confirm
+    DataFormat dstFormat;
+};
+#endif // ASCC_PARAM_TRANSDATACONFIG
+} // namespace AscendC
+
+#endif // IMPL_TRANSDATA_TRANSDATA_COMMON_IMPL_H
\ No newline at end of file
diff --git a/impl/transdata/transdata_impl.h b/impl/transdata/transdata_impl.h
index 37966c8a..33571fab 100644
--- a/impl/transdata/transdata_impl.h
+++ b/impl/transdata/transdata_impl.h
@@ -13,34 +13,209 @@
 #include "kernel_tensor.h"
 #include "kernel_operator_intf.h"
 #include "kernel_tiling/kernel_tiling.h"
+#include "transdata_common_impl.h"
+#include "../common/check.h"
+#include "../api_check/kernel_api_check.h"
 
 namespace AscendC {
+namespace Internal {
 
-template <typename T, typename U>
-struct TransDataParams {
-    T srcLayout;
-    U dstLayout;
-};
-
-#ifndef ASCC_PARAM_TRANSDATACONFIG
-#define ASCC_PARAM_TRANSDATACONFIG
-struct TransDataConfig {
-    DataFormat srcFormat;
-    DataFormat dstFormat;
-};
-#endif // ASCC_PARAM_TRANSDATACONFIG
+namespace {
+constexpr int32_t n0 = 16;  // group size used to split n into n1 groups: n1 = (n + n0 - 1) / n0
+constexpr int32_t c0 = 16;  // group size used to split c into c1 groups: c1 = (c + c0 - 1) / c0
+constexpr int32_t hw0 = 16; // h * w is rounded up to a multiple of hw0 to obtain padHw
+} // namespace
 
-namespace Internal {
 struct TransDataTmpParams {
     int32_t n;
     int32_t c;
     int32_t d;
     int32_t h;
     int32_t w;
+    int32_t n1;    // number of n0-sized groups covering n: (n + n0 - 1) / n0
+    int32_t c1;    // number of c0-sized groups covering c: (c + c0 - 1) / c0
+    int32_t padHw; // h * w rounded up to a multiple of hw0
 };
 
 constexpr int32_t DEFAULT_TRANSDATA_5HD_LIST = 16;
 
+template <typename T>
+__aicore__ inline void DC1Hwn1n0c0ToC1DHwn1n0c0HWAlign(const LocalTensor<T>& dst, const LocalTensor<T>& src,
+    const TransDataTmpParams& params)
+{
+    // Layout change: src (d, c1, h*w, n1, n0, c0) -> dst (c1, d, padHw, n1, n0, c0); dst is zero-filled first.
+    int32_t d = params.d;
+    int32_t h = params.h;
+    int32_t w = params.w;
+    int32_t n1 = params.n1;
+    int32_t c1 = params.c1;
+    int32_t padHw = params.padHw;
+
+    uint32_t dim0 = d;
+    uint32_t dim1 = c1;
+    uint32_t lastDim = h * w * n1 * n0 * c0; // elements of one unpadded (d, c1) slice in src
+
+    // dim0, dim1, lastDim -> dim1, dim0, lastDim; dst bursts are separated by the padding + the other d slices
+    int32_t n1n0c0DimElems = n1 * n0 * c0;
+    int32_t hwAlignElems = padHw * n1n0c0DimElems; // elements of one padded (c1, d) slice in dst
+    int32_t hwPadElems = (padHw - h * w) * n1n0c0DimElems; // padding elements closing each dst slice
+
+    uint16_t blockCount = dim1; // one burst per c1 index
+    uint16_t blockLen = lastDim * sizeof(T) / ONE_BLK_SIZE; // in ONE_BLK_SIZE units; NOTE(review): may truncate
+    uint16_t srcGap = 0; // the c1 slices of one d index are read back to back
+    uint16_t dstGap = ((dim0 - 1) * hwAlignElems + hwPadElems) * sizeof(T) / ONE_BLK_SIZE;
+
+    uint32_t dstSize = c1 * d * padHw * n1 * n0 * c0;
+    Duplicate<T>(dst, static_cast<T>(0), dstSize); // zero the whole dst so positions skipped by the copy hold 0
+    PipeBarrier<PIPE_V>();
+
+    DataCopyParams dataCopyParams = { blockCount, blockLen, srcGap, dstGap };
+    for (uint32_t d0 = 0; d0 < dim0; d0++) { // one strided copy per d index
+        DataCopy(dst[d0 * hwAlignElems], src[d0 * dim1 * lastDim], dataCopyParams);
+    }
+    PipeBarrier<PIPE_V>();
+}
+
+template <typename T>
+__aicore__ inline void C1Dhwn1n0c0ToC1C0Dhwn1n0(const LocalTensor<T>& dst, const LocalTensor<T>& src,
+    const TransDataTmpParams& params)
+{
+    // C1 DHWN1N0 C0 -> C1 C0 DHWN1N0, issued as one TransDataTo5HD call per c1 group
+    int32_t d = params.d;
+    int32_t n1 = params.n1;
+    int32_t c1 = params.c1;
+    int32_t padHw = params.padHw;
+
+    TransDataTo5HDParams transDataParams;
+    transDataParams.dstHighHalf = false;
+    transDataParams.srcHighHalf = false;
+    transDataParams.repeatTimes = d * padHw * n1; // one repeat per (d, padHw, n1) index
+    if (transDataParams.repeatTimes == 1) {
+        transDataParams.srcRepStride = 0; // a single repeat uses stride 0 on both sides
+        transDataParams.dstRepStride = 0;
+    } else {
+        transDataParams.srcRepStride = DEFAULT_TRANSDATA_5HD_LIST * c0 * sizeof(T) / ONE_BLK_SIZE;
+        transDataParams.dstRepStride = n0 * sizeof(T) / ONE_BLK_SIZE; // both strides are in ONE_BLK_SIZE units
+    }
+
+    uint64_t srcOffsetArr[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t dstOffsetArr[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t srcAddr = (uint64_t)src.GetPhyAddr();
+    uint64_t dstAddr = (uint64_t)dst.GetPhyAddr();
+    for (uint32_t j = 0; j < c1; j++) {
+        uint32_t outOffset = j * d * padHw * n1 * n0 * c0; // element offset of c1 group j, same for src and dst
+        for (uint8_t i = 0; i < DEFAULT_TRANSDATA_5HD_LIST; i++) { // entry i: src + i*n0, dst + i*(d*padHw*n1*n0)
+            srcOffsetArr[i] = (uint64_t)(srcAddr + (outOffset + i * n0) * sizeof(T));
+            dstOffsetArr[i] = (uint64_t)(dstAddr + (outOffset + i * d * padHw * n1 * n0) * sizeof(T));
+        }
+        TransDataTo5HD<T>(dstOffsetArr, srcOffsetArr, transDataParams);
+    }
+    PipeBarrier<PIPE_V>();
+}
+
+template <typename T>
+__aicore__ inline void C1c0dhwN1n0ToNcdhw(const LocalTensor<T>& dst, const LocalTensor<T>& src,
+    const LocalTensor<T>& tmp, const TransDataTmpParams& params)
+{
+    // C1C0DHW N1N0 -> N CDHW: one TransDataTo5HD call per n1 group; entries beyond n are redirected into tmp
+    int32_t d = params.d;
+    int32_t n1 = params.n1;
+    int32_t padHw = params.padHw;
+    int32_t currN = params.n; // n rows not yet emitted; reduced by n0 after each n1 group
+    int32_t c = params.c;
+
+    TransDataTo5HDParams transDataParams;
+    transDataParams.dstHighHalf = false;
+    transDataParams.srcHighHalf = false;
+    transDataParams.repeatTimes = c * d * padHw / n0; // NOTE(review): assumes padHw is a multiple of n0 -- confirm
+    if (transDataParams.repeatTimes == 1) {
+        transDataParams.srcRepStride = 0; // a single repeat uses stride 0 on both sides
+        transDataParams.dstRepStride = 0;
+    } else {
+        transDataParams.srcRepStride = DEFAULT_TRANSDATA_5HD_LIST * n1 * n0 * sizeof(T) / ONE_BLK_SIZE;
+        transDataParams.dstRepStride = c0 * sizeof(T) / ONE_BLK_SIZE; // both strides are in ONE_BLK_SIZE units
+    }
+
+    uint64_t srcOffsetArr[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t dstOffsetArr[DEFAULT_TRANSDATA_5HD_LIST];
+    uint64_t srcAddr = (uint64_t)src.GetPhyAddr();
+    uint64_t dstAddr = (uint64_t)dst.GetPhyAddr();
+    uint64_t tmpAddr = (uint64_t)tmp.GetPhyAddr();
+    for (uint32_t j = 0; j < n1; j++) {
+        if (n0 - currN > 0) { // fewer than n0 rows remain: only the first currN entries target dst
+            for (uint8_t i = 0; i < currN; i++) {
+                dstOffsetArr[i] = (uint64_t)(dstAddr + (j * d * c * padHw * n0 + i * c * d * padHw) * sizeof(T));
+            }
+            for (uint8_t i = currN; i < DEFAULT_TRANSDATA_5HD_LIST; i++) { // remaining entries point into tmp
+                dstOffsetArr[i] = (uint64_t)(tmpAddr + i * ONE_BLK_SIZE * sizeof(T));
+            }
+        } else { // at least n0 rows remain: every entry targets dst
+            for (uint8_t i = 0; i < DEFAULT_TRANSDATA_5HD_LIST; i++) {
+                dstOffsetArr[i] = (uint64_t)(dstAddr + (j * d * c * padHw * n0 + i * c * d * padHw) * sizeof(T));
+            }
+        }
+        for (uint8_t i = 0; i < DEFAULT_TRANSDATA_5HD_LIST; i++) { // src entry i: n0 block j of source row i
+            srcOffsetArr[i] = (uint64_t)(srcAddr + (j * n0 + i * n0 * n1) * sizeof(T));
+        }
+        TransDataTo5HD<T>(dstOffsetArr, srcOffsetArr, transDataParams);
+        currN -= n0; // may become negative after the last group; it is not read again in that case
+    }
+    PipeBarrier<PIPE_V>();
+}
+
+template <typename T>
+__aicore__ inline void N1n0C1c0DHWToNCDHW(const LocalTensor<T>& dst, const LocalTensor<T>& src,
+    const TransDataTmpParams& params)
+{
+    // N1N0 C1C0 D H W -> N C D H W; NOTE(review): no caller of this helper is visible in this patch section
+    int32_t n = params.n;
+    int32_t c = params.c;
+    int32_t d = params.d;
+    int32_t c1 = params.c1;
+    int32_t padHw = params.padHw;
+
+    uint16_t blockCount = n; // one burst per n index
+    uint16_t blockLen = (c * (d * padHw)) * sizeof(T) / ONE_BLK_SIZE; // c * d * padHw elements, in ONE_BLK_SIZE units
+    uint16_t srcGap = ((c1 * c0 - c) * (d * padHw)) * sizeof(T) /ONE_BLK_SIZE; // skips channels c .. c1 * c0 - 1
+    uint16_t dstGap = 0; // bursts are written back to back in dst
+    DataCopyParams dataCopyParams = { blockCount, blockLen, srcGap, dstGap };
+    DataCopy(dst, src, dataCopyParams);
+    PipeBarrier<PIPE_V>();
+}
+
+template <typename T>
+__aicore__ inline void TransDataFractalToNcdhw(const LocalTensor<T>& dst, const LocalTensor<T>& src,
+    const LocalTensor<uint8_t>& tmpBuffer, const TransDataTmpParams& params)
+{
+    int32_t d = params.d;
+    int32_t n1 = params.n1;
+    int32_t c1 = params.c1;
+    int32_t padHw = params.padHw;
+    int32_t n = params.n;
+    int32_t c = params.c;
+
+    LocalTensor<half> tmp = tmpBuffer.template ReinterpretCast<half>(); // all three stages run on half views
+    LocalTensor<half> srcTmp = src.template ReinterpretCast<half>();
+    if (c == c1 * c0 && n == n1 * n0) { // c and n are multiples of c0 / n0: dst doubles as stage-1 buffer
+        LocalTensor<half> dstTmp = dst.template ReinterpretCast<half>();
+        // D C1 HWN1N0C0 -> C1 D HWN1N0C0 (H*W 32B ALIGN -> HW1*HW0)
+        DC1Hwn1n0c0ToC1DHwn1n0c0HWAlign<half>(dstTmp, srcTmp, params);
+        // C1 DHWN1N0 C0 -> C1 C0 DHWN1N0
+        C1Dhwn1n0c0ToC1C0Dhwn1n0<half>(tmp, dstTmp, params);
+        // C1C0DHW N1N0 -> N CDHW
+        C1c0dhwN1n0ToNcdhw<half>(dstTmp, tmp, tmp, params);
+    } else { // otherwise stages 1 and 2 both work inside tmpBuffer
+        LocalTensor<half> transDataTmp = tmp[n1 * n0 * c1 * c0 * d * padHw]; // region after stage-1 output
+        LocalTensor<half> dstTmp = dst.template ReinterpretCast<half>();
+        // D C1 HWN1N0C0 -> C1 D HWN1N0C0 (H*W 32B ALIGN -> HW1*HW0)
+        DC1Hwn1n0c0ToC1DHwn1n0c0HWAlign<half>(tmp, srcTmp, params);
+        // C1 DHWN1N0 C0 -> C1 C0 DHWN1N0
+        C1Dhwn1n0c0ToC1C0Dhwn1n0<half>(transDataTmp, tmp, params);
+        // C1C0DHW N1N0 -> N CDHW
+        C1c0dhwN1n0ToNcdhw<half>(dstTmp, transDataTmp, tmp, params);
+    }
+}
+
 // Transdata NCDHW -> FRACTAL_Z_3D
 template <typename T>
 __aicore__ inline void TransDataImplMode1(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
@@ -70,7 +245,7 @@ __aicore__ inline void TransDataImplMode1(const LocalTensor<T>& dst, const Local
     transDataParams.dstHighHalf = false;
     transDataParams.srcHighHalf = false;
     transDataParams.repeatTimes = currAxis / elePerBlk;
-    // if repeat = 1, start offset is auto incremental by stride. 
+    // if repeat = 1, start offset is auto incremental by stride.
     transDataParams.dstRepStride = transDataParams.repeatTimes == 1 ? 0 : n1 * n0;
     transDataParams.srcRepStride = transDataParams.repeatTimes == 1 ? 0 : 1;
 
@@ -232,7 +407,7 @@ __aicore__ inline void TransDataImplMode3(const LocalTensor<T>& dst, const Local
     uint64_t dstTensorAddr = (uint64_t)dst.GetPhyAddr();
     uint64_t tmpDstTensorAddr = (uint64_t)tmpDstTensor.GetPhyAddr();
     uint64_t tmpBufferAddr = (uint64_t)tmpBuffer.GetPhyAddr();
-    
+
     int32_t axisHwd = h * w * d;
     int32_t axisHwc0 = h * w * c0;
     int32_t axisC1hwc0 = axisHwc0 * c1;
@@ -295,19 +470,27 @@ __aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const Loca
     static_assert(Std::is_tuple_v<SrcShapeTuple>, "it must be a shape.");
     static_assert(Std::is_tuple_v<DstShapeTuple>, "it must be a shape.");
 
+    CHECK_FUNC_HIGHLEVEL_API(TransData, (config, T, U, S), (dstTensor, srcTensor, sharedTmpBuffer, params));
     auto ncdhwShape = config.srcFormat == DataFormat::NCDHW ? params.srcLayout.GetShape() : params.dstLayout.GetShape();
+    int32_t n = Std::get<0>(ncdhwShape);
+    int32_t c = Std::get<1>(ncdhwShape);
+    int32_t d = Std::get<2>(ncdhwShape);
+    int32_t h = Std::get<3>(ncdhwShape);
+    int32_t w = Std::get<4>(ncdhwShape);
+    int32_t n1 = (n + n0 - 1) / n0;
+    int32_t c1 = (c + c0 - 1) / c0;
+    int32_t hw1 = (h * w + hw0 - 1) / hw0;
+    int32_t padHw = hw1 * hw0;
     TransDataTmpParams tmpParams = {
-        Std::get<0>(ncdhwShape),
-        Std::get<1>(ncdhwShape),
-        Std::get<2>(ncdhwShape),
-        Std::get<3>(ncdhwShape),
-        Std::get<4>(ncdhwShape)
+        n, c, d, h, w, n1, c1, padHw,
     };
 
-    if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) {
-        TransDataImplMode2(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
-    } else if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
+    if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
         TransDataImplMode1(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
+    } else if constexpr (config.srcFormat == DataFormat::FRACTAL_Z_3D && config.dstFormat == DataFormat::NCDHW) {
+        TransDataFractalToNcdhw<T>(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
+    } else if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) {
+        TransDataImplMode2(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     } else if constexpr (config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW) {
         TransDataImplMode3(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     }
diff --git a/impl/transdata/transdata_tiling.cpp b/impl/transdata/transdata_tiling.cpp
index dfb216cc..a5de5de1 100644
--- a/impl/transdata/transdata_tiling.cpp
+++ b/impl/transdata/transdata_tiling.cpp
@@ -46,6 +46,46 @@ int32_t AlignUp(int32_t a, int32_t b)
     return DivCeil(a, b) * b;
 }
 
+bool GenerateFractalZ3DToNcdhwShapeInfo(const std::vector<int64_t>& dstDims, const std::vector<int64_t>& srcDims,
+   TmpTransDataParams &param, const int32_t c0, const int32_t n0) // fills param from dstDims; false on mismatch
+{
+    ASCENDC_HOST_ASSERT(srcDims.size() == 7 && dstDims.size() == 5, return false,
+        "[TransData][GetTransDataMaxMinTmpSize] input shapes are not matched with DataFormat.");
+    param.n = dstDims[N_INDEX]; // n, c, d, h, w come from the 5-D destination shape
+    param.c = dstDims[C_INDEX];
+    param.d = dstDims[D_INDEX];
+    param.h = dstDims[H_INDEX];
+    param.w = dstDims[W_INDEX];
+    // validate d, h, w against the 7-D source dims, which are read as (d, c1, h, w, n1, n0, c0)
+    ASCENDC_HOST_ASSERT(param.d == srcDims[0] && param.h == srcDims[2] && param.w == srcDims[3], return false,
+        "[TransData][GetTransDataMaxMinTmpSize] shapeInfo d,h,w is not matched.");
+    ASCENDC_HOST_ASSERT(srcDims[6] == c0 && srcDims[1] * c0 == AlignUp(param.c, c0), return false,
+        "[TransData][GetTransDataMaxMinTmpSize] src c0, c1 is not able to be converted to c.");
+    ASCENDC_HOST_ASSERT(srcDims[5] == n0 && srcDims[4] * n0 == AlignUp(param.n, n0), return false,
+        "[TransData][GetTransDataMaxMinTmpSize] src n0, n1 is not able to be converted to n.");
+    return true;
+}
+
+bool GenerateNcdhwToFractalZ3DShapeInfo(const std::vector<int64_t>& dstDims, const std::vector<int64_t>& srcDims,
+   TmpTransDataParams &param, const int32_t c0, const int32_t n0) // fills param from srcDims; false on mismatch
+{
+    ASCENDC_HOST_ASSERT(srcDims.size() == 5 && dstDims.size() == 7, return false,
+        "[TransData][GetTransDataMaxMinTmpSize] input shapes are not matched with DataFormat.");
+    param.n = srcDims[N_INDEX]; // n, c, d, h, w come from the 5-D source shape
+    param.c = srcDims[C_INDEX];
+    param.d = srcDims[D_INDEX];
+    param.h = srcDims[H_INDEX];
+    param.w = srcDims[W_INDEX];
+    // validate d, h, w against the 7-D destination dims, which are read as (d, c1, h, w, n1, n0, c0)
+    ASCENDC_HOST_ASSERT(param.d == dstDims[0] && param.h == dstDims[2] && param.w == dstDims[3], return false,
+        "[TransData][GetTransDataMaxMinTmpSize] shapeInfo d,h,w is not matched.");
+    ASCENDC_HOST_ASSERT(dstDims[6] == c0 && dstDims[1] * c0 == AlignUp(param.c, c0), return false,
+        "[TransData][GetTransDataMaxMinTmpSize] dst c0, c1 is not able to be converted to c.");
+    ASCENDC_HOST_ASSERT(dstDims[5] == n0 && dstDims[4] * n0 == AlignUp(param.n, n0), return false,
+        "[TransData][GetTransDataMaxMinTmpSize] dst n0, n1 is not able to be converted to n.");
+    return true;
+}
+
 bool GenerateShapeInfo(const TransDataConfig &config, const ge::Shape &srcShape, const ge::Shape &dstShape, ge::DataType type,
     TmpTransDataParams &param)
 {
@@ -54,40 +94,39 @@ bool GenerateShapeInfo(const TransDataConfig &config, const ge::Shape &srcShape,
     std::vector<int64_t> srcDims = srcShape.GetDims();
     std::vector<int64_t> dstDims = dstShape.GetDims();
     if (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) {
-        ASCENDC_HOST_ASSERT(srcDims.size() == 5 && dstDims.size() == 6, return false, "input shapes are not matched with DataFormat.");
+        ASCENDC_HOST_ASSERT(srcDims.size() == 5 && dstDims.size() == 6, return false,
+            "[TransData][GetTransDataMaxMinTmpSize] input shapes are not matched with DataFormat.");
         param.n = srcDims[N_INDEX];
         param.c = srcDims[C_INDEX];
         param.d = srcDims[D_INDEX];
         param.h = srcDims[H_INDEX];
         param.w = srcDims[W_INDEX];
         // validate n, d, h, w
-        ASCENDC_HOST_ASSERT(param.n == dstDims[0] && param.d == dstDims[1] && param.h == dstDims[3] && param.w == dstDims[4], return false, "shapeInfo n,d,h,w is not matched.");
-        ASCENDC_HOST_ASSERT(dstDims[5] == c0 && dstDims[2] * c0 == AlignUp(param.c, c0), return false, "dst c0, c1 is not able to be converted to c.");
+        ASCENDC_HOST_ASSERT(param.n == dstDims[0] && param.d == dstDims[1] && param.h == dstDims[3] && param.w == dstDims[4],
+            return false, "[TransData][GetTransDataMaxMinTmpSize] shapeInfo n,d,h,w is not matched.");
+        ASCENDC_HOST_ASSERT(dstDims[5] == c0 && dstDims[2] * c0 == AlignUp(param.c, c0), return false,
+            "[TransData][GetTransDataMaxMinTmpSize] dst c0, c1 is not able to be converted to c.");
         return true;
     }
     if (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
-        ASCENDC_HOST_ASSERT(srcDims.size() == 5 && dstDims.size() == 7, return false, "input shapes are not matched with DataFormat.");
-        param.n = srcDims[N_INDEX];
-        param.c = srcDims[C_INDEX];
-        param.d = srcDims[D_INDEX];
-        param.h = srcDims[H_INDEX];
-        param.w = srcDims[W_INDEX];
-        // validate n, d, h, w
-        ASCENDC_HOST_ASSERT(param.d == dstDims[0] && param.h == dstDims[2] && param.w == dstDims[3], return false, "shapeInfo n,d,h,w is not matched.");
-        ASCENDC_HOST_ASSERT(dstDims[6] == c0 && dstDims[1] * c0 == AlignUp(param.c, c0), return false, "dst c0, c1 is not able to be converted to c.");
-        ASCENDC_HOST_ASSERT(dstDims[5] == n0 && dstDims[4] * n0 == AlignUp(param.n, n0), return false, "dst n0, n1 is not able to be converted to n.");
-        return true;
+        return GenerateNcdhwToFractalZ3DShapeInfo(dstDims, srcDims, param, c0, n0);
+    }
+    if (config.srcFormat == DataFormat::FRACTAL_Z_3D && config.dstFormat == DataFormat::NCDHW) {
+        return GenerateFractalZ3DToNcdhwShapeInfo(dstDims, srcDims, param, c0, n0);
     }
     if (config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW) {
-        ASCENDC_HOST_ASSERT(srcDims.size() == 6 && dstDims.size() == 5, return false, "input shapes are not matched with DataFormat.");
+        ASCENDC_HOST_ASSERT(srcDims.size() == 6 && dstDims.size() == 5, return false,
+            "[TransData][GetTransDataMaxMinTmpSize] input shapes are not matched with DataFormat.");
         param.n = dstDims[N_INDEX];
         param.c = dstDims[C_INDEX];
         param.d = dstDims[D_INDEX];
         param.h = dstDims[H_INDEX];
         param.w = dstDims[W_INDEX];
         // validate n, d, h, w
-        ASCENDC_HOST_ASSERT(param.n == srcDims[0] && param.d == srcDims[1] && param.h == srcDims[3] && param.w == srcDims[4], return false, "shapeInfo n,d,h,w is not matched.");
-        ASCENDC_HOST_ASSERT(srcDims[5] == c0 && srcDims[2] * c0 == AlignUp(param.c, c0), return false, "src c0, c1 is not able to be converted to c.");
+        ASCENDC_HOST_ASSERT(param.n == srcDims[0] && param.d == srcDims[1] && param.h == srcDims[3] && param.w == srcDims[4],
+            return false, "[TransData][GetTransDataMaxMinTmpSize] shapeInfo n,d,h,w is not matched.");
+        ASCENDC_HOST_ASSERT(srcDims[5] == c0 && srcDims[2] * c0 == AlignUp(param.c, c0), return false,
+            "[TransData][GetTransDataMaxMinTmpSize]  src c0, c1 is not able to be converted to c.");
         return true;
     }
     return false;
@@ -113,9 +152,16 @@ int32_t GetTmpBufferSize(const TransDataConfig &config, const TmpTransDataParams
     {
         return c * d * padHw * dataSize + n1 * n0 * d * c1 * c0 * padHw * dataSize;
     }
+    if (config.srcFormat == DataFormat::FRACTAL_Z_3D && config.dstFormat == DataFormat::NCDHW)
+    {
+        constexpr int32_t doubleTmpSize = 2;
+        if (n == n0 * n1 && c == c0 * c1) {
+            return n1 * n0 * c1 * c0 * d * padHw * dataSize;
+        }
+        return n1 * n0 * c1 * c0 * d * padHw * dataSize * doubleTmpSize;
+    }
     return 0;
 }
-
 } // namespace
 
 bool GetTransDataMaxMinTmpSize(const platform_ascendc::PlatformAscendC &platform,
@@ -125,14 +171,16 @@ bool GetTransDataMaxMinTmpSize(const platform_ascendc::PlatformAscendC &platform
                                 const TransDataConfig &config,
                                 uint32_t &maxValue, uint32_t &minValue)
 {
-    ASCENDC_HOST_ASSERT(dataType == ge::DataType::DT_FLOAT16 || dataType == ge::DataType::DT_BF16, return false, "it only supports DT_FLOAT16/DT_BF16 data type");
+    ASCENDC_HOST_ASSERT(dataType == ge::DataType::DT_FLOAT16 || dataType == ge::DataType::DT_BF16, return false,
+        "[TransData][GetTransDataMaxMinTmpSize] it only supports DT_FLOAT16/DT_BF16 data type");
     platform_ascendc::SocVersion socVersion = platform.GetSocVersion();
     ASCENDC_HOST_ASSERT(socVersion == platform_ascendc::SocVersion::ASCEND910B, return false,
-                        "Unsupported SocVersion for TransData API.");
+                        "[TransData][GetTransDataMaxMinTmpSize] Unsupported SocVersion for TransData API.");
 
     TmpTransDataParams tmpParam;
 
-    ASCENDC_HOST_ASSERT(GenerateShapeInfo(config, srcShape, dstShape, dataType, tmpParam), return false, "failed to validate inputs informations.");
+    ASCENDC_HOST_ASSERT(GenerateShapeInfo(config, srcShape, dstShape, dataType, tmpParam), return false,
+        "[TransData][GetTransDataMaxMinTmpSize] failed to validate inputs informations.");
     maxValue = GetTmpBufferSize(config, tmpParam);
     minValue = maxValue;
     return true;
diff --git a/lib/tiling_api.h b/lib/tiling_api.h
new file mode 100644
index 00000000..1b83428d
--- /dev/null
+++ b/lib/tiling_api.h
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/*!
+ * \file tiling_api.h
+ * \brief Umbrella header that includes the tiling headers of the high-level APIs listed below.
+ */
+#ifndef LIB_TILING_API_H
+#define LIB_TILING_API_H
+#include "matmul/matmul_tiling.h"
+#include "matmul/bmm_tiling.h"
+#include "activation/softmax_tiling.h"
+#include "activation/logsoftmax_tiling.h"
+#include "filter/dropout_tiling.h"
+#include "sort/sort_tiling_intf.h"
+#include "index/arithprogression_tiling.h"
+#include "quantization/ascend_dequant_tiling.h"
+#include "quantization/ascend_quant_tiling.h"
+#include "quantization/ascend_antiquant_tiling.h"
+#include "quantization/quantize_tiling.h"
+#include "quantization/antiquantize_tiling.h"
+#include "quantization/dequantize_tiling.h"
+#include "reduce/sum_tiling.h"
+#include "activation/silu_tiling.h"
+#include "activation/swish_tiling.h"
+#include "activation/gelu_tiling.h"
+#include "pad/pad_tiling.h"
+#include "normalization/rmsnorm_tiling.h"
+#include "normalization/deepnorm_tiling.h"
+#include "normalization/layernorm_tiling.h"
+#include "normalization/normalize_tiling.h"
+#include "normalization/batchnorm_tiling.h"
+#include "normalization/layernorm_grad_tiling.h"
+#include "normalization/layernorm_grad_beta_tiling.h"
+#include "normalization/welfordfinalize_tiling.h"
+#include "transpose/confusion_transpose_tiling.h"
+#include "tiling/platform/platform_ascendc.h"
+#include "sort/topk_tiling.h"
+#include "math/tanh_tiling.h"
+#include "activation/sigmoid_tiling.h"
+#include "math/frac_tiling.h"
+#include "math/acos_tiling.h"
+#include "math/asin_tiling.h"
+#include "math/acosh_tiling.h"
+#include "math/asinh_tiling.h"
+#include "math/sin_tiling.h"
+#include "math/cos_tiling.h"
+#include "math/atan_tiling.h"
+#include "math/power_tiling.h"
+#include "math/log_tiling.h"
+#include "math/cosh_tiling.h"
+#include "math/clamp_tiling.h"
+#include "math/erf_tiling.h"
+#include "math/erfc_tiling.h"
+#include "math/round_tiling.h"
+#include "math/sinh_tiling.h"
+#include "activation/swiglu_tiling.h"
+#include "math/tan_tiling.h"
+#include "math/hypot_tiling.h"
+#include "select/selectwithbytesmask_tiling.h"
+#include "math/trunc_tiling.h"
+#include "activation/geglu_tiling.h"
+#include "math/lgamma_tiling.h"
+#include "math/digamma_tiling.h"
+#include "math/atanh_tiling.h"
+#include "math/xor_tiling.h"
+#include "math/sign_tiling.h"
+#include "reduce/mean_tiling.h"
+#include "math/exp_tiling.h"
+#include "math/axpy_tiling.h"
+#include "math/ceil_tiling.h"
+#include "math/floor_tiling.h"
+#include "activation/reglu_tiling.h"
+#include "pad/broadcast_tiling.h"
+#include "reduce/reduce_xor_sum_tiling.h"
+#include "reduce/reduce_tiling.h"
+#include "math/cumsum_tiling.h"
+#include "math/fmod_tiling.h"
+#include "normalization/groupnorm_tiling.h"
+#include "transdata/transdata_tiling.h"
+#include "hccl/hccl_tilingdata.h"
+#include "hccl/hccl_tiling.h"
+#endif // LIB_TILING_API_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6e91de48..133a36fd 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -153,7 +153,93 @@ file(GLOB ASCENDC_TEST_ascend910B1_AIV_CASE_SRC_FILES
     ${ASCENDC_TESTS_DIR}/sort/topk/test_operator_topk.cpp
     ${ASCENDC_TESTS_DIR}/normalization/welfordfinalize/test_operator_welfordfinalize.cpp
     ${ASCENDC_TESTS_DIR}/utils/init_global_memory/test_operator_init_global_memory.cpp
     ${ASCENDC_TESTS_DIR}/normalization/layernormV2/test_operator_layernormV2.cpp
+    # std utilities, transdata and api_check test sources
+    ${ASCENDC_TESTS_DIR}/std/sequence/test_sequence.cpp
+    ${ASCENDC_TESTS_DIR}/std/tuple/*.cpp
+    ${ASCENDC_TESTS_DIR}/std/type_traits/*.cpp
+    ${ASCENDC_TESTS_DIR}/transdata/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/geglu/kernel_geglu_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/gelu/kernel_gelu_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/gelu/kernel_faster_gelu_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/gelu/kernel_faster_geluv2_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/reglu/kernel_reglu_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/sigmoid/kernel_sigmoid_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/silu/kernel_silu_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/swiglu/kernel_swiglu_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/swish/kernel_swish_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/adjust_softmax_res/kernel_adjust_softmax_res_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/log_softmax/kernel_log_softmax_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/simple_softmax/kernel_simple_softmax_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax/kernel_softmax_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_flash/kernel_softmax_flash_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_flashv2/kernel_softmax_flashv2_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_flashv3/kernel_softmax_flashv3_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_grad/kernel_softmax_grad_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_grad_front/kernel_softmax_grad_front_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/filter/droupout/kernel_droupout_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/index/arithprogression/kernel_arithprogression_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/batchnorm/kernel_batchnorm_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/deepnorm/kernel_deepnorm_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/groupnorm/kernel_groupnorm_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/layernorm/kernel_layernorm_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/layernormgrad/kernel_layernormgrad_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/layernormgradbeta/kernel_layernormgradbeta_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/normalize/kernel_normalize_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/rmsnorm/kernel_rmsnorm_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/welfordfinalize/kernel_welfordfinalize_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/normalization/welfordupdate/kernel_welfordupdate_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/pad/broadcast/kernel_broadcast_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/pad/pad/kernel_pad_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/pad/unpad/kernel_unpad_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/quantization/antiquant/kernel_antiquant_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/quantization/dequant/kernel_dequant_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/quantization/quant/kernel_quant_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/select/selectwithbytesmask/kernel_selectwithbytesmask_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/sort/topk/kernel_topk_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/transpose/confusion_transpose/kernel_confusion_transpose_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/utils/init_global_memory/kernel_init_global_memory_check.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/acos/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/acosh/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/asin/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/asinh/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/atan/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/atanh/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/axpy/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/ceil/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/clamp/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/cos/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/cosh/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/cumsum/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/digamma/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/erf/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/erfc/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/exp/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/floor/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/fmod/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/frac/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/lgamma/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/log/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/power/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/round/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/sign/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/sin/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/sinh/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/tan/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/tanh/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/trunc/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/math/xor/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/mean/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/sum/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_xor_sum/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_all/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_any/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_max/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_min/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_sum/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_prod/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_mean/*.cpp
+    ${ASCENDC_TESTS_DIR}/api_check/transdata/*.cpp
 )
 
 # ascend910B1 aic test cases
diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp
index cc0a7cec..09630754 100644
--- a/tests/tiling/test_tiling.cpp
+++ b/tests/tiling/test_tiling.cpp
@@ -13,7 +13,6 @@
 #define private public
 #define protected public
 #include "lib/activation/softmax_tiling.h"
-#include "lib/transdata/transdata_tiling.h" // temp for upload code
 #include "tiling_api.h"
 #include "platform_stub.h"
 #include "impl/matmul/tiling/math_util.h"
@@ -6710,9 +6709,20 @@ TEST_F(TestTiling, testTransDataTilingUnalignedHw)
     platfrom_stub_set_chip_version("Ascend910B");
     uint32_t maxSize;
     uint32_t minSize;
-    auto ncdhwShape = ge::Shape({ 16, 16, 3, 3, 3 });
-    auto ndc1hwc0Shape = ge::Shape({ 16, 3, 1, 3, 3, 16});
-    auto fractalzShape = ge::Shape({ 3, 1, 3, 3, 1, 16, 16});
+    int32_t n = 16;
+    int32_t c = 16;
+    int32_t d = 3;
+    int32_t h = 3;
+    int32_t w = 3;
+    int32_t c0 = 16;
+    int32_t n0 = 16;
+    int32_t c1 = (c + c0 - 1) / c0;
+    int32_t n1 = (n + n0 - 1) / n0;
+    int32_t hw0 = 16;
+    int32_t hw1 = (h * w + hw0 - 1) / hw0;
+    auto ncdhwShape = ge::Shape({ n, c, d, h, w });
+    auto ndc1hwc0Shape = ge::Shape({ n, d, c1, h, w, c0});
+    auto fractalzShape = ge::Shape({ d, c1, h, w, n1, n0, c0});
     fe::PlatFormInfos platform_info;
     auto plat = platform_ascendc::PlatformAscendC(&platform_info);
     TransDataConfig config = {DataFormat::NCDHW, DataFormat::NDC1HWC0};
@@ -6735,6 +6745,13 @@ TEST_F(TestTiling, testTransDataTilingUnalignedHw)
     EXPECT_TRUE(ret);
     EXPECT_EQ(maxSize, 26112);
     EXPECT_EQ(minSize, 26112);
+
+    config = {DataFormat::FRACTAL_Z_3D, DataFormat::NCDHW};
+    ret = GetTransDataMaxMinTmpSize(plat, fractalzShape, ncdhwShape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, n1 * n0 * c1 * c0 * d * hw0 * hw1 * 2);
+    EXPECT_EQ(minSize, n1 * n0 * c1 * c0 * d * hw0 * hw1 * 2);
 }
 
 TEST_F(TestTiling, testTransDataTilingAlignedHw)
@@ -6742,9 +6759,20 @@ TEST_F(TestTiling, testTransDataTilingAlignedHw)
     platfrom_stub_set_chip_version("Ascend910B");
     uint32_t maxSize;
     uint32_t minSize;
-    auto ncdhwShape = ge::Shape({ 5, 30, 2, 4, 8 });
-    auto ndc1hwc0Shape = ge::Shape({ 5, 2, 2, 4, 8, 16});
-    auto fractalzShape = ge::Shape({ 2, 2, 4, 8, 1, 16, 16});
+    int32_t n = 5;
+    int32_t c = 30;
+    int32_t d = 2;
+    int32_t h = 4;
+    int32_t w = 8;
+    int32_t c0 = 16;
+    int32_t n0 = 16;
+    int32_t c1 = (c + c0 - 1) / c0;
+    int32_t n1 = (n + n0 - 1) / n0;
+    int32_t hw0 = 16;
+    int32_t hw1 = (h * w + hw0 - 1) / hw0;
+    auto ncdhwShape = ge::Shape({ n, c, d, h, w });
+    auto ndc1hwc0Shape = ge::Shape({ n, d, c1, h, w, c0});
+    auto fractalzShape = ge::Shape({ d, c1, h, w, n1, n0, c0});
     fe::PlatFormInfos platform_info;
     auto plat = platform_ascendc::PlatformAscendC(&platform_info);
     TransDataConfig config = {DataFormat::NCDHW, DataFormat::NDC1HWC0};
@@ -6767,6 +6795,13 @@ TEST_F(TestTiling, testTransDataTilingAlignedHw)
     EXPECT_TRUE(ret);
     EXPECT_EQ(maxSize, 69376);
     EXPECT_EQ(minSize, 69376);
+
+    config = {DataFormat::FRACTAL_Z_3D, DataFormat::NCDHW};
+    ret = GetTransDataMaxMinTmpSize(plat, fractalzShape, ncdhwShape, ge::DataType::DT_FLOAT16, config, maxSize, minSize);
+
+    EXPECT_TRUE(ret);
+    EXPECT_EQ(maxSize, n1 * n0 * c1 * c0 * d * hw0 * hw1 * 2 * 2);
+    EXPECT_EQ(minSize, n1 * n0 * c1 * c0 * d * hw0 * hw1 * 2 * 2);
 }
 
 TEST_F(TestTiling, TestReduceXorSumTilingInt16)
diff --git a/tests/transdata/test_operator_transdata.cpp b/tests/transdata/test_operator_transdata.cpp
new file mode 100644
index 00000000..d50408a9
--- /dev/null
+++ b/tests/transdata/test_operator_transdata.cpp
@@ -0,0 +1,267 @@
+/**
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+#include <gtest/gtest.h>
+#include "kernel_operator.h"
+
+#include <iostream>
+#include <fstream>
+
+namespace AscendC {
+
+namespace {
+
+constexpr uint32_t NCDHW_FractalZ3D = 1;
+constexpr uint32_t FractalZ3D_NCDHW = 2;
+constexpr uint32_t NCDHW_NDC1HWC0 = 3;
+constexpr uint32_t NDC1HWC0_NCDHW = 4;
+
+
+constexpr TransDataConfig config1 = {DataFormat::NCDHW, DataFormat::FRACTAL_Z_3D};
+constexpr TransDataConfig config2 = {DataFormat::FRACTAL_Z_3D, DataFormat::NCDHW};
+constexpr TransDataConfig config3 = {DataFormat::NCDHW, DataFormat::NDC1HWC0};
+constexpr TransDataConfig config4 = {DataFormat::NDC1HWC0, DataFormat::NCDHW};
+
+}
+
+template <typename T, uint32_t mode>
+class KernelTransData {
+public:
+__aicore__ inline KernelTransData() {}
+__aicore__ inline void Init(GM_ADDR srcGm, GM_ADDR dstGm,
+    int32_t n, int32_t c, int32_t d, int32_t h, int32_t w, TPipe *tpipe)
+{ // Stores the NCDHW extents, derives per-mode element counts, then binds GM tensors and local buffers.
+    this->d = d;
+    this->c = c;
+    this->h = h;
+    this->w = w;
+    this->n = n;
+    this->c1 = (c + c0 - 1) / c0; // ceil(c / c0); c0 is the member constant 16
+    this->n1 = (n + n0 - 1) / n0; // ceil(n / n0); n0 is the member constant 16
+    this->hw1 = (h*w + hw0 - 1) / hw0; // ceil(h * w / hw0); hw0 is the member constant 16
+
+    if (mode == NDC1HWC0_NCDHW) { // plain `if` on a template constant; the other modes below use `if constexpr`
+        this->srcShapeSize = n * c1 * c0 * d * h * w;
+        this->dstShapeSize = n * d * c * hw0;
+        this->tmpShapeSize = 512 + d * c1 * c0 * hw0 * hw1; // NOTE(review): meaning of the extra 512 is not visible here
+        uint32_t dstGmSize = n * c * d * h * w; // GM destination uses the unpadded h * w, unlike dstShapeSize
+        srcGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(srcGm), srcShapeSize * sizeof(T));
+        dstGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(dstGm), dstGmSize * sizeof(T));
+    } else {
+        if constexpr (mode == NCDHW_FractalZ3D) {
+            srcShapeSize = n * d * c * hw0 * hw1; // source rows counted with h * w rounded up to hw0 * hw1
+            dstShapeSize = n1 * n0 * c1 * c0 * d * h * w;
+            if ((h*w) % 16 != 0 ) { // h * w is not a multiple of 16: enable padding and use the padded size
+                needPad = true;
+                dstShapeSize = n1 * n0 * c1 * c0 * d * hw0 * hw1;
+            }
+            tmpShapeSize = c * d * hw0 * hw1 + n1 * n0 * d * c1 * c0 * hw0 * hw1;
+        } else if constexpr (mode == FractalZ3D_NCDHW) {
+            this->srcShapeSize = d * c1 * h * w * n1 * n0 * c0;
+            this->dstShapeSize = n * c * d * (hw1 * hw0);
+            this->tmpShapeSize = d * c1 * (hw1 * hw0) * n1 * n0 * c0 * 2;
+        } else if constexpr (mode == NCDHW_NDC1HWC0) {
+            this->srcShapeSize = n * d * c * hw0; // uses hw0 only (no hw1 factor); needPad is never set in this mode
+            this->dstShapeSize = n * c1 * c0 * d * h * w;
+            this->tmpShapeSize = d * hw0 * hw1 + d * c1 * c0 * hw0 * hw1;
+        }
+        srcGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(srcGm), srcShapeSize * sizeof(T));
+        dstGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ T*>(dstGm), dstShapeSize * sizeof(T));
+    }
+
+
+    this->pipe = tpipe;
+    pipe->InitBuffer(inQueue, 1, srcShapeSize * sizeof(T)); // one buffer per queue, sized in bytes
+    pipe->InitBuffer(outQueue, 1, dstShapeSize * sizeof(T));
+    pipe->InitBuffer(tmpBuf, tmpShapeSize * sizeof(T)); // scratch space later handed to TransData as a uint8_t tensor
+
+}
+__aicore__ inline void Process()
+{
+    CopyIn();
+    Compute();
+    CopyOut();
+}
+
+private:
+__aicore__ inline void CopyIn()
+{ // Loads the source tensor from global memory into a freshly allocated input-queue tensor.
+    LocalTensor<T> srcLocal = inQueue.AllocTensor<T>();
+    if constexpr (mode == NCDHW_FractalZ3D || mode == NCDHW_NDC1HWC0) { // the two modes whose source is NCDHW
+        DataCopyExtParams extParam = {static_cast<uint16_t>(n * c * d), // n * c * d rows ...
+            static_cast<uint32_t>(h * w * sizeof(T)), 0, 0, 0}; // ... of h * w elements each, expressed in bytes
+        DataCopyPadExtParams<T> padParam = {true, 0, 0, 0};
+        if (needPad) { // only ever set by Init in NCDHW_FractalZ3D mode when h * w is not a multiple of 16
+            DataCopyPad(srcLocal, srcGlobal, extParam, padParam);
+        } else {
+            DataCopy(srcLocal, srcGlobal, srcShapeSize);
+        }
+    } else if constexpr (mode == FractalZ3D_NCDHW || mode == NDC1HWC0_NCDHW) { // blocked-format sources: one flat copy
+        DataCopy(srcLocal, srcGlobal, srcShapeSize);
+    }
+
+    inQueue.EnQue(srcLocal); // hand the loaded tensor to Compute()
+}
+__aicore__ inline void Compute()
+{ // Builds the layouts for the stored extents and calls TransData with the config that matches `mode`.
+    LocalTensor<T> dstLocal = outQueue.AllocTensor<T>();
+    LocalTensor<uint8_t> tmp = tmpBuf.Get<uint8_t>(); // scratch buffer passed to TransData as raw bytes
+    LocalTensor<T> srcLocal = inQueue.DeQue<T>();
+    PipeBarrier<PIPE_V>();
+
+    Layout ncdhwLayout = MakeLayout(MakeShape(n, c, d, h, w), MakeStride()); // 5-D shape
+    Layout ndc1hwc0Layout = MakeLayout(MakeShape(n, d, c1, h, w, c0), MakeStride()); // 6-D shape
+    Layout fractalLayout = MakeLayout(MakeShape(d, c1, h, w, n1, n0, c0), MakeStride()); // 7-D shape
+
+    if constexpr (mode == NCDHW_FractalZ3D) { // config1 = {NCDHW, FRACTAL_Z_3D}
+        TransDataParams<decltype(ncdhwLayout), decltype(fractalLayout)> params = {ncdhwLayout, fractalLayout};
+        TransData<config1>(dstLocal, srcLocal, tmp, params);
+    } else if constexpr (mode == FractalZ3D_NCDHW) { // config2 = {FRACTAL_Z_3D, NCDHW}; template arguments spelled out
+        TransDataParams<decltype(fractalLayout), decltype(ncdhwLayout)> params = {fractalLayout, ncdhwLayout};
+        TransData<config2, T, decltype(fractalLayout), decltype(ncdhwLayout)>(dstLocal, srcLocal, tmp, params);
+    } else if constexpr (mode == NCDHW_NDC1HWC0) { // config3 = {NCDHW, NDC1HWC0}
+        TransDataParams<decltype(ncdhwLayout), decltype(ndc1hwc0Layout)> params = {ncdhwLayout, ndc1hwc0Layout};
+        TransData<config3>(dstLocal, srcLocal, tmp, params);
+    } else if constexpr (mode == NDC1HWC0_NCDHW) { // config4 = {NDC1HWC0, NCDHW}
+        TransDataParams<decltype(ndc1hwc0Layout), decltype(ncdhwLayout)> params = {ndc1hwc0Layout, ncdhwLayout};
+        TransData<config4>(dstLocal, srcLocal, tmp, params);
+    }
+
+    outQueue.EnQue<T>(dstLocal); // result is picked up by CopyOut()
+    inQueue.FreeTensor(srcLocal);
+
+}
+__aicore__ inline void CopyOut()
+{ // Writes the transformed tensor from the output queue back to global memory.
+    LocalTensor<T> dstLocal = outQueue.DeQue<T>();
+    DataCopyExtParams extParam {static_cast<uint16_t>(n * c * d), static_cast<uint32_t>(h*w*sizeof(T)), 0, 0, 0};
+    if constexpr (mode == NCDHW_FractalZ3D) {
+        DataCopy(dstGlobal, dstLocal, dstShapeSize); // full FRACTAL_Z_3D result (was only n1 * n0 * c1 elements)
+    } else if constexpr (mode == FractalZ3D_NCDHW) {
+        DataCopy(dstGlobal, dstLocal, dstShapeSize);
+    } else if constexpr (mode == NCDHW_NDC1HWC0) {
+        DataCopy(dstGlobal, dstLocal, dstShapeSize);
+    } else if constexpr (mode == NDC1HWC0_NCDHW) {
+        DataCopyPad(dstGlobal, dstLocal, extParam); // only user of extParam: n * c * d rows of h * w elements (bytes)
+    }
+    outQueue.FreeTensor(dstLocal);
+}
+
+private:
+    GlobalTensor<T> srcGlobal;
+    GlobalTensor<T> dstGlobal;
+    TPipe *pipe;
+    TQue<TPosition::VECIN, 1> inQueue;
+    TQue<TPosition::VECOUT, 1> outQueue;
+    TBuf<QuePosition::VECIN> tmpBuf;
+    bool needPad = false;
+    int32_t n = 0;
+    int32_t c = 0;
+    int32_t d = 0;
+    int32_t h = 0;
+    int32_t w = 0;
+    int32_t n1 = 0;
+    int32_t c1 = 0;
+    int32_t hw1 = 0;
+    int32_t c0 = 16;
+    int32_t n0 = 16;
+    int32_t hw0 = 16;
+    uint32_t srcShapeSize = 0;
+    uint32_t dstShapeSize = 0;
+    uint32_t tmpShapeSize = 0;
+};
+} // namespace AscendC
+
+template <typename T, uint32_t mode>
+__global__ __aicore__ void MainTransdata( // kernel entry point used by the parameterized test: (dst, src, n, c, d, h, w)
+    __gm__ uint8_t* dstGm, __gm__ uint8_t* srcGm, uint64_t n, uint64_t c, uint64_t d, uint64_t h, uint64_t w)
+{
+    if (g_coreType == AscendC::AIC || AscendC::GetBlockIdx() > 0) { // run only on block 0 and never on an AIC core
+        return;
+    }
+    AscendC::TPipe pipe;
+    AscendC::KernelTransData<T, mode> op;
+    op.Init(srcGm, dstGm, n, c, d, h, w, &pipe); // Init takes (src, dst, ...) and int32_t extents, so the uint64_t values narrow
+    op.Process();
+}
+
+struct TransDataTestParams { // one parameter set for TransDataTestsuite
+    int32_t n; // n, c, d, h, w: extents forwarded to the kernel and used to size the host buffers
+    int32_t c;
+    int32_t d;
+    int32_t h;
+    int32_t w;
+    uint32_t mode; // 1..4, same numbering as the NCDHW_FractalZ3D .. NDC1HWC0_NCDHW constants
+    void (*cal_func)(uint8_t*, uint8_t*, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); // (dstGm, srcGm, n, c, d, h, w)
+};
+
+class TransDataTestsuite : public testing::Test, public testing::WithParamInterface<TransDataTestParams> {
+protected:
+    void SetUp()
+    {
+        AscendC::SetGCoreType(2); // NOTE(review): presumably a non-AIC core type so MainTransdata does not return early - confirm
+    }
+
+    void TearDown()
+    {
+        AscendC::SetGCoreType(0); // sets the core type back to 0 after each case
+    }
+};
+
+INSTANTIATE_TEST_CASE_P(TEST_OPERATION_TRANSDATA, TransDataTestsuite,
+    ::testing::Values( // each entry: n, c, d, h, w, mode, kernel entry point instantiated for <type, mode>
+        TransDataTestParams { 5, 32, 2, 1, 16, 1, MainTransdata<half, 1> },
+        TransDataTestParams { 4, 31, 1, 6, 7, 2, MainTransdata<half, 2> },
+        TransDataTestParams { 4, 20, 2, 3, 1, 3, MainTransdata<half, 3> },
+        TransDataTestParams { 8, 14, 2, 1, 16, 4, MainTransdata<half, 4> },
+        TransDataTestParams { 5, 32, 2, 1, 16, 1, MainTransdata<bfloat16_t, 1> },
+        TransDataTestParams { 4, 31, 1, 6, 7, 2, MainTransdata<bfloat16_t, 2> },
+        TransDataTestParams { 4, 20, 2, 3, 1, 3, MainTransdata<bfloat16_t, 3> },
+        TransDataTestParams { 8, 14, 2, 1, 16, 4, MainTransdata<bfloat16_t, 4> }
+
+        ));
+
+TEST_P(TransDataTestsuite, TransDataOpTestCase)
+{ // Sizes zero-filled host buffers for the selected mode, runs the kernel, and checks the first output byte.
+    auto params = GetParam();
+    auto n = params.n;
+    auto c = params.c;
+    auto d = params.d;
+    auto h = params.h;
+    auto w = params.w;
+    auto mode = params.mode;
+    uint32_t srcShapeSize; // assigned only in the mode 1..4 branches below
+    uint32_t dstShapeSize;
+    int32_t hw0 = 16;
+    int32_t hw1 = (h * w + hw0 - 1) / hw0; // ceil(h * w / 16)
+    int32_t c0 = 16;
+    int32_t n0 = 16;
+    int32_t c1 = (c + c0 - 1) / c0; // ceil(c / 16)
+    int32_t n1 = (n + n0 - 1) / n0; // ceil(n / 16)
+    if (mode == 1) { // NCDHW -> FRACTAL_Z_3D
+        srcShapeSize = n * d * c * hw0 * hw1;
+        dstShapeSize = n1 * n0 * c1 * c0 * d * h * w;
+        if ((h*w) % 16 != 0 ) { // h * w not a multiple of 16: size the output with the padded hw0 * hw1
+            dstShapeSize = n1 * n0 * c1 * c0 * d * hw0 * hw1;
+        }
+    } else if (mode == 2) { // FRACTAL_Z_3D -> NCDHW
+        srcShapeSize = d * c1 * h * w * n1 * n0 * c0;
+        dstShapeSize = n * c * d * (hw1 * hw0);
+    } else if (mode == 3) { // NCDHW -> NDC1HWC0
+        srcShapeSize = n * d * c * hw0;
+        dstShapeSize = n * c1 * c0 * d * h * w;
+    } else if (mode == 4) { // NDC1HWC0 -> NCDHW
+        srcShapeSize = n * c1 * c0 * d * h * w;
+        dstShapeSize = n * d * c * hw0;
+    }
+    uint8_t srcGm[srcShapeSize * sizeof(half)] = {0}; // the caller guarantees that the inner dimension is 32-byte aligned
+    uint8_t dstGm[dstShapeSize * sizeof(half)] = {0}; // sized with sizeof(half) for every parameter set, including bfloat16_t
+    params.cal_func(dstGm, srcGm, n, c, d, h, w);
+    EXPECT_EQ(dstGm[0], 0); // input is all zeros, so only the first output byte is checked
+}
diff --git a/tiling/tiling_api.h b/tiling/tiling_api.h
new file mode 100644
index 00000000..015f68e4
--- /dev/null
+++ b/tiling/tiling_api.h
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd.
+ * This file is a part of the CANN Open Software.
+ * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
+ * Please refer to the License for details. You may not use this file except in compliance with the License.
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE in the root of the software repository for the full text of the License.
+ */
+
+/*!
+ * \file tiling_api.h
+ * \brief
+ */
+#ifndef TILING_API_H
+#define TILING_API_H
+#include "../lib/matmul/matmul_tiling.h"
+#include "../lib/matmul/bmm_tiling.h"
+#include "../lib/activation/softmax_tiling.h"
+#include "../lib/activation/logsoftmax_tiling.h"
+#include "../lib/filter/dropout_tiling.h"
+#include "../lib/sort/sort_tiling_intf.h"
+#include "../lib/index/arithprogression_tiling.h"
+#include "../lib/quantization/ascend_dequant_tiling.h"
+#include "../lib/quantization/ascend_quant_tiling.h"
+#include "../lib/quantization/ascend_antiquant_tiling.h"
+#include "../lib/quantization/quantize_tiling.h"
+#include "../lib/quantization/antiquantize_tiling.h"
+#include "../lib/quantization/dequantize_tiling.h"
+#include "../lib/reduce/sum_tiling.h"
+#include "../lib/activation/silu_tiling.h"
+#include "../lib/activation/swish_tiling.h"
+#include "../lib/activation/gelu_tiling.h"
+#include "../lib/pad/pad_tiling.h"
+#include "../lib/normalization/rmsnorm_tiling.h"
+#include "../lib/normalization/deepnorm_tiling.h"
+#include "../lib/normalization/layernorm_tiling.h"
+#include "../lib/normalization/normalize_tiling.h"
+#include "../lib/normalization/groupnorm_tiling.h"
+#include "../lib/normalization/batchnorm_tiling.h"
+#include "../lib/normalization/layernorm_grad_tiling.h"
+#include "../lib/normalization/layernorm_grad_beta_tiling.h"
+#include "../lib/normalization/welfordfinalize_tiling.h"
+#include "../lib/transpose/confusion_transpose_tiling.h"
+#include "tiling/platform/platform_ascendc.h"
+#include "../lib/sort/topk_tiling.h"
+#include "../lib/math/tanh_tiling.h"
+#include "../lib/activation/sigmoid_tiling.h"
+#include "../lib/math/frac_tiling.h"
+#include "../lib/math/acos_tiling.h"
+#include "../lib/math/asin_tiling.h"
+#include "../lib/math/acosh_tiling.h"
+#include "../lib/math/asinh_tiling.h"
+#include "../lib/math/sin_tiling.h"
+#include "../lib/math/cos_tiling.h"
+#include "../lib/math/hypot_tiling.h"
+#include "../lib/math/atan_tiling.h"
+#include "../lib/math/power_tiling.h"
+#include "../lib/math/log_tiling.h"
+#include "../lib/math/cosh_tiling.h"
+#include "../lib/math/clamp_tiling.h"
+#include "../lib/math/erf_tiling.h"
+#include "../lib/math/erfc_tiling.h"
+#include "../lib/math/round_tiling.h"
+#include "../lib/math/sinh_tiling.h"
+#include "../lib/activation/swiglu_tiling.h"
+#include "../lib/math/tan_tiling.h"
+#include "../lib/select/selectwithbytesmask_tiling.h"
+#include "../lib/math/trunc_tiling.h"
+#include "../lib/math/fmod_tiling.h"
+#include "../lib/activation/geglu_tiling.h"
+#include "../lib/math/lgamma_tiling.h"
+#include "../lib/math/digamma_tiling.h"
+#include "../lib/math/atanh_tiling.h"
+#include "../lib/math/xor_tiling.h"
+#include "../lib/math/sign_tiling.h"
+#include "../lib/reduce/mean_tiling.h"
+#include "../lib/math/exp_tiling.h"
+#include "../lib/math/axpy_tiling.h"
+#include "../lib/math/ceil_tiling.h"
+#include "../lib/math/floor_tiling.h"
+#include "../lib/activation/reglu_tiling.h"
+#include "../lib/pad/broadcast_tiling.h"
+#include "../lib/reduce/reduce_xor_sum_tiling.h"
+#include "../lib/reduce/reduce_tiling.h"
+#include "../lib/transdata/transdata_tiling.h"
+#include "../lib/math/cumsum_tiling.h"
+#include "../lib/hccl/hccl_tilingdata.h"
+#include "../lib/hccl/hccl_tiling.h"
+#endif // TILING_API_H
-- 
Gitee


From 51e18c6dce8aa6b86f085dfd093cbf1bb630897a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B1=9F=E4=BF=8A=E6=88=90?= <jiangjuncheng2@huawei.com>
Date: Wed, 4 Jun 2025 09:14:43 +0800
Subject: [PATCH 4/5] add transdata shape check

---
 impl/reduce/reduce_tiling.cpp                 | 10 ++---
 impl/transdata/transdata_impl.h               | 45 ++++++++++++++-----
 impl/transdata/transdata_tiling.cpp           | 13 +++++-
 lib/transdata/transdata.h                     |  1 +
 .../transdata/transdata_common.h              |  6 +--
 5 files changed, 54 insertions(+), 21 deletions(-)
 rename impl/transdata/transdata_common_impl.h => lib/transdata/transdata_common.h (86%)

diff --git a/impl/reduce/reduce_tiling.cpp b/impl/reduce/reduce_tiling.cpp
index d6efe31f..71722928 100644
--- a/impl/reduce/reduce_tiling.cpp
+++ b/impl/reduce/reduce_tiling.cpp
@@ -102,7 +102,6 @@ void GetReduceCommonMaxMinTmpSize(const ge::Shape &srcShape,
 }
 
 inline void GetReduceSumMeanCommonTmpSize(const ge::Shape &srcShape,
-                               const ge::DataType dataType,
                                ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
                                uint32_t &maxValue, uint32_t &minValue, std::string apiName, std::string funcName)
 {
@@ -137,7 +136,6 @@ inline void GetReduceSumMeanCommonTmpSize(const ge::Shape &srcShape,
 }
 
 inline void GetReduceAnyAllCommonTmpSize(const ge::Shape &srcShape,
-                                const ge::DataType dataType,
                                 ReducePattern pattern, bool isSrcInnerPad, bool isReuseSource,
                                 uint32_t &maxValue, uint32_t &minValue, std::string apiName, std::string funcName)
 {
@@ -229,7 +227,7 @@ void GetReduceAnyMaxMinTmpSize(const ge::Shape &srcShape,
         return,
         "[ReduceAny][GetReduceAnyMaxMinTmpSize] it only supports float and uint8_t type on this platform.");
     if (dataType == ge::DT_UINT8) {
-        GetReduceAnyAllCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+        GetReduceAnyAllCommonTmpSize(srcShape, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
             "ReduceAny", "GetReduceAnyMaxMinTmpSize");
     } else {
         GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
@@ -245,7 +243,7 @@ void GetReduceAllMaxMinTmpSize(const ge::Shape &srcShape,
     ASCENDC_HOST_ASSERT((dataType == ge::DT_FLOAT || dataType == ge::DT_UINT8), return,
         "[ReduceAll][GetReduceAllMaxMinTmpSize] it only supports float and uint8 type on this platform.");
     if (dataType == ge::DT_UINT8) {
-        GetReduceAnyAllCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+        GetReduceAnyAllCommonTmpSize(srcShape, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
             "ReduceAll", "GetReduceAllMaxMinTmpSize");
     } else {
         GetReduceCommonMaxMinTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
@@ -260,7 +258,7 @@ void GetReduceSumMaxMinTmpSize(const ge::Shape &srcShape,
 {
     ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return,
         "[ReduceSum][GetReduceSumMaxMinTmpSize] it only supports float type on this platform.");
-    GetReduceSumMeanCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+    GetReduceSumMeanCommonTmpSize(srcShape, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
         "ReduceSum", "GetReduceSumMaxMinTmpSize");
 }
 
@@ -271,7 +269,7 @@ void GetReduceMeanMaxMinTmpSize(const ge::Shape &srcShape,
 {
     ASCENDC_HOST_ASSERT(dataType == ge::DT_FLOAT, return,
         "[ReduceMean][GetReduceMeanMaxMinTmpSize] it only supports float type on this platform.");
-    GetReduceSumMeanCommonTmpSize(srcShape, dataType, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
+    GetReduceSumMeanCommonTmpSize(srcShape, pattern, isSrcInnerPad, isReuseSource, maxValue, minValue,
         "ReduceMean", "GetReduceMeanMaxMinTmpSize");
 }
 }  // namespace AscendC
diff --git a/impl/transdata/transdata_impl.h b/impl/transdata/transdata_impl.h
index 33571fab..ad544da1 100644
--- a/impl/transdata/transdata_impl.h
+++ b/impl/transdata/transdata_impl.h
@@ -13,7 +13,6 @@
 #include "kernel_tensor.h"
 #include "kernel_operator_intf.h"
 #include "kernel_tiling/kernel_tiling.h"
-#include "transdata_common_impl.h"
 #include "../common/check.h"
 #include "../api_check/kernel_api_check.h"
 
@@ -24,6 +23,9 @@ namespace {
 constexpr int32_t n0 = 16;
 constexpr int32_t c0 = 16;
 constexpr int32_t hw0 = 16;
+constexpr int32_t ncdhwDims = 5;
+constexpr int32_t fractalZ3DDims = 7;
+constexpr int32_t ndc1hwc0Dims = 6;
 }
 
 struct TransDataTmpParams {
@@ -458,20 +460,38 @@ __aicore__ inline void TransDataImplMode3(const LocalTensor<T>& dst, const Local
     }
 }
 
-template <const TransDataConfig& config, typename T, typename U, typename S>
-__aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const LocalTensor<T>& srcTensor,
-    const LocalTensor<uint8_t>& sharedTmpBuffer, const TransDataParams<U, S>& params)
+template <typename T, typename U, typename S>
+__aicore__ inline void TransDataCheck(const TransDataParams<U, S>& params)
 {
-    static_assert(SupportType<T, half, bfloat16_t>(), "Currents only supports half/bfloat16_t types.");
+    static_assert(SupportType<T, half, bfloat16_t, uint16_t, int16_t>(),
+        "Currents only supports half/bfloat16_t/uint16_t/int16_t types.");
     static_assert(is_layout_v<U>, "srcLayout must be a layout");
     static_assert(is_layout_v<S>, "dstLayout must be a layout");
     using SrcShapeTuple = Std::remove_cvref_t<decltype(params.srcLayout.GetShape())>;
     using DstShapeTuple = Std::remove_cvref_t<decltype(params.dstLayout.GetShape())>;
     static_assert(Std::is_tuple_v<SrcShapeTuple>, "it must be a shape.");
     static_assert(Std::is_tuple_v<DstShapeTuple>, "it must be a shape.");
+}
 
+template <const TransDataConfig& config, typename T, typename U, typename S>
+__aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const LocalTensor<T>& srcTensor,
+    const LocalTensor<uint8_t>& sharedTmpBuffer, const TransDataParams<U, S>& params)
+{
+    TransDataCheck<T, U, S>(params);
+    auto srcShape = params.srcLayout.GetShape();
+    auto dstShape = params.dstLayout.GetShape();
+    constexpr uint32_t srcShapeSize = static_cast<uint32_t>(Std::tuple_size<decltype(srcShape)>::value);
+    constexpr uint32_t dstShapeSize = static_cast<uint32_t>(Std::tuple_size<decltype(dstShape)>::value);
     CHECK_FUNC_HIGHLEVEL_API(TransData, (config, T, U, S), (dstTensor, srcTensor, sharedTmpBuffer, params));
-    auto ncdhwShape = config.srcFormat == DataFormat::NCDHW ? params.srcLayout.GetShape() : params.dstLayout.GetShape();
+    using srcType = decltype(srcShape);
+    using dstType = decltype(dstShape);
+    using ncdhwType = Std::conditional_t<config.srcFormat == DataFormat::NCDHW, srcType, dstType>;
+    ncdhwType ncdhwShape;
+    if constexpr (config.srcFormat == DataFormat::NCDHW) {
+        ncdhwShape = params.srcLayout.GetShape();
+    } else {
+        ncdhwShape = params.dstLayout.GetShape();
+    }
     int32_t n = Std::get<0>(ncdhwShape);
     int32_t c = Std::get<1>(ncdhwShape);
     int32_t d = Std::get<2>(ncdhwShape);
@@ -481,17 +501,22 @@ __aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const Loca
     int32_t c1 = (c + c0 - 1) / c0;
     int32_t hw1 = (h * w + hw0 - 1) / hw0;
     int32_t padHw = hw1 * hw0;
-    TransDataTmpParams tmpParams = {
-        n, c, d, h, w, n1, c1, padHw,
-    };
-
+    TransDataTmpParams tmpParams = { n, c, d, h, w, n1, c1, padHw };
     if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
+        static_assert(srcShapeSize == ncdhwDims, "srcLayout's shape dims must be equal to 5!");
+        static_assert(dstShapeSize == fractalZ3DDims, "dstLayout's shape dims must be equal to 7!");
         TransDataImplMode1(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     } else if constexpr (config.srcFormat == DataFormat::FRACTAL_Z_3D && config.dstFormat == DataFormat::NCDHW) {
+        static_assert(srcShapeSize == fractalZ3DDims, "srcLayout's shape dims must be equal to 7!");
+        static_assert(dstShapeSize == ncdhwDims, "dstLayout's shape dims must be equal to 5!");
         TransDataFractalToNcdhw<T>(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     } else if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) {
+        static_assert(srcShapeSize == ncdhwDims, "srcLayout's shape dims must be equal to 5!");
+        static_assert(dstShapeSize == ndc1hwc0Dims, "dstLayout's shape dims must be equal to 6!");
         TransDataImplMode2(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     } else if constexpr (config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW) {
+        static_assert(srcShapeSize == ndc1hwc0Dims, "srcLayout's shape dims must be equal to 6!");
+        static_assert(dstShapeSize == ncdhwDims, "dstLayout's shape dims must be equal to 5!");
         TransDataImplMode3(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     }
 }
diff --git a/impl/transdata/transdata_tiling.cpp b/impl/transdata/transdata_tiling.cpp
index a5de5de1..45d1bc71 100644
--- a/impl/transdata/transdata_tiling.cpp
+++ b/impl/transdata/transdata_tiling.cpp
@@ -171,8 +171,17 @@ bool GetTransDataMaxMinTmpSize(const platform_ascendc::PlatformAscendC &platform
                                 const TransDataConfig &config,
                                 uint32_t &maxValue, uint32_t &minValue)
 {
-    ASCENDC_HOST_ASSERT(dataType == ge::DataType::DT_FLOAT16 || dataType == ge::DataType::DT_BF16, return false,
-        "[TransData][GetTransDataMaxMinTmpSize] it only supports DT_FLOAT16/DT_BF16 data type");
+    ASCENDC_HOST_ASSERT(dataType == ge::DataType::DT_FLOAT16 || dataType == ge::DataType::DT_BF16 ||
+        dataType == ge::DataType::DT_UINT16 || dataType == ge::DataType::DT_INT16, return false,
+        "[TransData][GetTransDataMaxMinTmpSize] it only supports DT_FLOAT16/DT_BF16/DT_UINT16/DT_INT16 data type");
+
+    ASCENDC_HOST_ASSERT(((config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) ||
+        (config.srcFormat == DataFormat::FRACTAL_Z_3D && config.dstFormat == DataFormat::NCDHW) ||
+        (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) ||
+        (config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW)), return false,
+         "[TransData][GetTransDataMaxMinTmpSize] The parameter config srcFormat/dstFormat only supports "
+         "(NCDHW, FRACTAL_Z_3D)/(FRACTAL_Z_3D, NCDHW)/(NCDHW, NDC1HWC0)/(NDC1HWC0, NCDHW)!");
+
     platform_ascendc::SocVersion socVersion = platform.GetSocVersion();
     ASCENDC_HOST_ASSERT(socVersion == platform_ascendc::SocVersion::ASCEND910B, return false,
                         "[TransData][GetTransDataMaxMinTmpSize] Unsupported SocVersion for TransData API.");
diff --git a/lib/transdata/transdata.h b/lib/transdata/transdata.h
index 795c9a03..755a0086 100644
--- a/lib/transdata/transdata.h
+++ b/lib/transdata/transdata.h
@@ -10,6 +10,7 @@
 #ifndef LIB_TRANSDATA_TRANSDATA_H
 #define LIB_TRANSDATA_TRANSDATA_H
 #if __CCE_AICORE__ == 220
+#include "transdata_common.h"
 #include "kernel_tensor.h"
 #include "kernel_operator_intf.h"
 #include "kernel_pop_stack_buffer.h"
diff --git a/impl/transdata/transdata_common_impl.h b/lib/transdata/transdata_common.h
similarity index 86%
rename from impl/transdata/transdata_common_impl.h
rename to lib/transdata/transdata_common.h
index b1289dc7..644d78ab 100644
--- a/impl/transdata/transdata_common_impl.h
+++ b/lib/transdata/transdata_common.h
@@ -7,8 +7,8 @@
  * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
  * See LICENSE in the root of the software repository for the full text of the License.
  */
-#ifndef IMPL_TRANSDATA_TRANSDATA_COMMON_IMPL_H
-#define IMPL_TRANSDATA_TRANSDATA_COMMON_IMPL_H
+#ifndef LIB_TRANSDATA_TRANSDATA_COMMON_H
+#define LIB_TRANSDATA_TRANSDATA_COMMON_H
 
 namespace AscendC {
 template <typename T, typename U>
@@ -26,4 +26,4 @@ struct TransDataConfig {
 #endif // ASCC_PARAM_TRANSDATACONFIG
 } // namespace AscendC
 
-#endif // IMPL_TRANSDATA_TRANSDATA_COMMON_IMPL_H
\ No newline at end of file
+#endif // LIB_TRANSDATA_TRANSDATA_COMMON_H
\ No newline at end of file
-- 
Gitee


From ba10d800693e533d28c74a9153aba4361aedf4a8 Mon Sep 17 00:00:00 2001
From: chen-yiyuan <chenyiyuan5@huawei.com>
Date: Mon, 9 Jun 2025 14:31:25 +0800
Subject: [PATCH 5/5] update

---
 docs/README.md                                |    6 +-
 impl/CMakeLists.txt                           |   32 +-
 .../transdata/transdata_impl.h                |   26 +-
 .../transdata/transdata_tiling.cpp            |    2 +-
 lib/kernel_api.h                              |  159 --
 lib/tiling_api.h                              |   90 --
 lib/{transdata => transpose}/transdata.h      |    8 +-
 .../transdata_common.h                        |    6 +-
 .../transdata_tiling.h                        |    6 +-
 tests/CMakeLists.txt                          |   98 +-
 tests/tiling/test_tiling.cpp                  | 1304 -----------------
 .../transdata/test_operator_transdata.cpp     |    0
 tiling/tiling_api.h                           |   90 --
 13 files changed, 32 insertions(+), 1795 deletions(-)
 rename impl/{ => transpose}/transdata/transdata_impl.h (95%)
 rename impl/{ => transpose}/transdata/transdata_tiling.cpp (99%)
 delete mode 100644 lib/kernel_api.h
 delete mode 100644 lib/tiling_api.h
 rename lib/{transdata => transpose}/transdata.h (91%)
 rename lib/{transdata => transpose}/transdata_common.h (88%)
 rename lib/{transdata => transpose}/transdata_tiling.h (94%)
 rename tests/{ => transpose}/transdata/test_operator_transdata.cpp (100%)
 delete mode 100644 tiling/tiling_api.h

diff --git a/docs/README.md b/docs/README.md
index b688a7f7..b90de102 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -334,10 +334,14 @@
         <td> 给定两个源操作数src0和src1，根据maskTensor相应位置的值(非bit位)选取元素，得到目的操作数dst。 </td>
     </tr>
     <tr>
-        <th rowspan="1"> 变形 </th>
+        <th rowspan="2"> 变形 </th>
         <td> ConfusionTranspose </td>
         <td> 对输入数据进行数据排布及Reshape操作。 </td>
     </tr>
+    <tr>
+        <td> TransData </td>
+        <td> 将输入数据的排布格式转换为输出所需的数据排布格式。 </td>
+    </tr>
     <tr>
         <th rowspan="1"> 索引操作 </th>
         <td> ArithProgression </td>
diff --git a/impl/CMakeLists.txt b/impl/CMakeLists.txt
index 8de6e974..1ab2cc72 100644
--- a/impl/CMakeLists.txt
+++ b/impl/CMakeLists.txt
@@ -92,37 +92,7 @@ add_library(tiling_api STATIC
     ${CMAKE_CURRENT_SOURCE_DIR}/math/axpy/axpy_tiling_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/ceil/ceil_tiling_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/floor/floor_tiling_impl.cpp
-<<<<<<< HEAD
-=======
-    ${CMAKE_CURRENT_SOURCE_DIR}/activation/softmax/softmax_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/activation/softmax/logsoftmax_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/rmsnorm/rmsnorm_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/batchnorm/batchnorm_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/sort/sort/sort_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/sort/topk/topk_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/deepnorm/deepnorm_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/select/selectwithbytesmask/selectwithbytesmask_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/layernorm/layernorm_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/normalize/normalize_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/layernormgrad/layernorm_grad_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/layernormgrad/layernorm_grad_beta_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/groupnorm/groupnorm_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/normalization/welfordfinalize/welfordfinalize_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/pad/pad/pad_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/transpose/confusion_transpose/confusion_transpose_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/pad/broadcast/broadcast_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/pad/broadcast/broadcast_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/math/xor/xor_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/math/cumsum/cumsum_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/mean/mean_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/math/sign/sign_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/activation/reglu/reglu_tiling_impl.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/reduce_xor_sum/reduce_xor_sum_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/sum/sum_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/reduce/reduce_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/transdata/transdata_tiling.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/index/arithprogression/arithprogression_tiling_impl.cpp
->>>>>>> 4a0a42bb (update)
+    ${CMAKE_CURRENT_SOURCE_DIR}/transpose/transdata/transdata_tiling.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/fmod/fmod_tiling_impl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/math/trunc/trunc_tiling_impl.cpp
     $<$<BOOL:${BUILD_OPEN_PROJECT}>:$<TARGET_OBJECTS:${ASCENDC_API_ADV_OBJ}>>
diff --git a/impl/transdata/transdata_impl.h b/impl/transpose/transdata/transdata_impl.h
similarity index 95%
rename from impl/transdata/transdata_impl.h
rename to impl/transpose/transdata/transdata_impl.h
index ad544da1..d8e5d41e 100644
--- a/impl/transdata/transdata_impl.h
+++ b/impl/transpose/transdata/transdata_impl.h
@@ -7,14 +7,14 @@
  * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
  * See LICENSE in the root of the software repository for the full text of the License.
  */
-#ifndef IMPL_TRANSDATA_TRANSDATA_IMPL_H
-#define IMPL_TRANSDATA_TRANSDATA_IMPL_H
+#ifndef IMPL_TRANSPOSE_TRANSDATA_TRANSDATA_IMPL_H
+#define IMPL_TRANSPOSE_TRANSDATA_TRANSDATA_IMPL_H
 
 #include "kernel_tensor.h"
 #include "kernel_operator_intf.h"
 #include "kernel_tiling/kernel_tiling.h"
-#include "../common/check.h"
-#include "../api_check/kernel_api_check.h"
+#include "../../common/check.h"
+#include "../../api_check/kernel_api_check.h"
 
 namespace AscendC {
 namespace Internal {
@@ -220,7 +220,7 @@ __aicore__ inline void TransDataFractalToNcdhw(const LocalTensor<T>& dst, const
 
 // Transdata NCDHW -> FRACTAL_Z_3D
 template <typename T>
-__aicore__ inline void TransDataImplMode1(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
+__aicore__ inline void TransDataImplNcdhwToFractal(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
     const TransDataTmpParams& param)
 {
     constexpr int32_t elePerBlk = ONE_BLK_SIZE / sizeof(T);
@@ -320,7 +320,7 @@ __aicore__ inline void TransDataImplMode1(const LocalTensor<T>& dst, const Local
 
 // Transdata NCDHW -> NDC1HWC0
 template <typename T>
-__aicore__ inline void TransDataImplMode2(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
+__aicore__ inline void TransDataImplNcdhwTo6Hd(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
     const TransDataTmpParams& param)
 {
     constexpr int32_t c0 = 16;
@@ -393,7 +393,7 @@ __aicore__ inline void TransDataImplMode2(const LocalTensor<T>& dst, const Local
 
 // Transdata NDC1HWC0 -> NCDHW
 template <typename T>
-__aicore__ inline void TransDataImplMode3(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
+__aicore__ inline void TransDataImpl6HdToNcdhw(const LocalTensor<T>& dst, const LocalTensor<T>& src, const LocalTensor<uint8_t>& tmpBuffer,
     const TransDataTmpParams& param)
 {
     const int32_t n = param.n, c = param.c, d = param.d, h = param.h, w = param.w;
@@ -469,8 +469,8 @@ __aicore__ inline void TransDataCheck(const TransDataParams<U, S>& params)
     static_assert(is_layout_v<S>, "dstLayout must be a layout");
     using SrcShapeTuple = Std::remove_cvref_t<decltype(params.srcLayout.GetShape())>;
     using DstShapeTuple = Std::remove_cvref_t<decltype(params.dstLayout.GetShape())>;
-    static_assert(Std::is_tuple_v<SrcShapeTuple>, "it must be a shape.");
-    static_assert(Std::is_tuple_v<DstShapeTuple>, "it must be a shape.");
+    static_assert(Std::is_tuple_v<SrcShapeTuple>, "srcLayout.GetShape() must be a shape.");
+    static_assert(Std::is_tuple_v<DstShapeTuple>, "dstLayout.GetShape() must be a shape.");
 }
 
 template <const TransDataConfig& config, typename T, typename U, typename S>
@@ -505,7 +505,7 @@ __aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const Loca
     if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::FRACTAL_Z_3D) {
         static_assert(srcShapeSize == ncdhwDims, "srcLayout's shape dims must be equal to 5!");
         static_assert(dstShapeSize == fractalZ3DDims, "dstLayout's shape dims must be equal to 7!");
-        TransDataImplMode1(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
+        TransDataImplNcdhwToFractal(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     } else if constexpr (config.srcFormat == DataFormat::FRACTAL_Z_3D && config.dstFormat == DataFormat::NCDHW) {
         static_assert(srcShapeSize == fractalZ3DDims, "srcLayout's shape dims must be equal to 7!");
         static_assert(dstShapeSize == ncdhwDims, "dstLayout's shape dims must be equal to 5!");
@@ -513,14 +513,14 @@ __aicore__ inline void TransDataImpl(const LocalTensor<T>& dstTensor, const Loca
     } else if constexpr (config.srcFormat == DataFormat::NCDHW && config.dstFormat == DataFormat::NDC1HWC0) {
         static_assert(srcShapeSize == ncdhwDims, "srcLayout's shape dims must be equal to 5!");
         static_assert(dstShapeSize == ndc1hwc0Dims, "dstLayout's shape dims must be equal to 6!");
-        TransDataImplMode2(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
+        TransDataImplNcdhwTo6Hd(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     } else if constexpr (config.srcFormat == DataFormat::NDC1HWC0 && config.dstFormat == DataFormat::NCDHW) {
         static_assert(srcShapeSize == ndc1hwc0Dims, "srcLayout's shape dims must be equal to 6!");
         static_assert(dstShapeSize == ncdhwDims, "dstLayout's shape dims must be equal to 5!");
-        TransDataImplMode3(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
+        TransDataImpl6HdToNcdhw(dstTensor, srcTensor, sharedTmpBuffer, tmpParams);
     }
 }
 
 } // namespace Internal
 } // namespace AscendC
-#endif // IMPL_TRANSDATA_TRANSDATA_IMPL_H
\ No newline at end of file
+#endif // IMPL_TRANSPOSE_TRANSDATA_TRANSDATA_IMPL_H
\ No newline at end of file
diff --git a/impl/transdata/transdata_tiling.cpp b/impl/transpose/transdata/transdata_tiling.cpp
similarity index 99%
rename from impl/transdata/transdata_tiling.cpp
rename to impl/transpose/transdata/transdata_tiling.cpp
index 45d1bc71..dbfc281e 100644
--- a/impl/transdata/transdata_tiling.cpp
+++ b/impl/transpose/transdata/transdata_tiling.cpp
@@ -8,7 +8,7 @@
  * See LICENSE in the root of the software repository for the full text of the License.
  */
 
-#include "lib/transdata/transdata_tiling.h"
+#include "lib/transpose/transdata_tiling.h"
 
 #include <cstdint>
 #include <algorithm>
diff --git a/lib/kernel_api.h b/lib/kernel_api.h
deleted file mode 100644
index b6f7a069..00000000
--- a/lib/kernel_api.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * Copyright (c) 2024 Huawei Technologies Co., Ltd.
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-/*!
- * \file kernel_api.h
- * \brief
- */
-#ifndef LIB_KERNEL_API_H
-#define LIB_KERNEL_API_H
-
-#if defined(__DAV_C310__) || defined(__DAV_310R6__)
-#include "hccl/hccl.h"
-#include "index/arithprogression.h"
-#include "activation/sigmoid.h"
-#include "activation/softmax.h"
-#include "activation/simplesoftmax.h"
-#include "activation/softmaxflashv2.h"
-#include "activation/softmaxgrad.h"
-#include "activation/gelu.h"
-#include "filter/dropout.h"
-#include "math/tan.h"
-#include "math/tanh.h"
-#include "math/floor.h"
-#include "math/lgamma.h"
-#include "math/log.h"
-#include "math/sin.h"
-#include "math/atanh.h"
-#include "math/asinh.h"
-#include "math/acosh.h"
-#include "math/trunc.h"
-#include "math/cos.h"
-#include "math/fmod.h"
-#include "math/hypot.h"
-#include "math/power.h"
-#include "math/frac.h"
-#include "math/cumsum.h"
-#include "math/erf.h"
-#include "math/erfc.h"
-#include "math/atan.h"
-#include "math/is_finite.h"
-#include "math/philox.h"
-#include "math/sinh.h"
-#include "math/cosh.h"
-#include "math/sign.h"
-#include "math/asin.h"
-#include "math/acos.h"
-#include "math/exp.h"
-#include "math/xor.h"
-#include "normalization/layernorm.h"
-#include "normalization/welfordfinalize.h"
-#include "normalization/normalize.h"
-#include "pad/broadcast.h"
-#include "quantization/ascend_quant.h"
-#include "quantization/ascend_dequant.h"
-#include "quantization/ascend_antiquant.h"
-#include "quantization/quantize.h"
-#include "quantization/dequantize.h"
-#include "quantization/antiquantize.h"
-#include "utils/init_global_memory.h"
-#include "sort/sort.h"
-#include "sort/topk.h"
-#include "transpose/confusion_transpose.h"
-#include "select/selectwithbytesmask.h"
-#include "reduce/reduce.h"
-#include "math/clamp.h"
-#include "math/round.h"
-#include "math/ceil.h"
-#endif // __CCE_AICORE__ == 310
-
-#if defined(__CCE_AICORE__) && (__CCE_AICORE__ != 310)
-#include "filter/dropout.h"
-#include "activation/sigmoid.h"
-#include "activation/softmax.h"
-#include "activation/simplesoftmax.h"
-#include "activation/softmaxflashv2.h"
-#include "activation/softmaxflashv3.h"
-#include "activation/softmaxgrad.h"
-#include "math/xor.h"
-#include "math/floor.h"
-#include "sort/sort.h"
-#endif
-
-#include "std/tuple.h"
-#include "std/type_traits.h"
-#include "std/utility.h"
-#include "std/algorithm.h"
-
-#if defined(__CCE_AICORE__) && (__CCE_AICORE__ < 300)
-#include "index/arithprogression.h"
-#include "normalization/layernormgrad.h"
-#include "normalization/layernormgradbeta.h"
-#include "pad/pad.h"
-#include "hccl/hccl.h"
-#include "math/frac.h"
-#include "math/power.h"
-#include "math/log.h"
-#include "math/sin.h"
-#include "math/cos.h"
-#include "math/asin.h"
-#include "math/acos.h"
-#include "math/asinh.h"
-#include "math/acosh.h"
-#include "math/atan.h"
-#include "math/cosh.h"
-#include "math/erf.h"
-#include "math/erfc.h"
-#include "math/clamp.h"
-#include "normalization/rmsnorm.h"
-#include "normalization/batchnorm.h"
-#include "math/tanh.h"
-#include "math/atanh.h"
-#include "normalization/deepnorm.h"
-#include "math/exp.h"
-#include "normalization/layernorm.h"
-#include "normalization/welfordfinalize.h"
-#include "normalization/normalize.h"
-#include "reduce/sum.h"
-#include "activation/silu.h"
-#include "activation/gelu.h"
-#include "quantization/ascend_quant.h"
-#include "quantization/ascend_dequant.h"
-#include "quantization/ascend_antiquant.h"
-#include "activation/logsoftmax.h"
-#include "activation/softmaxflash.h"
-#include "transpose/confusion_transpose.h"
-#include "select/selectwithbytesmask.h"
-#include "math/sinh.h"
-#include "activation/swiglu.h"
-#include "activation/reglu.h"
-#include "math/tan.h"
-#include "math/round.h"
-#include "math/trunc.h"
-#include "activation/swish.h"
-#include "sort/topk.h"
-#include "activation/geglu.h"
-#include "math/lgamma.h"
-#include "math/digamma.h"
-#include "math/sign.h"
-#include "reduce/mean.h"
-#include "math/axpy.h"
-#include "math/ceil.h"
-#include "pad/broadcast.h"
-#include "reduce/reduce_xor_sum.h"
-#include "reduce/reduce.h"
-#include "transdata/transdata.h"
-#include "math/cumsum.h"
-#include "math/fmod.h"
-#include "normalization/groupnorm.h"
-#include "utils/init_global_memory.h"
-#endif // __CCE_AICORE__ < 300
-
-#endif // LIB_KERNEL_API_H
diff --git a/lib/tiling_api.h b/lib/tiling_api.h
deleted file mode 100644
index 1b83428d..00000000
--- a/lib/tiling_api.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Copyright (c) 2024 Huawei Technologies Co., Ltd.
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-/*!
- * \file tiling_api.h
- * \brief
- */
-#ifndef LIB_TILING_API_H
-#define LIB_TILING_API_H
-#include "matmul/matmul_tiling.h"
-#include "matmul/bmm_tiling.h"
-#include "activation/softmax_tiling.h"
-#include "activation/logsoftmax_tiling.h"
-#include "filter/dropout_tiling.h"
-#include "sort/sort_tiling_intf.h"
-#include "index/arithprogression_tiling.h"
-#include "quantization/ascend_dequant_tiling.h"
-#include "quantization/ascend_quant_tiling.h"
-#include "quantization/ascend_antiquant_tiling.h"
-#include "quantization/quantize_tiling.h"
-#include "quantization/antiquantize_tiling.h"
-#include "quantization/dequantize_tiling.h"
-#include "reduce/sum_tiling.h"
-#include "activation/silu_tiling.h"
-#include "activation/swish_tiling.h"
-#include "activation/gelu_tiling.h"
-#include "pad/pad_tiling.h"
-#include "normalization/rmsnorm_tiling.h"
-#include "normalization/deepnorm_tiling.h"
-#include "normalization/layernorm_tiling.h"
-#include "normalization/normalize_tiling.h"
-#include "normalization/batchnorm_tiling.h"
-#include "normalization/layernorm_grad_tiling.h"
-#include "normalization/layernorm_grad_beta_tiling.h"
-#include "normalization/welfordfinalize_tiling.h"
-#include "transpose/confusion_transpose_tiling.h"
-#include "tiling/platform/platform_ascendc.h"
-#include "sort/topk_tiling.h"
-#include "math/tanh_tiling.h"
-#include "activation/sigmoid_tiling.h"
-#include "math/frac_tiling.h"
-#include "math/acos_tiling.h"
-#include "math/asin_tiling.h"
-#include "math/acosh_tiling.h"
-#include "math/asinh_tiling.h"
-#include "math/sin_tiling.h"
-#include "math/cos_tiling.h"
-#include "math/atan_tiling.h"
-#include "math/power_tiling.h"
-#include "math/log_tiling.h"
-#include "math/cosh_tiling.h"
-#include "math/clamp_tiling.h"
-#include "math/erf_tiling.h"
-#include "math/erfc_tiling.h"
-#include "math/round_tiling.h"
-#include "math/sinh_tiling.h"
-#include "activation/swiglu_tiling.h"
-#include "math/tan_tiling.h"
-#include "math/hypot_tiling.h"
-#include "select/selectwithbytesmask_tiling.h"
-#include "math/trunc_tiling.h"
-#include "activation/geglu_tiling.h"
-#include "math/lgamma_tiling.h"
-#include "math/digamma_tiling.h"
-#include "math/atanh_tiling.h"
-#include "math/xor_tiling.h"
-#include "math/sign_tiling.h"
-#include "reduce/mean_tiling.h"
-#include "math/exp_tiling.h"
-#include "math/axpy_tiling.h"
-#include "math/ceil_tiling.h"
-#include "math/floor_tiling.h"
-#include "activation/reglu_tiling.h"
-#include "pad/broadcast_tiling.h"
-#include "reduce/reduce_xor_sum_tiling.h"
-#include "reduce/reduce_tiling.h"
-#include "math/cumsum_tiling.h"
-#include "math/fmod_tiling.h"
-#include "normalization/groupnorm_tiling.h"
-#include "transdata/transdata_tiling.h"
-#include "hccl/hccl_tilingdata.h"
-#include "hccl/hccl_tiling.h"
-#endif // LIB_TILING_API_H
diff --git a/lib/transdata/transdata.h b/lib/transpose/transdata.h
similarity index 91%
rename from lib/transdata/transdata.h
rename to lib/transpose/transdata.h
index 755a0086..c0075cf5 100644
--- a/lib/transdata/transdata.h
+++ b/lib/transpose/transdata.h
@@ -7,14 +7,14 @@
  * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
  * See LICENSE in the root of the software repository for the full text of the License.
  */
-#ifndef LIB_TRANSDATA_TRANSDATA_H
-#define LIB_TRANSDATA_TRANSDATA_H
+#ifndef LIB_TRANSPOSE_TRANSDATA_H
+#define LIB_TRANSPOSE_TRANSDATA_H
 #if __CCE_AICORE__ == 220
 #include "transdata_common.h"
 #include "kernel_tensor.h"
 #include "kernel_operator_intf.h"
 #include "kernel_pop_stack_buffer.h"
-#include "../../impl/transdata/transdata_impl.h"
+#include "../../impl/transpose/transdata/transdata_impl.h"
 #if ASCENDC_CPU_DEBUG
 #include "kernel_log.h"
 #include <type_traits>
@@ -45,4 +45,4 @@ __aicore__ inline void TransData(const LocalTensor<T>& dstTensor, const LocalTen
 }
 } // namespace AscendC
 #endif
-#endif // LIB_TRANSDATA_TRANSDATA_H
\ No newline at end of file
+#endif // LIB_TRANSPOSE_TRANSDATA_H
\ No newline at end of file
diff --git a/lib/transdata/transdata_common.h b/lib/transpose/transdata_common.h
similarity index 88%
rename from lib/transdata/transdata_common.h
rename to lib/transpose/transdata_common.h
index 644d78ab..0421a3ca 100644
--- a/lib/transdata/transdata_common.h
+++ b/lib/transpose/transdata_common.h
@@ -7,8 +7,8 @@
  * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
  * See LICENSE in the root of the software repository for the full text of the License.
  */
-#ifndef LIB_TRANSDATA_TRANSDATA_COMMON_H
-#define LIB_TRANSDATA_TRANSDATA_COMMON_H
+#ifndef LIB_TRANSPOSE_TRANSDATA_COMMON_H
+#define LIB_TRANSPOSE_TRANSDATA_COMMON_H
 
 namespace AscendC {
 template <typename T, typename U>
@@ -26,4 +26,4 @@ struct TransDataConfig {
 #endif // ASCC_PARAM_TRANSDATACONFIG
 } // namespace AscendC
 
-#endif // LIB_TRANSDATA_TRANSDATA_COMMON_H
\ No newline at end of file
+#endif // LIB_TRANSPOSE_TRANSDATA_COMMON_H
\ No newline at end of file
diff --git a/lib/transdata/transdata_tiling.h b/lib/transpose/transdata_tiling.h
similarity index 94%
rename from lib/transdata/transdata_tiling.h
rename to lib/transpose/transdata_tiling.h
index 87559a51..f2d72221 100644
--- a/lib/transdata/transdata_tiling.h
+++ b/lib/transpose/transdata_tiling.h
@@ -12,8 +12,8 @@
  * \file transdata_tiling.h
  * \brief
  */
-#ifndef LIB_TRANSDATA_TRANSDATA_TILING_H
-#define LIB_TRANSDATA_TRANSDATA_TILING_H
+#ifndef LIB_TRANSPOSE_TRANSDATA_TILING_H
+#define LIB_TRANSPOSE_TRANSDATA_TILING_H
 #include <cstdint>
 #include "graph/tensor.h"
 #include "tiling/platform/platform_ascendc.h"
@@ -64,4 +64,4 @@ bool GetTransDataMaxMinTmpSize(const platform_ascendc::PlatformAscendC &platform
                                const TransDataConfig &config,
                                uint32_t &maxValue, uint32_t &minValue);
 } // AscendC
-#endif // LIB_TRANSDATA_TRANSDATA_TILING_H
\ No newline at end of file
+#endif // LIB_TRANSPOSE_TRANSDATA_TILING_H
\ No newline at end of file
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 133a36fd..a7211593 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -153,95 +153,8 @@ file(GLOB ASCENDC_TEST_ascend910B1_AIV_CASE_SRC_FILES
     ${ASCENDC_TESTS_DIR}/sort/topk/test_operator_topk.cpp
     ${ASCENDC_TESTS_DIR}/normalization/welfordfinalize/test_operator_welfordfinalize.cpp
     ${ASCENDC_TESTS_DIR}/utils/init_global_memory/test_operator_init_global_memory.cpp
-<<<<<<< HEAD
     ${ASCENDC_TESTS_DIR}/normalization/layernormV2/test_operator_layernormV2.cpp
-=======
-    ${ASCENDC_TESTS_DIR}/std/sequence/test_sequence.cpp
-    ${ASCENDC_TESTS_DIR}/std/tuple/*.cpp
-    ${ASCENDC_TESTS_DIR}/std/type_traits/*.cpp
-    ${ASCENDC_TESTS_DIR}/transdata/*cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/geglu/kernel_geglu_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/gelu/kernel_gelu_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/gelu/kernel_faster_gelu_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/gelu/kernel_faster_geluv2_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/reglu/kernel_reglu_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/sigmoid/kernel_sigmoid_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/silu/kernel_silu_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/swiglu/kernel_swiglu_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/swish/kernel_swish_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/adjust_softmax_res/kernel_adjust_softmax_res_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/log_softmax/kernel_log_softmax_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/simple_softmax/kernel_simple_softmax_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax/kernel_softmax_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_flash/kernel_softmax_flash_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_flashv2/kernel_softmax_flashv2_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_flashv3/kernel_softmax_flashv3_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_grad/kernel_softmax_grad_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/activation/softmax/softmax_grad_front/kernel_softmax_grad_front_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/filter/droupout/kernel_droupout_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/index/arithprogression/kernel_arithprogression_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/batchnorm/kernel_batchnorm_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/deepnorm/kernel_deepnorm_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/groupnorm/kernel_groupnorm_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/layernorm/kernel_layernorm_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/layernormgrad/kernel_layernormgrad_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/layernormgradbeta/kernel_layernormgradbeta_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/normalize/kernel_normalize_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/rmsnorm/kernel_rmsnorm_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/welfordfinalize/kernel_welfordfinalize_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/normalization/welfordupdate/kernel_welfordupdate_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/pad/broadcast/kernel_broadcast_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/pad/pad/kernel_pad_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/pad/unpad/kernel_unpad_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/quantization/antiquant/kernel_antiquant_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/quantization/dequant/kernel_dequant_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/quantization/quant/kernel_quant_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/select/selectwithbytesmask/kernel_selectwithbytesmask_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/sort/topk/kernel_topk_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/transpose/confusion_transpose/kernel_confusion_transpose_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/utils/init_global_memory/kernel_init_global_memory_check.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/acos/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/acosh/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/asin/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/asinh/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/atan/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/atanh/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/axpy/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/ceil/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/clamp/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/cos/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/cosh/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/cumsum/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/digamma/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/erf/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/erfc/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/exp/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/floor/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/fmod/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/frac/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/lgamma/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/log/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/power/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/round/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/sign/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/sin/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/sinh/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/tan/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/tanh/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/trunc/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/math/xor/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/mean/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/sum/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_xor_sum/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_all/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_any/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_max/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_min/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_sum/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_prod/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/reduce/reduce_mean/*.cpp
-    ${ASCENDC_TESTS_DIR}/api_check/transdata/*.cpp
->>>>>>> 4d201cad (add transdata high api fractal_z_3d to ncdhw)
+    ${ASCENDC_TESTS_DIR}/transpose/transdata/*cpp
 )
 
 # ascend910B1 aic test cases
@@ -492,11 +405,8 @@ file(GLOB ASCENDC_TILING_SRC_FILES
     ${ASCENDC_API_DIR}/impl/quantization/quant/*.cpp
     ${ASCENDC_API_DIR}/impl/sort/topk/*.cpp
     ${ASCENDC_API_DIR}/impl/reduce/reduce_tiling.cpp
-<<<<<<< HEAD
     ${ASCENDC_API_DIR}/impl/normalization/layernormV2/*.cpp
-=======
-    ${ASCENDC_API_DIR}/impl/transdata/transdata_tiling.cpp
->>>>>>> 4a0a42bb (update)
+    ${ASCENDC_API_DIR}/impl/transpose/transdata/transdata_tiling.cpp
 )
 
 # ascendc_tiling_utest
@@ -552,15 +462,11 @@ foreach(product_type ${PRODUCT_TYPE_LIST})
         ${ASCENDC_API_DIR}/lib/reduce/
         ${ASCENDC_API_DIR}/lib/select/
         ${ASCENDC_API_DIR}/lib/transpose/
-<<<<<<< HEAD
         ${ASCENDC_API_DIR}/lib/matmul/
         ${ASCENDC_API_DIR}/lib/math/
         ${ASCENDC_API_DIR}/lib/normalization/
         ${ASCENDC_API_DIR}/lib/quantization/
         ${ASCENDC_API_DIR}/lib/sort/
-=======
-        ${ASCENDC_API_DIR}/lib/transdata/
->>>>>>> 4a0a42bb (update)
         ${ASCENDC_TESTS_DIR}/common/
     )
 
diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp
index 09630754..b2835619 100644
--- a/tests/tiling/test_tiling.cpp
+++ b/tests/tiling/test_tiling.cpp
@@ -32,1311 +32,7 @@ protected:
     void TearDown() {}
 };
 
-
-<<<<<<< HEAD
-=======
-#if defined(__DAV_C310__) || defined(__DAV_310R6__)
-TEST_F(TestTiling, TestSoftMaxTiling)
-{
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-    std::vector<int64_t> shapeDims = { 128, 128 };
-    optiling::SoftMaxTiling tilingData;
-    auto softmaxShape = ge::Shape(shapeDims);
-    uint32_t softmaxTmpSize = 100 * 1024 * 4;
-    uint32_t softmaxNeedMinSize = GetSoftMaxMinTmpSize(ascendcPlatform, softmaxShape, 2, true);
-    EXPECT_EQ(softmaxNeedMinSize, 128 * (16 + 128) * 4);
-    uint32_t softmaxFlashNeedMinSize = GetSoftMaxFlashMinTmpSize(softmaxShape, 2, true, true);
-    EXPECT_EQ(softmaxFlashNeedMinSize, (16 * 4 + 128 * 2) * 4);
-    softmaxFlashNeedMinSize = GetSoftMaxFlashMinTmpSize(softmaxShape, 4, true, true);
-    EXPECT_EQ(softmaxFlashNeedMinSize, (8 * 4 + 128 * 2) * 4);
-    softmaxFlashNeedMinSize = GetSoftMaxFlashMinTmpSize(softmaxShape, 4, false, true);
-    EXPECT_EQ(softmaxFlashNeedMinSize, (8 + 128 + 64) * 4);
-    uint32_t softmaxGradNeedMinSize = GetSoftMaxGradMinTmpSize(softmaxShape, 2, true, true);
-    EXPECT_EQ(softmaxGradNeedMinSize, (16 * 2 + 128 * 3 + 64) * 4);
-    softmaxGradNeedMinSize = GetSoftMaxGradMinTmpSize(softmaxShape, 4, true, true);
-    EXPECT_EQ(softmaxGradNeedMinSize, (8 + 128 + 64) * 4);
-    softmaxGradNeedMinSize = GetSoftMaxGradMinTmpSize(softmaxShape, 0, true, true);
-    EXPECT_EQ(softmaxGradNeedMinSize, 0);
-
-    uint32_t softmaxNeedMaxSize = GetSoftMaxMaxTmpSize(ascendcPlatform, softmaxShape, 2, true);
-    EXPECT_EQ(softmaxNeedMaxSize, 128 * (16 + 128 + 64) * 4);
-    softmaxNeedMaxSize = GetSoftMaxMaxTmpSize(ascendcPlatform, softmaxShape, 1, true);
-    EXPECT_EQ(softmaxNeedMaxSize, 0);
-    uint32_t softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 2, true, true);
-    EXPECT_EQ(softmaxFlashNeedMaxSize, 128 * (16 * 4 + 128 * 2) * 4);
-    softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 4, false, true);
-    EXPECT_EQ(softmaxFlashNeedMaxSize, 128 * (8 + 128 + 64) * 4);
-    softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 4, true, true);
-    EXPECT_EQ(softmaxFlashNeedMaxSize, 128 * (8 * 4 + 128 * 2) * 4);
-    softmaxFlashNeedMaxSize = GetSoftMaxFlashMaxTmpSize(softmaxShape, 1, true, true);
-    EXPECT_EQ(softmaxFlashNeedMaxSize, 0);
-    uint32_t softmaxGradNeedMaxSize = GetSoftMaxGradMaxTmpSize(softmaxShape, 2, true, true);
-    EXPECT_EQ(softmaxGradNeedMaxSize, 128 * (16 * 2 + 128 * 3 + 64) * 4);
-    softmaxGradNeedMaxSize = GetSoftMaxGradMaxTmpSize(softmaxShape, 4, true, true);
-    EXPECT_EQ(softmaxGradNeedMaxSize, 128 * (8 + 128 + 64) * 4);
-    softmaxGradNeedMaxSize = GetSoftMaxGradMaxTmpSize(softmaxShape, 1, true, true);
-    EXPECT_EQ(softmaxGradNeedMaxSize, 0);
-    SoftMaxTilingFunc(softmaxShape, 2, softmaxTmpSize, tilingData);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-    bool flag = IsBasicBlockInSoftMax(tilingData);
-    EXPECT_EQ(flag, true);
-    SoftMaxFlashTilingFunc(softmaxShape, 2, 77952, tilingData, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 32);
-    SoftMaxFlashTilingFunc(softmaxShape, 2, 77952, tilingData, false);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-    SoftMaxGradTilingFunc(softmaxShape, 2, softmaxTmpSize, tilingData, false);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-    SoftMaxGradTilingFunc(softmaxShape, 4, softmaxTmpSize, tilingData, false);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-    SoftMaxGradTilingFunc(softmaxShape, 2, 133120, tilingData, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-}
-
-TEST_F(TestTiling, TestSoftMaxFlashV2TilingMaxMinTmpSize)
-{
-    uint32_t softmaxflashV2NeedMinLength = 0;
-    uint32_t softmaxflashV2NeedMaxLength = 0;
-
-    std::vector<int64_t> shapeDims = { 3, 3, 448 };
-    auto softmaxShape = ge::Shape(shapeDims);
-    uint32_t dataTypeSize1 = 2;
-    uint32_t dataTypeSize2 = 2;
-    uint32_t isUpdate = 0;
-    uint32_t isBasicBlock = 0;
-    uint32_t isFlashOutputBrc = 1;
-
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 17504);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 19008);
-
-    shapeDims = {7, 1072};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 31296);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 32256);
-
-    shapeDims = {1, 2, 3, 1, 2, 1, 16};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 2240);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 4608);
-
-    shapeDims = {2, 6, 1, 16};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 2240);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 4608);
-
-    shapeDims = {6, 1664};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 41184);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 41856);
-
-    shapeDims = {2, 1760 };
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 15200);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 15200);
-
-    shapeDims = {1, 5536 };
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 23232);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 23232);
-
-    shapeDims = {2, 2, 2352};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 38816);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 38912);
-
-    shapeDims = {2, 2, 2, 480 };
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 0;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 16672);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 17920);
-
-    shapeDims = {2, 3632};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 1;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 29440);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 29824);
-
-    shapeDims = {2, 4, 96};
-    softmaxShape = ge::Shape(shapeDims);
-    dataTypeSize1 = 2;
-    dataTypeSize2 = 2;
-    isUpdate = 1;
-    isBasicBlock = 0;
-    isFlashOutputBrc = 1;
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 3840);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 6144);
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, 1, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 0);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, 1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 0);
-
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, 1, dataTypeSize2, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 0);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, dataTypeSize1, 1, isUpdate, isBasicBlock, isFlashOutputBrc);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 0);
-}
-
-TEST_F(TestTiling, TestSoftMaxFlashV2Tiling)
-{
-    std::vector<int64_t> shapeDims = { 128, 128 };
-    optiling::SoftMaxTiling tilingData;
-    auto softmaxShape = ge::Shape(shapeDims);
-    uint32_t maxSumTypeSize = 2;
-    uint32_t inputTypeSize = 2;
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-    uint32_t softmaxflashV2NeedMinLength =
-        GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, false);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 128 * (16 + 128) * 4);
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, (128 * 2 + 128 * (128 + 16)) * 4);
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 128 * (128 + 16) * 4);
-
-    uint32_t softmaxflashV2NeedMaxLength =
-        GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, false);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 16) * 4);
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 16 * 2) * 4);
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 16) * 4);
-
-    maxSumTypeSize = 4;
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, (128 * 2 + 128 * (128 + 16 + 8)) * 4);
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, 128 * (128 + 8) * 4);
-
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, false);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 8 * 2) * 4);
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, false, true);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (128 + 64 + 8) * 4);
-
-    uint32_t workLength = 100 * 1024;
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, false, false);
-    EXPECT_EQ(tilingData.get_reduceM(), 120);
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, false, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, false);
-    EXPECT_EQ(tilingData.get_reduceM(), 120);
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-
-    inputTypeSize = 4;
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, (128 * 2 + 128 * (16)) * 4);
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 128 * (64 + 8) * 4);
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 64);
-}
-
-TEST_F(TestTiling, TestSoftMaxFlashV2TilingBasicBlock)
-{
-    std::vector<int64_t> shapeDims = { 8, 1024 };
-    optiling::SoftMaxTiling tilingData;
-    auto softmaxShape = ge::Shape(shapeDims);
-    uint32_t maxSumTypeSize = 4;
-    uint32_t inputTypeSize = 4;
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-    uint32_t softmaxflashV2NeedMinLength =
-        GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, (64 + 8 * (16)) * 4);
-    uint32_t softmaxflashV2NeedMaxLength =
-        GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 8*(8 + 64) * 4);
-
-    uint32_t workLength = 32 * 1024;
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 8);
-
-    inputTypeSize = 2;
-    workLength = 64 * 1024;
-    softmaxflashV2NeedMinLength = GetSoftMaxFlashV2MinTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
-    EXPECT_EQ(softmaxflashV2NeedMinLength, (64 + 8 * (16 + 1024 + 8)) * 4);
-    softmaxflashV2NeedMaxLength = GetSoftMaxFlashV2MaxTmpSize(ascendcPlatform, softmaxShape, inputTypeSize, maxSumTypeSize, true, true);
-    EXPECT_EQ(softmaxflashV2NeedMaxLength, 8 * (8 + 1024 + 64) * 4);
-    SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true);
-    EXPECT_EQ(tilingData.get_reduceM(), 8);
-}
-
-TEST_F(TestTiling, TestWelfordUpdateTiling)
-{
-    std::vector<int64_t> shapeDims1d = {1, 128};
-    auto shape1d = ge::Shape(shapeDims1d);
-    uint32_t maxSize = 0;
-    uint32_t minSize = 0;
-    uint32_t dtypeTSize = sizeof(half);
-    uint32_t dtypeUSize = sizeof(float);
-    bool isReuseSource = false;
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-    GetWelfordUpdateMaxMinTmpSize(shape1d, dtypeTSize, dtypeUSize, isReuseSource, false, ascendcPlatform, maxSize, minSize);
-    EXPECT_EQ(minSize, 0);
-    EXPECT_EQ(maxSize, 0);
-}
-
-TEST_F(TestTiling, TestWelfordFinalizeTiling)
-{
-    std::vector<int64_t> shapeDims1d = {64};
-    auto shape1d = ge::Shape(shapeDims1d);
-    uint32_t maxSize = 0;
-    uint32_t minSize = 0;
-    uint32_t dtypeSize = sizeof(float);
-    bool isReuseSource = false;
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-    GetWelfordFinalizeMaxMinTmpSize(shape1d, dtypeSize, isReuseSource, ascendcPlatform, maxSize, minSize);
-    EXPECT_EQ(minSize, 768);
-    EXPECT_EQ(maxSize, 768);
-}
-
-TEST_F(TestTiling, TestLayerNormRstdTiling)
-{
-    const uint32_t stackBufferSize = 100 * 1024;
-    const uint32_t typeSize = sizeof(float);
-    std::vector<int64_t> shapeDims = {128, 88};
-    auto layernormShape = ge::Shape(shapeDims);
-    bool isReuseSource = false;
-    bool isComputeRstd = true;
-    bool isOnlyOutput = false;
-    optiling::LayerNormSeparateTiling tiling;
-    gert::TilingContext* context = fe::GetFakeTilingContext();
-    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
-    uint32_t minValue = 0;
-    uint32_t maxValue = 0;
-    GetLayerNormMaxMinTmpSize(layernormShape, typeSize, isReuseSource, isComputeRstd, isOnlyOutput, ascendcPlatform, maxValue, minValue);
-    EXPECT_EQ(maxValue, 128 * typeSize + 128 * typeSize);
-    EXPECT_EQ(minValue, 128 * typeSize + 128 * typeSize);
-    GetNormalizeMaxMinTmpSize(layernormShape, typeSize, typeSize, isReuseSource, isComputeRstd, isOnlyOutput, ascendcPlatform, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-    GetLayerNormNDTilingInfo(layernormShape, stackBufferSize, typeSize, isReuseSource, isComputeRstd, ascendcPlatform, tiling);
-    EXPECT_EQ(tiling.get_rLength(), 88);
-    EXPECT_EQ(tiling.get_rHeadLength(), 64); 
-}
-
-TEST_F(TestTiling, TestAntiquantTilingNoTransposeFP4)
-{
-    std::vector<int64_t> srcDims = { 640, 5120 };
-    auto srcShape = ge::Shape(srcDims);
-    std::vector<int64_t> offsetDSms = { 1, 5120 };
-    auto offsetShape = ge::Shape(offsetDSms);
-    bool isTranspose = false;
-    uint32_t maxValue;
-    uint32_t minValue;
-    GetAscendAntiQuantMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_FLOAT4_E2M1, ge::DT_FLOAT16, maxValue, minValue);
-    EXPECT_EQ(minValue, 0);
-    EXPECT_EQ(maxValue, 0);
-}
-
-TEST_F(TestTiling, TestAntiquantTilingTransposeFP4)
-{
-    std::vector<int64_t> srcDims = { 640, 5120 };
-    auto srcShape = ge::Shape(srcDims);
-    std::vector<int64_t> offsetDSms = { 1, 5120 };
-    auto offsetShape = ge::Shape(offsetDSms);
-    bool isTranspose = true;
-    uint32_t maxValue;
-    uint32_t minValue;
-    GetAscendAntiQuantMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_FLOAT4_E2M1, ge::DT_FLOAT16, maxValue, minValue);
-    EXPECT_EQ(minValue, 10240);
-    EXPECT_EQ(maxValue, 10240);
-}
-
-TEST_F(TestTiling, TestAntiquantizeTilingNoTransposeFP4)
-{
-    std::vector<int64_t> srcDims = { 640, 5120 };
-    auto srcShape = ge::Shape(srcDims);
-    std::vector<int64_t> offsetDSms = { 1, 5120 };
-    auto offsetShape = ge::Shape(offsetDSms);
-    bool isTranspose = false;
-    uint32_t maxValue;
-    uint32_t minValue;
-    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_FLOAT4_E2M1, ge::DT_FLOAT16, maxValue, minValue);
-    EXPECT_EQ(minValue, 0);
-    EXPECT_EQ(maxValue, 0);
-}
-
-TEST_F(TestTiling, TestDequantizeTiling)
-{
-    // 2d input shape
-    std::vector<int64_t> shape_dims = {10, 32};
-    auto shape = ge::Shape(shape_dims);
-    uint32_t maxValue;
-    uint32_t minValue;
-
-    GetDequantizeMaxMinTmpSize(shape, 2, maxValue, minValue);
-    EXPECT_EQ(minValue, 4 * (64 + 32 + 40));
-    EXPECT_EQ(maxValue, 4 * (64 + 32 * 10 + 40));
-
-    // 1d input shape
-    std::vector<int64_t> shape_dims_1d = {320};
-    auto shape_1d = ge::Shape(shape_dims_1d);
-
-    GetDequantizeMaxMinTmpSize(shape_1d, 2, maxValue, minValue);
-    EXPECT_EQ(minValue, 4 * (64 + 1 * 320 + 328));
-    EXPECT_EQ(maxValue, 4 * (64 + 1 * 320 + 328));
-}
-
-TEST_F(TestTiling, TestAntiquantizeTilingNoTransposePerChannelHalf)
-{
-    std::vector<int64_t> srcDims = { 640, 5120 };
-    auto srcShape = ge::Shape(srcDims);
-    std::vector<int64_t> offsetDSms = { 1, 5120 };
-    auto offsetShape = ge::Shape(offsetDSms);
-    bool isTranspose = false;
-    uint32_t maxValue;
-    uint32_t minValue;
-    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_INT8, ge::DT_FLOAT16, maxValue, minValue);
-    EXPECT_EQ(minValue, 0);
-    EXPECT_EQ(maxValue, 0);
-}
-
-TEST_F(TestTiling, TestAntiquantizeTilingNoTransposePerChannel)
-{
-    std::vector<int64_t> srcDims = { 640, 5120 };
-    auto srcShape = ge::Shape(srcDims);
-    std::vector<int64_t> offsetDSms = { 1, 5120 };
-    auto offsetShape = ge::Shape(offsetDSms);
-    bool isTranspose = false;
-    uint32_t maxValue;
-    uint32_t minValue;
-    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_INT8, ge::DT_BF16, maxValue, minValue);
-    uint32_t expectValue = 5120 * 2 * sizeof(float) + 64 * 640 * sizeof(float);
-    EXPECT_EQ(minValue, expectValue);
-    EXPECT_EQ(maxValue, expectValue);
-}
-
-TEST_F(TestTiling, TestAntiquantizeTilingNoTransposePerTensor)
-{
-    std::vector<int64_t> srcDims = { 640, 5120 };
-    auto srcShape = ge::Shape(srcDims);
-    std::vector<int64_t> offsetDSms = { 1 };
-    auto offsetShape = ge::Shape(offsetDSms);
-    bool isTranspose = false;
-    uint32_t maxValue;
-    uint32_t minValue;
-    GetAntiQuantizeMaxMinTmpSize(srcShape, offsetShape, isTranspose, ge::DT_INT8, ge::DT_BF16, maxValue, minValue);
-    EXPECT_EQ(minValue, 1024);
-    EXPECT_EQ(maxValue, 640 * 5120 * sizeof(float));
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutput)
-{
-    std::vector<int64_t> shapeDims = { 32, 32 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_INT16;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = false;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 9728);
-    EXPECT_EQ(minValue, 9728);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutputB8)
-{
-    std::vector<int64_t> shapeDims = { 32, 32 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_UINT8;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = false;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 7680);
-    EXPECT_EQ(minValue, 7680);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutputB64)
-{
-    std::vector<int64_t> shapeDims = { 32, 32 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_INT64;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = false;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 15872);
-    EXPECT_EQ(minValue, 15872);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingDescendOrder)
-{
-    std::vector<int64_t> shapeDims = { 1023 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_UINT32;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = true;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = false;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 11776);
-    EXPECT_EQ(minValue, 11776);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingWithExtraDstIndex)
-{
-    std::vector<int64_t> shapeDims = { 300 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_FLOAT;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = true;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 5312);
-    EXPECT_EQ(minValue, 5312);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingWithExtraDstIndexForB8)
-{
-    std::vector<int64_t> shapeDims = { 300 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_UINT8;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = true;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 2112);
-    EXPECT_EQ(minValue, 2112);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingWithBothSrcDstIndex)
-{
-    std::vector<int64_t> shapeDims = { 4096 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_UINT16;
-    ge::DataType indexType = ge::DT_UINT64;
-    bool isDescend = false;
-    bool hasSrcIndex = true;
-    bool hasDstIndex = true;
-    bool isReuseSource = false;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 70144);
-    EXPECT_EQ(minValue, 70144);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingOnlyDataOutputReuseSource)
-{
-    std::vector<int64_t> shapeDims = { 32, 32 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_INT16;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = false;
-    bool isReuseSource = true;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 7680);
-    EXPECT_EQ(minValue, 7680);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingDescendOrderReuseSource)
-{
-    std::vector<int64_t> shapeDims = { 1023 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_UINT32;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = true;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = false;
-    bool isReuseSource = true;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 7680);
-    EXPECT_EQ(minValue, 7680);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingWithExtraDstIndexReuseSource)
-{
-    std::vector<int64_t> shapeDims = { 32, 32 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_INT32;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = false;
-    bool hasDstIndex = true;
-    bool isReuseSource = true;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 11776);
-    EXPECT_EQ(minValue, 11776);
-}
-
-TEST_F(TestTiling, testAdvanceSortTilingWithBothSrcDstIndexReuseSource)
-{
-    std::vector<int64_t> shapeDims = { 32, 32 };
-    auto srcShape = ge::Shape(shapeDims);
-    ge::DataType valueType = ge::DT_INT16;
-    ge::DataType indexType = ge::DT_UINT32;
-    bool isDescend = false;
-    bool hasSrcIndex = true;
-    bool hasDstIndex = true;
-    bool isReuseSource = true;
-    SortConfig config = { SortType::RADIX_SORT, isDescend, hasSrcIndex, hasDstIndex };
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    AscendC::GetSortMaxMinTmpSize(srcShape, valueType, indexType, isReuseSource, config, maxValue, minValue);
-
-    EXPECT_EQ(maxValue, 7680);
-    EXPECT_EQ(minValue, 7680);
-}
-
-extern void platfrom_stub_set_chip_version(const char *num);
-TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexTrue_Float_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    bool isInitIndex = true;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 4;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 256);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_innerDataSize(), 128);
-    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
-    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
-    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
-    EXPECT_EQ(tilingData.get_maskOffset(), 16);
-    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
-    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1024);
-    EXPECT_EQ(minValue, 1024);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexFalse_Float_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 4;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 320);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_innerDataSize(), 128);
-    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
-    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
-    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
-    EXPECT_EQ(tilingData.get_maskOffset(), 16);
-    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
-    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
-    EXPECT_EQ(tilingData.get_srcIndexOffset(), 256);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1280);
-    EXPECT_EQ(minValue, 1280);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexTrue_Half_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    bool isInitIndex = true;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 2;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 512);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_innerDataSize(), 256);
-    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
-    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
-    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
-    EXPECT_EQ(tilingData.get_maskOffset(), 16);
-    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
-    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1024);
-    EXPECT_EQ(minValue, 1024);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexFalse_Half_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 2;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 640);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_innerDataSize(), 256);
-    EXPECT_EQ(tilingData.get_sortRepeat(), 2);
-    EXPECT_EQ(tilingData.get_kAlignFourBytes(), 16);
-    EXPECT_EQ(tilingData.get_kAlignTwoBytes(), 16);
-    EXPECT_EQ(tilingData.get_maskOffset(), 16);
-    EXPECT_EQ(tilingData.get_maskVreducev2FourBytes(), 20);
-    EXPECT_EQ(tilingData.get_maskVreducev2TwoBytes(), 40);
-    EXPECT_EQ(tilingData.get_srcIndexOffset(), 512);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1280);
-    EXPECT_EQ(minValue, 1280);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexTrue_Float_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = true;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 4;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, false, tilingData);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 128);
-    EXPECT_EQ(tilingData.get_maskOffset(), 10);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 512);
-    EXPECT_EQ(minValue, 512);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Float_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 4;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_maskOffset(), 10);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 192);
-
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 768);
-    EXPECT_EQ(minValue, 768);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexTrue_Half_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = true;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 2;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 256);
-    EXPECT_EQ(tilingData.get_maskOffset(), 10);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 512);
-    EXPECT_EQ(minValue, 512);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Half_Inner64)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 2;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, false, tilingData);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_maskOffset(), 10);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 384);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 768);
-    EXPECT_EQ(minValue, 768);
-}
-
-TEST_F(TestTiling, TestTopkTiling_DataTypeSize0_FAILED)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 0;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    auto res = TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, false, tilingData);
-    EXPECT_EQ(res, false);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Half_k)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    int32_t k = 13;
-    uint32_t dataTypeSize = 2;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_maskOffset(), 13);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 384);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 768);
-    EXPECT_EQ(minValue, 768);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeSmall_isInitIndexFalse_Float_k32)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 32;
-    uint32_t dataTypeSize = 4;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    TopKTilingFunc(plat, inner, outter, k, dataTypeSize, isInitIndex, topkMode, true, tilingData);
-    EXPECT_EQ(tilingData.get_allDataSize(), 64);
-    EXPECT_EQ(tilingData.get_maskOffset(), 32);
-    EXPECT_EQ(tilingData.get_tmpLocalSize(), 192);
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, true, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 768);
-    EXPECT_EQ(minValue, 768);
-}
-
-TEST_F(TestTiling, TestTopkTiling_RadixTopKModeSmall_isInitIndexFalse)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NSMALL;
-    const int32_t outter = 1;
-    const int32_t inner = 32;
-    const int32_t k = 10;
-    ge::DataType valueType = ge::DT_INT16;
-    bool isReuseSource = false;
-    bool isInitIndex = false;
-    TopKConfig config = { TopKAlgo::RADIX_SELECT, TopKOrder::UNSET, true};
-
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    GetTopKMaxMinTmpSize(inner, outter, k, isReuseSource, isInitIndex, topkMode,
-        true, valueType, config, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1696);
-    EXPECT_EQ(minValue, 1696);
-}
-
-TEST_F(TestTiling, TestTopkTiling_RadixTopKModeNormal_isInitIndexFalse)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    const int32_t outter = 1;
-    const int32_t inner = 32;
-    const int32_t k = 10;
-    ge::DataType valueType = ge::DT_INT16;
-    bool isReuseSource = false;
-    bool isInitIndex = false;
-    TopKConfig config = { TopKAlgo::RADIX_SELECT, TopKOrder::UNSET, true};
-
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    GetTopKMaxMinTmpSize(inner, outter, k, isReuseSource, isInitIndex, topkMode,
-        true, valueType, config, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1696);
-    EXPECT_EQ(minValue, 1696);
-}
-
-TEST_F(TestTiling, TestTopkTiling_RadixTopKModeNormal_isInitIndexTrue)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    const int32_t outter = 1;
-    const int32_t inner = 32;
-    const int32_t k = 10;
-    ge::DataType valueType = ge::DT_INT16;
-    bool isReuseSource = true;
-    bool isInitIndex = true;
-    TopKConfig config = { TopKAlgo::RADIX_SELECT, TopKOrder::UNSET, true};
-
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    GetTopKMaxMinTmpSize(inner, outter, k, isReuseSource, isInitIndex, topkMode,
-        true, valueType, config, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1504);
-    EXPECT_EQ(minValue, 1504);
-}
-
-TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexFalse)
-{
-    enum TopKMode topkMode = TopKMode::TOPK_NORMAL;
-    bool isInitIndex = false;
-    const int32_t outter = 1;
-    const int32_t inner = 64;
-    const int32_t k = 10;
-    uint32_t dataTypeSize = 4;
-    bool isReuseSource = true;
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    optiling::TopkTiling tilingData;
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-
-    GetTopKMaxMinTmpSize(plat, inner, outter, isReuseSource, isInitIndex, topkMode, false, 4, maxValue, minValue);
-    EXPECT_EQ(maxValue, 1280);
-    EXPECT_EQ(minValue, 1280);
-}
-
-TEST_F(TestTiling, TestPowerTiling)
-{
-    std::vector<int64_t> shapeDims = { 1, 512 };
-    auto powerShape = ge::Shape(shapeDims);
-    uint32_t maxVal;
-    uint32_t minVal;
-
-    GetPowerMaxMinTmpSize(powerShape, powerShape, false, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    GetPowerMaxMinTmpSize(powerShape, powerShape, true, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 0);
-    EXPECT_EQ(minVal, 0);
-    GetPowerMaxMinTmpSize(powerShape, powerShape, false, 2, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    EXPECT_EQ(minVal, 512 * 4 * 2);
-
-    std::vector<int64_t> scalar_shape = { 1 };
-    auto scalarShape = ge::Shape(scalar_shape);
-    GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 2, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    GetPowerMaxMinTmpSize(powerShape, scalarShape, true, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 0);
-    EXPECT_EQ(minVal, 0);
-    GetPowerMaxMinTmpSize(scalarShape, powerShape, false, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    EXPECT_EQ(minVal, 512 * 4 * 2);
-}
-
-TEST_F(TestTiling, TestPowerTilingFactorSize)
-{
-    uint32_t maxLiveNodeCnt = 0xffff;
-    uint32_t extraBuf = 0xffff;
-
-    GetPowerTmpBufferFactorSize(false, true, false, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 2);
-    EXPECT_EQ(extraBuf, 0);
-
-    GetPowerTmpBufferFactorSize(false, true, false, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 4);
-    EXPECT_EQ(extraBuf, 0);
-
-    GetPowerTmpBufferFactorSize(false, true, true, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-}
-
-TEST_F(TestTiling, TestPowerTilingWithConfig)
-{
-    std::vector<int64_t> shapeDims = { 1, 512 };
-    auto powerShape = ge::Shape(shapeDims);
-    uint32_t maxVal;
-    uint32_t minVal;
-
-    AscendC::PowerConfig intrinsicConfig = { AscendC::PowerAlgo::INTRINSIC };
-    AscendC::PowerConfig doubleFloatTechConfig = { AscendC::PowerAlgo::DOUBLE_FLOAT_TECH };
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-    GetPowerMaxMinTmpSize(plat, intrinsicConfig, powerShape, powerShape, false, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 0);
-    EXPECT_EQ(maxVal, 0);
-    GetPowerMaxMinTmpSize(plat, intrinsicConfig, powerShape, powerShape, true, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 0);
-    EXPECT_EQ(minVal, 0);
-
-    std::vector<int64_t> scalar_shape = { 1 };
-    auto scalarShape = ge::Shape(scalar_shape);
-    GetPowerMaxMinTmpSize(plat, doubleFloatTechConfig, powerShape, scalarShape, false, 2, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    GetPowerMaxMinTmpSize(plat, doubleFloatTechConfig, powerShape, scalarShape, true, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 0);
-    EXPECT_EQ(minVal, 0);
-    GetPowerMaxMinTmpSize(plat, doubleFloatTechConfig, scalarShape, powerShape, false, 4, false, maxVal, minVal);
-    EXPECT_EQ(maxVal, 512 * 4 * 2);
-    EXPECT_EQ(minVal, 512 * 4 * 2);
-}
-
-TEST_F(TestTiling, TestPowerTilingFactorSizeWithConfig)
-{
-    uint32_t maxLiveNodeCnt = 0xffff;
-    uint32_t extraBuf = 0xffff;
-
-    AscendC::PowerConfig intrinsicConfig = { AscendC::PowerAlgo::INTRINSIC };
-    AscendC::PowerConfig doubleFloatTechConfig = { AscendC::PowerAlgo::DOUBLE_FLOAT_TECH };
-    fe::PlatFormInfos platformInfo;
-    auto plat = platform_ascendc::PlatformAscendC(&platformInfo);
-
-    GetPowerTmpBufferFactorSize(plat, intrinsicConfig, false, true, false, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-
-    GetPowerTmpBufferFactorSize(plat, intrinsicConfig, false, true, false, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-
-    GetPowerTmpBufferFactorSize(plat, doubleFloatTechConfig, false, true, false, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 2);
-    EXPECT_EQ(extraBuf, 0);
-
-    GetPowerTmpBufferFactorSize(plat, doubleFloatTechConfig, false, true, false, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 4);
-    EXPECT_EQ(extraBuf, 0);
-
-    GetPowerTmpBufferFactorSize(plat, doubleFloatTechConfig, false, true, true, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 4);
-    EXPECT_EQ(extraBuf, 0);
-}
-
-TEST_F(TestTiling, TestCosTilingFloatWithConfig)
-{
-    std::vector<int64_t> shapeDims = { 128, 128 };
-    auto cosShape = ge::Shape(shapeDims);
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-
-    AscendC::CosConfig polyConfig = { AscendC::CosAlgo::POLYNOMIAL_APPROXIMATION };
-    AscendC::CosConfig radinConfig = { AscendC::CosAlgo::RADIAN_REDUCTION };
-
-    AscendC::GetCosMaxMinTmpSize(polyConfig, cosShape, 4, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-    AscendC::GetCosMaxMinTmpSize(radinConfig, cosShape, 4, true, maxValue, minValue);
-    EXPECT_EQ(maxValue, 128 * 128 * 2 * 4 + 32);
-
-    uint32_t maxLiveNodeCnt = 0;
-    uint32_t extraBuf = 0;
-    GetCosTmpBufferFactorSize(polyConfig, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-    GetCosTmpBufferFactorSize(radinConfig, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 2);
-    EXPECT_EQ(extraBuf, 32);
-}
-
-TEST_F(TestTiling, TestCosTilingHalfWithConfig)
-{
-    std::vector<int64_t> shapeDims = { 512 };
-    auto cosShape = ge::Shape(shapeDims);
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-
-    AscendC::CosConfig polyConfig = { AscendC::CosAlgo::POLYNOMIAL_APPROXIMATION };
-    AscendC::CosConfig radinConfig = { AscendC::CosAlgo::RADIAN_REDUCTION };
-
-    AscendC::GetCosMaxMinTmpSize(polyConfig, cosShape, 2, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-
-    AscendC::GetCosMaxMinTmpSize(radinConfig, cosShape, 2, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 512 * 2 * 4 + 32);
-    EXPECT_EQ(minValue, 512 * 2 * 4 + 32);
-
-    uint32_t maxLiveNodeCnt = 0;
-    uint32_t extraBuf = 0;
-    GetCosTmpBufferFactorSize(polyConfig, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-    GetCosTmpBufferFactorSize(radinConfig, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 4);
-    EXPECT_EQ(extraBuf, 0);
-}
-
-TEST_F(TestTiling, TestHypotTilingHalf)
-{
-    std::vector<int64_t> shapeDims = { 128, 128 };
-    auto atanShape = ge::Shape(shapeDims);
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-    GetHypotMaxMinTmpSize(atanShape, 2, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-    uint32_t maxLiveNodeCnt = 0;
-    uint32_t extraBuf = 0;
-    GetHypotTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-}
-
-TEST_F(TestTiling, TestSinTilingFloatWithConfig)
-{
-    std::vector<int64_t> shapeDims = { 128, 128 };
-    auto sinShape = ge::Shape(shapeDims);
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-
-    AscendC::SinConfig polyConfig = { AscendC::SinAlgo::POLYNOMIAL_APPROXIMATION };
-    AscendC::SinConfig radinConfig = { AscendC::SinAlgo::RADIAN_REDUCTION };
-
-    AscendC::GetSinMaxMinTmpSize(polyConfig, sinShape, 4, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-    AscendC::GetSinMaxMinTmpSize(radinConfig, sinShape, 4, true, maxValue, minValue);
-    EXPECT_EQ(maxValue, 128 * 128 * 2 * 4 + 32);
-
-    uint32_t maxLiveNodeCnt = 0;
-    uint32_t extraBuf = 0;
-    GetSinTmpBufferFactorSize(polyConfig, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-    GetSinTmpBufferFactorSize(radinConfig, 4, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 2);
-    EXPECT_EQ(extraBuf, 32);
-}
-
-TEST_F(TestTiling, TestSinTilingHalfWithConfig)
-{
-    std::vector<int64_t> shapeDims = { 512 };
-    auto sinShape = ge::Shape(shapeDims);
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-
-    AscendC::SinConfig polyConfig = { AscendC::SinAlgo::POLYNOMIAL_APPROXIMATION };
-    AscendC::SinConfig radinConfig = { AscendC::SinAlgo::RADIAN_REDUCTION };
-
-    AscendC::GetSinMaxMinTmpSize(polyConfig, sinShape, 2, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-
-    AscendC::GetSinMaxMinTmpSize(radinConfig, sinShape, 2, false, maxValue, minValue);
-    EXPECT_EQ(maxValue, 512 * 2 * 4 + 32);
-    EXPECT_EQ(minValue, 512 * 2 * 4 + 32);
-
-    uint32_t maxLiveNodeCnt = 0;
-    uint32_t extraBuf = 0;
-    GetSinTmpBufferFactorSize(polyConfig, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 0);
-    EXPECT_EQ(extraBuf, 0);
-    GetSinTmpBufferFactorSize(radinConfig, 2, maxLiveNodeCnt, extraBuf);
-    EXPECT_EQ(maxLiveNodeCnt, 4);
-    EXPECT_EQ(extraBuf, 0);
-}
-
-TEST_F(TestTiling, TestConfusionTransposeTiling)
-{
-    const uint32_t stackBufferSize = 0;
-    const uint32_t typeSize = 4;
-
-    std::vector<int64_t> shapeDims = { 32, 64, 128 };
-    auto srcShape = ge::Shape(shapeDims);
-    uint32_t maxValue = 0;
-    uint32_t minValue = 0;
-
-    AscendC::GetConfusionTransposeMaxMinTmpSize(srcShape, typeSize, 13, maxValue, minValue);
-    AscendC::GetConfusionTransposeMaxMinTmpSize(srcShape, typeSize, 14, maxValue, minValue);
-    AscendC::GetConfusionTransposeMaxMinTmpSize(srcShape, typeSize, 15, maxValue, minValue);
-    EXPECT_EQ(maxValue, 0);
-    EXPECT_EQ(minValue, 0);
-
-    optiling::ConfusionTransposeTiling tiling;
-    AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 13, tiling);
-    AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 14, tiling);
-    AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 15, tiling);
-}
-
-#else
 extern void platfrom_stub_set_chip_version(const char *num);
->>>>>>> 4a0a42bb (update)
 TEST_F(TestTiling, MultiCoreSmallMN)
 {
     matmul_tiling::MultiCoreMatmulTiling rnnMatmul3,rnnMatmul4,rnnMatmul5;
diff --git a/tests/transdata/test_operator_transdata.cpp b/tests/transpose/transdata/test_operator_transdata.cpp
similarity index 100%
rename from tests/transdata/test_operator_transdata.cpp
rename to tests/transpose/transdata/test_operator_transdata.cpp
diff --git a/tiling/tiling_api.h b/tiling/tiling_api.h
deleted file mode 100644
index 015f68e4..00000000
--- a/tiling/tiling_api.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Copyright (c) 2024 Huawei Technologies Co., Ltd.
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-/*!
- * \file tiling_api.h
- * \brief
- */
-#ifndef TILING_API_H
-#define TILING_API_H
-#include "../lib/matmul/matmul_tiling.h"
-#include "../lib/matmul/bmm_tiling.h"
-#include "../lib/activation/softmax_tiling.h"
-#include "../lib/activation/logsoftmax_tiling.h"
-#include "../lib/filter/dropout_tiling.h"
-#include "../lib/sort/sort_tiling_intf.h"
-#include "../lib/index/arithprogression_tiling.h"
-#include "../lib/quantization/ascend_dequant_tiling.h"
-#include "../lib/quantization/ascend_quant_tiling.h"
-#include "../lib/quantization/ascend_antiquant_tiling.h"
-#include "../lib/quantization/quantize_tiling.h"
-#include "../lib/quantization/antiquantize_tiling.h"
-#include "../lib/quantization/dequantize_tiling.h"
-#include "../lib/reduce/sum_tiling.h"
-#include "../lib/activation/silu_tiling.h"
-#include "../lib/activation/swish_tiling.h"
-#include "../lib/activation/gelu_tiling.h"
-#include "../lib/pad/pad_tiling.h"
-#include "../lib/normalization/rmsnorm_tiling.h"
-#include "../lib/normalization/deepnorm_tiling.h"
-#include "../lib/normalization/layernorm_tiling.h"
-#include "../lib/normalization/normalize_tiling.h"
-#include "../lib/normalization/groupnorm_tiling.h"
-#include "../lib/normalization/batchnorm_tiling.h"
-#include "../lib/normalization/layernorm_grad_tiling.h"
-#include "../lib/normalization/layernorm_grad_beta_tiling.h"
-#include "../lib/normalization/welfordfinalize_tiling.h"
-#include "../lib/transpose/confusion_transpose_tiling.h"
-#include "tiling/platform/platform_ascendc.h"
-#include "../lib/sort/topk_tiling.h"
-#include "../lib/math/tanh_tiling.h"
-#include "../lib/activation/sigmoid_tiling.h"
-#include "../lib/math/frac_tiling.h"
-#include "../lib/math/acos_tiling.h"
-#include "../lib/math/asin_tiling.h"
-#include "../lib/math/acosh_tiling.h"
-#include "../lib/math/asinh_tiling.h"
-#include "../lib/math/sin_tiling.h"
-#include "../lib/math/cos_tiling.h"
-#include "../lib/math/hypot_tiling.h"
-#include "../lib/math/atan_tiling.h"
-#include "../lib/math/power_tiling.h"
-#include "../lib/math/log_tiling.h"
-#include "../lib/math/cosh_tiling.h"
-#include "../lib/math/clamp_tiling.h"
-#include "../lib/math/erf_tiling.h"
-#include "../lib/math/erfc_tiling.h"
-#include "../lib/math/round_tiling.h"
-#include "../lib/math/sinh_tiling.h"
-#include "../lib/activation/swiglu_tiling.h"
-#include "../lib/math/tan_tiling.h"
-#include "../lib/select/selectwithbytesmask_tiling.h"
-#include "../lib/math/trunc_tiling.h"
-#include "../lib/math/fmod_tiling.h"
-#include "../lib/activation/geglu_tiling.h"
-#include "../lib/math/lgamma_tiling.h"
-#include "../lib/math/digamma_tiling.h"
-#include "../lib/math/atanh_tiling.h"
-#include "../lib/math/xor_tiling.h"
-#include "../lib/math/sign_tiling.h"
-#include "../lib/reduce/mean_tiling.h"
-#include "../lib/math/exp_tiling.h"
-#include "../lib/math/axpy_tiling.h"
-#include "../lib/math/ceil_tiling.h"
-#include "../lib/math/floor_tiling.h"
-#include "../lib/activation/reglu_tiling.h"
-#include "../lib/pad/broadcast_tiling.h"
-#include "../lib/reduce/reduce_xor_sum_tiling.h"
-#include "../lib/reduce/reduce_tiling.h"
-#include "../lib/transdata/transdata_tiling.h"
-#include "../lib/math/cumsum_tiling.h"
-#include "../lib/hccl/hccl_tilingdata.h"
-#include "../lib/hccl/hccl_tiling.h"
-#endif // TILING_API_H
-- 
Gitee