From 60d30b0e8e65c3839c356d4f70218f5fc28ca41e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=98=AE=E8=B5=A2=E6=B4=8B?= <ruanyingyang@hisilicon.com>
Date: Fri, 22 Aug 2025 10:40:05 +0800
Subject: [PATCH] add comment for matmul_config.h matmul_tiling.h

---
 lib/matmul/matmul_config.h | 82 ++++++++++++++++++++++++++------------
 lib/matmul/matmul_tiling.h | 29 +++++++++++++-
 2 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/lib/matmul/matmul_config.h b/lib/matmul/matmul_config.h
index ab7711fd..cdaba7dd 100644
--- a/lib/matmul/matmul_config.h
+++ b/lib/matmul/matmul_config.h
@@ -150,45 +150,75 @@ enum class MatmulConfigMode {
     CONFIG_IBSHARE
 };
 
+/**
+ * @struct MatmulShapeParams
+ * @brief tiling shape information
+ *
+ * Constantized for tiling shape information
+ */
 struct MatmulShapeParams {
-    uint32_t singleCoreM;
-    uint32_t singleCoreN;
-    uint32_t singleCoreK;
-    uint32_t basicM;
-    uint32_t basicN;
-    uint32_t basicK;
+    uint32_t singleCoreM;  // size of M axis shape within a single core, in units of elements
+    uint32_t singleCoreN;  // size of N axis shape within a single core, in units of elements
+    uint32_t singleCoreK;  // size of K axis shape within a single core, in units of elements
+    uint32_t basicM;  // size of M axis shape for Matmul calculation, in units of elements
+    uint32_t basicN;  // size of N axis shape for Matmul calculation, in units of elements
+    uint32_t basicK;  // size of K axis shape for Matmul calculation, in units of elements
 };
 
+/**
+ * @struct MatmulQuantParams
+ * @brief quant config
+ *
+ * Scenario of quant: A is float16_t and B is int8_t
+ */
 struct MatmulQuantParams {
-    bool isPerTensor;
-    bool hasAntiQuantOffset;
+    bool isPerTensor;  // whether the quantization of B is per-tensor
+    bool hasAntiQuantOffset;  // whether B uses offset coefficients
 };
 
+/**
+ * @struct MatmulBatchParams
+ * @brief batch matmul config
+ *
+ * Enable when batchMode is not BatchMode::None
+ */
 struct MatmulBatchParams {
-    bool isNBatch;
-    BatchMode batchMode;
-    bool isBiasBatch = true;
-    bool isNBatchOut = false;
+    bool isNBatch;  // whether to invoke IterNBatch to achieve multiple batch inputs and outputs
+    BatchMode batchMode;  // relationship between the total size of A/B and the size of L1 Buffer
+    bool isBiasBatch = true;  // whether the size of the bias includes the batch axis
+    bool isNBatchOut = false;  // whether to cache multiple batch outputs to copy out together
 };
 
+/**
+ * @struct MatmulFuncParams
+ * @brief matmul function config
+ *
+ * Matmul common feature config
+ */
 struct MatmulFuncParams {
-    bool intrinsicsCheck;
-    bool enVecND2NZ;
-    bool enableDoubleCache;
-    bool enableL1CacheUB;
-    uint32_t doMTE2Preload;
-    IterateOrder iterateOrder;
-    ScheduleType scheduleType;
-    bool enableReuse = true;
-    bool enableUBReuse;
-    bool isPartialOutput = false;
-    bool isA2B2Shared = false;
-    bool isEnableChannelSplit = false;
-    bool enableKdimReorderLoad = false;
+    bool intrinsicsCheck;  // enable cyclic DataCopy from GM to L1 when the element num of A/B's inner axis >= 65535
+    bool enVecND2NZ;  // enable using the Vector instruction to transform ND to NZ
+    bool enableDoubleCache;  // enable double data cached in L1 for IBShare
+    bool enableL1CacheUB;  // enable using L1 to cache for the UB buffer
+    uint32_t doMTE2Preload;  // enable the preload function for M/N direction to reduce the MTE2 gap
+    IterateOrder iterateOrder;  // the loop iteration order of M or N direction
+    ScheduleType scheduleType;  // the type of Matmul data copy
+    bool enableReuse = true;  // enable directly passing the calculation data for the dataPtr in the callback function
+                              // set by the SetSelfDefineData function
+    bool enableUBReuse;  // enable reuse of UB buffer to cache double data for two iterations
+    bool isPartialOutput = false;  // when enabled, the K axis does not perform atomic add
+    bool isA2B2Shared = false;  // enable all Matmul instances to share the double buffer feature of A2 and B2
+    bool isEnableChannelSplit = false;  // enable C matrix split from [x]*[m*n] to [2x]*[m*(n/2)]
+                                        // when C's DataType is float, Position is GM and CubeFormat is NZ
+    bool enableKdimReorderLoad = false;  // enable K axis load data by peak-shifting
 };
 
+/**
+ * @struct MatmulBiasParams
+ * @brief matmul bias config
+ */
 struct MatmulBiasParams {
-    bool enableSetBias = true;
+    bool enableSetBias = true;  // enable the SetBias function
 };
 
 struct MatrixOffset {
diff --git a/lib/matmul/matmul_tiling.h b/lib/matmul/matmul_tiling.h
index fdae6489..b6de3af9 100644
--- a/lib/matmul/matmul_tiling.h
+++ b/lib/matmul/matmul_tiling.h
@@ -25,7 +25,10 @@
 #include "tiling/platform/platform_ascendc.h"
 
 namespace matmul_tiling {
-// single core matmul tiling
+/**
+ * @class MatmulApiTiling
+ * @brief single core matmul tiling
+ */
 class MatmulApiTiling : public MatmulApiTilingBase {
 public:
     MatmulApiTiling() {};
@@ -33,7 +36,19 @@ public:
         : MatmulApiTilingBase(ascendcPlatform){};
     explicit MatmulApiTiling(const PlatformInfo& platform) : MatmulApiTilingBase(platform) {};
     ~MatmulApiTiling() override = default;
+
+    /**
+     * @brief Get calculated tiling information
+     * @param [out] tiling the structure to store the tiling result defined on the Host side
+     * @return 0 on success, -1 otherwise
+     */
     int64_t GetTiling(optiling::TCubeTiling &tiling) override;
+
+    /**
+     * @brief Get calculated tiling information
+     * @param [out] tiling the structure to store the tiling result defined on the Kernel side
+     * @return 0 on success, -1 otherwise
+     */
     int64_t GetTiling(TCubeTiling &tiling) override;
 
 protected:
@@ -42,7 +57,19 @@ protected:
 } // namespace matmul_tiling
 
 extern "C" {
+/**
+ * @brief After invoking the GetTiling function, obtain the used size of L1/UB/L0C buffer based on TCubeTiling
+ * @param [in] tiling tiling information defined on the Host side
+ * @param [out] bufSize the structure to store the used size of L1/UB/L0C buffer
+ * @return 0 on success, -1 otherwise
+ */
 int32_t MatmulGetTmpBufSize(optiling::TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize);
+/**
+ * @brief After invoking the GetTiling function, obtain the used size of L1/UB/L0C buffer based on TCubeTiling
+ * @param [in] tiling tiling information defined on the Kernel side
+ * @param [out] bufSize the structure to store the used size of L1/UB/L0C buffer
+ * @return 0 on success, -1 otherwise
+ */
 int32_t MatmulGetTmpBufSizeV2(TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize);
 };
 
-- 
Gitee