From 60d30b0e8e65c3839c356d4f70218f5fc28ca41e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=AE=E8=B5=A2=E6=B4=8B?= Date: Fri, 22 Aug 2025 10:40:05 +0800 Subject: [PATCH] add comment for matmul_config.h matmul_tiling.h --- lib/matmul/matmul_config.h | 82 ++++++++++++++++++++++++++------------ lib/matmul/matmul_tiling.h | 29 +++++++++++++- 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/lib/matmul/matmul_config.h b/lib/matmul/matmul_config.h index ab7711fd..cdaba7dd 100644 --- a/lib/matmul/matmul_config.h +++ b/lib/matmul/matmul_config.h @@ -150,45 +150,75 @@ enum class MatmulConfigMode { CONFIG_IBSHARE }; +/** + * @struct MatmulShapeParams + * @brief tiling shape information + * + * Constantized for tiling shape information + */ struct MatmulShapeParams { - uint32_t singleCoreM; - uint32_t singleCoreN; - uint32_t singleCoreK; - uint32_t basicM; - uint32_t basicN; - uint32_t basicK; + uint32_t singleCoreM; // size of M axis shape within a single core, in unit of element + uint32_t singleCoreN; // size of N axis shape within a single core, in unit of element + uint32_t singleCoreK; // size of K axis shape within a single core, in unit of element + uint32_t basicM; // size of M axis shape for Matmul calculation, in unit of element + uint32_t basicN; // size of N axis shape for Matmul calculation, in unit of element + uint32_t basicK; // size of K axis shape for Matmul calculation, in unit of element }; +/** + * @struct MatmulQuantParams + * @brief quant config + * + * Scenario of quant: A is float16_t and B is int8_t + */ struct MatmulQuantParams { - bool isPerTensor; - bool hasAntiQuantOffset; + bool isPerTensor; // whether B quant is per tensor + bool hasAntiQuantOffset; // whether B uses offset coefficients }; +/** + * @struct MatmulBatchParams + * @brief batch matmul config + * + * Enable when batchMode is not BatchMode::None + */ struct MatmulBatchParams { - bool isNBatch; - BatchMode batchMode; - bool isBiasBatch = true; - bool isNBatchOut = 
false; + bool isNBatch; // whether to invoke IterNBatch to achieve multiple batch inputs and outputs + BatchMode batchMode; // relationship between the total size of A/B and the size of L1 Buffer + bool isBiasBatch = true; // whether the size of the bias includes the batch axis + bool isNBatchOut = false; // whether to cache multiple batch outputs to copy out together }; +/** + * @struct MatmulFuncParams + * @brief matmul function config + * + * Matmul common feature config + */ struct MatmulFuncParams { - bool intrinsicsCheck; - bool enVecND2NZ; - bool enableDoubleCache; - bool enableL1CacheUB; - uint32_t doMTE2Preload; - IterateOrder iterateOrder; - ScheduleType scheduleType; - bool enableReuse = true; - bool enableUBReuse; - bool isPartialOutput = false; - bool isA2B2Shared = false; - bool isEnableChannelSplit = false; - bool enableKdimReorderLoad = false; + bool intrinsicsCheck; // enable cyclic DataCopy from GM to L1 when the element num of A/B's inner axis >= 65535 + bool enVecND2NZ; // enable using the Vector instruction to transform ND to NZ + bool enableDoubleCache; // enable double data cached in L1 for IBShare + bool enableL1CacheUB; // enable using L1 to cache for UB buffer + uint32_t doMTE2Preload; // enable the preload function for M/N direction to reduce the MTE2 gap + IterateOrder iterateOrder; // the loop iterate order of M or N direction + ScheduleType scheduleType; // the type of Matmul data copy + bool enableReuse = true; // enable directly pass the calculate data for the dataPtr in the callback function + // set by the SetSelfDefineData function + bool enableUBReuse; // enable reuse of UB buffer to cache double data for two iterate + bool isPartialOutput = false; // enable K axis does not atomic add + bool isA2B2Shared = false; // enable all Matmul instances share the double buffer feature of A2 and B2 + bool isEnableChannelSplit = false; // enable C matrix split from [x]*[m*n] to [2x]*[m*(n/2)] + // when C's DataType is float, Position is GM and 
CubeFormat is NZ + bool enableKdimReorderLoad = false; // enable K axis load data by peak-shifting }; +/** + * @struct MatmulBiasParams + * @brief matmul bias config + */ struct MatmulBiasParams { - bool enableSetBias = true; + bool enableSetBias = true; // enable SetBias function }; struct MatrixOffset { diff --git a/lib/matmul/matmul_tiling.h b/lib/matmul/matmul_tiling.h index fdae6489..b6de3af9 100644 --- a/lib/matmul/matmul_tiling.h +++ b/lib/matmul/matmul_tiling.h @@ -25,7 +25,10 @@ #include "tiling/platform/platform_ascendc.h" namespace matmul_tiling { -// single core matmul tiling +/** + * @class MatmulApiTiling + * @brief single core matmul tiling + */ class MatmulApiTiling : public MatmulApiTilingBase { public: MatmulApiTiling() {}; @@ -33,7 +36,19 @@ public: : MatmulApiTilingBase(ascendcPlatform){}; explicit MatmulApiTiling(const PlatformInfo& platform) : MatmulApiTilingBase(platform) {}; ~MatmulApiTiling() override = default; + + /** + * @brief Get calculated tiling information + * @param [out] tiling the structure to store the tiling result defined on the Host side + * @return return 0 if success, else return -1 + */ int64_t GetTiling(optiling::TCubeTiling &tiling) override; + + /** + * @brief Get calculated tiling information + * @param [out] tiling the structure to store the tiling result defined on the Kernel side + * @return return 0 if success, else return -1 + */ int64_t GetTiling(TCubeTiling &tiling) override; protected: @@ -42,7 +57,19 @@ protected: } // namespace matmul_tiling extern "C" { +/** + * @brief After invoking the GetTiling function, obtain the used size of L1/UB/L0C buffer based on TCubeTiling + * @param [in] tiling tiling information defined on the Host side + * @param [out] bufSize the structure to store the used size of L1/UB/L0C buffer + * @return return 0 if success, else return -1 + */ int32_t MatmulGetTmpBufSize(optiling::TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize); +/** + * @brief After invoking the GetTiling function, 
obtain the used size of L1/UB/L0C buffer based on TCubeTiling + * @param [in] tiling tiling information defined on the Kernel side + * @param [out] bufSize the structure to store the used size of L1/UB/L0C buffer + * @return return 0 if success, else return -1 + */ int32_t MatmulGetTmpBufSizeV2(TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize); }; -- Gitee