From d5b895c8fc9ae2e9169b7d5709eab77bebeadc78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=97=AD?= Date: Fri, 22 Aug 2025 16:35:17 +0800 Subject: [PATCH] annotate core APIs for clarity --- lib/matmul/bmm_tiling.h | 102 ++++++++++++++++++++++++++++++++++- lib/matmul/constant_tiling.h | 16 ++++++ lib/matmul/matmul_client.h | 8 ++- 3 files changed, 124 insertions(+), 2 deletions(-) diff --git a/lib/matmul/bmm_tiling.h b/lib/matmul/bmm_tiling.h index 20ab4b01..777eb0b9 100644 --- a/lib/matmul/bmm_tiling.h +++ b/lib/matmul/bmm_tiling.h @@ -55,6 +55,20 @@ public: { enableSplitK_ = flag; } + /** + * @brief Controls whether to enable K-axis splitting in multi-core scenarios + * + * This interface is used to set whether to split the K-axis in multi-core computing scenarios. + * If this interface is not called, the system disables K-axis splitting by default. + * Note: This interface has the same functionality as EnableMultiCoreSplitK, and it's recommended to use the latter. + * + * @param flag Boolean parameter: + * - true: Enable K-axis splitting + * - false: Disable K-axis splitting (default behavior) + * + * @note This interface must be used before calling the GetTiling interface to ensure that the K-axis splitting + * configuration is correctly applied to the tensor tiling process. + */ void SetSplitK(bool flag) { EnableMultiCoreSplitK(flag); @@ -63,14 +77,60 @@ protected: int64_t Compute() override; }; -// batch matul tiling +/** + * @class BatchMatmulTiling + * + * @brief Class for creating a BatchMatmul Tiling object + * + * Used to create and manage Tiling objects for batch matrix multiplication operations. 
+ */ class BatchMatmulTiling : public MatmulApiTilingBase { public: BatchMatmulTiling() {}; explicit BatchMatmulTiling(const platform_ascendc::PlatformAscendC &ascendcPlatform) : MatmulApiTilingBase(ascendcPlatform){}; + /** + * @brief Retrieves the BlockDim used after multi-core splitting + * + * This interface is used to obtain the number of cores distributed in different directions + * after multi-core splitting, which constitutes the BlockDim for computation. + * + * @param[out] dim Total number of cores required for computation, where dim = mDim * nDim + * @param[out] mDim Number of cores required in the M direction for computation + * @param[out] nDim Number of cores required in the N direction for computation + * @param[out] batchCoreM Number of cores required in the batch M direction for computation + * @param[out] batchCoreN Number of cores required in the batch N direction for computation + * + * @return 0 if the core numbers are successfully retrieved; -1 if the retrieval fails + */ int32_t GetCoreNum(int32_t &dim, int32_t &mDim, int32_t &nDim, int32_t &batchCoreM, int32_t &batchCoreN); + /** + * @brief Retrieves the tiling parameters + * + * This interface is used to obtain the tiling parameters and store them in the TCubeTiling structure. + * + * @param[out] tiling The TCubeTiling structure that stores the final tiling results. + * - optiling::TCubeTiling: The TCubeTiling structure with the optiling namespace, + * which is the Matmul TilingData defined on the Host side. + * + * @return int64_t Return value indicating the result of tiling calculation: + * - Any value other than -1: Tiling calculation succeeded, and the tiling structure can be used. + * - -1: Tiling calculation failed, and the tiling result cannot be used. + */ int64_t GetTiling(optiling::TCubeTiling &tiling) override; + /** + * @brief Retrieves the tiling parameters + * + * This interface is used to obtain the tiling parameters and store them in the TCubeTiling structure. 
+ * + * @param[out] tiling The TCubeTiling structure that stores the final tiling results. + * - TCubeTiling: The TCubeTiling structure without the optiling namespace, + * which is the Matmul TilingData defined on the Kernel side. + * + * @return int64_t Return value indicating the result of tiling calculation: + * - Any value other than -1: Tiling calculation succeeded, and the tiling structure can be used. + * - -1: Tiling calculation failed, and the tiling result cannot be used. + */ int64_t GetTiling(TCubeTiling &tiling) override; protected: int64_t Compute() override; @@ -82,9 +142,49 @@ private: } // namespace matmul_tiling extern "C" { +/** + * @brief Get L1/Unified/L0C buffer sizes for multi-core Matmul + * + * Retrieves buffer sizes using TCubeTiling from MultiCoreMatmulTiling after GetTiling. + * + * @param[in] tiling Input `optiling::TCubeTiling` structure (from MultiCoreMatmulTiling object) + * @param[out] bufSize Output buffer sizes (ubSize, l1Size, l0cSize) + * + * @return 0 if the buffer sizes are successfully retrieved; -1 if the retrieval fails + */ int32_t MultiCoreMatmulGetTmpBufSize(optiling::TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize); +/** + * @brief Get L1/Unified/L0C buffer sizes for BatchMatmul + * + * Retrieves buffer sizes using TCubeTiling from BatchMatmulTiling after GetTiling. + * + * @param[in] tiling Input `optiling::TCubeTiling` structure (from BatchMatmulTiling object) + * @param[out] bufSize Output buffer sizes (ubSize, l1Size, l0cSize) + * + * @return 0 if the buffer sizes are successfully retrieved; -1 if the retrieval fails + */ int32_t BatchMatmulGetTmpBufSize(optiling::TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize); +/** + * @brief Get L1/Unified/L0C buffer sizes for multi-core Matmul + * + * Retrieves buffer sizes using TCubeTiling from MultiCoreMatmulTiling after GetTiling. 
+ * + * @param[in] tiling Input `TCubeTiling` structure (from MultiCoreMatmulTiling object) + * @param[out] bufSize Output buffer sizes (ubSize, l1Size, l0cSize) + * + * @return 0 if the buffer sizes are successfully retrieved; -1 if the retrieval fails + */ int32_t MultiCoreMatmulGetTmpBufSizeV2(TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize); +/** + * @brief Get L1/Unified/L0C buffer sizes for BatchMatmul + * + * Retrieves buffer sizes using TCubeTiling from BatchMatmulTiling after GetTiling. + * + * @param[in] tiling Input `TCubeTiling` structure (from BatchMatmulTiling object) + * @param[out] bufSize Output buffer sizes (ubSize, l1Size, l0cSize) + * + * @return 0 if the buffer sizes are successfully retrieved; -1 if the retrieval fails + */ int32_t BatchMatmulGetTmpBufSizeV2(TCubeTiling &tiling, matmul_tiling::SysTilingTempBufSize &bufSize); }; diff --git a/lib/matmul/constant_tiling.h b/lib/matmul/constant_tiling.h index ea71c46f..be876485 100644 --- a/lib/matmul/constant_tiling.h +++ b/lib/matmul/constant_tiling.h @@ -18,6 +18,22 @@ #include "../../impl/matmul/tiling/matmul_constant_tiling_impl.h" namespace AscendC { +/** + * @brief Retrieves constantized Matmul Tiling parameters during compilation + * + * This interface is used to obtain constantized Matmul Tiling parameters at compile time, + * which can be used for matrix multiplication operations with fixed configurations. + * + * @tparam A_TYPE Type information of matrix A, defined through MatmulType + * @tparam B_TYPE Type information of matrix B, defined through MatmulType + * @tparam C_TYPE Type information of matrix C, defined through MatmulType + * @tparam BIAS_TYPE Type information of BIAS matrix, defined through MatmulType + * + * @param[in] mmCFG Input MatmulConfig template. 
+ * @param[in] l1Size Available L1 size, default value is L1_SIZE + * + * @return MatmulApiStaticTiling Constantized Matmul Tiling parameters obtained + */ template __aicore__ constexpr MatmulApiStaticTiling GetMatmulApiTiling(const MatmulConfig &mmCFG, int32_t l1Size = Impl::L1_SIZE) { diff --git a/lib/matmul/matmul_client.h b/lib/matmul/matmul_client.h index d54685f8..5c85213f 100644 --- a/lib/matmul/matmul_client.h +++ b/lib/matmul/matmul_client.h @@ -34,7 +34,13 @@ constexpr uint16_t NUM_SIXTEEN = uint16_t(16); constexpr uint16_t NUM_THIRTYTWO = uint16_t(32); constexpr uint16_t NUM_FORTYEIGHT = uint16_t(48); -// Service function of the Matmul on the AIV client side, which is the unit for sending messages. +/** + * @class MatmulClientBase + * + * @brief Base class for MatmulClient + * + * Service function of matrix multiplication on the AIV client side, acting as the unit for message sending. + */ template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> class MatmulClientBase { -- Gitee