diff --git a/examples/matrix/matmul_l2cache/op_host/matmul_l2_cache_custom_tiling.cpp b/examples/matrix/matmul_l2cache/op_host/matmul_l2_cache_custom_tiling.cpp
deleted file mode 100644
index a2a1d3c693ab488eb85feeef295c53c1724efb98..0000000000000000000000000000000000000000
--- a/examples/matrix/matmul_l2cache/op_host/matmul_l2_cache_custom_tiling.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2025 Huawei Technologies Co., Ltd.
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-#include "matmul_l2_cache_custom_tiling.h"
-#include <iostream>
-
-namespace optiling {
-
-bool ComputeTiling(uint32_t blockDim, TCubeTiling& tiling, matmul_tiling::MultiCoreMatmulTiling* cubeTiling,
-    const TestcaseParams& caseParams)
-{
-    cubeTiling->SetDim(blockDim);
-    cubeTiling->SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
-        matmul_tiling::DataType::DT_FLOAT);
-    cubeTiling->SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
-        matmul_tiling::DataType::DT_FLOAT);
-    cubeTiling->SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
-        matmul_tiling::DataType::DT_FLOAT);
-    cubeTiling->SetBiasType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
-        matmul_tiling::DataType::DT_FLOAT);
-    cubeTiling->SetShape(caseParams.m / caseParams.splitTimes, caseParams.n, caseParams.k);
-    cubeTiling->SetOrgShape(caseParams.m, caseParams.n, caseParams.k);
-    cubeTiling->EnableBias(caseParams.isBias);
-    cubeTiling->SetBufferSpace(-1, -1, -1);
-    if (cubeTiling->GetTiling(tiling) == -1) {
-        return false;
-    }
-    return true;
-}
-
-void GenerateTiling(uint32_t blockDim, matmul_tiling::MultiCoreMatmulTiling* cubeTiling,
-    const TestcaseParams& caseParams, uint8_t* tilingBuffer)
-{
-    TCubeTiling tilingData;
-
-    bool res = ComputeTiling(blockDim, tilingData, cubeTiling, caseParams);
-    if (!res) {
-        std::cout << "gen tiling failed" << std::endl;
-    }
-
-    uint32_t tilingSize = tilingData.GetDataSize();
-    tilingData.SaveToBuffer(tilingBuffer, tilingSize);
-}
-} // optiling
\ No newline at end of file
diff --git a/examples/matrix/matmul_l2cache/op_host/matmul_l2_cache_custom_tiling.h b/examples/matrix/matmul_l2cache/op_host/matmul_l2_cache_custom_tiling.h
deleted file mode 100644
index d1eacf9938f7f6f12ea9551843aac802b91b6f16..0000000000000000000000000000000000000000
--- a/examples/matrix/matmul_l2cache/op_host/matmul_l2_cache_custom_tiling.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
-* Copyright (c) 2025 Huawei Technologies Co., Ltd.
-* This file is a part of the CANN Open Software.
-* Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
-* Please refer to the License for details. You may not use this file except in compliance with the License.
-* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
-* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
-* See LICENSE in the root of the software repository for the full text of the License.
-*/
-#ifndef EXAMPLES_MATRIX_MATMUL_L2CACHE_OP_HOST_MATMUL_L2_CACHE_CUSTOM_TILING_H
-#define EXAMPLES_MATRIX_MATMUL_L2CACHE_OP_HOST_MATMUL_L2_CACHE_CUSTOM_TILING_H
-#include "register/tilingdata_base.h"
-#include "tiling/tiling_api.h"
-
-namespace optiling {
-
-BEGIN_TILING_DATA_DEF(MatmulL0cExtendCustomTilingData)
-    TILING_DATA_FIELD_DEF_STRUCT(TCubeTiling, cubeTilingData);
-END_TILING_DATA_DEF;
-REGISTER_TILING_DATA_CLASS(MatmulCustom, MatmulL0cExtendCustomTilingData)
-
-struct TestcaseParams {
-    uint32_t m;
-    uint32_t n;
-    uint32_t k;
-    bool isBias;
-    uint32_t splitTimes;
-};
-/**
- * @brief Generate matmul tiling.
- * @param blockDim: Number of cores involved in the computation.
- * @param cubeTiling: TCubeTiling structure.
- * @param caseParams: Testcase parameters.
- * @param tilingBuf: Data buffer.
- */
-void GenerateTiling(uint32_t blockDim, matmul_tiling::MultiCoreMatmulTiling* cubeTiling,
-    const TestcaseParams& caseParams, uint8_t* tilingBuffer);
-} // namespace optiling
-#endif // EXAMPLES_MATRIX_MATMUL_L2CACHE_OP_HOST_MATMUL_L2_CACHE_CUSTOM_TILING_H
\ No newline at end of file
diff --git a/examples/matrix/matmul_l2cache/op_kernel/matmul_l2_cache_custom_kernel.cpp b/examples/matrix/matmul_l2cache/op_kernel/matmul_l2_cache_custom_kernel.cpp
deleted file mode 100644
index b354fdb84c57ed9e5cff55e2771cf57f46ca82d4..0000000000000000000000000000000000000000
--- a/examples/matrix/matmul_l2cache/op_kernel/matmul_l2_cache_custom_kernel.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2025 Huawei Technologies Co., Ltd.
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-#include "matmul_l2_cache_custom_kernel.h"
-
-#if ASCENDC_CPU_DEBUG
-#define SET_G_CORE_TYPE_IS_AIC int g_coreType = 1
-#else
-#define SET_G_CORE_TYPE_IS_AIC
-#endif
-
-namespace MatmulCustom {
-
-template <typename AType, typename BType, typename CType, typename BiasType>
-__aicore__ inline void MatmulL2CacheKernel<AType, BType, CType, BiasType>::Init(GM_ADDR a,
-    GM_ADDR b, GM_ADDR bias, GM_ADDR c, const TCubeTiling& tiling)
-{
-    this->tiling = tiling;
-
-    aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ AType*>(a), tiling.M * tiling.Ka);
-    bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ BType*>(b), tiling.Kb * tiling.N);
-    cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ CType*>(c), tiling.M * tiling.N);
-    biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ BiasType*>(bias), tiling.N);
-
-    int32_t offsetA = 0;
-    int32_t offsetB = 0;
-    int32_t offsetC = 0;
-    int32_t offsetBias = 0;
-    CalcOffset(AscendC::GetBlockIdx(), offsetA, offsetB, offsetC, offsetBias);
-    aGlobal = aGlobal[offsetA];
-    bGlobal = bGlobal[offsetB];
-    cGlobal = cGlobal[offsetC];
-    biasGlobal = biasGlobal[offsetBias];
-
-    if(GetSysWorkSpacePtr() == nullptr){
-        return;
-    }
-}
-
-template <typename AType, typename BType, typename CType, typename BiasType>
-__aicore__ inline void MatmulL2CacheKernel<AType, BType, CType, BiasType>::Process(AscendC::TPipe* pipe)
-{
-    REGIST_MATMUL_OBJ(pipe, GetSysWorkSpacePtr(), matmulObj, &(this->tiling));
-
-    matmulObj.SetTensorB(bGlobal);
-    if (tiling.isBias) {
-        matmulObj.SetBias(biasGlobal);
-    }
-    const uint32_t splitTimes = 2;
-    const uint32_t splitOffsetA = this->tiling.M * this->tiling.Ka / splitTimes;
-    const uint32_t splitOffsetC = this->tiling.M * this->tiling.N / splitTimes;
-    for (uint32_t i = 0; i < splitTimes; i++) {
-        matmulObj.SetTensorA(aGlobal[splitOffsetA * i]);
-        matmulObj.IterateAll(cGlobal[splitOffsetC * i]);
-    }
-    matmulObj.End();
-}
-
-
-template <typename AType, typename BType, typename CType, typename BiasType>
-__aicore__ inline void MatmulL2CacheKernel<AType, BType, CType, BiasType>::CalcOffset(
-    int32_t blockIdx, int32_t& offsetA, int32_t& offsetB, int32_t& offsetC, int32_t& offsetBias)
-{
-    TCubeTiling& param = this->tiling;
-
-    auto temp0 = AscendC::Ceil(param.M, param.singleCoreM);
-    auto temp1 = AscendC::Ceil(param.N, param.singleCoreN);
-    auto temp2 = AscendC::Ceil(param.Ka, param.singleCoreK);
-
-    auto divideKCoreNum = param.usedCoreNum / temp2;
-
-    auto mCoreIndex = (blockIdx % divideKCoreNum) % temp0;
-    auto nCoreIndex = (blockIdx % divideKCoreNum) / temp0;
-    auto subKIndex = blockIdx / divideKCoreNum;
-
-    offsetA = mCoreIndex * param.Ka * param.singleCoreM + subKIndex * param.singleCoreK;
-    offsetB = subKIndex * param.singleCoreK * param.N + nCoreIndex * param.singleCoreN;
-    offsetC = mCoreIndex * param.N * param.singleCoreM + nCoreIndex * param.singleCoreN;
-    offsetBias = nCoreIndex * param.singleCoreN;
-
-    int32_t gmUseM = param.M - mCoreIndex * param.singleCoreM;
-    param.singleCoreM = gmUseM < param.singleCoreM ? gmUseM : param.singleCoreM;
-
-    int32_t gmUseN = param.N - nCoreIndex * param.singleCoreN;
-    param.singleCoreN = gmUseN < param.singleCoreN ? gmUseN : param.singleCoreN;
-
-    int32_t gmUseK = param.Ka - subKIndex * param.singleCoreK;
-    param.singleCoreK = gmUseK < param.singleCoreK ? gmUseK : param.singleCoreK;
-}
-} // namespace MatmulCustom
-
-namespace {
-__aicore__ inline void CopyTiling(TCubeTiling* tiling, GM_ADDR tilingGM)
-{
-    uint32_t* ptr = reinterpret_cast<uint32_t*>(tiling);
-    auto tiling32 = reinterpret_cast<__gm__ uint32_t*>(tilingGM);
-
-    for (int i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); i++, ptr++) {
-        *ptr = *(tiling32 + i);
-    }
-    return;
-}
-}
-
-extern "C" __global__ __aicore__ void matmul_l2_cache_custom(
-    GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tilingGm)
-{
-    if (g_coreType == AscendC::AIV) {
-        return;
-    }
-
-    TCubeTiling tiling;
-    CopyTiling(&tiling, tilingGm);
-    AscendC::TPipe pipe;
-
-    MatmulCustom::MatmulL2CacheKernel<float, float, float, float> matmulL2CacheKernel;
-    matmulL2CacheKernel.Init(a, b, bias, c, tiling);
-    matmulL2CacheKernel.Process(&pipe);
-}
-
-#ifndef ASCENDC_CPU_DEBUG
-void matmul_l2_cache_custom_do(uint32_t blockDim, void* stream,
-    GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tilingGm)
-{
-    matmul_l2_cache_custom<<<blockDim, nullptr, stream>>>(a, b, bias, c, workspace, tilingGm);
-}
-#endif
\ No newline at end of file
diff --git a/examples/matrix/matmul_l2cache/op_kernel/matmul_l2_cache_custom_kernel.h b/examples/matrix/matmul_l2cache/op_kernel/matmul_l2_cache_custom_kernel.h
deleted file mode 100644
index 5555090e4fecf4345c139f933662c0420b4e6a12..0000000000000000000000000000000000000000
--- a/examples/matrix/matmul_l2cache/op_kernel/matmul_l2_cache_custom_kernel.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Copyright (c) 2025 Huawei Technologies Co., Ltd.
- * This file is a part of the CANN Open Software.
- * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
- * Please refer to the License for details. You may not use this file except in compliance with the License.
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
- * See LICENSE in the root of the software repository for the full text of the License.
- */
-
-#ifndef EXAMPLES_MATRIX_MATMUL_L2CACHE_OP_KERNEL_MATMUL_L2_CACHE_CUSTOM_KERNEL_H
-#define EXAMPLES_MATRIX_MATMUL_L2CACHE_OP_KERNEL_MATMUL_L2_CACHE_CUSTOM_KERNEL_H
-#include "kernel_operator.h"
-#define ASCENDC_CUBE_ONLY
-#include "lib/matmul_intf.h"
-
-namespace MatmulCustom {
-
-template <typename AType, typename BType, typename CType, typename BiasType>
-class MatmulL2CacheKernel {
-public:
-    __aicore__ inline MatmulL2CacheKernel(){};
-    /**
-     * @brief Initialization before process.
-     * @param a: A matrix gm addr.
-     * @param b: B matrix gm addr.
-     * @param bias: Bias matrix gm addr.
-     * @param c: C matrix gm addr.
-     * @param workspace: workspace gm addr.
-     * @param tiling: Matmul tiling struct.
-     * @retval None
-     */
-    __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, const TCubeTiling& tiling);
-    /**
-     * @brief Process matrix calculation.
-     * @param pipe: The TPipe object which manages global memory and synchronization.
-     * @retval None
-     */
-    __aicore__ inline void Process(AscendC::TPipe* pipe);
-
-    AscendC::Matmul<AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, AType>,
-        AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, BType>,
-        AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, CType>,
-        AscendC::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, BiasType>> matmulObj;
-
-private:
-    /**
-     * @brief Calculate the gm offset based on the blockIdx.
-     * @param blockIdx: Current Core blockidx.
-     * @param offsetA: Gm offset of A matrix.
-     * @param offsetB: Gm offset of B matrix.
-     * @param offsetC: Gm offset of C matrix.
-     * @param offsetBias: Gm offset of Bias matrix.
-     * @retval None
-     */
-    __aicore__ inline void CalcOffset(
-        int32_t blockIdx, int32_t& offsetA, int32_t& offsetB, int32_t& offsetC, int32_t& offsetBias);
-
-    AscendC::GlobalTensor<AType> aGlobal;
-    AscendC::GlobalTensor<BType> bGlobal;
-    AscendC::GlobalTensor<CType> cGlobal;
-    AscendC::GlobalTensor<BiasType> biasGlobal;
-    TCubeTiling tiling;
-};
-} // namespace MatmulCustom
-#endif // EXAMPLES_MATRIX_MATMUL_L2CACHE_OP_KERNEL_MATMUL_L2_CACHE_CUSTOM_KERNEL_H
\ No newline at end of file
diff --git a/examples/matrix/matmul_l2cache/scripts/gen_l2_cache_data.py b/examples/matrix/matmul_l2cache/scripts/gen_l2_cache_data.py
deleted file mode 100644
index 7d66b1a1051c367e678316af5eb841aadda0ae86..0000000000000000000000000000000000000000
--- a/examples/matrix/matmul_l2cache/scripts/gen_l2_cache_data.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-
-# Copyright (c) 2025 Huawei Technologies Co., Ltd.
-# This file is a part of the CANN Open Software.
-# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
-# Please refer to the License for details. You may not use this file except in compliance with the License.
-# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
-# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
-# See LICENSE in the root of the software repository for the full text of the License.
-# ======================================================================================================================
-
-import os
-
-import numpy as np
-
-
-def gen_golden_data(work_path, params=None):
-    input_path = os.path.join(work_path, "input")
-    if not os.path.exists(input_path):
-        os.makedirs(input_path)
-    output_path = os.path.join(work_path, "output")
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-
-    if params:
-        m, n, k, is_bias = params.m, params.n, params.k, params.isBias
-    else:
-        m, n, k, is_bias = 30720, 1024, 1024, True
-
-    a_gm = np.random.uniform(-10, 10, [m, k]).reshape([m, k]).astype(np.float32)
-    b_gm = np.random.uniform(-10, 10, [k, n]).reshape([k, n]).astype(np.float32)
-    bias_gm = np.random.uniform(-10, 10, [n]).reshape([n]).astype(np.float32)
-
-    if is_bias:
-        golden_c = np.matmul(a_gm, b_gm, dtype=np.float32) + bias_gm
-    else:
-        golden_c = np.matmul(a_gm, b_gm, dtype=np.float32)
-
-    a_gm.tofile(os.path.join(input_path, "a_gm.bin"))
-    b_gm.tofile(os.path.join(input_path, "b_gm.bin"))
-    bias_gm.tofile(os.path.join(input_path, "bias_gm.bin"))
-    golden_c.tofile(os.path.join(output_path, "golden.bin"))
-
-
-if __name__ == "__main__":
-    gen_golden_data(os.getcwd())