From 42c1e4815a7ef0b3d6f3cd1b892969fbda3d720b Mon Sep 17 00:00:00 2001 From: hujiawen Date: Sat, 2 Aug 2025 11:02:51 +0800 Subject: [PATCH] fix mm leakyrelu block24 aicore error --- .../op_host/matmul_leakyrelu_custom.cpp | 4 ++-- .../op_kernel/matmul_leakyrelu_custom.cpp | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp index b811c71ca..5e5da7928 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp @@ -60,8 +60,8 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) context->SetTilingKey(2); } else { /* SetBlockDim here refers to the number of cube cores, so for separated arch(AIC:AIV=1:2), - vector cores number is set 48 by SetDim, cube core number need to be set 24 here.*/ - context->SetBlockDim(24); + when vector cores number is set like 48 by SetDim, cube core number need to be set 24 here.*/ + context->SetBlockDim(ascendcPlatform.GetCoreNumAic()); context->SetTilingKey(1); } tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); diff --git a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp index 4f56aca86..e1894f29f 100644 --- a/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp +++ b/operator/ascendc/0_introduction/12_matmulleakyrelu_frameworklaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp @@ -147,11 +147,13 @@ template __aicore__ inline void MatmulLeakyKernel::CopyOut(uint32_t count) { reluOutQueue_.DeQue(); - const uint32_t roundM = tiling.singleCoreM / tiling.baseM; - const uint32_t roundN = tiling.singleCoreN / tiling.baseN; + const uint32_t roundM = Ceiling(tiling.singleCoreM, tiling.baseM); + const uint32_t curCopyM = tiling.singleCoreM < tiling.baseM ? tiling.singleCoreM : tiling.baseM; + const uint32_t curCopyN = tiling.singleCoreN < tiling.baseN ? tiling.singleCoreN : tiling.baseN; + const uint32_t curCopyNStride = tiling.N < tiling.baseN ? tiling.N : tiling.N - tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + DataCopyParams copyParam = {(uint16_t)curCopyM, (uint16_t)(curCopyN * sizeof(cType) / 32), 0, + (uint16_t)(curCopyNStride * sizeof(cType) / 32)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); } -- Gitee