From e73080ce0823d03d6d4bc813f70bbd73d8ed5d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EF=A3=B5=EF=A3=B5=EF=A3=B5=EF=A3=B5=EF=A3=B5=EF=A3=B5?= Date: Tue, 29 Jul 2025 16:39:26 +0800 Subject: [PATCH 1/2] Fix bias ground and data enter with NZ Precision errors --- .../iterator/batch_loop/batch_loop_multi.h | 16 +++++++++------- .../copy_cube_in/batch/batch_copy_cube_in.h | 13 ++++++++----- tests/matmul/iterator/test_batch_loop.cpp | 4 ++-- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/impl/matmul/scheduler/iterator/batch_loop/batch_loop_multi.h b/impl/matmul/scheduler/iterator/batch_loop/batch_loop_multi.h index 684823a8..5b204767 100644 --- a/impl/matmul/scheduler/iterator/batch_loop/batch_loop_multi.h +++ b/impl/matmul/scheduler/iterator/batch_loop/batch_loop_multi.h @@ -81,7 +81,7 @@ public: if (newProcess_ && outerIdx_ == batchOuter_ - 1) { const int32_t tail = inputBatchNum_ % batchA_; batchA_ = tail == 0 ? mainBatchInner_ : tail; - batchB_ = tail == 0 ? mainBatchInner_ : tail; + batchB_ = batchA_; batchNum_ = batchA_; batchCalcSize_ = batchNum_ * MATMUL_MODULE(MatmulShapeInfo)->GetSingleCoreM() * MATMUL_MODULE(MatmulShapeInfo)->GetSingleCoreN(); @@ -143,7 +143,7 @@ public: __aicore__ inline int32_t GetBiasBatchSrcOffset() const { - return outerIdx_ * batchNum_ * MATMUL_MODULE(MatmulShapeInfo)->GetSingleCoreN(); + return outerIdx_ * mainBatchInner_ * MATMUL_MODULE(MatmulShapeInfo)->GetSingleCoreN(); } // Double Buffer Loop @@ -290,6 +290,7 @@ private: (batchNumA % batchNumB == 0 || batchNumB % batchNumA == 0)); batchA_ = batchNumA; batchB_ = batchNumB; + mainBatchInner_ = batchA_; return; } @@ -301,6 +302,7 @@ private: batchOuter_ = 1; batchA_ = layoutBatchNumA; batchB_ = layoutBatchNumB; + mainBatchInner_ = batchA_; return; } if (layoutBatchNumA >= layoutBatchNumB) { @@ -353,7 +355,7 @@ private: inputBatchNum_ = batchNumLarge; ASSERT(batchInner > 0); - newProcess_ = (multiples == 1) && (inputBatchNum_ % DB_FACTOR != 0) && (inputBatchNum_ >= 
batchInner); + newProcess_ = (multiples == 1) && (inputBatchNum_ % DB_FACTOR != 0); if (newProcess_) { mainBatchInner_ = batchInner; batchOuter_ = CeilT(batchNumLess, batchInner); @@ -374,12 +376,12 @@ private: { batchNum_ = batchA_ > batchB_ ? batchA_ : batchB_; if constexpr (!IsBmmDoubleBuffer()) { - if (!newProcess_ || batchA_ != batchB_) { - splitSize_ = (batchNum_ >= DB_FACTOR) && (batchA_ % DB_FACTOR == 0) && - (batchB_ % DB_FACTOR == 0) ? DB_FACTOR : 1; + if (batchOuter_ > 1 && batchA_ == batchB_) { + splitSize_ = (batchA_ >= DB_FACTOR) ? DB_FACTOR : 1; splitBatchNum_ = batchNum_ / splitSize_; } else { - splitSize_ = (batchA_ >= DB_FACTOR) ? DB_FACTOR : 1; + splitSize_ = (batchNum_ >= DB_FACTOR) && (batchA_ % DB_FACTOR == 0) && + (batchB_ % DB_FACTOR == 0) ? DB_FACTOR : 1; splitBatchNum_ = batchNum_ / splitSize_; } } diff --git a/impl/matmul/stage/copy_cube_in/batch/batch_copy_cube_in.h b/impl/matmul/stage/copy_cube_in/batch/batch_copy_cube_in.h index fa2adcbd..2969c0ab 100644 --- a/impl/matmul/stage/copy_cube_in/batch/batch_copy_cube_in.h +++ b/impl/matmul/stage/copy_cube_in/batch/batch_copy_cube_in.h @@ -362,19 +362,22 @@ private: auto alignWidth = CeilAlign(MATMUL_MODULE(BatchCopyCubeInParams)->template GetSingleWidth(), c0Size_); // 2. Calculate src and dst stride of one step - auto batchNum = MATMUL_MODULE(BatchCopyCubeInParams)->GetBatchNum() / splitSize; + auto batchNum = MATMUL_MODULE(BatchCopyCubeInParams)->GetBatchNum(); + int32_t tmpBatchNum = batchNum / splitSize; + int64_t srcStride = alignWidth * alignHeight; int64_t dstStride = MATMUL_MODULE(BatchCopyCubeInParams)->template GetSingleSizeAlign(); - int64_t srcOffset = batchNum * splitIdx * srcStride; - int64_t dstOffset = batchNum * splitIdx * dstStride; + int64_t srcOffset = tmpBatchNum * splitIdx * srcStride; + int64_t dstOffset = tmpBatchNum * splitIdx * dstStride; + auto paramsBatchNum = splitIdx == 0 ? tmpBatchNum : batchNum - tmpBatchNum; // 3. 
loop copy NZ data by batch bool iskRowDirec = IS_KROW && IsSupportB8(); - auto batchOffset = outerIdx * MATMUL_MODULE(BatchCopyCubeInParams)->GetBatchNum() * srcStride; + auto batchOffset = outerIdx * MATMUL_MODULE(BatchCopyCubeInParams)->GetBatchMainBlock() * srcStride; GlobalTensor srcGlobal; srcGlobal.SetGlobalBuffer(MATMUL_MODULE(MatmulTensorInfo)->GetGlobalTensor().address_); srcGlobal.SetAddr(batchOffset); - for (int i = 0; i < batchNum; ++i) { + for (int i = 0; i < paramsBatchNum; ++i) { MATMUL_MODULE(DataCopyWrapper)->CopyNZ2NZ( dstTensor[dstOffset], srcGlobal[srcOffset], 0, 0, alignHeight, alignWidth, alignHeight, iskRowDirec); diff --git a/tests/matmul/iterator/test_batch_loop.cpp b/tests/matmul/iterator/test_batch_loop.cpp index 7f741805..4c5ae929 100644 --- a/tests/matmul/iterator/test_batch_loop.cpp +++ b/tests/matmul/iterator/test_batch_loop.cpp @@ -117,7 +117,7 @@ TEST_F(TestBatchLoop, batch_loop) { EXPECT_EQ(mm.GetOuterIndex(), 1); EXPECT_EQ(mm.GetDstOffset(), 33264); EXPECT_EQ(mm.GetBatchNum(), 3); - EXPECT_EQ(mm.GetBiasBatchSrcOffset(), 231); + EXPECT_EQ(mm.GetBiasBatchSrcOffset(), 0); EXPECT_EQ(mm.GetSplitIndex(), 1); EXPECT_EQ(mm.GetSplitSize(), 1); EXPECT_EQ(mm.GetSplitBatchNum(), 3); @@ -137,7 +137,7 @@ TEST_F(TestBatchLoop, batch_loop_db) { EXPECT_EQ(mm1.GetOuterIndex(), 1); EXPECT_EQ(mm1.GetDstOffset(), 49152); EXPECT_EQ(mm1.GetBatchNum(), 6); - EXPECT_EQ(mm1.GetBiasBatchSrcOffset(), 1536); + EXPECT_EQ(mm1.GetBiasBatchSrcOffset(), 0); EXPECT_EQ(mm1.GetSplitIndex(), 2); EXPECT_EQ(mm1.GetSplitSize(), 2); EXPECT_EQ(mm1.GetSplitBatchNum(), 3); -- Gitee From c66ac3d0b28e04edf6b71f41ac75b4a863f4fd6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EF=A3=B5=EF=A3=B5=EF=A3=B5=EF=A3=B5=EF=A3=B5=EF=A3=B5?= Date: Tue, 29 Jul 2025 16:56:04 +0800 Subject: [PATCH 2/2] fix an error --- tests/matmul/iterator/test_batch_loop.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/matmul/iterator/test_batch_loop.cpp 
b/tests/matmul/iterator/test_batch_loop.cpp index 4c5ae929..7f741805 100644 --- a/tests/matmul/iterator/test_batch_loop.cpp +++ b/tests/matmul/iterator/test_batch_loop.cpp @@ -117,7 +117,7 @@ TEST_F(TestBatchLoop, batch_loop) { EXPECT_EQ(mm.GetOuterIndex(), 1); EXPECT_EQ(mm.GetDstOffset(), 33264); EXPECT_EQ(mm.GetBatchNum(), 3); - EXPECT_EQ(mm.GetBiasBatchSrcOffset(), 0); + EXPECT_EQ(mm.GetBiasBatchSrcOffset(), 231); EXPECT_EQ(mm.GetSplitIndex(), 1); EXPECT_EQ(mm.GetSplitSize(), 1); EXPECT_EQ(mm.GetSplitBatchNum(), 3); @@ -137,7 +137,7 @@ TEST_F(TestBatchLoop, batch_loop_db) { EXPECT_EQ(mm1.GetOuterIndex(), 1); EXPECT_EQ(mm1.GetDstOffset(), 49152); EXPECT_EQ(mm1.GetBatchNum(), 6); - EXPECT_EQ(mm1.GetBiasBatchSrcOffset(), 0); + EXPECT_EQ(mm1.GetBiasBatchSrcOffset(), 1536); EXPECT_EQ(mm1.GetSplitIndex(), 2); EXPECT_EQ(mm1.GetSplitSize(), 2); EXPECT_EQ(mm1.GetSplitBatchNum(), 3); -- Gitee