diff --git a/impl/matmul/scheduler/bias/bias_scheduler_base.h b/impl/matmul/scheduler/bias/bias_scheduler_base.h index 295aaee8407a8f89416faf745ab1c78b5ae75da1..0005d79bf20962173d9cfa0c9d864915b28aa9ac 100644 --- a/impl/matmul/scheduler/bias/bias_scheduler_base.h +++ b/impl/matmul/scheduler/bias/bias_scheduler_base.h @@ -32,7 +32,7 @@ class BiasSchedulerBase { MATMUL_USE_MODULE(MatmulShapeTiling); using BiasT = typename BIAS_TYPE::T; - using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (!MatmulFeatureTrait::IsSupportUBToL1() + using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (PhyPosIsUB(BIAS_TYPE::pos) && !MatmulFeatureTrait::IsSupportUBToL1() && !MatmulFeatureTrait::IsSupportUBToL1Singleshape())), GlobalTensor, LocalTensor>::type; public: diff --git a/impl/matmul/scheduler/bias/bias_scheduler_intf.h b/impl/matmul/scheduler/bias/bias_scheduler_intf.h index 40ff3fa2b939e1f5aa1157d2592304c2d55a6afe..7fea10bf4503a1b95aa8a7ea8d0793caf12f565c 100644 --- a/impl/matmul/scheduler/bias/bias_scheduler_intf.h +++ b/impl/matmul/scheduler/bias/bias_scheduler_intf.h @@ -28,7 +28,7 @@ namespace Detail { template class BiasScheduler { using BiasT = typename BIAS_TYPE::T; - using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (!MatmulFeatureTrait::IsSupportUBToL1() + using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (PhyPosIsUB(BIAS_TYPE::pos) && !MatmulFeatureTrait::IsSupportUBToL1() && !MatmulFeatureTrait::IsSupportUBToL1Singleshape())), GlobalTensor, LocalTensor>::type; public: diff --git a/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_batch.h b/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_batch.h index 7146a59eecbcfd33bc0d539fdf3e5f9f060cdaa2..6c5c0995be11a0772e405846339ca265bd695ae0 100644 --- a/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_batch.h +++ b/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_batch.h @@ -35,7 +35,7 @@ class CopyBiasIn> { using BiasT = typename BIAS_TYPE::T; - using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || !MatmulFeatureTrait::IsSupportUBToL1()), + using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (PhyPosIsUB(BIAS_TYPE::pos) && !MatmulFeatureTrait::IsSupportUBToL1())), GlobalTensor, LocalTensor>::type; public: diff --git a/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_intf.h b/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_intf.h index 38cbb34d4682eb32810ae4c9aa7fcf353b967d57..3b7179908b758bc1ed328c86325a91debf570660 100644 --- a/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_intf.h +++ b/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_intf.h @@ -28,7 +28,7 @@ namespace Detail { template class CopyBiasIn { using BiasT = typename BIAS_TYPE::T; - using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (!MatmulFeatureTrait::IsSupportUBToL1() + using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (PhyPosIsUB(BIAS_TYPE::pos) && !MatmulFeatureTrait::IsSupportUBToL1() && !MatmulFeatureTrait::IsSupportUBToL1Singleshape())), GlobalTensor, LocalTensor>::type; public: __aicore__ inline CopyBiasIn() = default; diff --git a/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_v220.h b/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_v220.h index 492203476dee61b391566624ec8670fe3cf30bc8..c701517336e0166c16d0e05f97eeb802d7351078 100644 --- a/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_v220.h +++ b/impl/matmul/stage/copy_cube_in/bias/copy_bias_in_v220.h @@ -41,7 +41,7 @@ class CopyBiasIn::IsSupportUBToL1() + using TensorT = typename Conditional<(PhyPosIsGM(BIAS_TYPE::pos) || (PhyPosIsUB(BIAS_TYPE::pos) && !MatmulFeatureTrait::IsSupportUBToL1() && !MatmulFeatureTrait::IsSupportUBToL1Singleshape())), GlobalTensor, LocalTensor>::type; public: diff --git a/impl/matmul/stage/copy_cube_out/copy_cube_out_fixpipe.h b/impl/matmul/stage/copy_cube_out/copy_cube_out_fixpipe.h index 9835a4856e6dd5ccafb4840ba07dc175c1488a36..6ccd5947fdd4707a59493aa3f2edab1c91661ae5 100644 --- a/impl/matmul/stage/copy_cube_out/copy_cube_out_fixpipe.h +++ b/impl/matmul/stage/copy_cube_out/copy_cube_out_fixpipe.h @@ -178,18 +178,21 @@ private: if constexpr (MatmulFeatureTrait::IsSupportL0CToUB() && PhyPosIsUB(C_TYPE::pos) && (A_TYPE::ibShare && B_TYPE::ibShare)) { if constexpr (FIXPIPE_MODE != McgShfMode::DUAL_DST_SPLIT_N) { - dstOffset = (static_cast (static_cast( - curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM() * stride)) >> 1) + + dstOffset = (static_cast (static_cast(static_cast( + curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()) * stride)) >> 1) + static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()); } else { dstOffset = - static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM() * stride) + - static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN() * baseHeight); + static_cast(static_cast( + curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()) * stride) + + static_cast(static_cast( + curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * baseHeight); dstOffset = dstOffset >> 1; } } else { dstOffset = - static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM() * stride)+ + static_cast(static_cast( + curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()) * stride)+ static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()); } return dstOffset; @@ -221,7 +224,8 @@ private: if constexpr (!enSequentialWrite) { stride = static_cast(GetOrgM() * CHANNEL_MERGE_FACTOR * BLOCK_CUBE); if constexpr (!IsBasic(MM_CFG)) { - dstOffset = static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * GetOrgM() + + dstOffset = static_cast(static_cast( + curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * GetOrgM()) + static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()) * CHANNEL_MERGE_FACTOR * BLOCK_CUBE; } if constexpr (PhyPosIsUB(C_TYPE::pos) && ((A_TYPE::ibShare && B_TYPE::ibShare) || @@ -274,8 +278,9 @@ private: if constexpr (!enSequentialWrite) { stride = GetOrgHeight(); if constexpr (!IsBasic(MM_CFG)) { - dstOffset = static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN() * - stride)+ static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()); + dstOffset = static_cast(static_cast( + curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * stride) + + static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()); } } FixpipeAdaptor fixpipe(baseWidth, baseHeight, baseBlockWidth, baseBlockHeight, @@ -293,11 +298,13 @@ private: uint32_t stride; if constexpr (!enSequentialWrite) { if constexpr (!ToMatmulConfig(MM_CFG).isEnableChannelSplit) { - dstOffset = static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * GetOrgM() + + dstOffset = static_cast(static_cast( + curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * GetOrgM()) + static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()) * BLOCK_CUBE; stride = static_cast(GetOrgM() * BLOCK_CUBE * sizeof(DstT) / ONE_BLK_SIZE); } else { - dstOffset = static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * Ceil(GetOrgM(), BLOCK_CUBE) * BLOCK_CUBE + + dstOffset = static_cast(static_cast(curCol * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseN()) * + Ceil(GetOrgM(), BLOCK_CUBE)) * BLOCK_CUBE + static_cast(curRow * MATMUL_MODULE(MatmulShapeTiling)->GetTiling().GetBaseM()) * B32_C0SIZE; stride = static_cast(Ceil(GetOrgM() , BLOCK_CUBE) * BLOCK_CUBE * B32_C0SIZE * sizeof(DstT) / ONE_BLK_SIZE); @@ -455,12 +462,14 @@ private: if constexpr (C_TYPE::format == CubeFormat::ND || C_TYPE::format == CubeFormat::ND_ALIGN) { dstStrideIn = GetOrgWidth(); nSize = static_cast(baseWidth); - dstOffset = static_cast(static_cast(curRow * baseM) * dstStrideIn) + static_cast(curCol * baseN); + dstOffset = static_cast(static_cast(curRow * baseM) * dstStrideIn) + + static_cast(curCol * baseN); } else if constexpr (C_TYPE::format == CubeFormat::NZ) { dstStrideIn = static_cast((MATMUL_MODULE(MatmulShapeInfo)->GetOrgM()) * BLOCK_CUBE * sizeof(DstT) / ONE_BLK_SIZE); nSize = 0; - dstOffset = curCol * baseN * MATMUL_MODULE(MatmulShapeInfo)->GetOrgM() + curRow * baseM * BLOCK_CUBE; + dstOffset = static_cast(static_cast(curCol * baseN) * MATMUL_MODULE(MatmulShapeInfo)->GetOrgM()) + + static_cast(curRow * baseM * BLOCK_CUBE); } } } diff --git a/impl/matmul/tiling/matmul_tiling_algorithm.cpp b/impl/matmul/tiling/matmul_tiling_algorithm.cpp index fad57699c91237139ee336eeb72e97ac5dd8f55e..14145e00883bc2bf5b0cab57b2f4fdc532e75898 100644 --- a/impl/matmul/tiling/matmul_tiling_algorithm.cpp +++ b/impl/matmul/tiling/matmul_tiling_algorithm.cpp @@ -3045,8 +3045,8 @@ void MatmulTilingAlgorithm::CheckL0DB(SingleCoreStatus& singleCoreStatus, const tilingIns_->bType_.scalePos == TPosition::TSCM) { baseN = MathUtil::Align(singleCoreStatus.l0Status.nL0, L0_FACTOR_NUM_LIMIT) * C0_SIZE; } - const uint32_t aTypeSize = DTYPE_BIT_TAB.at(tilingIns_->aType_.dataType); - const uint32_t bTypeSize = DTYPE_BIT_TAB.at(tilingIns_->bType_.dataType); + const int32_t aTypeSize = DTYPE_BIT_TAB.at(tilingIns_->aType_.dataType); + const int32_t bTypeSize = DTYPE_BIT_TAB.at(tilingIns_->bType_.dataType); if ((baseM * baseK * aTypeSize / BITS_PER_BYTE) > (tilingIns_->bufferPool_.l0ASize / DB_ON)) { singleCoreStatus.l0Status.dbL0A = DB_OFF; }