diff --git a/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq.h b/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq.h index 91b59336b8c134566c7fec8dd4f1ea8b957aaa97..98efc8776260ea896e2a492f32bbb9b088003f44 100644 --- a/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq.h +++ b/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq.h @@ -214,7 +214,7 @@ void GmmDeq( CATLASS_DEVICE void BarrierBetweenUpAndDown() { - AscendC::PipeBarrier(); + // AscendC::PipeBarrier(); Arch::CrossCoreFlag gmm1AivFinished{0}; if constexpr (g_coreType == AscendC::AIV) { Arch::CrossCoreBarrier<0x0, PIPE_MTE3>(); diff --git a/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n128.cpp b/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n128.cpp index c7a48744f53ca7cdd6c866d33c247d955a41d7a2..08a351f5f4755bb2fc4c161b4e289061f2d09a4c 100644 --- a/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n128.cpp +++ b/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n128.cpp @@ -49,7 +49,7 @@ void gmm_deq_swiglu_quant_gmm_deq_n128( workspaceOffset += RoundUp(static_cast(m) * nOut * sizeof(int8_t)); GM_ADDR gmPerTokenScale2 = workspace + workspaceOffset; workspaceOffset += RoundUp(static_cast(m) * sizeof(float)); - GM_ADDR gmWorkspace = workspace + workspaceOffset; + GM_ADDR gmWorkspace = workspace + workspaceOffset - 512; using Gmm1L1TileShape = GemmShape::L1M, Gmm1TileArgs::L1N, GMM1_L1K>; using Gmm1L0TileShape = GemmShape; diff --git a/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n256.cpp b/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n256.cpp index 32a5ce645ba9fb593c3fb75b5c8829ba2528bcbb..b689eec2b836db4f6f68c5a7ea739a433127b22c 100644 --- a/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n256.cpp +++ b/src/kernels/mixkernels/gmm_deq_swiglu_quant_gmm_deq/op_kernel/gmm_deq_swiglu_quant_gmm_deq_n256.cpp @@ -49,7 +49,7 @@ void gmm_deq_swiglu_quant_gmm_deq_n256( workspaceOffset += RoundUp(static_cast(m) * nOut * sizeof(int8_t)); GM_ADDR gmPerTokenScale2 = workspace + workspaceOffset; workspaceOffset += RoundUp(static_cast(m) * sizeof(float)); - GM_ADDR gmWorkspace = workspace + workspaceOffset; + GM_ADDR gmWorkspace = workspace + workspaceOffset - 512; using Gmm1L1TileShape = GemmShape::L1M, Gmm1TileArgs::L1N, GMM1_L1K>; using Gmm1L0TileShape = GemmShape;