From 74cbe15570c444e26d98e7569cd5db1e7b716060 Mon Sep 17 00:00:00 2001 From: kongziyi <1045916357@qq.com> Date: Wed, 4 Jun 2025 10:16:42 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90master=E3=80=91mindspeed-llm=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E9=9B=B6=E5=86=97=E4=BD=99=E9=80=9A=E4=BF=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindspeed_llm/mindspore/mindspore_adaptor.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mindspeed_llm/mindspore/mindspore_adaptor.py b/mindspeed_llm/mindspore/mindspore_adaptor.py index d73080d6a..8336c7e1c 100644 --- a/mindspeed_llm/mindspore/mindspore_adaptor.py +++ b/mindspeed_llm/mindspore/mindspore_adaptor.py @@ -298,3 +298,20 @@ class MindSporeAdaptation(MegatronAdaptationABC): from ..mindspore.core.transformer.module import set_is_first_microbatch MindSporeAdaptation.register('megatron.core.transformer.module.MegatronModule.set_is_first_microbatch', set_is_first_microbatch) + + + if args.moe_zerc: + from mindspeed.mindspore.core.transformer.moe.moe_zerc.fwdbwd import transformer_layer_forward_moe_backward_dense_overlaping_zerc, transformer_layer_forward_moe_backward_moe_overlaping_zerc + MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.overlap_funcs.fwdbwd.transformer_layer_forward_moe_backward_dense_overlaping', + transformer_layer_forward_moe_backward_dense_overlaping_zerc) + MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.overlap_funcs.fwdbwd.transformer_layer_forward_moe_backward_moe_overlaping', + transformer_layer_forward_moe_backward_moe_overlaping_zerc) + from mindspeed.mindspore.core.transformer.moe.moe_zerc.token_dispatcher import zerc_alltoall_token_perm1, zerc_alltoall_token_perm2, zerc_alltoall_token_unperm1, zerc_alltoall_token_unperm2 + MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.modules.token_dispatcher.alltoall_token_perm1', + zerc_alltoall_token_perm1) + 
MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.modules.token_dispatcher.alltoall_token_perm2', + zerc_alltoall_token_perm2) + MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.modules.token_dispatcher.alltoall_token_unperm1', + zerc_alltoall_token_unperm1) + MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.modules.token_dispatcher.alltoall_token_unperm2', + zerc_alltoall_token_unperm2) -- Gitee

# ---------------------------------------------------------------------------
# Review notes (commentary only; nothing below is part of the patch payload).
#
# Purpose
#   The MIME-encoded Subject decodes to, in English:
#   "[master] mindspeed-llm supports zero-redundancy communication".
#   The single hunk appends 17 lines to
#   mindspeed_llm/mindspore/mindspore_adaptor.py, in the region the hunk
#   header labels `class MindSporeAdaptation(MegatronAdaptationABC)`.
#   The enclosing method's `def` line is outside the hunk and is not shown.
#
# What the added lines do
#   Guarded by `if args.moe_zerc:`, the code imports six callables and hands
#   each one to `MindSporeAdaptation.register(<dotted path>, <callable>)`:
#
#     from mindspeed.mindspore.core.transformer.moe.moe_zerc.fwdbwd
#       ...fb_overlap.overlap_funcs.fwdbwd
#           .transformer_layer_forward_moe_backward_dense_overlaping
#             <- transformer_layer_forward_moe_backward_dense_overlaping_zerc
#       ...fb_overlap.overlap_funcs.fwdbwd
#           .transformer_layer_forward_moe_backward_moe_overlaping
#             <- transformer_layer_forward_moe_backward_moe_overlaping_zerc
#
#     from mindspeed.mindspore.core.transformer.moe.moe_zerc.token_dispatcher
#       ...fb_overlap.modules.token_dispatcher.alltoall_token_perm1
#             <- zerc_alltoall_token_perm1
#       ...fb_overlap.modules.token_dispatcher.alltoall_token_perm2
#             <- zerc_alltoall_token_perm2
#       ...fb_overlap.modules.token_dispatcher.alltoall_token_unperm1
#             <- zerc_alltoall_token_unperm1
#       ...fb_overlap.modules.token_dispatcher.alltoall_token_unperm2
#             <- zerc_alltoall_token_unperm2
#
#   ("..." abbreviates the prefix `mindspeed.core.pipeline_parallel`.)
#
# Assumptions to confirm
#   * NOTE(review): `register` is presumably a patch-registration call that
#     makes the dotted target resolve to the supplied callable; its
#     implementation is not visible in this patch. Verify against
#     MegatronAdaptationABC.
#   * NOTE(review): `moe_zerc` presumably corresponds to the
#     "zero-redundancy communication" feature named in the Subject; the
#     option's definition is not part of this patch. Confirm it is declared
#     in the argument parser.
#   * NOTE(review): every target lives under `fb_overlap`; whether
#     `args.moe_zerc` must be combined with a forward/backward-overlap option
#     is not visible here. Confirm with the feature owner.
#   * The spelling "overlaping" appears in both the registered dotted paths
#     and the imported names. These are runtime identifiers and must match
#     the definitions in mindspeed (not shown), so they are left untouched.
#
# Formatting caveat
#   The line breaks and indentation of this file have been collapsed. The
#   hunk header `@@ -298,3 +298,20 @@` and the diffstat (17 insertions)
#   describe 3 context lines plus 17 added lines. The original line
#   structure must be restored before this patch can be applied.
# ---------------------------------------------------------------------------