From 46b04d0d7e2d0ec7532cba6029b50f13038457e6 Mon Sep 17 00:00:00 2001 From: xinyuan Date: Thu, 26 Jun 2025 19:11:46 +0800 Subject: [PATCH] fix_wrapper_patch2.1.0 --- mindspeed_llm/mindspore/mindspore_adaptor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mindspeed_llm/mindspore/mindspore_adaptor.py b/mindspeed_llm/mindspore/mindspore_adaptor.py index 66613be49..844122e01 100644 --- a/mindspeed_llm/mindspore/mindspore_adaptor.py +++ b/mindspeed_llm/mindspore/mindspore_adaptor.py @@ -53,10 +53,10 @@ class MindSporeAdaptation(MegatronAdaptationABC): MindSporeAdaptation.register('megatron.core.models.gpt.gpt_model.GPTModel', GPTModel) MindSporeAdaptation.register('megatron.core.distributed.distributed_data_parallel.DistributedDataParallel.__init__', distributed_data_parallel_init_with_cp) + from megatron.core.transformer.moe.moe_layer import MoELayer + clear_wrapper('megatron.core.transformer.moe.moe_layer.MoELayer.__init__', MoELayer.__init__) MindSporeAdaptation.register('megatron.core.transformer.moe.moe_layer.MoELayer.__init__', moe_layer_init_wrapper) - MindSporeAdaptation.register('megatron.core.transformer.moe.experts.GroupedMLP.__init__', - groupedmlp_init_wrapper) MindSporeAdaptation.register('megatron.core.transformer.moe.moe_layer.MoELayer.forward', moe_layer_forward) from .core.transformer.moe.moe_utils import unpermute @@ -169,6 +169,8 @@ class MindSporeAdaptation(MegatronAdaptationABC): MindSporeAdaptation.register('megatron.core.transformer.moe.router.TopKRouter.gating', topk_router_gating_func) from mindspeed.mindspore.core.transformer.transformer import core_mlp_forward_wrapper + from megatron.core.transformer.mlp import MLP + clear_wrapper('megatron.core.transformer.mlp.MLP.forward', MLP.forward) MindSporeAdaptation.register('megatron.core.transformer.mlp.MLP.forward', core_mlp_forward_wrapper) -- Gitee