From ca8eec73d97b17f1de0486c824e9cd23edbbbc0d Mon Sep 17 00:00:00 2001 From: y30062407 Date: Wed, 18 Jun 2025 16:15:19 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90mindspore=E3=80=91=E3=80=90bugfix?= =?UTF-8?q?=E3=80=91Fix=20error=20enabling=20moe-zerc=20+=20dualpipe=20in?= =?UTF-8?q?=20full=20recompute=20for=20deepseek=5Fv3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindspeed_llm/mindspore/mindspore_adaptor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mindspeed_llm/mindspore/mindspore_adaptor.py b/mindspeed_llm/mindspore/mindspore_adaptor.py index a140ee171..33430a8fe 100644 --- a/mindspeed_llm/mindspore/mindspore_adaptor.py +++ b/mindspeed_llm/mindspore/mindspore_adaptor.py @@ -185,7 +185,6 @@ class MindSporeAdaptation(MegatronAdaptationABC): #mindspeed - from mindspeed.mindspore.core.pipeline_parallel.dualpipev.dualpipev_schedules import backward_step_with_model_graph, set_shared_embedding_from_dual_chunk, forward_step_with_model_graph, get_shared_embedding_from_dual_chunk, forward_backward_pipelining_with_cutinhalf MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.dualpipev.dualpipev_schedules.backward_step_with_model_graph', backward_step_with_model_graph) @@ -198,10 +197,11 @@ class MindSporeAdaptation(MegatronAdaptationABC): MindSporeAdaptation.register('megatron.core.pipeline_parallel.schedules.forward_backward_pipelining_without_interleaving', forward_backward_pipelining_with_cutinhalf) - - from mindspeed.mindspore.core.pipeline_parallel.fb_overlap.transformer_layer import transformer_layer_backward - MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.transformer_layer.transformer_layer_backward', - transformer_layer_backward) + from mindspeed.mindspore.core.pipeline_parallel.fb_overlap.transformer_layer import \ + transformer_layer_recompute + MindSporeAdaptation.register( + 'mindspeed.core.pipeline_parallel.fb_overlap.transformer_layer.transformer_layer_recompute', + transformer_layer_recompute) from mindspeed.mindspore.core.pipeline_parallel.fb_overlap.transformer_block import transformer_block_forward, transformer_block_forward_backward_overlaping MindSporeAdaptation.register('mindspeed.core.pipeline_parallel.fb_overlap.transformer_block.transformer_block_forward', -- Gitee