From a6cc64fba54816ba7c0609f09e183f72ac8435c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?flippy=E8=88=AA?= <654733882@qq.com>
Date: Tue, 5 Aug 2025 10:49:52 +0800
Subject: [PATCH] fix mamba_hybrid model pretrain in recompute mode

---
 mindspeed_llm/core/ssm/mamba_block.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mindspeed_llm/core/ssm/mamba_block.py b/mindspeed_llm/core/ssm/mamba_block.py
index c15023bde..6ba740220 100644
--- a/mindspeed_llm/core/ssm/mamba_block.py
+++ b/mindspeed_llm/core/ssm/mamba_block.py
@@ -86,6 +86,11 @@ def _mamba_block_method_checkpointed_forward_func(
                     inference_context=None,
                     rotary_pos_emb=rotary_pos_emb,
                 )
+                # The attention layer (currently a simplified transformer layer)
+                # outputs a tuple of (hidden_states, context). Context is intended
+                # for cross-attention and is not needed in our model.
+                if isinstance(hidden_states, tuple):
+                    hidden_states = hidden_states[0]
             return hidden_states

         return custom_forward
--
Gitee
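
Note (for reviewers): the sketch below illustrates the pattern this patch fixes. It is not the MindSpeed-LLM source; the names HybridLayer and build_checkpointed_forward are illustrative stand-ins for the hybrid block's attention layer and its checkpointed forward closure. The point is that a Megatron-style transformer layer returns a (hidden_states, context) tuple, so the recompute closure must unpack it before passing the result to the next layer.

    # Minimal, self-contained sketch (assumed names, not the real API) of the
    # tuple-unpacking fix applied inside a checkpointed (recompute) forward.
    import torch
    from torch.utils.checkpoint import checkpoint


    class HybridLayer(torch.nn.Module):
        """Stand-in for the simplified transformer (attention) layer: its
        forward returns (hidden_states, context), mirroring Megatron layers."""

        def __init__(self, hidden_size: int):
            super().__init__()
            self.linear = torch.nn.Linear(hidden_size, hidden_size)

        def forward(self, hidden_states, attention_mask=None):
            # context is only meaningful for cross-attention; return None here.
            return self.linear(hidden_states), None


    def build_checkpointed_forward(layers):
        """Build a closure analogous to custom_forward in
        _mamba_block_method_checkpointed_forward_func (illustrative only)."""

        def custom_forward(hidden_states, attention_mask=None):
            for layer in layers:
                hidden_states = layer(hidden_states, attention_mask=attention_mask)
                # The layer may return (hidden_states, context); keep only the
                # tensor so the next layer and the checkpoint wrapper see a
                # plain tensor -- this is what the patch above enforces.
                if isinstance(hidden_states, tuple):
                    hidden_states = hidden_states[0]
            return hidden_states

        return custom_forward


    if __name__ == "__main__":
        layers = torch.nn.ModuleList([HybridLayer(16) for _ in range(2)])
        forward_fn = build_checkpointed_forward(layers)
        x = torch.randn(4, 8, 16, requires_grad=True)
        # Recompute mode: activations are rebuilt during the backward pass.
        out = checkpoint(forward_fn, x, use_reentrant=False)
        out.sum().backward()
        print(out.shape)  # torch.Size([4, 8, 16])

Without the isinstance guard, the tuple returned by the attention layer would be fed back into the next layer (or returned from the checkpointed closure) and break pretraining when activation recomputation is enabled, which is the failure mode this patch addresses.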