From a6cc64fba54816ba7c0609f09e183f72ac8435c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?flippy=E8=88=AA?= <654733882@qq.com>
Date: Tue, 5 Aug 2025 10:49:52 +0800
Subject: [PATCH] fix mamba_hybrid model pretrain in recompute mode

---
 mindspeed_llm/core/ssm/mamba_block.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mindspeed_llm/core/ssm/mamba_block.py b/mindspeed_llm/core/ssm/mamba_block.py
index c15023bde..6ba740220 100644
--- a/mindspeed_llm/core/ssm/mamba_block.py
+++ b/mindspeed_llm/core/ssm/mamba_block.py
@@ -86,6 +86,11 @@ def _mamba_block_method_checkpointed_forward_func(
                     inference_context=None,
                     rotary_pos_emb=rotary_pos_emb,
                 )
+                # The attention layer (currently a simplified transformer layer)
+                # outputs a tuple of (hidden_states, context). Context is intended
+                # for cross-attention and is not needed in our model.
+                if isinstance(hidden_states, tuple):
+                    hidden_states = hidden_states[0]
             return hidden_states

         return custom_forward
--
Gitee
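
Note (for reviewers): the sketch below illustrates the pattern this patch fixes. It is not the MindSpeed-LLM source; the names HybridLayer and build_checkpointed_forward are illustrative stand-ins for the hybrid block's attention layer and its checkpointed forward closure. The point is that a Megatron-style transformer layer returns a (hidden_states, context) tuple, so the recompute closure must unpack it before passing the result to the next layer.

    # Minimal, self-contained sketch (assumed names, not the real API) of the
    # tuple-unpacking fix applied inside a checkpointed (recompute) forward.
    import torch
    from torch.utils.checkpoint import checkpoint


    class HybridLayer(torch.nn.Module):
        """Stand-in for the simplified transformer (attention) layer: its
        forward returns (hidden_states, context), mirroring Megatron layers."""

        def __init__(self, hidden_size: int):
            super().__init__()
            self.linear = torch.nn.Linear(hidden_size, hidden_size)

        def forward(self, hidden_states, attention_mask=None):
            # context is only meaningful for cross-attention; return None here.
            return self.linear(hidden_states), None


    def build_checkpointed_forward(layers):
        """Build a closure analogous to custom_forward in
        _mamba_block_method_checkpointed_forward_func (illustrative only)."""

        def custom_forward(hidden_states, attention_mask=None):
            for layer in layers:
                hidden_states = layer(hidden_states, attention_mask=attention_mask)
                # The layer may return (hidden_states, context); keep only the
                # tensor so the next layer and the checkpoint wrapper see a
                # plain tensor -- this is what the patch above enforces.
                if isinstance(hidden_states, tuple):
                    hidden_states = hidden_states[0]
            return hidden_states

        return custom_forward


    if __name__ == "__main__":
        layers = torch.nn.ModuleList([HybridLayer(16) for _ in range(2)])
        forward_fn = build_checkpointed_forward(layers)
        x = torch.randn(4, 8, 16, requires_grad=True)
        # Recompute mode: activations are rebuilt during the backward pass.
        out = checkpoint(forward_fn, x, use_reentrant=False)
        out.sum().backward()
        print(out.shape)  # torch.Size([4, 8, 16])

Without the isinstance guard, the tuple returned by the attention layer would be fed back into the next layer (or returned from the checkpointed closure) and break pretraining when activation recomputation is enabled, which is the failure mode this patch addresses.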