From ecaf10ccd92835b600e8bd3e122921a10cee8044 Mon Sep 17 00:00:00 2001 From: twc Date: Mon, 14 Apr 2025 16:20:58 +0800 Subject: [PATCH] lm head set dynamic input --- vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py | 2 ++ vllm_mindspore/model_executor/models/mf_models/qwen2.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py index af5a3428..38a08e11 100644 --- a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py +++ b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py @@ -123,6 +123,8 @@ class DeepseekV3ForCausalLM(MfModelBase): weight_processor = DeepseekV3WeightProcessor(self.mf_config, self.network, self.is_quant) weight_processor.load_safetensors_shard(self.mf_config.load_checkpoint) self.network.set_dynamic_inputs() + dynamic_hidden_states = Tensor(shape=[None, None], dtype=self.mf_model_config.compute_dtype) + self.lm_head.set_inputs(dynamic_hidden_states) return None def get_model_path(self): diff --git a/vllm_mindspore/model_executor/models/mf_models/qwen2.py b/vllm_mindspore/model_executor/models/mf_models/qwen2.py index 27711b93..14ce9444 100644 --- a/vllm_mindspore/model_executor/models/mf_models/qwen2.py +++ b/vllm_mindspore/model_executor/models/mf_models/qwen2.py @@ -82,5 +82,6 @@ class Qwen2ForCausalLM(MfModelBase): weight_processor.load_safetensors_shard(self.mf_config.load_checkpoint) self.network.set_dynamic_inputs() - + dynamic_hidden_states = Tensor(shape=[None, None], dtype=self.mf_model_config.compute_dtype) + self.lm_head.set_inputs(dynamic_hidden_states) return None -- Gitee