diff --git a/vllm_mindspore/model_executor/models/registry.py b/vllm_mindspore/model_executor/models/registry.py index 79a37c3813fbb281d21f51887f2828b298c8188a..57e9ecc92a7cf510bd1373f0de25de2d2fdeb126 100644 --- a/vllm_mindspore/model_executor/models/registry.py +++ b/vllm_mindspore/model_executor/models/registry.py @@ -56,7 +56,6 @@ _NATIVE_MODELS = { } _MINDFORMERS_MODELS = { - "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"), "MindFormersForCausalLM": ("mindformers", "MindFormersForCausalLM") } diff --git a/vllm_mindspore/utils.py b/vllm_mindspore/utils.py index 445d49985ccc8e0044b0cb93a6ca3fcbe9c694c7..09c6f8c0795675a0e3a9f8a712a340a3521d8a50 100644 --- a/vllm_mindspore/utils.py +++ b/vllm_mindspore/utils.py @@ -71,9 +71,10 @@ def create_kv_cache(kv_shape, dtype): batch_size, num_heads, seq_len, head_dim = kv_shape reshaped_for_nz = (batch_size, num_heads, seq_len * head_dim) zeros_tensor = ms.mint.zeros(reshaped_for_nz, dtype=dtype) - - return ms.ops.auto_generate.format_cast(zeros_tensor, - FORMAT_TYPE['nz']) + kv_cache = ms.ops.auto_generate.format_cast(zeros_tensor, + FORMAT_TYPE['nz']) + ms.runtime.empty_cache() + return kv_cache return ms.mint.zeros(kv_shape, dtype=dtype) @@ -443,4 +444,4 @@ def view(self, *shape_or_dtype): result.append(item) else: result.append(items) - return ms.ops.reshape(self, result) \ No newline at end of file + return ms.ops.reshape(self, result)