From 5ffe553a70cc85343af4f931245391ba43ac8539 Mon Sep 17 00:00:00 2001 From: jerry Date: Fri, 23 Jan 2026 15:39:38 +0800 Subject: [PATCH] vllm_mindspore master support sparse quant algorithm (w8a8sc) --- vllm_mindspore/__init__.py | 5 ++-- vllm_mindspore/config.py | 2 +- .../quantization/golden_stick/__init__.py | 2 +- .../quantization/golden_stick/a8w8sc.py | 2 +- .../quantization/golden_stick/golden_stick.py | 4 ++-- .../layers/quantization/quant_ops.py | 2 +- .../model_loader/sparse_quant_loader.py | 2 +- vllm_mindspore/model_executor/models/qwen2.py | 23 +++---------------- .../models/sparse_quant_weight_loader.py | 2 +- vllm_mindspore/utils.py | 2 +- 10 files changed, 14 insertions(+), 32 deletions(-) diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py index bf144e79..7fc8bcb3 100644 --- a/vllm_mindspore/__init__.py +++ b/vllm_mindspore/__init__.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -211,8 +211,7 @@ vllm.model_executor.model_loader.weight_utils.get_quantization_config = ( # Import golden_stick module to trigger sparse quantization support setup # The setup is done automatically in golden_stick/__init__.py -from vllm_mindspore.model_executor.layers.quantization.golden_stick import ( # noqa: F401 - GoldenStickConfig, ModelSlimConfig) +import vllm_mindspore.model_executor.layers.quantization.golden_stick # noqa: F401 from vllm_mindspore.executor.multiproc_worker_utils import ( get_mp_context as ms_get_mp_context, ) diff --git a/vllm_mindspore/config.py b/vllm_mindspore/config.py index 7dd5dbf2..6ea80611 100644 --- a/vllm_mindspore/config.py +++ b/vllm_mindspore/config.py @@ -3,7 +3,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/v0.8.3/vllm/config.py # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # Copyright 2024-2025 The vLLM team. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py b/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py index 398902a4..8ab5b93c 100644 --- a/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py +++ b/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py b/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py index 31dadacf..63f20e30 100644 --- a/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py +++ b/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py b/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py index 209f1e90..66c98551 100644 --- a/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py +++ b/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -148,10 +148,10 @@ class GoldenStickConfig(QuantizationConfig): @staticmethod def get_config_filenames() -> list[str]: + # quant_model_description_w8a8sc.json is for sparse quant (W8A8SC) return [ "quantization_description.json", "quant_model_description.json", "quant_model_description_w8a8sc.json" - # Added for sparse quantization config ] @classmethod diff --git a/vllm_mindspore/model_executor/layers/quantization/quant_ops.py b/vllm_mindspore/model_executor/layers/quantization/quant_ops.py index 16dd5ab9..0281a6b8 100644 --- a/vllm_mindspore/model_executor/layers/quantization/quant_ops.py +++ b/vllm_mindspore/model_executor/layers/quantization/quant_ops.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py b/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py index d026001e..84825178 100644 --- a/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py +++ b/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/models/qwen2.py b/vllm_mindspore/model_executor/models/qwen2.py index 31aad66e..b21801d6 100644 --- a/vllm_mindspore/model_executor/models/qwen2.py +++ b/vllm_mindspore/model_executor/models/qwen2.py @@ -2,7 +2,7 @@ # Adapted from # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/qwen2/modeling_qwen2.py -# Copyright 2025 Huawei Technologies Co., Ltd. 
+# Copyright 2026 Huawei Technologies Co., Ltd. # Copyright 2024 The Qwen team. # Copyright 2023 The vLLM team. # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. @@ -363,24 +363,6 @@ class Qwen2Model(nn.Cell): hidden_states, residual = self.norm(hidden_states, residual) return hidden_states, residual - def _load_split_weights(self, weights: Iterable[tuple[str, Tensor]], - params_dict: dict[str, Parameter]) -> set[str]: - """Load sparse quantized weights directly without sharding. - - Weights are already partitioned by rank folders, so load them - directly without any sharding operations. This method delegates - to the common sparse quantized weight loader utility. - - Args: - weights: Iterable of (name, weight) tuples - params_dict: Dictionary of parameter names to Parameter objects - - Returns: - Set of loaded parameter names - """ - return load_split_weights(weights, params_dict, self.config, - self.quant_config) - def load_weights(self, weights: Iterable[tuple[str, Tensor]], params_dict: dict[str, Parameter]): # Check if sparse quantization is enabled via rank-level config @@ -397,7 +379,8 @@ class Qwen2Model(nn.Cell): isinstance(v, str) and v.lower() == "w8a8s" for v in sparse_config.values()) if has_sparse_quant: - return self._load_split_weights(weights, params_dict) + return load_split_weights(weights, params_dict, + self.config, self.quant_config) loaded_params: set[str] = set() stacked_params_mapping = [ diff --git a/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py b/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py index 29dd8d7e..bbaee2cb 100644 --- a/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py +++ b/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/utils.py b/vllm_mindspore/utils.py index f8fbf042..6d6a8a61 100644 --- a/vllm_mindspore/utils.py +++ b/vllm_mindspore/utils.py @@ -3,7 +3,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/v0.8.3/vllm/utils.py # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # Copyright 2024-2025 The vLLM team. # # Licensed under the Apache License, Version 2.0 (the "License"); -- Gitee