diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py index bf144e79a54e3e4bc36e14a7d839fab1d60129bf..7fc8bcb323380039e7708f965bc6ac68a4fc0ec7 100644 --- a/vllm_mindspore/__init__.py +++ b/vllm_mindspore/__init__.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -211,8 +211,7 @@ vllm.model_executor.model_loader.weight_utils.get_quantization_config = ( # Import golden_stick module to trigger sparse quantization support setup # The setup is done automatically in golden_stick/__init__.py -from vllm_mindspore.model_executor.layers.quantization.golden_stick import ( # noqa: F401 - GoldenStickConfig, ModelSlimConfig) +import vllm_mindspore.model_executor.layers.quantization.golden_stick # noqa: F401 from vllm_mindspore.executor.multiproc_worker_utils import ( get_mp_context as ms_get_mp_context, ) diff --git a/vllm_mindspore/config.py b/vllm_mindspore/config.py index 7dd5dbf2b50fd17b3a481a2251a91f6c247f81c2..6ea80611f84279472188844e6fec03ece361d423 100644 --- a/vllm_mindspore/config.py +++ b/vllm_mindspore/config.py @@ -3,7 +3,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/v0.8.3/vllm/config.py # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # Copyright 2024-2025 The vLLM team. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py b/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py index 398902a441f0989030c2c0a4fef0376fba13c30a..8ab5b93c13c0ff73f2ab741400051890ab2ee2ae 100644 --- a/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py +++ b/vllm_mindspore/model_executor/layers/quantization/golden_stick/__init__.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py b/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py index 31dadacf9531afd1ebab6824da17a30052fc1bd0..63f20e301de8f083e5f10cd0bd49c36e51b93f0e 100644 --- a/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py +++ b/vllm_mindspore/model_executor/layers/quantization/golden_stick/a8w8sc.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py b/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py index 209f1e90a4be8ba188fbd539a8f15be3795cbc7e..66c9855171cb35852e3631fdff5d7cfd1293d45f 100644 --- a/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py +++ b/vllm_mindspore/model_executor/layers/quantization/golden_stick/golden_stick.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -148,10 +148,10 @@ class GoldenStickConfig(QuantizationConfig): @staticmethod def get_config_filenames() -> list[str]: + # quant_model_description_w8a8sc.json is for sparse quant (W8A8SC) return [ "quantization_description.json", "quant_model_description.json", "quant_model_description_w8a8sc.json" - # Added for sparse quantization config ] @classmethod diff --git a/vllm_mindspore/model_executor/layers/quantization/quant_ops.py b/vllm_mindspore/model_executor/layers/quantization/quant_ops.py index 16dd5ab9786a9b6c3c02ad75589e812dc0b19ca5..0281a6b85ffbf51360ed0d3e6200bbb3efbf228c 100644 --- a/vllm_mindspore/model_executor/layers/quantization/quant_ops.py +++ b/vllm_mindspore/model_executor/layers/quantization/quant_ops.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py b/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py index d026001eb82d223a99486f8900c0f88323efff00..84825178f3f95b5f6820000163730a0798e32acb 100644 --- a/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py +++ b/vllm_mindspore/model_executor/model_loader/sparse_quant_loader.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/model_executor/models/qwen2.py b/vllm_mindspore/model_executor/models/qwen2.py index 31aad66edef6d9b2f2b9bda51c34ec0d16d81177..b21801d6ed73b0019f8a5af044c3a29ebc58904b 100644 --- a/vllm_mindspore/model_executor/models/qwen2.py +++ b/vllm_mindspore/model_executor/models/qwen2.py @@ -2,7 +2,7 @@ # Adapted from # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/qwen2/modeling_qwen2.py -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # Copyright 2024 The Qwen team. # Copyright 2023 The vLLM team. # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. @@ -363,24 +363,6 @@ class Qwen2Model(nn.Cell): hidden_states, residual = self.norm(hidden_states, residual) return hidden_states, residual - def _load_split_weights(self, weights: Iterable[tuple[str, Tensor]], - params_dict: dict[str, Parameter]) -> set[str]: - """Load sparse quantized weights directly without sharding. - - Weights are already partitioned by rank folders, so load them - directly without any sharding operations. This method delegates - to the common sparse quantized weight loader utility. - - Args: - weights: Iterable of (name, weight) tuples - params_dict: Dictionary of parameter names to Parameter objects - - Returns: - Set of loaded parameter names - """ - return load_split_weights(weights, params_dict, self.config, - self.quant_config) - def load_weights(self, weights: Iterable[tuple[str, Tensor]], params_dict: dict[str, Parameter]): # Check if sparse quantization is enabled via rank-level config @@ -397,7 +379,8 @@ class Qwen2Model(nn.Cell): isinstance(v, str) and v.lower() == "w8a8s" for v in sparse_config.values()) if has_sparse_quant: - return self._load_split_weights(weights, params_dict) + return load_split_weights(weights, params_dict, + self.config, self.quant_config) loaded_params: set[str] = set() stacked_params_mapping = [ diff --git a/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py b/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py index 29dd8d7eb5e75918e43675367e4f3ee09f865c16..bbaee2cb7bf2b264b48343c5ccabf5e93cb86a75 100644 --- a/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py +++ b/vllm_mindspore/model_executor/models/sparse_quant_weight_loader.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/vllm_mindspore/utils.py b/vllm_mindspore/utils.py index f8fbf042fd8499e7148e328644faa57db2f56ff2..6d6a8a618b840022197075862fd92c274532c77b 100644 --- a/vllm_mindspore/utils.py +++ b/vllm_mindspore/utils.py @@ -3,7 +3,7 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/v0.8.3/vllm/utils.py # -# Copyright 2025 Huawei Technologies Co., Ltd. +# Copyright 2026 Huawei Technologies Co., Ltd. # Copyright 2024-2025 The vLLM team. # # Licensed under the Apache License, Version 2.0 (the "License");