From cd4251f887ab47b90596c4198b81007755badbd1 Mon Sep 17 00:00:00 2001
From: r1chardf1d0
Date: Tue, 22 Jul 2025 14:54:45 +0800
Subject: [PATCH] add dllm comment

---
 .jenkins/test/config/dependent_packages.yaml          | 2 +-
 install_depend_pkgs.sh                                | 4 ++--
 vllm_mindspore/entrypoints/__main__.py                | 3 +++
 .../model_executor/models/mf_models/deepseek_v3.py    | 2 ++
 .../model_executor/models/mf_models/mf_model_base.py  | 5 +++++
 vllm_mindspore/model_executor/models/model_base.py    | 2 +-
 vllm_mindspore/utils.py                               | 6 ++++++
 vllm_mindspore/v1/worker/gpu_worker.py                | 1 +
 8 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/.jenkins/test/config/dependent_packages.yaml b/.jenkins/test/config/dependent_packages.yaml
index 3a206eb9..7bfb6a33 100644
--- a/.jenkins/test/config/dependent_packages.yaml
+++ b/.jenkins/test/config/dependent_packages.yaml
@@ -2,7 +2,7 @@
 mindspore:
   'https://repo.mindspore.cn/mindspore/mindspore/version/202507/20250717/br_infer_iter_20250717031508_4a22a0fff06e31e66c543c710941c9518577125f_newest/'
 mindspore_gs:
-  'https://repo.mindspore.cn/mindspore/golden-stick/version/202507/20250709/master_20250709010018_5f01a0211ca36690a577d3d456c5ba194c88771d_newest/'
+  'https://repo.mindspore.cn/mindspore/golden-stick/version/202507/20250721/develop_20250721153507_7026d5afce6b611d3ec2653bee26a263dead90b8_newest/'
 msadapter:
   'https://repo.mindspore.cn/mindspore/msadapter/version/202507/20250715/master_20250715160021_01925e853210c29c01c6d7602528ea1fcb85c6ad_newest/'
diff --git a/install_depend_pkgs.sh b/install_depend_pkgs.sh
index 47c6335a..456bb1f4 100644
--- a/install_depend_pkgs.sh
+++ b/install_depend_pkgs.sh
@@ -91,9 +91,9 @@ fi
 
 echo "========= Installing mindspore golden-stick"
-gs_dir=gs-master
+gs_dir=gs-develop
 if [ ! -d "$gs_dir" ]; then
-    git clone https://gitee.com/mindspore/golden-stick.git "$gs_dir"
+    git clone https://gitee.com/mindspore/golden-stick.git -b develop "$gs_dir"
 else
     echo "The $gs_dir folder already exists and will not be re-downloaded."
 fi

diff --git a/vllm_mindspore/entrypoints/__main__.py b/vllm_mindspore/entrypoints/__main__.py
index 92181761..0b8fdd3a 100644
--- a/vllm_mindspore/entrypoints/__main__.py
+++ b/vllm_mindspore/entrypoints/__main__.py
@@ -25,6 +25,9 @@ from pathlib import Path
 
 _original_run_api_server_worker_proc = None
 
+# DLLM
+# Use a monkey patch to add "import vllm_mindspore" in each worker,
+# so initialization is not skipped when multiple API servers are used.
 def monkey_patch_server_run_api_server_worker_proc(*arg, **kwargs):
     import vllm_mindspore
     assert _original_run_api_server_worker_proc is not None
diff --git a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py
index dfc8be5d..e0e09885 100644
--- a/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py
+++ b/vllm_mindspore/model_executor/models/mf_models/deepseek_v3.py
@@ -57,6 +57,7 @@ from vllm_mindspore.model_executor.models.mf_models.mf_model_base import (
 from vllm_mindspore.model_executor.models.model_base import MLAAttentionWrapper
 
 with contextlib.suppress(ImportError):
+    # DLLM
     # Need to apply dllm pd patch on vllm to use pd disagg related functions
     from vllm.attention.layer import maybe_save_kv_layer_to_connector
 
@@ -192,6 +193,7 @@ class DeepseekV3ForCausalLM(MfModelBase):
             key_cache.append(k_cache)
         return mutable(key_cache), None
 
+    # DLLM
     def connector_send_kvcache(self):
         logger.debug("reached deepseek_v3 connector_send_kvcache")
         _pynative_executor.sync()
diff --git a/vllm_mindspore/model_executor/models/mf_models/mf_model_base.py b/vllm_mindspore/model_executor/models/mf_models/mf_model_base.py
index 38e97956..f82d2d9f 100644
--- a/vllm_mindspore/model_executor/models/mf_models/mf_model_base.py
+++ b/vllm_mindspore/model_executor/models/mf_models/mf_model_base.py
@@ -105,12 +105,14 @@ class MfModelBase(MsModelBase):
         raise NotImplementedError(
             "Function _create_network should be Implemented!")
 
+    # DLLM
     def is_decoder_task(self) -> bool:
         if self.kv_transfer_config is None:
             return False
 
         return self.kv_transfer_config.is_kv_consumer
 
+    # DLLM
     def is_prefill_task(self) -> bool:
         if self.kv_transfer_config is None:
             return False
@@ -131,6 +133,7 @@ class MfModelBase(MsModelBase):
     def update_model_inputs(self, model_inputs, **kwargs):
         return model_inputs
 
+    # DLLM
     def connector_send_kvcache(self):
         logger.debug("reached connector_send_kvcache")
         _pynative_executor.sync()
@@ -143,6 +146,7 @@ class MfModelBase(MsModelBase):
             v_cache = kv_cache.kv_cache[forward_context.virtual_engine][1]
             maybe_save_kv_layer_to_connector(str(i), (k_cache, v_cache))
 
+    # DLLM
     def connector_wait_for_kv_layer(self):
         logger.debug("reached connector_wait_for_kv_layer")
         if not hasattr(self, 'mf_model_config'):
@@ -170,6 +174,7 @@ class MfModelBase(MsModelBase):
                 self.set_flags = True
             if kv_transfer_supported and is_v1_kv_transfer_group():
                 self.connector_send_kvcache()
+        # DLLM
         else:
             if kv_transfer_supported:
                 if is_v1_kv_transfer_group() and self.is_prefill_task():
diff --git a/vllm_mindspore/model_executor/models/model_base.py b/vllm_mindspore/model_executor/models/model_base.py
index 38199ae5..6acd27af 100644
--- a/vllm_mindspore/model_executor/models/model_base.py
+++ b/vllm_mindspore/model_executor/models/model_base.py
@@ -101,7 +101,7 @@ class MsModelBase:
         self.is_multi_step_chunked_prefill = (self.is_multi_step
                                               and self.enable_chunked_prefill)
-        self.set_flags = False
+        self.set_flags: bool = False
         self.kv_caches: list[Any] = []
         self.casual_mask = LowerTriangularMask(
             dtype=self.model_config.dtype,
diff --git a/vllm_mindspore/utils.py b/vllm_mindspore/utils.py
index b2615b46..7cff11ab 100644
--- a/vllm_mindspore/utils.py
+++ b/vllm_mindspore/utils.py
@@ -183,14 +183,20 @@ def is_mindone_model_backend():
             == vllmModelBackendEnum.MIND_ONE)
 
 
+# DLLM
 def register_connector():
     try:
         from vllm.distributed.kv_transfer.kv_connector.factory import (
             KVConnectorFactory)
 
+        # use D2H (device-to-host) for KV transfer
         KVConnectorFactory.register_connector(
             "DLLMDsConnector", "dllm.dkvc.v1.dllm_ds_connector",
             "DLLMDsConnector")
+        # use D2D (device-to-device) for KV transfer
+        KVConnectorFactory.register_connector(
+            "DLLMDsD2DConnector", "dllm.dkvc.v1.dllm_ds_d2d_connector",
+            "DLLMDsD2DConnector")
     except:  # noqa: E722
         pass
 
diff --git a/vllm_mindspore/v1/worker/gpu_worker.py b/vllm_mindspore/v1/worker/gpu_worker.py
index ab3bd566..2a49d511 100644
--- a/vllm_mindspore/v1/worker/gpu_worker.py
+++ b/vllm_mindspore/v1/worker/gpu_worker.py
@@ -38,6 +38,7 @@ def init_device(self):
 
     config = get_current_vllm_config()
     if config is not None and config.parallel_config.data_parallel_size > 1:
+        # DLLM
         self.local_rank = (self.parallel_config.data_parallel_rank_local *
                            self.parallel_config.world_size + self.local_rank)
         self.device = torch.device(f"cuda:{self.local_rank}")
--
Gitee
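
Note: the entrypoints/__main__.py change above relies on the usual wrap-and-delegate
monkey-patch pattern. A minimal sketch of that pattern follows, assuming vllm exposes
run_api_server_worker_proc under vllm.entrypoints.cli.serve; the exact module path is
an assumption here, since the diff only shows the wrapper function.

    # Sketch of the wrap-and-delegate monkey patch (module path is assumed).
    import vllm.entrypoints.cli.serve as serve

    # Keep a handle on the original so the wrapper can delegate to it.
    _original_run_api_server_worker_proc = serve.run_api_server_worker_proc

    def monkey_patch_server_run_api_server_worker_proc(*args, **kwargs):
        # Each API-server worker runs in a fresh process, so the plugin must
        # be imported again here; otherwise its patches are silently missing.
        import vllm_mindspore  # noqa: F401
        assert _original_run_api_server_worker_proc is not None
        return _original_run_api_server_worker_proc(*args, **kwargs)

    # Install the wrapper in place of the original entry point.
    serve.run_api_server_worker_proc = (
        monkey_patch_server_run_api_server_worker_proc)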
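
Note: once registered, the D2D connector added in utils.py should be selectable through
vllm's standard KVTransferConfig. A hypothetical invocation follows; the model name and
kv_role value are illustrative only, and it assumes the dllm package providing
dllm.dkvc.v1.dllm_ds_d2d_connector is installed.

    from vllm import LLM
    from vllm.config import KVTransferConfig

    llm = LLM(
        model="deepseek-ai/DeepSeek-V3",  # illustrative model
        kv_transfer_config=KVTransferConfig(
            # Name registered by register_connector() in vllm_mindspore/utils.py
            kv_connector="DLLMDsD2DConnector",
            kv_role="kv_producer",  # prefill side; use "kv_consumer" on decode
        ),
    )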