From ac98b7b4b2df95d194b91ee686ec5943e668384a Mon Sep 17 00:00:00 2001 From: zhaizhiqiang Date: Wed, 2 Jul 2025 14:49:52 +0800 Subject: [PATCH 1/3] optimize profiler and enhance dansh board --- vllm_mindspore/__init__.py | 6 +- vllm_mindspore/dashboard_utils.py | 344 +++++++++++++++ .../model_executor/models/model_base.py | 4 + vllm_mindspore/worker/profile_controller.py | 403 ++++++++++++++++++ 4 files changed, 756 insertions(+), 1 deletion(-) create mode 100644 vllm_mindspore/dashboard_utils.py create mode 100644 vllm_mindspore/worker/profile_controller.py diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py index 5e0ae33e..2ef8d3ee 100644 --- a/vllm_mindspore/__init__.py +++ b/vllm_mindspore/__init__.py @@ -160,7 +160,7 @@ vllm.model_executor.model_loader.loader.safetensors_weights_iterator = ( safetensors_weights_iterator) from vllm_mindspore.worker.worker import _warm_up_model -from vllm_mindspore.worker.profile import ( +from vllm_mindspore.worker.profile_controller import ( wrapper_worker_init, wrapper_worker_init_device, ) @@ -405,6 +405,10 @@ from vllm.v1.executor.multiproc_executor import MultiprocExecutor MultiprocExecutor._ensure_worker_termination = \ executor_ensure_worker_termination +MultiprocExecutor._ensure_worker_termination = executor_ensure_worker_termination +# init vllm-mindspore profile controller +from vllm_mindspore.worker.profile_controller import init_vllm_mindspore_profile_controller +init_vllm_mindspore_profile_controller() from .utils import check_ready diff --git a/vllm_mindspore/dashboard_utils.py b/vllm_mindspore/dashboard_utils.py new file mode 100644 index 00000000..d57adbba --- /dev/null +++ b/vllm_mindspore/dashboard_utils.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# Copyright 2025 Huawei Technologies Co., Ltd +# Copyright 2024 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +dashboad_html_code = ''' + + + + + + vLLM MindSpore Profiler Dashboard + + + +
+

vLLM MindSpore Profiler Dashboard

+
+ +
+ + + +
+ +
+ + + + + +
+ +
+

Infer Results

+
+
+ +
+ +
+ +
+ + + + + + + + + + + +
No.FileDescription
+
+ + + + + +''' + +def get_dashboard_html() -> str: + return dashboad_html_code diff --git a/vllm_mindspore/model_executor/models/model_base.py b/vllm_mindspore/model_executor/models/model_base.py index a65ac08e..0996f099 100644 --- a/vllm_mindspore/model_executor/models/model_base.py +++ b/vllm_mindspore/model_executor/models/model_base.py @@ -35,6 +35,7 @@ from vllm_mindspore.model_executor.models.attention_mask import ( LowerTriangularMask) from vllm_mindspore.utils import STR_DTYPE_TO_MS_DTYPE from vllm_mindspore.v1.attention.backends.ms_attn import MsAttentionMetadata +from vllm_mindspore.worker.profile_controller import vllm_mindspore_profile_controller class AttentionWrapper: @@ -196,6 +197,9 @@ class MsModelBase: previous_hidden_states: Optional[Tensor] = None, spec_step_idx: int = 0, ) -> Union[Tensor, IntermediateTensors]: + # check if need profile + vllm_mindspore_profile_controller.check_profile_point() + return self.forward(input_ids, positions, intermediate_tensors, diff --git a/vllm_mindspore/worker/profile_controller.py b/vllm_mindspore/worker/profile_controller.py new file mode 100644 index 00000000..ac95045f --- /dev/null +++ b/vllm_mindspore/worker/profile_controller.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# Copyright 2025 Huawei Technologies Co., Ltd +# Copyright 2024 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +import os +import json +import sys +import subprocess +import tarfile +import shutil +from types import SimpleNamespace + +import mindspore as ms + +# host profiling modules +from mindspore._c_expression import _framework_profiler_enable_mi +from mindspore._c_expression import _framework_profiler_disable_mi +from mindspore._c_expression import _framework_profiler_step_start +from mindspore._c_expression import _framework_profiler_step_end +from mindspore._c_expression import _framework_profiler_clear + +# device profiling utils +from mindspore import Profiler +from mindspore.profiler import ProfilerLevel, ProfilerActivity +from mindspore.profiler.common.profiler_context import ProfilerContext + +# vllm modules +import vllm.envs as envs +from vllm.logger import init_logger +from vllm.entrypoints.openai.api_server import router as vllm_router +from vllm.entrypoints.openai.api_server import engine_client + +from fastapi import Request +from fastapi.responses import Response, JSONResponse, FileResponse, HTMLResponse + +from vllm_mindspore.dashboard_utils import get_dashboard_html + + +VLLM_DEFAULT_PROFILE_ENV_NAME = "VLLM_TORCH_PROFILING_DIR" +VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME = "VLLM_MS_PROFILE_CONFIG_PATH" + +# default vllm-mindspore profile config is based on the vllm backend start dir +# the content example is like follow +# { +# "enable_profile": true, +# "profile_config": { +# "profile_type": "device", +# "start_iteration": 50, +# "sample_iteration": 10, +# "profile_output_path": "./graph", +# "online_ananlyse": true, +# "profiler_level": "Level1", +# "with_stack": true, +# "activities": ["CPU", "NPU"] +# } +# } +DEFAULT_VLLM_MS_CONFIG_FILE_PATH = "./vllm_ms_profile.config" + +vllm_logger = init_logger(__name__) + +def shell_analyse(path: str) -> None: + subprocess.run( + [sys.executable, "-c", f'from mindspore import Profiler; Profiler.offline_analyse("{path}")'], + shell=False, check=True + ) + return + + +# Pure vLLM MindSpore Profile Config class +class ProfileControllerConfig: + def __init__(self): + # start_iteration: iterations to run before real profile + self.start_iteration = 50 + # sample_iteration: iteration num to profile + self.sample_iteration = 10 + # profile_type: device or host profile, advice use device + self.profile_type = "device" + # profile_output_path: output path of profiling + self.profile_output_path = "./graph" + # online_analyse: if online analyse profile data + self.online_ananlyse = True + # profiler_level: device profiler level, valid value: Level0/Level1/Level2 + # Note: the string must the same with valid value + self.profiler_level = ProfilerLevel.Level1 + # with_stack: if profile python stack data + self.with_stack = True + # activities: the profile active, it is a List with "CPU", "NPU", "GPU" + # advice always use ["CPU", "NPU"] on Ascend platform + self.activities = [ProfilerActivity.CPU, ProfilerActivity.NPU] + + def to_dict(self): + out_dict = {} + + for (key, value) in self.__dict__.items(): + if hasattr(value, "to_dict"): + out_dict[key] = value.to_dict() + elif isinstance(value, ProfilerLevel): + out_dict[key] = value.value + elif key == "activities": + # ctivities is a list of ProfilerActivity Enum, deal it single case + out_list = [] + for elem in value: + out_list.append(str(elem.value)) + out_dict[key] = out_list + else: + out_dict[key] = value + return out_dict + + +default_profile_config = ProfileControllerConfig() +# this avariable is because origin vLLM profiler is controlled by the output path +# in vllm-mindspore, the output path is package files dir +profile_results_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "./profile_results") + +# Control profile class +class ProfileController: + def __init__(self, config: ProfileControllerConfig = default_profile_config): + self.name = "vllm mindspore profile controller" + self.is_profiling = False + self.config = config + self.iteration = 0 + self.profiler = None + + + # start profile controll period + def start(self, config: ProfileControllerConfig = None) -> None: + if self.is_profiling: + # already in profiling state, skip + vllm_logger.warning(f"vllm-mindspore is already in profiling state, try start later") + return + + self.is_profiling = True + if config is not None: + vllm_logger.info(f"start profile with new config: {config.to_dict()}") + self.config = config + + self.iteration = 0 + + + # host profile check point function + def _host_profile_point(self) -> None: + if self.iteration == self.config.start_iteration: + # start host profile + if os.environ.get("MS_ENABLE_RUNTIME_PROFILER", "") != "1": + vllm_logger.warning(f"env MS_ENABLE_RUNTIME_PROFILER is not set, host profile cannot work") + vllm_logger.info(f"start host profile at iteration {self.iteration}") + # set the host output path + ms.set_context(save_graphs_path=self.config.profile_output_path) + _framework_profiler_enable_mi() + _framework_profiler_step_start() + + + if self.iteration == self.config.start_iteration + self.config.sample_iteration: + # end host profile + vllm_logger.info(f"end host profile at iteration {self.iteration}") + _framework_profiler_step_end() + _framework_profiler_clear() + _framework_profiler_disable_mi() + self.is_profiling = False + + return + + # device profile check point function + def _device_profile_point(self) -> None: + if self.iteration == self.config.start_iteration: + # start device profile + self.profiler = Profiler(profiler_level=self.config.profiler_level, + activities=self.config.activities, + with_stack=self.config.with_stack, + output_path=self.config.profile_output_path) + + + if self.iteration == self.config.start_iteration + self.config.sample_iteration: + # end device profile + vllm_logger.info(f"end device profile at iteration {self.iteration}") + self.profiler.stop() + self.is_profiling = False + + return + + + # if the controller is in profiling state + def is_profiling(self) -> bool: + return self.is_profiling + + + # exposed profile control check point function + def check_profile_point(self): + if not self.is_profiling: + # controller is not in profilig state, return + return + + if self.config.profile_type == "host": + self._host_profile_point() + elif self.config.profile_type == "device": + self._device_profile_point() + else: + vllm_logger.warning(f"Invalid profiling type {self.config.profile_type}, please check profile config") + self.is_profiling = False + self.iteration = 0 + + self.iteration += 1 + + + # stop profile controll period + def stop(self): + if self.config.profile_type == "device": + if self.is_profiling: + # the profile is not finish, stop it + if self.profiler: + self.profiler.stop() + self.is_profiling = False + + if self.profiler and self.config.online_ananlyse: + # enable online analyse, call analyse + try: + self.profiler.analyse() + except Exception as e: + vllm_logger.warning(f"the online analyse catch exception {e}, try offline analyse.") + profile_output_path = ProfilerContext().ascend_ms_dir + shell_analyse(profile_output_path) + self.profiler = None + + +vllm_mindspore_profile_controller = ProfileController() + + +# class for file config for profile controller +# this is used for changing profile config when vLLM is already running +# because vLLM do not provide set config pai for profiling, +# so vllm-mindspore reuse the api, and set the config from specified file path +# the file path is set by a env VLLM_MS_PROFILE_CONFIG_PATH when vLLM server setup +# if the config file is not exist, the profile controller will use default config +class ProfileFileControlerConfig(SimpleNamespace): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def to_dict(self): + out_dict = {} + + for (key, value) in self.__dict__.items(): + if hasattr(value, "to_dict"): + out_dict[key] = value.to_dict() + else: + out_dict[key] = value + + return out_dict + + +default_profile_file_controller_config = ProfileFileControlerConfig() +# enable_profile: if the profile is anable, if the config set False, call start will not start profile +default_profile_file_controller_config.enable_profile = True +# profile_config: the profile config +default_profile_file_controller_config.profile_config = ProfileControllerConfig() + + +# the Profiler class for vLLM, it will take the start and stop api from vLLM to control profile +class AdapterControlProfiler: + def __init__(self, config_path: str): + self.config_path = config_path + + def get_config(self): + if not os.path.exists(self.config_path): + # config file path is not exist, return default profile config + vllm_logger.info(f"profile config path is not exist, use default config") + return default_profile_file_controller_config + + with open(self.config_path, "r") as config_file: + config_json = config_file.read() + try: + config = json.loads(config_json, object_hook=lambda d: ProfileFileControlerConfig(**d)) + except Exception as e: + vllm_logger.warning(f"invalid profile config file, return default config") + return default_profile_file_controller_config + + return config + + def start(self): + # only start call will trigger read config file + config = self.get_config() + if not config.enable_profile: + # config file disable profile, print warning to tell user + vllm_logger.warning(f"the config file is disable the profile, please check it again") + + vllm_mindspore_profile_controller.start(config.profile_config) + + + def stop(self): + vllm_mindspore_profile_controller.stop() + + # package the profile result + current_profile_output_path = ProfilerContext().ascend_ms_dir + vllm_logger.info(f"packaging the profile dir: {current_profile_output_path}") + + profile_dir_name = os.path.basename(current_profile_output_path) + package_profile_file_path = f"{profile_results_path}/{profile_dir_name}.tar.gz" + + with tarfile.open(package_profile_file_path, "w:gz") as tar: + tar.add(current_profile_output_path, arcname=os.path.basename(current_profile_output_path)) + + +# the profile controller init function, if the vLLM is not enable profile, this init function will provide the api +def init_vllm_mindspore_profile_controller() -> None: + # in vllm-mindspore, the profile api is always provided for easy to use + # so we do not need restart vllm if we want to profile + # if the VLLM_TORCH_PROFILING_DIR env is set, the vLLLM will set the api + if not envs.VLLM_TORCH_PROFILER_DIR: + @vllm_router.post("/start_profile") + async def start_profile(raw_request: Request): + vllm_logger.info("Starting profiler...") + await engine_client(raw_request).start_profile() + vllm_logger.info("Profiler started.") + return Response(status_code=200) + + @vllm_router.post("/stop_profile") + async def stop_profile(raw_request: Request): + vllm_logger.info("Stop profiler...") + await engine_client(raw_request).stop_profile() + vllm_logger.info("Profiler stopped.") + return Response(status_code=200) + + # get the profile config path + # the reason for this api is like above, we do not want to modify vLLM source code to provide profile ability + @vllm_router.get("/get_profile_config_info") + async def get_profile_config_path(raw_request: Request): + profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, DEFAULT_VLLM_MS_CONFIG_FILE_PATH) + ret = {"vllm_ms_profile_config_path": profile_config_path, + "vllm_ms_profile_config_example": default_profile_file_controller_config.to_dict()} + return JSONResponse(ret) + + @vllm_router.get("/get_profile_result_files") + async def get_profile_result_files(raw_request: Request): + profile_result_file_list = os.listdir(profile_results_path) + + ret = { + "vllm_ms_profile_files": profile_result_file_list + } + return JSONResponse(ret) + + @vllm_router.get("/get_profile_data/{file_name}") + async def get_profile_data(file_name: str): + profile_file_path = f"{profile_results_path}/{file_name}" + vllm_logger.info(f"packaging the profile dir: {profile_file_path}") + return FileResponse(profile_file_path, filename=file_name) + + @vllm_router.get("/profile_dashboard") + async def get_profile_data(raw_request: Request): + vllm_logger.info(f"raw_request: {raw_request}") + dashboard_html_str = get_dashboard_html() + return HTMLResponse(dashboard_html_str) + + return + +# wrapper vLLM worker init functions +# these functions instead the vLLM worker init to init profiler modules +def wrapper_worker_init(func) -> None: + def new_func(*args, **kwargs) -> None: + # Profiler initialization during worker init triggers device setup, + # causing init device to fail due to duplicate configuration. + # To fix this, temporarily unset VLLM_TORCH_PROFILING_DIR before vLLM worker init, + # restore it afterward, then initialize profiler properlly after worker init_device completes + profile_output_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "") + if profile_output_path: + del os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME] + + func(*args, **kwargs) + + if profile_output_path: + os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME] = profile_output_path + return new_func + +def wrapper_worker_init_device(func) -> None: + def new_func(*args, **kwargs): + func(*args, **kwargs) + + # The actual profiler initialization is performed after the worker.init_device() method, + # based on the VLLM_TORCH_PROFILING_DIR environment variable. + worker = args[0] + profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, DEFAULT_VLLM_MS_CONFIG_FILE_PATH) + + # reset profile results dir + if os.path.exists(profile_results_path): + shutil.rmtree(profile_results_path, ignore_errors=True) + os.makedirs(profile_results_path, exist_ok=True) + + worker.profiler = AdapterControlProfiler(profile_config_path) + return new_func + -- Gitee From 93161a07feb49a3e53443826be477cc11eac5d02 Mon Sep 17 00:00:00 2001 From: liu lili Date: Wed, 16 Jul 2025 11:45:31 +0800 Subject: [PATCH 2/3] lll: solve pre-commit --- vllm_mindspore/__init__.py | 7 +- vllm_mindspore/dashboard_utils.py | 54 ++-- .../model_executor/models/model_base.py | 5 +- vllm_mindspore/worker/profile_controller.py | 290 ++++++++++-------- 4 files changed, 197 insertions(+), 159 deletions(-) diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py index 2ef8d3ee..4570579f 100644 --- a/vllm_mindspore/__init__.py +++ b/vllm_mindspore/__init__.py @@ -405,9 +405,12 @@ from vllm.v1.executor.multiproc_executor import MultiprocExecutor MultiprocExecutor._ensure_worker_termination = \ executor_ensure_worker_termination -MultiprocExecutor._ensure_worker_termination = executor_ensure_worker_termination +MultiprocExecutor._ensure_worker_termination = \ + executor_ensure_worker_termination # init vllm-mindspore profile controller -from vllm_mindspore.worker.profile_controller import init_vllm_mindspore_profile_controller +from vllm_mindspore.worker.profile_controller \ + import init_vllm_mindspore_profile_controller + init_vllm_mindspore_profile_controller() from .utils import check_ready diff --git a/vllm_mindspore/dashboard_utils.py b/vllm_mindspore/dashboard_utils.py index d57adbba..c5e80a9c 100644 --- a/vllm_mindspore/dashboard_utils.py +++ b/vllm_mindspore/dashboard_utils.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# encoding: utf-8 +# SPDX-License-Identifier: Apache-2.0 # Copyright 2025 Huawei Technologies Co., Ltd # Copyright 2024 The vLLM team. # @@ -16,7 +16,6 @@ # limitations under the License. # ============================================================================ - dashboad_html_code = ''' @@ -30,7 +29,8 @@ dashboad_html_code = ''' margin: 0; padding: 0; box-sizing: border-box; - font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen; + font-family: 'Inter', -apple-system, BlinkMacSystemFont, + 'Segoe UI', Roboto, Oxygen; } /* 主题色定义 */ @@ -105,7 +105,8 @@ dashboad_html_code = ''' } .btn-start { - background: linear-gradient(135deg, var(--primary), var(--secondary)); + background: linear-gradient(135deg, var(--primary), + var(--secondary)); color: white; } @@ -185,9 +186,6 @@ dashboad_html_code = '''
@@ -248,7 +243,7 @@ dashboad_html_code = ''' api_path = "v1/completions" dest_url = base_url + api_path - fetch(dest_url, { + fetch(dest_url, { method: 'POST', headers: { 'Content-Type': 'application/json' @@ -264,12 +259,13 @@ dashboad_html_code = ''' .catch(error => console.error('错误:', error)); } - document.getElementById('inferBtn').addEventListener('click', sendInferRequest) + document.getElementById('inferBtn').addEventListener('click', + sendInferRequest) function sendStartProfile() { api_path = "start_profile" dest_url = base_url + api_path - fetch(dest_url, { + fetch(dest_url, { method: 'POST', headers: { 'Content-Type': 'application/json' @@ -280,12 +276,13 @@ dashboad_html_code = ''' .catch(error => console.error('错误:', error)); } - document.getElementById('startProfileBtn').addEventListener('click', sendStartProfile) + document.getElementById('startProfileBtn').addEventListener('click', + sendStartProfile) function sendStopProfile() { api_path = "stop_profile" dest_url = base_url + api_path - fetch(dest_url, { + fetch(dest_url, { method: 'POST', headers: { 'Content-Type': 'application/json' @@ -296,12 +293,13 @@ dashboad_html_code = ''' .catch(error => console.error('错误:', error)); } - document.getElementById('stopProfileBtn').addEventListener('click', sendStopProfile) + document.getElementById('stopProfileBtn').addEventListener('click', + sendStopProfile) function sendGetProfileResults() { api_path = "get_profile_result_files" dest_url = base_url + api_path - fetch(dest_url, { + fetch(dest_url, { method: 'GET', headers: { 'Content-Type': 'application/json' @@ -319,26 +317,28 @@ dashboad_html_code = ''' const tbody = document.getElementById('profileDatasTable'); tbody.innerHTML = "" profile_results.forEach((item, index) => { - const row = document.createElement('tr'); - api_path = "get_profile_data/" - download_url = base_url + api_path + item - row.innerHTML = ` - ${index + 1} - ${item} - Download - `; - tbody.appendChild(row); + const row = document.createElement('tr'); + api_path = "get_profile_data/" + download_url = base_url + api_path + item + row.innerHTML = ` + ${index + 1} + ${item} + Download + `; + tbody.appendChild(row); }); }) .catch(error => console.error('错误:', error)); } - document.getElementById('refreshProfileResultBtn').addEventListener('click', sendGetProfileResults) + document.getElementById('refreshProfileResultBtn').addEventListener('click', + sendGetProfileResults) ''' + def get_dashboard_html() -> str: return dashboad_html_code diff --git a/vllm_mindspore/model_executor/models/model_base.py b/vllm_mindspore/model_executor/models/model_base.py index 0996f099..f705fbe6 100644 --- a/vllm_mindspore/model_executor/models/model_base.py +++ b/vllm_mindspore/model_executor/models/model_base.py @@ -35,7 +35,8 @@ from vllm_mindspore.model_executor.models.attention_mask import ( LowerTriangularMask) from vllm_mindspore.utils import STR_DTYPE_TO_MS_DTYPE from vllm_mindspore.v1.attention.backends.ms_attn import MsAttentionMetadata -from vllm_mindspore.worker.profile_controller import vllm_mindspore_profile_controller +from vllm_mindspore.worker.profile_controller import ( + vllm_mindspore_profile_controller) class AttentionWrapper: @@ -199,7 +200,7 @@ class MsModelBase: ) -> Union[Tensor, IntermediateTensors]: # check if need profile vllm_mindspore_profile_controller.check_profile_point() - + return self.forward(input_ids, positions, intermediate_tensors, diff --git a/vllm_mindspore/worker/profile_controller.py b/vllm_mindspore/worker/profile_controller.py index ac95045f..60a8d402 100644 --- a/vllm_mindspore/worker/profile_controller.py +++ b/vllm_mindspore/worker/profile_controller.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# encoding: utf-8 +# SPDX-License-Identifier: Apache-2.0 # Copyright 2025 Huawei Technologies Co., Ltd # Copyright 2024 The vLLM team. # @@ -16,41 +16,36 @@ # limitations under the License. # ============================================================================ - -import os import json -import sys +import os +import shutil import subprocess +import sys import tarfile -import shutil from types import SimpleNamespace import mindspore as ms - -# host profiling modules -from mindspore._c_expression import _framework_profiler_enable_mi -from mindspore._c_expression import _framework_profiler_disable_mi -from mindspore._c_expression import _framework_profiler_step_start -from mindspore._c_expression import _framework_profiler_step_end -from mindspore._c_expression import _framework_profiler_clear - +# vllm modules +import vllm.envs as envs +from fastapi import Request +from fastapi.responses import (FileResponse, HTMLResponse, JSONResponse, + Response) # device profiling utils from mindspore import Profiler -from mindspore.profiler import ProfilerLevel, ProfilerActivity +# host profiling modules +from mindspore._c_expression import (_framework_profiler_clear, + _framework_profiler_disable_mi, + _framework_profiler_enable_mi, + _framework_profiler_step_end, + _framework_profiler_step_start) +from mindspore.profiler import ProfilerActivity, ProfilerLevel from mindspore.profiler.common.profiler_context import ProfilerContext - -# vllm modules -import vllm.envs as envs -from vllm.logger import init_logger -from vllm.entrypoints.openai.api_server import router as vllm_router from vllm.entrypoints.openai.api_server import engine_client - -from fastapi import Request -from fastapi.responses import Response, JSONResponse, FileResponse, HTMLResponse +from vllm.entrypoints.openai.api_server import router as vllm_router +from vllm.logger import init_logger from vllm_mindspore.dashboard_utils import get_dashboard_html - VLLM_DEFAULT_PROFILE_ENV_NAME = "VLLM_TORCH_PROFILING_DIR" VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME = "VLLM_MS_PROFILE_CONFIG_PATH" @@ -73,16 +68,20 @@ DEFAULT_VLLM_MS_CONFIG_FILE_PATH = "./vllm_ms_profile.config" vllm_logger = init_logger(__name__) + def shell_analyse(path: str) -> None: - subprocess.run( - [sys.executable, "-c", f'from mindspore import Profiler; Profiler.offline_analyse("{path}")'], - shell=False, check=True - ) + subprocess.run([ + sys.executable, "-c", + f'from mindspore import Profiler; Profiler.offline_analyse("{path}")' + ], + shell=False, + check=True) return # Pure vLLM MindSpore Profile Config class class ProfileControllerConfig: + def __init__(self): # start_iteration: iterations to run before real profile self.start_iteration = 50 @@ -94,7 +93,7 @@ class ProfileControllerConfig: self.profile_output_path = "./graph" # online_analyse: if online analyse profile data self.online_ananlyse = True - # profiler_level: device profiler level, valid value: Level0/Level1/Level2 + # profiler_level: device profiler level, eg: Level0/Level1/Level2 # Note: the string must the same with valid value self.profiler_level = ProfilerLevel.Level1 # with_stack: if profile python stack data @@ -112,7 +111,7 @@ class ProfileControllerConfig: elif isinstance(value, ProfilerLevel): out_dict[key] = value.value elif key == "activities": - # ctivities is a list of ProfilerActivity Enum, deal it single case + # activities is a list of ProfilerActivity Enum out_list = [] for elem in value: out_list.append(str(elem.value)) @@ -123,115 +122,123 @@ class ProfileControllerConfig: default_profile_config = ProfileControllerConfig() -# this avariable is because origin vLLM profiler is controlled by the output path -# in vllm-mindspore, the output path is package files dir -profile_results_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "./profile_results") +# this avariable is because origin vLLM profiler is controlled by the output +# path in vllm-mindspore, the output path is package files dir +profile_results_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, + "./profile_results") + # Control profile class class ProfileController: - def __init__(self, config: ProfileControllerConfig = default_profile_config): + + def __init__(self, + config: ProfileControllerConfig = default_profile_config): self.name = "vllm mindspore profile controller" - self.is_profiling = False + self.profiliing_state = False self.config = config self.iteration = 0 - self.profiler = None + self.profiler: Profiler = None - - # start profile controll period - def start(self, config: ProfileControllerConfig = None) -> None: - if self.is_profiling: + # start profile control period + def start(self, config=None) -> None: + if self.profiliing_state: # already in profiling state, skip - vllm_logger.warning(f"vllm-mindspore is already in profiling state, try start later") + vllm_logger.warning( + "vllm-mindspore is already in profiling state, try start later" + ) return - - self.is_profiling = True + + self.profiliing_state = True if config is not None: - vllm_logger.info(f"start profile with new config: {config.to_dict()}") + vllm_logger.info("start profile with new config", config.to_dict()) self.config = config - + self.iteration = 0 - # host profile check point function def _host_profile_point(self) -> None: if self.iteration == self.config.start_iteration: # start host profile if os.environ.get("MS_ENABLE_RUNTIME_PROFILER", "") != "1": - vllm_logger.warning(f"env MS_ENABLE_RUNTIME_PROFILER is not set, host profile cannot work") - vllm_logger.info(f"start host profile at iteration {self.iteration}") + vllm_logger.warning( + "env MS_ENABLE_RUNTIME_PROFILER is not set, " + "host profile cannot work") + vllm_logger.info("start host profile at iter %d", self.iteration) # set the host output path ms.set_context(save_graphs_path=self.config.profile_output_path) _framework_profiler_enable_mi() _framework_profiler_step_start() - - if self.iteration == self.config.start_iteration + self.config.sample_iteration: + if self.iteration == self.config.start_iteration + \ + self.config.sample_iteration: # end host profile - vllm_logger.info(f"end host profile at iteration {self.iteration}") + vllm_logger.info("end host profile at iter %d", self.iteration) _framework_profiler_step_end() _framework_profiler_clear() _framework_profiler_disable_mi() - self.is_profiling = False + self.profiliing_state = False return - + # device profile check point function def _device_profile_point(self) -> None: if self.iteration == self.config.start_iteration: # start device profile - self.profiler = Profiler(profiler_level=self.config.profiler_level, - activities=self.config.activities, - with_stack=self.config.with_stack, - output_path=self.config.profile_output_path) - - - if self.iteration == self.config.start_iteration + self.config.sample_iteration: + self.profiler = Profiler( + profiler_level=self.config.profiler_level, + activities=self.config.activities, + with_stack=self.config.with_stack, + output_path=self.config.profile_output_path) + + if self.iteration == self.config.start_iteration + \ + self.config.sample_iteration: # end device profile - vllm_logger.info(f"end device profile at iteration {self.iteration}") + vllm_logger.info("end device profile at iter %d", self.iteration) self.profiler.stop() - self.is_profiling = False + self.profiliing_state = False - return - + return # if the controller is in profiling state def is_profiling(self) -> bool: - return self.is_profiling - + return self.profiliing_state # exposed profile control check point function def check_profile_point(self): - if not self.is_profiling: + if not self.profiliing_state: # controller is not in profilig state, return return - + if self.config.profile_type == "host": self._host_profile_point() elif self.config.profile_type == "device": self._device_profile_point() else: - vllm_logger.warning(f"Invalid profiling type {self.config.profile_type}, please check profile config") - self.is_profiling = False + vllm_logger.warning( + "Invalid profiling type %s, " + "please check profile config", self.config.profile_type) + self.profiliing_state = False self.iteration = 0 self.iteration += 1 - - # stop profile controll period + # stop profile control period def stop(self): if self.config.profile_type == "device": - if self.is_profiling: + if self.profiliing_state: # the profile is not finish, stop it if self.profiler: self.profiler.stop() - self.is_profiling = False + self.profiliing_state = False if self.profiler and self.config.online_ananlyse: # enable online analyse, call analyse try: self.profiler.analyse() except Exception as e: - vllm_logger.warning(f"the online analyse catch exception {e}, try offline analyse.") + vllm_logger.warning( + "online analyse catch exception, try offline analyse.", + e) profile_output_path = ProfilerContext().ascend_ms_dir shell_analyse(profile_output_path) self.profiler = None @@ -242,11 +249,13 @@ vllm_mindspore_profile_controller = ProfileController() # class for file config for profile controller # this is used for changing profile config when vLLM is already running -# because vLLM do not provide set config pai for profiling, +# because vLLM do not provide set config pai for profiling, # so vllm-mindspore reuse the api, and set the config from specified file path -# the file path is set by a env VLLM_MS_PROFILE_CONFIG_PATH when vLLM server setup -# if the config file is not exist, the profile controller will use default config +# the file path is set by a env VLLM_MS_PROFILE_CONFIG_PATH when vLLM server +# setup if the config file is not exist, the profile controller will use +# default config class ProfileFileControlerConfig(SimpleNamespace): + def __init__(self, **kwargs): super().__init__(**kwargs) @@ -258,122 +267,142 @@ class ProfileFileControlerConfig(SimpleNamespace): out_dict[key] = value.to_dict() else: out_dict[key] = value - + return out_dict default_profile_file_controller_config = ProfileFileControlerConfig() -# enable_profile: if the profile is anable, if the config set False, call start will not start profile +# enable_profile: if the profile is anable, if the config set False, +# call start will not start profile default_profile_file_controller_config.enable_profile = True # profile_config: the profile config -default_profile_file_controller_config.profile_config = ProfileControllerConfig() +default_profile_file_controller_config.profile_config = ProfileControllerConfig( +) -# the Profiler class for vLLM, it will take the start and stop api from vLLM to control profile +# the Profiler class for vLLM, it will take the start and stop api +# from vLLM to control profile class AdapterControlProfiler: + def __init__(self, config_path: str): self.config_path = config_path - + def get_config(self): if not os.path.exists(self.config_path): # config file path is not exist, return default profile config - vllm_logger.info(f"profile config path is not exist, use default config") + vllm_logger.info( + "profile config path is not exist, use default config") return default_profile_file_controller_config - - with open(self.config_path, "r") as config_file: + + with open(self.config_path) as config_file: config_json = config_file.read() try: - config = json.loads(config_json, object_hook=lambda d: ProfileFileControlerConfig(**d)) - except Exception as e: - vllm_logger.warning(f"invalid profile config file, return default config") + config = json.loads( + config_json, + object_hook=lambda d: ProfileFileControlerConfig(**d)) + except Exception: + vllm_logger.warning( + "invalid profile config file, return default config") return default_profile_file_controller_config - + return config - + def start(self): # only start call will trigger read config file config = self.get_config() if not config.enable_profile: # config file disable profile, print warning to tell user - vllm_logger.warning(f"the config file is disable the profile, please check it again") + vllm_logger.warning( + "the config file is disable the profile, please check it again" + ) vllm_mindspore_profile_controller.start(config.profile_config) - def stop(self): vllm_mindspore_profile_controller.stop() - + # package the profile result current_profile_output_path = ProfilerContext().ascend_ms_dir - vllm_logger.info(f"packaging the profile dir: {current_profile_output_path}") - + vllm_logger.info("packaging the profile dir: %s", + current_profile_output_path) + profile_dir_name = os.path.basename(current_profile_output_path) - package_profile_file_path = f"{profile_results_path}/{profile_dir_name}.tar.gz" - + package_profile_file_path = f"{profile_results_path} \ + /{profile_dir_name}.tar.gz" + with tarfile.open(package_profile_file_path, "w:gz") as tar: - tar.add(current_profile_output_path, arcname=os.path.basename(current_profile_output_path)) + tar.add(current_profile_output_path, + arcname=os.path.basename(current_profile_output_path)) -# the profile controller init function, if the vLLM is not enable profile, this init function will provide the api +# the profile controller init function, if the vLLM is not enable profile, +# this init function will provide the api def init_vllm_mindspore_profile_controller() -> None: - # in vllm-mindspore, the profile api is always provided for easy to use - # so we do not need restart vllm if we want to profile + # in vllm-mindspore, the profile api is always provided + # for easy to use, so we do not need restart vllm if we want to profile # if the VLLM_TORCH_PROFILING_DIR env is set, the vLLLM will set the api if not envs.VLLM_TORCH_PROFILER_DIR: + @vllm_router.post("/start_profile") async def start_profile(raw_request: Request): vllm_logger.info("Starting profiler...") await engine_client(raw_request).start_profile() vllm_logger.info("Profiler started.") return Response(status_code=200) - + @vllm_router.post("/stop_profile") async def stop_profile(raw_request: Request): vllm_logger.info("Stop profiler...") await engine_client(raw_request).stop_profile() vllm_logger.info("Profiler stopped.") return Response(status_code=200) - + # get the profile config path - # the reason for this api is like above, we do not want to modify vLLM source code to provide profile ability + # the reason for this api is like above, we do not want to modify + # vLLM source code to provide profile ability @vllm_router.get("/get_profile_config_info") async def get_profile_config_path(raw_request: Request): - profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, DEFAULT_VLLM_MS_CONFIG_FILE_PATH) - ret = {"vllm_ms_profile_config_path": profile_config_path, - "vllm_ms_profile_config_example": default_profile_file_controller_config.to_dict()} + profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, + DEFAULT_VLLM_MS_CONFIG_FILE_PATH) + ret = { + "vllm_ms_profile_config_path": + profile_config_path, + "vllm_ms_profile_config_example": + default_profile_file_controller_config.to_dict() + } return JSONResponse(ret) - + @vllm_router.get("/get_profile_result_files") async def get_profile_result_files(raw_request: Request): profile_result_file_list = os.listdir(profile_results_path) - - ret = { - "vllm_ms_profile_files": profile_result_file_list - } + + ret = {"vllm_ms_profile_files": profile_result_file_list} return JSONResponse(ret) - + @vllm_router.get("/get_profile_data/{file_name}") async def get_profile_data(file_name: str): profile_file_path = f"{profile_results_path}/{file_name}" - vllm_logger.info(f"packaging the profile dir: {profile_file_path}") + vllm_logger.info("packaging the profile dir: %s", profile_file_path) return FileResponse(profile_file_path, filename=file_name) - + @vllm_router.get("/profile_dashboard") - async def get_profile_data(raw_request: Request): - vllm_logger.info(f"raw_request: {raw_request}") + async def get_profile_dashboard(raw_request: Request): dashboard_html_str = get_dashboard_html() return HTMLResponse(dashboard_html_str) - + return + # wrapper vLLM worker init functions # these functions instead the vLLM worker init to init profiler modules -def wrapper_worker_init(func) -> None: +def wrapper_worker_init(func): + def new_func(*args, **kwargs) -> None: # Profiler initialization during worker init triggers device setup, # causing init device to fail due to duplicate configuration. - # To fix this, temporarily unset VLLM_TORCH_PROFILING_DIR before vLLM worker init, - # restore it afterward, then initialize profiler properlly after worker init_device completes + # To fix this, temporarily unset VLLM_TORCH_PROFILING_DIR before + # vLLM worker init, restore it afterward, then initialize profiler + # properly after worker init_device completes profile_output_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "") if profile_output_path: del os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME] @@ -382,22 +411,27 @@ def wrapper_worker_init(func) -> None: if profile_output_path: os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME] = profile_output_path + return new_func -def wrapper_worker_init_device(func) -> None: - def new_func(*args, **kwargs): + +def wrapper_worker_init_device(func): + + def new_func(*args, **kwargs) -> None: func(*args, **kwargs) - # The actual profiler initialization is performed after the worker.init_device() method, - # based on the VLLM_TORCH_PROFILING_DIR environment variable. + # The actual profiler initialization is performed after the + # worker.init_device() method, based on the + # VLLM_TORCH_PROFILING_DIR environment variable. worker = args[0] - profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, DEFAULT_VLLM_MS_CONFIG_FILE_PATH) + profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, + DEFAULT_VLLM_MS_CONFIG_FILE_PATH) # reset profile results dir if os.path.exists(profile_results_path): shutil.rmtree(profile_results_path, ignore_errors=True) os.makedirs(profile_results_path, exist_ok=True) - + worker.profiler = AdapterControlProfiler(profile_config_path) - return new_func + return new_func -- Gitee From 5915540605288d217c71f2e29c13c349114842f5 Mon Sep 17 00:00:00 2001 From: liu lili Date: Sat, 19 Jul 2025 11:50:14 +0800 Subject: [PATCH 3/3] lll: update model path --- vllm_mindspore/dashboard_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_mindspore/dashboard_utils.py b/vllm_mindspore/dashboard_utils.py index c5e80a9c..cee2c097 100644 --- a/vllm_mindspore/dashboard_utils.py +++ b/vllm_mindspore/dashboard_utils.py @@ -179,7 +179,7 @@ dashboad_html_code = ''' id="modelInput" type="text" placeholder="模型路径" - value="/home/lll/dockers/vllm-develop/workspace/scripts/Qwen2-7B" + value="/path/to/model" > -- Gitee