diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py
index 5e0ae33eb6f9ab14f37896376c678ee35c76d051..4570579f803e41c2947504214f525169ea4b2b33 100644
--- a/vllm_mindspore/__init__.py
+++ b/vllm_mindspore/__init__.py
@@ -160,7 +160,7 @@ vllm.model_executor.model_loader.loader.safetensors_weights_iterator = (
     safetensors_weights_iterator)
 
 from vllm_mindspore.worker.worker import _warm_up_model
-from vllm_mindspore.worker.profile import (
+from vllm_mindspore.worker.profile_controller import (
     wrapper_worker_init,
     wrapper_worker_init_device,
 )
@@ -405,6 +405,11 @@ from vllm.v1.executor.multiproc_executor import MultiprocExecutor
 
 MultiprocExecutor._ensure_worker_termination = \
     executor_ensure_worker_termination
+# init vllm-mindspore profile controller
+from vllm_mindspore.worker.profile_controller \
+    import init_vllm_mindspore_profile_controller
+
+init_vllm_mindspore_profile_controller()
 
 from .utils import check_ready
diff --git a/vllm_mindspore/dashboard_utils.py b/vllm_mindspore/dashboard_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cee2c097db0c70a204c3328d87836d9e6ea857e8
--- /dev/null
+++ b/vllm_mindspore/dashboard_utils.py
@@ -0,0 +1,344 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2025 Huawei Technologies Co., Ltd
+# Copyright 2024 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ + +dashboad_html_code = ''' + + + + + + vLLM MindSpore Profiler Dashboard + + + +
+

vLLM MindSpore Profiler Dashboard

+
+ +
+ + + +
+ +
+ + + + + +
+ +
+

Infer Results

+
+
+ +
+ +
+ +
+ + + + + + + + + + + +
No.FileDescription
+
+ + + + + +''' + + +def get_dashboard_html() -> str: + return dashboad_html_code diff --git a/vllm_mindspore/model_executor/models/model_base.py b/vllm_mindspore/model_executor/models/model_base.py index a65ac08e623ab0a75f78c79e7272e22384c3ce7b..f705fbe6055f63886e90ef5085c8c082a4cdc287 100644 --- a/vllm_mindspore/model_executor/models/model_base.py +++ b/vllm_mindspore/model_executor/models/model_base.py @@ -35,6 +35,8 @@ from vllm_mindspore.model_executor.models.attention_mask import ( LowerTriangularMask) from vllm_mindspore.utils import STR_DTYPE_TO_MS_DTYPE from vllm_mindspore.v1.attention.backends.ms_attn import MsAttentionMetadata +from vllm_mindspore.worker.profile_controller import ( + vllm_mindspore_profile_controller) class AttentionWrapper: @@ -196,6 +198,9 @@ class MsModelBase: previous_hidden_states: Optional[Tensor] = None, spec_step_idx: int = 0, ) -> Union[Tensor, IntermediateTensors]: + # check if need profile + vllm_mindspore_profile_controller.check_profile_point() + return self.forward(input_ids, positions, intermediate_tensors, diff --git a/vllm_mindspore/worker/profile_controller.py b/vllm_mindspore/worker/profile_controller.py new file mode 100644 index 0000000000000000000000000000000000000000..60a8d4025f02f54eaffb5828a3f439badd9ba328 --- /dev/null +++ b/vllm_mindspore/worker/profile_controller.py @@ -0,0 +1,437 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +# Copyright 2025 Huawei Technologies Co., Ltd +# Copyright 2024 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Profiling controller for vllm-mindspore.

Adds HTTP endpoints and iteration-based start/stop logic so MindSpore
host/device profiling can be driven through the running vLLM server
without restarting it.
"""

import json
import os
import shutil
import subprocess
import sys
import tarfile
from types import SimpleNamespace

import mindspore as ms
# vllm modules
import vllm.envs as envs
from fastapi import Request
from fastapi.responses import (FileResponse, HTMLResponse, JSONResponse,
                               Response)
# device profiling utils
from mindspore import Profiler
# host profiling modules
from mindspore._c_expression import (_framework_profiler_clear,
                                     _framework_profiler_disable_mi,
                                     _framework_profiler_enable_mi,
                                     _framework_profiler_step_end,
                                     _framework_profiler_step_start)
from mindspore.profiler import ProfilerActivity, ProfilerLevel
from mindspore.profiler.common.profiler_context import ProfilerContext
from vllm.entrypoints.openai.api_server import engine_client
from vllm.entrypoints.openai.api_server import router as vllm_router
from vllm.logger import init_logger

from vllm_mindspore.dashboard_utils import get_dashboard_html

# NOTE(review): vLLM's own profiler switch is VLLM_TORCH_PROFILER_DIR
# (see envs.VLLM_TORCH_PROFILER_DIR used below). Confirm that the
# "VLLM_TORCH_PROFILING_DIR" spelling here is intentional and actually set
# by the deployment, otherwise the worker-init workaround never triggers.
VLLM_DEFAULT_PROFILE_ENV_NAME = "VLLM_TORCH_PROFILING_DIR"
VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME = "VLLM_MS_PROFILE_CONFIG_PATH"

# Default vllm-mindspore profile config path, resolved relative to the
# directory the vLLM backend was started from. Example file content:
# {
#     "enable_profile": true,
#     "profile_config": {
#         "profile_type": "device",
#         "start_iteration": 50,
#         "sample_iteration": 10,
#         "profile_output_path": "./graph",
#         "online_ananlyse": true,
#         "profiler_level": "Level1",
#         "with_stack": true,
#         "activities": ["CPU", "NPU"]
#     }
# }
DEFAULT_VLLM_MS_CONFIG_FILE_PATH = "./vllm_ms_profile.config"

vllm_logger = init_logger(__name__)


def shell_analyse(path: str) -> None:
    """Run MindSpore offline profile analysis on *path* in a fresh process.

    A subprocess is used so the (potentially heavy) analysis does not run
    inside the serving process.

    Raises:
        subprocess.CalledProcessError: if the analysis process exits non-zero.
    """
    # Pass the path through argv instead of interpolating it into the code
    # string: a path containing quotes or backslashes would otherwise break
    # (or inject into) the generated snippet.
    subprocess.run([
        sys.executable,
        "-c",
        "import sys; from mindspore import Profiler; "
        "Profiler.offline_analyse(sys.argv[1])",
        path,
    ],
                   shell=False,
                   check=True)
    return
# Pure vLLM MindSpore Profile Config class
class ProfileControllerConfig:
    """Typed defaults for one profiling run.

    Mirrors the "profile_config" object of the JSON config file (see the
    example next to DEFAULT_VLLM_MS_CONFIG_FILE_PATH).
    """

    def __init__(self):
        # start_iteration: iterations to run before profiling really starts
        self.start_iteration = 50
        # sample_iteration: number of iterations to keep the profiler on
        self.sample_iteration = 10
        # profile_type: "device" or "host" profiling; "device" is advised
        self.profile_type = "device"
        # profile_output_path: output path of profiling data
        self.profile_output_path = "./graph"
        # online_ananlyse: analyse profile data right after stopping.
        # TODO: misspelled ("analyse"), but kept: the name doubles as the
        # JSON config key documented at the top of this module.
        self.online_ananlyse = True
        # profiler_level: device profiler level, e.g. Level0/Level1/Level2.
        # Note: a config-file string must match a valid enum value.
        self.profiler_level = ProfilerLevel.Level1
        # with_stack: also profile Python stack data
        self.with_stack = True
        # activities: list of ProfilerActivity ("CPU", "NPU", "GPU");
        # ["CPU", "NPU"] is advised on the Ascend platform.
        self.activities = [ProfilerActivity.CPU, ProfilerActivity.NPU]

    def to_dict(self):
        """Serialize this config into plain JSON-compatible values."""
        out_dict = {}
        for key, value in self.__dict__.items():
            if hasattr(value, "to_dict"):
                out_dict[key] = value.to_dict()
            elif isinstance(value, ProfilerLevel):
                out_dict[key] = value.value
            elif key == "activities":
                # activities is a list of ProfilerActivity enum members
                out_dict[key] = [str(elem.value) for elem in value]
            else:
                out_dict[key] = value
        return out_dict


default_profile_config = ProfileControllerConfig()
# This exists because the original vLLM profiler is controlled purely by an
# output path; in vllm-mindspore that path is where packaged results go.
profile_results_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME,
                                 "./profile_results")


# Control profile class
class ProfileController:
    """Drives host/device profiling around model forward iterations.

    ``start()`` arms the controller; ``check_profile_point()`` (called once
    per forward step) starts the real profiler after ``start_iteration``
    steps and stops it ``sample_iteration`` steps later.
    """

    def __init__(self,
                 config: ProfileControllerConfig = default_profile_config):
        self.name = "vllm mindspore profile controller"
        # True while a profiling window is armed or running.
        # TODO: misspelled ("profiling"); kept as-is in case external code
        # reads the attribute on the module singleton.
        self.profiliing_state = False
        self.config = config
        # Forward-iteration counter since start() was called.
        self.iteration = 0
        # Active device Profiler instance, or None (Optional[Profiler]).
        self.profiler = None

    def start(self, config=None) -> None:
        """Arm the controller; profiling begins after start_iteration steps.

        Args:
            config: optional ProfileControllerConfig replacing the current
                one for this run.
        """
        if self.profiliing_state:
            # Already in a profiling window; ignore the request.
            vllm_logger.warning(
                "vllm-mindspore is already in profiling state, try start later"
            )
            return

        self.profiliing_state = True
        if config is not None:
            # BUGFIX: the dict must go through a %s placeholder — passing it
            # as a bare extra argument raises a logging formatting error.
            vllm_logger.info("start profile with new config %s",
                             config.to_dict())
            self.config = config

        self.iteration = 0

    def _host_profile_point(self) -> None:
        """Start/stop host (framework) profiling at the window boundaries."""
        if self.iteration == self.config.start_iteration:
            # start host profile
            if os.environ.get("MS_ENABLE_RUNTIME_PROFILER", "") != "1":
                vllm_logger.warning(
                    "env MS_ENABLE_RUNTIME_PROFILER is not set, "
                    "host profile cannot work")
            vllm_logger.info("start host profile at iter %d", self.iteration)
            # set the host output path
            ms.set_context(save_graphs_path=self.config.profile_output_path)
            _framework_profiler_enable_mi()
            _framework_profiler_step_start()

        if self.iteration == self.config.start_iteration + \
                self.config.sample_iteration:
            # end host profile
            vllm_logger.info("end host profile at iter %d", self.iteration)
            _framework_profiler_step_end()
            _framework_profiler_clear()
            _framework_profiler_disable_mi()
            self.profiliing_state = False

        return

    def _device_profile_point(self) -> None:
        """Start/stop the MindSpore device Profiler at window boundaries."""
        if self.iteration == self.config.start_iteration:
            # start device profile
            self.profiler = Profiler(
                profiler_level=self.config.profiler_level,
                activities=self.config.activities,
                with_stack=self.config.with_stack,
                output_path=self.config.profile_output_path)

        if self.iteration == self.config.start_iteration + \
                self.config.sample_iteration:
            # end device profile
            vllm_logger.info("end device profile at iter %d", self.iteration)
            self.profiler.stop()
            self.profiliing_state = False

        return

    def is_profiling(self) -> bool:
        """Return True while a profiling window is armed or running."""
        return self.profiliing_state

    def check_profile_point(self):
        """Per-iteration hook called from the model forward path."""
        if not self.profiliing_state:
            # controller is not armed; nothing to do
            return

        if self.config.profile_type == "host":
            self._host_profile_point()
        elif self.config.profile_type == "device":
            self._device_profile_point()
        else:
            vllm_logger.warning(
                "Invalid profiling type %s, "
                "please check profile config", self.config.profile_type)
            self.profiliing_state = False
            self.iteration = 0
            # BUGFIX: return so the just-reset counter is not bumped to 1
            # by the increment below.
            return

        self.iteration += 1

    def stop(self):
        """Stop an in-flight device profile and run analysis if configured.

        NOTE(review): a host profile interrupted mid-window is not torn down
        here — only the device path is handled; confirm whether host
        profiling needs symmetric cleanup.
        """
        if self.config.profile_type == "device":
            if self.profiliing_state:
                # window not finished yet — stop the profiler early
                if self.profiler:
                    self.profiler.stop()
                self.profiliing_state = False

            if self.profiler and self.config.online_ananlyse:
                # online analysis enabled: analyse in-process, falling back
                # to offline analysis in a subprocess on failure
                try:
                    self.profiler.analyse()
                except Exception as e:
                    # BUGFIX: use a %s placeholder; a bare extra argument
                    # raises a logging formatting error.
                    vllm_logger.warning(
                        "online analyse catch exception %s, "
                        "try offline analyse.", e)
                    profile_output_path = ProfilerContext().ascend_ms_dir
                    shell_analyse(profile_output_path)
            self.profiler = None


vllm_mindspore_profile_controller = ProfileController()


# File-based configuration for the profile controller.
# vLLM exposes no "set profile config" API, so vllm-mindspore reuses the
# start/stop API and reads the config from a file whose path is given by the
# VLLM_MS_PROFILE_CONFIG_PATH env var at server setup. If the file does not
# exist, the default config is used.
class ProfileFileControlerConfig(SimpleNamespace):
    # TODO: class name is misspelled ("Controller"); kept to avoid breaking
    # any external importers.

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def to_dict(self):
        """Recursively convert this namespace to a plain dict."""
        out_dict = {}
        for key, value in self.__dict__.items():
            if hasattr(value, "to_dict"):
                out_dict[key] = value.to_dict()
            else:
                out_dict[key] = value
        return out_dict


default_profile_file_controller_config = ProfileFileControlerConfig()
# enable_profile: if False, calling start will not start profiling
default_profile_file_controller_config.enable_profile = True
# profile_config: the nested profiling parameters
default_profile_file_controller_config.profile_config = ProfileControllerConfig(
)


# The Profiler adapter handed to vLLM workers: it exposes the start/stop
# API vLLM expects and forwards to the module-level ProfileController.
class AdapterControlProfiler:

    def __init__(self, config_path: str):
        # Path of the JSON config file re-read on every start().
        self.config_path = config_path

    def get_config(self):
        """Load the profile config file, falling back to the default."""
        if not os.path.exists(self.config_path):
            vllm_logger.info(
                "profile config path is not exist, use default config")
            return default_profile_file_controller_config

        with open(self.config_path) as config_file:
            config_json = config_file.read()
        try:
            config = json.loads(
                config_json,
                object_hook=lambda d: ProfileFileControlerConfig(**d))
        except Exception:
            vllm_logger.warning(
                "invalid profile config file, return default config")
            return default_profile_file_controller_config

        return config

    def start(self):
        """Re-read the config file and arm the profile controller."""
        config = self.get_config()
        if not config.enable_profile:
            # BUGFIX: honor the documented contract — when the config file
            # disables profiling, warn and do NOT start.
            vllm_logger.warning(
                "the config file is disable the profile, please check it again"
            )
            return

        vllm_mindspore_profile_controller.start(config.profile_config)

    def stop(self):
        """Stop profiling and package the result dir into a .tar.gz."""
        vllm_mindspore_profile_controller.stop()

        # package the profile result (device profiling output dir)
        current_profile_output_path = ProfilerContext().ascend_ms_dir
        vllm_logger.info("packaging the profile dir: %s",
                         current_profile_output_path)

        profile_dir_name = os.path.basename(current_profile_output_path)
        # BUGFIX: the original f-string used a backslash line continuation,
        # which embedded literal spaces/indentation into the archive path.
        package_profile_file_path = os.path.join(
            profile_results_path, f"{profile_dir_name}.tar.gz")

        # ensure the results dir exists even if the worker wrapper did not run
        os.makedirs(profile_results_path, exist_ok=True)
        with tarfile.open(package_profile_file_path, "w:gz") as tar:
            tar.add(current_profile_output_path,
                    arcname=os.path.basename(current_profile_output_path))


def init_vllm_mindspore_profile_controller() -> None:
    """Register profiling HTTP endpoints on the vLLM router.

    In vllm-mindspore the profile API should always be available so that a
    server restart is not needed to profile. When vLLM itself already set up
    /start_profile and /stop_profile (VLLM_TORCH_PROFILER_DIR is set), we
    register nothing.

    NOTE(review): the vllm-mindspore-specific routes (config info, result
    files, dashboard) are also skipped in that case — confirm this is
    intended rather than registering them unconditionally.
    """
    if not envs.VLLM_TORCH_PROFILER_DIR:

        @vllm_router.post("/start_profile")
        async def start_profile(raw_request: Request):
            vllm_logger.info("Starting profiler...")
            await engine_client(raw_request).start_profile()
            vllm_logger.info("Profiler started.")
            return Response(status_code=200)

        @vllm_router.post("/stop_profile")
        async def stop_profile(raw_request: Request):
            vllm_logger.info("Stop profiler...")
            await engine_client(raw_request).stop_profile()
            vllm_logger.info("Profiler stopped.")
            return Response(status_code=200)

        # Expose the config path + an example config, so users know where
        # to put the file (we avoid modifying vLLM source for this).
        @vllm_router.get("/get_profile_config_info")
        async def get_profile_config_path(raw_request: Request):
            profile_config_path = os.getenv(
                VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME,
                DEFAULT_VLLM_MS_CONFIG_FILE_PATH)
            ret = {
                "vllm_ms_profile_config_path":
                profile_config_path,
                "vllm_ms_profile_config_example":
                default_profile_file_controller_config.to_dict()
            }
            return JSONResponse(ret)

        @vllm_router.get("/get_profile_result_files")
        async def get_profile_result_files(raw_request: Request):
            profile_result_file_list = os.listdir(profile_results_path)
            ret = {"vllm_ms_profile_files": profile_result_file_list}
            return JSONResponse(ret)

        @vllm_router.get("/get_profile_data/{file_name}")
        async def get_profile_data(file_name: str):
            # SECURITY: file_name comes from the URL — strip any directory
            # component so it cannot escape profile_results_path.
            file_name = os.path.basename(file_name)
            profile_file_path = f"{profile_results_path}/{file_name}"
            # BUGFIX: corrected copy-pasted log message (was "packaging").
            vllm_logger.info("sending profile file: %s", profile_file_path)
            return FileResponse(profile_file_path, filename=file_name)

        @vllm_router.get("/profile_dashboard")
        async def get_profile_dashboard(raw_request: Request):
            dashboard_html_str = get_dashboard_html()
            return HTMLResponse(dashboard_html_str)

    return


# Wrappers replacing vLLM worker init functions to set up profiling.
def wrapper_worker_init(func):
    """Wrap Worker.__init__ to hide the profiler env var during init.

    Profiler initialization during worker init triggers device setup,
    making the later init_device fail with a duplicate configuration. The
    env var is removed before init and restored afterwards; the profiler is
    then created for real in wrapper_worker_init_device.
    """

    def new_func(*args, **kwargs) -> None:
        profile_output_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "")
        if profile_output_path:
            del os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME]

        func(*args, **kwargs)

        if profile_output_path:
            os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME] = profile_output_path

    return new_func


def wrapper_worker_init_device(func):
    """Wrap Worker.init_device to attach the vllm-mindspore profiler.

    After the device is initialized, the worker gets an
    AdapterControlProfiler driven by the config file path from
    VLLM_MS_PROFILE_CONFIG_PATH, and the results dir is reset.
    """

    def new_func(*args, **kwargs) -> None:
        func(*args, **kwargs)

        # args[0] is the bound worker instance (decorating a method).
        worker = args[0]
        profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME,
                                        DEFAULT_VLLM_MS_CONFIG_FILE_PATH)

        # reset profile results dir
        if os.path.exists(profile_results_path):
            shutil.rmtree(profile_results_path, ignore_errors=True)
        os.makedirs(profile_results_path, exist_ok=True)

        worker.profiler = AdapterControlProfiler(profile_config_path)

    return new_func