From ba741d9269b82a381fa92ebdb402df78dcc510c8 Mon Sep 17 00:00:00 2001
From: fary86
Date: Tue, 29 Jul 2025 23:28:08 +0800
Subject: [PATCH] Fix server shutdown hang when some executors have exited

---
 vllm_mindspore/__init__.py                   |  5 +
 vllm_mindspore/entrypoints/launcher.py       | 97 ++++++++++++++++++++
 vllm_mindspore/v1/worker/gpu_model_runner.py |  5 +-
 3 files changed, 105 insertions(+), 2 deletions(-)
 create mode 100644 vllm_mindspore/entrypoints/launcher.py

diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py
index 4e9a1717e..a9273bb9a 100644
--- a/vllm_mindspore/__init__.py
+++ b/vllm_mindspore/__init__.py
@@ -385,6 +385,11 @@ import vllm.engine.multiprocessing.engine
 
 vllm.engine.multiprocessing.engine.MQLLMEngine.cleanup = cleanup
 
+from vllm_mindspore.entrypoints.launcher import serve_http
+import vllm.entrypoints.launcher
+
+vllm.entrypoints.launcher.serve_http = serve_http
+
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm_mindspore.entrypoints.openai.serving_chat import chat_completion_stream_generator
 
diff --git a/vllm_mindspore/entrypoints/launcher.py b/vllm_mindspore/entrypoints/launcher.py
new file mode 100644
index 000000000..d1239207e
--- /dev/null
+++ b/vllm_mindspore/entrypoints/launcher.py
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# Adapted from
+# https://github.com/vllm-project/vllm/blob/v0.8.3/vllm/entrypoints/launcher.py
+#
+# Copyright 2025 Huawei Technologies Co., Ltd.
+# Copyright 2024-2025 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Patched serve_http for vllm-mindspore.
+
+Fixes the HTTP server shutdown hanging when some executors have already
+exited."""
+
+import asyncio
+import signal
+import socket
+from typing import Any, Optional
+
+import uvicorn
+from fastapi import FastAPI
+from vllm.entrypoints.launcher import _add_shutdown_handlers
+from vllm.entrypoints.ssl import SSLCertRefresher
+from vllm.logger import init_logger
+from vllm.utils import find_process_using_port
+
+logger = init_logger(__name__)
+
+
+async def serve_http(app: FastAPI,
+                     sock: Optional[socket.socket],
+                     enable_ssl_refresh: bool = False,
+                     **uvicorn_kwargs: Any):
+    logger.info("Available routes are:")
+    for route in app.routes:
+        methods = getattr(route, "methods", None)
+        path = getattr(route, "path", None)
+
+        if methods is None or path is None:
+            continue
+
+        logger.info("Route: %s, Methods: %s", path, ', '.join(methods))
+
+    config = uvicorn.Config(app, **uvicorn_kwargs)
+    config.load()
+    server = uvicorn.Server(config)
+    _add_shutdown_handlers(app, server)
+
+    loop = asyncio.get_running_loop()
+
+    server_task = loop.create_task(
+        server.serve(sockets=[sock] if sock else None))
+
+    ssl_cert_refresher = None if not enable_ssl_refresh else SSLCertRefresher(
+        ssl_context=config.ssl,
+        key_path=config.ssl_keyfile,
+        cert_path=config.ssl_certfile,
+        ca_path=config.ssl_ca_certs)
+
+    def signal_handler() -> None:
+        # prevent the uvicorn signal handler from exiting early
+        server_task.cancel()
+        if ssl_cert_refresher:
+            ssl_cert_refresher.stop()
+
+    async def dummy_shutdown() -> None:
+        pass
+
+    loop.add_signal_handler(signal.SIGINT, signal_handler)
+    loop.add_signal_handler(signal.SIGTERM, signal_handler)
+
+    try:
+        await server_task
+        return dummy_shutdown()
+    except asyncio.CancelledError:
+        port = uvicorn_kwargs["port"]
+        process = find_process_using_port(port)
+        if process is not None:
+            logger.debug(
+                "port %s is used by process %s launched with command:\n%s",
+                port, process, " ".join(process.cmdline()))
+        logger.info("Shutting down FastAPI HTTP server.")
+        # Add by vllm-mindspore begin:
+        # Force exit: pending requests on dead executors never finish.
+        server.force_exit = True
+        # Add by vllm-mindspore end.
+        return server.shutdown()
diff --git a/vllm_mindspore/v1/worker/gpu_model_runner.py b/vllm_mindspore/v1/worker/gpu_model_runner.py
index 31fde36cb..d7b5ac384 100644
--- a/vllm_mindspore/v1/worker/gpu_model_runner.py
+++ b/vllm_mindspore/v1/worker/gpu_model_runner.py
@@ -36,7 +36,7 @@ from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheSpec, SlidingWindowSpec
 from vllm.v1.utils import bind_kv_cache
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs
 from vllm.distributed.parallel_state import get_pp_group
-from vllm.utils import cdiv
+from vllm.utils import cdiv, get_exception_traceback
 from vllm.logger import init_logger
 from vllm.v1.worker.gpu_input_batch import CachedRequestState
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
@@ -445,8 +445,9 @@ def wrapper_gpu_model_runner_execute_model(func):
             output = func(*args, **kwargs)
             return output
         except Exception as e:
+            traceback = get_exception_traceback()
             logger.warning(
-                f"Caught exception {str(e)} when processing req_ids {self.input_batch.req_ids}"
+                f"ModelRunner hit an exception when processing req_ids {self.input_batch.req_ids}: {traceback}"
             )
             return ModelRunnerOutput(
                 req_ids=self.input_batch.req_ids,
--
Gitee
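
Context for reviewers, not part of the patch: uvicorn.Server.shutdown() normally waits for in-flight connections to drain before returning. When the executors serving those requests have already exited, the requests never complete, so the drain never ends and the server hangs on SIGINT/SIGTERM. Setting server.force_exit = True tells uvicorn to skip the drain phase, which is the whole fix. Below is a minimal, self-contained sketch of that pattern against plain uvicorn/FastAPI; the /hang route, port, and sleep durations are illustrative assumptions, not taken from vllm-mindspore.

import asyncio

import uvicorn
from fastapi import FastAPI

app = FastAPI()


@app.get("/hang")
async def hang():
    # Stands in for a request whose executor has died and will never answer.
    await asyncio.sleep(3600)
    return {"done": True}


async def main() -> None:
    config = uvicorn.Config(app, host="127.0.0.1", port=8000,
                            log_level="warning")
    server = uvicorn.Server(config)
    server_task = asyncio.get_running_loop().create_task(server.serve())
    await asyncio.sleep(0.5)  # give the server a moment to start

    # Without force_exit, shutdown would wait for any open /hang request;
    # with it, the serve loop returns promptly even with requests in flight.
    server.force_exit = True
    server.should_exit = True
    await server_task


if __name__ == "__main__":
    asyncio.run(main())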