diff --git a/vllm_mindspore/__init__.py b/vllm_mindspore/__init__.py
index 5892937a8957829c8a47ae1df13ada581137f7e6..54e291dba924fe0c2be037c1da369d509d3aa4f0 100644
--- a/vllm_mindspore/__init__.py
+++ b/vllm_mindspore/__init__.py
@@ -141,7 +141,7 @@ vllm.model_executor.model_loader.loader.safetensors_weights_iterator = (
 )
 
 from vllm_mindspore.worker.worker import _warm_up_model
-from vllm_mindspore.worker.profile import (
+from vllm_mindspore.worker.profile_controller import (
     wrapper_worker_init,
     wrapper_worker_init_device,
 )
@@ -257,6 +257,10 @@ MultiModalKwargs.as_kwargs = as_kwargs
 from vllm_mindspore.model_executor.layers.rotary_embedding import InferMRotaryEmbedding
 vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding = InferMRotaryEmbedding
+
+# initialize the vllm-mindspore profile controller
+from vllm_mindspore.worker.profile_controller import init_vllm_mindspore_profile_controller
+init_vllm_mindspore_profile_controller()
 
 from vllm_mindspore.v1.sample import rejection_sampler
 update_modules("vllm.v1.sample.rejection_sampler", rejection_sampler)
diff --git a/vllm_mindspore/model_executor/models/model_base.py b/vllm_mindspore/model_executor/models/model_base.py
index 0d933a2db438919cf68388833e1f95c572436c81..2c9f6081ef6cb299164f14cd3710f1cbfb1fa151 100644
--- a/vllm_mindspore/model_executor/models/model_base.py
+++ b/vllm_mindspore/model_executor/models/model_base.py
@@ -31,7 +31,8 @@ from vllm.attention.layer import Attention
 
 import torch
 from mindspore import Tensor, nn, mutable
-
+from vllm_mindspore.utils import STR_DTYPE_TO_MS_DTYPE
+from vllm_mindspore.worker.profile_controller import vllm_mindspore_profile_controller
 
 class Fake_Attention:
     def __init__(self):
@@ -198,6 +199,10 @@ class MsModelBase():
         previous_hidden_states: Optional[Tensor] = None,
         spec_step_idx: int = 0,
     ) -> Union[Tensor, IntermediateTensors]:
+
+        # check whether profiling needs to start or stop at this iteration
+        vllm_mindspore_profile_controller.check_profile_point()
+
         return self.forward(
             input_ids,
             positions,
diff --git a/vllm_mindspore/worker/profile_controller.py b/vllm_mindspore/worker/profile_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..3286de04ba8c65709fc4edc8fa439f6cd1a648fe
--- /dev/null
+++ b/vllm_mindspore/worker/profile_controller.py
@@ -0,0 +1,393 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+# Copyright 2025 Huawei Technologies Co., Ltd
+# Copyright 2024 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+
+import os
+import json
+import sys
+import subprocess
+import tarfile
+import shutil
+from types import SimpleNamespace
+from typing import Optional
+
+import mindspore as ms
+
+# host profiling modules
+from mindspore._c_expression import _framework_profiler_enable_mi
+from mindspore._c_expression import _framework_profiler_disable_mi
+from mindspore._c_expression import _framework_profiler_step_start
+from mindspore._c_expression import _framework_profiler_step_end
+from mindspore._c_expression import _framework_profiler_clear
+
+# device profiling utils
+from mindspore import Profiler
+from mindspore.profiler import ProfilerLevel, ProfilerActivity
+from mindspore.profiler.common.profiler_context import ProfilerContext
+
+# vllm modules
+import vllm.envs as envs
+from vllm.logger import init_logger
+from vllm.entrypoints.openai.api_server import router as vllm_router
+from vllm.entrypoints.openai.api_server import engine_client
+
+from fastapi import Request
+from fastapi.responses import Response, JSONResponse, FileResponse
+
+
+VLLM_DEFAULT_PROFILE_ENV_NAME = "VLLM_TORCH_PROFILER_DIR"
+VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME = "VLLM_MS_PROFILE_CONFIG_PATH"
+
+# The default vllm-mindspore profile config path is resolved relative to the
+# directory the vLLM backend was started from. An example of the file content:
+# {
+#     "enable_profile": true,
+#     "profile_config": {
+#         "profile_type": "device",
+#         "start_iteration": 50,
+#         "sample_iteration": 10,
+#         "profile_output_path": "./graph",
+#         "online_analyse": true,
+#         "profiler_level": "Level1",
+#         "with_stack": true,
+#         "activities": ["CPU", "NPU"]
+#     }
+# }
+DEFAULT_VLLM_MS_CONFIG_FILE_PATH = "./vllm_ms_profile.config"
+
+vllm_logger = init_logger(__name__)
+
+
+def shell_analyse(path: str) -> None:
+    # run offline analysis in a separate Python process
+    subprocess.run(
+        [sys.executable, "-c", f'from mindspore import Profiler; Profiler.offline_analyse("{path}")'],
+        shell=False, check=True
+    )
+
+
+# Plain profile config class for vLLM MindSpore
+class ProfileControllerConfig:
+    def __init__(self):
+        # start_iteration: number of iterations to run before profiling starts
+        self.start_iteration = 50
+        # sample_iteration: number of iterations to profile
+        self.sample_iteration = 10
+        # profile_type: "device" or "host" profiling; "device" is recommended
+        self.profile_type = "device"
+        # profile_output_path: output directory for the profiling data
+        self.profile_output_path = "./graph"
+        # online_analyse: whether to analyse the profile data right after profiling stops
+        self.online_analyse = True
+        # profiler_level: device profiler level, valid values: Level0/Level1/Level2
+        # Note: the string must exactly match one of the valid values
+        self.profiler_level = ProfilerLevel.Level1
+        # with_stack: whether to profile Python stack data
+        self.with_stack = True
+        # activities: the activities to profile, a list drawn from "CPU", "NPU", "GPU";
+        # ["CPU", "NPU"] is recommended on the Ascend platform
+        self.activities = [ProfilerActivity.CPU, ProfilerActivity.NPU]
+
+    def to_dict(self):
+        out_dict = {}
+
+        for (key, value) in self.__dict__.items():
+            if hasattr(value, "to_dict"):
+                out_dict[key] = value.to_dict()
+            elif isinstance(value, ProfilerLevel):
+                out_dict[key] = value.value
+            elif key == "activities":
+                # activities is a list of ProfilerActivity enums, handle it as a special case
+                out_list = []
+                for elem in value:
+                    out_list.append(str(elem.value))
+                out_dict[key] = out_list
+            else:
+                out_dict[key] = value
+        return out_dict
+
+
+default_profile_config = ProfileControllerConfig()
+# This variable exists because the original vLLM profiler is keyed off its output
+# path; in vllm-mindspore this path is the directory where packaged result files are stored.
+profile_results_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "./profile_results")
+
+
+# Controls the profiling lifecycle
+class ProfileController:
+    def __init__(self, config: ProfileControllerConfig = default_profile_config):
+        self.name = "vllm mindspore profile controller"
+        self.is_profiling = False
+        self.config = config
+        self.iteration = 0
+        self.profiler = None
+
+    # start a profiling period
+    def start(self, config: Optional[ProfileControllerConfig] = None) -> None:
+        if self.is_profiling:
+            # already in profiling state, skip this request
+            vllm_logger.warning("vllm-mindspore is already in profiling state, try starting again later")
+            return
+
+        self.is_profiling = True
+        if config is not None:
+            vllm_logger.info(f"start profile with new config: {config.to_dict()}")
+            self.config = config
+
+        self.iteration = 0
+
+    # host-profiling checkpoint
+    def _host_profile_point(self) -> None:
+        if self.iteration == self.config.start_iteration:
+            # start host profiling
+            if os.environ.get("MS_ENABLE_RUNTIME_PROFILER", "") != "1":
+                vllm_logger.warning("env MS_ENABLE_RUNTIME_PROFILER is not set, host profiling cannot work")
+            vllm_logger.info(f"start host profile at iteration {self.iteration}")
+            # set the host output path
+            ms.set_context(save_graphs_path=self.config.profile_output_path)
+            _framework_profiler_enable_mi()
+            _framework_profiler_step_start()
+
+        if self.iteration == self.config.start_iteration + self.config.sample_iteration:
+            # end host profiling
+            vllm_logger.info(f"end host profile at iteration {self.iteration}")
+            _framework_profiler_step_end()
+            _framework_profiler_clear()
+            _framework_profiler_disable_mi()
+            self.is_profiling = False
+
+    # device-profiling checkpoint
+    def _device_profile_point(self) -> None:
+        if self.iteration == self.config.start_iteration:
+            # start device profiling; the Profiler begins collecting on construction
+            vllm_logger.info(f"start device profile at iteration {self.iteration}")
+            self.profiler = Profiler(profiler_level=self.config.profiler_level,
+                                     activities=self.config.activities,
+                                     with_stack=self.config.with_stack,
+                                     output_path=self.config.profile_output_path)
+
+        if self.iteration == self.config.start_iteration + self.config.sample_iteration:
+            # end device profiling
+            vllm_logger.info(f"end device profile at iteration {self.iteration}")
+            self.profiler.stop()
+            self.is_profiling = False
+
+    # whether the controller is currently in a profiling period; a method named
+    # is_profiling would be shadowed by the attribute of the same name
+    def profiling_active(self) -> bool:
+        return self.is_profiling
+
+    # public checkpoint hook, called once per model forward step
+    def check_profile_point(self):
+        if not self.is_profiling:
+            # the controller is not in profiling state, nothing to do
+            return
+
+        if self.config.profile_type == "host":
+            self._host_profile_point()
+        elif self.config.profile_type == "device":
+            self._device_profile_point()
+        else:
+            vllm_logger.warning(f"Invalid profiling type {self.config.profile_type}, please check the profile config")
+            self.is_profiling = False
+            self.iteration = 0
+
+        self.iteration += 1
+
+    # stop the profiling period
+    def stop(self):
+        if self.config.profile_type == "device":
+            if self.is_profiling:
+                # profiling has not finished yet, stop it now
+                if self.profiler:
+                    self.profiler.stop()
+                self.is_profiling = False
+
+            if self.profiler and self.config.online_analyse:
+                # online analysis is enabled, analyse the collected data
+                try:
+                    self.profiler.analyse()
+                except Exception as e:
+                    vllm_logger.warning(f"online analyse raised {e}, falling back to offline analyse.")
+                    profile_output_path = ProfilerContext().ascend_ms_dir
+                    shell_analyse(profile_output_path)
+            self.profiler = None
+
+
+vllm_mindspore_profile_controller = ProfileController()
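+
+# Illustrative sketch (not part of the runtime flow): with the default config
+# (start_iteration=50, sample_iteration=10), a driver that calls
+# check_profile_point() once per forward step arms the profiler on the 50th
+# step and stops it on the 60th. `run_one_forward_step` below is a
+# hypothetical placeholder for one model forward call:
+#
+#     vllm_mindspore_profile_controller.start()
+#     for _ in range(100):
+#         vllm_mindspore_profile_controller.check_profile_point()
+#         run_one_forward_step()
+#     vllm_mindspore_profile_controller.stop()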
+
+
+# File-based config class for the profile controller.
+# It is used to change the profile config while vLLM is already running:
+# vLLM does not provide an API for setting the profiling config, so
+# vllm-mindspore reuses the start/stop API and reads the config from a file.
+# The file path is set through the VLLM_MS_PROFILE_CONFIG_PATH env var when the
+# vLLM server is set up; if the config file does not exist, the profile
+# controller falls back to the default config.
+class ProfileFileControllerConfig(SimpleNamespace):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def to_dict(self):
+        out_dict = {}
+
+        for (key, value) in self.__dict__.items():
+            if hasattr(value, "to_dict"):
+                out_dict[key] = value.to_dict()
+            else:
+                out_dict[key] = value
+
+        return out_dict
+
+
+default_profile_file_controller_config = ProfileFileControllerConfig()
+# enable_profile: whether profiling is enabled; if False, calling start will not start profiling
+default_profile_file_controller_config.enable_profile = True
+# profile_config: the profile config
+default_profile_file_controller_config.profile_config = ProfileControllerConfig()
+
+
+# Profiler adapter for vLLM: it backs vLLM's start/stop profiling API and
+# forwards the calls to the vllm-mindspore profile controller.
+class AdapterControlProfiler:
+    def __init__(self, config_path: str):
+        self.config_path = config_path
+
+    def get_config(self):
+        if not os.path.exists(self.config_path):
+            # the config file does not exist, return the default profile config
+            vllm_logger.info("profile config file does not exist, using the default config")
+            return default_profile_file_controller_config
+
+        with open(self.config_path, "r") as config_file:
+            config_json = config_file.read()
+        try:
+            config = json.loads(config_json, object_hook=lambda d: ProfileFileControllerConfig(**d))
+        except Exception as e:
+            vllm_logger.warning(f"invalid profile config file ({e}), returning the default config")
+            return default_profile_file_controller_config
+
+        return config
+
+    def start(self):
+        # the config file is (re)read only when start is called
+        config = self.get_config()
+        if not config.enable_profile:
+            # profiling is disabled in the config file, warn the user and do nothing
+            vllm_logger.warning("the config file disables profiling, please check it again")
+            return
+
+        vllm_mindspore_profile_controller.start(config.profile_config)
+
+    def stop(self):
+        vllm_mindspore_profile_controller.stop()
+
+        # package the profiling result
+        current_profile_output_path = ProfilerContext().ascend_ms_dir
+        vllm_logger.info(f"packaging the profile dir: {current_profile_output_path}")
+
+        profile_dir_name = os.path.basename(current_profile_output_path)
+        package_profile_file_path = f"{profile_results_path}/{profile_dir_name}.tar.gz"
+
+        with tarfile.open(package_profile_file_path, "w:gz") as tar:
+            tar.add(current_profile_output_path, arcname=os.path.basename(current_profile_output_path))
+
+
+# Profile controller init function. In vllm-mindspore the profiling API is
+# always provided for ease of use, so vLLM does not need to be restarted to
+# enable profiling. If the VLLM_TORCH_PROFILER_DIR env var is set, vLLM
+# registers the start/stop routes itself, so we only add them when it is not.
+def init_vllm_mindspore_profile_controller() -> None:
+    if not envs.VLLM_TORCH_PROFILER_DIR:
+        @vllm_router.post("/start_profile")
+        async def start_profile(raw_request: Request):
+            vllm_logger.info("Starting profiler...")
+            await engine_client(raw_request).start_profile()
+            vllm_logger.info("Profiler started.")
+            return Response(status_code=200)
+
+        @vllm_router.post("/stop_profile")
+        async def stop_profile(raw_request: Request):
+            vllm_logger.info("Stopping profiler...")
+            await engine_client(raw_request).stop_profile()
+            vllm_logger.info("Profiler stopped.")
+            return Response(status_code=200)
+
+    # The following routes exist for the same reason as above: we do not want
+    # to modify the vLLM source code to provide the profiling ability.
+    @vllm_router.get("/get_profile_config_info")
+    async def get_profile_config_path(raw_request: Request):
+        profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, DEFAULT_VLLM_MS_CONFIG_FILE_PATH)
+        ret = {"vllm_ms_profile_config_path": profile_config_path,
+               "vllm_ms_profile_config_example": default_profile_file_controller_config.to_dict()}
+        return JSONResponse(ret)
+
+    @vllm_router.get("/get_profile_result_files")
+    async def get_profile_result_files(raw_request: Request):
+        profile_result_file_list = os.listdir(profile_results_path)
+        ret = {
+            "vllm_ms_profile_files": profile_result_file_list
+        }
+        return JSONResponse(ret)
+
+    @vllm_router.get("/get_profile_data/")
+    async def get_profile_data(file_name: str):
+        # keep only the base name to guard against path traversal
+        file_name = os.path.basename(file_name)
+        profile_file_path = f"{profile_results_path}/{file_name}"
+        return FileResponse(profile_file_path, filename=file_name)
+
+
+# Wrappers around the vLLM worker init functions; they replace the original
+# worker init in order to initialize the profiler modules.
+def wrapper_worker_init(func):
+    def new_func(*args, **kwargs) -> None:
+        # Profiler initialization during worker init triggers device setup,
+        # causing init_device to fail due to duplicate configuration.
+        # To avoid this, temporarily unset VLLM_TORCH_PROFILER_DIR before the
+        # vLLM worker init, restore it afterwards, and then initialize the
+        # profiler properly after worker init_device completes.
+        profile_output_path = os.getenv(VLLM_DEFAULT_PROFILE_ENV_NAME, "")
+        if profile_output_path:
+            del os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME]
+
+        func(*args, **kwargs)
+
+        if profile_output_path:
+            os.environ[VLLM_DEFAULT_PROFILE_ENV_NAME] = profile_output_path
+    return new_func
+
+
+def wrapper_worker_init_device(func):
+    def new_func(*args, **kwargs):
+        func(*args, **kwargs)
+
+        # The actual profiler initialization is performed after the
+        # worker.init_device() method, based on the VLLM_TORCH_PROFILER_DIR
+        # environment variable.
+        worker = args[0]
+        profile_config_path = os.getenv(VLLM_MS_PROFILE_CONFIG_PATH_ENV_NAME, DEFAULT_VLLM_MS_CONFIG_FILE_PATH)
+
+        # reset the profile results dir
+        if os.path.exists(profile_results_path):
+            shutil.rmtree(profile_results_path)
+        os.mkdir(profile_results_path)
+
+        worker.profiler = AdapterControlProfiler(profile_config_path)
+    return new_func
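+
+
+# Illustrative client-side sketch, run only when this module is executed
+# directly. It assumes a vLLM server listening on localhost:8000 and the
+# third-party `requests` package being available; both are assumptions made
+# for the example, not requirements of this module.
+if __name__ == "__main__":
+    import requests
+
+    base_url = "http://localhost:8000"  # assumed server address
+    # open a profiling window; the config file is read at this point
+    requests.post(f"{base_url}/start_profile")
+    # ... send inference requests here so that iterations accumulate ...
+    # stop profiling; results are analysed and packaged as a .tar.gz
+    requests.post(f"{base_url}/stop_profile")
+    # list the packaged result files available for download
+    files = requests.get(f"{base_url}/get_profile_result_files").json()
+    print(files["vllm_ms_profile_files"])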