diff --git a/accuracy_tools/msprobe/core/__init__.py b/accuracy_tools/msprobe/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..607855a9ec58e9bd995bd73bbd98680f3901470a --- /dev/null +++ b/accuracy_tools/msprobe/core/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from msprobe.core import cli, components, service diff --git a/accuracy_tools/msprobe/core/service/__init__.py b/accuracy_tools/msprobe/core/service/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..149700e99b3070e4dce9d3760dc54492a6181499 --- /dev/null +++ b/accuracy_tools/msprobe/core/service/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# NOTE(review): in the original patch the hunk for
# accuracy_tools/msprobe/core/service/__init__.py ends with the line
#     from msprobe.core.service.dump import ServiceDump
# It shared a physical line with the start of this file and is recorded here
# so that it is not lost. Everything below is accuracy_tools/msprobe/core/service/dump.py.

# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from tqdm import tqdm

from msprobe.base import BaseService, Component, Dict2Class, Service
from msprobe.common.dirs import DirPool
from msprobe.core.config_initiator import DumpConfig
from msprobe.utils.constants import CfgConst, CmdConst, CompConst, DumpConst, MsgConst, PathConst
from msprobe.utils.exceptions import MsprobeException
from msprobe.utils.io import savedmodel2pb
from msprobe.utils.log import logger, print_log_with_star
from msprobe.utils.path import get_name_and_ext, is_file, is_saved_model_scene


@Service.register(CmdConst.DUMP)
class ServiceDump(BaseService):
    """Dump service registered under ``CmdConst.DUMP``.

    The service builds its configuration from keyword arguments, creates the
    output directories and wires the components for the detected target:
    an offline model (a single model file or a saved-model directory) or the
    MindIE-LLM framework.
    """

    def __init__(self, *args, **kwargs):
        """Build the dump configuration and create the msprobe directory.

        Positional arguments are accepted but not used; every setting is read
        from ``kwargs`` (see ``_parse_kw``).
        """
        super().__init__()
        cfg_args, dump_path, namespace = self._parse_kw(**kwargs)
        config = DumpConfig(*cfg_args).check_config(dump_path)
        self.cfg = Dict2Class(config)
        # ``cmd_namespace`` is optional in ``_parse_kw``; read ``exec`` defensively so a
        # call without a namespace does not fail with AttributeError on ``None``.
        setattr(self.cfg, CfgConst.EXEC, getattr(namespace, "exec", None))
        logger.set_level(self.cfg.log_level)
        DirPool.make_msprobe_dir(self.cfg.dump_path)
        self.activate_comp_when_init()

    @property
    def is_skip(self):
        """Return True (and log it) when the current step or rank is not a configured target."""
        if self._is_step_in_goal and self._is_rank_in_goal:
            return False
        logger.info(f"Skip task {self.cfg.task}, step {self.cfg.step}, rank {self.cfg.rank}.")
        return True

    @property
    def _is_indep_control(self):
        """Return True for MindIE-LLM, for which step/rank filtering and step dirs are bypassed here."""
        return self.cfg.framework == CfgConst.FRAMEWORK_MINDIE_LLM

    @property
    def _is_step_in_goal(self):
        """Return True when no step filter is configured or the current step is listed."""
        if self._is_indep_control:
            return True
        return not self.cfg.step or self.current_step in self.cfg.step

    @property
    def _is_rank_in_goal(self):
        """Return True when no rank filter is configured or the current rank is listed."""
        if self._is_indep_control:
            return True
        return not self.cfg.rank or self.current_rank in self.cfg.rank

    @property
    def _is_offline_model(self):
        """Return True when ``exec`` holds exactly one entry that is an offline model.

        The single entry qualifies when it is a file whose extension is in
        ``PathConst.SUFFIX_OFFLINE_MODEL`` or when ``is_saved_model_scene``
        accepts it. A missing or empty ``exec`` is treated as "not offline".
        """
        exec_args = self.cfg.exec
        if not exec_args or len(exec_args) != 1:
            return False
        target = exec_args[0]
        if is_file(target) and get_name_and_ext(target)[1] in PathConst.SUFFIX_OFFLINE_MODEL:
            return True
        return bool(is_saved_model_scene(target))

    @property
    def _offline_model_comps_map(self):
        """Map device name -> {model suffix or "saved_model" -> constructor method name}."""
        model_map_for_cpu = {
            ".onnx": "_construct_for_onnx_model",
            ".pb": "_construct_for_frozen_graph_model_on_cpu",
            "saved_model": "_construct_for_saved_model_on_cpu",
            ".prototxt": "_construct_for_caffe_model",
        }
        model_map_for_npu = {
            ".om": "_construct_for_om_model",
            ".pb": "_construct_for_frozen_graph_model_on_npu",
            "saved_model": "_construct_for_saved_model_on_npu",
        }
        return {"cpu": model_map_for_cpu, "npu": model_map_for_npu}

    @property
    def _online_model_comps_map(self):
        """Map framework name -> constructor method name for non-offline targets."""
        return {CfgConst.FRAMEWORK_MINDIE_LLM: "_construct_for_atb_model"}

    @property
    def _is_make_model_dir(self):
        """Return True when a model directory is created (offline model or MindIE-LLM)."""
        return self._is_offline_model or self.cfg.framework == CfgConst.FRAMEWORK_MINDIE_LLM

    @staticmethod
    def _parse_kw(**kwargs):
        """Split keyword arguments into config arguments, dump path and namespace.

        ``config_path`` and ``framework`` are taken from ``cmd_namespace`` when it
        exposes them, otherwise from ``kwargs``.

        Returns:
            A tuple ``((config_path, task, framework, step, level), dump_path, cmd_namespace)``.
        """
        task = kwargs.get(CfgConst.TASK)
        step = kwargs.get(CfgConst.STEP)
        level = kwargs.get(CfgConst.LEVEL)
        dump_path = kwargs.get(DumpConst.DUMP_PATH)
        cmd_namespace = kwargs.get("cmd_namespace")
        if hasattr(cmd_namespace, CfgConst.CONFIG_PATH):
            config_path = cmd_namespace.config_path
        else:
            config_path = kwargs.get(CfgConst.CONFIG_PATH)
        if hasattr(cmd_namespace, "framework"):
            framework = cmd_namespace.framework
        else:
            framework = kwargs.get(CfgConst.FRAMEWORK)
        return (config_path, task, framework, step, level), dump_path, cmd_namespace

    def activate_comp_when_init(self):
        """Hook called at the end of ``__init__``; does nothing in this class."""
        pass

    def init_start(self):
        """Log the task launch banner and create the output directories."""
        print_log_with_star(f"Launching {self.cfg.task} task...")
        self.make_dirs()

    def make_dirs(self):
        """Create the model, step, rank and (for tensor tasks) tensor directories.

        For MindIE-LLM only the model directory is created and ``self.dir_pool``
        is not set.
        """
        if self._is_make_model_dir:
            DirPool.make_model_dir()
        if self._is_indep_control:
            return
        self.dir_pool = DirPool()
        self.dir_pool.make_step_dir(self.current_step)
        self.dir_pool.make_rank_dir()
        if self.cfg.task == CfgConst.TASK_TENSOR:
            self.dir_pool.make_tensor_dir()

    def finalize_start(self):
        """Finalize the writer, if one was constructed, and log completion."""
        # Not every constructor creates a writer (e.g. the NPU frozen-graph and ATB paths).
        if hasattr(self, "writer"):
            self.writer.finalize()
        print_log_with_star(f"{self.cfg.task} task completed successfully.")

    def construct(self):
        """Select and run the component constructor for the configured target.

        Raises:
            MsprobeException: if the device is neither "cpu" nor "npu" for an
                offline model, or if no constructor matches the model/framework.
        """
        if self._is_offline_model:
            handler_name = self._resolve_offline_handler()
        else:
            handler_name = self._online_model_comps_map.get(self.cfg.framework)
        if not handler_name:
            raise MsprobeException(MsgConst.INVALID_ARGU, "Unsupported framework. Please check parameter settings.")
        getattr(self, handler_name)()

    def run_cli(self):
        """Run the service from the command line.

        For an offline model with more than one input shape, each shape is
        written to ``cfg.input_shape`` in turn and run as its own
        start/step/stop cycle; otherwise a single start/stop is performed.
        """
        if self._is_offline_model:
            shapes = self.cfg.input_shape
            if isinstance(shapes, list) and len(shapes) > 1:
                for shape in tqdm(shapes, desc="Processing"):
                    self.cfg.input_shape = shape
                    self.start()
                    self.step()
                    self.stop()
                return
        self.start()
        self.stop()

    def _resolve_offline_handler(self):
        """Return the constructor method name for the offline model, or None if no suffix matches."""
        device_handler = self._offline_model_comps_map.get(self.cfg.device)
        if not device_handler:
            raise MsprobeException(
                MsgConst.INVALID_ARGU,
                '"device" must be set to either "cpu" or "npu" when dumping the offline model.',
            )
        model_path = self.cfg.exec[0]
        if is_saved_model_scene(model_path):
            model_key = "saved_model"
        else:
            model_key = next((key for key in device_handler if model_path.endswith(key)), None)
        return device_handler.get(model_key)

    def _first_rank(self):
        """Return the first configured rank, or 0 when no rank is configured."""
        return self.cfg.rank[0] if self.cfg.rank else 0

    def _build_writer(self, level, framework, strategy):
        """Create the dump writer component shared by the offline-model constructors."""
        return Component.get(CompConst.DUMP_WRITER_COMP)(
            priority=15,
            task=self.cfg.task,
            level=level,
            framework=framework,
            summary_mode=self.cfg.summary_mode,
            strategy=strategy,
            buffer_size=self.cfg.buffer_size,
            dir_pool=self.dir_pool,
        )

    def _convert_saved_model(self):
        """Replace ``cfg.exec[0]`` with the result of ``savedmodel2pb`` for that saved model."""
        self.cfg.exec[0] = savedmodel2pb(
            self.cfg.exec[0], self.cfg.saved_model_tag, self.cfg.saved_model_signature, DirPool.get_model_dir()
        )

    def _construct_for_om_model(self):
        """Create actuator, ACL dumper, compatibility layer and writer for an ``.om`` model."""
        rank = self._first_rank()
        self.actuator = Component.get(CompConst.OM_ACTUATOR_COMP)(
            priority=20,
            model_path=self.cfg.exec[0],
            input_shape=self.cfg.input_shape,
            input_path=self.cfg.input_path,
            dir_pool=self.dir_pool,
            rank=rank,
        )
        self.dumper = Component.get(CompConst.ACL_DUMPER_COMP)(
            priority=10,
            data_mode=self.cfg.data_mode,
            model_path=self.cfg.exec[0],
            rank=rank,
        )
        self.compatible = Component.get(CompConst.ACL_COMPATIBLE_COMP)(priority=12)
        self.writer = self._build_writer(CfgConst.LEVEL_KERNEL, CfgConst.FRAMEWORK_OM, DumpConst.BIN_FORMAT)
        # Subscription chain: dumper -> compatible -> writer.
        self.compatible.subscribe(self.dumper)
        self.writer.subscribe(self.compatible)

    def _construct_for_atb_model(self):
        """Create the ATB actuator used for MindIE-LLM, forwarding the dump settings to it."""
        self.actuator = Component.get(CompConst.ATB_ACTUATOR_COMP)(
            priority=100,
            dump_path=DirPool.get_msprobe_dir(),
            task=self.cfg.task,
            dump_level=self.cfg.level,
            step=self.cfg.step,
            rank=self.cfg.rank,
            seed=self.cfg.seed,
            log_level=self.cfg.log_level,
            summary_mode=self.cfg.summary_mode,
            buffer_size=self.cfg.buffer_size,
            data_mode=self.cfg.data_mode,
            dump_extra=self.cfg.dump_extra,
            op_id=self.cfg.op_id,
            op_name=self.cfg.list,
            exec=self.cfg.exec,
        )

    def _construct_for_onnx_model(self):
        """Create actuator, dumper and writer for an ``.onnx`` model."""
        self.actuator = Component.get(CompConst.ONNX_ACTUATOR_COMP)(
            priority=20,
            model_path=self.cfg.exec[0],
            input_shape=self.cfg.input_shape,
            input_path=self.cfg.input_path,
            dir_pool=self.dir_pool,
            onnx_fusion_switch=self.cfg.onnx_fusion_switch,
        )
        self.dumper = Component.get(CompConst.ONNX_DUMPER_COMP)(priority=10, data_mode=self.cfg.data_mode)
        self.writer = self._build_writer(CfgConst.LEVEL_KERNEL, CfgConst.FRAMEWORK_ONNX, DumpConst.NPY_FORMAT)
        self.writer.subscribe(self.dumper)

    def _construct_for_caffe_model(self):
        """Create actuator, dumper and writer for a Caffe ``.prototxt`` model."""
        self.actuator = Component.get(CompConst.CAFFE_ACTUATOR_COMP)(
            priority=20,
            model_path=self.cfg.exec[0],
            input_shape=self.cfg.input_shape,
            input_path=self.cfg.input_path,
            dir_pool=self.dir_pool,
            weight_path=self.cfg.weight_path,
        )
        self.dumper = Component.get(CompConst.CAFFE_DUMPER_COMP)(priority=10, data_mode=self.cfg.data_mode)
        self.writer = self._build_writer(CfgConst.LEVEL_MODULE, CfgConst.FRAMEWORK_CAFFE, DumpConst.NPY_FORMAT)
        self.writer.subscribe(self.dumper)

    def _construct_for_frozen_graph_model_on_cpu(self):
        """Create actuator, dumper and writer for a frozen-graph ``.pb`` model on CPU."""
        self.actuator = Component.get(CompConst.FROZEN_GRAPH_ACTUATOR_COMP_CPU)(
            priority=20,
            model_path=self.cfg.exec[0],
            input_shape=self.cfg.input_shape,
            input_path=self.cfg.input_path,
            dir_pool=self.dir_pool,
        )
        self.dumper = Component.get(CompConst.FROZEN_GRAPH_DUMPER_COMP_CPU)(priority=10, data_mode=self.cfg.data_mode)
        self.writer = self._build_writer(CfgConst.LEVEL_KERNEL, CfgConst.FRAMEWORK_TF, DumpConst.NPY_FORMAT)
        self.writer.subscribe(self.dumper)

    def _construct_for_saved_model_on_cpu(self):
        """Convert the saved model via ``savedmodel2pb``, then build the CPU frozen-graph components."""
        self._convert_saved_model()
        self._construct_for_frozen_graph_model_on_cpu()

    def _construct_for_frozen_graph_model_on_npu(self):
        """Create the actuator and GE setter for a frozen-graph ``.pb`` model on NPU (no writer)."""
        self.actuator = Component.get(CompConst.FROZEN_GRAPH_ACTUATOR_COMP_NPU)(
            priority=20,
            model_path=self.cfg.exec[0],
            input_shape=self.cfg.input_shape,
            input_path=self.cfg.input_path,
            data_mode=self.cfg.data_mode,
            fsf=self.cfg.fusion_switch_file,
        )
        self.setter = Component.get(CompConst.FROZEN_GRAPH_SET_GE_COMP_NPU)(
            priority=10,
            work_path=DirPool.get_msprobe_dir(),
            dump_ge_graph=self.cfg.dump_ge_graph,
            dump_graph_level=self.cfg.dump_graph_level,
            dump_graph_path=DirPool.get_model_dir(),
        )

    def _construct_for_saved_model_on_npu(self):
        """Convert the saved model via ``savedmodel2pb``, then build the NPU frozen-graph components."""
        self._convert_saved_model()
        self._construct_for_frozen_graph_model_on_npu()