From 6e3f455e1cc79372ea0c87aee655f5f0d79360ba Mon Sep 17 00:00:00 2001 From: hehongzhe <935062458@qq.com> Date: Thu, 10 Jul 2025 15:48:23 +0800 Subject: [PATCH] log resource fix --- .../analysis/prof_common_func/_log.py | 19 ++++++------------- .../prof_view/_communication_parser.py | 4 ++-- .../analysis/prof_view/_integrate_parser.py | 4 ++-- .../analysis/prof_view/_kernel_view_parser.py | 4 ++-- .../analysis/prof_view/_memory_view_parser.py | 4 ++-- .../prof_view/_operator_view_parser.py | 4 ++-- .../analysis/prof_view/_stack_view_parser.py | 4 ++-- .../prof_view/_trace_step_time_parser.py | 4 ++-- .../analysis/prof_view/_trace_view_parser.py | 4 ++-- .../prof_view/cann_parse/_cann_analyze.py | 4 ++-- .../prof_view/cann_parse/_cann_export.py | 4 ++-- .../prepare_parse/_fwk_pre_parser.py | 4 ++-- .../prepare_parse/_relation_parser.py | 4 ++-- 13 files changed, 30 insertions(+), 37 deletions(-) diff --git a/torch_npu/profiler/analysis/prof_common_func/_log.py b/torch_npu/profiler/analysis/prof_common_func/_log.py index eba5db1af7f..0fecde48c41 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_log.py +++ b/torch_npu/profiler/analysis/prof_common_func/_log.py @@ -57,14 +57,15 @@ class ProfilerLogger: if cls._instance is not None: if cls._pid == os.getpid(): return - cls.destroy() # Create logs directory log_dir = os.path.join(output_dir, cls.DEFAULT_LOG_DIR) PathManager.make_dir_safety(log_dir) # Create logger - logger = logging.getLogger(cls.DEFAULT_LOGGER_NAME) + logger = logging.getLogger( + f"{cls.DEFAULT_LOGGER_NAME}_{custom_name}" if custom_name else cls.DEFAULT_LOGGER_NAME + ) logger.setLevel(cls.DEFAULT_LOG_LEVEL) logger.propagate = False @@ -112,19 +113,11 @@ class ProfilerLogger: def destroy(cls) -> None: """ Close and cleanup the logger. - To avoid the deadlock problem caused by directly calling close on handler in multi-process scenarios, close the - file descriptor manually. + To avoid the deadlock problem caused by directly calling close on handler in multi-process scenarios, + when child process updates instance, the parent process instance obtained by fork does not call this method. """ if cls._instance: for handler in cls._instance.handlers[:]: cls._instance.removeHandler(handler) - if cls._pid == os.getpid(): - handler.close() - else: - try: - if hasattr(handler.stream, 'fileno'): - fileno = handler.stream.fileno() - os.close(fileno) - except (OSError, AttributeError, ValueError): - logging.warning("Close profiler logger handler stream failed.") + handler.close() cls._instance = None diff --git a/torch_npu/profiler/analysis/prof_view/_communication_parser.py b/torch_npu/profiler/analysis/prof_view/_communication_parser.py index fff6d265d6c..e07f68b785b 100644 --- a/torch_npu/profiler/analysis/prof_view/_communication_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_communication_parser.py @@ -46,8 +46,6 @@ class CommunicationParser(BaseParser): self._root_node = TorchOpNode() self._kernel_dict = {} self.step_list = [] - ProfilerLogger.init(self._profiler_path, "CommunicationParser") - self.logger = ProfilerLogger.get_instance() @staticmethod def combine_size_distribution(op_dict: dict, total_dict: dict): @@ -63,6 +61,8 @@ class CommunicationParser(BaseParser): return round(dividend / divisor, 4) def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CommunicationParser") + self.logger = ProfilerLogger.get_instance() try: self._init_step_list(deps_data) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_integrate_parser.py b/torch_npu/profiler/analysis/prof_view/_integrate_parser.py index b6c545420c3..28472a24117 100644 --- a/torch_npu/profiler/analysis/prof_view/_integrate_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_integrate_parser.py @@ -26,10 +26,10 @@ class IntegrateParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "IntegrateParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "IntegrateParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py b/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py index 30ffd8be8ba..ded9a612c6c 100644 --- a/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py @@ -17,8 +17,6 @@ class KernelViewParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) self.step_range = [] - ProfilerLogger.init(self._profiler_path, "KernelViewParser") - self.logger = ProfilerLogger.get_instance() @classmethod def _project_map_for_headers(cls, input_headers: list): @@ -35,6 +33,8 @@ class KernelViewParser(BaseParser): return output_headers def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "KernelViewParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) self._init_step_range(deps_data) diff --git a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py index a82c3dc3c8f..47255efd09d 100644 --- a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py @@ -34,8 +34,6 @@ class MemoryViewParser(BaseParser): self.ge_record_list = [] self.memory_data = [] self.component_list = [] - ProfilerLogger.init(self._profiler_path, "MemoryViewParser") - self.logger = ProfilerLogger.get_instance() @staticmethod def _get_data_from_file(file_set: set, file_type_bean: any, bean_list: bool = False) -> list: @@ -73,6 +71,8 @@ class MemoryViewParser(BaseParser): return [cur_record_list, pta_ge_record_list] def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "MemoryViewParser") + self.logger = ProfilerLogger.get_instance() try: self.memory_data = deps_data.get(Constant.MEMORY_PREPARE, {}).get("memory_data", {}).get(Constant.Text, []) self.pta_record_list = deps_data.get(Constant.MEMORY_PREPARE, {}).get("pta_record_list", []) diff --git a/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py b/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py index f87e8dc8b85..7c10e9d4bf4 100644 --- a/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py @@ -22,10 +22,10 @@ class OperatorViewParser(BaseParser): self._torch_op_node = [] self._root_node = None self._kernel_dict = {} - ProfilerLogger.init(self._profiler_path, "OperatorViewParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "OperatorViewParser") + self.logger = ProfilerLogger.get_instance() try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) self._kernel_dict = deps_data.get(Constant.RELATION_PARSER, {}) diff --git a/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py b/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py index 2f793a8af8b..b4a85271d99 100644 --- a/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py @@ -23,10 +23,10 @@ class StackViewParser(BaseParser): self._root_node = None self._kernel_dict = {} self._metric = param_dict.get("metric") - ProfilerLogger.init(self._profiler_path, "StackViewParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "StackViewParser") + self.logger = ProfilerLogger.get_instance() try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py index 744e2cd8a6e..46093bec4e8 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py @@ -51,8 +51,6 @@ class TraceStepTimeParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) self.step_range = [] - ProfilerLogger.init(self._profiler_path, "TraceStepTimeParser") - self.logger = ProfilerLogger.get_instance() @classmethod def is_float_num(cls, num): @@ -165,6 +163,8 @@ class TraceStepTimeParser(BaseParser): FileManager.create_csv_file(output_path, print_time, file_name, self.title) def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TraceStepTimeParser") + self.logger = ProfilerLogger.get_instance() try: self._init_step_range(deps_data) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py index f90100e869f..c5e572e1bcf 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py @@ -27,8 +27,6 @@ class TraceViewParser(BaseParser): self._trace_data = [] self._torch_op_node = [] self._root_node = None - ProfilerLogger.init(self._profiler_path, "TraceViewParser") - self.logger = ProfilerLogger.get_instance() @staticmethod def _prune_trace_by_level(json_data: list) -> list: @@ -47,6 +45,8 @@ class TraceViewParser(BaseParser): return result def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TraceViewParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py index 8ef2072be61..da8037f982b 100644 --- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py +++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py @@ -34,10 +34,10 @@ class CANNAnalyzeParser(BaseParser): super().__init__(name, param_dict) self._cann_path = ProfilerPathManager.get_cann_path(self._profiler_path) self.msprof_path = shutil.which("msprof") - ProfilerLogger.init(self._profiler_path, "CANNAnalyzeParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CANNAnalyzeParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) if not os.path.isdir(self._cann_path): diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py index 6a703d0b954..7228525fae6 100644 --- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py +++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py @@ -41,10 +41,10 @@ class CANNExportParser(BaseParser): super().__init__(name, param_dict) self._cann_path = ProfilerPathManager.get_cann_path(self._profiler_path) self.msprof_path = shutil.which("msprof") - ProfilerLogger.init(self._profiler_path, "CANNExportParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CANNExportParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) if not os.path.isdir(self._cann_path): diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py index 6cc6f235165..939e06cf748 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py @@ -28,10 +28,10 @@ class TracePreParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "TracePreParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TracePreParser") + self.logger = ProfilerLogger.get_instance() try: fwk_trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data() trace_file_path = os.path.join(self._output_path, Constant.TRACE_VIEW_TEMP) if os.path.isdir( diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py index e6eb02ddb81..5e8a941de28 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py @@ -23,10 +23,10 @@ __all__ = [] class RelationParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "RelationParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "RelationParser") + self.logger = ProfilerLogger.get_instance() try: kernel_dict = FwkCANNRelationParser(self._profiler_path).get_kernel_dict() except Exception as e: -- Gitee