diff --git a/torch_npu/profiler/analysis/prof_common_func/_log.py b/torch_npu/profiler/analysis/prof_common_func/_log.py index eba5db1af7f74910d1afd3a1fcf47bfb2a928098..0fecde48c41b465cf04eff26282a02911655c032 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_log.py +++ b/torch_npu/profiler/analysis/prof_common_func/_log.py @@ -57,14 +57,15 @@ class ProfilerLogger: if cls._instance is not None: if cls._pid == os.getpid(): return - cls.destroy() # Create logs directory log_dir = os.path.join(output_dir, cls.DEFAULT_LOG_DIR) PathManager.make_dir_safety(log_dir) # Create logger - logger = logging.getLogger(cls.DEFAULT_LOGGER_NAME) + logger = logging.getLogger( + f"{cls.DEFAULT_LOGGER_NAME}_{custom_name}" if custom_name else cls.DEFAULT_LOGGER_NAME + ) logger.setLevel(cls.DEFAULT_LOG_LEVEL) logger.propagate = False @@ -112,19 +113,11 @@ class ProfilerLogger: def destroy(cls) -> None: """ Close and cleanup the logger. - To avoid the deadlock problem caused by directly calling close on handler in multi-process scenarios, close the - file descriptor manually. + To avoid the deadlock problem caused by directly calling close on handler in multi-process scenarios, + when child process updates instance, the parent process instance obtained by fork does not call this method. """ if cls._instance: for handler in cls._instance.handlers[:]: cls._instance.removeHandler(handler) - if cls._pid == os.getpid(): - handler.close() - else: - try: - if hasattr(handler.stream, 'fileno'): - fileno = handler.stream.fileno() - os.close(fileno) - except (OSError, AttributeError, ValueError): - logging.warning("Close profiler logger handler stream failed.") + handler.close() cls._instance = None diff --git a/torch_npu/profiler/analysis/prof_view/_communication_parser.py b/torch_npu/profiler/analysis/prof_view/_communication_parser.py index fff6d265d6ceb5198681e78956b6268efc732cb9..e07f68b785b31eb509602a99a12760fad476a5f3 100644 --- a/torch_npu/profiler/analysis/prof_view/_communication_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_communication_parser.py @@ -46,8 +46,6 @@ class CommunicationParser(BaseParser): self._root_node = TorchOpNode() self._kernel_dict = {} self.step_list = [] - ProfilerLogger.init(self._profiler_path, "CommunicationParser") - self.logger = ProfilerLogger.get_instance() @staticmethod def combine_size_distribution(op_dict: dict, total_dict: dict): @@ -63,6 +61,8 @@ class CommunicationParser(BaseParser): return round(dividend / divisor, 4) def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CommunicationParser") + self.logger = ProfilerLogger.get_instance() try: self._init_step_list(deps_data) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_integrate_parser.py b/torch_npu/profiler/analysis/prof_view/_integrate_parser.py index b6c545420c3bb961640c7ef25dc54e8050fad6ae..28472a241177ed4f8f13c7b090e02a98db1113c2 100644 --- a/torch_npu/profiler/analysis/prof_view/_integrate_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_integrate_parser.py @@ -26,10 +26,10 @@ class IntegrateParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "IntegrateParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "IntegrateParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py b/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py index 30ffd8be8ba46e0b8cc5ac1300c4eba389211eaa..ded9a612c6cfd98a7076fb749457e0c3da9aa44c 100644 --- a/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py @@ -17,8 +17,6 @@ class KernelViewParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) self.step_range = [] - ProfilerLogger.init(self._profiler_path, "KernelViewParser") - self.logger = ProfilerLogger.get_instance() @classmethod def _project_map_for_headers(cls, input_headers: list): @@ -35,6 +33,8 @@ class KernelViewParser(BaseParser): return output_headers def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "KernelViewParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) self._init_step_range(deps_data) diff --git a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py index a82c3dc3c8f08ebe6875f0b7a5e59730c6cf4e6e..47255efd09dbdca635e4888fd575f311fbcff5ef 100644 --- a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py @@ -34,8 +34,6 @@ class MemoryViewParser(BaseParser): self.ge_record_list = [] self.memory_data = [] self.component_list = [] - ProfilerLogger.init(self._profiler_path, "MemoryViewParser") - self.logger = ProfilerLogger.get_instance() @staticmethod def _get_data_from_file(file_set: set, file_type_bean: any, bean_list: bool = False) -> list: @@ -73,6 +71,8 @@ class MemoryViewParser(BaseParser): return [cur_record_list, pta_ge_record_list] def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "MemoryViewParser") + self.logger = ProfilerLogger.get_instance() try: self.memory_data = deps_data.get(Constant.MEMORY_PREPARE, {}).get("memory_data", {}).get(Constant.Text, []) self.pta_record_list = deps_data.get(Constant.MEMORY_PREPARE, {}).get("pta_record_list", []) diff --git a/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py b/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py index f87e8dc8b85e7f35097afd2666194f7cd0311b68..7c10e9d4bf45c2881fb8bd04ae3c2b1124f578c5 100644 --- a/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py @@ -22,10 +22,10 @@ class OperatorViewParser(BaseParser): self._torch_op_node = [] self._root_node = None self._kernel_dict = {} - ProfilerLogger.init(self._profiler_path, "OperatorViewParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "OperatorViewParser") + self.logger = ProfilerLogger.get_instance() try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) self._kernel_dict = deps_data.get(Constant.RELATION_PARSER, {}) diff --git a/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py b/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py index 2f793a8af8b611559613799a004531224c366590..b4a85271d99034e55936d682e9b4748f6251cf11 100644 --- a/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py @@ -23,10 +23,10 @@ class StackViewParser(BaseParser): self._root_node = None self._kernel_dict = {} self._metric = param_dict.get("metric") - ProfilerLogger.init(self._profiler_path, "StackViewParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "StackViewParser") + self.logger = ProfilerLogger.get_instance() try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py index 744e2cd8a6e1a42b9e9e813f5cb27c51cd34ce61..46093bec4e8e2cbe50af5590be96f37ad9ac574f 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py @@ -51,8 +51,6 @@ class TraceStepTimeParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) self.step_range = [] - ProfilerLogger.init(self._profiler_path, "TraceStepTimeParser") - self.logger = ProfilerLogger.get_instance() @classmethod def is_float_num(cls, num): @@ -165,6 +163,8 @@ class TraceStepTimeParser(BaseParser): FileManager.create_csv_file(output_path, print_time, file_name, self.title) def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TraceStepTimeParser") + self.logger = ProfilerLogger.get_instance() try: self._init_step_range(deps_data) self.generate_view() diff --git a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py index f90100e869fd4c4ea92661dd2183b8fd20808412..c5e572e1bcfeba5ecaa4c4e6db93b47c896392eb 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py @@ -27,8 +27,6 @@ class TraceViewParser(BaseParser): self._trace_data = [] self._torch_op_node = [] self._root_node = None - ProfilerLogger.init(self._profiler_path, "TraceViewParser") - self.logger = ProfilerLogger.get_instance() @staticmethod def _prune_trace_by_level(json_data: list) -> list: @@ -47,6 +45,8 @@ class TraceViewParser(BaseParser): return result def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TraceViewParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py index 8ef2072be611814bb0a604685b957745d8d221fa..da8037f982bbc2ba77f18a3aa5928565bf45a28e 100644 --- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py +++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py @@ -34,10 +34,10 @@ class CANNAnalyzeParser(BaseParser): super().__init__(name, param_dict) self._cann_path = ProfilerPathManager.get_cann_path(self._profiler_path) self.msprof_path = shutil.which("msprof") - ProfilerLogger.init(self._profiler_path, "CANNAnalyzeParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CANNAnalyzeParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) if not os.path.isdir(self._cann_path): diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py index 6a703d0b954ecca3a58621cd940b23f7726dc27c..7228525fae6d03a8d41a2f50b6ca9094fee8070b 100644 --- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py +++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py @@ -41,10 +41,10 @@ class CANNExportParser(BaseParser): super().__init__(name, param_dict) self._cann_path = ProfilerPathManager.get_cann_path(self._profiler_path) self.msprof_path = shutil.which("msprof") - ProfilerLogger.init(self._profiler_path, "CANNExportParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CANNExportParser") + self.logger = ProfilerLogger.get_instance() try: ProfilerConfig().load_info(self._profiler_path) if not os.path.isdir(self._cann_path): diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py index 6cc6f235165107299886fb4cf936e927dbd687b4..939e06cf748ba4a011a9a33b4ded585fe04f3310 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py @@ -28,10 +28,10 @@ class TracePreParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "TracePreParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TracePreParser") + self.logger = ProfilerLogger.get_instance() try: fwk_trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data() trace_file_path = os.path.join(self._output_path, Constant.TRACE_VIEW_TEMP) if os.path.isdir( diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py index e6eb02ddb81d7ffce69d4e2d60899beb62012c61..5e8a941de2873cf071baa412a50d964978fce539 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py @@ -23,10 +23,10 @@ __all__ = [] class RelationParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "RelationParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "RelationParser") + self.logger = ProfilerLogger.get_instance() try: kernel_dict = FwkCANNRelationParser(self._profiler_path).get_kernel_dict() except Exception as e: