From a316f7dd45e26c7ad4ca1d7f3587ac50c1831e02 Mon Sep 17 00:00:00 2001 From: yu-liang-bin Date: Tue, 2 Sep 2025 14:27:43 +0800 Subject: [PATCH] fix profiler db bug --- test/profiler/test_memory_profiler.py | 8 ++++---- torch_npu/profiler/analysis/_profiling_parser.py | 4 ++-- torch_npu/profiler/analysis/prof_common_func/_constant.py | 1 + .../profiler/analysis/prof_common_func/_db_manager.py | 2 ++ .../profiler/analysis/prof_parse/_fwk_file_parser.py | 7 +++++++ .../profiler/analysis/prof_view/_memory_prepare_parser.py | 1 + 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/test/profiler/test_memory_profiler.py b/test/profiler/test_memory_profiler.py index 10d02bb7f9..1ea5f40cb6 100644 --- a/test/profiler/test_memory_profiler.py +++ b/test/profiler/test_memory_profiler.py @@ -1233,13 +1233,13 @@ class TestMemoryProfilerE2E(TestCase): aten::detach 7 (GRADIENT) -> 7 (GRADIENT) -- Optimizer -------------------------------------------------------------------------------------------- + aten::detach 7 (GRADIENT) -> 7 (GRADIENT) + aten::detach 7 (GRADIENT) -> 7 (GRADIENT) aten::clone 7 (GRADIENT) -> 10 (OPTIMIZER_STATE) - aten::detach 10 (OPTIMIZER_STATE) -> 10 (OPTIMIZER_STATE) - aten::detach 10 (OPTIMIZER_STATE) -> 10 (OPTIMIZER_STATE) aten::add_.Tensor 2 (PARAMETER), 10 (OPTIMIZER_STATE) -> 2 (PARAMETER) + aten::detach 9 (GRADIENT) -> 9 (GRADIENT) + aten::detach 9 (GRADIENT) -> 9 (GRADIENT) aten::clone 9 (GRADIENT) -> 11 (OPTIMIZER_STATE) - aten::detach 11 (OPTIMIZER_STATE) -> 11 (OPTIMIZER_STATE) - aten::detach 11 (OPTIMIZER_STATE) -> 11 (OPTIMIZER_STATE) aten::add_.Tensor 3 (PARAMETER), 11 (OPTIMIZER_STATE) -> 3 (PARAMETER)""", ) diff --git a/torch_npu/profiler/analysis/_profiling_parser.py b/torch_npu/profiler/analysis/_profiling_parser.py index 724a282ae1..fceaeed6ea 100644 --- a/torch_npu/profiler/analysis/_profiling_parser.py +++ b/torch_npu/profiler/analysis/_profiling_parser.py @@ -120,7 +120,7 @@ class ProfilingParser: parser_list = [] unique_parser_set = set() for export_type in set(ProfilerConfig().export_type): - for parser in parser_config.get(export_type).get(self._analysis_type): + for parser in parser_config.get(export_type, {}).get(self._analysis_type, []): if parser in unique_parser_set: continue unique_parser_set.add(parser) @@ -128,7 +128,7 @@ class ProfilingParser: # when cann package support default export db, add db parser to parser list if CannPackageManager.is_support_default_export_db(): - for parser in parser_config.get(Constant.Db).get(self._analysis_type): + for parser in parser_config.get(Constant.Db, {}).get(self._analysis_type, []): if parser in unique_parser_set: continue unique_parser_set.add(parser) diff --git a/torch_npu/profiler/analysis/prof_common_func/_constant.py b/torch_npu/profiler/analysis/prof_common_func/_constant.py index b9a0ecbe21..9c8540c1ab 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_constant.py +++ b/torch_npu/profiler/analysis/prof_common_func/_constant.py @@ -104,6 +104,7 @@ class Constant(object): RANK_ID = "rank_id" COMMON_CONFIG = "common_config" ACTIVITIES = "activities" + CPU_ACTIVITIES = "ProfilerActivity.CPU" NPU_ACTIVITIES = "ProfilerActivity.NPU" EXPERIMENTAL_CONFIG = "experimental_config" PROFILER_LEVEL = '_profiler_level' diff --git a/torch_npu/profiler/analysis/prof_common_func/_db_manager.py b/torch_npu/profiler/analysis/prof_common_func/_db_manager.py index 593a44dd78..fd6e20bd86 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_db_manager.py +++ b/torch_npu/profiler/analysis/prof_common_func/_db_manager.py @@ -192,6 +192,8 @@ class BasicDb: def close(self) -> None: self.db_path = None DbManager.destroy_db_connect(self.conn, self.curs) + self.conn = None + self.curs = None def judge_table_exist(self, table_name: str) -> bool: return DbManager.judge_table_exist(self.curs, table_name) diff --git a/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py b/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py index ba79447f93..cd537793df 100644 --- a/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py +++ b/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py @@ -2,6 +2,7 @@ import os import re from collections import defaultdict +from .._profiler_config import ProfilerConfig from ..prof_bean._torch_op_bean import TorchOpBean from ..prof_common_func._binary_decoder import BinaryDecoder from ..prof_common_func._constant import Constant, contact_2num, DbConstant @@ -86,6 +87,8 @@ class FwkFileParser: return enqueue_data_list, dequeue_data_list def get_torch_op_tree_node(self, torch_op_data: list, enqueue_data: list = None) -> list: + if Constant.CPU_ACTIVITIES not in ProfilerConfig().activities: + return [] if not torch_op_data: self.logger.error("Get torch op tree node failed, the torch op data is empty.") return [] @@ -95,6 +98,8 @@ class FwkFileParser: return result_data def get_fwk_trace_data(self, torch_op_data: list, enqueue_data_list: list, dequeue_data_list: list) -> list: + if Constant.CPU_ACTIVITIES not in ProfilerConfig().activities: + return [] if torch_op_data: pid = torch_op_data[0].pid elif enqueue_data_list or dequeue_data_list: @@ -193,6 +198,8 @@ class FwkFileParser: start_connection_id += 1 def get_fwk_api(self, torch_op_data: list, enqueue_data_list: list, dequeue_data_list: list) -> dict: + if Constant.CPU_ACTIVITIES not in ProfilerConfig().activities: + return {} if torch_op_data: pid = torch_op_data[0].pid elif enqueue_data_list or dequeue_data_list: diff --git a/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py index 1111b6566c..f49a7df78e 100644 --- a/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py @@ -136,6 +136,7 @@ class MemoryPrepareParser(BaseParser): self.memory_data.setdefault(Constant.Text, self._complete_record_entry(pid_mem_buf, torch_ops)) if CannPackageManager.is_support_default_export_db(): self.memory_data.setdefault(Constant.Db, self._complete_record_entry_for_db(pid_mem_buf, torch_ops)) + return if Constant.Db in self._export_type: self.memory_data.setdefault(Constant.Db, self._complete_record_entry_for_db(pid_mem_buf, torch_ops)) -- Gitee