From fe5ca678aa178d9f83e7f8576fc39666908d50f8 Mon Sep 17 00:00:00 2001
From: hehongzhe <935062458@qq.com>
Date: Thu, 5 Jun 2025 15:23:38 +0800
Subject: [PATCH] Remove memory component_type from profiler; add parser logs

---
 .../csrc/core/npu/NPUCachingAllocator.cpp     |  9 +--
 .../csrc/core/npu/NPUWorkspaceAllocator.cpp   | 57 ++++---------------
 torch_npu/csrc/profiler/npu_profiler.cpp      |  2 -
 torch_npu/csrc/profiler/npu_profiler.h        |  6 --
 .../csrc/toolkit/profiler/inc/data_reporter.h |  3 -
 .../toolkit/profiler/src/data_reporter.cpp    |  2 +-
 .../analysis/prof_bean/_memory_use_bean.py    | 15 ++---
 .../analysis/prof_common_func/_constant.py    |  3 -
 .../analysis/prof_common_func/_log.py         |  6 +-
 .../prof_config/_fwk_file_parser_config.py    |  2 +-
 .../prof_view/_memory_prepare_parser.py       | 12 ++--
 .../analysis/prof_view/_memory_view_parser.py |  5 --
 .../prof_view/_trace_step_time_parser.py      |  5 +-
 .../analysis/prof_view/_trace_view_parser.py  |  5 ++
 .../prof_view/cann_parse/_cann_export.py      |  7 +++
 .../prepare_parse/_fwk_pre_parser.py          |  2 +
 16 files changed, 47 insertions(+), 94 deletions(-)

diff --git a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
index 74afc22031..cf5e903881 100644
--- a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
+++ b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
@@ -1450,8 +1450,7 @@ public:
             torch_npu::profiler::MstxMgr::GetInstance()->memRegionsRegister(msleaksDomain, &regionDesc);
         }
         torch_npu::profiler::reportMemoryDataToNpuProfiler({ static_cast<int8_t>(c10::DeviceType::PrivateUse1),
-            block->device, static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::CACHING_ALLOCATOR),
-            static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_MALLOC), allocator_type,
+            block->device, static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_MALLOC), allocator_type,
             reinterpret_cast<int64_t>(block->ptr), block->size,
             stats.allocated_bytes[static_cast<size_t>(StatType::AGGREGATE)].current,
             stats.reserved_bytes[static_cast<size_t>(StatType::AGGREGATE)].current,
@@ -1514,8 +1513,7 @@ public:
             torch_npu::profiler::MstxMgr::GetInstance()->memRegionsUnregister(msleaksDomain, orig_block_ptr);
         }
         torch_npu::profiler::reportMemoryDataToNpuProfiler({ static_cast<int8_t>(c10::DeviceType::PrivateUse1),
-            block->device, static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::CACHING_ALLOCATOR),
-            static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_FREE), allocator_type,
+            block->device, static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_FREE), allocator_type,
             reinterpret_cast<int64_t>(orig_block_ptr), -orig_block_size,
             stats.allocated_bytes[static_cast<size_t>(StatType::AGGREGATE)].current,
             stats.reserved_bytes[static_cast<size_t>(StatType::AGGREGATE)].current,
@@ -2281,8 +2279,7 @@ private:
         });
 #ifndef BUILD_LIBTORCH
         torch_npu::profiler::reportMemoryDataToNpuProfiler({ static_cast<int8_t>(c10::DeviceType::PrivateUse1),
-            block->device, static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::CACHING_ALLOCATOR),
-            static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_BLOCK_FREE), allocator_type,
+            block->device, static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_BLOCK_FREE), allocator_type,
             reinterpret_cast<int64_t>(orig_block_ptr), -original_block_size,
             stats.allocated_bytes[static_cast<size_t>(StatType::AGGREGATE)].current,
             stats.reserved_bytes[static_cast<size_t>(StatType::AGGREGATE)].current,
diff --git a/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp b/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp
index fa4e79ff7b..ba56ab495e 100644
--- a/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp
+++ b/torch_npu/csrc/core/npu/NPUWorkspaceAllocator.cpp
@@ -106,15 +106,14 @@ public:
                 torch_npu::profiler::reportMemoryDataToNpuProfiler({
                     static_cast<int8_t>(c10::DeviceType::PrivateUse1),
                     device,
-                    static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::WORKSPACE_ALLOCATOR),
                     static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_FREE),
                     static_cast<uint8_t>(torch_npu::profiler::MemoryAllocatorType::ALLOCATOR_INNER),
                     reinterpret_cast<int64_t>(block->data_ptr),
                     -block->size,
-                    stats.allocated_bytes.current,
-                    stats.reserved_bytes.current,
-                    stats.allocated_bytes.current,
-                    reinterpret_cast<int64_t>(stream)}
+                    get_mem_size(),
+                    0, // reserved_bytes not used
+                    0, // active_bytes not used
+                  reinterpret_cast<int64_t>(stream)}
                 );
 #endif
                 block->data_ptr = nullptr;
@@ -147,14 +146,13 @@ public:
             torch_npu::profiler::reportMemoryDataToNpuProfiler({
                 static_cast<int8_t>(c10::DeviceType::PrivateUse1),
                 device,
-                static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::WORKSPACE_ALLOCATOR),
                 static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_MALLOC),
                 static_cast<uint8_t>(torch_npu::profiler::MemoryAllocatorType::ALLOCATOR_INNER),
                 reinterpret_cast<int64_t>(block->data_ptr),
                 block->size,
-                stats.allocated_bytes.current,
-                stats.reserved_bytes.current,
-                stats.allocated_bytes.current,
+                get_mem_size(),
+                0, // reserved_bytes not used
+                0, // active_bytes not used
                 reinterpret_cast<int64_t>(stream)}
             );
             const c10_npu::impl::PyCallbackTrigger* trigger = c10_npu::impl::NPUTrace::getTrace();
@@ -167,46 +165,12 @@ public:
 
         allocated_size = block->size;
         update_stat(stats.allocated_bytes, block->size);
-#ifndef BUILD_LIBTORCH
-        torch_npu::profiler::reportMemoryDataToNpuProfiler({
-            static_cast<int8_t>(c10::DeviceType::PrivateUse1),
-            device,
-            static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::WORKSPACE_ALLOCATOR),
-            static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_MALLOC),
-            static_cast<uint8_t>(torch_npu::profiler::MemoryAllocatorType::ALLOCATOR_INNER),
-            reinterpret_cast<int64_t>(block->data_ptr),
-            block->size,
-            stats.allocated_bytes.current,
-            stats.reserved_bytes.current,
-            stats.allocated_bytes.current,
-            reinterpret_cast<int64_t>(stream)}
-        );
-#endif
         return block->data_ptr;
     }
 
     void free()
     {
         update_stat(stats.allocated_bytes, -allocated_size);
-#ifndef BUILD_LIBTORCH
-        for (const auto& block_pair : blocks) {
-            if (block_pair.second->data_ptr != nullptr) {
-                torch_npu::profiler::reportMemoryDataToNpuProfiler({
-                    static_cast<int8_t>(c10::DeviceType::PrivateUse1),
-                    device,
-                    static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::WORKSPACE_ALLOCATOR),
-                    static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_FREE),
-                    static_cast<uint8_t>(torch_npu::profiler::MemoryAllocatorType::ALLOCATOR_INNER),
-                    reinterpret_cast<int64_t>(block_pair.second->data_ptr),
-                    -allocated_size,
-                    stats.allocated_bytes.current,
-                    stats.reserved_bytes.current,
-                    stats.allocated_bytes.current,
-                    reinterpret_cast<int64_t>(block_pair.first)}
-                );
-            }
-        }
-#endif
     }
 
     // return to the system allocator
@@ -245,14 +209,13 @@ public:
                 torch_npu::profiler::reportMemoryDataToNpuProfiler({
                     static_cast<int8_t>(c10::DeviceType::PrivateUse1),
                     device,
-                    static_cast<uint8_t>(torch_npu::profiler::MemoryComponentType::WORKSPACE_ALLOCATOR),
                     static_cast<uint8_t>(torch_npu::profiler::MemoryDataType::MEMORY_FREE),
                     static_cast<uint8_t>(torch_npu::profiler::MemoryAllocatorType::ALLOCATOR_INNER),
                     reinterpret_cast<int64_t>(block_pair.second->data_ptr),
                     -block_pair.second->size,
-                    stats.allocated_bytes.current,
-                    stats.reserved_bytes.current,
-                    stats.allocated_bytes.current,
+                    get_mem_size(),
+                    0, // reserved_bytes not used
+                    0, // active_bytes not used
                     reinterpret_cast<int64_t>(block_pair.first)}
                 );
 #endif
diff --git a/torch_npu/csrc/profiler/npu_profiler.cpp b/torch_npu/csrc/profiler/npu_profiler.cpp
index 295eda9aea..9d9f0fdfa0 100644
--- a/torch_npu/csrc/profiler/npu_profiler.cpp
+++ b/torch_npu/csrc/profiler/npu_profiler.cpp
@@ -131,7 +131,6 @@ struct NpuProfilerThreadLocalState : public ProfilerStateBase {
                 device.index(),
                 0,
                 0,
-                0,
                 Utils::GetTid(),
                 Utils::GetPid()
             ));
@@ -390,7 +389,6 @@ void reportMemoryDataToNpuProfiler(const MemoryUsage& data)
         data.stream_ptr,
         data.device_type,
         data.device_index,
-        data.component_type,
         data.data_type,
         data.allocator_type,
         Utils::GetTid(),
diff --git a/torch_npu/csrc/profiler/npu_profiler.h b/torch_npu/csrc/profiler/npu_profiler.h
index 2127825bc1..05afa29d40 100644
--- a/torch_npu/csrc/profiler/npu_profiler.h
+++ b/torch_npu/csrc/profiler/npu_profiler.h
@@ -26,11 +26,6 @@ enum class NpuActivityType {
     NPU,
 };
 
-enum class MemoryComponentType {
-    CACHING_ALLOCATOR = 0,
-    WORKSPACE_ALLOCATOR,
-};
-
 enum class MemoryDataType {
     MEMORY_MALLOC = 0,
     MEMORY_FREE,
@@ -47,7 +42,6 @@ enum class MemoryAllocatorType {
 struct MemoryUsage {
     int8_t device_type{0};
     int8_t device_index{0};
-    uint8_t component_type{static_cast<uint8_t>(MemoryComponentType::CACHING_ALLOCATOR)};
     uint8_t data_type{static_cast<uint8_t>(MemoryDataType::MEMORY_INVALID)};
     uint8_t allocator_type{static_cast<uint8_t>(MemoryAllocatorType::ALLOCATOR_INVALID)};
     int64_t ptr{0};
diff --git a/torch_npu/csrc/toolkit/profiler/inc/data_reporter.h b/torch_npu/csrc/toolkit/profiler/inc/data_reporter.h
index 0eb9da1f35..7101c7e805 100644
--- a/torch_npu/csrc/toolkit/profiler/inc/data_reporter.h
+++ b/torch_npu/csrc/toolkit/profiler/inc/data_reporter.h
@@ -347,7 +347,6 @@ struct MemoryData : BaseReportData {
     int64_t stream_ptr{0};
     int8_t device_type{0};
     int8_t device_index{0};
-    uint8_t component_type{0};
     uint8_t data_type{0};
     uint8_t allocator_type{0};
     uint64_t thread_id{0};
@@ -362,7 +361,6 @@ struct MemoryData : BaseReportData {
         int64_t stream_ptr,
         int8_t device_type,
         int8_t device_index,
-        uint8_t component_type,
         uint8_t data_type,
         uint8_t allocator_type,
         uint64_t thread_id,
@@ -377,7 +375,6 @@ struct MemoryData : BaseReportData {
           stream_ptr(stream_ptr),
           device_type(device_type),
           device_index(device_index),
-          component_type(component_type),
           data_type(data_type),
           allocator_type(allocator_type),
           thread_id(thread_id),
diff --git a/torch_npu/csrc/toolkit/profiler/src/data_reporter.cpp b/torch_npu/csrc/toolkit/profiler/src/data_reporter.cpp
index 2cbce73a06..669ee8a4d1 100644
--- a/torch_npu/csrc/toolkit/profiler/src/data_reporter.cpp
+++ b/torch_npu/csrc/toolkit/profiler/src/data_reporter.cpp
@@ -93,7 +93,7 @@ std::vector<uint8_t> MemoryData::encode()
                               total_reserved, total_active, stream_ptr},
                              result);
     encodeFixedData<int8_t>({device_type, device_index}, result);
-    encodeFixedData<uint8_t>({component_type, data_type, allocator_type}, result);
+    encodeFixedData<uint8_t>({data_type, allocator_type}, result);
     encodeFixedData<uint64_t>({thread_id, process_id}, result);
 
     std::vector<uint8_t> resultTLV;
diff --git a/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py b/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py
index 0385af8d79..4e29f204e3 100644
--- a/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py
+++ b/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py
@@ -19,15 +19,14 @@ class MemoryEnum(Enum):
     STREAM_PTR = 6
     DEVICE_TYPE = 7
     DEVICE_INDEX = 8
-    COMPONENT_TYPE = 9
-    DATA_TYPE = 10
-    ALLOCATOR_TYPE = 11
-    THREAD_ID = 12
-    PROCESS_ID = 13
+    DATA_TYPE = 9
+    ALLOCATOR_TYPE = 10
+    THREAD_ID = 11
+    PROCESS_ID = 12
 
 
 class MemoryUseBean(CommonBean):
-    CONSTANT_STRUCT = "<7q2b3B2Q"
+    CONSTANT_STRUCT = "<7q2b2B2Q"
     NPU_ID = 20
     CPU_ID = 0
     INNER_ALLOCATOR = 0
@@ -89,10 +88,6 @@ class MemoryUseBean(CommonBean):
     def device_index(self) -> int:
         return int(self._constant_data[MemoryEnum.DEVICE_INDEX.value])
 
-    @property
-    def component_type(self) -> int:
-        return int(self._constant_data[MemoryEnum.COMPONENT_TYPE.value])
-
     @property
     def data_type(self) -> int:
         return int(self._constant_data[MemoryEnum.DATA_TYPE.value])
diff --git a/torch_npu/profiler/analysis/prof_common_func/_constant.py b/torch_npu/profiler/analysis/prof_common_func/_constant.py
index 56809c9b7f..3906f92e86 100644
--- a/torch_npu/profiler/analysis/prof_common_func/_constant.py
+++ b/torch_npu/profiler/analysis/prof_common_func/_constant.py
@@ -81,7 +81,6 @@ class Constant(object):
     GE = "GE"
     APP = "APP"
     PTA_GE = "PTA+GE"
-    WORKSPACE = "WORKSPACE"
     B_TO_KB = 1024.0
     KB_TO_MB = 1024.0
     B_TO_MB = 1024.0 ** 2
@@ -94,8 +93,6 @@ class Constant(object):
     MEMORY_MALLOC = 0
     MEMORY_FREE = 1
     MEMORY_BLOCK_FREE = 2
-    CACHING_TYPE = 0
-    WORKSPACE_TYPE = 1
 
     # profiler config
     CONFIG = "config"
diff --git a/torch_npu/profiler/analysis/prof_common_func/_log.py b/torch_npu/profiler/analysis/prof_common_func/_log.py
index 15ba7a80f9..0bf0acad2b 100644
--- a/torch_npu/profiler/analysis/prof_common_func/_log.py
+++ b/torch_npu/profiler/analysis/prof_common_func/_log.py
@@ -34,6 +34,7 @@ class ProfilerLogger:
     BACKUP_COUNT = 3
     # logger instance
     _instance = None
+    _pid = None
 
     @classmethod
     def get_instance(cls) -> logging.Logger:
@@ -54,7 +55,9 @@ class ProfilerLogger:
             RuntimeError: If logger initialization fails
         """
         if cls._instance is not None:
-            return
+            if cls._pid == os.getpid():
+                return
+            cls.destroy()
 
         # Create logs directory
         log_dir = os.path.join(output_dir, cls.DEFAULT_LOG_DIR)
@@ -89,6 +92,7 @@ class ProfilerLogger:
         logger.addHandler(file_handler)
 
         cls._instance = logger
+        cls._pid = os.getpid()
         logger.info("Profiler logger initialized at: %s", log_file)
 
     @classmethod
diff --git a/torch_npu/profiler/analysis/prof_config/_fwk_file_parser_config.py b/torch_npu/profiler/analysis/prof_config/_fwk_file_parser_config.py
index 714a620401..a4edff67c6 100644
--- a/torch_npu/profiler/analysis/prof_config/_fwk_file_parser_config.py
+++ b/torch_npu/profiler/analysis/prof_config/_fwk_file_parser_config.py
@@ -25,7 +25,7 @@ class FwkFileParserConfig:
     FILE_BEAN_MAP = {
         FileTag.TORCH_OP: {"bean": TorchOpBean, "is_tlv": True, "struct_size": 58},
         FileTag.OP_MARK: {"bean": OpMarkBean, "is_tlv": True, "struct_size": 40},
-        FileTag.MEMORY: {"bean": MemoryUseBean, "is_tlv": True, "struct_size": 77},
+        FileTag.MEMORY: {"bean": MemoryUseBean, "is_tlv": True, "struct_size": 76},
         FileTag.GC_RECORD: {"bean": GCRecordBean, "is_tlv": False, "struct_size": 24},
         FileTag.PYTHON_TRACER_FUNC: {"bean": PythonTracerFuncBean, "is_tlv": False, "struct_size": 33},
         FileTag.PYTHON_TRACER_HASH: {"bean": PythonTracerHashBean, "is_tlv": True, "struct_size": 8},
diff --git a/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py
index 4cb4ed35db..c1bc74de77 100644
--- a/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py
+++ b/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py
@@ -210,15 +210,13 @@ class MemoryPrepareParser(BaseParser):
             else:
                 op_name = self._find_real_op_name_of_record(dequeue_record, torch_ops)
             if records_len == 1:
-                if hasattr(records[0], 'component_type') and records[0].component_type == Constant.CACHING_TYPE:
-                    self._incomplete_num += 2
+                self._incomplete_num += 2
                 combine_data = [op_name, records[0].alloc_size_for_db, records[0].time_ns, None, None, None, None,
                                 records[0].total_allocated_for_db, records[0].total_reserved_for_db, records[0].total_active_for_db,
                                 None, None, None,
                                 records[0].stream_ptr, records[0].device_index]
             elif records_len == 2:
-                if hasattr(records[0], 'component_type') and records[0].component_type == Constant.CACHING_TYPE:
-                    self._incomplete_num += 1
+                self._incomplete_num += 1
                 active_release_time = records[1].time_ns if records[1].data_type == Constant.MEMORY_BLOCK_FREE else None
                 release_time = records[1].time_ns if records[1].data_type == Constant.MEMORY_FREE else None
                 duration_time = records[1].time_ns - records[0].time_ns if records[1].data_type == Constant.MEMORY_FREE else None
@@ -253,15 +251,13 @@ class MemoryPrepareParser(BaseParser):
             else:
                 op_name = self._find_real_op_name_of_record(dequeue_record, torch_ops)
             if records_len == 1:
-                if hasattr(records[0], 'component_type') and records[0].component_type == Constant.CACHING_TYPE:
-                    self._incomplete_num += 2
+                self._incomplete_num += 2
                 combine_data = [op_name, records[0].alloc_size, convert_ns2us_str(records[0].time_ns, "\t"), None, None, None, None,
                                 records[0].total_allocated, records[0].total_reserved, records[0].total_active,
                                 None, None, None,
                                 records[0].stream_ptr, records[0].device_tag]
             elif records_len == 2:
-                if hasattr(records[0], 'component_type') and records[0].component_type == Constant.CACHING_TYPE:
-                    self._incomplete_num += 1
+                self._incomplete_num += 1
                 active_release_time = convert_ns2us_str(records[1].time_ns, "\t") if records[1].data_type == Constant.MEMORY_BLOCK_FREE else None
                 release_time = convert_ns2us_str(records[1].time_ns, "\t") if records[1].data_type == Constant.MEMORY_FREE else None
                 duration_time = convert_ns2us_str(records[1].time_ns - records[0].time_ns, "\t") if records[1].data_type == Constant.MEMORY_FREE else None
diff --git a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py
index 04ef7c0e90..fa834e543b 100644
--- a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py
+++ b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py
@@ -53,11 +53,6 @@ class MemoryViewParser(BaseParser):
 
     @staticmethod
     def _combine_record(last_record, cur_record):
-        if hasattr(cur_record, 'component_type') and cur_record.component_type == Constant.WORKSPACE_TYPE:
-            cur_record_list = [Constant.WORKSPACE, convert_ns2us_str(cur_record.time_ns, tail="\t"),
-                               cur_record.total_allocated, cur_record.total_reserved, cur_record.total_active,
-                               cur_record.stream_ptr, cur_record.device_tag]
-            return [cur_record_list]
         cur_record_list = cur_record.row
         if last_record:
             pta_ge_record_list = [Constant.PTA_GE, convert_ns2us_str(cur_record.time_ns, tail="\t"),
diff --git a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py
index f7a873669d..398a1772fc 100644
--- a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py
+++ b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py
@@ -157,7 +157,10 @@ class TraceStepTimeParser(BaseParser):
                     [device, step, step_time['compute'], step_time['comunNotOverlp'], step_time['Overlp'],
                      step_time['comun'], step_time['free'], step_time['stage'], step_time['bubble'],
                      step_time['comunNotOverlpRec'], step_time['prepare']])
-        print_time.sort(key=lambda x: (x[0], int(x[1])))  # step is a string
+        if self.step_range:
+            print_time.sort(key=lambda x: (x[0], int(x[1])))  # step is a string
+        else:
+            print_time.sort(key=lambda x: (x[0]))  # step is None
         FileManager.create_csv_file(output_path, print_time, file_name, self.title)
 
     def run(self, deps_data: dict):
diff --git a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py
index f90100e869..78434859ce 100644
--- a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py
+++ b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py
@@ -29,6 +29,7 @@ class TraceViewParser(BaseParser):
         self._root_node = None
         ProfilerLogger.init(self._profiler_path, "TraceViewParser")
         self.logger = ProfilerLogger.get_instance()
+        self.logger.info("TraceViewParser init success")
 
     @staticmethod
     def _prune_trace_by_level(json_data: list) -> list:
@@ -48,8 +49,10 @@ class TraceViewParser(BaseParser):
 
     def run(self, deps_data: dict):
         try:
+            self.logger.info("TraceViewParser run success")
             ProfilerConfig().load_info(self._profiler_path)
             torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, [])
+            self.logger.info("TraceViewParser torch_op_node success")
             if torch_op_node:
                 self._root_node = torch_op_node[0]
                 self._torch_op_node = torch_op_node[1:]
@@ -60,9 +63,11 @@ class TraceViewParser(BaseParser):
         return Constant.SUCCESS, None
 
     def generate_view(self) -> None:
+        self.logger.info("TraceViewParser generate_view start")
         if not ProfilerPathManager.get_cann_path(self._profiler_path):
             self._trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data()
         else:
+            self.logger.info("TraceViewParser CANNFileParser start")
             msprof_timeline_data = CANNFileParser(self._profiler_path).get_timeline_all_data()
             self._trace_data.extend(
                 self._prune_trace_by_level(msprof_timeline_data))
diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py
index 49d4e7eb8f..53793e8e20 100644
--- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py
+++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py
@@ -97,9 +97,12 @@ class CANNTimelineParser(BaseParser):
     def __init__(self, name: str, param_dict: dict):
         super().__init__(name, param_dict)
         self._cann_path = ProfilerPathManager.get_cann_path(self._profiler_path)
+        ProfilerLogger.init(self._profiler_path, "CANNTimelineParser")
+        self.logger = ProfilerLogger.get_instance()
 
     def run(self, deps_data: dict):
         if not os.path.isdir(self._cann_path):
+            self.logger.info("CANN profiling data is not exist.")
             return Constant.SUCCESS, None
         ProfilerConfig().load_info(self._profiler_path)
         if Constant.Text in self._export_type:
@@ -108,18 +111,22 @@ class CANNTimelineParser(BaseParser):
                 if os.path.exists(output_path):
                     for file_name in os.listdir(output_path):
                         if file_name.endswith('.csv'):
+                            self.logger.info("CANN timeline data parsed successfully11.")
                             return Constant.SUCCESS, None
                 try:
                     time.sleep(Constant.SLEEP_TIME)
                 except InterruptedError:
+                    self.logger.info("Failed to parse CANN timeline data.")
                     return Constant.FAIL, None
         else:
             patten = r'^msprof_\d+\.db$'
             while True:
                 for file in os.listdir(self._cann_path):
                     if re.match(patten, file) and os.path.isfile(os.path.join(self._cann_path, file)):
+                        self.logger.info("CANN timeline data parsed successfully.")
                         return Constant.SUCCESS, None
                 try:
                     time.sleep(Constant.SLEEP_TIME)
                 except InterruptedError:
+                    self.logger.info("Failed to parse CANN timeline data11.")
                     return Constant.FAIL, None
diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py
index 490488d5e1..1f591fe6a0 100644
--- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py
+++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py
@@ -40,6 +40,7 @@ class TracePreParser(BaseParser):
         except Exception as e:
             self.logger.error("Failed to create prepare trace json, error: %s", str(e), exc_info=True)
             return Constant.FAIL, None
+        self.logger.info("TracePreParser success")
         return Constant.SUCCESS, None
 
 
@@ -56,4 +57,5 @@ class TreeBuildParser(BaseParser):
         except Exception as e:
             self.logger.error("Failed to build torch op tree, error: %s", str(e), exc_info=True)
             return Constant.FAIL, []
+        self.logger.info("TreeBuildParser torch_op_node success")
         return Constant.SUCCESS, torch_op_node
-- 
Gitee