From 61b2ec73c5862613a7ad5413a5f82d22122f52e1 Mon Sep 17 00:00:00 2001
From: zhouxianqi <13165993773@163.com>
Date: Wed, 20 Sep 2023 11:37:09 +0800
Subject: [PATCH] bug_fix_for_performance_analysis

---
 .../profiling_analysis/npu_parser.py | 55 ++++++++++---------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py
index 5fca9d3ea32..34a191e44e5 100644
--- a/profiler/compare_tools/profiling_analysis/npu_parser.py
+++ b/profiler/compare_tools/profiling_analysis/npu_parser.py
@@ -43,7 +43,7 @@ class NpuProfilingParser:
         communication_time = 0
         min_ts = sys.float_info.max
         max_ts = sys.float_info.min
-        ts_flag = False  # indicates that the compute time has not been obtained yet
+        is_cluster = False  # indicates that the compute time has not been obtained yet
         data = FileReader.read_trace_file(self.npu_json_file)
         event_wait_sqe = defaultdict(list)
         ai_core_dict = defaultdict(list)
@@ -52,15 +52,15 @@ class NpuProfilingParser:
         for dic in data:
             self.get_ts_by_task_type(dic, event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res)
             if ('name' in dic) and (dic.get('name', '') == 'Computing'):
-                ts_flag = True
-                ts = dic.get('ts')
+                is_cluster = True
+                ts = float(dic.get('ts', 0))
                 dur = dic.get('dur')
                 compute_time += dur
                 min_ts = ts if ts < min_ts else min_ts
                 max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts
             if ('name' in dic) and (dic.get('name', '') == 'Communication(Not Overlapped)'):
-                ts_flag = True
-                ts = dic.get('ts')
+                is_cluster = True
+                ts = float(dic.get('ts'))
                 dur = dic.get('dur')
                 communication_time += dur
                 min_ts = ts if ts < min_ts else min_ts
@@ -69,25 +69,28 @@ class NpuProfilingParser:
         # a stream containing both AI_CORE and EVENT_WAIT_SQE is a compute stream
         compute_stream = []
         parallel_stream = []
-        # no operator parallelism
-        if len(ai_core_dict) == 1:
-            compute_stream.append(min(ai_core_dict.keys()))
-        elif len(ai_core_dict) == 2:  # 2 ai_core streams: parallel streams exist (currently at most 2 operator compute streams)
-            compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys())
-            parallel_stream = list(ai_core_dict.keys() - set(compute_stream))
-        else:
-            print('[WARNING] Npu trace json file lack of Stream info')
-            return
-        cs_event_wait_sqe_list = event_wait_sqe[compute_stream[0]]
-        if parallel_stream:
-            cs_ai_core_list = ai_core_dict[parallel_stream[0]]
-            sorted(cs_event_wait_sqe_list, key=lambda x: (x[0]))
-            sorted(cs_ai_core_list, key=lambda x: (x[0]))
-            self.parallel_time = self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list)
-        self.profiling_info.compute_time = compute_time / 10 ** 6 if ts_flag else ai_core_res[compute_stream[0]] / 10 ** 6
-        self.profiling_info.e2e_time = (max_ts - min_ts) / 10 ** 6 if ts_flag else (self.max_stream_ts - self.min_stream_ts) / 10 ** 6
+        if not is_cluster:
+            # single-node, single-device runs have no overlap analysis
+            if len(ai_core_dict) == 1:
+                compute_stream.append(min(ai_core_dict.keys()))
+            elif len(ai_core_dict) == 2:  # 2 ai_core streams: parallel streams exist (currently at most 2 operator compute streams)
+                compute_stream = list(event_wait_sqe.keys() & ai_core_dict.keys())
+                parallel_stream = list(ai_core_dict.keys() - set(compute_stream))
+            else:
+                print('[WARNING] Npu trace json file lack of Stream info')
+                return
+            cs_event_wait_sqe_list = event_wait_sqe[compute_stream[0]]
+            if parallel_stream:
+                cs_ai_core_list = ai_core_dict[parallel_stream[0]]
+                sorted(cs_event_wait_sqe_list, key=lambda x: (x[0]))
+                sorted(cs_ai_core_list, key=lambda x: (x[0]))
+                self.parallel_time = self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list)
+        self.profiling_info.compute_time = compute_time / 10 ** 6 if is_cluster else \
+            ai_core_res[compute_stream[0]] / 10 ** 6
+        self.profiling_info.e2e_time = (max_ts - min_ts) / 10 ** 6 if is_cluster else \
+            (self.max_stream_ts - self.min_stream_ts) / 10 ** 6
         self.profiling_info.communication_not_overlapped = communication_time / 10 ** 6 \
-            if ts_flag else (event_wait_sqe_res[compute_stream[0]] - self.parallel_time) / 10 ** 6
+            if is_cluster else (event_wait_sqe_res[compute_stream[0]] - self.parallel_time) / 10 ** 6
         time_required = self.profiling_info.compute_time + self.profiling_info.communication_not_overlapped
         if self.npu_step_time:
             self.profiling_info.scheduling_time = self.npu_step_time - time_required
@@ -102,9 +105,9 @@ class NpuProfilingParser:
         json_data = FileReader.read_trace_file(self.info_json)
         if not json_data:
             return
-        if "ProfilerActivity.CPU" not in json_data.get('config', {}).get('common_config', {}).get('activities', []):
+        if "ProfilerActivity.CPU" in json_data.get('config', {}).get('common_config', {}).get('activities', []):
            return
-        if 'Level0' != json_data.get('experimental_config', {}).get('_profiler_level', ''):
+        if 'Level0' != json_data.get('config', {}).get('experimental_config', {}).get('_profiler_level', ''):
            return
        self.profiling_info.minimal_profiling = True

@@ -180,7 +183,7 @@ class NpuProfilingParser:
            if args.get('Task Type') == 'EVENT_WAIT_SQE':
                enent_wait_res[stream_id] += dur
                event_wait_sqe[stream_id].append([ts, ts + dur])
-            elif args.get('Task Type') == 'AI_CORE':
+            elif args.get('Task Type') in ('AI_CORE', 'MIX_AIC', 'MIX_AIV', 'AI_CPU', 'AI_VECTOR_CORE', 'FFTS_PLUS'):
                ai_core_res[stream_id] += dur
                ai_core_dict[stream_id].append([ts, ts + dur])
            self.min_stream_ts = ts if ts < self.min_stream_ts else self.min_stream_ts
--
Gitee
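For context, a minimal standalone sketch of the overlap-analysis accumulation that the renamed is_cluster flag gates. The function name summarize_overlap_events and the synthetic events list are illustrative only and not part of npu_parser.py; the sketch assumes trace timestamps and durations are in microseconds, mirroring how 'Computing' and 'Communication(Not Overlapped)' durations are summed and divided by 10 ** 6 in the patched branch.

import sys

def summarize_overlap_events(trace_events):
    # Illustrative sketch only; trace_events mimics entries of the trace JSON
    # consumed by NpuProfilingParser.
    compute_time = 0
    communication_time = 0
    min_ts = sys.float_info.max
    max_ts = sys.float_info.min
    is_cluster = False  # becomes True once overlap-analysis events are seen
    for dic in trace_events:
        name = dic.get('name', '')
        if name not in ('Computing', 'Communication(Not Overlapped)'):
            continue
        is_cluster = True
        ts = float(dic.get('ts', 0))  # the patch likewise casts ts to float
        dur = dic.get('dur', 0)
        if name == 'Computing':
            compute_time += dur
        else:
            communication_time += dur
        min_ts = min(min_ts, ts)
        max_ts = max(max_ts, ts + dur)
    if not is_cluster:
        return None  # caller would fall back to the per-stream (AI_CORE) path
    scale = 10 ** 6  # same divisor the patch uses
    return {
        'compute_time': compute_time / scale,
        'communication_not_overlapped': communication_time / scale,
        'e2e_time': (max_ts - min_ts) / scale,
    }

# Example usage with two synthetic events (note 'ts' may arrive as a string,
# which is why the float() cast in the patch matters):
events = [
    {'name': 'Computing', 'ts': '100.0', 'dur': 40.0},
    {'name': 'Communication(Not Overlapped)', 'ts': 140.0, 'dur': 10.0},
]
print(summarize_overlap_events(events))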