From 104e8eda6d79e4d4fe4794feaea1cd572c89a026 Mon Sep 17 00:00:00 2001
From: sunboquan
Date: Wed, 6 Sep 2023 11:16:41 +0800
Subject: [PATCH] Fix GPU event-name matching and NPU communication time calculation

Match kernel/nccl event names case-insensitively in the GPU trace parsers,
and derive NPU compute time and non-overlapped communication time from the
"Computing" and "Communication(Not Overlapped)" trace events when present.
---
 .../profiling_analysis/gpu_parser.py        |  8 ++++----
 .../profiling_analysis/npu_parser.py        | 18 +++++++++++++-----
 .../compare_tools/utils/profiling_parser.py |  4 ++--
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/profiler/compare_tools/profiling_analysis/gpu_parser.py b/profiler/compare_tools/profiling_analysis/gpu_parser.py
index 61cd4f41bf..4cefc94ccf 100644
--- a/profiler/compare_tools/profiling_analysis/gpu_parser.py
+++ b/profiler/compare_tools/profiling_analysis/gpu_parser.py
@@ -53,14 +53,14 @@ class GpuProfilingParser:
             cat = event.get('cat')
             if cat.lower() != 'kernel':
                 continue
-            if 'nccl' in name:
+            if 'nccl' in name.lower():
                 for timestep in range(ts + 1, ts + dur + 1):
                     marks[str(timestep)] += 1  # mark this timestep in communication stream
                 continue
             else:
                 for timestep in range(ts + 1, ts + dur + 1):
                     marks[str(timestep)] += -100  # mark this timestep in compute stream
-            if 'gemm' in name:
+            if 'gemm' in name.lower():
                 cube_time += float(dur)
             all_op_time += float(dur)
             op_list.append([ts, name, cat, dur])
@@ -90,7 +90,7 @@ class GpuProfilingParser:
     def parse_memory_reserved(self):
         memories = [
             event.get('args').get('Total Reserved') for event in self.trace_events
-            if event.get('name') == '[memory]' and event.get('args').get('Device Id') >= 0
+            if event.get('name', '').lower() == '[memory]' and event.get('args').get('Device Id') >= 0
         ]
         if not memories:
             print("Gpu profiling data doesn't contain memory info")
@@ -100,7 +100,7 @@ class GpuProfilingParser:
     def infer_compute_stream_id(self):
         kernel_stream_ids = []
         for event in self.trace_events:
-            is_kernel_exec_event = event.get('cat') == 'Kernel' and 'nccl' not in event.get('name')
+            is_kernel_exec_event = event.get('cat', '').lower() == 'kernel' and 'nccl' not in event.get('name', '').lower()
             has_stream_id_event = event.get('args') and event.get('args').get('stream')
             if is_kernel_exec_event and has_stream_id_event:
                 kernel_stream_ids.append(event.get('args').get('stream'))
diff --git a/profiler/compare_tools/profiling_analysis/npu_parser.py b/profiler/compare_tools/profiling_analysis/npu_parser.py
index bc4d21145f..d0547c04c9 100644
--- a/profiler/compare_tools/profiling_analysis/npu_parser.py
+++ b/profiler/compare_tools/profiling_analysis/npu_parser.py
@@ -38,6 +38,7 @@ class NpuProfilingParser:
             print('Npu trace json file is not available.')
             return
         compute_time = 0
+        communication_time = 0
         min_ts = sys.float_info.max
         max_ts = sys.float_info.min
         ts_flag = False  # indicates that no compute time has been read from the trace yet
@@ -48,13 +49,21 @@ class NpuProfilingParser:
         ai_core_res = defaultdict(float)
         for dic in data:
             self.get_ts_by_task_type(dic, event_wait_sqe, ai_core_dict, event_wait_sqe_res, ai_core_res)
-            if ('name' in dic) and (dic.get('name') == 'compute_time'):
+            if ('name' in dic) and (dic.get('name', '') == 'Computing'):
                 ts_flag = True
                 ts = dic.get('ts')
                 dur = dic.get('dur')
                 compute_time += dur
                 min_ts = ts if ts < min_ts else min_ts
                 max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts
+            if ('name' in dic) and (dic.get('name', '') == 'Communication(Not Overlapped)'):
+                ts_flag = True
+                ts = dic.get('ts')
+                dur = dic.get('dur')
+                communication_time += dur
+                min_ts = ts if ts < min_ts else min_ts
+                max_ts = (ts + dur) if (ts + dur) > max_ts else max_ts
+
         # a stream where AI_CORE and EVENT_WAIT_SQE coexist is the compute stream
         compute_stream = []
         parallel_stream = []
@@ -75,10 +84,9 @@ class NpuProfilingParser:
             self.parallel_time = self.interval_intersection(cs_event_wait_sqe_list, cs_ai_core_list)
         self.profiling_info.compute_time = compute_time / 10 ** 6 if ts_flag else ai_core_res[compute_stream[0]] / 10 ** 6
         self.profiling_info.e2e_time = (max_ts - min_ts) / 10 ** 6 if ts_flag else (self.max_aicore_ts - self.min_aicore_ts) / 10 ** 6
-        self.profiling_info.communication_not_overlapped = (event_wait_sqe_res[compute_stream[0]] -
-                                                            self.parallel_time) / 10 ** 6
-        time_required = (self.profiling_info.cube_time + self.profiling_info.vector_time) + \
-            self.profiling_info.communication_not_overlapped
+        self.profiling_info.communication_not_overlapped = communication_time / 10 ** 6 \
+            if ts_flag else (event_wait_sqe_res[compute_stream[0]] - self.parallel_time) / 10 ** 6
+        time_required = self.profiling_info.compute_time + self.profiling_info.communication_not_overlapped
         if self.npu_step_time:
             self.profiling_info.scheduling_time = self.npu_step_time - time_required
         else:
diff --git a/profiler/compare_tools/utils/profiling_parser.py b/profiler/compare_tools/utils/profiling_parser.py
index 8a94cb695d..928e5da635 100644
--- a/profiler/compare_tools/utils/profiling_parser.py
+++ b/profiler/compare_tools/utils/profiling_parser.py
@@ -91,7 +91,7 @@ class GPUProfilingParser(ProfilingParser):
                 flow_start_dict[event.get("id")] = event
             elif event.get("cat") == flow_cat and event.get("ph") == "f":
                 flow_end_dict[event.get("id")] = event
-            elif event.get("cat", "").capitalize() == "Kernel".capitalize():
+            elif event.get("cat", "").lower() == "kernel":
                 kernel_dict["{}-{}-{}".format(event.get("pid"), event.get("tid"), event.get("ts"))] = event

         for flow_id, start_flow in flow_start_dict.items():
@@ -138,7 +138,7 @@ class GPUProfilingParser(ProfilingParser):
         json_data = FileReader.read_trace_file(self._json_path)
         total_events = json_data.get("traceEvents", [])
         for data in total_events:
-            if data.get("cat", "") == "Kernel" and data.get("name", "").split("_")[0] == "ncclKernel":
+            if data.get("cat", "").lower() == "kernel" and data.get("name", "").split("_")[0].lower() == "ncclkernel":
                 self._communication_data.append(data)
-- 
Gitee
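
Reviewer note, not part of the patch: a minimal standalone sketch of the event handling this change relies on. It assumes the trace file is a flat JSON list of events with 'name', 'cat', 'ts' and 'dur' fields; the helper names (is_compute_kernel, summarize_npu_trace) and the direct json.load are illustrative only, while the real parsers read traces through their own readers (e.g. FileReader) and handle more edge cases.

import json
from collections import defaultdict

def is_compute_kernel(event):
    # Case-insensitive match, mirroring the gpu_parser change: a 'kernel'
    # category event whose name does not contain 'nccl' counts as compute.
    return (event.get('cat', '').lower() == 'kernel'
            and 'nccl' not in event.get('name', '').lower())

def summarize_npu_trace(trace_path):
    # Tally compute time and non-overlapped communication time (in seconds)
    # from the 'Computing' and 'Communication(Not Overlapped)' events that
    # the patched npu_parser now reads when they are present in the trace.
    with open(trace_path, 'r') as trace_file:
        events = json.load(trace_file)
    totals = defaultdict(float)
    min_ts, max_ts = float('inf'), float('-inf')
    for event in events:
        name = event.get('name', '')
        if name not in ('Computing', 'Communication(Not Overlapped)'):
            continue
        ts = event.get('ts', 0)
        dur = event.get('dur', 0)
        totals[name] += dur
        min_ts = min(min_ts, ts)
        max_ts = max(max_ts, ts + dur)
    return {
        'compute_time': totals['Computing'] / 10 ** 6,
        'communication_not_overlapped': totals['Communication(Not Overlapped)'] / 10 ** 6,
        'e2e_time': (max_ts - min_ts) / 10 ** 6 if totals else 0.0,
    }

With a step time available, scheduling time then falls out as npu_step_time minus (compute_time + communication_not_overlapped), which matches the patched time_required calculation.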