From 8744118d6321e97f646ef9fd0b2f3f9ca0af06a4 Mon Sep 17 00:00:00 2001 From: hid86196561 Date: Tue, 5 Mar 2024 15:46:28 +0800 Subject: [PATCH 1/4] wait_time_added --- .../overall_performance_comparator.py | 9 ++- .../compare_bean/profiling_info.py | 5 ++ .../profiling_parser/npu_profiling_parser.py | 56 +++++++++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py b/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py index bfc631c66c..7ad66c0fa7 100644 --- a/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py +++ b/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py @@ -37,9 +37,12 @@ class OverallPerformanceComparator(BaseComparator): self._headers.append('Mem Usage') base_col.append(f'{base_profiling_info.memory_used:.2f}G') comp_col.append(f'{comp_profiling_info.memory_used:.2f}G') - self._headers.extend(['Uncovered Communication Time']) - base_col.extend([f'{base_profiling_info.communication_not_overlapped: .3f}s']) - comp_col.extend([f'{comp_profiling_info.communication_not_overlapped: .3f}s']) + self._headers.extend(['Uncovered Communication Time(Wait Time)']) + if base_profiling_info.wait_time: + base_col.extend([f'{base_profiling_info.communication_not_overlapped: .3f}s({base_profiling_info.wait_time:.3f}s']) + else: + base_col.extend([f'{base_profiling_info.communication_not_overlapped: .3f}s( / )']) + comp_col.extend([f'{comp_profiling_info.communication_not_overlapped: .3f}s({comp_profiling_info.wait_time:.3f}s)']) if base_profiling_info.sdma_time or comp_profiling_info.sdma_time: self._headers.append('SDMA Time(Num)') base_col.append(f'{base_profiling_info.sdma_time:.3f}s({base_profiling_info.sdma_num})') diff --git a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py index 9184c790b7..b100e7ba98 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py +++ b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py @@ -18,6 +18,7 @@ class ProfilingInfo: self.fa_num_bwd = 0 self.compute_time = 0.0 self.communication_not_overlapped = 0.0 + self.wait_time = 0.0 self.memory_used = 0.0 self.e2e_time = 0.0 self.sdma_time = 0.0 @@ -33,6 +34,7 @@ class ProfilingInfo: self.vec_time = self.vec_time / 10 ** 6 self.compute_time = self.compute_time / 10 ** 6 self.communication_not_overlapped = self.communication_not_overlapped / 10 ** 6 + self.wait_time = self.wait_time / 10 ** 6 self.e2e_time = self.e2e_time / 10 ** 6 self.sdma_time = self.sdma_time / 10 ** 6 self.scheduling_time = self.scheduling_time / 10 ** 6 @@ -84,6 +86,9 @@ class ProfilingInfo: def update_comm_not_overlap(self, time: float): self.communication_not_overlapped += time + def update_comm_not_overlap_wait_time(self, time: float): + self.wait_time = time + def set_memory_used(self, memory: float): self.memory_used = memory diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index f872e52a53..dfc7d7d436 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -48,6 +48,7 @@ class NPUProfilingParser(BaseProfilingParser): if self._enable_profiling_compare: func_list.add(self._picking_overlap_analysis_data) func_list.add(self._picking_kernel_event) + func_list.add(self._picking_hccl_event) return list(func_list) def _update_memory_list(self): @@ -98,10 +99,65 @@ class NPUProfilingParser(BaseProfilingParser): self.__parse_kernel_csv() self.__add_sdma_time() self.__add_overlap_analysis_time() + self._picking_notify_wait_event_and_not_overlap_event() + self.__add_overlap_wait_time() self._result_data.overall_metrics.calculate_other_time() self._result_data.overall_metrics.calculate_schedule_time() self._result_data.overall_metrics.trans_time_to_s() + def _picking_notify_wait_event_and_not_overlap_event(self): + self.notify_event_cache = [] + self._not_overlaped_commu_event = [] + for event in self._commu_task_list: + if event.name == 'Notify_Wait' and event.args.get('rdma_type', 0) != 'RDMA_PAYLOAD_CHECK': + self.notify_event_cache.append(event) + for event in self._overlap_analysis: + if event.is_comm_not_overlap(): + self._not_overlaped_commu_event.append(event) + + def __add_overlap_wait_time(self): + notify_wait_event_dict = dict() + for notify_event in self.notify_event_cache: + if notify_event.tid in notify_wait_event_dict: + notify_wait_event_dict[notify_event.tid].append(notify_event) + else: + notify_wait_event_dict[notify_event.tid] = [notify_event] + total_time = 0 + for commu_event in self._not_overlaped_commu_event: + commu_event_start_time = float(commu_event.start_time) + commu_event_end_time = float(commu_event.end_time) + wait_time_list = [] + + for plane_id, events in notify_wait_event_dict.items(): + wait_time = 0 + idx = 0 + for notify_event in events: + notify_event_start_time = float(notify_event.start_time) + notify_event_end_time = float(notify_event.end_time) + if notify_event_start_time < commu_event_start_time and notify_event_end_time > commu_event_end_time: + wait_time = commu_event_end_time - commu_event_start_time + break + elif notify_event_start_time < commu_event_start_time and commu_event_start_time <= notify_event_end_time <= commu_event_end_time: + wait_time += notify_event_end_time - commu_event_start_time + idx += 1 + elif commu_event_start_time <= notify_event_start_time <= commu_event_end_time and notify_event_end_time > commu_event_end_time: + wait_time += commu_event_end_time - commu_event_start_time + break + elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= commu_event_end_time: + wait_time += notify_event_end_time - notify_event_start_time + elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= commu_event_end_time: + wait_time += notify_event_end_time - notify_event_start_time + idx += 1 + elif notify_event_end_time < commu_event_start_time: + idx += 1 + else: + break + + wait_time_list.append(wait_time) + notify_wait_event_dict[plane_id] = notify_wait_event_dict[plane_id][idx:] + total_time += max(wait_time_list) + self._result_data.overall_metrics.update_comm_not_overlap(total_time) + def _picking_hccl_event(self, event: TraceEventBean): if event.pid != self._hccl_pid or not event.is_x_mode(): return False -- Gitee From b96e83edc81b4583d4f485e210080e45e16d7a42 Mon Sep 17 00:00:00 2001 From: hid86196561 Date: Tue, 5 Mar 2024 19:35:57 +0800 Subject: [PATCH 2/4] logic_fix1 --- .../profiling_parser/npu_profiling_parser.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index dfc7d7d436..0a3a869f4b 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -133,19 +133,21 @@ class NPUProfilingParser(BaseProfilingParser): idx = 0 for notify_event in events: notify_event_start_time = float(notify_event.start_time) - notify_event_end_time = float(notify_event.end_time) - if notify_event_start_time < commu_event_start_time and notify_event_end_time > commu_event_end_time: + notify_event_end_time = float(notify_event.start_time) + notify_event.dur + if notify_event_start_time < commu_event_start_time and notify_event_end_time > \ + commu_event_end_time: wait_time = commu_event_end_time - commu_event_start_time break - elif notify_event_start_time < commu_event_start_time and commu_event_start_time <= notify_event_end_time <= commu_event_end_time: + elif notify_event_start_time < commu_event_start_time <= notify_event_end_time <= \ + commu_event_end_time: wait_time += notify_event_end_time - commu_event_start_time idx += 1 - elif commu_event_start_time <= notify_event_start_time <= commu_event_end_time and notify_event_end_time > commu_event_end_time: + elif commu_event_start_time <= notify_event_start_time <= commu_event_end_time < \ + notify_event_end_time: wait_time += commu_event_end_time - commu_event_start_time break - elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= commu_event_end_time: - wait_time += notify_event_end_time - notify_event_start_time - elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= commu_event_end_time: + elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= \ + commu_event_end_time: wait_time += notify_event_end_time - notify_event_start_time idx += 1 elif notify_event_end_time < commu_event_start_time: @@ -156,7 +158,7 @@ class NPUProfilingParser(BaseProfilingParser): wait_time_list.append(wait_time) notify_wait_event_dict[plane_id] = notify_wait_event_dict[plane_id][idx:] total_time += max(wait_time_list) - self._result_data.overall_metrics.update_comm_not_overlap(total_time) + self._result_data.overall_metrics.update_comm_not_overlap_wait_time(total_time) def _picking_hccl_event(self, event: TraceEventBean): if event.pid != self._hccl_pid or not event.is_x_mode(): -- Gitee From 6d0accbe729429ad3ef2a794be7ee0a5e023b3c0 Mon Sep 17 00:00:00 2001 From: hid86196561 Date: Tue, 5 Mar 2024 19:55:47 +0800 Subject: [PATCH 3/4] bug_fix --- .../compare_backend/profiling_parser/npu_profiling_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 0a3a869f4b..6fd99c3b4e 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -125,7 +125,7 @@ class NPUProfilingParser(BaseProfilingParser): total_time = 0 for commu_event in self._not_overlaped_commu_event: commu_event_start_time = float(commu_event.start_time) - commu_event_end_time = float(commu_event.end_time) + commu_event_end_time = float(commu_event.start_time) + commu_event.dur wait_time_list = [] for plane_id, events in notify_wait_event_dict.items(): -- Gitee From 5dc380cd96938f3d753301cb81df5817013262ee Mon Sep 17 00:00:00 2001 From: c30056266 Date: Wed, 6 Mar 2024 10:03:02 +0800 Subject: [PATCH 4/4] logic_minor_fix --- .../compare_backend/profiling_parser/npu_profiling_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 6fd99c3b4e..60c1c2dea1 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -124,9 +124,9 @@ class NPUProfilingParser(BaseProfilingParser): notify_wait_event_dict[notify_event.tid] = [notify_event] total_time = 0 for commu_event in self._not_overlaped_commu_event: + wait_time_list = [] commu_event_start_time = float(commu_event.start_time) commu_event_end_time = float(commu_event.start_time) + commu_event.dur - wait_time_list = [] for plane_id, events in notify_wait_event_dict.items(): wait_time = 0 @@ -144,7 +144,7 @@ class NPUProfilingParser(BaseProfilingParser): idx += 1 elif commu_event_start_time <= notify_event_start_time <= commu_event_end_time < \ notify_event_end_time: - wait_time += commu_event_end_time - commu_event_start_time + wait_time += commu_event_end_time - notify_event_start_time break elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= \ commu_event_end_time: -- Gitee