From a9355b6272f710d15ba81059d64d52effc175335 Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Thu, 22 Aug 2024 10:21:43 +0800 Subject: [PATCH] add_overall_metrics --- .../module_visualization/graph/prof_node.py | 68 ++++++++++++++----- .../graph_build/prof_graph_builder.py | 2 + .../prof_parse/prof_data_pre_process.py | 10 ++- profiler/prof_common/constant.py | 1 + profiler/prof_common/kernel_bean.py | 4 ++ profiler/prof_common/trace_event_bean.py | 6 ++ 6 files changed, 73 insertions(+), 18 deletions(-) diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index 3588a8b81..60df7bd0e 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -24,6 +24,9 @@ class ProfNode(BaseNode): self._kernel_total_list = [] self._communication_total_list = [] self._precision_index = 1 + self._computing_time = 0 + self._uncovered_comm_time = 0 + self._free_time = 0 @property def node_id(self): @@ -37,11 +40,19 @@ class ProfNode(BaseNode): @property def total_kernels(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return [kernel for node in self.child_nodes for kernel in node.total_kernels] return self._kernel_total_list + @property + def total_communications(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return [comm for node in self.child_nodes for comm in node.total_communications] + return self._communication_total_list + @property def host_total_dur(self): - if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: + if self.node_type == Constant.VIRTUAL_TYPE: return sum((node.host_total_dur for node in self.child_nodes)) return self._event.dur @@ -53,9 +64,7 @@ class ProfNode(BaseNode): @property def device_total_dur(self): - if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: - return sum((node.device_total_dur for node in self.child_nodes)) - return sum((kernel.dur for kernel in self._kernel_total_list)) + return sum((kernel.dur for kernel in self.total_kernels)) @property def device_self_dur(self): @@ -82,24 +91,27 @@ class ProfNode(BaseNode): @property def communication_data(self) -> list: - if self.node_type == Constant.VIRTUAL_TYPE: - return [comm for node in self.child_nodes for comm in node.communication_data] - return [[comm.name, comm.dur] for comm in self._communication_total_list] + return [[comm.name, comm.dur] for comm in self.total_communications] @property def overall_data(self): - return {"Computing Time(ms)": 1, "Uncovered Communication Time(ms)": 1, "Free Time(ms)": 1} + return {"Computing Time(us)": round(self._computing_time, 3), + "Uncovered Communication Time(us)": round(self._uncovered_comm_time, 3), + "Free Time(us)": round(self._free_time, 3)} @property def data(self): - return {"Input Data": self.input_data, - "precision_index": self.precision_index, - "Host Self Duration(us)": round(self.host_self_dur, 2), - "Host Total Duration(us)": round(self.host_total_dur, 2), - "Device Self Duration(us)": round(self.device_self_dur, 2), - "Device Total Duration(us)": round(self.device_total_dur, 2), - "kernels": self.kernel_data, - "communications": self.communication_data} + data = { + "Overall Metrics": self.overall_data} if self.node_type != Constant.OPERATOR_TYPE else {} + data.update({"Input Data": self.input_data, + "precision_index": self.precision_index, + "Host Self Duration(us)": round(self.host_self_dur, 3), + "Host Total Duration(us)": round(self.host_total_dur, 3), + "Device Self Duration(us)": round(self.device_self_dur, 3), + "Device Total Duration(us)": round(self.device_total_dur, 3), + "kernels": self.kernel_data, + "Communications": self.communication_data}) + return data @property def info(self): @@ -141,3 +153,27 @@ class ProfNode(BaseNode): diff_dur = max_dur - min_dur for node in self.child_nodes: node.precision_index = 1 - (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 + + def update_overall_metrics(self, overlap_analysis_event): + if not self.total_kernels and not self.total_communications: + return + kernel_start = min((kernel.start_time for kernel in self.total_kernels)) if self.total_kernels else float("inf") + kernel_end = max((kernel.end_time for kernel in self.total_kernels)) if self.total_kernels else float("-inf") + comm_start = min((comm.start_time for comm in self.total_communications)) \ + if self.total_communications else float("inf") + comm_end = max((comm.end_time for comm in self.total_communications)) \ + if self.total_communications else float("-inf") + device_start = min(kernel_start, comm_start) + device_end = max(kernel_end, comm_end) + for event in overlap_analysis_event: + if event.start_time >= device_end: + continue + if event.end_time <= device_start: + continue + duration_us = float(min(device_end, event.end_time) - max(device_start, event.start_time)) / 1000 + if event.name == "Computing": + self._computing_time += duration_us + elif event.name == "Free": + self._free_time += duration_us + elif event.name == "Communication(Not Overlapped)": + self._uncovered_comm_time += duration_us diff --git a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index 331e0cb05..f792fa65a 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -96,6 +96,8 @@ class ProfGraphBuilder: all_nodes.extend(virtual_nodes) for node in all_nodes: node.update_child_precision_index() + if node.node_type != Constant.OPERATOR_TYPE: + node.update_overall_metrics(self._prof_data.get(Constant.OVERLAP_ANALYSIS_EVENT, [])) return all_nodes def find_bwd_module(self) -> list: diff --git a/profiler/module_visualization/prof_parse/prof_data_pre_process.py b/profiler/module_visualization/prof_parse/prof_data_pre_process.py index c16daaecd..f91e40dbd 100644 --- a/profiler/module_visualization/prof_parse/prof_data_pre_process.py +++ b/profiler/module_visualization/prof_parse/prof_data_pre_process.py @@ -28,8 +28,10 @@ class ProfDataPreProcess: self._kernel_details_path = "" self._kernel_pid = None self._hccl_pid = None + self._overlap_analysis_pid = None self._result_data = {Constant.CPU_OP_EVENT: [], Constant.MODULE_EVENT: [], Constant.KERNEL_EVENT: [], - Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: []} + Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: [], + Constant.OVERLAP_ANALYSIS_EVENT: []} @staticmethod def _check_trace_data(trace_data): @@ -72,7 +74,7 @@ class ProfDataPreProcess: self._check_trace_data(trace_data) iter_trace_data = [TraceEventBean(data) for data in trace_data] for event in iter_trace_data: - if self._kernel_pid is not None and self._hccl_pid is not None: + if self._kernel_pid is not None and self._hccl_pid is not None and self._overlap_analysis_pid is not None: break if not event.is_meta(): continue @@ -80,6 +82,8 @@ class ProfDataPreProcess: self._kernel_pid = event.pid elif event.is_hccl_process(): self._hccl_pid = event.pid + elif event.is_overlap_analysis_process(): + self._overlap_analysis_pid = event.pid if self._kernel_pid is None: msg = "There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." raise RuntimeError(msg) @@ -108,6 +112,8 @@ class ProfDataPreProcess: self._result_data[Constant.KERNEL_EVENT].append(event) elif event.is_hccl_event(self._hccl_pid): self._result_data[Constant.HCCL_EVENT].append(event) + elif event.is_overlap_analysis_event(self._overlap_analysis_pid): + self._result_data[Constant.OVERLAP_ANALYSIS_EVENT].append(event) def _parse_kernel_details(self): if not self._kernel_details_path: diff --git a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 90ec6d006..445d361d3 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -24,6 +24,7 @@ class Constant(object): TORCH_TO_NPU_FLOW = "torch_to_device" KERNEL_EVENT = "kernel_event" HCCL_EVENT = "hccl_event" + OVERLAP_ANALYSIS_EVENT = "overlap_event" FWD_BWD_FLOW = "fwd_to_bwd" NPU_ROOT_ID = "NPU" diff --git a/profiler/prof_common/kernel_bean.py b/profiler/prof_common/kernel_bean.py index cbfa10c0a..4d60a6908 100644 --- a/profiler/prof_common/kernel_bean.py +++ b/profiler/prof_common/kernel_bean.py @@ -30,6 +30,10 @@ class KernelBean: def start_time(self): return convert_to_decimal(self._ts) + @property + def end_time(self): + return self.start_time + convert_to_decimal(self.dur) + @property def is_computing_op(self): return self._core_type != "HCCL" diff --git a/profiler/prof_common/trace_event_bean.py b/profiler/prof_common/trace_event_bean.py index 0aee79907..282a4badf 100644 --- a/profiler/prof_common/trace_event_bean.py +++ b/profiler/prof_common/trace_event_bean.py @@ -84,8 +84,14 @@ class TraceEventBean(AnalyzeDict): def is_hccl_event(self, hccl_pid): return self.ph == "X" and self.pid == hccl_pid and self.name.startswith("hcom_") + def is_overlap_analysis_event(self, overlap_analysis_pid): + return self.ph == "X" and self.pid == overlap_analysis_pid + def is_npu_process(self): return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "Ascend Hardware" def is_hccl_process(self): return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "HCCL" + + def is_overlap_analysis_process(self): + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "Overlap Analysis" -- Gitee