From 1254b2c9cae889d5cd600b09b792b4770cd79424 Mon Sep 17 00:00:00 2001 From: wuyulong11 Date: Thu, 30 Nov 2023 19:15:04 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E3=80=91=E3=80=90tbplugin=E3=80=91=E3=80=90=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E5=8D=95=E3=80=91Memory=E7=95=8C=E9=9D=A2=E6=97=B6=E9=97=B4?= =?UTF-8?q?=E6=98=BE=E7=A4=BA=E7=B2=BE=E5=BA=A6=E6=94=B9=E4=B8=BAus?= =?UTF-8?q?=E7=BA=A7=E5=88=AB=20=E3=80=90=E4=BF=AE=E6=94=B9=E4=BA=BA?= =?UTF-8?q?=E3=80=91=20wuyulong=2030031080?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fe/src/components/charts/NewLineChart.tsx | 3 +- .../torch_tb_profiler/profiler/data.py | 23 ++++---- .../profiler/run_generator.py | 52 +++++++++---------- .../tb_plugin/torch_tb_profiler/run.py | 10 ++-- .../tb_plugin/torch_tb_profiler/utils.py | 2 +- 5 files changed, 47 insertions(+), 43 deletions(-) diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx index 5bcbd445c..af350e93d 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx @@ -364,8 +364,7 @@ export const LineChart: React.FC = (props) => { }, [graph, height, resizeEventDependency]) React.useEffect(() => { - const compare_fn = (key: number, mid: Array) => - key - parseFloat(mid[0].toFixed(2)) + const compare_fn = (key: number, mid: Array) => key - mid[0] if (chartObj && tag === 'Operator') { if (record) { let startId = -1 diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py index 909cbc555..ba423019a 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py @@ -139,7 +139,7 @@ class RunProfileData(object): @staticmethod def parse_gpu(worker, span, path, cache_dir): - trace_path, trace_json, _ = RunProfileData._preprocess_file(path, cache_dir, 'GPU') + trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU') profile = RunProfileData.from_json(worker, span, trace_json) profile.trace_file_path = trace_path @@ -149,7 +149,6 @@ class RunProfileData(object): def parse_npu(worker, span, path, cache_dir): trace_json = {} trace_path = path - start_ts = 0 has_trace = False has_kernel = False has_memory_record = False @@ -160,13 +159,14 @@ class RunProfileData(object): if utils.is_npu_trace_path(file): has_trace = True trace_file = io.join(path, file) - trace_path, trace_json, start_ts = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend') + trace_path, trace_json = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend') break - profile = RunProfileData.from_json(worker, span, trace_json) + profile = RunProfileData(worker, span, trace_json) profile.trace_file_path = trace_path profile.has_trace = has_trace - profile.profiler_start_ts = 0 if math.isinf(start_ts) else start_ts + if math.isinf(profile.profiler_start_ts): + profile.profiler_start_ts = 0 for file in io.listdir(path): if str(file) == 'kernel_details.csv': @@ -193,6 +193,14 @@ class RunProfileData(object): profile.has_kernel = has_kernel profile.has_memory = has_memory_operator and has_memory_record profile.has_communication = has_communication_wait_ops and has_communication_overlap + if profile.has_communication: + with utils.timing('EventParser.parse'): + parser = EventParser() + with utils.timing('EventParser: parse steps times'): + # Process steps + parser.parse_steps(profile.events, parser.communication_data) + + profile.steps_names = parser.steps_names return profile @staticmethod @@ -235,10 +243,7 @@ class RunProfileData(object): event_list = trace_json['traceEvents'] end_index = None start_index = None - start_ts = float('inf') for i in reversed(range(len(event_list))): - if event_list[i].get('ts') is not None: - start_ts = min(start_ts, float(event_list[i]['ts'])) if device_target != 'Ascend': if event_list[i]['name'] == 'Record Window End': end_index = i @@ -260,7 +265,7 @@ class RunProfileData(object): fzip.write(json.dumps(trace_json)) trace_path = fp.name - return trace_path, trace_json, start_ts + return trace_path, trace_json def process(self): with utils.timing('EventParser.parse'): diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py index f13948aa3..4184e3830 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py @@ -57,6 +57,7 @@ class RunGenerator(object): profile_run.has_communication = self.profile_data.has_communication profile_run.has_memcpy_or_memset = self.profile_data.has_memcpy_or_memset profile_run.profiler_start_ts = self.profile_data.profiler_start_ts + profile_run.device_target = self.device_target if self.device_target != 'Ascend': profile_run.views.append(consts.OVERALL_VIEW) @@ -81,6 +82,26 @@ class RunGenerator(object): if self.profile_data.memory_snapshot: profile_run.views.append(consts.MEMORY_VIEW) profile_run.memory_snapshot = self.profile_data.memory_snapshot + + profile_run.gpu_metrics = self.profile_data.gpu_metrics_parser.get_gpu_metrics() + + gpu_infos = {gpu_id: RunGenerator._get_gpu_info(self.profile_data.device_props, gpu_id) + for gpu_id in self.profile_data.gpu_metrics_parser.gpu_ids} + gpu_infos = {gpu_id: gpu_info for gpu_id, gpu_info in gpu_infos.items() if gpu_info is not None} + + profile_run.gpu_summary, profile_run.gpu_tooltip = \ + self.profile_data.gpu_metrics_parser.get_gpu_metrics_data_tooltip( + gpu_infos, self.profile_data.tc_ratio) + + profile_run.tid2tree = self.profile_data.tid2tree + profile_run.pl_tid2tree = self.profile_data.pl_tid2tree + + profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events) + profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events) + if profile_run.is_pytorch_lightning and profile_run.pl_module_stats: + profile_run.views.append(consts.LIGHTNING_VIEW) + elif profile_run.module_stats: + profile_run.views.append(consts.MODULE_VIEW) else: if self.profile_data.has_operator_view: profile_run.views.append(consts.OP_VIEW) @@ -114,27 +135,6 @@ class RunGenerator(object): profile_run.views.append(consts.TRACE_VIEW) profile_run.trace_file_path = self.profile_data.trace_file_path - profile_run.gpu_metrics = self.profile_data.gpu_metrics_parser.get_gpu_metrics() - - gpu_infos = {gpu_id: RunGenerator._get_gpu_info(self.profile_data.device_props, gpu_id) - for gpu_id in self.profile_data.gpu_metrics_parser.gpu_ids} - gpu_infos = {gpu_id: gpu_info for gpu_id, gpu_info in gpu_infos.items() if gpu_info is not None} - - profile_run.gpu_summary, profile_run.gpu_tooltip = \ - self.profile_data.gpu_metrics_parser.get_gpu_metrics_data_tooltip( - gpu_infos, self.profile_data.tc_ratio) - - profile_run.tid2tree = self.profile_data.tid2tree - profile_run.pl_tid2tree = self.profile_data.pl_tid2tree - profile_run.device_target = self.device_target - - profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events) - profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events) - if profile_run.is_pytorch_lightning and profile_run.pl_module_stats: - profile_run.views.append(consts.LIGHTNING_VIEW) - elif profile_run.module_stats: - profile_run.views.append(consts.MODULE_VIEW) - return profile_run def _npu_get_overlap(self): @@ -436,9 +436,9 @@ class RunGenerator(object): # convert time metric 'us' to 'ms' # some operators may not have the following columns nums = [ls[0] if ls[0] else '', abs(float(ls[1])), - round((float(ls[2]) - self.profile_data.profiler_start_ts) / 1000, 2) if ls[2] else None, - round((float(ls[3]) - self.profile_data.profiler_start_ts) / 1000, 2) if ls[3] else None, - round(float(ls[4]) / 1000, 2) if ls[4] else None] + round((float(ls[2]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[2] else None, + round((float(ls[3]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[3] else None, + round(float(ls[4]) / 1000, 3) if ls[4] else None] display_datas[device_type].append(nums) table['rows'] = display_datas for name in display_datas: @@ -580,7 +580,7 @@ class RunGenerator(object): logger.error('Required column is missing in file "memory_record.csv"') else: for ls in datas[1:]: - time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 2) + time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) device_type = ls[device_type_idx] if ls[tag_type_idx] == 'PTA+GE': process_data.setdefault(device_type, {}).setdefault('Allocated', []).append( @@ -639,7 +639,7 @@ class RunGenerator(object): reserved_idx = memory_curve_id_dict.get('reserved_idx') tag_type_idx = memory_curve_id_dict.get('tag_type_idx') time_idx = memory_curve_id_dict.get('time_idx') - time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 2) + time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) for item in peak_memory_rows[ls[device_type_idx]]: if item[0] == ls[tag_type_idx]: if item[1] < ls[reserved_idx]: diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py index edbe5d230..2f719fb0c 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py @@ -192,7 +192,7 @@ class RunProfile(object): def get_memory_stats(self, start_ts=None, end_ts=None, memory_metric='K'): cano = Canonicalizer(memory_metric=memory_metric) - rounder = DisplayRounder(ndigits=2) + rounder = DisplayRounder(ndigits=3) stats = self.memory_snapshot.get_memory_statistics(self.tid2tree, start_ts=start_ts, end_ts=end_ts) @@ -281,9 +281,9 @@ class RunProfile(object): continue curves[dev].append([ - cano.convert_time(ts - self.profiler_start_ts), - cano.convert_memory(ta), - cano.convert_memory(tr), + round(cano.convert_time(ts - self.profiler_start_ts), 3), + round(cano.convert_memory(ta), 3), + round(cano.convert_memory(tr), 3), ]) peaks[dev] = max(peaks[dev], ta) @@ -366,7 +366,7 @@ class RunProfile(object): return name cano = Canonicalizer(time_metric=time_metric, memory_metric=memory_metric) - rounder = DisplayRounder(ndigits=2) + rounder = DisplayRounder(ndigits=3) profiler_start_ts = self.profiler_start_ts memory_records = RunProfile._filtered_by_ts(self.memory_snapshot.memory_records, start_ts, end_ts) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py index be87e02eb..8f4189d76 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py @@ -130,7 +130,7 @@ class DisplayRounder: def __call__(self, v: float): _v = abs(v) if _v >= self.precision or v == 0: - return round(v, 2) + return round(v, 3) else: ndigit = abs(math.floor(math.log10(_v))) return round(v, ndigit) -- Gitee