From 9950c360907ef3e46b0222c17b8f4ab6fa1ff532 Mon Sep 17 00:00:00 2001 From: wuyulong11 Date: Mon, 14 Aug 2023 11:54:27 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E3=80=91=20Memory=20View=E5=A2=9E=E5=8A=A0=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E7=BA=A7=E5=86=85=E5=AD=98=E4=BF=A1=E6=81=AF=E5=92=8C=E8=BF=9B?= =?UTF-8?q?=E7=A8=8B=E7=BA=A7=E5=86=85=E5=AD=98=E4=BF=A1=E6=81=AF=E5=B1=95?= =?UTF-8?q?=E7=A4=BA=20=E3=80=90=E4=BF=AE=E6=94=B9=E4=BA=BA=E3=80=91=20wuy?= =?UTF-8?q?ulong=2030031080?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fe/src/components/MemoryView.tsx | 1 + .../fe/src/components/charts/NewLineChart.tsx | 73 +++++++++++-------- .../torch_tb_profiler/profiler/loader.py | 2 +- .../profiler/run_generator.py | 43 ++++++----- 4 files changed, 70 insertions(+), 49 deletions(-) diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx index df737dd84..304a4ebad 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx @@ -314,6 +314,7 @@ export const MemoryView: React.FC = React.memo((props) => { memoryCurveDataAllRef.current = allCurveData setDevice(allCurveData.default_device) setDevices(allCurveData.devices) + setTag('Operator') } else { setMemoryCurveData(resp as MemoryCurveData) } diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx index a3a6e78b1..56f946f71 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx @@ -62,8 +62,8 @@ export const LineChart: React.FC = (props) => { if (!element) return element.oncontextmenu = () 
=> { return false } + echarts.init(element).dispose() let myChart = echarts.init(element) - myChart.clear() let option: echarts.EChartsOption = { title: { @@ -99,16 +99,45 @@ export const LineChart: React.FC = (props) => { if (deviceTarget === 'Ascend') { if (tag === 'Component') { - if (graph.columns.length === 3) { + const mixedTooltip: echarts.TooltipComponentOption = { + trigger: 'axis', + formatter: function (params: any) { + var res = `${params[0].name}
` + for (const item of params) { + if (typeof item.value[item.encode.y[0]] === 'number') { + res += ` + + ${item.seriesName}: ${item.value[item.encode.y[0]]}
` + } + } + return res + } + } + if (graph.columns.length <= 4) { + let finalRows = graph.rows['PTA'] + if (graph.columns.length === 4) { + const mergedAPPRows = graph.rows['APP'].map((item: Array) => { + return [item[0], null, null, item[1]] + }) + finalRows = finalRows.concat(mergedAPPRows).sort((a: any, b: any) => { + return a[0] - b[0] + }) + } option = { ...option, + tooltip: mixedTooltip, dataset: { source: [ graph.columns.map(column => column.name), - ...(graph.rows['PTA'] ?? graph.rows['GE']) + ...finalRows ] }, - series: Array(2).fill( + series: Array(graph.columns.length - 1).fill( { type: 'line', select: { @@ -127,35 +156,23 @@ export const LineChart: React.FC = (props) => { } ) } - } else if (graph.columns.length === 5) { + } else if (graph.columns.length <= 6) { const datasetTitle = graph.columns.map(item => item.name) - const mergedGERows = graph.rows['GE'].map((item: Array) => { + let mergedGERows = graph.rows['GE'].map((item: Array) => { return [item[0], null, null, item[1], item[2]] }) + if (graph.columns.length === 6) { + const mergedAPPRows = graph.rows['APP'].map((item: Array) => { + return [item[0], null, null, null, null, item[2]] + }) + mergedGERows = mergedGERows.concat(mergedAPPRows) + } const finalRows = graph.rows['PTA'].concat(mergedGERows).sort((a: any, b: any) => { return a[0] - b[0] }) option = { ...option, - tooltip: { - trigger: 'axis', - formatter: function (params: any) { - var res = `${params[0].name}
` - for (const item of params) { - if (typeof item.value[item.encode.y[0]] === 'number') { - res += ` - - ${item.seriesName}: ${item.value[item.encode.y[0]]}
` - } - } - return res - } - }, + tooltip: mixedTooltip, dataset: { source: [ @@ -163,7 +180,7 @@ export const LineChart: React.FC = (props) => { ...finalRows ] }, - series: Array(4).fill( + series: Array(graph.columns.length - 1).fill( { type: 'line', connectNulls: true, @@ -310,14 +327,12 @@ export const LineChart: React.FC = (props) => { dataZoomSelectActive: true }) - myChart.off('dataZoom') myChart.on('dataZoom', (param: any) => { if (onSelectionChanged) { onSelectionChanged(param.batch[0].startValue, param.batch[0].endValue) } }) - myChart.off('restore') myChart.on('restore', () => { if (onSelectionChanged) { // Set startId greater than endId to query all memory events. @@ -325,7 +340,6 @@ export const LineChart: React.FC = (props) => { } }) - myChart.off('click') myChart.on('click', (param) => { myChart.dispatchAction({ type: 'unselect', @@ -341,7 +355,6 @@ export const LineChart: React.FC = (props) => { selectedPoints.current = [param.dataIndex] }) - myChart.off('contextmenu') myChart.getZr().on('contextmenu', () => { myChart.dispatchAction({ type: 'restore' diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py index 1166abed6..ce615c827 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py @@ -52,7 +52,7 @@ class RunLoader(object): data_path = io.join(self.run_dir, path, 'ASCEND_PROFILER_OUTPUT') for file in io.listdir(data_path): if utils.is_npu_trace_path(file) or str(file) in ( - 'kernel_details.csv', 'memory_record.csv', 'operator_memory.csv', + 'kernel_details.csv', 'memory_record.csv', 'operator_memory.csv', 'operator_details.csv'): match = consts.WORKER_SPAN_PATTERN.match(path) worker = match.group(1) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py 
b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py index e6bbdf349..ed9b099cc 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py @@ -46,7 +46,7 @@ class RunGenerator(object): self.statistic_data = {} self.accelerator_data = {} self.device_target = device_target - self.pta_or_ge_data = {} + self.component_curve_data = {} self.process_data = {} def generate_run_profile(self): @@ -101,7 +101,7 @@ class RunGenerator(object): if self.profile_data.has_memory: profile_run.views.append(consts.MEMORY_VIEW) profile_run.memory_div_curve = None - self.process_data, self.pta_or_ge_data, peak_memory_events = self._handle_memory_data() + self.process_data, self.component_curve_data, peak_memory_events = self._handle_memory_data() profile_run.memory_all_curve = self._get_memory_all_curve() profile_run.memory_events = self._get_memory_event(peak_memory_events) @@ -476,9 +476,9 @@ class RunGenerator(object): if len(total_result['columns'][device]) > 0: total_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'}) - pta_ge_devices_type, pta_ge_peaks = RunGenerator._get_pta_ge_peaks_and_devices_type(self.pta_or_ge_data, + pta_ge_devices_type, pta_ge_peaks = RunGenerator._get_pta_ge_peaks_and_devices_type(self.component_curve_data, memory_metric) - pta_ge_result = { + component_curve_result = { 'metadata': { 'devices': pta_ge_devices_type, 'default_device': pta_ge_devices_type[0] if len(pta_ge_devices_type) > 0 else '', @@ -491,29 +491,33 @@ class RunGenerator(object): 'memory_factor': cano.memory_factor, }, 'columns': defaultdict(list), - 'rows': self.pta_or_ge_data + 'rows': self.component_curve_data } for device in pta_ge_devices_type: - if self.pta_or_ge_data.get(device).get('PTA') is not None: - pta_ge_result['columns'][device] += [ + if 
self.component_curve_data.get(device).get('PTA') is not None: + component_curve_result['columns'][device] += [ {'name': f'PTA Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'PTA memory in use.'}, {'name': f'PTA Reserved ({cano.memory_metric})', 'type': 'number', 'tooltip': 'PTA reserved memory by allocator, both used and unused.'}] - if self.pta_or_ge_data.get(device).get('GE') is not None: - pta_ge_result['columns'][device] += [ + if self.component_curve_data.get(device).get('GE') is not None: + component_curve_result['columns'][device] += [ {'name': f'GE Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'GE memory in use.'}, {'name': f'GE Reserved ({cano.memory_metric})', 'type': 'number', 'tooltip': 'GE reserved memory by allocator, both used and unused.'}] - if len(pta_ge_result['columns'][device]) > 0: - pta_ge_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', + if self.component_curve_data.get(device).get('APP') is not None: + component_curve_result['columns'][device] += [ + {'name': f'APP Reserved ({cano.memory_metric})', 'type': 'number', + 'tooltip': 'APP reserved memory by allocator, both used and unused.'}] + if len(component_curve_result['columns'][device]) > 0: + component_curve_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'}) device_types = list(set(process_devices_type + pta_ge_devices_type)) return { 'devices': device_types, 'default_device': device_types[0], 'total': total_result, - 'ptaGe': pta_ge_result + 'ptaGe': component_curve_result } @staticmethod @@ -536,12 +540,12 @@ class RunGenerator(object): peaks = {} for device in process_data: devices_type.append(device) - peaks[device] = '' + peaks[device] = 'Reserved Peak Memory Usage:' for component in process_data.get(device): max_reserved = 0 for array_value in process_data.get(device).get(component): max_reserved = max(array_value[2], 
max_reserved) - peaks[device] += f'{component} Reserved Peak Memory Usage: {max_reserved:.1f}{memory_metric}\n' + peaks[device] += f' {component}-{max_reserved:.1f}{memory_metric} |' return devices_type, peaks @staticmethod @@ -588,6 +592,9 @@ class RunGenerator(object): [time_column, round(float(ls[allocated_idx]), 3)]) process_data.setdefault(device_type, {}).setdefault('Reserved', []).append( [time_column, round(float(ls[reserved_idx]), 3)]) + elif ls[tag_type_idx] == 'APP': + line_chart_data = [time_column, None, round(float(ls[reserved_idx]), 3)] + pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append(line_chart_data) elif ls[tag_type_idx] in ('PTA', 'GE'): line_chart_data = [time_column, round(float(ls[allocated_idx]), 3), round(float(ls[reserved_idx]), 3)] @@ -602,18 +609,18 @@ class RunGenerator(object): def _handle_peak_memory_rows(self, device_type_idx, ls, peak_memory_rows, reserved_idx, tag_type_idx, time_idx): # Record the peak memory usage of other components. has_flag = False + time_column = round((float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 2) for item in peak_memory_rows[ls[device_type_idx]]: if item[0] == ls[tag_type_idx]: if item[1] < ls[reserved_idx]: item[1] = ls[reserved_idx] - item[2] = ls[time_idx] + item[2] = time_column elif item[1] == ls[reserved_idx]: - item[2] = min(item[2], ls[time_idx]) + item[2] = min(item[2], time_column) has_flag = True break if not has_flag: - peak_memory_rows[ls[device_type_idx]].append([ls[tag_type_idx], ls[reserved_idx], round( - (float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 3)]) + peak_memory_rows[ls[device_type_idx]].append([ls[tag_type_idx], ls[reserved_idx], time_column]) def _generate_overview(self): def build_part_time_str(part_cost: float, part_name: str): -- Gitee