diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx index df737dd8458e3452689e906db2e0392bf89a2044..304a4ebadeefb7ecac4637e959c5822bfe3c76a0 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx @@ -314,6 +314,7 @@ export const MemoryView: React.FC = React.memo((props) => { memoryCurveDataAllRef.current = allCurveData setDevice(allCurveData.default_device) setDevices(allCurveData.devices) + setTag('Operator') } else { setMemoryCurveData(resp as MemoryCurveData) } diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx index a3a6e78b1fea2e182d2b030707c929c7a198a306..56f946f71ef4478ef2c1bd96a1f7da8248735b0f 100644 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx +++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx @@ -62,8 +62,8 @@ export const LineChart: React.FC = (props) => { if (!element) return element.oncontextmenu = () => { return false } + echarts.init(element).dispose() let myChart = echarts.init(element) - myChart.clear() let option: echarts.EChartsOption = { title: { @@ -99,16 +99,45 @@ export const LineChart: React.FC = (props) => { if (deviceTarget === 'Ascend') { if (tag === 'Component') { - if (graph.columns.length === 3) { + const mixedTooltip: echarts.TooltipComponentOption = { + trigger: 'axis', + formatter: function (params: any) { + var res = `${params[0].name}
` + for (const item of params) { + if (typeof item.value[item.encode.y[0]] === 'number') { + res += ` + + ${item.seriesName}: ${item.value[item.encode.y[0]]}
` + } + } + return res + } + } + if (graph.columns.length <= 4) { + let finalRows = graph.rows['PTA'] + if (graph.columns.length === 4) { + const mergedAPPRows = graph.rows['APP'].map((item: Array) => { + return [item[0], null, null, item[1]] + }) + finalRows = finalRows.concat(mergedAPPRows).sort((a: any, b: any) => { + return a[0] - b[0] + }) + } option = { ...option, + tooltip: mixedTooltip, dataset: { source: [ graph.columns.map(column => column.name), - ...(graph.rows['PTA'] ?? graph.rows['GE']) + ...finalRows ] }, - series: Array(2).fill( + series: Array(graph.columns.length - 1).fill( { type: 'line', select: { @@ -127,35 +156,23 @@ export const LineChart: React.FC = (props) => { } ) } - } else if (graph.columns.length === 5) { + } else if (graph.columns.length <= 6) { const datasetTitle = graph.columns.map(item => item.name) - const mergedGERows = graph.rows['GE'].map((item: Array) => { + let mergedGERows = graph.rows['GE'].map((item: Array) => { return [item[0], null, null, item[1], item[2]] }) + if (graph.columns.length === 6) { + const mergedAPPRows = graph.rows['APP'].map((item: Array) => { + return [item[0], null, null, null, null, item[2]] + }) + mergedGERows = mergedGERows.concat(mergedAPPRows) + } const finalRows = graph.rows['PTA'].concat(mergedGERows).sort((a: any, b: any) => { return a[0] - b[0] }) option = { ...option, - tooltip: { - trigger: 'axis', - formatter: function (params: any) { - var res = `${params[0].name}
` - for (const item of params) { - if (typeof item.value[item.encode.y[0]] === 'number') { - res += ` - - ${item.seriesName}: ${item.value[item.encode.y[0]]}
` - } - return res - } - }, + tooltip: mixedTooltip, dataset: { source: [ @@ -163,7 +180,7 @@ export const LineChart: React.FC = (props) => { ...finalRows ] }, - series: Array(4).fill( + series: Array(graph.columns.length - 1).fill( { type: 'line', connectNulls: true, @@ -310,14 +327,12 @@ export const LineChart: React.FC = (props) => { dataZoomSelectActive: true }) - myChart.off('dataZoom') myChart.on('dataZoom', (param: any) => { if (onSelectionChanged) { onSelectionChanged(param.batch[0].startValue, param.batch[0].endValue) } }) - myChart.off('restore') myChart.on('restore', () => { if (onSelectionChanged) { // Set startId greater than endId to query all memory events. @@ -325,7 +340,6 @@ export const LineChart: React.FC = (props) => { } }) - myChart.off('click') myChart.on('click', (param) => { myChart.dispatchAction({ type: 'unselect', @@ -341,7 +355,6 @@ export const LineChart: React.FC = (props) => { selectedPoints.current = [param.dataIndex] }) - myChart.off('contextmenu') myChart.getZr().on('contextmenu', () => { myChart.dispatchAction({ type: 'restore' diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py index 1166abed6c6eaa3e4f867fe2d0c2ba4572d75afa..ce615c827fb7b53ff05df5e686ae468b8beac327 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py @@ -52,7 +52,7 @@ class RunLoader(object): data_path = io.join(self.run_dir, path, 'ASCEND_PROFILER_OUTPUT') for file in io.listdir(data_path): if utils.is_npu_trace_path(file) or str(file) in ( - 'kernel_details.csv', 'memory_record.csv', 'operator_memory.csv', + 'kernel_details.csv', 'memory_record.csv', 'operator_memory.csv', 'operator_details.csv'): match = consts.WORKER_SPAN_PATTERN.match(path) worker = match.group(1) diff --git 
a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py index e6bbdf349860e0e1e9401d591f6a8689a5a36a67..ed9b099ccba6725d3bbb8aab0685ecb2a535aad9 100644 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py +++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py @@ -46,7 +46,7 @@ class RunGenerator(object): self.statistic_data = {} self.accelerator_data = {} self.device_target = device_target - self.pta_or_ge_data = {} + self.component_curve_data = {} self.process_data = {} def generate_run_profile(self): @@ -101,7 +101,7 @@ class RunGenerator(object): if self.profile_data.has_memory: profile_run.views.append(consts.MEMORY_VIEW) profile_run.memory_div_curve = None - self.process_data, self.pta_or_ge_data, peak_memory_events = self._handle_memory_data() + self.process_data, self.component_curve_data, peak_memory_events = self._handle_memory_data() profile_run.memory_all_curve = self._get_memory_all_curve() profile_run.memory_events = self._get_memory_event(peak_memory_events) @@ -476,9 +476,9 @@ class RunGenerator(object): if len(total_result['columns'][device]) > 0: total_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'}) - pta_ge_devices_type, pta_ge_peaks = RunGenerator._get_pta_ge_peaks_and_devices_type(self.pta_or_ge_data, + pta_ge_devices_type, pta_ge_peaks = RunGenerator._get_pta_ge_peaks_and_devices_type(self.component_curve_data, memory_metric) - pta_ge_result = { + component_curve_result = { 'metadata': { 'devices': pta_ge_devices_type, 'default_device': pta_ge_devices_type[0] if len(pta_ge_devices_type) > 0 else '', @@ -491,29 +491,33 @@ class RunGenerator(object): 'memory_factor': cano.memory_factor, }, 'columns': defaultdict(list), - 'rows': self.pta_or_ge_data + 'rows': self.component_curve_data 
} for device in pta_ge_devices_type: - if self.pta_or_ge_data.get(device).get('PTA') is not None: - pta_ge_result['columns'][device] += [ + if self.component_curve_data.get(device).get('PTA') is not None: + component_curve_result['columns'][device] += [ {'name': f'PTA Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'PTA memory in use.'}, {'name': f'PTA Reserved ({cano.memory_metric})', 'type': 'number', 'tooltip': 'PTA reserved memory by allocator, both used and unused.'}] - if self.pta_or_ge_data.get(device).get('GE') is not None: - pta_ge_result['columns'][device] += [ + if self.component_curve_data.get(device).get('GE') is not None: + component_curve_result['columns'][device] += [ {'name': f'GE Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'GE memory in use.'}, {'name': f'GE Reserved ({cano.memory_metric})', 'type': 'number', 'tooltip': 'GE reserved memory by allocator, both used and unused.'}] - if len(pta_ge_result['columns'][device]) > 0: - pta_ge_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', + if self.component_curve_data.get(device).get('APP') is not None: + component_curve_result['columns'][device] += [ + {'name': f'APP Reserved ({cano.memory_metric})', 'type': 'number', + 'tooltip': 'APP reserved memory by allocator, both used and unused.'}] + if len(component_curve_result['columns'][device]) > 0: + component_curve_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'}) device_types = list(set(process_devices_type + pta_ge_devices_type)) return { 'devices': device_types, 'default_device': device_types[0], 'total': total_result, - 'ptaGe': pta_ge_result + 'ptaGe': component_curve_result } @staticmethod @@ -536,12 +540,12 @@ class RunGenerator(object): peaks = {} for device in process_data: devices_type.append(device) - peaks[device] = '' + peaks[device] = 'Reserved Peak Memory Usage:' for component 
in process_data.get(device): max_reserved = 0 for array_value in process_data.get(device).get(component): max_reserved = max(array_value[2], max_reserved) - peaks[device] += f'{component} Reserved Peak Memory Usage: {max_reserved:.1f}{memory_metric}\n' + peaks[device] += f' {component}-{max_reserved:.1f}{memory_metric} |' return devices_type, peaks @staticmethod @@ -588,6 +592,9 @@ class RunGenerator(object): [time_column, round(float(ls[allocated_idx]), 3)]) process_data.setdefault(device_type, {}).setdefault('Reserved', []).append( [time_column, round(float(ls[reserved_idx]), 3)]) + elif ls[tag_type_idx] == 'APP': + line_chart_data = [time_column, None, round(float(ls[reserved_idx]), 3)] + pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append(line_chart_data) elif ls[tag_type_idx] in ('PTA', 'GE'): line_chart_data = [time_column, round(float(ls[allocated_idx]), 3), round(float(ls[reserved_idx]), 3)] @@ -602,18 +609,18 @@ class RunGenerator(object): def _handle_peak_memory_rows(self, device_type_idx, ls, peak_memory_rows, reserved_idx, tag_type_idx, time_idx): # Record the peak memory usage of other components. has_flag = False + time_column = round((float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 2) for item in peak_memory_rows[ls[device_type_idx]]: if item[0] == ls[tag_type_idx]: if item[1] < ls[reserved_idx]: item[1] = ls[reserved_idx] - item[2] = ls[time_idx] + item[2] = time_column elif item[1] == ls[reserved_idx]: - item[2] = min(item[2], ls[time_idx]) + item[2] = min(item[2], time_column) has_flag = True break if not has_flag: - peak_memory_rows[ls[device_type_idx]].append([ls[tag_type_idx], ls[reserved_idx], round( - (float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 3)]) + peak_memory_rows[ls[device_type_idx]].append([ls[tag_type_idx], ls[reserved_idx], time_column]) def _generate_overview(self): def build_part_time_str(part_cost: float, part_name: str):