diff --git a/plugins/tensorboard-plugins/tb_plugin/README.md b/plugins/tensorboard-plugins/tb_plugin/README.md
index 93c40c443d35d7f6e244728221ac0a2fa2b28045..c2c5c2aee9f3b5c8ac9e8a3131e8035b435f62e3 100644
--- a/plugins/tensorboard-plugins/tb_plugin/README.md
+++ b/plugins/tensorboard-plugins/tb_plugin/README.md
@@ -248,4 +248,8 @@ Workers-Spans: With multithreading, profiling may contain multiple groups of data; use the Wo
 | Elapse Time(us) | Total elapsed time of operators of this type. |
 | Avg Elapse Time(us) | Average elapsed time of a single operator. |
 | Transit Time(us) | Total transit time of operators of this type. |
-| Avg Transit Time(us) | Average transit time of a single operator. |
\ No newline at end of file
+| Avg Transit Time(us) | Average transit time of a single operator. |
+
+### Public Network URL Description
+
+[Public Network URL Description](./docs/公网URL说明.xlsx)
\ No newline at end of file
diff --git "a/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" "b/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx"
new file mode 100644
index 0000000000000000000000000000000000000000..4c6800ec528c8dc1cba44f006c79f0f4f64a027c
Binary files /dev/null and "b/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" differ
diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py b/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py
index a36b606377060a4e6a584aa3e24ffc71c30dfb90..03fb7c15aea6bf361b241910fa4529bc0996286c 100644
--- a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py
+++ b/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py
@@ -24,9 +24,9 @@ def add_header(file):
 
 if __name__ == '__main__':
-    dir = sys.argv[1]
-    if not os.path.isdir(dir):
-        raise ValueError('{} is not a directory'.format(dir))
+    directory = sys.argv[1]
+    if not os.path.isdir(directory):
+        raise ValueError('{} is not a directory'.format(directory))
 
-    for file in glob.glob(dir + '/*.ts'):
-        add_header(file)
+    for ts_file in glob.glob(directory + '/*.ts'):
+        add_header(ts_file)
diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx
index 38e5118c95dab8155f622cdc257c1665572bcd8d..5b8ce929124e6b1d15af7e09837e828f9da9d521 100644
--- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx
+++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx
@@ -226,11 +226,6 @@ export const ModuleView: React.FC = (props) => {
-
-          {/* defaultExpandAllRows will only valid when first render the Table
-            if row is null, then it will be ignored so all data will be collapse.
-            see https://segmentfault.com/a/1190000007830998 for more information.
-          */}
           {rows && rows.length > 0 && (

         if peak_mem > total_mem * 0.9:
-            percentage = peak_mem / total_mem * 100
+            percentage = peak_mem / total_mem * 100 if total_mem > 0 else 0
             total_mem_gb = total_mem / 1024 / 1024 / 1024
             ckp_url = 'https://pytorch.org/docs/stable/checkpoint.html'
             amp_url = 'https://pytorch.org/docs/stable/amp.html'
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py
index 4f6afdb7ec6dc48fbaeccfece6b9983bbceeaf30..4434c65ad6ea575bd5deb010e0bde9f7a8d24a9d 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py
@@ -111,8 +111,8 @@ class Operators(Operator):
         ops: List[OperatorNode] = []
         kernels: List[DeviceNode] = []
         for n in nodes:
-            o, k = n.get_operator_and_kernels()
-            ops.extend(o)
+            op, k = n.get_operator_and_kernels()
+            ops.extend(op)
             kernels.extend(k)
         return ops, kernels
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py
index 412c677ce0faacf5c22bbb4f884a6b1adb936586..a164bd3d37390ba367f0d504910e45050227ffbf 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py
@@ -20,9 +20,8 @@ class DiffNode:
         self.children: List[DiffNode] = []
 
     def build_tree(self):
-        '''build the children from the left_node and right_node'''
+        """build the children from the left_node and right_node"""
         if not isinstance(self.left, Operators) or not isinstance(self.right, Operators):
-            # TODO: do we need calculate the stats or not?
             return
 
         if isinstance(self.left.op_nodes, OperatorNode) and isinstance(self.right.op_nodes, OperatorNode):
@@ -39,7 +38,6 @@ class DiffNode:
         else:
             # one single item and one list
             pass
-        # TODO: do we need statistic the stats for both operator and kernel here?
 
     @staticmethod
     def create_node(
@@ -84,16 +82,15 @@ class DiffNode:
 
         l_iter = 0
         r_iter = 0
-        for (l, r) in matched_paris:
-            left_child = left_nodes[l_iter:l]
+        for (left, r) in matched_paris:
+            left_child = left_nodes[l_iter:left]
             right_child = right_nodes[r_iter:r]
             if left_child or right_child:
                 yield DiffNode.create_node(left_child, right_child)
-            yield DiffNode.create_node(left_nodes[l], right_nodes[r])
-            l_iter = l + 1
+            yield DiffNode.create_node(left_nodes[left], right_nodes[r])
+            l_iter = left + 1
             r_iter = r + 1
-        # TODO: fill unknown nodes in case of the start_time of next node and current
         # end time is bigger than threshold.
         # Or do we need move the logic into frondend for visualization?
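Note on the diffrun/tree.py hunk: besides renaming the ambiguous `l` to `left`, the loop implements a gap-then-match walk over the two node lists. A runnable toy sketch of that walk (`walk` and the sample data below are illustrative stand-ins, not plugin code):

def walk(matched_pairs, left_nodes, right_nodes):
    # For each matched index pair, first yield the unmatched nodes collected
    # since the previous match, then yield the matched pair itself.
    l_iter = r_iter = 0
    for left, right in matched_pairs:
        gap_left = left_nodes[l_iter:left]
        gap_right = right_nodes[r_iter:right]
        if gap_left or gap_right:
            yield ('unmatched', gap_left, gap_right)
        yield ('matched', left_nodes[left], right_nodes[right])
        l_iter, r_iter = left + 1, right + 1

print(list(walk([(1, 0), (3, 2)], ['l0', 'l1', 'l2', 'l3'], ['r0', 'r1', 'r2'])))
# [('unmatched', ['l0'], []), ('matched', 'l1', 'r0'),
#  ('unmatched', ['l2'], ['r1']), ('matched', 'l3', 'r2')]

In the real code each yielded group becomes a `DiffNode` via `DiffNode.create_node`.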
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py
index 213d8002592f8bcac867d6c63b76538097a5ca64..d79d8ebc3c1e1378d036e607d0298f206fefa47f 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py
@@ -387,7 +387,7 @@ class StepParser:
             self.steps[i_step] = (step_start_time, step_end_time)  # Update step time considering device side.
             prev_step_end_time = step_end_time
 
-        is_remove_tail_steps = True  # TODO: Use tensorboard argument instead.
+        is_remove_tail_steps = True
         if is_use_gpu and len(self.steps) > 1 and is_remove_tail_steps:
             i_step = len(self.steps) - 1
             while i_step >= 0:
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py
index 74e7b54dfc53bb2979df6e714f25600560c992cc..1321d5f6df8d0a6a06abc9342d50ab843a5d41ea 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py
@@ -303,7 +303,7 @@ class GPUMetricsParser(object):
             process_gpu(gpu_ids[idx])
 
         tooltip_summary = 'The GPU usage metrics:\n'
-        tooltip = '{}\n{}'.format(tooltip_summary, consts.TOOLTIP_GPU_UTIL) 
+        tooltip = '{}\n{}'.format(tooltip_summary, consts.TOOLTIP_GPU_UTIL)
         if has_sm_efficiency:
             tooltip += '\n' + consts.TOOLTIP_SM_EFFICIENCY
         if has_occupancy:
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py
index 68e74d578116b9deff4aef4efc36933c52250247..8c10cee675dc405cda5e6e84e1e90d599583a33a 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py
@@ -40,11 +40,11 @@ class ModuleStats:
 
     @property
    def avg_host_duration(self):
-        return self.host_duration / self.occurences
+        return self.host_duration / self.occurences if self.occurences != 0 else 0
 
     @property
     def avg_device_duration(self):
-        return self.device_duration / self.occurences
+        return self.device_duration / self.occurences if self.occurences != 0 else 0
 
 
 Stats = namedtuple('Stats', [
@@ -111,7 +111,7 @@ def _build_module_hierarchy(events: List[PythonFunctionEvent]) -> List[Module]:
         children.setdefault(e_id, [])
         e_parent_id = e.python_parent_id
         children.setdefault(e_parent_id, [])
-        children[e_parent_id].append(e_id)
+        children.get(e_parent_id).append(e_id)
     function_leaves = [k for k, v in children.items() if not v]
 
     # Convert Python function topology to Module topology.
@@ -142,15 +142,15 @@ def _build_module_hierarchy(events: List[PythonFunctionEvent]) -> List[Module]:
     for child_id, parent_id in module_parent_map.items():
         module_child_map.setdefault(child_id, [])
         module_child_map.setdefault(parent_id, [])
-        module_child_map[parent_id].append(child_id)
+        module_child_map.get(parent_id).append(child_id)
 
     # The traverse order is well defined which guarantees that a given topology
     # will produce a unique and unambiguous hierarchy.
     def append_hierarchy(e_id) -> Module:
         e = id_to_event[e_id]
         module = Module(e.name, e.module_id)
-        for id in module_child_map[e_id]:
-            child = append_hierarchy(id)
+        for idx in module_child_map.get(e_id):
+            child = append_hierarchy(idx)
             module.children.append(child)
         return module
@@ -211,7 +211,7 @@ def _process_module_statistics(
     def process_modules(h_modules: Iterable[Module]):
         for m in h_modules:
             name = m.name.replace('nn.Module: ', '')
-            stats = module_aggs[(m.name, m.module_id)]
+            stats = module_aggs.get((m.name, m.module_id))
             child_stats = list(process_modules(m.children))
             yield Stats(
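The zero-guards added to `avg_host_duration` and `avg_device_duration` above (and to `avg_duration` in op_agg.py below) all apply one pattern; a minimal runnable sketch, with `StatsSketch` as a stand-in rather than the plugin's `ModuleStats`:

class StatsSketch:
    """Stand-in illustrating the zero-guarded average used in this patch."""

    def __init__(self):
        self.occurences = 0    # spelling kept consistent with the plugin's field
        self.host_duration = 0

    def add(self, duration: int):
        self.occurences += 1
        self.host_duration += duration

    @property
    def avg_host_duration(self):
        # Returns 0 instead of raising ZeroDivisionError before any record exists.
        return self.host_duration / self.occurences if self.occurences != 0 else 0

stats = StatsSketch()
assert stats.avg_host_duration == 0    # safe with zero occurrences
stats.add(10)
assert stats.avg_host_duration == 10.0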
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py
index 54dc5d093bec6f87826c1a89853828d2005c4036..824b8094976c55ef36da2795332e5fe389abb6fb 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py
@@ -68,7 +68,7 @@ class CommunicationNode(BaseNode):
 class HostNode(BaseNode):
     def __init__(self, device_duration: int = 0, **kwargs):
         super().__init__(**kwargs)
-        self.device_duration = device_duration  # Total time of Kernel, GPU Memcpy, GPU Memset. TODO: parallel multi-stream?  # noqa: E501
+        self.device_duration = device_duration  # Total time of Kernel, GPU Memcpy, GPU Memset.
 
 
 class OperatorNode(HostNode):
@@ -90,7 +90,6 @@ class OperatorNode(HostNode):
         self.tc_total_duration = 0  # Time of TC kernels launched by this op including its children operators.
 
     def fill_stats(self):
-        # TODO: Replace recursive by using a stack, in case of too deep callstack.
         self.children.sort(key=lambda x: (x.start_time, -x.end_time))
         self.runtimes.sort(key=lambda x: (x.start_time, -x.end_time)
                            if x.start_time and x.end_time else (sys.maxsize, -sys.maxsize - 1))
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py
index 8a1af502f5c91147b1ef4e764e84385d7cc11af7..f1fc6117befb1e9522603ca07007c47e9b993104 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py
@@ -25,7 +25,6 @@ class OperatorAgg:
         self.tc_eligible = op.tc_eligible
         self.tc_self_duration: int = 0
         self.tc_total_duration: int = 0
-        # TODO: Think about adding these avgs to UI.
 
     @property
     def tc_self_ratio(self) -> float:
@@ -81,7 +80,7 @@ class KernelAggByNameOp:
 
     @property
     def avg_duration(self):
-        return self.total_duration / self.calls
+        return self.total_duration / self.calls if self.calls > 0 else 0
 
     @property
     def avg_blocks_per_sm(self) -> float:
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py
index bee6687e8bd06bdcc936e5273dc81a240a586806..5639c666aadc32accc4d3548c19bcd4fa4ad4294 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py
@@ -50,7 +50,7 @@ class OpTreeBuilder:
         backward_modules: List[BackwardNode] = []
         for module in modules:
             OpTreeBuilder._build_backward_module(module, None, fwd_bwd_root, backward_modules)
-        OpTreeBuilder._insert_backward_modules(self.tid2tree[self.main_tid], backward_modules)
+        OpTreeBuilder._insert_backward_modules(self.tid2tree.get(self.main_tid), backward_modules)
         self.tid2tree = {tid: root for tid, root in self.tid2tree.items() if len(root.children) > 0}
 
         return self.tid2tree
@@ -104,7 +104,6 @@ class OpTreeBuilder:
         if staled_device_nodes:
             # Note: Although kernels of this dummy runtime is put under main thread's tree,
             # we don't know which thread launches them.
-            # TODO: Don't make belonging thread assumption on future usage if we need special handling
             dummpy_rt.append(RuntimeNode(
                 name='dummy',
                 start_time=None,
@@ -143,7 +142,6 @@ class OpTreeBuilder:
         # Merge the consecutive calls to same function into one.
         # Just follow the same pattern in torch/autograd/profiler.py,
         # EventList._remove_dup_nodes
-        # TODO: Replace recursive by for loop, in case of too deep callstack.
        def remove_dup_nodes(node: OperatorNode):
            if node.type == EventTypes.RUNTIME:
                return
@@ -259,6 +257,7 @@ class OpTreeBuilder:
 
         return fwd_to_bwdroot
 
+    @staticmethod
     def _build_backward_module(node: ModuleNode,
                                parent: Optional[BackwardNode],
                                fwd_bwd_map: Dict[int, List[OperatorNode]],
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py
index df1f29927e84d7321e6cb073d7d08b02c5febe7d..261e70b7a16d000c51064ae61d267b69e7ad4d16 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py
@@ -43,12 +43,12 @@ class OverallParser(object):
         slots: List[Tuple[int, int]] = []
         for role in role_ranges:
             if slots:
-                range = intersection_ranges_lists(slots, role)
+                inter_range = intersection_ranges_lists(slots, role)
             else:
-                range = role
+                inter_range = role
                 slots = merge_ranges(list(steps))
-            cost_ranges.append(range)
-            slots = subtract_ranges_lists(slots, range)
+            cost_ranges.append(inter_range)
+            slots = subtract_ranges_lists(slots, inter_range)
         # The last one is ProfileRole.Other
         cost_ranges.append(slots)
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py
index c927d5acbe658f9d80f4a251635b61cb05bee1ef..1c46bd1ae8b5044cc3cbda047d49b1181e6b9eb3 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py
@@ -155,10 +155,10 @@ def intersection_ranges_lists(range_list1: List[Tuple[int, int]],
 
 def get_ranges_sum(ranges: List[Tuple[int, int]]) -> int:
-    sum: int = 0
-    for range in ranges:
-        sum += (range[1] - range[0])
-    return sum
+    total: int = 0
+    for item in ranges:
+        total += (item[1] - item[0])
+    return total
 
 
 def pop_list(range_list, index):
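The `get_ranges_sum` rewrite is mainly about no longer shadowing the builtins `sum` and `range`. Assuming the same (start, end) tuples, an equivalent one-liner sketch (not the plugin's code) is:

from typing import List, Tuple

def ranges_sum_sketch(ranges: List[Tuple[int, int]]) -> int:
    # Total covered length of a list of (start, end) ranges; relies on the
    # builtin sum(), which the old local variable named `sum` used to shadow.
    return sum(end - start for start, end in ranges)

assert ranges_sum_sketch([(0, 3), (10, 14)]) == 7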
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py
index f5c92479d50cc40fc2a630caf0ebe190b5d53f42..c7cc94ecd3dd058d279a78c6f3cffdbd77277a81 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py
@@ -144,7 +144,7 @@ class RunGenerator(object):
             return overlap_by_steps
         title = [x.lower() for x in data[0]]
         title_name = RunGenerator._check_overlap_data(title)
-        if title_name is None:
+        if not title_name:
             logger.error("Incomplete content of CSV file.")
             return overlap_by_steps
 
@@ -165,7 +165,7 @@ class RunGenerator(object):
         # csv: step / compute time / communication_not_overlap / overlap / communication / free time
         length = len(title)
         if length < 5:
-            return
+            return []
         key = ["computing", "overlapped", "communication(not overlapped)", "free"]
         get_key = list()
         for j in key:
@@ -173,7 +173,7 @@ class RunGenerator(object):
                 if j == title[i]:
                     get_key.append(i)
         if len(get_key) < 4:
-            return None
+            return []
         return get_key
 
     def _npu_get_wait_table(self):
@@ -514,8 +514,9 @@ class RunGenerator(object):
                 {'name': f'APP Reserved ({cano.memory_metric})', 'type': 'number',
                  'tooltip': 'APP reserved memory by allocator, both used and unused.'}]
         if len(component_curve_result['columns'][device]) > 0:
-            component_curve_result['columns'][device].insert(0,
-                                                             {'name': f'Time ({cano.time_metric})', 'type': 'number',
-                                                              'tooltip': 'Time since profiler starts.'})
+            component_curve_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})',
+                                                                 'type': 'number',
+                                                                 'tooltip': 'Time since profiler starts.'})
         device_types = list(set(process_devices_type + pta_ge_devices_type))
         return {
             'devices': device_types,
@@ -604,15 +605,24 @@ class RunGenerator(object):
                                            round(float(ls[reserved_idx]), 3)]
                        pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append(line_chart_data)
                else:
-                    self._handle_peak_memory_rows(device_type_idx, ls, peak_memory_rows, reserved_idx, tag_type_idx,
-                                                  time_idx)
+                    memory_curve_id_dict = {
+                        'device_type_idx': device_type_idx,
+                        'reserved_idx': reserved_idx,
+                        'tag_type_idx': tag_type_idx,
+                        'time_idx': time_idx
+                    }
+                    self._handle_peak_memory_rows(memory_curve_id_dict, ls, peak_memory_rows)
 
         peak_memory_events['rows'] = peak_memory_rows
         return process_data, pta_or_ge_data, peak_memory_events
 
-    def _handle_peak_memory_rows(self, device_type_idx, ls, peak_memory_rows, reserved_idx, tag_type_idx, time_idx):
+    def _handle_peak_memory_rows(self, memory_curve_id_dict, ls, peak_memory_rows):
         # Record the peak memory usage of other components.
         has_flag = False
+        device_type_idx = memory_curve_id_dict.get('device_type_idx')
+        reserved_idx = memory_curve_id_dict.get('reserved_idx')
+        tag_type_idx = memory_curve_id_dict.get('tag_type_idx')
+        time_idx = memory_curve_id_dict.get('time_idx')
         time_column = round((float(ls[time_idx]) - self.profile_data.start_ts) / 1000, 2)
         for item in peak_memory_rows[ls[device_type_idx]]:
             if item[0] == ls[tag_type_idx]:
@@ -1016,7 +1026,7 @@ class RunGenerator(object):
     @staticmethod
     def _get_csv_data(path: str):
         if path is None:
-            return
+            return []
        datas = []
        with open(path, encoding='utf-8-sig') as f:
            for row in csv.reader(f, skipinitialspace=True):
@@ -1146,7 +1156,7 @@ class DistributedRunGenerator(object):
             steps_to_overlap['all'][data.worker] = [0, 0, 0, 0]
         step_number = len(data.steps_names)
         if step_number <= 0:
-            return
+            return None
         if self.device_target != 'Ascend':
             DistributedRunGenerator._get_gpu_overlap_data(data, steps_to_overlap)
         else:
@@ -1270,11 +1280,11 @@ class DistributedRunGenerator(object):
                     op,
                     stats[0],
                     stats[1] * 1024 * 1024,
-                    round(stats[1] * 1024 * 1024 / stats[0]),  # 1MB = 1024 * 1024 bytes
+                    round(stats[1] * 1024 * 1024 / stats[0] if stats[0] != 0 else 0),  # 1MB = 1024 * 1024 bytes
                     stats[2] * 1000,
-                    round(stats[2] * 1000 / stats[0]),  # 1ms = 1000us
+                    round(stats[2] * 1000 / stats[0] if stats[0] != 0 else 0),  # 1ms = 1000us
                     stats[3] * 1000,
-                    round(stats[3] * 1000 / stats[0])  # 1ms = 1000us
+                    round(stats[3] * 1000 / stats[0] if stats[0] != 0 else 0)  # 1ms = 1000us
                 ]
                 table['rows'].append(row)
 
@@ -1298,10 +1308,10 @@ class DistributedRunGenerator(object):
                     op,
                     stats[0],
                     stats[1],
-                    round(stats[1] / stats[0]),
+                    round(stats[1] / stats[0] if stats[0] != 0 else 0),
                     stats[2],
-                    round(stats[2] / stats[0]),
+                    round(stats[2] / stats[0] if stats[0] != 0 else 0),
                     stats[3],
-                    round(stats[3] / stats[0])
+                    round(stats[3] / stats[0] if stats[0] != 0 else 0)
                 ]
                 table['rows'].append(row)
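Note that the division guards in the two hunks above test `stats[0]`, the call count, rather than the list `stats` itself (a list always compares unequal to 0, which would make the guard a no-op). Separately, packing the four column indices into `memory_curve_id_dict` replaces six positional parameters with one mapping; a toy sketch of the resulting calling convention (the index values and the CSV row below are made up):

memory_curve_id_dict = {
    'device_type_idx': 0,
    'tag_type_idx': 1,
    'time_idx': 2,
    'reserved_idx': 3,
}

def handle_peak_memory_row_sketch(id_dict, ls):
    # dict.get() returns None for a missing key instead of raising KeyError.
    time_idx = id_dict.get('time_idx')
    reserved_idx = id_dict.get('reserved_idx')
    return float(ls[time_idx]), float(ls[reserved_idx])

assert handle_peak_memory_row_sketch(memory_curve_id_dict,
                                     ['NPU', 'PTA', '12.5', '64.0']) == (12.5, 64.0)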
b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/trace.py
@@ -158,10 +158,8 @@ class PLModuleEvent(DurationEvent):
         super().__init__(EventTypes.PL_MODULE, data)
         self.module_id = 0  # just to be compatible with ModuleEvent processing
         self.name = self.name.replace('[pl][module]', '')
-        # self.shape = self.name[:self.name.rfind(']')+1]
-        # self.name = self.name[self.name.rfind(']')+1:]
         self.module_type = self.name[:self.name.find(': ')]
-        self.name = self.name[self.name.find(': ')+2:]
+        self.name = self.name[self.name.find(': ') + 2:]
 
 
 def create_event(event, is_pytorch_lightning) -> Optional[BaseEvent]:
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py
index 870bf1909b2b0fe0c242651da4803c40cd6776f5..bd97d248ade42743b1b0af3379ee3918d3663aae 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py
@@ -192,7 +192,7 @@ class RunProfile(object):
 
     def get_memory_stats(self, start_ts=None, end_ts=None, memory_metric='K'):
         cano = Canonicalizer(memory_metric=memory_metric)
-        round = DisplayRounder(ndigits=2)
+        rounder = DisplayRounder(ndigits=2)
 
         stats = self.memory_snapshot.get_memory_statistics(self.tid2tree, start_ts=start_ts, end_ts=end_ts)
 
@@ -232,12 +232,12 @@ class RunProfile(object):
             these_rows.append([
                 op_name,
                 stat[6],
-                round(cano.convert_memory(stat[MemoryMetrics.IncreaseSize])),
-                round(cano.convert_memory(stat[MemoryMetrics.SelfIncreaseSize])),
+                rounder(cano.convert_memory(stat[MemoryMetrics.IncreaseSize])),
+                rounder(cano.convert_memory(stat[MemoryMetrics.SelfIncreaseSize])),
                 stat[MemoryMetrics.AllocationCount],
                 stat[MemoryMetrics.SelfAllocationCount],
-                round(cano.convert_memory(stat[MemoryMetrics.AllocationSize])),
-                round(cano.convert_memory(stat[MemoryMetrics.SelfAllocationSize])),
+                rounder(cano.convert_memory(stat[MemoryMetrics.AllocationSize])),
+                rounder(cano.convert_memory(stat[MemoryMetrics.SelfAllocationSize])),
             ])
 
         for dev_name in sorted(stats.keys()):
@@ -366,7 +366,7 @@ class RunProfile(object):
             return name
 
         cano = Canonicalizer(time_metric=time_metric, memory_metric=memory_metric)
-        round = DisplayRounder(ndigits=2)
+        rounder = DisplayRounder(ndigits=2)
 
         profiler_start_ts = self.profiler_start_ts
         memory_records = RunProfile._filtered_by_ts(self.memory_snapshot.memory_records, start_ts, end_ts)
@@ -394,10 +394,10 @@ class RunProfile(object):
                     free_ts = r.ts
                     events[alloc_r.device_name].append([
                         get_op_name_or_ctx(alloc_r),
-                        round(cano.convert_memory(-size)),
-                        round(cano.convert_time(alloc_ts - profiler_start_ts)),
-                        round(cano.convert_time(free_ts - profiler_start_ts)),
-                        round(cano.convert_time(free_ts - alloc_ts)),
+                        rounder(cano.convert_memory(-size)),
+                        rounder(cano.convert_time(alloc_ts - profiler_start_ts)),
+                        rounder(cano.convert_time(free_ts - profiler_start_ts)),
+                        rounder(cano.convert_time(free_ts - alloc_ts)),
                     ])
                     del alloc[addr]
                 else:
@@ -409,8 +409,8 @@ class RunProfile(object):
             r = memory_records[i]
             events[r.device_name].append([
                 get_op_name_or_ctx(r),
-                round(cano.convert_memory(r.bytes)),
-                round(cano.convert_time(r.ts - profiler_start_ts)),
+                rounder(cano.convert_memory(r.bytes)),
+                rounder(cano.convert_time(r.ts - profiler_start_ts)),
                 None,
                 None,
             ])
@@ -419,9 +419,9 @@ class RunProfile(object):
             r = memory_records[i]
             events[r.device_name].append([
                 get_op_name_or_ctx(r),
-                round(cano.convert_memory(-r.bytes)),
+                rounder(cano.convert_memory(-r.bytes)),
                 None,
-                round(cano.convert_time(r.ts - profiler_start_ts)),
+                rounder(cano.convert_time(r.ts - profiler_start_ts)),
                 None,
             ])
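The `round` to `rounder` rename stops the local binding from shadowing the builtin `round` inside these methods; a stand-in sketch of the idea (the plugin's real `DisplayRounder` lives in torch_tb_profiler/utils.py and its exact behavior may differ):

class DisplayRounderSketch:
    """Stand-in for DisplayRounder: a callable that rounds values for display."""

    def __init__(self, ndigits: int):
        self.ndigits = ndigits

    def __call__(self, v: float) -> float:
        # The builtin round() is reachable here because nothing shadows it.
        return round(v, self.ndigits)

rounder = DisplayRounderSketch(ndigits=2)
assert rounder(3.14159) == 3.14
assert round(2.71828, 1) == 2.7  # builtin remains usable alongside `rounder`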
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py
index 9bd488e2bf149b19c796050b4bad9b1683f45191..4a02fa75c6d91262523eaefef96453228b4390a3 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py
@@ -94,11 +94,11 @@ class Canonicalizer:
         self.canonical_time_metrics = {
             'micro': 'us', 'microsecond': 'us', 'us': 'us',
-            'milli': 'ms', 'millisecond': 'ms', 'ms': 'ms', 
-            '': 's', 'second': 's', 's': 's', 
+            'milli': 'ms', 'millisecond': 'ms', 'ms': 'ms',
+            '': 's', 'second': 's', 's': 's',
         }
         # canonicalize the memory metric to a string
         self.canonical_memory_metrics = {
-            '': 'B', 'B': 'B', 
+            '': 'B', 'B': 'B',
             'K': 'KB', 'KB': 'KB',
             'M': 'MB', 'MB': 'MB',
             'G': 'GB', 'GB': 'GB',