diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py index 2e49a9743b6a317ee401b2b8f0b31fb2ea68c07a..7812453e934dd13bf90ccf66838c3a6c75788372 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/dump/dump.py @@ -74,9 +74,32 @@ class APIList(list): self.pkl_mode_changed = True self.clear() + def process_data(self, data): + data_stack = torch._C._VariableFunctionsClass.stack([summary[1] for summary in data], dim=0).cpu() + ubind_data = torch._C._VariableFunctionsClass.unbind(data_stack, dim=0) + for i, tensor_stat in enumerate(ubind_data): + index = data[i][0] + stat_data = [ + stat.item() for stat in torch._C._VariableFunctionsClass.unbind(tensor_stat) + ] + self[index][5] = stat_data + + def process(self): + data_summary = [] + for index, data in enumerate(self): + if len(data) != Const.SUMMARY_COLUMN_NUM: + continue + tensor_stat = data[5] + if isinstance(tensor_stat, torch.Tensor) and tensor_stat.device.type != "cpu": + data_summary.append((index, tensor_stat)) + + if data_summary: + self.process_data(data_summary) + def append(self, data): list.append(self, data) if len(self) >= APIList.threshold: + self.process() self.flush() @@ -101,16 +124,31 @@ def get_not_float_tensor_info(data): tensor_max = [] tensor_min = [] tensor_mean = [] + tensor_norm = [] + tensor_stat = [tensor_max, tensor_min, tensor_mean, tensor_norm] elif len(data.shape) == 0: - item = data.float().item() - tensor_max = item - tensor_min = item - tensor_mean = item + if data.device.type == "cpu": + item = data.item() + tensor_stat = [item, item, item, item] + else: + tensor_stat = torch._C._VariableFunctionsClass.stack([data, data, data, data]) else: - tensor_max = torch._C._VariableFunctionsClass.max(data).float().item() - tensor_min = torch._C._VariableFunctionsClass.min(data).float().item() - tensor_mean = torch._C._VariableFunctionsClass.mean(data.float()).float().item() - return get_tensor_data_info(data, tensor_max, tensor_min, tensor_mean, CompareConst.NAN) + if data.device.type == "cpu": + tensor_max = torch._C._VariableFunctionsClass.max(data).item() + tensor_min = torch._C._VariableFunctionsClass.min(data).item() + tensor_mean = torch._C._VariableFunctionsClass.mean(data.float()).item() + tensor_norm = torch._C._VariableFunctionsClass.norm(data.float()).item() + tensor_stat = [tensor_max, tensor_min, tensor_mean, tensor_norm] + else: + tensor_stat = torch._C._VariableFunctionsClass.stack( + [ + torch._C._VariableFunctionsClass.max(data), + torch._C._VariableFunctionsClass.min(data), + torch._C._VariableFunctionsClass.mean(data.float()), + torch._C._VariableFunctionsClass.norm(data.float()) + ] + ) + return get_tensor_data_info(data, tensor_stat) def get_scalar_data_info(data): @@ -121,24 +159,35 @@ def get_scalar_data_info(data): def get_float_tensor_info(data): if DumpUtil.summary_mode == "md5": return DataInfo([], [], str(data.dtype), tuple(data.shape), get_md5_for_tensor(data)) - tensor_max = torch._C._VariableFunctionsClass.max(data).float().item() - tensor_min = torch._C._VariableFunctionsClass.min(data).float().item() - tensor_mean = torch._C._VariableFunctionsClass.mean(data).float().item() - tensor_norm = torch._C._VariableFunctionsClass.norm(data).float().item() - return get_tensor_data_info(data, tensor_max, tensor_min, tensor_mean, tensor_norm) + + if data.device.type == "cpu": + tensor_max = torch._C._VariableFunctionsClass.max(data).item() + tensor_min = torch._C._VariableFunctionsClass.min(data).item() + tensor_mean = torch._C._VariableFunctionsClass.mean(data).item() + tensor_norm = torch._C._VariableFunctionsClass.norm(data).item() + tensor_stat = [tensor_max, tensor_min, tensor_mean, tensor_norm] + else: + tensor_stat = torch._C._VariableFunctionsClass.stack( + [ + torch._C._VariableFunctionsClass.max(data), + torch._C._VariableFunctionsClass.min(data), + torch._C._VariableFunctionsClass.mean(data), + torch._C._VariableFunctionsClass.norm(data) + ] + ) + return get_tensor_data_info(data, tensor_stat) + -def get_tensor_data_info(data, *tensor_args): - summary_data = [] - summary_data.extend([*tensor_args]) +def get_tensor_data_info(data, tensor_stat): if DumpUtil.summary_mode == "all": saved_tensor = data.contiguous().cpu().detach() if data.dtype == torch.bfloat16: saved_numpy = saved_tensor.to(torch.float32).numpy() else: saved_numpy = saved_tensor.numpy() - return DataInfo(saved_numpy, summary_data, str(data.dtype), tuple(data.shape)) - return DataInfo([], summary_data, str(data.dtype), tuple(data.shape)) + return DataInfo(saved_numpy, tensor_stat, str(data.dtype), tuple(data.shape)) + return DataInfo([], tensor_stat, str(data.dtype), tuple(data.shape)) def dump_tensor(x, prefix, dump_step): @@ -221,7 +270,7 @@ def dump_stack_info(name_template): api_list.append([prefix, stack_str]) else: api_list.append([prefix, stack_str]) - + def dump_api_tensor(dump_step, in_feat, name_template, out_feat): if check_inplace_op(name_template): @@ -442,6 +491,7 @@ def acc_cmp_dump(name, **kwargs): def write_to_disk(): + api_list.process() api_list.flush() diff --git a/debug/accuracy_tools/ptdbg_ascend/test/ut/test_dump.py b/debug/accuracy_tools/ptdbg_ascend/test/ut/test_dump.py index 9673c292ba20cef94b926986ddac270f748645e9..fa3be6f6ae8372686dc4fdd1a13c4c248074d447 100644 --- a/debug/accuracy_tools/ptdbg_ascend/test/ut/test_dump.py +++ b/debug/accuracy_tools/ptdbg_ascend/test/ut/test_dump.py @@ -37,7 +37,8 @@ class TestDump(unittest.TestCase): tensor_max = 3.0 tensor_min = 1.0 tensor_mean = 2.0 - data_info = get_tensor_data_info(self.tensor, tensor_max, tensor_min, tensor_mean) + tensor_stat = [tensor_max, tensor_min, tensor_mean] + data_info = get_tensor_data_info(self.tensor, tensor_stat) self.assertEqual(data_info.save_data.tolist(), self.tensor.numpy().tolist()) self.assertEqual(data_info.summary_data, [tensor_max, tensor_min, tensor_mean]) self.assertEqual(data_info.dtype, 'torch.float32')