diff --git a/debug/accuracy_tools/api_accuracy_checker/__init__.py b/debug/accuracy_tools/api_accuracy_checker/__init__.py
index 9b62b9512de151201c5830d2c62c14f041d749a8..aac6483f7ed1a005e02ce36009473c25fcfdc4aa 100644
--- a/debug/accuracy_tools/api_accuracy_checker/__init__.py
+++ b/debug/accuracy_tools/api_accuracy_checker/__init__.py
@@ -16,12 +16,6 @@
 # ==============================================================================
 """
-from .dump.dump import acc_cmp_dump
-from .dump.utils import set_dump_path, set_dump_switch, set_backward_input
-from .hook_module.register_hook import register_hook
 from .common.utils import seed_all
-from .common.version import __version__
 
 seed_all()
-
-__all__ = ["register_hook", "set_dump_path", "set_dump_switch", "seed_all",
-           "acc_cmp_dump"]
+__all__ = []
diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..397b31422b8c50489f4aa9afab7b09e0da5e864f
--- /dev/null
+++ b/debug/accuracy_tools/api_accuracy_checker/dump/__init__.py
@@ -0,0 +1,10 @@
+from ..hook_module.register_hook import initialize_hook
+from .dump import pretest_hook
+from .info_dump import initialize_output_json
+from .utils import set_dump_switch
+
+
+initialize_hook(pretest_hook)
+initialize_output_json()
+
+__all__ = ['set_dump_switch']
\ No newline at end of file
diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py
index 97b325a2de62df56389b072dd4d4039637ff9a7f..6ffc77578f1ce03d434361d671b317cfae3f4e95 100644
--- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py
+++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py
@@ -23,136 +23,31 @@
 import numpy as np
 import torch
 import threading
 
-try:
-    import torch_npu
-except ImportError:
-    is_gpu = True
-else:
-    is_gpu = False
-from .utils import DumpUtil, _set_dump_switch4api_list, make_dump_data_dir
-
-from ..common.utils import print_warn_log, Const, print_info_log, modify_dump_path
-from ..dump.utils import check_writable
-
-forward_init_status = False
-backward_init_status = False
-
-backward_threading_id = 0
-
-
-class DataInfo(object):
-    def __init__(self, data, save_data, summary_data, dtype, shape):
-        self.data = data
-        self.save_data = save_data
-        self.summary_data = summary_data
-        self.dtype = dtype
-        self.shape = shape
-
-
-def get_not_float_tensor_info(data):
-    summary_data = []
-    if data.numel() == 0 or data.dtype == torch.bool:
-        tensor_max = []
-        tensor_min = []
-        tensor_mean = []
-    elif len(data.shape) == 0:
-        tensor_max = data.cpu().detach().float().numpy().tolist()
-        tensor_min = data.cpu().detach().float().numpy().tolist()
-        tensor_mean = data.cpu().detach().float().numpy().tolist()
+from .utils import ForwardAPIInfo, BackwardAPIInfo
+from .info_dump import write_api_info_json
+from .utils import DumpConst, DumpUtil
+from ..common.utils import print_warn_log, print_info_log, print_error_log
+
+def pretest_info_dump(name, out_feat, module, phase):
+    if not DumpUtil.dump_switch:
+        return
+    if phase == DumpConst.forward:
+        api_info = ForwardAPIInfo(name, module.input_args, module.input_kwargs)
+    elif phase == DumpConst.backward:
+        api_info = BackwardAPIInfo(name, out_feat)
     else:
-        tensor_max = torch._C._VariableFunctionsClass.max(data).cpu().detach().float().numpy().tolist()
-        tensor_min = torch._C._VariableFunctionsClass.min(data).cpu().detach().float().numpy().tolist()
-        tensor_mean = torch._C._VariableFunctionsClass.mean(data.float()).cpu().detach().float().numpy().tolist()
-    saved_tensor = data.contiguous().cpu().detach().numpy()
-    summary_data.extend([tensor_max, tensor_min, tensor_mean])
-    return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape))
-
-
-def get_scalar_data_info(data):
-    summary_data = [data, data, data]
-    return DataInfo(data, data, summary_data, str(type(data)), str([]))
-
-
-def get_float_tensor_info(data):
-    summary_data = []
-    tensor_max = torch._C._VariableFunctionsClass.max(data).cpu().detach().float().numpy().tolist()
-    tensor_min = torch._C._VariableFunctionsClass.min(data).cpu().detach().float().numpy().tolist()
-    tensor_mean = torch._C._VariableFunctionsClass.mean(data).cpu().detach().float().numpy().tolist()
-    saved_tensor = data.contiguous().cpu().detach().numpy()
-    summary_data.extend([tensor_max, tensor_min, tensor_mean])
-    return DataInfo(data, saved_tensor, summary_data, str(data.dtype), tuple(data.shape))
-
-
-def dump_tensor(args):
-    global data_info
-    args_list = []
-    for x in args:
-        if isinstance(x, torch.Tensor):
-            if x.numel() == 0 or len(x.shape) == 0 or not x.is_floating_point():
-                data_info = get_not_float_tensor_info(x)
-            else:
-                data_info = get_float_tensor_info(x)
-            arg = {"dtype": data_info.dtype,
-                   "shape": data_info.shape,
-                   "type": "torch.Tensor",
-                   "Max": data_info.summary_data[0],
-                   "Min": data_info.summary_data[1]}
-        else:
-            arg = {"value": None,
-                   "type": type(x)}
-        args_list.append(arg)
-    return args_list
-
-
-def dump_api_tensor(module, name_template, out_feat, dump_file):
-    api_params_dict = dict()
-    api_dict = dict()
-    if Const.BACKWARD in name_template and DumpUtil.dump_mode != Const.FORWARD:
-        path = os.path.dirname(dump_file)
-        dump_file = os.path.join(path, "dump_backward.pkl")
-        api_params_dict["args"] = dump_tensor(out_feat)
-    elif Const.BACKWARD not in name_template and DumpUtil.dump_mode != Const.BACKWARD:
-        if module.input_args:
-            args_list = dump_tensor(module.input_args)
-            api_params_dict["args"] = args_list
-        if module.input_kwargs:
-            api_params_dict["kwargs"] = module.input_kwargs
-    api_dict[name_template] = api_params_dict
-    with os.fdopen(os.open(dump_file, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR),
-                   "a") as f:
-        json.dump(api_dict, f)
-        f.write('\n')
-
-
-def dump_acc_cmp(name, out_feat, module):
-    dump_file = DumpUtil.get_dump_path()
-    _set_dump_switch4api_list(name)
-
-    dump_file = modify_dump_path(dump_file, DumpUtil.dump_switch_mode)
-
-    if DumpUtil.get_dump_switch():
-        if DumpUtil.dump_init_enable:
-            DumpUtil.dump_init_enable = False
-            DumpUtil.dump_data_dir = make_dump_data_dir(dump_file) \
-                if DumpUtil.dump_switch_mode not in [Const.STACK, Const.ACL] else ""
-            if os.path.exists(dump_file) and not os.path.isdir(dump_file):
-                check_writable(dump_file)
-                os.remove(dump_file)
-
-    if DumpUtil.dump_switch_mode in [Const.ALL, Const.API_LIST]:
-        dump_api_tensor(module, name, out_feat, dump_file)
-
-
-
-
-def acc_cmp_dump(name):
-
-    def acc_cmp_hook(module, in_feat, out_feat):
-        dump_acc_cmp(name, out_feat, module)
+        msg = "Unexpected training phase {}.".format(phase)
+        print_error_log(msg)
+        raise NotImplementedError(msg)
+
+    write_api_info_json(api_info)
+
+def pretest_hook(name, phase):
+    def pretest_info_dump_hook(module, in_feat, out_feat):
+        pretest_info_dump(name, out_feat, module, phase)
         if hasattr(module, "input_args"):
-            del module.input_args
+            del module.input_args
         if hasattr(module, "input_kwargs"):
-            del module.input_kwargs
-
-    return acc_cmp_hook
+            del module.input_kwargs
+    return pretest_info_dump_hook
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/hook_module.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/hook_module.py
index c18b37a9c9a66aad9591a7f9bde24cde0349950e..9307387335406fc0f6cdb9eacb92f902bbcd7c47 100644
--- a/debug/accuracy_tools/api_accuracy_checker/hook_module/hook_module.py
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/hook_module.py
@@ -40,13 +40,13 @@ class HOOKModule(nn.Module):
 
         if prefix not in module_count:
             module_count[prefix] = 1
-            prefix += '0*'
+            prefix += '0'
         else:
             module_count[prefix] += 1
-            prefix = prefix + str(module_count[prefix] - 1) + '*'
+            prefix = prefix + str(module_count[prefix] - 1)
 
-        self.register_forward_hook(hook(prefix + "forward"))
-        self.register_backward_hook(hook(prefix + "backward"))
+        self.register_forward_hook(hook(prefix, "forward"))
+        self.register_backward_hook(hook(prefix, "backward"))
 
     def __call__(self, *input, **kwargs):
         changed = False
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py
index 35d02bb641c2c61cae369bc991f0b7a375ab2a83..ec5b4b55c6cc6990841bffc51193cb7063caaf6d 100644
--- a/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py
@@ -14,27 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-
-import functools
-import os
-
 import torch
 
 from . import wrap_torch, wrap_functional, wrap_tensor
-from .hook_module import HOOKModule
-from ..common.utils import check_file_or_directory_path, print_error_log, CompareException, Const, \
-    print_info_log, print_warn_log, get_process_rank
-from ..dump.utils import make_dump_dirs
-from ..dump.dump import acc_cmp_dump
-
-try:
-    import torch_npu
-except ImportError:
-    is_gpu = True
-else:
-    is_gpu = False
-
-make_dir_flag = True
 
 
 def initialize_hook(hook):
@@ -53,10 +35,3 @@
         if attr_name.startswith("wrap_"):
             setattr(torch.nn.functional, attr_name[5:],
                     getattr(wrap_functional.HOOKFunctionalOP, attr_name))
-
-def register_hook():
-    global make_dir_flag
-    if make_dir_flag:
-        make_dump_dirs(0)
-        make_dir_flag = False
-    initialize_hook(acc_cmp_dump)
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_torch.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_torch.py
index 23334be233f35653a2459e9e681281463210695c..4684e40d8bcc37ee304ea22d2c8cf04d0372ef94 100644
--- a/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_torch.py
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_torch.py
@@ -43,7 +43,7 @@ class TorchOPTemplate(HOOKModule):
 
     def __init__(self, op_name, hook):
         self.op_name_ = op_name
-        self.prefix_op_name_ = "Torch_" + str(op_name) + "_"
+        self.prefix_op_name_ = "Torch*" + str(op_name) + "*"
        super().__init__(hook)

    def input_param_need_adapt(self):
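
Reviewer note, not part of the patch: after this change the package no longer exposes register_hook; importing api_accuracy_checker.dump wires everything up as a side effect, since the new dump/__init__.py calls initialize_hook(pretest_hook) and initialize_output_json() at import time, and only set_dump_switch is exported. A minimal usage sketch follows, assuming the package is importable as api_accuracy_checker and that set_dump_switch takes an "ON"/"OFF" string; neither assumption is confirmed by this diff (the switch convention lives in dump/utils.py, which is not shown).

    import torch
    # Importing the dump package wraps torch, torch.Tensor and
    # torch.nn.functional APIs with the pretest forward/backward hooks.
    from api_accuracy_checker.dump import set_dump_switch

    set_dump_switch("ON")   # assumed switch value; see dump/utils.py for the real API

    x = torch.randn(4, 4, requires_grad=True)
    y = torch.matmul(x, x).sum()   # forward hook records inputs under "Torch*matmul*0"
    y.backward()                   # backward hook records the gradient outputs

    set_dump_switch("OFF")  # assumed switch value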