From 7235aaab479ac838b5b90d635e199164978e6ac6 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Sun, 5 May 2024 16:27:14 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E3=80=90=E7=B2=BE=E5=BA=A6=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E5=90=88=E4=B8=80=E3=80=91ptdbg=20=E9=87=87=E9=9B=86?= =?UTF-8?q?=E8=83=BD=E5=8A=9B=E5=BD=92=E4=B8=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/atat/core/utils.py | 1 - .../atat/pytorch/common/utils.py | 1 + .../atat/pytorch/hook_module/__init__.py | 0 .../atat/pytorch/hook_module/api_registry.py | 162 ++ .../atat/pytorch/hook_module/hook_module.py | 110 + .../atat/pytorch/hook_module/register_hook.py | 159 ++ .../pytorch/hook_module/support_wrap_ops.yaml | 1876 +++++++++++++++++ .../atat/pytorch/hook_module/wrap_aten.py | 99 + .../pytorch/hook_module/wrap_distributed.py | 74 + .../pytorch/hook_module/wrap_functional.py | 106 + .../pytorch/hook_module/wrap_npu_custom.py | 72 + .../atat/pytorch/hook_module/wrap_tensor.py | 70 + .../atat/pytorch/hook_module/wrap_torch.py | 86 + .../atat/pytorch/hook_module/wrap_vf.py | 65 + .../atat/pytorch/overflow_check/__init__.py | 0 .../atat/pytorch/overflow_check/info_dump.py | 252 +++ .../pytorch/overflow_check/overflow_check.py | 190 ++ .../atat/pytorch/overflow_check/utils.py | 114 + 18 files changed, 3436 insertions(+), 1 deletion(-) create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/__init__.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/api_registry.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/hook_module.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py create mode 100644 debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py create mode 100644 debug/accuracy_tools/atat/pytorch/overflow_check/__init__.py create mode 100644 debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py create mode 100644 debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py create mode 100644 debug/accuracy_tools/atat/pytorch/overflow_check/utils.py diff --git a/debug/accuracy_tools/atat/core/utils.py b/debug/accuracy_tools/atat/core/utils.py index e0058009a..ab9c26008 100644 --- a/debug/accuracy_tools/atat/core/utils.py +++ b/debug/accuracy_tools/atat/core/utils.py @@ -33,7 +33,6 @@ from .file_check_util import FileOpen, FileChecker, FileCheckConst device = collections.namedtuple('device', ['type', 'index']) prefixes = ['api_stack', 'list', 'range', 'acl'] -npu_distributed_api = ['isend', 'irecv'] class Const: diff --git a/debug/accuracy_tools/atat/pytorch/common/utils.py b/debug/accuracy_tools/atat/pytorch/common/utils.py index 9423eb8fe..8f530db18 100644 --- a/debug/accuracy_tools/atat/pytorch/common/utils.py +++ b/debug/accuracy_tools/atat/pytorch/common/utils.py @@ -36,6 +36,7 @@ else: is_gpu = False torch_without_guard_version_list = ['2.1', '2.2'] +npu_distributed_api = ['isend', 'irecv'] for version 
in torch_without_guard_version_list: if torch.__version__.startswith(version): torch_without_guard_version = True diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/__init__.py b/debug/accuracy_tools/atat/pytorch/hook_module/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/api_registry.py b/debug/accuracy_tools/atat/pytorch/hook_module/api_registry.py new file mode 100644 index 000000000..cf21fe86b --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/api_registry.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2023. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import torch +import torch.distributed as dist +from . import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten +from .wrap_torch import get_torch_ops +from .wrap_functional import get_functional_ops +from .wrap_tensor import get_tensor_ops +from .wrap_vf import get_vf_ops +from .wrap_distributed import get_distributed_ops +from .wrap_aten import get_aten_ops +from ..common.utils import torch_without_guard_version, npu_distributed_api +torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' + +try: + import torch_npu +except ImportError: + is_gpu = True +else: + is_gpu = False + from . import wrap_npu_custom + from .wrap_npu_custom import get_npu_ops + + +class ApiRegistry: + def __init__(self): + self.tensor_ori_attr = {} + self.torch_ori_attr = {} + self.functional_ori_attr = {} + self.distributed_ori_attr = {} + self.npu_distributed_ori_attr = {} + self.vf_ori_attr = {} + self.aten_ori_attr = {} + self.torch_npu_ori_attr = {} + + self.tensor_hook_attr = {} + self.torch_hook_attr = {} + self.functional_hook_attr = {} + self.distributed_hook_attr = {} + self.npu_distributed_hook_attr = {} + self.vf_hook_attr = {} + self.aten_hook_attr = {} + self.torch_npu_hook_attr = {} + + @staticmethod + def store_ori_attr(ori_api_group, api_list, api_ori_attr): + for api in api_list: + if '.' in api: + sub_module_name, sub_op = api.rsplit('.', 1) + sub_module = getattr(ori_api_group, sub_module_name) + api_ori_attr[api] = getattr(sub_module, sub_op) + else: + api_ori_attr[api] = getattr(ori_api_group, api) + + @staticmethod + def set_api_attr(api_group, attr_dict): + for api, api_attr in attr_dict.items(): + if '.' 
in api: + sub_module_name, sub_op = api.rsplit('.', 1) + sub_module = getattr(api_group, sub_module_name, None) + if sub_module is not None: + setattr(sub_module, sub_op, api_attr) + else: + setattr(api_group, api, api_attr) + + def api_modularity(self): + self.set_api_attr(torch.Tensor, self.tensor_hook_attr) + self.set_api_attr(torch, self.torch_hook_attr) + self.set_api_attr(torch.nn.functional, self.functional_hook_attr) + self.set_api_attr(dist, self.distributed_hook_attr) + self.set_api_attr(dist.distributed_c10d, self.distributed_hook_attr) + if not is_gpu and not torch_without_guard_version: + self.set_api_attr(torch_npu.distributed, self.npu_distributed_hook_attr) + self.set_api_attr(torch_npu.distributed.distributed_c10d, self.npu_distributed_hook_attr) + if torch_version_above_2: + self.set_api_attr(torch.ops.aten, self.aten_hook_attr) + self.set_api_attr(torch._VF, self.vf_hook_attr) + if not is_gpu: + self.set_api_attr(torch_npu, self.torch_npu_hook_attr) + + def api_originality(self): + self.set_api_attr(torch.Tensor, self.tensor_ori_attr) + self.set_api_attr(torch, self.torch_ori_attr) + self.set_api_attr(torch.nn.functional, self.functional_ori_attr) + self.set_api_attr(dist, self.distributed_ori_attr) + self.set_api_attr(dist.distributed_c10d, self.distributed_ori_attr) + if not is_gpu and not torch_without_guard_version: + self.set_api_attr(torch_npu.distributed, self.npu_distributed_ori_attr) + self.set_api_attr(torch_npu.distributed.distributed_c10d, self.npu_distributed_ori_attr) + if torch_version_above_2: + self.set_api_attr(torch.ops.aten, self.aten_ori_attr) + self.set_api_attr(torch._VF, self.vf_ori_attr) + if not is_gpu: + self.set_api_attr(torch_npu, self.torch_npu_ori_attr) + + def initialize_hook(self, hook): + self.store_ori_attr(torch.Tensor, get_tensor_ops(), self.tensor_ori_attr) + wrap_tensor.wrap_tensor_ops_and_bind(hook) + for attr_name in dir(wrap_tensor.HOOKTensor): + if attr_name.startswith("wrap_"): + self.tensor_hook_attr[attr_name[5:]] = getattr(wrap_tensor.HOOKTensor, attr_name) + + self.store_ori_attr(torch, get_torch_ops(), self.torch_ori_attr) + wrap_torch.wrap_torch_ops_and_bind(hook) + for attr_name in dir(wrap_torch.HOOKTorchOP): + if attr_name.startswith("wrap_"): + self.torch_hook_attr[attr_name[5:]] = getattr(wrap_torch.HOOKTorchOP, attr_name) + + self.store_ori_attr(torch.nn.functional, get_functional_ops(), self.functional_ori_attr) + wrap_functional.wrap_functional_ops_and_bind(hook) + for attr_name in dir(wrap_functional.HOOKFunctionalOP): + if attr_name.startswith("wrap_"): + self.functional_hook_attr[attr_name[5:]] = getattr(wrap_functional.HOOKFunctionalOP, attr_name) + + self.store_ori_attr(dist, get_distributed_ops(), self.distributed_ori_attr) + wrap_distributed.wrap_distributed_ops_and_bind(hook) + if not is_gpu and not torch_without_guard_version: + self.store_ori_attr(torch_npu.distributed, npu_distributed_api, self.npu_distributed_ori_attr) + for attr_name in dir(wrap_distributed.HOOKDistributedOP): + if attr_name.startswith("wrap_"): + self.distributed_hook_attr[attr_name[5:]] = getattr(wrap_distributed.HOOKDistributedOP, attr_name) + if not is_gpu and not torch_without_guard_version and attr_name[5:] in npu_distributed_api: + self.npu_distributed_hook_attr[attr_name[5:]] = getattr(wrap_distributed.HOOKDistributedOP, + attr_name) + + if torch_version_above_2: + self.store_ori_attr(torch.ops.aten, get_aten_ops(), self.aten_ori_attr) + wrap_aten.wrap_aten_ops_and_bind(hook) + for attr_name in dir(wrap_aten.HOOKAtenOP): + if 
attr_name.startswith("wrap_"): + self.aten_hook_attr[attr_name[5:]] = getattr(wrap_aten.HOOKAtenOP, attr_name) + + self.store_ori_attr(torch._VF, get_vf_ops(), self.vf_ori_attr) + wrap_vf.wrap_vf_ops_and_bind(hook) + for attr_name in dir(wrap_vf.HOOKVfOP): + if attr_name.startswith("wrap_"): + self.vf_hook_attr[attr_name[5:]] = getattr(wrap_vf.HOOKVfOP, attr_name) + + if not is_gpu: + self.store_ori_attr(torch_npu, get_npu_ops(), self.torch_npu_ori_attr) + wrap_npu_custom.wrap_npu_ops_and_bind(hook) + for attr_name in dir(wrap_npu_custom.HOOKNpuOP): + if attr_name.startswith("wrap_"): + self.torch_npu_hook_attr[attr_name[5:]] = getattr(wrap_npu_custom.HOOKNpuOP, attr_name) + + +api_register = ApiRegistry() diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/atat/pytorch/hook_module/hook_module.py new file mode 100644 index 000000000..6f23a8d42 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/hook_module.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import functools +import threading +import torch +import torch.nn as nn +import torch.utils.hooks as full_hooks + + +class HOOKModule(nn.Module): + module_count = {} + inner_stop_hook = {} + + def __init__(self, hook) -> None: + super(HOOKModule, self).__init__() + self.has_overflow = False + self.input_args = tuple() + self.input_kwargs = dict() + self.prefix = "" + self.current_thread = threading.current_thread().ident + if self.current_thread not in HOOKModule.inner_stop_hook: + HOOKModule.inner_stop_hook[self.current_thread] = False + self.stop_hook = HOOKModule.inner_stop_hook.get(self.current_thread, False) + + if not self.stop_hook: + if hasattr(self, "prefix_op_name_"): + self.prefix = self.prefix_op_name_ + + if self.prefix not in HOOKModule.module_count: + HOOKModule.module_count[self.prefix] = 1 + self.prefix += '0_' + else: + HOOKModule.module_count[self.prefix] += 1 + self.prefix = self.prefix + str(HOOKModule.module_count[self.prefix] - 1) + '_' + self.register_forward_hook(hook(self.prefix + "forward")) + self.register_backward_hook(hook(self.prefix + "backward")) + + def __call__(self, *input, **kwargs): + changed = False + if not self.stop_hook: + HOOKModule.inner_stop_hook[self.current_thread] = True + changed = True + result = self._call_func(*input, **kwargs) + if changed: + HOOKModule.inner_stop_hook[self.current_thread] = False + return result + + def _call_func(self, *input, **kwargs): + full_backward_hooks, non_full_backward_hooks = [], [] + if len(self._backward_hooks) > 0: + full_backward_hooks, non_full_backward_hooks = self._get_backward_hooks() + for hook in self._forward_pre_hooks.values(): + result = hook(self, input) + if result is not None: + if not isinstance(result, tuple): + result = (result,) + input = result + bw_hook = None + if len(full_backward_hooks) > 0: + bw_hook = 
full_hooks.BackwardHook(self, full_backward_hooks) + input = bw_hook.setup_input_hook(input) + self.input_args = input + self.input_kwargs = kwargs + if torch._C._get_tracing_state(): + result = self._slow_forward(*input, **kwargs) + else: + result = self.forward(*input, **kwargs) + input_list = list(input) + input_list.extend(kwargs.values()) + for hook in self._forward_hooks.values(): + hook_result = hook(self, input_list, result) + if hook_result is not None: + result = hook_result + if bw_hook: + result = bw_hook.setup_output_hook(result) + if len(non_full_backward_hooks) > 0: + var = result + while not isinstance(var, torch.Tensor): + if isinstance(var, dict): + var = next((v for v in var.values() if isinstance(v, torch.Tensor))) + elif isinstance(var, (list, tuple)): + if var: + var = var[0] + else: + return result + else: + return result + grad_fn = var.grad_fn + if grad_fn is not None: + for hook in non_full_backward_hooks: + wrapper = functools.partial(hook, self) + functools.update_wrapper(wrapper, hook) + grad_fn.register_hook(wrapper) + self._maybe_warn_non_full_backward_hook(input, result, grad_fn) + return result diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py b/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py new file mode 100644 index 000000000..821f482f9 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import functools +import os + +from inspect import isfunction +import torch +import torch.distributed as dist + +from . import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten +from .hook_module import HOOKModule +from .api_registry import api_register +from .wrap_functional import remove_dropout +from atat.core.utils import check_file_or_directory_path, print_error_log, CompareException, Const, \ + print_info_log, print_warn_log, get_process_rank +from ..common.utils import torch_without_guard_version +from ..dump.utils import make_dump_dirs, DumpUtil +from ..overflow_check.utils import OverFlowUtil, clear_overflow_npu + +torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' + +try: + import torch_npu +except ImportError: + is_gpu = True +else: + is_gpu = False + from . 
import wrap_npu_custom + +make_dir_flag = True +REGISTER_HOOK_KWARGS = ["overflow_nums", "dump_mode", "dump_config"] + + +def add_clear_overflow(func, pid): + first_module = True + + def clear_overflow_wrapper(*args, **kwargs): + child_pid = os.getpid() + if pid != child_pid: + return func(*args, **kwargs) + nonlocal first_module + if first_module: + clear_overflow_npu() + first_module = False + return func(*args, **kwargs) + + return clear_overflow_wrapper + + +def register_hook(model, hook, **kwargs): + check_register_hook(hook, **kwargs) + print_info_log("Please disable dataloader shuffle before running the program.") + overflow_nums = kwargs.get('overflow_nums', 1) + init_overflow_nums(overflow_nums) + dump_mode, dump_config_file = init_dump_config(kwargs) + if dump_mode == 'acl': + DumpUtil.dump_switch_mode = dump_mode + DumpUtil.set_acl_config(dump_config_file) + register_hook_core(hook) + + +def init_overflow_nums(overflow_nums): + if isinstance(overflow_nums, int) and overflow_nums > 0 or overflow_nums == -1: + OverFlowUtil.overflow_nums = overflow_nums + else: + print_error_log("overflow_nums must be an integer greater than 0 or set -1.") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + + +def check_register_hook(hook, **kwargs): + if not isfunction(hook) or hook.__name__ not in ["overflow_check", "acc_cmp_dump"]: + print_error_log("hook function must be set overflow_check or acc_cmp_dump") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + for item in kwargs.keys(): + if item not in REGISTER_HOOK_KWARGS: + print_error_log(f"{item} not a valid keyword arguments in register_hook.") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + + +def register_hook_core(hook, model=None): + global make_dir_flag + + pid = os.getpid() + need_clear = True + if make_dir_flag: + make_dump_dirs() + make_dir_flag = False + hook_name = hook.__name__ + + if "overflow_check" in hook_name and model is not None: + print_error_log("Overflow check does not support model dump mode") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + if "overflow_check" in hook_name and not is_gpu: + if hasattr(torch_npu._C, "_enable_overflow_npu"): + torch_npu._C._enable_overflow_npu() + print_info_log("Enable overflow function success.") + else: + print_warn_log("Api '_enable_overflow_npu' is not exist, " + "the overflow detection function on milan platform maybe not work! 
" + "please check the version of software torch_npu.") + # In NPU scene, clear the overflow flag before overflow detection + if need_clear: + HOOKModule.__init__ = add_clear_overflow(HOOKModule.__init__, pid) + + print_info_log("Start mounting the {} hook function to the model.".format(hook_name)) + hook = functools.partial(hook, dump_step=0, pid=pid) + print_info_log("The {} hook function is successfully mounted to the model.".format(hook_name)) + + if model is not None: + print_info_log("The init dump mode is enabled, and the module dump function will not be available") + if not isinstance(model, torch.nn.Module): + print_error_log("The argument model must be an object of torch.nn.Module") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + for name, module in model.named_modules(): + if module == model: + continue + prefix = name + "_" + module.__class__.__name__ + module.register_forward_hook(hook(prefix + "_{}_" + "forward")) + module.register_backward_hook(hook(prefix + "_{}_" + "backward")) + else: + api_register.initialize_hook(hook) + api_register.api_modularity() + + if "acc_cmp_dump" in hook_name: + remove_dropout() + + +def init_dump_config(kwargs): + dump_mode = kwargs.get('dump_mode', "api") + dump_config = kwargs.get('dump_config') + dump_config_file = '' + if dump_mode not in Const.SUPPORT_DUMP_MODE: + print_error_log("dump_mode only support %s" % Const.SUPPORT_DUMP_MODE) + raise CompareException(CompareException.INVALID_PARAM_ERROR) + if dump_mode == "acl": + if dump_config is None: + print_error_log("dump_mode is acl mode, dump_config must be configured.") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + dump_config_file = os.path.realpath(dump_config) + check_file_or_directory_path(dump_config_file) + if not dump_config.endswith(".json"): + print_error_log("dump_config must be configure json file.") + raise CompareException(CompareException.INVALID_PARAM_ERROR) + return dump_mode, dump_config_file diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml new file mode 100644 index 000000000..92096fc4b --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/support_wrap_ops.yaml @@ -0,0 +1,1876 @@ +# Copyright (c) 2020 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# List of ops that register hooks + +functional: + - conv1d + - conv2d + - conv3d + - conv_transpose1d + - conv_transpose2d + - conv_transpose3d + - conv_tbc + - avg_pool1d + - avg_pool2d + - avg_pool3d + - fractional_max_pool2d_with_indices + - fractional_max_pool2d + - fractional_max_pool3d_with_indices + - fractional_max_pool3d + - max_pool1d_with_indices + - max_pool1d + - max_pool2d_with_indices + - max_pool2d + - max_pool3d_with_indices + - max_pool3d + - max_unpool1d + - max_unpool2d + - max_unpool3d + - lp_pool2d + - lp_pool1d + - adaptive_max_pool1d_with_indices + - adaptive_max_pool1d + - adaptive_max_pool2d_with_indices + - adaptive_max_pool2d + - adaptive_max_pool3d_with_indices + - adaptive_max_pool3d + - adaptive_avg_pool1d + - adaptive_avg_pool2d + - adaptive_avg_pool3d + - dropout + - alpha_dropout + - dropout2d + - dropout3d + - feature_alpha_dropout + - threshold + - threshold_ + - relu + - relu_ + - glu + - hardtanh + - hardtanh_ + - relu6 + - elu + - elu_ + - selu + - selu_ + - celu + - celu_ + - leaky_relu + - leaky_relu_ + - prelu + - rrelu + - rrelu_ + - logsigmoid + - gelu + - hardshrink + - tanhshrink + - softsign + - softplus + - softmin + - softmax + - gumbel_softmax + - log_softmax + - softshrink + - tanh + - sigmoid + - hardsigmoid + - linear + - bilinear + - silu + - hardswish + - embedding + - embedding_bag + - batch_norm + - instance_norm + - layer_norm + - group_norm + - local_response_norm + - ctc_loss + - nll_loss + - poisson_nll_loss + - gaussian_nll_loss + - kl_div + - cross_entropy + - binary_cross_entropy + - binary_cross_entropy_with_logits + - smooth_l1_loss + - l1_loss + - mse_loss + - margin_ranking_loss + - hinge_embedding_loss + - multilabel_margin_loss + - soft_margin_loss + - multilabel_soft_margin_loss + - cosine_embedding_loss + - multi_margin_loss + - pixel_shuffle + - pixel_unshuffle + - channel_shuffle + - upsample + - interpolate + - upsample_nearest + - upsample_bilinear + - grid_sample + - affine_grid + - pad + - pairwise_distance + - pdist + - cosine_similarity + - one_hot + - triplet_margin_loss + - triplet_margin_with_distance_loss + - normalize + - unfold + - fold + - multi_head_attention_forward + - scaled_dot_product_attention + +tensor: + - __add__ + - __and__ + - __bool__ + - __div__ + - __eq__ + - __ge__ + - __gt__ + - __getitem__ + - __iadd__ + - __iand__ + - __idiv__ + - __ifloordiv__ + - __ilshift__ + - __imod__ + - __imul__ + - __ior__ + - __irshift__ + - __isub__ + - __ixor__ + - __lshift__ + - __matmul__ + - __mod__ + - __mul__ + - __nonzero__ + - __or__ + - __radd__ + - __rmul__ + - __rshift__ + - __setitem__ + - __sub__ + - __truediv__ + - __xor__ + - abs + - abs_ + - absolute + - absolute_ + - acos + - acos_ + - acosh + - acosh_ + - add + - add_ + - addbmm + - addbmm_ + - addcdiv + - addcdiv_ + - addcmul + - addcmul_ + - addmm + - addmm_ + - addmv + - addmv_ + - addr + - addr_ + - align_as + - align_to + - all + - allclose + - amax + - amin + - angle + - any + - arccos + - arccos_ + - arccosh + - arccosh_ + - arcsin + - arcsin_ + - arcsinh + - arcsinh_ + - arctan + - arctan_ + - arctanh + - arctanh_ + - argmax + - argmin + - argsort + - asin + - asin_ + - asinh + - asinh_ + - atan + - atan2 + - atan2_ + - atan_ + - atanh + - atanh_ + - baddbmm + - baddbmm_ + - bernoulli + - bernoulli_ + - bincount + - bitwise_and + - bitwise_and_ + - bitwise_not + - bitwise_not_ + - bitwise_or + - bitwise_or_ + - bitwise_xor + - bitwise_xor_ + - bmm + - broadcast_to + - cauchy_ + - ceil + - ceil_ + - cholesky + - chunk + - clamp + 
- cholesky_solve + - cholesky_inverse + - clamp_ + - clamp_max + - clamp_max_ + - clip + - clamp_min + - clamp_min_ + - clip_ + - copysign + - copysign_ + - cos + - cos_ + - cosh + - cosh_ + - count_nonzero + - cummax + - cummin + - cumprod + - cumprod_ + - cumsum + - cumsum_ + - deg2rad + - deg2rad_ + - det + - diag + - diag_embed + - diagflat + - diagonal + - diff + - dist + - digamma + - digamma_ + - div + - div_ + - divide + - divide_ + - dot + - eig + - eq + - eq_ + - erf + - equal + - erf_ + - erfc + - erfc_ + - erfinv + - erfinv_ + - exp + - exp2 + - exp2_ + - expm1 + - exp_ + - expm1_ + - exponential_ + - fill_ + - fix + - fill_diagonal_ + - fix_ + - flip + - fliplr + - flatten + - flipud + - float_power + - float_power_ + - floor + - floor_ + - floor_divide + - floor_divide_ + - fmax + - fmin + - fmod + - fmod_ + - frac + - frac_ + - gather + - gcd + - gcd_ + - ge + - ge_ + - geometric_ + - geqrf + - ger + - greater + - greater_ + - gt + - gt_ + - greater_equal + - greater_equal_ + - hardshrink + - heaviside + - heaviside_ + - histc + - hypot + - hypot_ + - igamma + - igamma_ + - igammac + - igammac_ + - index_add + - index_add_ + - inverse + - index_copy + - index_copy_ + - index_fill + - index_fill_ + - index_put + - index_put_ + - inner + - index_select + - isclose + - isfinite + - isinf + - isnan + - isneginf + - isposinf + - isreal + - kron + - kthvalue + - lcm + - lcm_ + - ldexp + - ldexp_ + - le + - le_ + - lerp + - lerp_ + - where + - less + - less_ + - less_equal + - less_equal_ + - lgamma + - lgamma_ + - log + - log10 + - log10_ + - log1p + - log1p_ + - log2 + - log2_ + - log_ + - log_normal_ + - log_softmax + - logcumsumexp + - logdet + - logaddexp + - logaddexp2 + - logical_and + - logical_and_ + - logical_not + - logit + - logical_not_ + - logical_or + - logical_or_ + - logical_xor + - logical_xor_ + - logit_ + - logsumexp + - lstsq + - lt + - lt_ + - lu_solve + - map2_ + - map_ + - masked_fill + - matmul + - masked_fill_ + - masked_scatter + - masked_scatter_ + - masked_select + - matrix_exp + - max + - maximum + - mean + - matrix_power + - median + - min + - minimum + - mm + - mode + - msort + - mul + - mul_ + - multinomial + - multiply + - multiply_ + - mv + - mvlgamma + - mvlgamma_ + - nansum + - narrow + - narrow_copy + - ne + - ne_ + - neg + - neg_ + - negative + - negative_ + - nonzero + - norm + - normal_ + - not_equal + - not_equal_ + - permute + - pinverse + - polygamma + - pow + - pow_ + - polygamma_ + - prelu + - prod + - put_ + - rad2deg + - rad2deg_ + - ravel + - real + - reciprocal + - reciprocal_ + - relu + - relu_ + - remainder + - repeat_interleave + - reshape + - remainder_ + - renorm + - renorm_ + - repeat + - reshape_as + - resize_ + - resize_as_ + - roll + - rot90 + - round + - round_ + - rsqrt + - rsqrt_ + - scatter + - scatter_ + - scatter_add + - scatter_add_ + - select + - sgn + - sgn_ + - sigmoid + - sigmoid_ + - sign + - sign_ + - signbit + - sin + - sin_ + - sinc + - sinc_ + - sinh + - sinh_ + - slogdet + - smm + - softmax + - solve + - sort + - split_with_sizes + - sqrt + - sqrt_ + - square + - square_ + - squeeze + - squeeze_ + - sspaddmm + - std + - sub + - sub_ + - sum + - sum_to_size + - svd + - symeig + - t + - t_ + - take + - tan + - tan_ + - tanh + - tanh_ + - tensor_split + - tile + - topk + - transpose + - transpose_ + - triangular_solve + - tril + - tril_ + - triu + - true_divide + - triu_ + - true_divide_ + - trunc + - trunc_ + - type_as + - unbind + - unflatten + - unfold + - unsafe_chunk + - unsqueeze + - unsafe_split + - 
unsafe_split_with_sizes + - var + - vdot + - unsqueeze_ + - view_as + - xlogy + - xlogy_ + +torch: + - linalg.norm + - linalg.vector_norm + - linalg.matrix_norm + - linalg.diagonal + - linalg.det + - linalg.slogdet + - linalg.cond + - linalg.matrix_rank + - linalg.qr + - linalg.lu + - linalg.lu_factor + - linalg.svd + - linalg.svdvals + - linalg.solve + - linalg.lstsq + - linalg.inv + - linalg.pinv + - linalg.matrix_exp + - linalg.matrix_power + - linalg.cross + - linalg.matmul + - linalg.vecdot + - linalg.multi_dot + - linalg.householder_product + - linalg.tensorsolve + - linalg.vander + - linalg.cholesky_ex + - linalg.inv_ex + - linalg.solve_ex + - linalg.lu_factor_ex + - linalg.ldl_factor + - linalg.ldl_factor_ex + - _adaptive_avg_pool2d + - _add_relu + - _add_relu_ + - _aminmax + - _batch_norm_impl_index + - _convolution + - _foreach_norm + - _softmax_backward_data + - abs + - abs_ + - absolute + - acos + - acos_ + - acosh + - acosh_ + - adaptive_avg_pool1d + - adaptive_max_pool1d + - add + - addbmm + - addcdiv + - addcmul + - addmm + - addmv + - addmv_ + - addr + - amax + - affine_grid_generator + - align_tensors + - all + - alpha_dropout + - amin + - alpha_dropout_ + - angle + - any + - arange + - arccos + - arccos_ + - arccosh + - arccosh_ + - arcsin + - arcsin_ + - arcsinh + - arcsinh_ + - arctan + - arctan_ + - arctanh + - arctanh_ + - argmax + - argmin + - argsort + - asin + - asin_ + - asinh + - asinh_ + - atan + - atan2 + - atan_ + - atanh + - atanh_ + - atleast_1d + - atleast_2d + - atleast_3d + - avg_pool1d + - baddbmm + - bartlett_window + - batch_norm_backward_elemt + - batch_norm_backward_reduce + - batch_norm_elemt + - batch_norm_gather_stats + - batch_norm_gather_stats_with_counts + - bernoulli + - batch_norm_stats + - batch_norm_update_stats + - bilinear + - bincount + - binomial + - binary_cross_entropy_with_logits + - bitwise_and + - bitwise_not + - bitwise_or + - bitwise_xor + - blackman_window + - block_diag + - bmm + - broadcast_tensors + - broadcast_to + - bucketize + - cartesian_prod + - cat + - cdist + - ceil + - ceil_ + - celu + - celu_ + - chain_matmul + - channel_shuffle + - cholesky + - cholesky_inverse + - cholesky_solve + - choose_qparams_optimized + - chunk + - clamp + - clamp_ + - clamp_max + - clamp_max_ + - clamp_min + - clamp_min_ + - clip + - clip_ + - clone + - column_stack + - combinations + - concat + - concatenate + - constant_pad_nd + - conv1d + - conv2d + - conv3d + - conv_tbc + - conv_transpose1d + - conv_transpose2d + - conv_transpose3d + - cos + - convolution + - copysign + - cos_ + - cosh + - cosh_ + - cosine_embedding_loss + - cosine_similarity + - count_nonzero + - cov + - cross + - ctc_loss + - cummax + - cummin + - cumprod + - cumsum + - deg2rad + - deg2rad_ + - det + - diag + - diag_embed + - diff + - diagflat + - diagonal + - digamma + - dist + - div + - divide + - dot + - dropout + - dropout_ + - dsmm + - dstack + - eig + - einsum + - embedding + - embedding_bag + - embedding_renorm_ + - eq + - equal + - erf + - erf_ + - erfc + - erfc_ + - erfinv + - exp + - exp2 + - exp2_ + - exp_ + - expm1 + - expm1_ + - eye + - feature_dropout + - feature_alpha_dropout + - feature_alpha_dropout_ + - feature_dropout_ + - fix + - fill_ + - fix_ + - flatten + - flip + - fliplr + - flipud + - float_power + - floor + - floor_ + - floor_divide + - fmax + - fmin + - fmod + - frac + - frac_ + - full + - frobenius_norm + - full_like + - gather + - gcd + - gcd_ + - ge + - geqrf + - ger + - greater + - greater_equal + - grid_sampler + - grid_sampler_2d + - 
group_norm + - grid_sampler_3d + - gru + - gru_cell + - gt + - hamming_window + - hann_window + - hardshrink + - heaviside + - hinge_embedding_loss + - histc + - hsmm + - hspmm + - hstack + - hypot + - igamma + - igammac + - index_add + - index_copy + - inner + - index_fill + - index_put + - index_put_ + - index_select + - instance_norm + - inverse + - isclose + - isfinite + - isinf + - isnan + - isneginf + - isposinf + - istft + - kaiser_window + - kl_div + - kron + - kthvalue + - layer_norm + - lcm + - lcm_ + - ldexp + - ldexp_ + - le + - lerp + - less + - less_equal + - lgamma + - linspace + - log + - log10 + - log10_ + - log1p + - log1p_ + - log2 + - log2_ + - log_softmax + - log_ + - logaddexp + - logaddexp2 + - logcumsumexp + - logdet + - logical_and + - logical_not + - logical_or + - logical_xor + - logit + - logit_ + - logspace + - logsumexp + - lstm + - lstm_cell + - lstsq + - lt + - lu_solve + - lu_unpack + - masked_fill + - margin_ranking_loss + - masked_scatter + - masked_select + - matrix_exp + - matmul + - matrix_power + - matrix_rank + - max + - max_pool1d + - max_pool2d + - max_pool1d_with_indices + - max_pool3d + - maximum + - mean + - median + - min + - minimum + - mm + - mode + - moveaxis + - movedim + - msort + - mul + - multinomial + - multiply + - mv + - mvlgamma + - nan_to_num + - nan_to_num_ + - nanmedian + - nansum + - narrow + - native_batch_norm + - native_group_norm + - narrow_copy + - native_layer_norm + - native_norm + - ne + - neg + - negative + - neg_ + - negative_ + - nextafter + - nonzero + - norm + - norm_except_dim + - normal + - not_equal + - nuclear_norm + - ones_like + - outer + - pairwise_distance + - pdist + - permute + - pinverse + - pixel_shuffle + - pixel_unshuffle + - poisson + - poisson_nll_loss + - polar + - polygamma + - pow + - prelu + - prod + - qr + - quantile + - rad2deg + - rad2deg_ + - range + - ravel + - real + - reciprocal + - relu + - reciprocal_ + - relu_ + - remainder + - renorm + - repeat_interleave + - reshape + - resize_as_ + - roll + - rot90 + - round + - round_ + - rrelu + - rrelu_ + - rsqrt + - row_stack + - rsqrt_ + - rsub + - saddmm + - scalar_tensor + - scatter + - select + - scatter_add + - searchsorted + - selu + - selu_ + - sgn + - sigmoid + - sigmoid_ + - sign + - signbit + - sin + - sin_ + - sinc + - sinc_ + - sinh + - sinh_ + - slogdet + - smm + - softmax + - solve + - sort + - sparse_coo_tensor + - square + - split + - split_with_sizes + - spmm + - sqrt + - sqrt_ + - square_ + - squeeze + - sspaddmm + - stack + - std + - std_mean + - stft + - sub + - subtract + - sum + - svd + - swapaxes + - swapdims + - symeig + - t + - take + - take_along_dim + - tan + - tan_ + - tanh + - tanh_ + - tensordot + - tensor_split + - threshold + - threshold_ + - tile + - topk + - transpose + - trapz + - triangular_solve + - tril + - tril_indices + - triplet_margin_loss + - triu + - triu_indices + - true_divide + - trunc + - trunc_ + - unique_consecutive + - xlogy + - unbind + - unsafe_chunk + - unsafe_split + - vander + - var + - vdot + - unsafe_split_with_sizes + - unsqueeze + - var_mean + - vstack + - where + - xlogy_ + +_VF: + - lstm + +torch_npu: + - one_ + - npu_sort_v2 + - npu_transpose + - npu_broadcast + - npu_dtype_cast + - empty_with_format + - npu_one_hot + - npu_stride_add + - npu_ps_roi_pooling + - npu_roi_align + - npu_nms_v4 + - npu_iou + - npu_nms_with_mask + - npu_pad + - npu_bounding_box_encode + - npu_bounding_box_decode + - npu_batch_nms + - npu_slice + - _npu_dropout + - npu_indexing + - npu_ifmr + - npu_max + - 
npu_scatter + - npu_layer_norm_eval + - npu_alloc_float_status + - npu_confusion_transpose + - npu_bmmV2 + - fast_gelu + - npu_sub_sample + - npu_deformable_conv2d + - npu_mish + - npu_anchor_response_flags + - npu_yolo_boxes_encode + - npu_grid_assign_positive + - npu_normalize_batch + - npu_masked_fill_range + - npu_linear + - npu_bert_apply_adam + - npu_giou + - npu_ciou + - npu_diou + - npu_sign_bits_pack + - npu_sign_bits_unpack + - npu_flash_attention + - npu_scaled_masked_softmax + - npu_rotary_mul + - npu_roi_align + - npu_roi_alignbk + - npu_ptiou + - npu_fusion_attention + - npu_dropout_with_add_softmax + - npu_random_choice_with_mask + - npu_rotated_iou + - npu_conv2d + - npu_conv3d + - npu_softmax_cross_entropy_with_logits + - npu_all_gather_base_mm + - npu_swiglu + - npu_rms_norm + - npu_mm_reduce_scatter_base + - npu_mm_all_reduce_base + - npu_conv_transpose2d + - npu_convolution + - npu_convolution_transpose + - npu_min + - npu_nms_rotated + - npu_reshape + - npu_rotated_box_decode + - npu_rotated_box_encode + - npu_rotated_overlaps + - npu_silu + - npu_fused_attention_score + - npu_multi_head_attention + - npu_gru + - npu_incre_flash_attention + - npu_prompt_flash_attention + - npu_lstm + - npu_apply_adam + +aten: + - signbit + - logical_not_ + - _foreach_copy_ + - clamp + - hardswish_ + - arcsin_ + - logsumexp + - native_group_norm + - special_i1e + - bitwise_and + - new_full + - fft_ihfft + - _adaptive_avg_pool2d + - scatter_add + - abs + - selu + - exponential + - silu + - _native_batch_norm_legit_functional + - special_hermite_polynomial_h + - tanh_ + - log_sigmoid_forward + - _fft_c2c + - heaviside_ + - sigmoid_backward + - zeros_like + - as_strided_scatter + - trace + - _assert_async + - avg_pool2d_backward + - exp2 + - binary_cross_entropy_backward + - geometric + - fft_ihfftn + - smooth_l1_loss + - multiply + - __lshift__ + - binary_cross_entropy_with_logits + - _embedding_bag + - arange + - linalg_qr + - _embedding_bag_forward_only + - _unsafe_view + - remainder + - cholesky_inverse + - sub_ + - zero + - fix + - xlogy + - __doc__ + - rsqrt_ + - cummin + - __xor__ + - eye + - _fused_adam + - ceil + - nll_loss2d_backward + - replication_pad3d_backward + - fill_ + - logaddexp2 + - _thnn_fused_lstm_cell_backward_impl + - native_dropout + - fft_ifft + - expand + - _cdist_backward + - avg_pool3d_backward + - round_ + - topk + - max_unpool3d + - xlogy_ + - reflection_pad2d_backward + - addcdiv_ + - relu6 + - multilabel_margin_loss_forward + - prelu + - logaddexp + - _cholesky_solve_helper + - _foreach_addcdiv + - arctan_ + - fft_irfftn + - logical_or + - bitwise_or_ + - hardtanh_backward + - uniform + - less_equal + - _foreach_sub + - linalg_cholesky_ex + - hardswish + - fft_fft2 + - sign + - min + - norm + - asin + - addcmul_ + - stft + - col2im + - special_chebyshev_polynomial_u + - adaptive_max_pool3d + - __ilshift__ + - _resize_output + - gather + - lu_unpack + - native_batch_norm_backward + - sigmoid + - sqrt + - new_empty_strided + - _foreach_lerp_ + - mean + - scatter_add_ + - _fft_c2r + - rand_like + - true_divide_ + - gcd_ + - multinomial + - permute + - index_put_ + - arcsinh_ + - log1p_ + - index_add + - atan + - glu_backward + - searchsorted + - fill + - _unsafe_index + - index_reduce_ + - replication_pad2d + - expm1_ + - hardsigmoid + - addmm + - fft_fftn + - fft_ifftshift + - special_modified_bessel_k1 + - fft_rfft + - ge + - _adaptive_avg_pool2d_backward + - argmin + - linalg_lu_factor_ex + - atanh_ + - addmv + - _foreach_sqrt_ + - huber_loss_backward + - 
empty_like + - softshrink + - subtract_ + - bitwise_left_shift_ + - special_modified_bessel_i0 + - _nested_tensor_from_tensor_list + - slice_backward + - special_modified_bessel_i1 + - special_chebyshev_polynomial_t + - conj_physical + - _cdist_forward + - margin_ranking_loss + - max_pool3d_with_indices_backward + - _foreach_reciprocal_ + - lcm + - transpose_ + - cudnn_batch_norm_backward + - reciprocal + - copysign_ + - _foreach_pow + - rad2deg + - _foreach_sqrt + - negative + - replication_pad3d + - atanh + - _linalg_eigh + - igamma_ + - special_i0e + - linalg_ldl_factor_ex + - special_ndtri + - logit + - diagonal_copy + - triu + - silu_ + - polygamma + - square_ + - nextafter_ + - special_scaled_modified_bessel_k0 + - bitwise_not + - var + - mkldnn_rnn_layer_backward + - upsample_bilinear2d + - arctan2 + - clone + - arcsin + - new_ones + - soft_margin_loss + - nan_to_num + - huber_loss + - linalg_lu_solve + - elu_backward + - acosh + - __ior__ + - _unsafe_index_put + - __or__ + - _linalg_slogdet + - arcsinh + - select_scatter + - less_ + - reflection_pad1d + - istft + - reflection_pad2d + - diagonal_backward + - special_entr + - _softmax_backward_data + - randn + - celu + - embedding + - igammac_ + - new_zeros + - native_layer_norm_backward + - nonzero_static + - diagonal_scatter + - grid_sampler_2d + - smooth_l1_loss_backward + - _to_copy + - fft_irfft2 + - relu_ + - fmod + - log1p + - i0 + - mse_loss_backward + - copy + - special_laguerre_polynomial_l + - addmv_ + - quantized_gru + - diag_embed + - acos + - fmod_ + - linalg_cross + - mvlgamma_ + - _foreach_mul + - cummax + - less_equal_ + - ne + - to + - _pdist_forward + - special_xlog1py + - digamma + - lgamma + - mv + - softplus + - special_bessel_y1 + - pin_memory + - logical_xor_ + - cat + - grid_sampler_2d_backward + - frac_ + - dropout + - unsafe_chunk + - masked_fill_ + - log + - negative_ + - _scaled_dot_product_flash_attention + - _amp_foreach_non_finite_check_and_unscale_ + - randn_like + - add + - roll + - threshold + - gcd + - asinh + - round + - t_ + - unfold_backward + - scatter_reduce + - softplus_backward + - bitwise_right_shift_ + - pdist + - select_backward + - relu + - special_bessel_j1 + - asinh_ + - pow + - fft_fftshift + - clamp_max_ + - logical_xor + - index_reduce + - _foreach_add_ + - adaptive_max_pool2d + - adaptive_max_pool3d_backward + - tan + - addbmm_ + - cosh_ + - __rshift__ + - _foreach_maximum + - fft_ifftn + - special_spherical_bessel_j0 + - split_with_sizes + - divide_ + - neg_ + - nll_loss + - _euclidean_dist + - pairwise_distance + - _adaptive_avg_pool3d + - slice + - absolute_ + - gelu_backward + - arccos + - sin + - tril_ + - triu_ + - fft_irfft + - flip + - _foreach_sign + - linalg_householder_product + - _list_to_tensor + - cumprod + - randint_like + - item + - narrow_copy + - tanh + - linalg_vector_norm + - _cudnn_rnn + - _scaled_dot_product_efficient_attention + - _reshape_alias + - _linalg_det + - constant_pad_nd + - _linalg_svd + - sinh_ + - view + - nll_loss_backward + - greater + - sqrt_ + - avg_pool3d + - arctan + - le_ + - _pdist_backward + - _adaptive_avg_pool3d_backward + - log_ + - logical_or_ + - mse_loss + - rrelu_with_noise_backward + - _native_batch_norm_legit + - log10 + - scatter_ + - atan2_ + - greater_equal + - index_select + - __iand__ + - digamma_ + - eq + - divide + - cholesky_solve + - _prelu_kernel + - fft_ifft2 + - _foreach_neg_ + - alias + - erfc_ + - not_equal + - mul + - gru + - _dir + - glu + - clip + - lt + - rsqrt + - avg_pool2d + - conj_physical_ + - 
quantized_lstm + - erfinv_ + - log10_ + - float_power_ + - _functional_assert_async + - hardtanh + - logical_and_ + - _resize_output_ + - clamp_min + - _functional_sym_constrain_range_for_size + - _addmm_activation + - bucketize + - _thnn_fused_lstm_cell + - zeros + - reflection_pad1d_backward + - tan_ + - bitwise_not_ + - addmm_ + - absolute + - as_strided + - special_ndtr + - gt_ + - baddbmm + - special_log_ndtr + - hardshrink + - fft_hfft + - hypot + - native_layer_norm + - _scaled_dot_product_flash_attention_backward + - floor_divide + - is_same_size + - std + - floor_divide_ + - clamp_min_ + - _foreach_sign_ + - std_mean + - tanh_backward + - _foreach_addcmul + - binary_cross_entropy + - threshold_backward + - deg2rad_ + - masked_fill + - linspace + - reflection_pad3d + - mish + - index_copy + - scatter_reduce_ + - _sparse_coo_tensor_with_dims_and_tensors + - __loader__ + - _foreach_div_ + - cosh + - _foreach_maximum_ + - neg + - lift_fresh + - logspace + - selu_ + - leaky_relu_ + - matmul + - _foreach_sub_ + - bitwise_or + - unfold + - fmin + - convolution + - argmax + - maximum + - reflection_pad3d_backward + - fft_fft + - mode + - remainder_ + - _foreach_neg + - erf_ + - special_zeta + - index_add_ + - arccos_ + - lgamma_ + - unsqueeze_ + - gelu_ + - bmm + - _add_relu + - unfold_copy + - not_equal_ + - subtract + - true_divide + - max_pool2d_with_indices_backward + - _native_batch_norm_legit_no_training + - replication_pad1d + - name + - greater_ + - log_normal + - minimum + - alpha_dropout + - rnn_tanh + - _functional_sym_constrain_range + - sum + - _prelu_kernel_backward + - cumsum_ + - ne_ + - _linalg_solve_ex + - native_batch_norm + - igammac + - hypot_ + - exp + - leaky_relu + - new_empty + - cudnn_batch_norm + - resize_as_ + - mm + - triangular_solve + - sign_ + - clamp_max + - bitwise_right_shift + - logical_and + - special_i0 + - index_copy_ + - arctanh_ + - elu + - index + - isposinf + - linalg_solve_triangular + - logcumsumexp + - arccosh + - nan_to_num_ + - nll_loss_forward + - convolution_backward + - sub + - special_scaled_modified_bessel_k1 + - mish_ + - diagonal + - median + - tril + - sgn + - native_group_norm_backward + - stack + - take + - linalg_lu + - log2 + - hardsigmoid_ + - erfc + - max + - native_dropout_backward + - logit_ + - addr + - clip_ + - _foreach_minimum_ + - atan_ + - repeat + - cumprod_ + - bitwise_xor_ + - less + - index_put + - rrelu_with_noise + - addbmm + - special_bessel_y0 + - __and__ + - bernoulli_ + - uniform_ + - log2_ + - mul_ + - adaptive_max_pool2d_backward + - _foreach_addcmul_ + - slice_scatter + - isneginf + - pow_ + - renorm_ + - arccosh_ + - replication_pad1d_backward + - bitwise_and_ + - heaviside + - renorm + - special_modified_bessel_k0 + - le + - is_pinned + - __ixor__ + - leaky_relu_backward + - count_nonzero + - _fused_adam_ + - repeat_interleave + - upsample_bicubic2d + - rsub + - arctan2_ + - frac + - scalar_tensor + - rrelu_with_noise_ + - rot90 + - erf + - lerp_ + - expm1 + - full + - sym_constrain_range_for_size + - prod + - normal_ + - elu_ + - special_airy_ai + - nextafter + - split + - addcdiv + - fft_rfft2 + - max_pool3d_with_indices + - positive + - transpose + - mish_backward + - clamp_ + - exp_ + - _foreach_reciprocal + - linalg_matrix_exp + - unsqueeze + - upsample_nearest2d + - sinc_ + - select + - rad2deg_ + - trunc_ + - _make_dep_token + - nanmedian + - fft_hfftn + - hardtanh_ + - sym_constrain_range + - index_fill_ + - deg2rad + - rand + - sinc + - pixel_shuffle + - tril_indices + - copy_ + - _int_mm + - 
greater_equal_ + - celu_ + - div + - igamma + - exp2_ + - cos + - log_normal_ + - _log_softmax_backward_data + - im2col + - reciprocal_ + - amax + - broadcast_tensors + - erfinv + - __spec__ + - _fused_dropout + - special_hermite_polynomial_he + - aminmax + - rnn_relu + - meshgrid + - var_mean + - eq_ + - upsample_nearest3d + - dot + - zero_ + - floor_ + - fft_rfftn + - special_erfcx + - _foreach_div + - fft_hfft2 + - _upsample_bilinear2d_aa + - sort + - log_sigmoid_backward + - add_ + - copysign + - bernoulli + - special_bessel_j0 + - max_pool2d_with_indices + - _scaled_dot_product_efficient_attention_backward + - t + - _softmax + - arctanh + - hinge_embedding_loss + - hardswish_backward + - fmax + - multiply_ + - floor + - lstm + - i0_ + - cholesky + - where + - __irshift__ + - addcmul + - embedding_dense_backward + - sigmoid_ + - fix_ + - ormqr + - exponential_ + - __name__ + - fft_ihfft2 + - logical_not + - ones + - sgn_ + - sinh + - any + - _foreach_addcdiv_ + - asin_ + - gt + - lift + - squeeze + - grid_sampler_3d_backward + - atan2 + - _fft_r2c + - angle + - silu_backward + - acosh_ + - abs_ + - lerp + - special_i1 + - complex + - ceil_ + - _foreach_minimum + - hardsigmoid_backward + - upsample_nearest1d + - mvlgamma + - acos_ + - lt_ + - grid_sampler_3d + - max_unpool2d + - ones_like + - soft_margin_loss_backward + - _fused_moving_avg_obs_fq_helper + - isnan + - nansum + - baddbmm_ + - amin + - isinf + - bitwise_left_shift + - unsafe_split_with_sizes + - full_like + - sin_ + - bitwise_xor + - linalg_ldl_solve + - cos_ + - div_ + - polar + - randint + - trunc + - __package__ + - nll_loss2d_forward + - diag + - argsort + - _foreach_mul_ + - square + - detach + - affine_grid_generator + - _pin_memory + - geometric_ + - unbind + - randperm + - upsample_nearest2d_backward + - all + - threshold_ + - unsafe_split + - cauchy + - normal + - linalg_inv_ex + - multi_margin_loss + - cumsum + - gelu + - index_fill + - scatter + - mkldnn_rnn_layer + - ge_ + - dist + - _foreach_add + - logit_backward + - triu_indices + - lcm_ + - empty_strided + - replication_pad2d_backward + - cauchy_ + - _log_softmax + - vdot + +distributed: + - send + - recv + - broadcast + - all_reduce + - reduce + - all_gather + - gather + - isend + - irecv + - scatter + - reduce_scatter + - _reduce_scatter_base + - _all_gather_base \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py new file mode 100644 index 000000000..f9b6137d8 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2023-2023. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
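+#
+# Wraps the torch.ops.aten entries listed under the "aten" key of support_wrap_ops.yaml
+# (only applied on torch above 2.0, see api_registry). AtenOPPacketTemplate proxies an
+# OpOverloadPacket so that both direct calls and access to a specific overload are routed
+# through AtenOPTemplate, i.e. through HOOKModule and the registered dump hooks.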
+""" + +import os +import torch + +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard +from atat.core.file_check_util import FileOpen + + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapAtenOps = yaml.safe_load(f).get('aten') + + +aten_func = {} +for f in dir(torch.ops.aten): + aten_func[f] = getattr(torch.ops.aten, f) + + +def get_aten_ops(): + global WrapAtenOps + _all_aten_ops = dir(torch.ops.aten) + return set(WrapAtenOps) & set(_all_aten_ops) + + +class HOOKAtenOP(object): + pass + + +class AtenOPTemplate(HOOKModule): + def __init__(self, op, hook): + if isinstance(op, torch._ops.OpOverloadPacket): + op_name_ = op._qualified_op_name.split("::")[-1] + else: + op_name_ = op.name().split("::")[-1] + overload_name = op._overloadname + if not '.' + overload_name in op_name_: + op_name_ = op_name_ + '.' + overload_name + self.op = op + self.prefix_op_name_ = "Aten_" + str(op_name_) + "_" + super().__init__(hook) + + @torch_device_guard + def forward(self, *args, **kwargs): + return self.op(*args, **kwargs) + + +class AtenOPPacketTemplate(): + def __init__(self, opPacket, hook): + self.opPacket = opPacket + self.hook = hook + + def __getattr__(self, key): + try: + attr = getattr(self.opPacket, key) + except AttributeError as e: + raise AttributeError(f"AtenOPPacketTemplate or OpOverloadPacket does not have attribute '{key}'.") from e + if isinstance(attr, torch._ops.OpOverload): + return AtenOPTemplate(attr, self.hook) + else: + return attr + + def overloads(self): + return self.opPacket.overloads() + + @torch_device_guard + def __call__(self, *args, **kwargs): + return AtenOPTemplate(self.opPacket, self.hook)(*args, **kwargs) + + +def wrap_aten_op(op, hook): + return AtenOPPacketTemplate(op, hook) + + +def wrap_aten_ops_and_bind(hook): + _aten_ops = get_aten_ops() + for op_name in _aten_ops: + if not isinstance(aten_func.get(op_name), torch._ops.OpOverloadPacket): + continue + setattr(HOOKAtenOP, "wrap_" + str(op_name), wrap_aten_op(aten_func.get(op_name), hook)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py new file mode 100644 index 000000000..da3bff591 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2023. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
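+#
+# Wraps the torch.distributed ops listed under the "distributed" key of
+# support_wrap_ops.yaml. Ops in Const.INPLACE_LIST additionally register a forward
+# pre-hook so their inputs can still be observed before being modified in place.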
+""" + +import os +from functools import wraps +import torch.distributed as dist +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard, Const +from atat.core.file_check_util import FileOpen + + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapDistributedOps = yaml.safe_load(f).get('distributed') + + +distributed_func = {} +for f in dir(dist): + distributed_func[f] = getattr(dist, f) + + +def get_distributed_ops(): + global WrapDistributedOps + _all_distributed_ops = dir(dist) + return set(WrapDistributedOps) & set(_all_distributed_ops) + + +class HOOKDistributedOP(object): + pass + + +class DistributedOPTemplate(HOOKModule): + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Distributed_" + str(op_name) + "_" + super().__init__(hook) + if self.op_name_ in Const.INPLACE_LIST: + self.register_forward_pre_hook(hook(self.prefix + Const.PRE_FORWARD)) + + @torch_device_guard + def forward(self, *args, **kwargs): + return distributed_func.get(self.op_name_)(*args, **kwargs) + + +def wrap_distributed_op(op_name, hook): + @wraps(DistributedOPTemplate) + def distributed_op_template(*args, **kwargs): + return DistributedOPTemplate(op_name, hook)(*args, **kwargs) + + distributed_op_template.__name__ = op_name + return distributed_op_template + + +def wrap_distributed_ops_and_bind(hook): + _distributed_ops = get_distributed_ops() + for op_name in _distributed_ops: + setattr(HOOKDistributedOP, "wrap_" + str(op_name), wrap_distributed_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py new file mode 100644 index 000000000..3de281ae0 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
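+#
+# Wraps the torch.nn.functional ops listed under the "functional" key of
+# support_wrap_ops.yaml. remove_dropout() additionally patches F.dropout / F.dropout2d /
+# F.dropout3d to run with p=0.0 so that dropout randomness does not disturb precision
+# comparison; register_hook_core calls it for the acc_cmp_dump hook.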
+""" + +import os + +import torch +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard +from atat.core.utils import print_info_log +from atat.core.file_check_util import FileOpen + + +def remove_dropout(): + if torch.__version__ > "1.8": + print_info_log("For precision comparison, the probability p in the dropout method is set to 0.") + import torch.nn.functional as F + from torch import _VF + from torch.overrides import has_torch_function_unary, handle_torch_function + + def function_dropout(input: torch.Tensor, p: float = 0.5, training: bool = True, + inplace: bool = False) -> torch.Tensor: + if has_torch_function_unary(input): + return handle_torch_function(function_dropout, (input,), input, p=0., training=training, inplace=inplace) + if p < 0.0 or p > 1.0: + raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p)) + return _VF.dropout_(input, 0., training) if inplace else _VF.dropout(input, 0., training) + + + def function_dropout2d(input: torch.Tensor, p: float = 0.5, training: bool = True, + inplace: bool = False) -> torch.Tensor: + if has_torch_function_unary(input): + return handle_torch_function(function_dropout2d, (input,), input, p=0., training=training, inplace=inplace) + if p < 0.0 or p > 1.0: + raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p)) + return _VF.feature_dropout_(input, 0., training) if inplace else _VF.feature_dropout(input, 0., training) + + + def function_dropout3d(input: torch.Tensor, p: float = 0.5, training: bool = True, + inplace: bool = False) -> torch.Tensor: + if has_torch_function_unary(input): + return handle_torch_function(function_dropout3d, (input,), input, p=0., training=training, inplace=inplace) + if p < 0.0 or p > 1.0: + raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p)) + return _VF.feature_dropout_(input, 0., training) if inplace else _VF.feature_dropout(input, 0., training) + + F.dropout = function_dropout + F.dropout2d = function_dropout2d + F.dropout3d = function_dropout3d + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapFunctionalOps = yaml.safe_load(f).get('functional') + + +def get_functional_ops(): + global WrapFunctionalOps + _all_functional_ops = dir(torch.nn.functional) + return set(WrapFunctionalOps) & set(_all_functional_ops) + + +TorchFunctions = {func: getattr(torch.nn.functional, func) for func in get_functional_ops()} + + +class HOOKFunctionalOP(object): + pass + + +class FunctionalOPTemplate(HOOKModule): + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Functional_" + str(op_name) + "_" + super().__init__(hook) + + @torch_device_guard + def forward(self, *args, **kwargs): + return TorchFunctions[str(self.op_name_)](*args, **kwargs) + + +def wrap_functional_op(op_name, hook): + def functional_op_template(*args, **kwargs): + return FunctionalOPTemplate(op_name, hook)(*args, **kwargs) + + return functional_op_template + + +def wrap_functional_ops_and_bind(hook): + _functional_ops = get_functional_ops() + for op_name in _functional_ops: + setattr(HOOKFunctionalOP, "wrap_" + op_name, wrap_functional_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py new file mode 100644 index 000000000..5155a0c20 --- /dev/null +++ 
b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import os +import torch +import torch_npu +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard, torch_without_guard_version +from atat.core.file_check_util import FileOpen + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapNpuOps = yaml.safe_load(f).get('torch_npu') + + +def get_npu_ops(): + global WrapNpuOps + if torch_without_guard_version: + _npu_ops = dir(torch.ops.npu) + else: + _npu_ops = dir(torch_npu._C._VariableFunctionsClass) + return set(WrapNpuOps) & set(_npu_ops) + + +class HOOKNpuOP(object): + pass + + +class NpuOPTemplate(HOOKModule): + + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "NPU_" + str(op_name) + "_" + super().__init__(hook) + + @torch_device_guard + def forward(self, *args, **kwargs): + if torch_without_guard_version: + return getattr(torch.ops.npu, str(self.op_name_))(*args, **kwargs) + else: + return getattr(torch_npu._C._VariableFunctionsClass, str(self.op_name_))(*args, **kwargs) + + +def wrap_npu_op(op_name, hook): + + def npu_op_template(*args, **kwargs): + return NpuOPTemplate(op_name, hook)(*args, **kwargs) + + return npu_op_template + + +def wrap_npu_ops_and_bind(hook): + _npu_ops = get_npu_ops() + for op_name in _npu_ops: + setattr(HOOKNpuOP, "wrap_" + str(op_name), wrap_npu_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py new file mode 100644 index 000000000..31dda087e --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +import os + +import torch +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard, parameter_adapter +from atat.core.file_check_util import FileOpen + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapTensorOps = yaml.safe_load(f).get('tensor') + + +def get_tensor_ops(): + global WrapTensorOps + _tensor_ops = dir(torch.Tensor) + return set(WrapTensorOps) & set(_tensor_ops) + + +TensorOps = {op: getattr(torch.Tensor, op) for op in get_tensor_ops()} + + +class HOOKTensor(object): + pass + + +class TensorOPTemplate(HOOKModule): + + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Tensor_" + str(op_name) + "_" + super().__init__(hook) + + @torch_device_guard + @parameter_adapter + def forward(self, *args, **kwargs): + return TensorOps[str(self.op_name_)](*args, **kwargs) + + +def wrap_tensor_op(op_name, hook): + + def tensor_op_template(*args, **kwargs): + return TensorOPTemplate(op_name, hook)(*args, **kwargs) + + return tensor_op_template + + +def wrap_tensor_ops_and_bind(hook): + _tensor_ops = get_tensor_ops() + for op_name in _tensor_ops: + setattr(HOOKTensor, "wrap_" + str(op_name), wrap_tensor_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py new file mode 100644 index 000000000..5ea8b5e15 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import os + +import torch +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard +from atat.core.file_check_util import FileOpen + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapTorchOps = yaml.safe_load(f).get('torch') + + +def get_torch_ops(): + global WrapTorchOps + _torch_ops = [] + for operation in WrapTorchOps: + if '.' in operation: + operation_sub_module_name, operation_sub_op = operation.rsplit('.', 1) + operation_sub_module = getattr(torch, operation_sub_module_name) + if operation_sub_op in dir(operation_sub_module): + _torch_ops.append(operation) + else: + if hasattr(torch, operation): + _torch_ops.append(operation) + return set(_torch_ops) + + +TorchOps = {} +for op in get_torch_ops(): + if '.' 
in op: + sub_module_name, sub_op = op.rsplit('.', 1) + sub_module = getattr(torch, sub_module_name) + TorchOps[op] = getattr(sub_module, sub_op) + else: + TorchOps[op] = getattr(torch, op) + + + +class HOOKTorchOP(object): + pass + + +class TorchOPTemplate(HOOKModule): + + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Torch_" + str(op_name) + "_" + super().__init__(hook) + + @torch_device_guard + def forward(self, *args, **kwargs): + return TorchOps[str(self.op_name_)](*args, **kwargs) + + +def wrap_torch_op(op_name, hook): + + def torch_op_template(*args, **kwargs): + return TorchOPTemplate(op_name, hook)(*args, **kwargs) + + return torch_op_template + + +def wrap_torch_ops_and_bind(hook): + _torch_ops = get_torch_ops() + for op_name in _torch_ops: + setattr(HOOKTorchOP, "wrap_" + op_name, wrap_torch_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py new file mode 100644 index 000000000..18375587b --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +import os + +import torch +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard +from atat.core.file_check_util import FileOpen + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with FileOpen(yaml_path, 'r') as f: + WrapVfOps = yaml.safe_load(f).get('_VF') + + +def get_vf_ops(): + global WrapVfOps + # _all_functional_ops = dir(torch.nn.functional) + # assert set(WrapFunctionalOps) <= set(_all_functional_ops) + return WrapVfOps + + +class HOOKVfOP(object): + pass + + +class VfOPTemplate(HOOKModule): + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "VF_" + str(op_name) + "_" + super().__init__(hook) + + @torch_device_guard + def forward(self, *args, **kwargs): + return getattr(torch._C._VariableFunctionsClass, str(self.op_name_))(*args, **kwargs) + + +def wrap_vf_op(op_name, hook): + def vf_op_template(*args, **kwargs): + return VfOPTemplate(op_name, hook)(*args, **kwargs) + + return vf_op_template + + +def wrap_vf_ops_and_bind(hook): + _vf_ops = get_vf_ops() + for op_name in _vf_ops: + setattr(HOOKVfOP, "wrap_" + op_name, wrap_vf_op(op_name, hook)) diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/__init__.py b/debug/accuracy_tools/atat/pytorch/overflow_check/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py b/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py new file mode 100644 index 000000000..55065bfa5 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py @@ -0,0 +1,252 @@ +import inspect +import fcntl +import os +import threading + +import json +import numpy as np +import torch + +from ..common.utils import print_error_log +from atat.core.utils import get_time +from atat.core.file_check_util import FileOpen, FileCheckConst, change_mode + + +special_torch_object = ["memory_format"] +lock = threading.Lock() + + +def write_npy(file_path, tensor): + saved_tensor = tensor.contiguous().cpu().detach() + if tensor.dtype == torch.bfloat16: + saved_numpy = saved_tensor.to(torch.float32).numpy() + else: + saved_numpy = saved_tensor.numpy() + if os.path.exists(file_path): + raise ValueError(f"File {file_path} already exists") + np.save(file_path, saved_numpy) + full_path = os.path.abspath(file_path) + return full_path + + +class APIInfo: + def __init__(self, api_name, is_forward, save_real_data=False): + self.rank = os.getpid() + self.api_name = api_name + self.save_real_data = save_real_data + self.torch_object_key = {'device': self.analyze_device_in_kwargs, 'dtype': self.analyze_dtype_in_kwargs} + self.is_forward = is_forward + self.args_num = 0 + + def analyze_element(self, element): + if isinstance(element, (list, tuple)): + out = [] + for item in element: + out.append(self.analyze_element(item)) + return out + elif isinstance(element, dict): + out_dict = {} + for key, value in element.items(): + if key in self.torch_object_key.keys(): + fun = self.torch_object_key[key] + out_dict[key] = fun(value) + elif key in special_torch_object: + continue + else: + out_dict[key] = self.analyze_element(value) + return out_dict + elif isinstance(element, torch.Tensor): + out_tensor = self.analyze_tensor(element, self.save_real_data) + return out_tensor + elif self.is_builtin_class(element): + out_builtin = self.analyze_builtin(element) + return out_builtin + else: + msg = f"Type {type(element)} is unsupported at 
analyze_element" + print_error_log(msg) + + raise NotImplementedError(msg) + + def analyze_tensor(self, arg, save_real_data): + single_arg = {} + if not save_real_data: + single_arg.update({'type': 'torch.Tensor'}) + single_arg.update({'dtype': str(arg.dtype)}) + single_arg.update({'shape': arg.shape}) + single_arg.update({'Max': self.transfer_types(self.get_tensor_extremum(arg, 'max'), str(arg.dtype))}) + single_arg.update({'Min': self.transfer_types(self.get_tensor_extremum(arg, 'min'), str(arg.dtype))}) + single_arg.update({'requires_grad': arg.requires_grad}) + + else: + dump_path = "./" + api_args = self.api_name + '.' + str(self.args_num) + rank = arg.device.index + if self.is_forward: + forward_real_data_path = os.path.join(dump_path, "forward_real_data_" + get_time(), f"rank{rank}") + if not os.path.exists(forward_real_data_path): + os.makedirs(forward_real_data_path, 0o755) + + file_path = os.path.join(forward_real_data_path, f'{api_args}.npy') + else: + backward_real_data_path = os.path.join(dump_path, "backward_real_data_" + get_time(), f"rank{rank}") + if not os.path.exists(backward_real_data_path): + os.makedirs(backward_real_data_path, 0o755) + file_path = os.path.join(backward_real_data_path, f'{api_args}.npy') + self.args_num += 1 + npy_path = write_npy(file_path, arg) + single_arg.update({'type': 'torch.Tensor'}) + single_arg.update({'datapath': npy_path}) + single_arg.update({'requires_grad': arg.requires_grad}) + return single_arg + + def analyze_builtin(self, arg): + single_arg = {} + if isinstance(arg, slice): + single_arg.update({'type': "slice"}) + single_arg.update({'value': [arg.start, arg.stop, arg.step]}) + else: + single_arg.update({'type': self.get_type_name(str(type(arg)))}) + single_arg.update({'value': arg}) + return single_arg + + def transfer_types(self, data, dtype): + if 'int' in dtype or 'bool' in dtype: + return int(data) + else: + return float(data) + + def is_builtin_class(self, element): + if element is None or isinstance(element, (bool, int, float, str, slice)): + return True + return False + + def analyze_device_in_kwargs(self, element): + single_arg = {} + single_arg.update({'type': 'torch.device'}) + if not isinstance(element, str): + + if hasattr(element, "index"): + device_value = element.type + ":" + str(element.index) + single_arg.update({'value': device_value}) + else: + device_value = element.type + else: + single_arg.update({'value': element}) + return single_arg + + def analyze_dtype_in_kwargs(self, element): + single_arg = {} + single_arg.update({'type': 'torch.dtype'}) + single_arg.update({'value': str(element)}) + return single_arg + + def get_tensor_extremum(self, data, operator): + if data.dtype is torch.bool: + if operator == 'max': + return True in data + elif operator == 'min': + return False not in data + if operator == 'max': + return torch._C._VariableFunctionsClass.max(data).item() + else: + return torch._C._VariableFunctionsClass.min(data).item() + + def get_type_name(self, name): + + left = name.index("'") + right = name.rindex("'") + return name[left + 1: right] + + +class ForwardAPIInfo(APIInfo): + def __init__(self, name, save_real_data, args, kwargs): + super().__init__(name, is_forward=True, save_real_data=save_real_data) + self.analyze_api_input(args, kwargs) + self.analyze_api_call_stack() + + def analyze_api_input(self, args, kwargs): + args_info_list = self.analyze_element(args) + kwargs_info_dict = self.analyze_element(kwargs) + self.api_info_struct = {self.api_name: {"args": args_info_list, "kwargs": 
kwargs_info_dict}} + + def analyze_api_call_stack(self): + stack_str = [] + for (_, path, line, func, code, _) in inspect.stack()[3:]: + if not code: + continue + stack_line = " ".join([ + "File", ", ".join([path, " ".join(["line", str(line)]), " ".join(["in", func]), + " ".join(["\n", code[0].strip()])])]) + stack_str.append(stack_line) + self.stack_info_struct = {self.api_name: stack_str} + + +class BackwardAPIInfo(APIInfo): + def __init__(self, name, grads): + super().__init__(name, is_forward=False) + self.analyze_api_input(grads) + + def analyze_api_input(self, grads): + grads_info_list = self.analyze_element(grads) + self.grad_info_struct = {self.api_name: grads_info_list} + + +def write_api_info_json(api_info): + dump_path = "./" + rank = api_info.rank + if isinstance(api_info, ForwardAPIInfo): + file_path = os.path.join(dump_path, f'forward_info_{rank}.json') + stack_file_path = os.path.join(dump_path, f'stack_info_{rank}.json') + write_json(file_path, api_info.api_info_struct) + write_json(stack_file_path, api_info.stack_info_struct, indent=4) + + elif isinstance(api_info, BackwardAPIInfo): + file_path = os.path.join(dump_path, f'backward_info_{rank}.json') + write_json(file_path, api_info.grad_info_struct) + else: + raise ValueError(f"Invalid api_info type {type(api_info)}") + + +def write_json(file_path, data, indent=None): + if not os.path.exists(file_path): + with FileOpen(file_path, 'w') as f: + f.write("{\n}") + change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) + lock.acquire() + with FileOpen(file_path, 'a+') as f: + fcntl.flock(f, fcntl.LOCK_EX) + try: + f.seek(0, os.SEEK_END) + f.seek(f.tell() - 1, os.SEEK_SET) + f.truncate() + if f.tell() > 3: + f.seek(f.tell() - 1, os.SEEK_SET) + f.truncate() + f.write(',\n') + f.write(json.dumps(data, indent=indent)[1:-1] + '\n}') + except Exception as e: + raise ValueError(f"Json save failed:{e}") from e + finally: + fcntl.flock(f, fcntl.LOCK_UN) + lock.release() + + +def initialize_output_json(): + dump_path = os.path.realpath("./") + files = ['forward_info.json', 'backward_info.json', 'stack_info.json'] + + forward_real_data_path = os.path.join(dump_path, 'forward_real_data') + if os.path.exists(forward_real_data_path): + raise ValueError(f"file {forward_real_data_path} already exists, please remove it first") + else: + os.mkdir(forward_real_data_path, mode=0o750) + + backward_real_data_path = os.path.join(dump_path, 'backward_real_data') + if os.path.exists(backward_real_data_path): + raise ValueError(f"file {backward_real_data_path} already exists, please remove it first") + else: + os.mkdir(backward_real_data_path, mode=0o750) + for file in files: + file_path = os.path.join(dump_path, file) + if os.path.exists(file_path): + raise ValueError(f"file {file_path} already exists, please remove it first or use a new dump path") \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py b/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py new file mode 100644 index 000000000..1231453b7 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py @@ -0,0 +1,190 @@ +import os +from pathlib import Path + +import torch + +try: + import torch_npu +except ImportError: + is_gpu = True +else: + is_gpu = False + +from atat.core.utils import print_warn_log, get_time, print_info_log +from ..dump.dump import forward_init_status, forward_acl_dump +from .utils import OverFlowUtil, dump_overflow, check_overflow_npu, clear_overflow_npu +from ..dump.utils 
import DumpUtil, Const, get_tensor_rank, create_dirs_if_not_exist, check_single_rank_folder
+from .info_dump import write_api_info_json, ForwardAPIInfo, BackwardAPIInfo
+from ..dump import dump
+from atat.core.file_check_util import FileCheckConst
+
+backward_init_status = False
+api_overflow = []
+forward_api_info = {}
+backward_api_info = {}
+FORWARD_REAL_DATA_PATH = os.path.join('./', 'forward_real_data')
+BACKWARD_REAL_DATA_PATH = os.path.join('./', 'backward_real_data')
+rank = os.getpid()
+pkl_name = ''
+
+
+def check_overflow_environment(pid):
+    if not OverFlowUtil.get_overflow_check_switch():
+        return False
+    if pid != os.getpid():
+        return False
+    if is_gpu:
+        print_warn_log("Overflow detection is not supported in the GPU environment.")
+        return False
+    global backward_init_status
+    if backward_init_status or forward_init_status:
+        return False
+    return True
+
+
+def check_data_overflow(x):
+    if isinstance(x, (tuple, list)) and x:
+        for item in x:
+            if check_data_overflow(item):
+                return True
+        return False
+    else:
+        if isinstance(x, torch.Tensor) and x.numel() != 0 and x.dtype != torch.bool:
+            if x.is_meta:
+                return False
+            if len(x.shape) == 0:
+                tensor_max = x.cpu().detach().float().numpy().tolist()
+                tensor_min = tensor_max
+            else:
+                tensor_max = torch._C._VariableFunctionsClass.max(x).cpu().detach().float().numpy().tolist()
+                tensor_min = torch._C._VariableFunctionsClass.min(x).cpu().detach().float().numpy().tolist()
+            # inf
+            if tensor_max == float('inf') or tensor_min == float('-inf'):
+                return True
+            if x.dtype in [torch.float16, torch.float32, torch.bfloat16] and \
+                    (tensor_max == torch.finfo(x.dtype).max or tensor_min == torch.finfo(x.dtype).min):
+                return True
+            # nan
+            elif tensor_max != tensor_max or tensor_min != tensor_min:
+                return True
+            else:
+                return False
+        elif isinstance(x, (bool, int, float)):
+            return x == float('inf') or x == float('-inf') or x != x
+        else:
+            return False
+
+
+def check_path(apis, path):
+    return any(api in path for api in apis)
+
+
+def overflow_check(name, **kwargs):
+    overflow_nums = OverFlowUtil.overflow_nums
+    pid = kwargs.get('pid')
+    dump_mode = DumpUtil.dump_switch_mode
+    if not pid:
+        raise RuntimeError("Failed to get the specified process pid.")
+
+    def overflowcheck_hook(module, in_feat, out_feat=None):
+        if not check_overflow_environment(pid):
+            return
+        dump_file = DumpUtil.get_dump_path()
+        global rank
+        dump_dir, dump_filename = os.path.split(dump_file)
+        dump_dir = os.path.join(dump_dir, "step{}".format(DumpUtil.iter_num))
+        if not os.path.exists(dump_dir):
+            Path(dump_dir).mkdir(mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True)
+        if DumpUtil.is_single_rank is None:
+            DumpUtil.is_single_rank = check_single_rank_folder(dump_dir)
+        dump_file = os.path.join(dump_dir, dump_filename)
+        rank_this = get_tensor_rank(in_feat, out_feat)
+        DumpUtil.dump_root = os.path.dirname(DumpUtil.dump_path)
+        if rank_this is not None and rank != rank_this:
+            rank = rank_this
+            dump.rename_()
+        if DumpUtil.target_rank is not None:
+            if rank != DumpUtil.target_rank:
+                return
+        dump_path = create_dirs_if_not_exist(rank, dump_file)
+        global pkl_name
+        pkl_name = dump_path
+        dump_dir = os.path.split(dump_path)[0]
+        global api_overflow
+        global forward_api_info
+        global backward_api_info
+
+        module_name = name
+        if hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan():
+            # backward API names end with "backward"
+            if module_name.endswith(Const.BACKWARD):
+                check_feat = in_feat
+            else:
+                check_feat = out_feat
+            module.has_overflow = check_data_overflow(check_feat)
+        else:
+            module.has_overflow = check_overflow_npu()
+        if not module.has_overflow:
+            if hasattr(module, 'input_args'):
+                del module.input_args
+            if hasattr(module, 'input_kwargs'):
+                del module.input_kwargs
+        if module.has_overflow and OverFlowUtil.check_overflow_dump_times(overflow_nums):
+            if overflow_type_judge(in_feat, out_feat, module_name) and DumpUtil.need_replicate:
+                if module_name.endswith(Const.FORWARD):
+                    forward_api_info.update({name: ForwardAPIInfo(name, True, module.input_args, module.input_kwargs)})
+                    api_overflow.append(module_name)
+                else:
+                    api_overflow.append(module_name.replace("backward", "forward"))
+                    backward_api_info.update({name: BackwardAPIInfo(name, out_feat)})
+            OverFlowUtil.inc_overflow_dump_times()
+            dump_file_name = os.path.join(dump_dir,
+                                          "{}_{}.pkl".format(module_name, OverFlowUtil.real_overflow_dump_times))
+            dump_overflow(module_name, in_feat, out_feat, dump_file_name)
+            dump.pkl_name = dump_file_name
+
+            print_warn_log("[overflow {} times]: module name: '{}' overflowed and the dump file is saved in '{}'."
+                           .format(OverFlowUtil.real_overflow_dump_times, module_name,
+                                   os.path.realpath(dump_file_name)))
+            if dump_mode == "acl":
+                acl_dump(module, module_name)
+            dump.write_to_disk()
+            # clear overflow flag for the next check
+            clear_overflow_npu()
+            if not OverFlowUtil.check_overflow_dump_times(overflow_nums):
+                for key in forward_api_info:
+                    write_api_info_json(forward_api_info[key])
+                for key in backward_api_info:
+                    write_api_info_json(backward_api_info[key])
+                raise ValueError("[overflow {} times]: dump file is saved in '{}'."
+                                 .format(OverFlowUtil.real_overflow_dump_times, os.path.realpath(dump_file_name)))
+
+    def overflow_type_judge(in_feat, out_feat, module_name):
+        if module_name.endswith(Const.BACKWARD):
+            check_feat = out_feat
+        else:
+            check_feat = in_feat
+        if check_data_overflow(check_feat):
+            print_warn_log("module name: '{}' overflowed and its inputs already contain an overflow, so you need "
+                           "to go back to find where the overflow started.".format(module_name))
+            return False
+        elif not check_data_overflow(in_feat) and not check_data_overflow(out_feat):
+            print_warn_log("module name: '{}' overflowed although neither its inputs nor its outputs overflow, "
+                           "so this is a process overflow.".format(module_name))
+            return False
+        else:
+            print_warn_log("module name: '{}' overflowed. Its input is normal and its output "
+                           "overflows.".format(module_name))
+            return True
+
+    def acl_dump(module, module_name):
+        if "forward" in module_name:
+            forward_acl_dump(module, module_name)
+        if "backward" in module_name:
+            print_info_log("The overflow is caused by backward operator {}. 
" + "You can use reverse acl dump(mode='acl') to get operator dump data.".format(module_name)) + + return overflowcheck_hook diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py b/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py new file mode 100644 index 000000000..481f717f6 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py @@ -0,0 +1,114 @@ +import os +import torch + +try: + import torch_npu +except ImportError: + is_gpu = True +else: + is_gpu = False + +from ..common.utils import Const +from atat.core.utils import check_switch_valid, check_inplace_op, OverflowConst +from ..dump.dump import dump_stack_info, get_scalar_data_info, dump_data_by_rank_count, \ + get_not_float_tensor_info, get_float_tensor_info +from ..dump.utils import DumpUtil, make_dump_data_dir + + +class OverFlowUtil(object): + overflow_check_switch = None + overflow_filter_switch = Const.OFF + real_overflow_dump_times = 0 + overflow_nums = 1 + + @staticmethod + def set_overflow_check_switch(switch, filter_switch): + OverFlowUtil.overflow_check_switch = switch + OverFlowUtil.overflow_filter_switch = filter_switch + + @staticmethod + def get_overflow_check_switch(): + if OverFlowUtil.overflow_check_switch is None: + return True + return OverFlowUtil.overflow_check_switch == "ON" + + @staticmethod + def inc_overflow_dump_times(): + OverFlowUtil.real_overflow_dump_times += 1 + + @staticmethod + def check_overflow_dump_times(need_dump_times): + if need_dump_times == -1: + return True + return OverFlowUtil.real_overflow_dump_times < need_dump_times + + +def set_overflow_check_switch(switch, filter_switch=Const.OFF): + check_switch_valid(switch) + check_switch_valid(filter_switch) + + OverFlowUtil.set_overflow_check_switch(switch, filter_switch) + + +def dump_overflow(module_name, in_feat, out_feat, dump_file): + name_template = f"{module_name}" + "_{}" + DumpUtil.dump_data_dir = make_dump_data_dir(dump_file) + dump_stack_info(name_template) + if check_inplace_op(name_template): + if Const.PRE_FORWARD in name_template: + name_template = name_template.replace(Const.PRE_FORWARD, Const.FORWARD) + else: + _dump_tensor_completely(in_feat, name_template.format("output")) + return + + if "forward" in name_template: + _dump_tensor_completely(in_feat, name_template.format("input")) + _dump_tensor_completely(out_feat, name_template.format("output")) + else: + _dump_tensor_completely(in_feat, name_template.format("output")) + _dump_tensor_completely(out_feat, name_template.format("input")) + + +def _dump_tensor_completely(x, prefix): + dump_flag = Const.DUMP_RATIO_MAX + 1 + if isinstance(x, (tuple, list)) and x: + for i, item in enumerate(x): + _dump_tensor_completely(item, "{}.{}".format(prefix, i)) + elif isinstance(x, torch.Tensor): + if x.numel() == 0 or len(x.shape) == 0 or not x.is_floating_point(): + if OverFlowUtil.overflow_filter_switch == Const.OFF: + data_info = get_not_float_tensor_info(x) + dump_data_by_rank_count(dump_flag, prefix, data_info) + else: + data_info = get_float_tensor_info(x) + dump_data_by_rank_count(dump_flag, prefix, data_info) + + elif OverFlowUtil.overflow_filter_switch == Const.OFF: + if isinstance(x, bool) or isinstance(x, int) or isinstance(x, float): + data_info = get_scalar_data_info(x) + dump_data_by_rank_count(dump_flag, prefix, data_info) + + +def overflow_debug_mode_enalbe(): + overflow_mode = os.getenv(OverflowConst.OVERFLOW_DEBUG_MODE_ENABLE, Const.ENV_DISABLE) + return overflow_mode == Const.ENV_ENABLE + + +def check_overflow_npu(): + if 
overflow_debug_mode_enalbe(): + float_status = torch.zeros(8).npu() + result = torch_npu.npu_get_float_status(float_status, OverflowConst.OVERFLOW_DEBUG_MODE) + if (result.cpu()[0] != 0): + return True + else: + return False + else: + return torch_npu._C._check_overflow_npu() + + +def clear_overflow_npu(): + if overflow_debug_mode_enalbe(): + float_status = torch.zeros(8).npu() + torch_npu.npu_clear_float_status(float_status, OverflowConst.OVERFLOW_DEBUG_MODE) + else: + torch_npu._C._clear_overflow_npu() \ No newline at end of file -- Gitee From efd842731fbde343ef240ed0e9d3ddda1e4d3791 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Sun, 5 May 2024 16:48:05 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E3=80=90=E7=B2=BE=E5=BA=A6=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E5=90=88=E4=B8=80=E3=80=91cleancode=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../accuracy_tools/atat/pytorch/hook_module/register_hook.py | 4 ++-- debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py | 2 +- .../atat/pytorch/hook_module/wrap_distributed.py | 2 +- .../atat/pytorch/hook_module/wrap_functional.py | 4 ++-- .../atat/pytorch/hook_module/wrap_npu_custom.py | 2 +- debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py | 2 +- debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py | 2 +- debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py | 2 +- debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py | 4 ++-- .../atat/pytorch/overflow_check/overflow_check.py | 2 +- debug/accuracy_tools/atat/pytorch/overflow_check/utils.py | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py b/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py index 821f482f9..e0f3340c1 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py @@ -23,11 +23,11 @@ import torch import torch.distributed as dist from . 
import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten +from atat.core.utils import check_file_or_directory_path, print_error_log, CompareException, Const, \ + print_info_log, print_warn_log, get_process_rank from .hook_module import HOOKModule from .api_registry import api_register from .wrap_functional import remove_dropout -from atat.core.utils import check_file_or_directory_path, print_error_log, CompareException, Const, \ - print_info_log, print_warn_log, get_process_rank from ..common.utils import torch_without_guard_version from ..dump.utils import make_dump_dirs, DumpUtil from ..overflow_check.utils import OverFlowUtil, clear_overflow_npu diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py index f9b6137d8..9b6c694be 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_aten.py @@ -20,9 +20,9 @@ import torch import yaml +from atat.core.file_check_util import FileOpen from .hook_module import HOOKModule from ..common.utils import torch_device_guard -from atat.core.file_check_util import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py index da3bff591..4579caaf1 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_distributed.py @@ -20,9 +20,9 @@ from functools import wraps import torch.distributed as dist import yaml +from atat.core.file_check_util import FileOpen from .hook_module import HOOKModule from ..common.utils import torch_device_guard, Const -from atat.core.file_check_util import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py index 3de281ae0..23d90789e 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_functional.py @@ -20,10 +20,10 @@ import os import torch import yaml -from .hook_module import HOOKModule -from ..common.utils import torch_device_guard from atat.core.utils import print_info_log from atat.core.file_check_util import FileOpen +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard def remove_dropout(): diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py index 5155a0c20..ea02096b1 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_npu_custom.py @@ -20,9 +20,9 @@ import torch import torch_npu import yaml +from atat.core.file_check_util import FileOpen from .hook_module import HOOKModule from ..common.utils import torch_device_guard, torch_without_guard_version -from atat.core.file_check_util import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py index 31dda087e..bd62b5930 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py +++ 
b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_tensor.py @@ -20,9 +20,9 @@ import os import torch import yaml +from atat.core.file_check_util import FileOpen from .hook_module import HOOKModule from ..common.utils import torch_device_guard, parameter_adapter -from atat.core.file_check_util import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py index 5ea8b5e15..9fd71e90b 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_torch.py @@ -20,9 +20,9 @@ import os import torch import yaml +from atat.core.file_check_util import FileOpen from .hook_module import HOOKModule from ..common.utils import torch_device_guard -from atat.core.file_check_util import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py index 18375587b..a4da7f174 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/wrap_vf.py @@ -20,9 +20,9 @@ import os import torch import yaml +from atat.core.file_check_util import FileOpen from .hook_module import HOOKModule from ..common.utils import torch_device_guard -from atat.core.file_check_util import FileOpen cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py b/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py index 55065bfa5..161e9f23f 100644 --- a/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py +++ b/debug/accuracy_tools/atat/pytorch/overflow_check/info_dump.py @@ -7,9 +7,9 @@ import json import numpy as np import torch -from ..common.utils import print_error_log -from atat.core.utils import get_time from atat.core.file_check_util import FileOpen, FileCheckConst, change_mode +from atat.core.utils import get_time +from ..common.utils import print_error_log special_torch_object = ["memory_format"] diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py b/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py index 1231453b7..f8f9926b6 100644 --- a/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py +++ b/debug/accuracy_tools/atat/pytorch/overflow_check/overflow_check.py @@ -10,13 +10,13 @@ except ImportError: else: is_gpu = False +from atat.core.file_check_util import FileCheckConst from atat.core.utils import print_warn_log, get_time, print_info_log from ..dump.dump import forward_init_status, forward_acl_dump from .utils import OverFlowUtil, dump_overflow, check_overflow_npu, clear_overflow_npu from ..dump.utils import DumpUtil, Const, get_tensor_rank, create_dirs_if_not_exist, check_single_rank_folder from .info_dump import write_api_info_json, ForwardAPIInfo, BackwardAPIInfo from ..dump import dump -from atat.core.file_check_util import FileCheckConst backward_init_status = False api_overflow = [] diff --git a/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py b/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py index 481f717f6..d254d5845 100644 --- 
a/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py +++ b/debug/accuracy_tools/atat/pytorch/overflow_check/utils.py @@ -8,8 +8,8 @@ except ImportError: else: is_gpu = False -from ..common.utils import Const from atat.core.utils import check_switch_valid, check_inplace_op, OverflowConst +from ..common.utils import Const from ..dump.dump import dump_stack_info, get_scalar_data_info, dump_data_by_rank_count, \ get_not_float_tensor_info, get_float_tensor_info from ..dump.utils import DumpUtil, make_dump_data_dir -- Gitee From fc77ba05bc82e9253b7036d775fe1b61b2c8deb3 Mon Sep 17 00:00:00 2001 From: h00613304 Date: Sun, 5 May 2024 17:01:34 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E3=80=90=E7=B2=BE=E5=BA=A6=E5=B7=A5?= =?UTF-8?q?=E5=85=B7=E5=90=88=E4=B8=80=E3=80=91cleancode=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py b/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py index e0f3340c1..7715bda67 100644 --- a/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py +++ b/debug/accuracy_tools/atat/pytorch/hook_module/register_hook.py @@ -22,9 +22,9 @@ from inspect import isfunction import torch import torch.distributed as dist -from . import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten from atat.core.utils import check_file_or_directory_path, print_error_log, CompareException, Const, \ print_info_log, print_warn_log, get_process_rank +from . import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten from .hook_module import HOOKModule from .api_registry import api_register from .wrap_functional import remove_dropout -- Gitee
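Note on the wrapping pattern used throughout this series: every wrap_*.py added above follows the same scheme — the original callable is cached in a module-level dict (TensorOps, TorchOps, TorchFunctions, distributed_func), a *OPTemplate subclass of HOOKModule replays the call in forward(), and wrap_*_ops_and_bind() attaches one wrapper per op so a dump or overflow hook can observe every call. The sketch below is an illustrative, self-contained reduction of that idea only; it is not part of the patch, and the hook signature is invented for brevity (the real hooks are nn.Module forward hooks installed through HOOKModule and torch_device_guard).

import torch
import torch.nn.functional as F

original_relu = F.relu  # cache the original op, as TorchFunctions/TensorOps do


def make_wrapper(op_name, original_op, hook):
    # Simplified analogue of wrap_functional_op: forward the call, let the hook observe it.
    def wrapped(*args, **kwargs):
        out = original_op(*args, **kwargs)
        hook(op_name, args, kwargs, out)  # hypothetical hook signature, for illustration only
        return out
    return wrapped


def stat_hook(name, args, kwargs, out):
    # Stand-in for the dump/overflow hooks: record per-call output statistics.
    print(f"{name}: min={out.min().item():.4f}, max={out.max().item():.4f}")


F.relu = make_wrapper("relu", original_relu, stat_hook)
F.relu(torch.randn(4))   # statistics are printed, the result is unchanged
F.relu = original_relu   # restoring the cached attribute undoes the wrapping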