diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py index 78de5b5e6b448b597e798310a1bd7d25d26484be..4171801c0e455998404ba74b7660387d25189bc2 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/register_hook.py @@ -21,7 +21,7 @@ import os import torch import torch.distributed as dist -from . import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed +from . import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten from .hook_module import HOOKModule from .wrap_functional import remove_dropout from ..common.utils import check_file_or_directory_path, print_error_log, CompareException, Const, \ @@ -29,6 +29,8 @@ from ..common.utils import check_file_or_directory_path, print_error_log, Compar from ..dump.utils import make_dump_dirs, DumpUtil from ..overflow_check.utils import OverFlowUtil +torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' + try: import torch_npu except ImportError: @@ -66,19 +68,27 @@ def initialize_hook(hook): setattr(torch_npu.distributed.distributed_c10d, attr_name[5:], getattr(wrap_distributed.HOOKDistributedOP, attr_name)) + if torch_version_above_2: + wrap_aten.wrap_aten_ops_and_bind(hook) + for attr_name in dir(wrap_aten.HOOKAtenOP): + if attr_name.startswith("wrap_"): + setattr(torch.ops.aten, attr_name[5:], getattr(wrap_aten.HOOKAtenOP, attr_name)) + wrap_vf.wrap_vf_ops_and_bind(hook) for attr_name in dir(wrap_vf.HOOKVfOP): if attr_name.startswith("wrap_"): setattr(torch._VF, attr_name[5:], getattr(wrap_vf.HOOKVfOP, attr_name)) - + if not is_gpu: wrap_npu_custom.wrap_npu_ops_and_bind(hook) for attr_name in dir(wrap_npu_custom.HOOKNpuOP): if attr_name.startswith("wrap_"): setattr(torch_npu, attr_name[5:], getattr(wrap_npu_custom.HOOKNpuOP, attr_name)) + def add_clear_overflow(func, pid): first_module = True + def clear_overflow_wrapper(*args, **kwargs): child_pid = os.getpid() if pid != child_pid: @@ -88,6 +98,7 @@ def add_clear_overflow(func, pid): torch_npu._C._clear_overflow_npu() first_module = False return func(*args, **kwargs) + return clear_overflow_wrapper @@ -114,7 +125,7 @@ def register_hook_core(hook, **kwargs): if "overflow_check" in hook_name and not is_gpu: if hasattr(torch_npu._C, "_enable_overflow_npu"): torch_npu._C._enable_overflow_npu() - print_info_log("Enable overflow function success.") + print_info_log("Enable overflow function success.") else: print_warn_log("Api '_enable_overflow_npu' is not exist, " "the overflow detection function on milan platform maybe not work! " diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml index b0a4bd3cec3ac21974e7c79697b84e8cdee16907..613b1a29b45da2fc8578e0107339e169f3a2b7c2 100644 --- a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/support_wrap_ops.yaml @@ -1059,6 +1059,734 @@ torch_npu: - npu_roi_alignbk - npu_ptiou +aten: + - signbit + - logical_not_ + - _foreach_copy_ + - clamp + - hardswish_ + - arcsin_ + - logsumexp + - native_group_norm + - special_i1e + - bitwise_and + - new_full + - fft_ihfft + - _adaptive_avg_pool2d + - scatter_add + - abs + - selu + - exponential + - silu + - _native_batch_norm_legit_functional + - special_hermite_polynomial_h + - tanh_ + - log_sigmoid_forward + - _fft_c2c + - heaviside_ + - sigmoid_backward + - zeros_like + - as_strided_scatter + - trace + - _assert_async + - avg_pool2d_backward + - exp2 + - binary_cross_entropy_backward + - geometric + - fft_ihfftn + - smooth_l1_loss + - multiply + - __lshift__ + - binary_cross_entropy_with_logits + - _embedding_bag + - arange + - linalg_qr + - _embedding_bag_forward_only + - _unsafe_view + - remainder + - cholesky_inverse + - sub_ + - zero + - fix + - xlogy + - __doc__ + - rsqrt_ + - cummin + - __xor__ + - eye + - _fused_adam + - ceil + - nll_loss2d_backward + - replication_pad3d_backward + - fill_ + - logaddexp2 + - _thnn_fused_lstm_cell_backward_impl + - native_dropout + - fft_ifft + - expand + - _cdist_backward + - avg_pool3d_backward + - round_ + - topk + - max_unpool3d + - xlogy_ + - reflection_pad2d_backward + - addcdiv_ + - relu6 + - multilabel_margin_loss_forward + - prelu + - logaddexp + - _cholesky_solve_helper + - _foreach_addcdiv + - arctan_ + - fft_irfftn + - logical_or + - bitwise_or_ + - hardtanh_backward + - uniform + - less_equal + - _foreach_sub + - linalg_cholesky_ex + - hardswish + - fft_fft2 + - sign + - min + - norm + - asin + - addcmul_ + - stft + - col2im + - special_chebyshev_polynomial_u + - adaptive_max_pool3d + - __ilshift__ + - _resize_output + - gather + - lu_unpack + - native_batch_norm_backward + - sigmoid + - sqrt + - new_empty_strided + - _foreach_lerp_ + - mean + - scatter_add_ + - _fft_c2r + - rand_like + - true_divide_ + - gcd_ + - multinomial + - permute + - index_put_ + - arcsinh_ + - log1p_ + - index_add + - atan + - glu_backward + - searchsorted + - fill + - _unsafe_index + - index_reduce_ + - replication_pad2d + - expm1_ + - hardsigmoid + - addmm + - fft_fftn + - fft_ifftshift + - special_modified_bessel_k1 + - fft_rfft + - ge + - _adaptive_avg_pool2d_backward + - argmin + - linalg_lu_factor_ex + - atanh_ + - addmv + - _foreach_sqrt_ + - huber_loss_backward + - empty_like + - softshrink + - subtract_ + - bitwise_left_shift_ + - special_modified_bessel_i0 + - _nested_tensor_from_tensor_list + - slice_backward + - special_modified_bessel_i1 + - special_chebyshev_polynomial_t + - conj_physical + - _cdist_forward + - margin_ranking_loss + - max_pool3d_with_indices_backward + - _foreach_reciprocal_ + - lcm + - transpose_ + - cudnn_batch_norm_backward + - reciprocal + - copysign_ + - _foreach_pow + - rad2deg + - _foreach_sqrt + - negative + - replication_pad3d + - atanh + - _linalg_eigh + - igamma_ + - special_i0e + - linalg_ldl_factor_ex + - special_ndtri + - logit + - diagonal_copy + - triu + - silu_ + - polygamma + - square_ + - nextafter_ + - special_scaled_modified_bessel_k0 + - bitwise_not + - var + - mkldnn_rnn_layer_backward + - upsample_bilinear2d + - arctan2 + - clone + - arcsin + - new_ones + - soft_margin_loss + - nan_to_num + - huber_loss + - linalg_lu_solve + - elu_backward + - acosh + - __ior__ + - _unsafe_index_put + - __or__ + - _linalg_slogdet + - arcsinh + - select_scatter + - less_ + - reflection_pad1d + - istft + - reflection_pad2d + - diagonal_backward + - special_entr + - _softmax_backward_data + - randn + - celu + - embedding + - igammac_ + - new_zeros + - native_layer_norm_backward + - nonzero_static + - diagonal_scatter + - grid_sampler_2d + - smooth_l1_loss_backward + - _to_copy + - fft_irfft2 + - relu_ + - fmod + - log1p + - i0 + - mse_loss_backward + - copy + - special_laguerre_polynomial_l + - addmv_ + - quantized_gru + - diag_embed + - acos + - fmod_ + - linalg_cross + - mvlgamma_ + - _foreach_mul + - cummax + - less_equal_ + - ne + - to + - _pdist_forward + - special_xlog1py + - digamma + - lgamma + - mv + - softplus + - special_bessel_y1 + - pin_memory + - logical_xor_ + - cat + - grid_sampler_2d_backward + - frac_ + - dropout + - unsafe_chunk + - masked_fill_ + - log + - negative_ + - _scaled_dot_product_flash_attention + - _amp_foreach_non_finite_check_and_unscale_ + - randn_like + - add + - roll + - threshold + - gcd + - asinh + - round + - t_ + - unfold_backward + - scatter_reduce + - softplus_backward + - bitwise_right_shift_ + - pdist + - select_backward + - relu + - special_bessel_j1 + - asinh_ + - pow + - fft_fftshift + - clamp_max_ + - logical_xor + - index_reduce + - _foreach_add_ + - adaptive_max_pool2d + - adaptive_max_pool3d_backward + - tan + - addbmm_ + - cosh_ + - __rshift__ + - _foreach_maximum + - fft_ifftn + - special_spherical_bessel_j0 + - split_with_sizes + - divide_ + - neg_ + - nll_loss + - _euclidean_dist + - pairwise_distance + - _adaptive_avg_pool3d + - slice + - absolute_ + - gelu_backward + - arccos + - sin + - tril_ + - triu_ + - fft_irfft + - flip + - _foreach_sign + - linalg_householder_product + - _list_to_tensor + - cumprod + - randint_like + - item + - narrow_copy + - tanh + - linalg_vector_norm + - _cudnn_rnn + - _scaled_dot_product_efficient_attention + - _reshape_alias + - _linalg_det + - constant_pad_nd + - _linalg_svd + - sinh_ + - view + - nll_loss_backward + - greater + - sqrt_ + - avg_pool3d + - arctan + - le_ + - _pdist_backward + - _adaptive_avg_pool3d_backward + - log_ + - logical_or_ + - mse_loss + - rrelu_with_noise_backward + - _native_batch_norm_legit + - log10 + - scatter_ + - atan2_ + - greater_equal + - index_select + - __iand__ + - digamma_ + - eq + - divide + - cholesky_solve + - _prelu_kernel + - fft_ifft2 + - _foreach_neg_ + - alias + - erfc_ + - not_equal + - mul + - gru + - _dir + - glu + - clip + - lt + - rsqrt + - avg_pool2d + - conj_physical_ + - quantized_lstm + - erfinv_ + - log10_ + - float_power_ + - _functional_assert_async + - hardtanh + - logical_and_ + - _resize_output_ + - clamp_min + - _functional_sym_constrain_range_for_size + - _addmm_activation + - bucketize + - _thnn_fused_lstm_cell + - zeros + - reflection_pad1d_backward + - tan_ + - bitwise_not_ + - addmm_ + - absolute + - as_strided + - special_ndtr + - gt_ + - baddbmm + - special_log_ndtr + - hardshrink + - fft_hfft + - hypot + - native_layer_norm + - _scaled_dot_product_flash_attention_backward + - floor_divide + - is_same_size + - std + - floor_divide_ + - clamp_min_ + - _foreach_sign_ + - std_mean + - tanh_backward + - _foreach_addcmul + - binary_cross_entropy + - threshold_backward + - deg2rad_ + - masked_fill + - linspace + - reflection_pad3d + - mish + - index_copy + - scatter_reduce_ + - _sparse_coo_tensor_with_dims_and_tensors + - __loader__ + - _foreach_div_ + - cosh + - _foreach_maximum_ + - neg + - lift_fresh + - logspace + - selu_ + - leaky_relu_ + - matmul + - _foreach_sub_ + - bitwise_or + - unfold + - fmin + - convolution + - argmax + - maximum + - reflection_pad3d_backward + - fft_fft + - mode + - remainder_ + - _foreach_neg + - erf_ + - special_zeta + - index_add_ + - arccos_ + - lgamma_ + - unsqueeze_ + - gelu_ + - bmm + - _add_relu + - unfold_copy + - not_equal_ + - subtract + - true_divide + - max_pool2d_with_indices_backward + - _native_batch_norm_legit_no_training + - replication_pad1d + - name + - greater_ + - log_normal + - minimum + - alpha_dropout + - rnn_tanh + - _functional_sym_constrain_range + - sum + - _prelu_kernel_backward + - cumsum_ + - ne_ + - _linalg_solve_ex + - native_batch_norm + - igammac + - hypot_ + - exp + - leaky_relu + - new_empty + - cudnn_batch_norm + - resize_as_ + - mm + - triangular_solve + - sign_ + - clamp_max + - bitwise_right_shift + - logical_and + - special_i0 + - index_copy_ + - arctanh_ + - elu + - index + - isposinf + - linalg_solve_triangular + - logcumsumexp + - arccosh + - nan_to_num_ + - nll_loss_forward + - convolution_backward + - sub + - special_scaled_modified_bessel_k1 + - mish_ + - diagonal + - median + - tril + - sgn + - native_group_norm_backward + - stack + - take + - linalg_lu + - log2 + - hardsigmoid_ + - erfc + - max + - native_dropout_backward + - logit_ + - addr + - clip_ + - _foreach_minimum_ + - atan_ + - repeat + - cumprod_ + - bitwise_xor_ + - less + - index_put + - rrelu_with_noise + - addbmm + - special_bessel_y0 + - __and__ + - bernoulli_ + - uniform_ + - log2_ + - mul_ + - adaptive_max_pool2d_backward + - _foreach_addcmul_ + - slice_scatter + - isneginf + - pow_ + - renorm_ + - arccosh_ + - replication_pad1d_backward + - bitwise_and_ + - heaviside + - renorm + - special_modified_bessel_k0 + - le + - is_pinned + - __ixor__ + - leaky_relu_backward + - count_nonzero + - _fused_adam_ + - repeat_interleave + - upsample_bicubic2d + - rsub + - arctan2_ + - frac + - scalar_tensor + - rrelu_with_noise_ + - rot90 + - erf + - lerp_ + - expm1 + - full + - sym_constrain_range_for_size + - prod + - normal_ + - elu_ + - special_airy_ai + - nextafter + - split + - addcdiv + - fft_rfft2 + - max_pool3d_with_indices + - positive + - transpose + - mish_backward + - clamp_ + - exp_ + - _foreach_reciprocal + - linalg_matrix_exp + - unsqueeze + - upsample_nearest2d + - sinc_ + - select + - rad2deg_ + - trunc_ + - _make_dep_token + - nanmedian + - fft_hfftn + - hardtanh_ + - sym_constrain_range + - index_fill_ + - deg2rad + - rand + - sinc + - pixel_shuffle + - tril_indices + - copy_ + - _int_mm + - greater_equal_ + - celu_ + - div + - igamma + - exp2_ + - cos + - log_normal_ + - _log_softmax_backward_data + - im2col + - reciprocal_ + - amax + - broadcast_tensors + - erfinv + - __spec__ + - _fused_dropout + - special_hermite_polynomial_he + - aminmax + - rnn_relu + - meshgrid + - var_mean + - eq_ + - upsample_nearest3d + - dot + - zero_ + - floor_ + - fft_rfftn + - special_erfcx + - _foreach_div + - fft_hfft2 + - _upsample_bilinear2d_aa + - sort + - log_sigmoid_backward + - add_ + - copysign + - bernoulli + - special_bessel_j0 + - max_pool2d_with_indices + - _scaled_dot_product_efficient_attention_backward + - t + - _softmax + - arctanh + - hinge_embedding_loss + - hardswish_backward + - fmax + - multiply_ + - floor + - lstm + - i0_ + - cholesky + - where + - __irshift__ + - addcmul + - embedding_dense_backward + - sigmoid_ + - fix_ + - ormqr + - exponential_ + - __name__ + - fft_ihfft2 + - logical_not + - ones + - sgn_ + - sinh + - any + - _foreach_addcdiv_ + - asin_ + - gt + - lift + - squeeze + - grid_sampler_3d_backward + - atan2 + - _fft_r2c + - angle + - silu_backward + - acosh_ + - abs_ + - lerp + - special_i1 + - complex + - ceil_ + - _foreach_minimum + - hardsigmoid_backward + - upsample_nearest1d + - mvlgamma + - acos_ + - lt_ + - grid_sampler_3d + - max_unpool2d + - ones_like + - soft_margin_loss_backward + - _fused_moving_avg_obs_fq_helper + - isnan + - nansum + - baddbmm_ + - amin + - isinf + - bitwise_left_shift + - unsafe_split_with_sizes + - full_like + - sin_ + - bitwise_xor + - linalg_ldl_solve + - cos_ + - div_ + - polar + - randint + - trunc + - __package__ + - nll_loss2d_forward + - diag + - argsort + - _foreach_mul_ + - square + - detach + - affine_grid_generator + - _pin_memory + - geometric_ + - unbind + - randperm + - upsample_nearest2d_backward + - all + - threshold_ + - unsafe_split + - cauchy + - normal + - linalg_inv_ex + - multi_margin_loss + - cumsum + - gelu + - index_fill + - scatter + - mkldnn_rnn_layer + - ge_ + - dist + - _foreach_add + - logit_backward + - triu_indices + - lcm_ + - empty_strided + - replication_pad2d_backward + - cauchy_ + - _log_softmax + - vdot + distributed: - send - recv diff --git a/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py new file mode 100644 index 0000000000000000000000000000000000000000..3e8e33f225f2c76c30c61a97f580cb5e7f1ee84d --- /dev/null +++ b/debug/accuracy_tools/ptdbg_ascend/src/python/ptdbg_ascend/hook_module/wrap_aten.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2023-2023. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import os +import torch + +import yaml + +from .hook_module import HOOKModule +from ..common.utils import torch_device_guard + + +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") +with open(yaml_path, 'r') as f: + WrapAtenOps = yaml.safe_load(f).get('aten') + + +aten_func = {} +for f in dir(torch.ops.aten): + aten_func[f] = getattr(torch.ops.aten, f) + + +def get_aten_ops(): + global WrapAtenOps + _all_aten_ops = dir(torch.ops.aten) + return set(WrapAtenOps) & set(_all_aten_ops) + + +class HOOKAtenOP(object): + pass + + +class AtenOPTemplate(HOOKModule): + def __init__(self, op_name, hook): + self.op_name_ = op_name + self.prefix_op_name_ = "Aten_" + str(op_name) + "_" + super().__init__(hook) + + @torch_device_guard + def forward(self, *args, **kwargs): + return aten_func.get(self.op_name_)(*args, **kwargs) + + +def wrap_aten_op(op_name, hook): + def aten_op_template(*args, **kwargs): + return AtenOPTemplate(op_name, hook)(*args, **kwargs) + + return aten_op_template + + +def wrap_aten_ops_and_bind(hook): + _aten_ops = get_aten_ops() + for op_name in _aten_ops: + setattr(HOOKAtenOP, "wrap_" + str(op_name), wrap_aten_op(op_name, hook))