diff --git a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py
index 931dcae9f246d1ea264a915851ef0d793eb87d83..05b9a0eb62d7e7674dee8c0d879e4ad9eda5e9aa 100644
--- a/debug/accuracy_tools/api_accuracy_checker/dump/dump.py
+++ b/debug/accuracy_tools/api_accuracy_checker/dump/dump.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
+import torch.distributed as dist
 
 from api_accuracy_checker.dump.api_info import ForwardAPIInfo, BackwardAPIInfo
 from api_accuracy_checker.dump.info_dump import write_api_info_json, initialize_output_json
@@ -65,6 +66,14 @@ def pretest_info_dump(name, out_feat, module, phase):
     if not DumpUtil.get_dump_switch():
         return
     if phase == DumpConst.forward:
+        if "Distributed" in name:
+            if module.input_kwargs.get("op"):
+                module.input_kwargs["op"] = module.input_kwargs["op"].name
+            if module.input_kwargs.get("group"):
+                if isinstance(module.input_kwargs["group"], dist.distributed_c10d.ProcessGroup):
+                    module.input_kwargs["group"] = module.input_kwargs["group"].size()
+                else:
+                    module.input_kwargs["group"] = dist.distributed_c10d.get_world_size()
         api_info = ForwardAPIInfo(name, module.input_args, module.input_kwargs)
     elif phase == DumpConst.backward:
         api_info = BackwardAPIInfo(name, out_feat)
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py
index b355e029b6b74e2accc9241b42deebe31cb8e5ca..1a629e9ac0190984b665483c353986d7430bc204 100644
--- a/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/register_hook.py
@@ -15,8 +15,18 @@
 # limitations under the License.
 """
 import torch
+import torch.distributed as dist
 
-from api_accuracy_checker.hook_module import wrap_torch, wrap_functional, wrap_tensor
+from api_accuracy_checker.hook_module import wrap_torch, wrap_functional, wrap_tensor, wrap_distributed
+from api_accuracy_checker.common.utils import torch_without_guard_version
+
+try:
+    import torch_npu
+except ImportError:
+    is_gpu = True
+else:
+    is_gpu = False
+    from . import wrap_npu_custom
 
 
 def initialize_hook(hook):
@@ -35,3 +45,18 @@
         if attr_name.startswith("wrap_"):
             setattr(torch.nn.functional, attr_name[5:], getattr(wrap_functional.HOOKFunctionalOP, attr_name))
+    wrap_distributed.wrap_distributed_ops_and_bind(hook)
+    for attr_name in dir(wrap_distributed.HOOKDistributedOP):
+        if attr_name.startswith("wrap_"):
+            setattr(dist, attr_name[5:], getattr(wrap_distributed.HOOKDistributedOP, attr_name))
+            setattr(dist.distributed_c10d, attr_name[5:], getattr(wrap_distributed.HOOKDistributedOP, attr_name))
+            if not is_gpu and not torch_without_guard_version:
+                setattr(torch_npu.distributed, attr_name[5:], getattr(wrap_distributed.HOOKDistributedOP, attr_name))
+                setattr(torch_npu.distributed.distributed_c10d, attr_name[5:],
+                        getattr(wrap_distributed.HOOKDistributedOP, attr_name))
+
+    if not is_gpu:
+        wrap_npu_custom.wrap_npu_ops_and_bind(hook)
+        for attr_name in dir(wrap_npu_custom.HOOKNpuOP):
+            if attr_name.startswith("wrap_"):
+                setattr(torch_npu, attr_name[5:], getattr(wrap_npu_custom.HOOKNpuOP, attr_name))
 
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/api_accuracy_checker/hook_module/support_wrap_ops.yaml
index c7ed0a1f81cf7b6b2e17ce0e6c37965567f5e42a..5ead4800c6bd386c0cc577e166054eb70bcd3b5a 100644
--- a/debug/accuracy_tools/api_accuracy_checker/hook_module/support_wrap_ops.yaml
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/support_wrap_ops.yaml
@@ -997,3 +997,70 @@
   - vstack
   - where
   - xlogy_
+
+torch_npu:
+  - one_
+  - npu_sort_v2
+  - npu_transpose
+  - npu_broadcast
+  - npu_dtype_cast
+  - empty_with_format
+  - npu_one_hot
+  - npu_stride_add
+  - npu_ps_roi_pooling
+  - npu_roi_align
+  - npu_nms_v4
+  - npu_iou
+  - npu_nms_with_mask
+  - npu_pad
+  - npu_bounding_box_encode
+  - npu_bounding_box_decode
+  - npu_batch_nms
+  - npu_slice
+  - _npu_dropout
+  - npu_indexing
+  - npu_ifmr
+  - npu_max
+  - npu_scatter
+  - npu_layer_norm_eval
+  - npu_alloc_float_status
+  - npu_confusion_transpose
+  - npu_bmmV2
+  - fast_gelu
+  - npu_sub_sample
+  - npu_deformable_conv2d
+  - npu_mish
+  - npu_anchor_response_flags
+  - npu_yolo_boxes_encode
+  - npu_grid_assign_positive
+  - npu_normalize_batch
+  - npu_masked_fill_range
+  - npu_linear
+  - npu_bert_apply_adam
+  - npu_giou
+  - npu_ciou
+  - npu_diou
+  - npu_sign_bits_pack
+  - npu_sign_bits_unpack
+  - npu_flash_attention
+  - npu_scaled_masked_softmax
+  - npu_rotary_mul
+  - npu_roi_align
+  - npu_roi_alignbk
+  - npu_ptiou
+  - npu_fusion_attention
+
+distributed:
+  - send
+  - recv
+  - broadcast
+  - all_reduce
+  - reduce
+  - all_gather
+  - gather
+  - isend
+  - irecv
+  - scatter
+  - reduce_scatter
+  - _reduce_scatter_base
+  - _all_gather_base
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/utils.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/utils.py
index 7d16ac993ed45faa0f9b48bb64050592e15ef4d2..dfa022db88703499aa6324adec7c690f532b7dda 100644
--- a/debug/accuracy_tools/api_accuracy_checker/hook_module/utils.py
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/utils.py
@@ -26,4 +26,6 @@ with FileOpen(yaml_path, 'r') as f:
     Ops = yaml.safe_load(f)
     WrapFunctionalOps = Ops.get('functional')
     WrapTensorOps = Ops.get('tensor')
-    WrapTorchOps = Ops.get('torch')
\ No newline at end of file
+    WrapTorchOps = Ops.get('torch')
+    WrapDistributedOps = Ops.get('distributed')
+    WrapNpuOps = Ops.get('torch_npu')
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_distributed.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_distributed.py
new file mode 100644
index 0000000000000000000000000000000000000000..b77f69225a801331df7b63fa8df8a60d848592de
--- /dev/null
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_distributed.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+# Copyright (C) 2022-2023. Huawei Technologies Co., Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import os
+
+import torch.distributed as dist
+import yaml
+
+from api_accuracy_checker.hook_module.hook_module import HOOKModule
+from api_accuracy_checker.common.utils import torch_device_guard
+from api_accuracy_checker.common.config import msCheckerConfig
+from api_accuracy_checker.hook_module.utils import WrapDistributedOps
+from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen
+
+
+distributed_func = {}
+for f in dir(dist):
+    distributed_func[f] = getattr(dist, f)
+
+
+def get_distributed_ops():
+    global WrapDistributedOps
+    _all_distributed_ops = dir(dist)
+    if msCheckerConfig.white_list:
+        return set(WrapDistributedOps) & set(_all_distributed_ops) & set(msCheckerConfig.white_list)
+    else:
+        return set(WrapDistributedOps) & set(_all_distributed_ops)
+
+
+class HOOKDistributedOP(object):
+    pass
+
+
+class DistributedOPTemplate(HOOKModule):
+    def __init__(self, op_name, hook):
+        self.op_name_ = op_name
+        self.prefix_op_name_ = "Distributed_" + str(op_name) + "_"
+        super().__init__(hook)
+
+    @torch_device_guard
+    def forward(self, *args, **kwargs):
+        return distributed_func.get(self.op_name_)(*args, **kwargs)
+
+
+def wrap_distributed_op(op_name, hook):
+    def distributed_op_template(*args, **kwargs):
+        return DistributedOPTemplate(op_name, hook)(*args, **kwargs)
+
+    return distributed_op_template
+
+
+def wrap_distributed_ops_and_bind(hook):
+    _distributed_ops = get_distributed_ops()
+    for op_name in _distributed_ops:
+        setattr(HOOKDistributedOP, "wrap_" + str(op_name), wrap_distributed_op(op_name, hook))
diff --git a/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_npu_custom.py
new file mode 100644
index 0000000000000000000000000000000000000000..342a1fd58edf75e71058eee1b32dc066f76314d4
--- /dev/null
+++ b/debug/accuracy_tools/api_accuracy_checker/hook_module/wrap_npu_custom.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+# Copyright (C) 2022-2023. Huawei Technologies Co., Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import os
+import torch
+import torch_npu
+import yaml
+
+from api_accuracy_checker.hook_module.hook_module import HOOKModule
+from api_accuracy_checker.common.utils import torch_device_guard, torch_without_guard_version
+from api_accuracy_checker.common.config import msCheckerConfig
+from api_accuracy_checker.hook_module.utils import WrapNpuOps
+from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen
+
+cur_path = os.path.dirname(os.path.realpath(__file__))
+yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml")
+with FileOpen(yaml_path, 'r') as f:
+    WrapNpuOps = yaml.safe_load(f).get('torch_npu')
+
+
+def get_npu_ops():
+    global WrapNpuOps
+    if torch_without_guard_version:
+        _npu_ops = dir(torch.ops.npu)
+    else:
+        _npu_ops = dir(torch_npu._C._VariableFunctionsClass)
+
+    if msCheckerConfig.white_list:
+        return set(WrapNpuOps) & set(_npu_ops) & set(msCheckerConfig.white_list)
+    else:
+        return set(WrapNpuOps) & set(_npu_ops)
+
+
+class HOOKNpuOP(object):
+    pass
+
+
+class NpuOPTemplate(HOOKModule):
+
+    def __init__(self, op_name, hook):
+        self.op_name_ = op_name
+        self.prefix_op_name_ = "NPU_" + str(op_name) + "_"
+        super().__init__(hook)
+
+    @torch_device_guard
+    def forward(self, *args, **kwargs):
+        if torch_without_guard_version:
+            return getattr(torch.ops.npu, str(self.op_name_))(*args, **kwargs)
+        else:
+            return getattr(torch_npu._C._VariableFunctionsClass, str(self.op_name_))(*args, **kwargs)
+
+
+def wrap_npu_op(op_name, hook):
+
+    def npu_op_template(*args, **kwargs):
+        return NpuOPTemplate(op_name, hook)(*args, **kwargs)
+
+    return npu_op_template
+
+
+def wrap_npu_ops_and_bind(hook):
+    _npu_ops = get_npu_ops()
+    for op_name in _npu_ops:
+        setattr(HOOKNpuOP, "wrap_" + str(op_name), wrap_npu_op(op_name, hook))
diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
index c6c273fdd4f58ac0b4253246497fe1f03aaa5f1d..107c4491946534ed50be27b638684c049f768471 100644
--- a/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
+++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/run_ut.py
@@ -142,6 +142,8 @@ def run_ut(config):
             [_, api_name, _] = api_full_name.split("*")
             if api_name not in set(msCheckerConfig.white_list):
                 continue
+        if "Distributed" in api_full_name or "NPU" in api_full_name:
+            continue
         data_info = run_torch_api(api_full_name, api_setting_dict, config.backward_content, api_info_dict)
         is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, data_info.bench_out,
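
Note: the snippet below is an illustrative, self-contained sketch, not part of the patch. It shows the generic monkey-patching pattern that wrap_distributed_ops_and_bind() and initialize_hook() implement for torch.distributed; the recorder my_hook and the printed prefix are hypothetical stand-ins for the HOOKModule-based recording the checker actually uses.

import torch.distributed as dist

# Keep references to the original ops so wrappers can delegate to them,
# mirroring the distributed_func dict built in wrap_distributed.py.
original_funcs = {name: getattr(dist, name) for name in dir(dist)}


def make_wrapper(op_name, hook):
    def wrapper(*args, **kwargs):
        hook("Distributed_" + op_name + "_", args, kwargs)  # record the call first
        return original_funcs[op_name](*args, **kwargs)  # then run the real op
    return wrapper


def my_hook(prefix, args, kwargs):  # hypothetical recorder
    print(f"captured {prefix} with {len(args)} positional args")


# Rebind the module attributes, as register_hook.py does with setattr().
for op in ("all_reduce", "broadcast"):
    if hasattr(dist, op):
        setattr(dist, op, make_wrapper(op, my_hook))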