diff --git a/.gitignore b/.gitignore index 2f15a00811101c8743f981fecb6976c7066fb941..3ecaac256b34a364f9b1ade1cddeae9ba2295475 100644 --- a/.gitignore +++ b/.gitignore @@ -143,4 +143,7 @@ cython_debug/ att_advisor*.html *.xlsx operator_tuning_file*.cfg -.ipynb_checkpoints/ \ No newline at end of file +.ipynb_checkpoints/ + +# pycharm settings +.idea \ No newline at end of file diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..7d3e2b226bde101c5aeba46f5353cbf3c53549d0 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +import argparse +import json +import os +import math +import numpy as np +import torch +try: + import torch_npu +except ImportError: + pass + +from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi + + +TENSOR_DATA_LIST = ["torch.Tensor"] +TORCH_BOOL_TYPE = ["torch.bool"] +TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", + "torch.int64", "torch.long"] +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", + "torch.float64", "torch.double"] +TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] + + +def check_json(json_path): + json_file = os.path.realpath(json_path) + with open(json_file) as f: + json_content = json.load(f) + if not isinstance(json_content, dict): + raise ValueError("content of json file is not a dictionary!") + if len(list(json_content.items())) > 1: + raise ValueError("json file has more than one API, only one API is allowed!") + (api_full_name, api_info_dict) = list(json_content.items())[0] + (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + if api_type not in ("Functional", "Tensor", "Torch"): + raise ValueError("type {0} of API is not supported!".format(api_type)) + return (api_full_name, api_info_dict) + + +def check_user_settings(cmd_args): + iter_t = cmd_args.iter_times + if iter_t <= 0: + raise ValueError("iter_times should be an integer bigger than zero!") + (api_full_name, api_info_dict) = check_json(cmd_args.forward_json_path) + return api_full_name, api_info_dict + + +def get_compare_standard(api_name): + if api_name in BinaryStandardApi: + return "CompareStandard.BINARY_EQUALITY_STANDARD" + if api_name in AbsoluteStandardApi: + return "CompareStandard.ABSOLUTE_THRESHOLD_STANDARD" + if api_name in ULPStandardApi: + return "CompareStandard.ULP_ERROR_STANDARD" + return "CompareStandard.BENCHMARK_STANDARD" + + +def get_settings(cmd_args): + ''' + internal_settings contain all information needed for the operator program. + keys: + api_full_name: api_type.api_name.ordinal_number + api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch + api_name: name of API + ordinal_number: how many times the same api has been called + direction_status: forward + random_seed: if mode is random_data, random seed is random_seed + iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter + args_element_assignment: code for args assignment + args_list_generator_device: code for generate args list on device + args_list_generator_bench: code for generate args list on bench + kwargs_value_assignment: code for kwargs assignment + kwargs_dict_generator_device: code for generate kwargs dict on device + kwargs_dict_generator_bench: code for generate kwargs dict on bench + ''' + api_full_name, api_info_dict = check_user_settings(cmd_args) + args_info = api_info_dict.get("args") + kwargs_info = api_info_dict.get("kwargs") + + internal_settings = {} + internal_settings["api_full_name"] = api_full_name + (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + if api_type == "Functional": + internal_settings["api_type"] = "torch.nn.functional" + elif api_type == "Tensor": + internal_settings["api_type"] = "torch.Tensor" + else: + internal_settings["api_type"] = "torch" + internal_settings["api_name"] = api_name + internal_settings["compare_standard"] = get_compare_standard(api_name) + internal_settings["ordinal_number"] = ordinal_number + internal_settings["direction_status"] = "forward" + internal_settings["random_seed"] = cmd_args.random_seed + if cmd_args.mode == "real_data": + internal_settings["iter_times"] = 1 + else: + internal_settings["iter_times"] = cmd_args.iter_times + internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info) + internal_settings["args_list_generator_device"] = generate_args_list_device(args_info) + internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info) + internal_settings["kwargs_value_assignment"] = generate_kwargs_value_assignment_code(kwargs_info) + internal_settings["kwargs_dict_generator_device"] = generate_kwargs_dict_device(kwargs_info) + internal_settings["kwargs_dict_generator_bench"] = generate_kwargs_dict_bench(kwargs_info) + return internal_settings + + +def recursive_args_element_assignment(args_info, name_number): + args_element_assignment = "" + for index, arg in enumerate(args_info): + if isinstance(arg, (list, tuple)): + new_args_element_assignment = recursive_args_element_assignment(arg, name_number + "_" + str(index)) + args_element_assignment += new_args_element_assignment + else: + arg["parameter_name"] = "arg" + name_number + "_" + str(index) + args_element_assignment += " " + "arg_info" + name_number + "_" + str(index) + " = " + "{}".format(str(arg)) + "\n" + args_element_assignment += " " + "arg" + name_number + "_" + str(index) + " = " + "generate_data(arg_info" + name_number + "_" + str(index) + ")" + "\n" + return args_element_assignment + + +def generate_args_element_assignment_code(args_info): + args_element_assignment = recursive_args_element_assignment(args_info, "") + return args_element_assignment + + +def recursive_args_list(args_info, flag_device=False, flag_bench=False): + args_list_generator = "" + for index, arg in enumerate(args_info): + if isinstance(arg, (list, tuple)): + (left_bracket, right_bracket) = ("[", "]") if isinstance(arg, list) else ("(", ")") + args_list_generator += left_bracket + new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) + args_list_generator += new_args_list_generator + args_list_generator += right_bracket + else: + args_list_generator += arg.get("parameter_name") + if arg.get("type") in TENSOR_DATA_LIST: + if flag_device: + args_list_generator += ".to(device)" + if flag_bench: + args_list_generator += '.to(torch.device("cpu"))' + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), " + arg.get("parameter_name") + ".dtype))" + args_list_generator += ", " + return args_list_generator + + +def generate_args_list_device(args_info): + args_list_generator_device = recursive_args_list(args_info, flag_device=True) + return args_list_generator_device + + +def generate_args_list_bench(args_info): + args_list_generator_bench = recursive_args_list(args_info, flag_bench=True) + return args_list_generator_bench + + +def recursive_kwargs_value_assignment(info, key_name, name_number): + kwargs_value_assignment = "" + if isinstance(info, dict): + if info.get("type") == "torch.device" or info.get("type") == "torch.dtype": + kwargs_value_assignment += " " + "kwarg_" + key_name + name_number + " = " + info.get("value") + else: + kwargs_value_assignment += " " + "kwarg_info_" + key_name + name_number + " = " + "{}".format(str(info)) + "\n" + kwargs_value_assignment += " " + "kwarg_" + key_name + name_number + " = " + "generate_data(kwarg_info_" + key_name + name_number + ")" + "\n" + info["parameter_name"] = "kwarg_" + key_name + name_number + else: + for index, arg in enumerate(info): + new_kwargs_value_assignment = recursive_kwargs_value_assignment(arg, key_name, name_number + "_" + str(index)) + kwargs_value_assignment += new_kwargs_value_assignment + return kwargs_value_assignment + + +def generate_kwargs_value_assignment_code(kwargs_info): + kwargs_value_assignment = "" + for key, value in kwargs_info.items(): + kwargs_value_assignment += recursive_kwargs_value_assignment(value, key, "") + return kwargs_value_assignment + + +def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): + kwargs_dict_generator = "" + if isinstance(info, dict): + kwargs_dict_generator += info.get("parameter_name") + if info.get("type") in TENSOR_DATA_LIST: + if flag_device: + kwargs_dict_generator += ".to(device)" + if flag_bench: + kwargs_dict_generator += '.to(torch.device("cpu"))' + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), " + info.get("parameter_name") + ".dtype))" + else: + (left_bracket, right_bracket) = ("[", "]") if isinstance(info, list) else ("(", ")") + kwargs_dict_generator += left_bracket + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += right_bracket + return kwargs_dict_generator + + +def generate_kwargs_dict_device(kwargs_info): + kwargs_dict_generator_device = "" + for key, value in kwargs_info.items(): + kwargs_dict_generator_device += '"' + key + '"' + ": " + kwargs_dict_generator_device += recursive_kwargs_dict(value, flag_device=True) + ", " + return kwargs_dict_generator_device + + +def generate_kwargs_dict_bench(kwargs_info): + kwargs_dict_generator_bench = "" + for key, value in kwargs_info.items(): + kwargs_dict_generator_bench += '"' + key + '"' + ": " + kwargs_dict_generator_bench += recursive_kwargs_dict(value, flag_bench=True) + ", " + return kwargs_dict_generator_bench + + +def op_generator_parser(parser): + parser.add_argument("-forward", "--forward_json_path", dest="forward_json_path", type=str, + help=" Path of forward API json file.", + required=True) + parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), + help=" Execute mode, should be random_data or real_data.", + required=True) + parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, + help=" If mode is random_data, it is random seed.", + required=False) + parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, + help=" If mode is random_data, generate iter_times group of data.", + required=False) + + +def main(): + parser = argparse.ArgumentParser() + op_generator_parser(parser) + cmd_args = parser.parse_args() + internal_settings = get_settings(cmd_args) + + template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") + operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) + + try: + with open(template_path, 'r') as ftemp, open(operator_script_path, 'w') as fout: + code_template = ftemp.read() + fout.write(code_template.format(**internal_settings)) + except OSError: + print(f"Failed to open file. Please check file {template_path} or {operator_script_path}.") + + print(f"Generate operator script successfully and the name is {operator_script_path}.") + + +if __name__ == "__main__": + main() diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template new file mode 100644 index 0000000000000000000000000000000000000000..7630839aa937c6d0419629b5e93c34b51b71f295 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -0,0 +1,325 @@ +import json +import os +import math +from enum import Enum, auto +import torch +try: + import torch_npu +except ImportError: + pass + + +TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] +TORCH_BOOL_TYPE = ["torch.bool"] +TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", + "torch.int64", "torch.long"] +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", + "torch.float64", "torch.double"] +TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] +RAISE_PRECISION = {{ + "torch.float16": torch.float32, + "torch.half": torch.float32, + "torch.bfloat16": torch.float32, + "torch.float32": torch.float64, + "torch.float": torch.float64 +}} + + +class CompareStandard(Enum): + BINARY_EQUALITY_STANDARD = auto() + ABSOLUTE_THRESHOLD_STANDARD = auto() + ULP_ERROR_STANDARD = auto() + BENCHMARK_STANDARD = auto() + + +def get_device(): + if torch.cuda.is_available(): + device = torch.device("cuda") + elif torch_npu.npu.is_available(): + device = torch.device("npu") + else: + raise Exception("Error: This device is not NPU or GPU!") + return device + + +def generate_bool_tensor(low, high, shape): + low, high = int(low), int(high) + tensor = torch.randint(low, high + 1, shape) + bool_tensor = torch.gt(tensor, 0) + return bool_tensor + + +def generate_numerical_tensor(low, high, shape, data_dtype): + if data_dtype in TORCH_FLOAT_TYPE: + scale = high - low + rand01 = torch.rand(shape, dtype=eval(data_dtype)) + tensor = rand01 * scale + low + elif data_dtype in TORCH_INT_TYPE: + low, high = int(low), int(high) + tensor = torch.randint(low, high + 1, shape, dtype=eval(data_dtype)) + else: + raise NotImplementedError(f"{{data_dtype}} is not supported!") + if torch.numel(tensor) == 0: + return tensor + tmp_tensor = tensor.reshape(-1) + tmp_tensor[0] = low + tmp_tensor[-1] = high + data = tmp_tensor.reshape(shape) + return data + + +def generate_random_tensor(info): + low, high = info.get('Min'), info.get('Max') + data_dtype = info.get('dtype') + shape = tuple(info.get('shape')) + if data_dtype == "torch.bool": + data = generate_bool_tensor(low, high, shape) + else: + data = generate_numerical_tensor(low, high, shape, data_dtype) + return data + + +def generate_real_tensor(data_path): + data_path = os.path.realpath(data_path) + data = torch.load(data_path) + return data + + +def generate_data(info): + data_type = info.get("type") + data_path = info.get("datapath") + if data_type in TENSOR_DATA_LIST: + if data_path: + data = generate_real_tensor(data_path) + else: + data = generate_random_tensor(info) + else: + data = info.get("value") + return data + + +def get_input(): +{args_element_assignment} + args_device = [{args_list_generator_device}] + args_bench = [{args_list_generator_bench}] +{kwargs_value_assignment} + kwargs_device = {{{kwargs_dict_generator_device}}} + kwargs_bench = {{{kwargs_dict_generator_bench}}} + return args_device, kwargs_device, args_bench, kwargs_bench + + +def exec_api_device(args, kwargs): + output_device = {api_type}.{api_name}(*args, **kwargs) + return output_device + + +def exec_api_bench(args, kwargs): + output_bench = {api_type}.{api_name}(*args, **kwargs) + return output_bench + + +def compute_inf_nan_proportion(inf_nan_mask, out_device, out_bench, abs_bench_with_eps, rtol): + out_bench = out_bench.to(out_device.dtype) + min = torch.finfo(out_device.dtype).min + max = torch.finfo(out_device.dtype).max + bench_clip = torch.clamp(out_bench, min=min, max=max) + device_clip = torch.clamp(out_device, min=min, max=max) + clipped_abs_ae = torch.abs(device_clip - bench_clip) + clipped_re = clipped_abs_ae / abs_bench_with_eps + pass_mask = torch.less_equal(clipped_re, rtol) + both_nan_mask = torch.logical_and(torch.isnan(out_device), torch.isnan(bench_clip)) + pass_mask = torch.logical_or(pass_mask, both_nan_mask) + not_pass_mask = torch.logical_not(pass_mask) + not_pass_mask = torch.logical_and(not_pass_mask, inf_nan_mask) + inf_nan_err_cnt = torch.sum(not_pass_mask) + return 0 if torch.sum(inf_nan_mask) == 0 else inf_nan_err_cnt / torch.sum(inf_nan_mask) + + +def compute_rmse(abs_err, normal_value_mask): + if torch.sum(normal_value_mask) == 0: + return 0 + else: + masked_ae = torch.where(normal_value_mask, abs_err, 0) + mse = torch.sum(torch.square(masked_ae)) / torch.sum(normal_value_mask) + rmse = torch.sqrt(mse) + return rmse + + +def compute_error_balance(out_device, out_bench): + larger_count = torch.sum(torch.greater(out_device - out_bench.to(out_device.dtype), 0)) + smaller_count = torch.sum(torch.less(out_device - out_bench.to(out_device.dtype), 0)) + total_count = torch.numel(out_bench) + error_balance = abs(larger_count - smaller_count) / total_count + return error_balance + + +def compare_tensor(out_device, out_bench, api_name): + if out_device.shape != out_bench.shape: + print("ERROR: shape of out_device and out_bench is not equal!") + return None + if torch.numel(out_bench) == 0: + print("Both out_device and out_bench have zero elements.") + return None + print(f"shape is {{out_bench.shape}}") + print(f"dtype of out_device is {{out_device.dtype}}") + print(f"dtype of out_bench is {{out_bench.dtype}}") + dtype_device = out_device.dtype + dtype_bench = out_bench.dtype + if str(dtype_device) in TORCH_FLOAT_TYPE and str(dtype_bench) in TORCH_FLOAT_TYPE \ + or str(dtype_device) in TORCH_INT_TYPE and str(dtype_bench) in TORCH_INT_TYPE \ + or str(dtype_device) in TORCH_BOOL_TYPE and str(dtype_bench) in TORCH_BOOL_TYPE: + out_device = out_device.to(torch.device("cpu")) + if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: + print("compare standard: binary equality standard:") + error_number = torch.sum(out_device != out_bench).item() + error_rate = error_number / torch.numel(out_bench) + print(f"error rate is {{error_rate}}.") + else: + abs_err = torch.abs(out_device - out_bench) + abs_bench = torch.abs(out_bench) + if dtype_bench == torch.float32: + eps = 2 ** -23 + if dtype_bench == torch.float64: + eps = 2 ** -52 + abs_bench_with_eps = abs_bench + eps + rel_err = torch.abs(abs_err / abs_bench_with_eps) + device_finite_mask = torch.isfinite(out_device) + bench_finite_mask = torch.isfinite(out_bench.to(dtype_device)) + both_finite_mask = torch.logical_and(device_finite_mask, bench_finite_mask) + inf_nan_mask = torch.logical_not(both_finite_mask) + if compare_standard == CompareStandard.ABSOLUTE_THRESHOLD_STANDARD: + if dtype_device == torch.float16: + rtol, small_value, small_value_atol = 1.0e-3, 1.0e-3, 1.0e-5 + elif dtype_device == torch.bfloat16: + rtol, small_value, small_value_atol = 4.0e-3, 1.0e-3, 1.0e-5 + else: + rtol, small_value, small_value_atol = 1.0e-6, 1.0e-6, 1.0e-9 + small_value_mask = torch.less_equal(abs_bench, small_value) + small_value_mask = torch.logical_and(small_value_mask, both_finite_mask) + normal_value_mask = torch.logical_and(both_finite_mask, torch.logical_not(small_value_mask)) + inf_nan_proportion = compute_inf_nan_proportion(inf_nan_mask, out_device, out_bench, abs_bench_with_eps, rtol) + rel_err_mask = torch.greater(rel_err, rtol) + rel_err_mask = torch.logical_and(rel_err_mask, normal_value_mask) + if torch.sum(normal_value_mask) == 0: + rel_err_proportion = 0 + else: + rel_err_proportion = torch.sum(rel_err_mask) / torch.sum(normal_value_mask) + abs_err_mask = torch.greater(abs_err, small_value_atol) + abs_err_mask = torch.logical_and(abs_err_mask, small_value_mask) + if torch.sum(small_value_mask) == 0: + abs_err_proportion = 0 + else: + abs_err_proportion = torch.sum(abs_err_mask) / torch.sum(small_value_mask) + print("compare standard: absolute threshold standard") + print(f"relative error ratio is {{rel_err_proportion}}") + print(f"absolute error ratio is {{abs_err_proportion}}") + elif compare_standard == CompareStandard.ULP_ERROR_STANDARD: + if dtype_device == torch.float16: + min_eb, exponent_num = -14, 10 + elif dtype_device == torch.bfloat16: + min_eb, exponent_num = -126, 7 + else: + min_eb, exponent_num = -126, 23 + eb = torch.where(abs_bench == 0, torch.zeros(out_bench.shape), torch.floor(torch.log2(abs_bench))) + eb = torch.maximum(eb, min_eb * torch.ones(out_bench.shape)) + if dtype_device == torch.float32: + ulp_err = (out_device.to(torch.float64) - out_bench).to(torch.float64) * torch.exp2(-eb + exponent_num).to(torch.float64) + else: + ulp_err = (out_device.to(torch.float32) - out_bench).to(torch.float32) * torch.exp2(-eb + exponent_num).to(torch.float32) + ulp_err = torch.abs(ulp_err) + max_ulp_err = torch.max(ulp_err) + mean_ulp_err = torch.mean(ulp_err) + if dtype_device == torch.float32: + ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) + else: + ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) + print("compare standard: ulp error standard") + print(f"maximum ulp error is {{max_ulp_err}}") + print(f"mean ulp error is {{mean_ulp_err}}") + print(f"ulp error proportion is {{ulp_err_proportion}}") + else: + if dtype_device == torch.float16: + small_value, small_value_atol = 1.0e-3, 1.0e-5 + elif dtype_device == torch.bfloat16: + small_value, small_value_atol = 1.0e-3, 1.0e-5 + else: + small_value, small_value_atol = 1.0e-6, 1.0e-9 + small_value_mask = torch.less_equal(abs_bench, small_value) + small_value_mask = torch.logical_and(small_value_mask, both_finite_mask) + normal_value_mask = torch.logical_and(both_finite_mask, torch.logical_not(small_value_mask)) + abs_err_mask = torch.greater(abs_err, small_value_atol) + abs_err_mask = torch.logical_and(abs_err_mask, small_value_mask) + if torch.sum(small_value_mask) == 0: + small_value_err_proportion = 0 + else: + small_value_err_proportion = torch.sum(abs_err_mask) / torch.sum(small_value_mask) + rel_err = torch.where(normal_value_mask, rel_err, -1 * torch.ones(out_device.shape)) + if torch.max(rel_err) >= 0: + max_rel_err = torch.max(rel_err) + else: + max_rel_err = 0 + if torch.sum(normal_value_mask) == 0: + mean_rel_err = 0 + else: + mean_rel_err = torch.sum(torch.clamp(rel_err, min=0)) / torch.sum(normal_value_mask) + rmse = compute_rmse(abs_err, normal_value_mask) + error_balance = compute_error_balance(out_device, out_bench) + print("compare standard: benchmark standard") + print(f"small value error proportion is {{small_value_err_proportion}}") + print(f"maximum relative error is {{max_rel_err}}") + print(f"mean relative error is {{mean_rel_err}}") + print(f"root mean squared error is {{rmse}}") + print(f"error balance is {{error_balance}}") + else: + print(f"ERROR: out_device dtype is {{dtype_device}}, out_bench dtype is {{dtype_bench}}, not comparable.") + return None + + +def compare_element(out_device, out_bench, api_name): + if type(out_device) != type(out_bench): + print("ERROR: out_device and out_bench is not the same type!") + return None + if isinstance(out_bench, torch.Tensor): + print(f"data type: {{type(out_bench)}}") + compare_tensor(out_device, out_bench, api_name) + elif isinstance(out_bench, (bool, int, float, str)): + print(f"data type: {{type(out_bench)}}") + if out_device == out_bench: + print("PASS: out_device and out_bench equals.") + else: + print("ERROR: out_device and out_bench is not equal!") + else: + print(f"ERROR: comparison of type {{type(out_bench)}} is not supported.") + return None + + +def compare(out_device, out_bench, api_name): + print("Compare result:") + if type(out_device) != type(out_bench): + print("ERROR: out_device and out_bench is not the same type!") + print("Compare finished.") + return None + if isinstance(out_bench, (list, tuple)): + print(f"data type: {{type(out_bench)}}") + if len(out_device) != len(out_bench): + print("ERROR: len of out_device and out_bench is different!") + print("Compare finished.") + return None + for index, _ in enumerate(out_bench): + print(f"index {{index}}:") + compare_element(out_device[index], out_bench[index], api_name) + else: + compare_element(out_device, out_bench, api_name) + print("Compare finished.") + + +device = get_device() +api_name = "{api_name}" +compare_standard = {compare_standard} +torch.manual_seed({random_seed}) +for i in range({iter_times}): + print(f"iter: {{i}}:") + args_device, kwargs_device, args_bench, kwargs_bench = get_input() + output_device = exec_api_device(args_device, kwargs_device) + output_bench = exec_api_bench(args_bench, kwargs_bench) + compare(output_device, output_bench, api_name) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/stack_blacklist.yaml b/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/stack_blacklist.yaml index bde045f22da06e6f721e1e1d06cfc4877db54b0d..f8ca4ba88e9e503792296a400373f243e5c73f20 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/stack_blacklist.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/stack_blacklist.yaml @@ -1,5 +1,9 @@ stack: +<<<<<<<< HEAD:debug/accuracy_tools/monitor/monitor/distributed/stack_blacklist.yaml +- monitor/distributed +======== - msprobe/pytorch/monitor/distributed +>>>>>>>> master:debug/accuracy_tools/msprobe/pytorch/monitor/distributed/stack_blacklist.yaml - site-packages/torch/nn/modules/module.py - multiprocessing - debugpy \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py b/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py index cea499668537cd0c599231cbf7cfa72f82020100..499a75aa80d4fcbf05fae6cafe42179a57a54777 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py @@ -17,6 +17,10 @@ import os import re import inspect +<<<<<<<< HEAD:debug/accuracy_tools/monitor/monitor/distributed/wrap_distributed.py +import yaml +======== +>>>>>>>> master:debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py import torch import torch.nn as nn import torch.distributed as dist @@ -103,10 +107,38 @@ class ApiRegistry: if args[0] in PENDING_ASYNC_CC_BY_HANDLE: store_func = PENDING_ASYNC_CC_BY_HANDLE.pop(args[0]) store_func() +<<<<<<<< HEAD:debug/accuracy_tools/monitor/monitor/distributed/wrap_distributed.py + return wrapped_wait + + dist.Work.wait = wrapped_wait(dist.Work) + + def redirect_api(self): + self.set_api_attr(dist, self.distributed_attr_hooked) + self.set_api_attr(dist.distributed_c10d, self.distributed_attr_hooked) + self.redirect_wait() + + def restore_api(self): + self.set_api_attr(dist, self.distributed_attr_origin) + self.set_api_attr(dist.distributed_c10d, self.distributed_attr_origin) + setattr(dist.Work, 'wait', ORIGIN_WAIT) + + def initialize_hook(self, pre_hooks, post_hooks): + self.store_ori_attr(dist, get_distributed_ops(), self.distributed_attr_origin) + for op_name in get_distributed_ops(): + self.distributed_attr_hooked[op_name] = DistributedOPTemplate(op_name, pre_hooks, post_hooks) + +def get_process_group(process_group): + return ( + process_group + if isinstance(process_group, dist.ProcessGroup) + else dist.GroupMember.WORLD + ) +======== return wrapped_wait dist.Work.wait = wrapped_wait(dist.Work) +>>>>>>>> master:debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py def redirect_api(self): self.set_api_attr(dist, self.distributed_attr_hooked) @@ -255,7 +287,11 @@ def create_hooks(context, monitor): RANK = dist.get_rank() if dist.is_initialized() and RANK not in monitor.module_rank_list and monitor.module_rank_list != []: return [pre_hooks, hooks] +<<<<<<<< HEAD:debug/accuracy_tools/monitor/monitor/distributed/wrap_distributed.py + +======== +>>>>>>>> master:debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py if monitor.cc_log_only: pre_hooks.append(cc_log_hook) return [pre_hooks, hooks] diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/features.py b/debug/accuracy_tools/msprobe/pytorch/monitor/features.py index 2035f75f1faf34300fd96f5896c758abe4b35871..a571f2da8bda328f5f0bf8785879c7f878583ec2 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/features.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/features.py @@ -15,7 +15,11 @@ import torch from torch.autograd.functional import jacobian +<<<<<<<< HEAD:debug/accuracy_tools/monitor/monitor/features.py +from monitor.utils import print_info_log +======== from msprobe.core.common.log import logger +>>>>>>>> master:debug/accuracy_tools/msprobe/pytorch/monitor/features.py @torch.no_grad() @@ -33,6 +37,10 @@ def get_mean(x: torch.tensor): return torch.mean(x) +@torch.no_grad() +def get_mean(x: torch.tensor): + return torch.mean(x) + @torch.no_grad() def get_norm(x: torch.tensor): return torch.norm(x, p=2) @@ -42,7 +50,6 @@ def get_norm(x: torch.tensor): def get_max(x: torch.tensor): return torch.max(x) - @torch.no_grad() def get_zeros(x: torch.tensor, eps: float): return torch.sum(torch.abs(x) < eps) / x.numel() diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/visualizer.py b/debug/accuracy_tools/msprobe/pytorch/monitor/visualizer.py index 525ed5317c3ce2ca3cece9326914f60b068cc7be..49404c09aee911a2bd02fd3bf8fb6c33583d67e7 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/visualizer.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/visualizer.py @@ -16,7 +16,11 @@ import torch import numpy as np import matplotlib.pyplot as plt +<<<<<<<< HEAD:debug/accuracy_tools/monitor/monitor/visualizer.py +from monitor.features import cal_histc +======== from msprobe.pytorch.monitor.features import cal_histc +>>>>>>>> master:debug/accuracy_tools/msprobe/pytorch/monitor/visualizer.py class HeatmapVisualizer: diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..f623a48ae3b9607103b4af63bd8838d3d13c8a0b --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py @@ -0,0 +1,84 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..graph.graph import Graph +from ..graph.node_op import NodeOp +from ..utils import load_json_file, load_data_json_file, save_json_file, GraphConst +from .msprobe_adapter import get_input_output + + +class GraphBuilder: + @staticmethod + def build(construct_path, data_path, model_name='DefaultModel'): + """ + GraphBuilder的对外提供的构图方法 + Args: + construct_path: construct.json路径 + data_path: dump.json路径 + model_name: 模型名字,依赖外部输入 + Returns: Graph,代表图的数据结构 + """ + construct_dict = load_json_file(construct_path) + data_dict = load_data_json_file(data_path) + graph = Graph(model_name) + GraphBuilder._init_nodes(graph, construct_dict, data_dict) + return graph + + @staticmethod + def to_json(filename, graph_n, graph_b=None, tool_tip=None): + """ + 将graph导出成.vis文件的接口 + Args: + filename: 输出文件路径 + graph_n: Graph + graph_b: bench Graph,为空是只输出graph_b,不为空会同时输出两个graph,作为对比的结果 + tool_tip: 在对比模型下输出的意见 + """ + result = {} + if graph_b: + result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict() + result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict() + else: + result = graph_n.to_dict() + if tool_tip: + result[GraphConst.JSON_TIP_KEY] = tool_tip + save_json_file(filename, result) + + @staticmethod + def _init_nodes(graph, construct_dict, data_dict): + for subnode_id, upnode_id in construct_dict.items(): + if upnode_id: + upnode_op = NodeOp.get_node_op(upnode_id) + upnode = GraphBuilder._create_or_get_node(graph, data_dict, upnode_op, upnode_id) + else: + upnode = graph.root + node_op = NodeOp.get_node_op(subnode_id) + GraphBuilder._create_or_get_node(graph, data_dict, node_op, subnode_id, upnode) + + @staticmethod + def _create_or_get_node(graph, data_dict, op, name, upnode=None): + if name in graph.node_map: + node = graph.get_node(name) + else: + graph.add_node(op, name, upnode) + node = graph.get_node(name) + node_data = data_dict.get(name, {}) + # 添加输入输出数据 + input_data, output_data = get_input_output(node_data, node.id) + # 更新数据 + node.set_input_output(input_data, output_data) + # 添加节点 + node.add_upnode(upnode) + return node \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..7ea0dfabedf7c482975094abdd981baa1afeb44e --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py @@ -0,0 +1,185 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, _do_multi_process +from ....core.common.utils import task_dumppath_get +from ..utils import GraphConst + + +# 用于将节点名字解析成对应的NodeOp的规则 +op_patterns = [ + r'^(Module)', #NodeOp.module + r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' #NodeOp.function_api +] + + +def get_compare_mode(dump_path_param): + """ + 获得比较模式,包括summary、MD5和真实数据三种模式 + Args: + dump_path_param: 调用acc_compare接口所依赖的参数 + Returns: 0 summary mode, 1 md5 mode, 2 true data mode + """ + summary_compare, md5_compare = task_dumppath_get(dump_path_param) + if summary_compare: + compare_mode = GraphConst.SUMMARY_COMPARE + elif md5_compare: + compare_mode = GraphConst.MD5_COMPARE + else: + compare_mode = GraphConst.REAL_DATA_COMPARE + return compare_mode + + +def run_real_data(dump_path_param, csv_path): + """ + 多进程运行生成真实数据 + Args: + dump_path_param: 调用acc_compare接口所依赖的参数 + csv_path: 生成文件路径 + """ + return _do_multi_process(dump_path_param, csv_path) + + +def get_input_output(node_data, node_id): + """ + 将dump的原始数据进行拆解,分解为output和input两个数据 + Args: + node_data: 属于单个节点的dump数据 + node_id: 节点名字 + """ + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node_id) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if not full_op_name: + continue + splits = full_op_name.split('.') + if len(splits) <= GraphConst.OUTPUT_INDEX: + continue + if 'output' in splits[GraphConst.OUTPUT_INDEX]: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + return input_data, output_data + + +def compare_data(data_dict_list1, data_dict_list2): + """ + 比较get_input_output中输出的结果是否结构一致,比较一致返回True + """ + if len(data_dict_list1) != len(data_dict_list2): + return False + # 用于比较两个节点是否相等的关键字段 + tag_keys = ['type', 'dtype', 'shape'] + for key1, key2 in zip(data_dict_list1, data_dict_list2): + dict1 = data_dict_list1[key1] + dict2 = data_dict_list2[key2] + for tag_key in tag_keys: + tag_value1 = dict1.get(tag_key, None) + tag_value2 = dict2.get(tag_key, None) + if tag_value1 != tag_value2: + return False + return True + + +def format_node_data(data_dict): + """ + 批量进行节点数据的输出 + """ + del_list = ['requires_grad', 'data_name', 'full_op_name'] + for _, value in data_dict.items(): + if not isinstance(value, dict): + continue + for item in del_list: + if item in value: + del value[item] + _format_data(value) + return data_dict + + +def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_md5_compare): + """ + 调用acc_compare.py中的get_accuracy获得精度对比指标 + 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + merge_n = _parse_node(node_ids[0], data_dicts[0], stack_json_data, is_summary_compare, is_md5_compare) + merge_b = _parse_node(node_ids[1], data_dicts[1], stack_json_data, is_summary_compare, is_md5_compare) + result = [] + get_accuracy(result, merge_n, merge_b, is_summary_compare, is_md5_compare) + return result + + +def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_compare): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node_id, {}), node_id) + if node_id in stack_json_data: + op_parsed_list.append( + {'full_op_name': node_id, 'full_info': stack_json_data[node_id]}) + else: + op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) + result = merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + if not result: + result['op_name'] = [] + return result + + +def _format_decimal_string(s): + """ + 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + """ + pattern = re.compile(r'\d{1,20}\.\d{1,20}%?') + matches = pattern.findall(s) + for match in matches: + is_percent = match.endswith('%') + number_str = match.rstrip('%') + decimal_part = number_str.split('.')[1] + # 如果小数位数大于6,进行处理 + if len(decimal_part) > GraphConst.ROUND_TH: + number_float = float(number_str) + formatted_number = f"{number_float:.{GraphConst.ROUND_TH}f}" + # 如果原来是百分数,加回百分号 + if is_percent: + formatted_number += '%' + # 替换原字符串中的数值部分 + s = s.replace(match, formatted_number) + return s + + +def _format_data(data_dict): + """ + 格式化数据,小数保留6位,处理一些异常值 + """ + pattern = r'^[+-]?(\d+(.\d*)?|.\d+)([eE][+-]?\d+)$' + for key, value in data_dict.items(): + if isinstance(value, str): + # 将单引号删掉,None换成null避免前端解析错误 + value = value.replace("'", "").replace('None', 'null') + value = _format_decimal_string(value) + elif value is None or value == ' ': + value = 'null' + # 科学计数法1.123123123123e-11,格式化为1.123123e-11 + elif isinstance(value, float) and len(str(value)) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): + value = "{:.6e}".format(value) + elif isinstance(value, float): + value = round(value, GraphConst.ROUND_TH) + # Inf会走入这里,确保转成Inf。另外给其他不符合预期的类型做兜底方案 + if not isinstance(value, (list, tuple, dict, str)): + value = str(value) + data_dict[key] = value diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..3d5f2972468adab8a436167d2f50eab9ace05873 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data +from ..utils import GraphConst, load_json_file, load_data_json_file, get_csv_df +from ..graph.graph import Graph +from .mode_adapter import ModeAdapter + + +class GraphComparator: + def __init__(self, graphs, data_paths, stack_path, output_path): + self.graph_n = graphs[0] + self.graph_b = graphs[1] + self._parse_param(data_paths, stack_path, output_path) + + def compare(self): + """ + 比较函数,初始化结束后单独调用。比较结果写入graph_n + """ + self._compare_nodes(self.graph_n.root) + self._postcompare() + + def add_compare_result_to_node(self, node, compare_result_list): + """ + 将比对结果添加到节点的输入输出数据中 + Args: + node: 节点 + compare_result_list: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + # 真实数据比对,先暂存节点,在多进程对比得到精度指标后,再将指标添加到节点中 + if self.ma.prepare_real_data(node): + return + compare_in_dict = {} + compare_out_dict = {} + # input和output对比数据分开 + for item in compare_result_list: + if 'output' in item[0]: + compare_out_dict[item[0]] = item + else: + compare_in_dict[item[0]] = item + precision_status, precision_index, other_dict = self.ma.parse_result(node, [compare_in_dict, compare_out_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + node.data.update(other_dict) + if not precision_status: + self.ma.add_error_key(node.output_data) + node.get_suggestions() + + def _parse_param(self, data_paths, stack_path, output_path): + self.dump_path_param = { + 'npu_json_path': data_paths[0], + 'bench_json_path': data_paths[1], + 'stack_json_path': stack_path, + 'is_print_compare_log': True + } + self.output_path = output_path + compare_mode = get_compare_mode(self.dump_path_param) + self.ma = ModeAdapter(compare_mode) + self.data_n_dict = load_data_json_file(data_paths[0]) + self.data_b_dict = load_data_json_file(data_paths[1]) + self.stack_json_data = load_json_file(stack_path) + + def _postcompare(self): + if not self.ma.is_real_data_compare(): + return + df = get_csv_df(self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) + df = run_real_data(self.dump_path_param, df) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.ma.compare_nodes: + precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + if not precision_status: + self.ma.add_error_key(node.output_data) + node.get_suggestions() + + def _compare_nodes(self, node_n): + #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 + #这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 + node_b, ancestors = Graph.match(self.graph_n, node_n, self.graph_b) + if node_b: + ancestors.append(node_b.id) + node_n.add_link(node_b, ancestors) + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 + compare_result_list = compare_node([node_n.id, node_b.id], [self.data_n_dict, self.data_b_dict], + self.stack_json_data, self.ma.is_summary_compare(), + self.ma.is_md5_compare()) + if compare_result_list: + self.ma.add_csv_data(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + for subnode in node_n.subnodes: + self._compare_nodes(subnode) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..d58f2078b6f8996a31c2f830ef5adf79bc7948c3 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py @@ -0,0 +1,211 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from ....core.common.const import CompareConst, Const +from ..utils import ToolTip, GraphConst, str2float + + +class ModeAdapter: + def __init__(self, compare_mode): + self.compare_mode = compare_mode + self.csv_data = [] + self.compare_nodes = [] + + @staticmethod + def _add_md5_compare_data(node_data, compare_data_dict): + precision_status = True + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [GraphConst.JSON_MD5_KEY] + headers = CompareConst.MD5_COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # md5比对是否通过 + if value.get(GraphConst.JSON_MD5_KEY) != CompareConst.PASS: + precision_status = False + node_data[key] = value + return precision_status + + @staticmethod + def _add_real_compare_data(node_data, compare_data_dict): + min_thousandth = float(1) + numbers = [] + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + headers = CompareConst.COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # 获取一个节点所有的输入或输出最小的双千指标 + thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) + # 可能是None,可能是非数字内容str + try: + thousandth = float(thousandth) + except (ValueError, TypeError): + thousandth = None + if thousandth is not None: + numbers.append(thousandth) + node_data[key] = value + # 双千指标都是None的异常情况 + if not numbers: + min_thousandth = None + else: + min_thousandth = min(numbers + [min_thousandth]) + return min_thousandth + + @staticmethod + def _add_summary_compare_data( node_data, compare_data_dict): + precision_status = True + max_relative_err = 0 + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + # 对应比对结果csv的列 + key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, + CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + headers = CompareConst.SUMMARY_COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 + for index, item in enumerate(key_list[4:]): + value_diff = value.get(key_list[index]) + if isinstance(value_diff, float) and value_diff != 0 and abs(value_diff) < GraphConst.SMALL_VALUE: + value[item] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) + continue + relative_err = str2float(value.get(item)) + max_relative_err = max(max_relative_err, relative_err) + node_data[key] = value + if max_relative_err > GraphConst.MAX_RELATIVE_ERR_TH: + precision_status = False + max_relative_err = 1 if max_relative_err > 1 else max_relative_err + precision_index = 1 - max_relative_err + return precision_status, precision_index + + @staticmethod + def _match_data(data_dict, compare_data, key_list, id_list): + """ + 绑定精度指标到node的input_data和output_data + """ + if len(key_list) != len(id_list): + return + for id, key in zip(id_list, key_list): + data = compare_data[id] + if data is not None and 'nan' not in str(data) and str(data) != ' ': + data_dict[key] = data + else: + data_dict[key] = 'null' + + def parse_result(self, node, compare_data_dict): + """ + 根据结果返回数据,分别是precision_status,precision_index,和附加数据 + """ + other_dict = {} + if self.is_md5_compare(): + precision_status_in = ModeAdapter._add_md5_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out = ModeAdapter._add_md5_compare_data(node.output_data, compare_data_dict[1]) + # 所有输入输出md5对比通过,这个节点才算通过 + precision_status = precision_status_in and precision_status_out + precision_index = 1 if precision_status else 0 + other_result = CompareConst.PASS if precision_status else CompareConst.DIFF + other_dict[GraphConst.JSON_MD5_KEY] = other_result + elif self.is_summary_compare(): + precision_status_in, precision_index_in = ModeAdapter._add_summary_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out, precision_index_out = ModeAdapter._add_summary_compare_data(node.output_data, compare_data_dict[1]) + precision_status = precision_status_in and precision_status_out + precision_index = min(precision_index_in, precision_index_out) + else: + min_thousandth_in = ModeAdapter._add_real_compare_data(node.input_data, compare_data_dict[0]) + min_thousandth_out = ModeAdapter._add_real_compare_data(node.output_data, compare_data_dict[0]) + if min_thousandth_in and min_thousandth_out: + change_percentage = abs(min_thousandth_in - min_thousandth_out) + else: + change_percentage = 0 + precision_status = True + if change_percentage > GraphConst.REAL_DATA_TH: + precision_status = False + precision_index = 0 if change_percentage > 1 else 1 - change_percentage + return precision_status, precision_index, other_dict + + def prepare_real_data(self, node): + """ + 为真实数据比较模式准备节点信息 + """ + if self.is_real_data_compare(): + self.compare_nodes.append(node) + return True + return False + + def is_summary_compare(self): + return self.compare_mode == GraphConst.SUMMARY_COMPARE + + def is_md5_compare(self): + return self.compare_mode == GraphConst.MD5_COMPARE + + def is_real_data_compare(self): + return self.compare_mode == GraphConst.REAL_DATA_COMPARE + + def add_csv_data(self, compare_result_list): + if not self.is_real_data_compare(): + return + self.csv_data.extend(compare_result_list) + + def add_error_key(self, node_data): + """ + 根据不同的模式进行提供不同错误信息 + """ + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + if self.is_summary_compare(): + message = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + elif self.is_real_data_compare(): + message = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + else: + # 输出件优化 + message = [] + value[GraphConst.ERROR_KEY] = message + node_data[key] = value + + def get_tool_tip(self): + """ + 用于前端展示字段的具体含义 + """ + if self.is_summary_compare(): + tips = { + CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, + CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, + CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, + CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} + elif self.is_md5_compare(): + tips = {Const.MD5: ToolTip.MD5} + else: + tips = { + CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.COSINE: ToolTip.COSINE, + CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, + CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} + return tips diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py new file mode 100644 index 0000000000000000000000000000000000000000..f04f367f591244a6d1ed48529d1fb4aae7cb2453 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py @@ -0,0 +1,107 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .node_op import NodeOp +from ..utils import Suggestions, GraphConst +from ..builder.msprobe_adapter import format_node_data, compare_data + + +class BaseNode: + def __init__(self, node_op, node_id, up_node=None): + self.op = node_op + self.id = node_id + self.data = {} + self.output_data = {} + self.input_data = {} + self.upnode = None + self.add_upnode(up_node) + self.subnodes = [] + self.matched_node_link = [] + self.suggestions = {} + + def __str__(self): + info = f'id:\t{self.id}' + return info + + def __eq__(self, other): + """ + 用来判断两个节点是否可以被匹配上,认为结构上是否一致 + """ + if not compare_data(self.input_data, other.input_data): + return False + if not compare_data(self.output_data, other.output_data): + return False + return True + + def get_suggestions(self): + """ + 精度疑似有问题时,提供一些建议 + """ + if self.op == NodeOp.module: + self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.Module + self.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + elif self.op == NodeOp.function_api: + self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.API + self.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + def set_input_output(self, input_data, output_data): + self.input_data = input_data + self.output_data = output_data + + def add_upnode(self, node): + """ + 绑定upnode,用于对两个节点进行上下级关联 + """ + if not node or node.id == self.id or self.upnode: + return + self.upnode = node + node.subnodes.append(self) + + def add_link(self, node, ancestors): + """ + 在节点匹配成功后进行匹配数据的录入 + Args: + node: 和self相互匹配的节点 + ancestors: 对面节点的祖先信息 + """ + self.matched_node_link = ancestors + node.matched_node_link = ancestors + + def to_dict(self): + """ + 输出数据 + """ + result = {} + result['id'] = self.id + result['node_type'] = self.op.value + result['data'] = self.data + result['output_data'] = format_node_data(self.output_data) + result['input_data'] = format_node_data(self.input_data) + result['upnode'] = self.upnode.id if self.upnode else 'None' + result['subnodes'] = [node.id for node in self.subnodes] + result['matched_node_link'] = self.matched_node_link + result['suggestions'] = self.suggestions + return result + + def get_ancestors(self): + """ + 获取节点所有祖先的列表 + """ + ancestors = [] + current_node = self.upnode + while current_node: + ancestors.append(current_node.id) + current_node = current_node.upnode + return list(reversed(ancestors)) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..6bae10ad3fc8a041d3ef2e8fb707d40a22b42f19 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py @@ -0,0 +1,86 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .base_node import BaseNode +from .node_op import NodeOp +from ..utils import GraphConst + + +class Graph: + def __init__(self, model_name): + self.node_map = {} + self.add_node(NodeOp.module, model_name) + self.root = self.get_node(model_name) + + def __str__(self): + infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] + info = "\n".join(infos) + return info + + @staticmethod + def match(graph_n, node_n, graph_b): + """ + 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 + 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 + 返回匹配结果,匹配到的节点,以及祖先树。没匹配到则返回None, [] + """ + if not node_n or node_n.id not in graph_b.node_map: + return None, [] + node_b = graph_b.node_map.get(node_n.id) + if node_n != node_b: + return None, [] + ancestors_n = node_n.get_ancestors() + ancestors_b = node_b.get_ancestors() + if ancestors_n != ancestors_b: + return None, [] + return node_b, ancestors_n + + @staticmethod + def dfs(node, result): + info = node.to_dict() + result[node.id] = info + for subnode in node.subnodes: + Graph.dfs(subnode, result) + + def add_node(self, node_op, node_id, up_node=None): + """ + 在graph中进行节点的添加 + Args: + node_op: 需要添加的节点类型 + node_id: 需要添加的节点id + up_node:对应节点的父节点 + """ + if node_id in self.node_map: + return + node = BaseNode(node_op, node_id, up_node) + self.node_map[node_id] = node + + def get_node(self, node_id): + """ + 返回节点,不存在返回None + """ + return self.node_map.get(node_id, None) + + def to_dict(self): + """ + 用于数据输出 + """ + result = {} + result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' + result[GraphConst.JSON_NODE_KEY] = {} + for node_id in self.node_map: + info = self.node_map.get(node_id).to_dict() + result[GraphConst.JSON_NODE_KEY][node_id] = info + return result diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py new file mode 100644 index 0000000000000000000000000000000000000000..1629caabd1989beac72646ea36efb4a82b328f3a --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum +import re +from ..builder.msprobe_adapter import op_patterns + + +class NodeOp(Enum): + module = 0 + function_api = 1 + + @staticmethod + def get_node_op(node_name: str): + """ + 基于代表节点的字符串,解析节点种类 + """ + for op in NodeOp: + index = op.value + if index < 0 or index >= len(op_patterns): + raise Exception("NodeOp and op_patterns in MsprobeAdapter do not match") + pattern = op_patterns[index] + if re.match(pattern, node_name): + return op + raise Exception(f"Cannot parse node_name {node_name} into NodeOp") diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py new file mode 100644 index 0000000000000000000000000000000000000000..165d54ce17ed295308c7fa52b4dc5251271453a8 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py @@ -0,0 +1,85 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import shutil +import filecmp +from .compare.graph_comparator import GraphComparator +from .utils import GraphConst +from .builder.graph_builder import GraphBuilder +from ...pytorch.common.log import logger +from ...core.common.file_check import create_directory + + +def compare_graph(dump_path_n, dump_path_b, out_path): + # 对两个数据进行构图 + construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) + graph_n = GraphBuilder.build(construct_path_n, data_path_n, 'TestNet') + graph_b = GraphBuilder.build(construct_path_b, data_path_b, 'TestNet') + # 基于graph、stack和data进行比较 + stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) + graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) + graph_comparator.compare() + output_path = os.path.join(out_path, 'compare.vis') + GraphBuilder.to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) + + +def build_graph(dump_path, out_path): + construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) + output_path = os.path.join(out_path, 'build.vis') + graph = GraphBuilder.build(construct_path, data_path, 'TestNet') + GraphBuilder.to_json(output_path, graph) + + +def run_st(data_path): + start_time = time.time() + run_bench(data_path, 'output2') + end_time = time.time() + logger.info(f'run_st time cost: {end_time - start_time}') + # 比较output2的结果和output1 的bench结果差距 + for data_dir in os.listdir(data_path): + data_dir = os.path.join(data_path, data_dir) + if not os.path.isdir(data_dir): + continue + output1 = os.path.join(data_dir, 'output1') + output2 = os.path.join(data_dir, 'output2') + files = ['build.vis', 'compare.vis'] + for vis_file in files: + file1 = os.path.join(output1, vis_file) + file2 = os.path.join(output2, vis_file) + result = filecmp.cmp(file1, file2) + if result: + logger.info('pass ' + file1) + else: + logger.info('not pass ' + file1) + + +def run_bench(data_path, output_dir): + for data_dir in os.listdir(data_path): + data_dir = os.path.join(data_path, data_dir) + if not os.path.isdir(data_dir): + continue + run_data_path = os.path.join(data_dir, 'data') + output_path = os.path.join(data_dir, output_dir) + if os.path.exists(output_path): + shutil.rmtree(output_path) + create_directory(output_path) + build_graph(run_data_path, output_path) + compare_graph(run_data_path, run_data_path, output_path) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fb046f9758686fe810a05b1a23d76880b86bb994 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py @@ -0,0 +1,118 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from ...core.common.file_check import FileOpen +from ..compare.acc_compare import result_to_csv + + +def load_json_file(file_path): + """ + 加载json文件 + """ + try: + with FileOpen(file_path, 'r') as f: + file_dict = json.load(f) + if not isinstance(file_dict, dict): + return {} + return file_dict + except json.JSONDecodeError: + return {} + + +def load_data_json_file(file_path): + """ + 加载dump.json中的data字段 + """ + return load_json_file(file_path).get(GraphConst.DATA_KEY, {}) + + +def save_json_file(file_path, data): + """ + 保存json文件 + """ + with FileOpen(file_path, 'w') as f: + f.write(json.dumps(data, indent=4)) + + +def get_csv_df(md5_compare, summary_compare, stack, csv_data): + """ + 调用acc接口写入csv + """ + return result_to_csv(md5_compare, summary_compare, stack, csv_data, None) + + +def str2float(percentage_str): + """ + 百分比字符串转换转换为浮点型 + Args: + percentage_str: '0.00%', '23.4%' + Returns: float 0.00, 0.234 + """ + try: + percentage_str = percentage_str.strip('%') + return float(percentage_str) / 100 + except ValueError: + return 0 + + +class ToolTip: + MAX_DIFF = 'NPU与标杆API统计信息比对,最大值的差值' + MIN_DIFF = 'NPU与标杆API统计信息比对,最小值的差值' + MEAN_DIFF = 'NPU与标杆API统计信息比对,平均值的差值' + NORM_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值' + MD5 = '数据MD5信息,用于比较两个数据信息是否完全一致' + ONE_THOUSANDTH_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差大于千分之一的比例占总元素个数的比例小于千分之一' + COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' + MAX_ABS_ERR = '当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001' + MAX_RELATIVE_ERR = '当最大相对误差越接近0表示其计算的误差越小。当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象' + SMALL_VALUE_TIP = '{} 小于1e-3,不计算相对误差' + + +class Suggestions: + Module = '此模块精度比对结果疑似异常,请使用ptdbg工具对模块中的api进行dump比对' + API = '此api精度比对结果疑似异常,请使用api accuracy checker工具对api进行精度检测' + PTDBG = 'ptdbg工具' + PTDBG_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/ptdbg_ascend' + API_ACCURACY_CHECKER = 'api accuracy checker工具' + API_ACCURACY_CHECKER_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker' + + +class GraphConst: + CONSTRUCT_FILE = 'construct.json' + DUMP_FILE = 'dump.json' + STACK_FILE = 'stack.json' + GRAPH_FILE = 'graph.vis' + ERROR_KEY = 'error_key' + SUMMARY_COMPARE = 0 + MD5_COMPARE = 1 + REAL_DATA_COMPARE = 2 + JSON_NPU_KEY = 'NPU' + JSON_BENCH_KEY = 'Bench' + JSON_TIP_KEY = 'Tooltip' + JSON_MD5_KEY = 'md5 Compare Result' + JSON_ROOT_KEY = 'root' + JSON_NODE_KEY = 'node' + DATA_KEY = 'data' + REAL_DATA_TH = 0.1 + MAX_RELATIVE_ERR_TH = 0.5 + ROUND_TH = 6 + JSON_STATUS_KEY = 'precision_status' + JSON_INDEX_KEY = 'precision_index' + SUGGEST_KEY = 'text' + TAG_NA = 'na' + OUTPUT_INDEX = -2 + STR_MAX_LEN = 50 + SMALL_VALUE = 1e-3 diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..66eceea4b2a1ccf48ac95491c1a2cdca718a403a --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py @@ -0,0 +1,52 @@ +import unittest +from unittest.mock import MagicMock, patch +from msprobe.pytorch.visualization.builder.graph_builder import GraphBuilder, Graph + + +class TestGraphBuilder(unittest.TestCase): + + def setUp(self): + self.construct_path = "step/rank/construct.json" + self.data_path = "step/rank/dump.json" + self.model_name = "TestModel" + self.graph = Graph(self.model_name) + self.construct_dict = { + "Tensor1": "Module1", + "Module1": None + } + self.data_dict = { + "Module1": {"data": "data for Module1"}, + "Tensor1": {"data": "data for Tensor1"} + } + + @patch('msprobe.pytorch.visualization.builder.graph_builder.load_json_file') + @patch('msprobe.pytorch.visualization.builder.graph_builder.load_data_json_file') + def test_build(self, mock_load_data_json_file, mock_load_json_file): + mock_load_data_json_file.return_value = self.data_dict + mock_load_json_file.return_value = self.construct_dict + + graph = GraphBuilder.build(self.construct_path, self.data_path, self.model_name) + self.assertIsNotNone(graph) + self.assertIsInstance(graph, Graph) + self.assertEqual(len(graph.node_map), 3) + + @patch('msprobe.pytorch.visualization.builder.graph_builder.save_json_file') + def test_to_json(self, mock_save_json_file): + GraphBuilder.to_json("step/rank/output.vis", self.graph) + mock_save_json_file.assert_called_once() + + @patch('msprobe.pytorch.visualization.graph.node_op.NodeOp.get_node_op') + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.get_input_output', return_value=([], [])) + def test__init_nodes(self, mock_get_input_output, mock_get_node_op): + GraphBuilder._init_nodes(self.graph, self.construct_dict, self.data_dict) + mock_get_node_op.assert_any_call("Tensor1") + mock_get_node_op.assert_any_call("Module1") + self.assertIs(self.graph.root, self.graph.get_node("TestModel")) + + def test__create_or_get_node(self): + node_op = MagicMock() + data_dict = {"node1": {}} + node = GraphBuilder._create_or_get_node(self.graph, data_dict, node_op, "node1") + self.assertIn("node1", self.graph.node_map) + self.assertEqual(node.input_data, {}) + self.assertEqual(node.output_data, {}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..12ae24279fd2af433d34f0cd3929eb075e209a49 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py @@ -0,0 +1,73 @@ +import unittest +from unittest.mock import patch +from msprobe.pytorch.visualization.builder.msprobe_adapter import ( + get_compare_mode, + run_real_data, + get_input_output, + compare_data, + format_node_data, + compare_node, + _format_decimal_string, + _format_data +) +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestMsprobeAdapter(unittest.TestCase): + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.task_dumppath_get', return_value=(True, False)) + def test_get_compare_mode_summary(self, mock_task_dumppath_get): + mode = get_compare_mode("dummy_param") + self.assertEqual(mode, GraphConst.SUMMARY_COMPARE) + + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter._do_multi_process') + def test_run_real_data(self, mock_do_multi_process): + run_real_data("dump_path", "csv_path") + mock_do_multi_process.assert_called_once_with("dump_path", "csv_path") + + def test_get_input_output(self): + node_data = { + 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.int64', 'shape': [5], + 'Max': 2049.0, 'Min': 0.0, 'Mean': 410.20001220703125, 'Norm': 2049.0009765625, + 'requires_grad': False, 'full_op_name': 'Distributed.broadcast.0.forward_input.0'}, + {'type': 'int', 'value': 0}], + 'input_kwargs': {'group': None}, + 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.int64', 'shape': [5], + 'Max': 2049.0, 'Min': 0.0, 'Mean': 410.20001220703125, 'Norm': 2049.0009765625, + 'requires_grad': False, 'full_op_name': 'Distributed.broadcast.0.forward_output.0'}, + {'type': 'int', 'value': 0}, None] + } + node_id = "Distributed.broadcast.0.forward" + input_data, output_data = get_input_output(node_data, node_id) + self.assertIn("Distributed.broadcast.0.forward_output.0", output_data) + self.assertIn("Distributed.broadcast.0.forward_input.0", input_data) + + def test_compare_data(self): + data_dict_list1 = {'key1': {'type': 'Type1', 'dtype': 'DType1', 'shape': 'Shape1'}} + data_dict_list2 = {'key1': {'type': 'Type1', 'dtype': 'DType1', 'shape': 'Shape1'}} + self.assertTrue(compare_data(data_dict_list1, data_dict_list2)) + + def test_format_node_data(self): + data_dict = {'node1': {'data_name': 'data1', 'full_op_name': 'op1'}} + result = format_node_data(data_dict) + self.assertNotIn('data_name', result['node1']) + self.assertNotIn('requires_grad', result['node1']) + + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.get_accuracy') + def test_compare_node(self, mock_get_accuracy): + node_ids = ["node1", "node2"] + data_dicts = [{'node1': {"input_args": [], "input_kwargs": {}, "output": {}}}, + {'node2': {"input_args": [], "input_kwargs": {}, "output": {}}}] + stack_json_data = {} + result = compare_node(node_ids, data_dicts, stack_json_data, False, False) + mock_get_accuracy.assert_called_once() + self.assertIsInstance(result, list) + + def test__format_decimal_string(self): + s = "0.123456789%" + formatted_s = _format_decimal_string(s) + self.assertIn("0.123457%", formatted_s) + + def test__format_data(self): + data_dict = {'value': 0.123456789} + _format_data(data_dict) + self.assertEqual(data_dict['value'], '0.123457') \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py new file mode 100644 index 0000000000000000000000000000000000000000..bece5380f04836a232a8c154a606c1cb68759b1c --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py @@ -0,0 +1,32 @@ +import unittest +from unittest.mock import patch +from msprobe.pytorch.visualization.compare.graph_comparator import GraphComparator +from msprobe.pytorch.visualization.graph.graph import Graph +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestGraphComparator(unittest.TestCase): + + def setUp(self): + self.graphs = [Graph("model1"), Graph("model2")] + self.data_paths = ["step1/rank/dump.json", "step2/rank/dump.json"] + self.stack_path = "step1/rank/stack.json" + self.output_path = "output/output.vis" + + @patch('msprobe.pytorch.visualization.compare.graph_comparator.get_compare_mode') + @patch('msprobe.pytorch.visualization.compare.graph_comparator.load_json_file') + @patch('msprobe.pytorch.visualization.compare.graph_comparator.load_data_json_file') + def test__parse_param(self, mock_load_data_json_file, mock_load_json_file, mock_get_compare_mode): + mock_load_data_json_file.return_value = "data_dict" + mock_load_json_file.return_value = "construct_dict" + mock_get_compare_mode.return_value = GraphConst.SUMMARY_COMPARE + self.comparator = GraphComparator(self.graphs, self.data_paths, self.stack_path, self.output_path) + self.comparator._parse_param(self.data_paths, self.stack_path, self.output_path) + + self.assertEqual(self.comparator.dump_path_param, { + 'npu_json_path': self.data_paths[0], + 'bench_json_path': self.data_paths[1], + 'stack_json_path': self.stack_path, + 'is_print_compare_log': True + }) + self.assertEqual(self.comparator.output_path, self.output_path) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..7883a09a34115132ac2b8b217de434e32e58c279 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py @@ -0,0 +1,61 @@ +import unittest +from unittest.mock import patch, MagicMock +from msprobe.pytorch.visualization.compare.mode_adapter import ModeAdapter +from msprobe.pytorch.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.pytorch.visualization.utils import GraphConst, ToolTip +from msprobe.core.common.const import CompareConst + + +class TestModeAdapter(unittest.TestCase): + + def setUp(self): + self.node_op = NodeOp.module + self.node_id = "node_1" + self.node = BaseNode(self.node_op, self.node_id) + self.compare_mode = GraphConst.REAL_DATA_COMPARE + self.adapter = ModeAdapter(self.compare_mode) + self.compare_data_dict = [{}, {}] + + def test_add_md5_compare_data(self): + node_data = {'md5_key': 'some_md5_value'} + compare_data_dict = {'md5_key': 'expected_md5_value'} + precision_status = ModeAdapter._add_md5_compare_data(node_data, compare_data_dict) + self.assertTrue(precision_status) + + @patch('msprobe.pytorch.visualization.compare.mode_adapter.ModeAdapter') + def test_parse_result(self, mock_mode_adapter): + mock_mode_adapter._add_summary_compare_data.return_value = (True, 0.5) + self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE + precision_status, precision_index, other_dict = self.adapter.parse_result( + self.node, self.compare_data_dict) + self.assertEqual(precision_status, True) + self.assertEqual(precision_index, 0.5) + self.assertEqual(other_dict, {}) + + def test_prepare_real_data(self): + self.adapter.is_real_data_compare = MagicMock(return_value=True) + result = self.adapter.prepare_real_data(self.node) + self.assertTrue(result) + + def test_compare_mode_methods(self): + self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE + self.assertTrue(self.adapter.is_summary_compare()) + self.assertFalse(self.adapter.is_md5_compare()) + self.assertFalse(self.adapter.is_real_data_compare()) + + def test_add_csv_data(self): + compare_result_list = ['result1', 'result2'] + self.adapter.add_csv_data(compare_result_list) + self.assertEqual(self.adapter.csv_data, compare_result_list) + + def test_add_error_key(self): + node_data = {'key': {}} + self.adapter.compare_mode = GraphConst.REAL_DATA_COMPARE + self.adapter.add_error_key(node_data) + self.assertEqual(node_data['key'][GraphConst.ERROR_KEY], + [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO]) + + def test_get_tool_tip(self): + self.adapter.compare_mode = GraphConst.MD5_COMPARE + tips = self.adapter.get_tool_tip() + self.assertEqual(tips, {'md5': ToolTip.MD5}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py new file mode 100644 index 0000000000000000000000000000000000000000..544950f35881e19eb449a138a4b0937ca91eb1d7 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py @@ -0,0 +1,64 @@ +import unittest +from msprobe.pytorch.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestBaseNode(unittest.TestCase): + + def setUp(self): + self.node_op = NodeOp.module + self.node_id = "node_1" + self.up_node = BaseNode(self.node_op, "up_node_1") + self.node = BaseNode(self.node_op, self.node_id, self.up_node) + + def test_init_and_str(self): + self.assertEqual(self.node.op, self.node_op) + self.assertEqual(self.node.id, self.node_id) + self.assertEqual(str(self.node), 'id:\tnode_1') + + def test_eq(self): + other_node = BaseNode(self.node_op, self.node_id, self.up_node) + self.assertEqual(self.node, other_node) + + def test_get_suggestions(self): + self.node.get_suggestions() + self.assertIn(GraphConst.SUGGEST_KEY, self.node.suggestions) + + def test_set_input_output(self): + input_data = {'input1': 'value1'} + output_data = {'output1': 'value2'} + self.node.set_input_output(input_data, output_data) + self.assertEqual(self.node.input_data, input_data) + self.assertEqual(self.node.output_data, output_data) + + def test_add_upnode(self): + self.node = BaseNode(self.node_op, self.node_id) + new_up_node = BaseNode(self.node_op, "new_up_node_1") + self.node.add_upnode(new_up_node) + self.assertEqual(self.node.upnode, new_up_node) + self.assertIn(self.node, new_up_node.subnodes) + + def test_add_link(self): + other_node = BaseNode(self.node_op, "other_node_1") + ancestors = ['a1', 'a2'] + self.node.add_link(other_node, ancestors) + self.assertEqual(self.node.matched_node_link, ancestors) + self.assertEqual(other_node.matched_node_link, ancestors) + + def test_to_dict(self): + expected_result = { + 'id': self.node_id, + 'node_type': self.node_op.value, + 'data': {}, + 'output_data': {}, + 'input_data': {}, + 'upnode': self.up_node.id, + 'subnodes': [], + 'matched_node_link': [], + 'suggestions': {} + } + self.assertEqual(self.node.to_dict(), expected_result) + + def test_get_ancestors(self): + expected_ancestors = ['up_node_1'] + self.assertEqual(self.node.get_ancestors(), expected_ancestors) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..19d098743458a61d13146b6da1b65098f90171b7 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py @@ -0,0 +1,50 @@ +import unittest +from msprobe.pytorch.visualization.graph.graph import Graph, NodeOp +from msprobe.pytorch.visualization.graph.base_node import BaseNode +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestGraph(unittest.TestCase): + + def setUp(self): + self.graph = Graph("model_name") + self.node_id = "node_id" + self.node_op = NodeOp.module + + def test_add_node_and_get_node(self): + self.graph.add_node(self.node_op, self.node_id) + node = self.graph.get_node(self.node_id) + self.assertIsNotNone(node) + self.assertIn(self.node_id, self.graph.node_map) + + def test_to_dict(self): + self.graph.add_node(self.node_op, self.node_id) + result = self.graph.to_dict() + self.assertEqual(result[GraphConst.JSON_ROOT_KEY], "model_name") + self.assertIn(self.node_id, result[GraphConst.JSON_NODE_KEY]) + + def test_str(self): + self.graph.add_node(self.node_op, self.node_id) + expected_str = f'{self.node_id}' + self.assertIn(expected_str, str(self.graph)) + + def test_match(self): + graph_a = Graph("model_name_a") + graph_b = Graph("model_name_b") + node_a = BaseNode(self.node_op, self.node_id) + graph_a.add_node(NodeOp.module, "node_id_a") + graph_b.add_node(NodeOp.module, "node_id_b") + matched_node, ancestors = Graph.match(graph_a, node_a, graph_b) + self.assertIsNone(matched_node) + self.assertEqual(ancestors, []) + + def test_dfs(self): + graph = Graph("model_name") + graph.add_node(NodeOp.module, "node_a") + graph.add_node(NodeOp.module, "node_b") + node_a = BaseNode(self.node_op, self.node_id) + result = {} + graph.dfs(node_a, result) + self.assertEqual(result, {'node_id': {'id': 'node_id', 'node_type': 0, 'data': {}, + 'output_data': {}, 'input_data': {}, 'upnode': 'None', 'subnodes': [], + 'matched_node_link': [], 'suggestions': {}}}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py new file mode 100644 index 0000000000000000000000000000000000000000..1a340ac8b3c7144a9e07485c93e289a950eee8c7 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py @@ -0,0 +1,28 @@ +import unittest +from msprobe.pytorch.visualization.graph.node_op import NodeOp + + +class TestNodeOp(unittest.TestCase): + + def test_get_node_op_valid(self): + node_name = "ModuleTest" + self.assertEqual(NodeOp.get_node_op(node_name), NodeOp.module) + + def test_get_node_op_invalid(self): + node_name = "InvalidNodeName" + with self.assertRaises(Exception): + NodeOp.get_node_op(node_name) + + def test_get_node_op_all(self): + test_cases = [ + ("ModuleTest", NodeOp.module), + ("TensorTest", NodeOp.function_api), + ("TorchTest", NodeOp.function_api), + ("FunctionalTest", NodeOp.function_api), + ("NPUTest", NodeOp.function_api), + ("VFTest", NodeOp.function_api), + ("DistributedTest", NodeOp.function_api), + ("AtenTest", NodeOp.function_api) + ] + for node_name, expected_op in test_cases: + self.assertEqual(NodeOp.get_node_op(node_name), expected_op) diff --git a/profiler/README.md b/profiler/README.md index df613679df2c0b0994baeb745e562e211b784480..2c890811522359194504d6f5d3e7e2000939478b 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -29,7 +29,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( ) with torch_npu.profiler.profile( activities=[ - torch_npu.profiler.ProfilerActivity.CPU, + torch_npu.profiler.ProfilerActivity.CPU, torch_npu.profiler.ProfilerActivity.NPU ], record_shapes=True, @@ -56,7 +56,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( ) prof = torch_npu.profiler.profile( activities=[ - torch_npu.profiler.ProfilerActivity.CPU, + torch_npu.profiler.ProfilerActivity.CPU, torch_npu.profiler.ProfilerActivity.NPU ], record_shapes=True, @@ -128,7 +128,7 @@ Successfully installed msprof-analyze-{version} | 1.1.1 | 2024-06-20 | [msprof_analyze-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.1/msprof_analyze-1.1.1-py3-none-any.whl) | 76aad967a3823151421153d368d4d2f8e5cfbcb356033575e0b8ec5acea8e5e4 | | 1.1.0 | 2024-05-28 | [msprof_analyze-1.1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.0/msprof_analyze-1.1.0-py3-none-any.whl) | b339f70e7d1e45e81f289332ca64990a744d0e7ce6fdd84a8d82e814fa400698 | | 1.0 | 2024-05-10 | [msprof_analyze-1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.0/msprof_analyze-1.0-py3-none-any.whl) | 95b2f41c8c8e8afe4887b738c8cababcb4f412e1874483b6adae4a025fcbb7d4 | - + 2. whl包校验。 1. 根据以上下载链接下载whl包到Linux安装环境。 diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md index fb93b76dfbb13ab5dbbdeac4496d5964f27a9ca1..39a357ea48db7d39a31522294543b4665fab7695 100644 --- a/profiler/advisor/README.md +++ b/profiler/advisor/README.md @@ -41,26 +41,26 @@ Ascend PyTorch Profiler、msprof采集方法请参见《[性能调优工具](htt 3. 查看结果。 分析结果输出相关简略建议到执行终端中,并生成`mstt_advisor_{timestamp}.html`和`mstt_advisor_{timestamp}.xlsx`文件供用户预览。 - + `mstt_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 - + `mstt_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 - + 执行终端输出示例如下: - + 总体性能瓶颈 - + ![all](./img/all.png) - + 计算瓶颈 - + ![computation](./img/computation.png) - + 调度瓶颈 - + ![schedule](./img/schedule.png) - - + + ### 命令详解 @@ -194,7 +194,7 @@ comparison模块内容如下图示例,识别标杆和待比对性能数据的K Diff Total Ratio、Diff Self Ratio、Diff Avg Ratio和Diff Calls Ratio大于1则表示当前环境性能更优,小于1则表示当前环境有待优化,等于1则表示当前环境与标杆环境性能接近。 ![comparison3](./img/comparison3.png) - + 其中inf表示分母为0(未获取到待对比数据或待对比数据为0),None表示未获取到数据。 `mstt_advisor_{timestamp}.html`文件的comparison模块内容仅展示Kernel和API的Top 10条数据,详细数据需要查看`mstt_advisor_{timestamp}.xlsx`文件。 @@ -306,7 +306,7 @@ comparison模块内容如下图示例,识别标杆和待比对性能数据的K Diff Total Ratio、Diff Self Ratio、Diff Avg Ratio和Diff Calls Ratio大于1则表示当前环境性能更优,小于1则表示当前环境有待优化,等于1则表示当前环境与标杆环境性能接近。 ![comparison1](./img/comparison1.png) - + 其中inf表示分母为0(未获取到待对比数据或待对比数据为0),None表示未获取到数据。 `mstt_advisor_{timestamp}.html`文件的comparison模块内容仅展示Kernel和API的Top 10条数据,详细数据需要查看`mstt_advisor_{timestamp}.xlsx`文件。 diff --git a/profiler/advisor/analyzer/communication/base_communication_analyzer.py b/profiler/advisor/analyzer/communication/base_communication_analyzer.py index be97e07fc08ebc5096e6a7ae984f77570b24d399..4dc515e7a6fa9990046f1061aeae39e43e730b8a 100644 --- a/profiler/advisor/analyzer/communication/base_communication_analyzer.py +++ b/profiler/advisor/analyzer/communication/base_communication_analyzer.py @@ -1,22 +1,22 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer - - -class BaseCommunicationAnalyzer(BaseAnalyzer): - requires_cluster_dataset = True - - def __init__(self, collection_path, n_processes: int = 1, **kwargs): - super().__init__(collection_path, n_processes, **kwargs) +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer + + +class BaseCommunicationAnalyzer(BaseAnalyzer): + requires_cluster_dataset = True + + def __init__(self, collection_path, n_processes: int = 1, **kwargs): + super().__init__(collection_path, n_processes, **kwargs) diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py index dd41df260eff3d03459e92d570923256816cb9f4..78139dcfbc4bdaaa541c0566e2c49c5ced11c390 100644 --- a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py @@ -54,4 +54,4 @@ class AICoreFreqAnalyzer(BaseAnalyzer): return self.result def get_priority(self): - return PriorityBackgroundColor.high \ No newline at end of file + return PriorityBackgroundColor.high diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py index 3d4679e258903cb71f1c26d5871376ae6df4d116..a0f544d2fd71f087826dc7bdf057bc31a22e1b74 100644 --- a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py @@ -43,4 +43,4 @@ class SyncBNAnalyzer(BaseAnalyzer): return self.result def get_priority(self): - return PriorityBackgroundColor.high \ No newline at end of file + return PriorityBackgroundColor.high diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 42847b88d2c449e8342c8aefa9329927d401b179..d85d0f3825873f9012060bb69b49c65656cd5a3b 100644 --- a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -5,7 +5,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -66,7 +66,7 @@ class ProfilingDataset(Dataset): if is_success: setattr(self, item, data_object) else: - logger.info("Skip parse %s with file pattern %s from local path %s", + logger.info("Skip parse %s with file pattern %s from local path %s", self.current_version_pattern.get('class_attr').get(item), file_pattern_list, current_path ) @@ -96,7 +96,7 @@ class ProfilingDataset(Dataset): def collection_path(self): """collection_path""" return self.collection_path - + def _parse(self): info = DeviceInfoParser(self.collection_path) if info.parse_data(): diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index fbac2c5bf0198bf2cec6aef4513ccfaf8aed0396..67c6deb3793c92fb9103e273f4936fef946161ae 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ b/profiler/advisor/dataset/timeline_event_dataset.py @@ -20,6 +20,7 @@ from collections import OrderedDict import ijson from tqdm import tqdm +import yaml from profiler.prof_common.constant import Constant from profiler.advisor.common.timeline.event import TimelineEvent diff --git a/profiler/advisor/display/html/templates/slow_dataloader.html b/profiler/advisor/display/html/templates/slow_dataloader.html index b9ce7a574ab2a838633cb7c5181cfecb737097c9..2a3b2c4462aa6666b3ed42cc77995698ed8ce1c3 100644 --- a/profiler/advisor/display/html/templates/slow_dataloader.html +++ b/profiler/advisor/display/html/templates/slow_dataloader.html @@ -1,21 +1,21 @@ -
-

Slow Dataloader Issues

-
- {% if rank is not none %} - Analysis of rank {{ rank|safe }}. - {% endif %} - {{ desc }} - - - - - - {% for suggestion in suggestions %} - - - - {% endfor %} -
Suggestions
{{ loop.index }}. {{ suggestion|safe }}
- -
-
+
+

Slow Dataloader Issues

+
+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} + {{ desc }} + + + + + + {% for suggestion in suggestions %} + + + + {% endfor %} +
Suggestions
{{ loop.index }}. {{ suggestion|safe }}
+ +
+
diff --git a/profiler/advisor/display/html/templates/sync_batchnorm.html b/profiler/advisor/display/html/templates/sync_batchnorm.html index 402404c8a43706ec4a598300eec42c7d2b7767cc..ea322276645ae9ca374f699ed7dcbaec1caad1d8 100644 --- a/profiler/advisor/display/html/templates/sync_batchnorm.html +++ b/profiler/advisor/display/html/templates/sync_batchnorm.html @@ -1,33 +1,33 @@ - -
-

SyncBatchNorm Issues

-
- {% if rank is not none %} - Analysis of rank {{ rank|safe }}. - {% endif %} - {{ desc }} - - - - - {% for item in solutions %} - {% set rowloop = loop %} - {% for key, value in item.items() %} - - - - {% endfor %} - {% endfor %} -
Suggestions
{{ rowloop.index }}. {{ value.desc }}
- - More efficient code of syncbn forward as follows: - {% for item in solutions %} - {% for key, value in item.items() %} - {% if 'efficient_code' in value %} -
{{ value.efficient_code|safe }}
- {% endif %} - {% endfor %} - {% endfor %} - -
-
+ +
+

SyncBatchNorm Issues

+
+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} + {{ desc }} + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ + More efficient code of syncbn forward as follows: + {% for item in solutions %} + {% for key, value in item.items() %} + {% if 'efficient_code' in value %} +
{{ value.efficient_code|safe }}
+ {% endif %} + {% endfor %} + {% endfor %} + +
+
diff --git a/profiler/advisor/display/html/templates/synchronize_stream.html b/profiler/advisor/display/html/templates/synchronize_stream.html index eb132a6315d223ed36b096c7f6087cb53ca071d4..8636740275a66a5a7ba46703b978385cbc2df3a3 100644 --- a/profiler/advisor/display/html/templates/synchronize_stream.html +++ b/profiler/advisor/display/html/templates/synchronize_stream.html @@ -1,26 +1,26 @@ -
-

Synchronize Stream Issues

-
- {% if rank is not none %} - Analysis of rank {{ rank|safe }}. - {% endif %} - {{ desc }} - - - - - - - {% for item in solutions %} - {% set rowloop = loop %} - {% for key, value in item.items() %} - - - - - {% endfor %} - {% endfor %} -
Suggestions
{{ rowloop.index }}. {{ value.desc }}
- -
-
+
+

Synchronize Stream Issues

+
+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} + {{ desc }} + + + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ +
+
diff --git a/profiler/advisor/img/overall.png b/profiler/advisor/img/overall.png index c95bcd2e49575217e6fa92cd8f88e8bf2c275f32..1883d4c97388b1cfb774d05fc9e0d368d0c66901 100644 Binary files a/profiler/advisor/img/overall.png and b/profiler/advisor/img/overall.png differ diff --git a/profiler/advisor/img/overall_0.png b/profiler/advisor/img/overall_0.png index f033ecbe686cbfdd1bb2bad73d917fce0c378a63..f74cf2dcf131f36df9901e20ea327d509c6fee67 100644 Binary files a/profiler/advisor/img/overall_0.png and b/profiler/advisor/img/overall_0.png differ diff --git a/profiler/advisor/rules/dataloader.yaml b/profiler/advisor/rules/dataloader.yaml index 4b56be9ae3802b58b41a8d11ffb695dc6a0e4c91..8cc23cb00d03411c72e13d08f3ac3e72613cdd38 100644 --- a/profiler/advisor/rules/dataloader.yaml +++ b/profiler/advisor/rules/dataloader.yaml @@ -1,8 +1,8 @@ -dataloader_duration_threshold: 10000 # us -problem: "Found slow dataloader, cost {dataloader_duration} us for one step while profiling, normally less than {dataloader_duration_threshold} us." -solutions: - - "Please check the disk I/O of your data directory. If you are training model in ModelArts, please move data to '/cache' or mount a more efficient cloud disk for better I/O." - - "Please check if there are any other multiprocess operations in runtime that may have affected the dataloader, such as training process core binding command 'taskset ...' used for launching the training job." - - "Please check the format of your data, avoid file format like tar, tar.gz, zip." - - "Please set 'pin_memory=True' for your dataloader." +dataloader_duration_threshold: 10000 # us +problem: "Found slow dataloader, cost {dataloader_duration} us for one step while profiling, normally less than {dataloader_duration_threshold} us." +solutions: + - "Please check the disk I/O of your data directory. If you are training model in ModelArts, please move data to '/cache' or mount a more efficient cloud disk for better I/O." + - "Please check if there are any other multiprocess operations in runtime that may have affected the dataloader, such as training process core binding command 'taskset ...' used for launching the training job." + - "Please check the format of your data, avoid file format like tar, tar.gz, zip." + - "Please set 'pin_memory=True' for your dataloader." - "Try to adjust dataloader parameter 'num_workers'." \ No newline at end of file diff --git a/profiler/advisor/rules/sync_batchnorm.yaml b/profiler/advisor/rules/sync_batchnorm.yaml index 0f702af6eae4445244778fc5429912380c2d199a..d65bcb0d4a16c005e7d85979dc60ec1d05e19766 100644 --- a/profiler/advisor/rules/sync_batchnorm.yaml +++ b/profiler/advisor/rules/sync_batchnorm.yaml @@ -1,41 +1,41 @@ -problem: "Found {syncbn_num} SyncBatchNorm, which can lead to slow python task dispatch and frequent communication between devices and finally reducing training efficiency." -max_syncbn_num: 20 -solutions: - - enable batchnorm: - desc: "disable SyncBatchNorm by remove the code like 'torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)' if possible." - - enable efficient SyncBatchNorm: - desc: "replace the 'forward' method of python script 'torch_npu/utils/syncbatchnorm.py' in your runtime environment." - efficient_code: | - @staticmethod - def forward(self, input_tensor, weight, bias, running_mean, running_var, eps, momentum, process_group, world_size): - input_tensor = input_tensor.contiguous() - input_shape = input_tensor.shape - input_tensor_ = input_tensor.reshape(input_shape[0], input_shape[1], 1, -1) - sum_val, sum_square_val = torch.batch_norm_reduce(input_tensor_, eps) - - count = torch.full((1,), - input_tensor.numel() // input_tensor.size(1), - dtype=sum_val.dtype, - device=sum_val.device) - - num_channels = input_tensor.shape[1] - combined = torch.cat([sum_val, sum_square_val, count], dim=0) - combined_list = torch.empty((world_size,) + combined.shape, dtype=combined.dtype, device=combined.device) - dist.all_gather_togather(combined_list, combined, process_group, async_op=False) - sum_all, square_sum_all, count_all = torch.split(combined_list, num_channels, dim=1) - size = count_all.view(-1).sum() - if size == 1: - raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size)) - - mean, invstd = torch.batch_norm_gather_stats_update(input_tensor, - sum_all, - square_sum_all, - running_mean, - running_var, - momentum, - eps, - count_all.view(-1)) - self.save_for_backward(input_tensor, weight, mean, invstd, count_all.to(torch.int32)) - self.process_group = process_group - out = torch.batch_norm_elemt(input_tensor, weight, bias, mean, invstd, eps) +problem: "Found {syncbn_num} SyncBatchNorm, which can lead to slow python task dispatch and frequent communication between devices and finally reducing training efficiency." +max_syncbn_num: 20 +solutions: + - enable batchnorm: + desc: "disable SyncBatchNorm by remove the code like 'torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)' if possible." + - enable efficient SyncBatchNorm: + desc: "replace the 'forward' method of python script 'torch_npu/utils/syncbatchnorm.py' in your runtime environment." + efficient_code: | + @staticmethod + def forward(self, input_tensor, weight, bias, running_mean, running_var, eps, momentum, process_group, world_size): + input_tensor = input_tensor.contiguous() + input_shape = input_tensor.shape + input_tensor_ = input_tensor.reshape(input_shape[0], input_shape[1], 1, -1) + sum_val, sum_square_val = torch.batch_norm_reduce(input_tensor_, eps) + + count = torch.full((1,), + input_tensor.numel() // input_tensor.size(1), + dtype=sum_val.dtype, + device=sum_val.device) + + num_channels = input_tensor.shape[1] + combined = torch.cat([sum_val, sum_square_val, count], dim=0) + combined_list = torch.empty((world_size,) + combined.shape, dtype=combined.dtype, device=combined.device) + dist.all_gather_togather(combined_list, combined, process_group, async_op=False) + sum_all, square_sum_all, count_all = torch.split(combined_list, num_channels, dim=1) + size = count_all.view(-1).sum() + if size == 1: + raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size)) + + mean, invstd = torch.batch_norm_gather_stats_update(input_tensor, + sum_all, + square_sum_all, + running_mean, + running_var, + momentum, + eps, + count_all.view(-1)) + self.save_for_backward(input_tensor, weight, mean, invstd, count_all.to(torch.int32)) + self.process_group = process_group + out = torch.batch_norm_elemt(input_tensor, weight, bias, mean, invstd, eps) return out \ No newline at end of file diff --git a/profiler/advisor/rules/synchronize.yaml b/profiler/advisor/rules/synchronize.yaml index efaa8a8280aebddb4fa6c4253fc30a5bd1831c23..f73afd0c79e9635bb9bf8fc612c7489bbca324da 100644 --- a/profiler/advisor/rules/synchronize.yaml +++ b/profiler/advisor/rules/synchronize.yaml @@ -1,5 +1,5 @@ -problem: "SynchronizeStream will reduce training efficiency. Found {synchronize_num} SynchronizeStream and {node_launch_num} NodeLaunch, the co-occurrence ratio of SynchronizeStream and NodeLaunch is {co_occur_ratio}" -min_co_occurrence_ratio: 0.5 -solutions: - - disable ascend launch blocking: - desc: "please check your env 'ASCEND_LAUNCH_BLOCKING', if ASCEND_LAUNCH_BLOCKING=1, please execute 'unset ASCEND_LAUNCH_BLOCKING' and then start your training job." \ No newline at end of file +problem: "SynchronizeStream will reduce training efficiency. Found {synchronize_num} SynchronizeStream and {node_launch_num} NodeLaunch, the co-occurrence ratio of SynchronizeStream and NodeLaunch is {co_occur_ratio}" +min_co_occurrence_ratio: 0.5 +solutions: + - disable ascend launch blocking: + desc: "please check your env 'ASCEND_LAUNCH_BLOCKING', if ASCEND_LAUNCH_BLOCKING=1, please execute 'unset ASCEND_LAUNCH_BLOCKING' and then start your training job." diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 7a78c6fa063bb4278e997874a6195881b62667d6..c885182f1b688fafd40126aa45cdebcf9f364720 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -74,9 +74,9 @@ def singleton(cls): :param cls: any class :return: singleton handle - When using the singleton function, you need to manually specify collection_path='dataSet_path'. Otherwise, the + When using the singleton function, you need to manually specify collection_path='dataSet_path'. Otherwise, the singleton function is initialized by class name. - if cls has 'collection_path' property, _instance map will build by class_name and 'collection_path', the + if cls has 'collection_path' property, _instance map will build by class_name and 'collection_path', the default value of collection path is class absolute path. _instance = {cls.name: {collection_path: instance}} @@ -120,7 +120,7 @@ def singleton(cls): # 过滤出函数对象 function_objs = [ - member[1] for member in members + member[1] for member in members if inspect.isfunction(member[1]) or inspect.ismethod(member[1]) ] for function_obj in function_objs: diff --git a/profiler/cluster_analyse/README.md b/profiler/cluster_analyse/README.md index 1a9af98d51eba8732d75a54feed13036a97d3f20..56ad7e88e0a12d7abb2dd211aa040e83f3cd7216 100644 --- a/profiler/cluster_analyse/README.md +++ b/profiler/cluster_analyse/README.md @@ -21,7 +21,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( - ./profiler_info_x.json, - ./ASCEND_PROFILER_OUTPUT/step_trace_time.csv, - ./ASCEND_PROFILER_OUTPUT/trace_view.json, -- ./ASCEND_PROFILER_OUTPUT/kernel_details.csv, +- ./ASCEND_PROFILER_OUTPUT/kernel_details.csv, - ./ASCEND_PROFILER_OUTPUT/communication.json, - ./ASCEND_PROFILER_OUTPUT/communication_matrix.json @@ -47,13 +47,13 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( ``` 或 - + ```bash python3 cluster_analysis.py -d {cluster profiling data path} -m {mode} ``` 参数说明: - + | 参数名 | 说明 | 是否必选 | | --------------------- | ------------------------------------------------------------ | -------- | | --profiling_path或-d | 性能数据汇集目录。未配置-o参数时,运行分析脚本之后会在该目录下自动创建cluster_analysis_output文件夹,保存分析数据。 | 是 | @@ -61,16 +61,16 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( | --mode或-m | 数据解析模式,取值详见“**--mode参数说明**”表。 | 否 | | --data_simplification | 数据精简模式。对于数据量过大的性能数据db文件,可以通过配置该参数将数据精简,并提高工具分析效率。 | 否 | | --force | 强制跳过用户属主(文件是否属于当前用户)及文件大小(csv文件小于5G,json文件小于10G,db文件小于8G)校验。 | 否 | - + --mode参数说明: - + | 参数名 | 说明 | 是否必选 | | -------------------- | ------------------------------------------------------------ | -------- | | communication_matrix | 解析通信矩阵数据。 | 否 | | communication_time | 解析通信耗时数据。 | 否 | | all | 同时解析通信矩阵communication_matrix和通信耗时数据communication_time,--mode参数默认值为all。 | 否 | - - + + ### 交付件 diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index 62ec9b255cb4534c5dbce2005c1efda0bc55105e..35e2865e88b1190592d36b533cf018b377e0e7eb 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ -199,6 +199,8 @@ MindSpore场景仅支持**总体性能**、**通信性能**和**kernel性能** 比对结果分为打屏和performance_comparison_result_{timestamp}.csv两种形式输出,其中打屏输出为概要信息,csv文件保存详细结果。 +比对结果分为打屏和performance_comparison_result_{timestamp}.csv两种形式输出,其中打屏输出为概要信息,csv文件保存详细结果。 + ### 总体性能 #### 打屏结果 diff --git a/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py index 528713d60f17329a9bd50ea8a22e8849e193afca..5ed246b70aa3811d7ef3bdbd8dfd8be5e482a73c 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py @@ -42,23 +42,23 @@ class KernelCompareInfo: @property def input_shapes(self): return self._input_shapes - + @property def total_dur(self): return self._total_dur if self._total_dur else 0.0 - + @property def number(self): return self._number - + @property def max_dur(self): return self._max_dur - + @property def min_dur(self): return self._min_dur - + @property def avg_dur(self): return round(self._total_dur / self._number, 2) if self._total_dur and self._number else 0.0 diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 59a307e46810bc1a27c2183e18e2d9002cae6d8d..d8334b803bc20a6a0271c5f92bd88f1e645cb16d 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -52,6 +52,9 @@ class ProfilingResult: def update_kernel_details(self, kernels: dict): self.kernel_details = kernels + def update_kernel_details(self, kernels: dict): + self.kernel_details = kernels + def update_bwd_tid(self, bwd_tid): self.bwd_tid = bwd_tid diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py index aca2974d2b18be0a4f98859920ffdd9c7b70ec7f..4a3ab2f6318f567bf53ec314610f40999a39ccb7 100644 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ b/profiler/compare_tools/compare_backend/utils/args_manager.py @@ -88,7 +88,6 @@ class ArgsManager: @property def enable_api_compare(self): return self._args.enable_api_compare - @property def enable_kernel_compare(self): return self._args.enable_kernel_compare diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py index 75d2c79a3042f427f79e8df0f1e21c122f1d1de3..c7a86df20fbfdff1ea139089fa00a7b7a8b50c48 100644 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ b/profiler/compare_tools/compare_backend/utils/torch_op_node.py @@ -88,7 +88,6 @@ class TorchOpNode: @property def api_dur(self): return self._event.dur - @property def api_self_time(self): return self.api_dur - sum(child.api_dur for child in self._child_nodes) diff --git a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 78cf1070ffd8ab1233065c6a7e25a3df9e46f33d..29d202052c09cb68ad1219e223577ec24c408dfc 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -14,7 +14,7 @@ # limitations under the License. - + import os import stat @@ -29,6 +29,8 @@ class Constant(object): CPU_OP_EVENT = "op_event" TORCH_TO_NPU_FLOW = "torch_to_device" KERNEL_EVENT = "kernel_event" + HCCL_EVENT = "hccl_event" + OVERLAP_ANALYSIS_EVENT = "overlap_event" FWD_BWD_FLOW = "fwd_to_bwd" NPU_ROOT_ID = "NPU" @@ -134,14 +136,14 @@ class Constant(object): ALL = "all" COMMUNICATION_TIME = "communication_time" COMMUNICATION_MATRIX = "communication_matrix" - + STEP = "step" DATA_SIMPLIFICATION = "data_simplification" FORCE = "force" - + # compare tools - + GPU = "GPU" NPU = "NPU" NA = 'N/A' @@ -383,4 +385,4 @@ class Constant(object): MINDSPORE_VERSION = "mindspore_version" PYTORCH = "pytorch" - MINDSPORE = "mindspore" \ No newline at end of file + MINDSPORE = "mindspore" diff --git a/profiler/prof_common/kernel_bean.py b/profiler/prof_common/kernel_bean.py new file mode 100644 index 0000000000000000000000000000000000000000..4d60a69080fc909a79e03374e1730608cbfa9445 --- /dev/null +++ b/profiler/prof_common/kernel_bean.py @@ -0,0 +1,47 @@ +# Copyright (c) 2024 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from profiler.prof_common.utils import convert_to_decimal + + +class KernelBean: + def __init__(self, data: dict): + self._name = data.get("Name", "") + self._op_type = data.get("Type", "") + self._core_type = data.get("Accelerator Core", "") + self._input_shape = data.get("Input Shapes", "").replace("\"", "") + self._input_type = data.get("Input Data Types", "") + self._input_format = data.get("Input Formats", "") + self._duration = data.get("Duration(us)", 0) + self._ts = data.get("Start Time(us)", "") + + @property + def start_time(self): + return convert_to_decimal(self._ts) + + @property + def end_time(self): + return self.start_time + convert_to_decimal(self.dur) + + @property + def is_computing_op(self): + return self._core_type != "HCCL" + + @property + def dur(self): + return float(self._duration) + + @property + def kernel_info(self): + return [self._name, self._op_type, self._core_type, self._input_shape, self._input_type, self.dur] diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..3d8e22b7c66df5c865887e39d4e32e117ea129ee --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py @@ -0,0 +1,65 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.dataloader.dataloader_checker import DataloaderChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestDataloaderChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "dataloader.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=True) + + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_no_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_found_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") + 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertTrue(checker.dataloader_issues) + + desc = self.rule.get("problem").format(dataloader_duration=dataloader_duration / 1000, + dataloader_duration_threshold=self.rule.get( + "dataloader_duration_threshold")) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, dur, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["dataloader"] = [TimelineEvent({"dur": dur, "name": "dataloader"})] + return dataset + + +if __name__ == '__main__': + tester = TestDataloaderChecker() + tester.test_no_dataloader() + tester.test_no_slow_dataloader() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..d1df810a0ec9fcc28d28d73836ec6bb2ec86b6db --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py @@ -0,0 +1,62 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSyncBNChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "sync_batchnorm.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_syncbn(self): + dataset = self._get_mock_dataset(1, is_empty_dataset=True) + + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_syncbn_not_reach_threshold(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") - 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_found_slow_dataloader(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") + 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertTrue(checker.syncbn_issues) + + desc = self.rule.get("problem").format(syncbn_num=self.rule.get("max_syncbn_num") + 1) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, syncbn_num, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["sync_batchnorm"] = [] + for _ in range(syncbn_num): + dataset["sync_batchnorm"].append(TimelineEvent({"name": "SyncBatchNorm"})) + return dataset + + +if __name__ == '__main__': + tester = TestSyncBNChecker() + tester.test_no_syncbn() + tester.test_syncbn_not_reach_threshold() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py new file mode 100644 index 0000000000000000000000000000000000000000..360363ce371afb43cb61abd1a5b5fc2b2720aecc --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py @@ -0,0 +1,55 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSynchronizeChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "synchronize.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_synchronize_stream(self): + dataset = self._get_mock_dataset(1, [], is_empty_dataset=True) + + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def test_max_synchronize_stream(self): + dataset = self._get_mock_dataset(100, [], is_empty_dataset=False) + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def _get_mock_dataset(self, total_count, slow_synchronize_stream, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["synchronize_stream"] = TimelineEvent( + dict( + total_count=total_count, + slow_synchronize_stream=slow_synchronize_stream, + rule=dict(max_synchronize_num=10, problem="", solutions=[]), + ) + ) + return dataset + + +if __name__ == '__main__': + tester = TestSynchronizeChecker() + tester.test_no_synchronize_stream() + tester.test_max_synchronize_stream()