From a8e033c08c265f5a7b35d90240e8ec42ff5e4567 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Tue, 4 Jun 2024 21:43:15 +0800 Subject: [PATCH 001/333] compare finished --- .../generate_op_script/op_generator.py | 261 +++++++++++++++ .../operator_replication.template | 307 ++++++++++++++++++ 2 files changed, 568 insertions(+) create mode 100644 debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py create mode 100644 debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py new file mode 100644 index 00000000000..ba731f72394 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -0,0 +1,261 @@ +import json +import os +import math +import numpy as np +import torch +try: + import torch_npu +except ImportError: + pass + + +TENSOR_DATA_LIST = ["torch.Tensor"] +TORCH_BOOL_TYPE = ["torch.bool"] +TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", + "torch.int64", "torch.long"] +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", + "torch.float64", "torch.double"] +TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] +NUMPY_TYPE = ["numpy.int8", "numpy.int16", "numpy.int32", "numpy.int64", "numpy.uint8", "numpy.uint16", "numpy.uint32", + "numpy.uint64", "numpy.float16", "numpy.float32", "numpy.float64", "numpy.float128", "numpy.complex64", + "numpy.complex128", "numpy.complex256", "numpy.bool_", "numpy.string_", "numpy.bytes_", "numpy.unicode_"] +RAISE_PRECISION = { + "torch.float16": "torch.float32", + "torch.half": "torch.float32", + "torch.bfloat16": "torch.float32", + "torch.float32": "torch.float64", + "torch.float": 
"torch.float64" +} + + +''' +user_settings could be adjusted by user. +keys: + full_api_name: api_type.api_name.ordinal_number + direction_status: forward or backward + json_path : path of forward json file or backward json file + mode : random_data_mode or real_data_mode + random_seed: if mode is random_data_mode, random seed is random_seed + iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter + real_data_path: path of real data + output_path : path of output files +''' +user_settings = { + "full_api_name": "Torch.matmul.83", + "direction_status": "forward", + "json_path": "", + "mode": "real_data", + "random_seed": 1234, + "iter_times": 5, + "real_data_path": "", + "output_path": "" +} + + +def check_full_api_name(full_api_name): + ''' + {api_type}_{api_name}_{api调用次数} + two things: new name format, what parts of full_api_name is needed + ''' + pass + + +def check_user_settings(user_settings): + check_full_api_name(user_settings["full_api_name"]) + if user_settings["mode"] != "random" and user_settings["mode"] != "real_data": + raise Exception("Error: mode must be random or real_data!") + if user_settings["mode"] == "real_data": + pass + with open(user_settings["json_path"]) as f: + json_content = json.load(f) + (api_full_name, api_info_dict) = list(json_content.items())[0] + return api_info_dict + + +def get_settings(): + ''' + internal_settings contain all information needed for the program. 
+ keys: + full_api_name: api_type.api_name.ordinal_number + api_type: type of api, should be Functional, Torch or Tensor + api_name: name of api + ordinal_number: how many times the same api has been called + direction_status: forward or backward + json_path : path of forward json file or backward json file + mode : random_data_mode or real_data_mode + random_seed: if mode is random_data_mode, random seed is random_seed + iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter + real_data_path: path of real data + output_path : path of output files + ''' + api_info_dict = check_user_settings(user_settings) + args_info = api_info_dict.get("args") + kwargs_info = api_info_dict.get("kwargs") + + internal_settings = {} + internal_settings["full_api_name"] = user_settings.get("full_api_name") + parts_of_full_api_name = internal_settings["full_api_name"].split(".", -1) + if parts_of_full_api_name[0] == "Functional": + internal_settings["api_type"] = "torch.nn.functional" + elif parts_of_full_api_name[0] == "Tensor": + internal_settings["api_type"] = "torch.Tensor" + else: + internal_settings["api_type"] = "torch" + internal_settings["api_name"] = parts_of_full_api_name[1] + internal_settings["ordinal_number"] = parts_of_full_api_name[2] + internal_settings["random_seed"] = user_settings.get("random_seed") + if user_settings.get("mode") == "random_data_mode": + internal_settings["iter_times"] = 1 + else: + internal_settings["iter_times"] = user_settings.get("iter_times") + internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info) + internal_settings["args_list_generator_device"] = generate_args_list_device(args_info) + internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info) + internal_settings["kwargs_value_assignment"] = generate_kwargs_value_assignment_code(kwargs_info) + internal_settings["kwargs_dict_generator_device"] = 
generate_kwargs_dict_device(kwargs_info) + internal_settings["kwargs_dict_generator_bench"] = generate_kwargs_dict_bench(kwargs_info) + return internal_settings + + +def recursive_args_element_assignment(args_info, name_number): + args_element_assignment = "" + for index, arg in enumerate(args_info): + if isinstance(arg, (list, tuple)): + new_args_element_assignment = recursive_args_element_assignment(arg, name_number + "_" + str(index)) + args_element_assignment += new_args_element_assignment + else: + arg["parameter_name"] = "arg" + name_number + "_" + str(index) + args_element_assignment += " " + "arg_info" + name_number + "_" + str(index) + " = " + "{}".format(str(arg)) + "\n" + args_element_assignment += " " + "arg" + name_number + "_" + str(index) + " = " + "generate_data(arg_info" + name_number + "_" + str(index) + ")" + "\n" + return args_element_assignment + + +def generate_args_element_assignment_code(args_info): + args_element_assignment = recursive_args_element_assignment(args_info, "") + return args_element_assignment + + +def recursive_args_list(args_info, flag_device=False, flag_bench=False): + args_list_generator = "" + for index, arg in enumerate(args_info): + if isinstance(arg, list): + args_list_generator += "[" + new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) + args_list_generator += new_args_list_generator + args_list_generator += "]" + elif isinstance(arg, tuple): + args_list_generator += "(" + new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) + args_list_generator += new_args_list_generator + args_list_generator += ")" + else: + args_list_generator += arg.get("parameter_name") + if arg.get("type") in TENSOR_DATA_LIST: + if flag_device: + args_list_generator += ".to(device)" + if flag_bench: + data_dtype = arg.get("dtype") + raised_dtype = RAISE_PRECISION.get(data_dtype) + if raised_dtype: + args_list_generator += ".to(" + raised_dtype + ")" + 
args_list_generator += ", " + return args_list_generator + + +def generate_args_list_device(args_info): + args_list_generator_device = recursive_args_list(args_info, flag_device=True) + return args_list_generator_device + + +def generate_args_list_bench(args_info): + args_list_generator_bench = recursive_args_list(args_info, flag_bench=True) + return args_list_generator_bench + + +def recursive_kwargs_value_assignment(info, key_name, name_number): + kwargs_value_assignment = "" + if isinstance(info, dict): + if info.get("type") == "torch.device" or info.get("type") == "torch.dtype": + kwargs_value_assignment += " " + "kwarg_" + key_name + name_number + " = " + info.get("value") + else: + kwargs_value_assignment += " " + "kwarg_info_" + key_name + name_number + " = " + "{}".format(str(info)) + "\n" + kwargs_value_assignment += " " + "kwarg_" + key_name + name_number + " = " + "generate_data(kwarg_info_" + key_name + name_number + ")" + "\n" + info["parameter_name"] = "kwarg_" + key_name + name_number + else: + for index, arg in enumerate(info): + new_kwargs_value_assignment = recursive_kwargs_value_assignment(arg, key_name, name_number + "_" + str(index)) + kwargs_value_assignment += new_kwargs_value_assignment + return kwargs_value_assignment + + +def generate_kwargs_value_assignment_code(kwargs_info): + kwargs_value_assignment = "" + for key, value in kwargs_info.items(): + kwargs_value_assignment += recursive_kwargs_value_assignment(value, key, "") + return kwargs_value_assignment + + +def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): + kwargs_dict_generator = "" + if isinstance(info, dict): + kwargs_dict_generator += info.get("parameter_name") + if info.get("type") in TENSOR_DATA_LIST: + if flag_device: + kwargs_dict_generator += "to(device)" + if flag_bench: + data_dtype = info.get("dtype") + raised_dtype = RAISE_PRECISION.get(data_dtype) + if raised_dtype: + kwargs_dict_generator += "to(" + raised_dtype + ")" + else: + kwargs_dict_generator 
= "" + if isinstance(info, list): + kwargs_dict_generator += "[" + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += "]" + else: + kwargs_dict_generator += "(" + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += ")" + return kwargs_dict_generator + + +def generate_kwargs_dict_device(kwargs_info): + kwargs_dict_generator_device = "" + for key, value in kwargs_info.items(): + kwargs_dict_generator_device += '"' + key + '"' + ": " + kwargs_dict_generator_device += recursive_kwargs_dict(value, flag_device=True) + ", " + return kwargs_dict_generator_device + + +def generate_kwargs_dict_bench(kwargs_info): + kwargs_dict_generator_bench = "" + for key, value in kwargs_info.items(): + kwargs_dict_generator_bench += '"' + key + '"' + ": " + kwargs_dict_generator_bench += recursive_kwargs_dict(value, flag_bench=True) + ", " + return kwargs_dict_generator_bench + + +def main(): + internal_settings = get_settings() + + template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") + operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("full_api_name"))) + + try: + with open(template_path, 'r') as ftemp, open(operator_script_path, 'w') as fout: + code_template = ftemp.read() + fout.write(code_template.format(**internal_settings)) + except OSError: + print(f"Failed to open file. 
Please check file {template_path} or {operator_script_path}.") + + print(f"Generate operator script successfully and the name is {operator_script_path}.") + + +if __name__ == "__main__": + main() + print("Job done successfully.") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template new file mode 100644 index 00000000000..54d1ab01e35 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -0,0 +1,307 @@ +import json +import os +import math +import torch +try: + import torch_npu +except ImportError: + pass + +from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi + + +TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] +TORCH_BOOL_TYPE = ["torch.bool"] +TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", + "torch.int64", "torch.long"] +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", + "torch.float64", "torch.double"] +TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] + + +def get_device(): + if torch.cuda.is_available(): + device = torch.device("cuda") + elif torch_npu.npu.is_available(): + device = torch.device("npu") + else: + raise Exception("Error: This device is not NPU or GPU!") + return device + + +def generate_bool_tensor(low, high, shape): + low, high = int(low), int(high) + tensor = torch.randint(low, high + 1, shape) + bool_tensor = torch.gt(tensor, 0) + return bool_tensor + + +def generate_numerical_tensor(low, high, shape, data_dtype): + if data_dtype in TORCH_FLOAT_TYPE: + scale = high - low + rand01 = torch.rand(shape, dtype=eval(data_dtype)) + tensor = rand01 * scale + low + elif data_dtype in 
TORCH_INT_TYPE: + low, high = int(low), int(high) + tensor = torch.randint(low, high + 1, shape, dtype=eval(data_dtype)) + else: + raise NotImplementedError(f"{{data_dtype}} is not supported!") + if torch.numel(tensor) == 0: + return tensor + tmp_tensor = tensor.reshape(-1) + tmp_tensor[0] = low + tmp_tensor[-1] = high + data = tmp_tensor.reshape(shape) + return data + + +def generate_random_tensor(info): + low, high = info.get('Min'), info.get('Max') + data_dtype = info.get('dtype') + shape = tuple(info.get('shape')) + if data_dtype == "torch.bool": + data = generate_bool_tensor(low, high, shape) + else: + data = generate_numerical_tensor(low, high, shape, data_dtype) + return data + + +def generate_real_tensor(data_path): + data_path = os.path.realpath(data_path) + data = torch.load(data_path) + return data + + +def generate_data(info): + data_type = info.get("type") + data_path = info.get("datapath") + if data_type in TENSOR_DATA_LIST: + if data_path: + data = generate_real_tensor(data_path) + else: + data = generate_random_tensor(info) + else: + data = info.get("value") + return data + + +def get_input(): +{args_element_assignment} + args_device = [{args_list_generator_device}] + args_bench = [{args_list_generator_bench}] +{kwargs_value_assignment} + kwargs_device = {{{kwargs_dict_generator_device}}} + kwargs_bench = {{{kwargs_dict_generator_bench}}} + return args_device, kwargs_device, args_bench, kwargs_bench + + +def exec_api_device(args, kwargs): + output_device = {api_type}.{api_name}(*args, **kwargs) + return output_device + + +def exec_api_bench(args, kwargs): + output_bench = {api_type}.{api_name}(*args, **kwargs) + return output_bench + + +def compute_inf_nan_proportion(inf_nan_mask, out_device, out_bench, abs_bench_with_eps, rtol): + out_bench = out_bench.to(out_device.dtype) + min = torch.finfo(out_device.dtype).min + max = torch.finfo(out_device.dtype).max + bench_clip = torch.clamp(out_bench, min=min, max=max) + device_clip = 
torch.clamp(out_device, min=min, max=max) + clipped_abs_ae = torch.abs(device_clip - bench_clip) + clipped_re = clipped_abs_ae / abs_bench_with_eps + pass_mask = torch.less_equal(clipped_re, rtol) + both_nan_mask = torch.logical_and(torch.isnan(out_device), torch.isnan(bench_clip)) + pass_mask = torch.logical_or(pass_mask, both_nan_mask) + not_pass_mask = torch.logical_not(pass_mask) + not_pass_mask = torch.logical_and(not_pass_mask, inf_nan_mask) + inf_nan_err_cnt = torch.sum(not_pass_mask) + return 0 if torch.sum(inf_nan_mask) == 0 else inf_nan_err_cnt / torch.sum(inf_nan_mask) + + +def compute_rmse(abs_err, normal_value_mask): + if torch.sum(normal_value_mask) == 0: + return 0 + else: + masked_ae = torch.where(normal_value_mask, abs_err, 0) + mse = torch.sum(torch.square(masked_ae)) / torch.sum(normal_value_mask) + rmse = torch.sqrt(mse) + return rmse + + +def compute_error_balance(out_device, out_bench): + larger_count = torch.sum(torch.greater(out_device - out_bench.to(out_device.dtype), 0)) + smaller_count = torch.sum(torch.less(out_device - out_bench.to(out_device.dtype), 0)) + total_count = torch.numel(out_bench) + error_balance = abs(larger_count - smaller_count) / total_count + return error_balance + + +def compare_tensor(out_device, out_bench, api_name): + if out_device.shape != out_bench.shape: + print("ERROR: shape of out_device and out_bench is not equal!") + return None + if torch.numel(out_bench) == 0: + print("Both out_device and out_bench have zero elements.") + return None + dtype_device = out_device.dtype + dtype_bench = out_bench.dtype + if str(dtype_device) in TORCH_FLOAT_TYPE and str(dtype_bench) in TORCH_FLOAT_TYPE \ + or str(dtype_device) in TORCH_INT_TYPE and str(dtype_bench) in TORCH_INT_TYPE \ + or str(dtype_device) in TORCH_BOOL_TYPE and str(dtype_bench) in TORCH_BOOL_TYPE: + out_device = out_device.to(torch.device("cpu")) + if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or api_name in BinaryStandardApi: 
+ print("compare standard: binary consistency standard:") + error_number = torch.sum(out_device != out_bench).item() + error_rate = error_number / torch.numel(out_bench) + print(f"error rate is {{error_rate}}.") + else: + abs_err = torch.abs(out_device - out_bench) + abs_bench = torch.abs(out_bench) + if dtype_bench == torch.float32: + eps = 2 ** -23 + if dtype_bench == torch.float64: + eps = 2 ** -52 + abs_bench_with_eps = abs_bench + eps + rel_err = torch.abs(abs_err / abs_bench_with_eps) + device_finite_mask = torch.isfinite(out_device) + bench_finite_mask = torch.isfinite(out_bench.to(dtype_device)) + both_finite_mask = torch.logical_and(device_finite_mask, bench_finite_mask) + inf_nan_mask = torch.logical_not(both_finite_mask) + if api_name in AbsoluteStandardApi: + if dtype_device == torch.float16: + rtol, small_value, small_value_atol = 1.0e-3, 1.0e-3, 1.0e-5 + elif dtype_device == torch.bfloat16: + rtol, small_value, small_value_atol = 4.0e-3, 1.0e-3, 1.0e-5 + else: + rtol, small_value, small_value_atol = 1.0e-6, 1.0e-6, 1.0e-9 + small_value_mask = torch.less_equal(abs_bench, small_value) + small_value_mask = torch.logical_and(small_value_mask, both_finite_mask) + normal_value_mask = torch.logical_and(both_finite_mask, torch.logical_not(small_value_mask)) + inf_nan_proportion = compute_inf_nan_proportion(inf_nan_mask, out_device, out_bench, abs_bench_with_eps, rtol) + rel_err_mask = torch.greater(rel_err, rtol) + rel_err_mask = torch.logical_and(rel_err_mask, normal_value_mask) + if torch.sum(normal_value_mask) == 0: + rel_err_proportion = 0 + else: + rel_err_proportion = torch.sum(rel_err_mask) / torch.sum(normal_value_mask) + abs_err_mask = torch.greater(abs_err, small_value_atol) + abs_err_mask = torch.logical_and(abs_err_mask, small_value_mask) + if torch.sum(small_value_mask) == 0: + abs_err_proportion = 0 + else: + abs_err_proportion = torch.sum(abs_err_mask) / torch.sum(small_value_mask) + print("compare standard: absolute threshold standard") + 
print(f"relative error ratio is {{rel_err_proportion}}") + print(f"absolute error ratio is {{abs_err_proportion}}") + elif api_name in ULPStandardApi: + if dtype_device == torch.float16: + min_eb, exponent_num = -14, 10 + elif dtype_device == torch.bfloat16: + min_eb, exponent_num = -126, 7 + else: + min_eb, exponent_num = -126, 23 + eb = torch.where(abs_bench == 0, torch.zeros(out_bench.shape), torch.floor(torch.log2(abs_bench))) + eb = torch.maximum(eb, min_eb) + if dtype_device == torch.float32: + ulp_err = (out_device.to(torch.float64) - out_bench).to(torch.float64) * torch.exp2(-eb + exponent_num).to(torch.float64) + else: + ulp_err = (out_device.to(torch.float32) - out_bench).to(torch.float32) * torch.exp2(-eb + exponent_num).to(torch.float32) + ulp_err = torch.abs(ulp_err) + max_ulp_err = torch.max(ulp_err) + mean_ulp_err = torch.mean(ulp_err) + if dtype_device == torch.float32: + ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) + else: + ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) + print("compare standard: ulp error standard") + print(f"maximum ulp error is {{max_ulp_err}}") + print(f"mean ulp error is {{mean_ulp_err}}") + print(f"ulp error proportion is {{ulp_err_proportion}}") + else: + if dtype_device == torch.float16: + small_value, small_value_atol = 1.0e-3, 1.0e-5 + elif dtype_device == torch.bfloat16: + small_value, small_value_atol = 1.0e-3, 1.0e-5 + else: + small_value, small_value_atol = 1.0e-6, 1.0e-9 + small_value_mask = torch.less_equal(abs_bench, small_value) + small_value_mask = torch.logical_and(small_value_mask, both_finite_mask) + normal_value_mask = torch.logical_and(both_finite_mask, torch.logical_not(small_value_mask)) + abs_err_mask = torch.greater(abs_err, small_value_atol) + abs_err_mask = torch.logical_and(abs_err_mask, small_value_mask) + if torch.sum(small_value_mask) == 0: + small_value_err_proportion = 0 + else: + small_value_err_proportion = torch.sum(abs_err_mask) / 
torch.sum(small_value_mask) + rel_err = torch.where(normal_value_mask, rel_err, -1 * torch.ones(out_device.shape)) + if torch.max(rel_err) >= 0: + max_rel_err = torch.max(rel_err) + else: + max_rel_err = 0 + if torch.sum(normal_value_mask) == 0: + mean_rel_err = 0 + else: + mean_rel_err = torch.sum(torch.clamp(rel_err, min=0)) / torch.sum(normal_value_mask) + rmse = compute_rmse(abs_err, normal_value_mask) + error_balance = compute_error_balance(out_device, out_bench) + print("compare standard: benchmark standard") + print(f"small value error proportion is {{small_value_error_proportion}}") + print(f"maximum relative error is {{max_rel_err}}") + print(f"mean relative error is {{mean_rel_err}}") + print(f"root mean squared error is {{rmse}}") + print(f"error balance is {{error_balance}}") + else: + print(f"ERROR: out_device dtype is {{dtype_device}}, out_bench dtype is {{dtype_bench}}, not comparable.") + return None + + +def compare_element(out_device, out_bench, api_name): + if type(out_device) != type(out_bench): + print("ERROR: out_device and out_bench is not the same type!") + return None + if isinstance(out_bench, torch.Tensor): + print(f"data type: {{type(out_bench)}}") + compare_tensor(out_device, out_bench, api_name) + elif isinstance(out_bench, (bool, int, float, str)): + print(f"data type: {{type(out_bench)}}") + if out_device == out_bench: + print("PASS: out_device and out_bench equals.") + else: + print("ERROR: out_device and out_bench is not equal!") + else: + print(f"ERROR: comparison of type {{type(out_bench)}} is not supported.") + return None + + +def compare(out_device, out_bench, api_name): + print("Compare result:") + if type(out_device) != type(out_bench): + print("ERROR: out_device and out_bench is not the same type!") + print("Compare finished.") + return None + if isinstance(out_bench, (list, tuple)): + print(f"data type: {{type(out_bench)}}") + if len(out_device) != len(out_bench): + print("ERROR: len of out_device and out_bench is 
different!") + print("Compare finished.") + return None + for index, _ in enumerate(out_bench): + print(f"index {{index}}:") + compare_element(out_device[index], out_bench[index], api_name) + else: + compare_element(out_device, out_bench, api_name) + print("Compare finished.") + + +device = get_device() +api_name = {api_name} +torch.manual_seed({random_seed}) +for i in range({iter_times}): + args_device, kwargs_device, args_bench, kwargs_bench = get_input() + output_device = exec_api_device(args_device, kwargs_device) + output_bench = exec_api_bench(args_bench, kwargs_bench) + compare(output_device, output_bench, api_name) -- Gitee From c6b713996d81ab95564d2454aeadd5a25927bdd8 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Tue, 4 Jun 2024 22:40:03 +0800 Subject: [PATCH 002/333] bugfix test forward compare --- .../generate_op_script/operator_replication.template | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 54d1ab01e35..5e4f7cae519 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -205,7 +205,7 @@ def compare_tensor(out_device, out_bench, api_name): else: min_eb, exponent_num = -126, 23 eb = torch.where(abs_bench == 0, torch.zeros(out_bench.shape), torch.floor(torch.log2(abs_bench))) - eb = torch.maximum(eb, min_eb) + eb = torch.maximum(eb, min_eb * torch.ones(out_bench.shape)) if dtype_device == torch.float32: ulp_err = (out_device.to(torch.float64) - out_bench).to(torch.float64) * torch.exp2(-eb + exponent_num).to(torch.float64) else: @@ -249,7 +249,7 @@ def compare_tensor(out_device, out_bench, api_name): rmse = compute_rmse(abs_err, normal_value_mask) error_balance = 
compute_error_balance(out_device, out_bench) print("compare standard: benchmark standard") - print(f"small value error proportion is {{small_value_error_proportion}}") + print(f"small value error proportion is {{small_value_err_proportion}}") print(f"maximum relative error is {{max_rel_err}}") print(f"mean relative error is {{mean_rel_err}}") print(f"root mean squared error is {{rmse}}") @@ -298,9 +298,10 @@ def compare(out_device, out_bench, api_name): device = get_device() -api_name = {api_name} +api_name = "{api_name}" torch.manual_seed({random_seed}) for i in range({iter_times}): + print(f"iter: {{i}}:") args_device, kwargs_device, args_bench, kwargs_bench = get_input() output_device = exec_api_device(args_device, kwargs_device) output_bench = exec_api_bench(args_bench, kwargs_bench) -- Gitee From 4eaa6a0df3166b9b0162a5ceb6cb179b55c0b324 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 02:37:04 +0800 Subject: [PATCH 003/333] fix op_generator.py --- .../generate_op_script/op_generator.py | 102 ++++++++++-------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index ba731f72394..69a76a2bc85 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -16,9 +16,6 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -NUMPY_TYPE = ["numpy.int8", "numpy.int16", "numpy.int32", "numpy.int64", "numpy.uint8", "numpy.uint16", "numpy.uint32", - "numpy.uint64", "numpy.float16", 
"numpy.float32", "numpy.float64", "numpy.float128", "numpy.complex64", - "numpy.complex128", "numpy.complex256", "numpy.bool_", "numpy.string_", "numpy.bytes_", "numpy.unicode_"] RAISE_PRECISION = { "torch.float16": "torch.float32", "torch.half": "torch.float32", @@ -29,82 +26,94 @@ RAISE_PRECISION = { ''' -user_settings could be adjusted by user. +user_settings could be set by user. keys: - full_api_name: api_type.api_name.ordinal_number direction_status: forward or backward json_path : path of forward json file or backward json file - mode : random_data_mode or real_data_mode - random_seed: if mode is random_data_mode, random seed is random_seed - iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter - real_data_path: path of real data - output_path : path of output files + mode : random_data or real_data + random_seed: if mode is random_data, random seed is random_seed; if mode is real_data, random_seed does not matter + iter_times: if mode is random_data, generate iter_times groups of data; if mode is real_data, iter_times does not matter ''' user_settings = { - "full_api_name": "Torch.matmul.83", "direction_status": "forward", "json_path": "", - "mode": "real_data", + "mode": "random_data", "random_seed": 1234, - "iter_times": 5, - "real_data_path": "", - "output_path": "" + "iter_times": 5 } -def check_full_api_name(full_api_name): +def check_json(json_path): ''' + api_full_name: api_type.api_name.ordinal_number {api_type}_{api_name}_{api调用次数} - two things: new name format, what parts of full_api_name is needed + two things: new name format, what parts of api_full_name is needed ''' - pass + json_file = os.path.realpath(json_path) + with open(json_file) as f: + json_content = json.load(f) + if not isinstance(json_content, dict): + raise ValueError("content of json file is not a dictionary!") + if len(list(json_content.items())) > 1: + raise ValueError("json file has more than one API, 
only one API is allowed!") + (api_full_name, api_info_dict) = list(json_content.items())[0] + (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + if api_type not in ("Functional", "Tensor", "Torch"): + raise ValueError("this type of API is not supported!") + return (api_full_name, api_info_dict) def check_user_settings(user_settings): - check_full_api_name(user_settings["full_api_name"]) - if user_settings["mode"] != "random" and user_settings["mode"] != "real_data": - raise Exception("Error: mode must be random or real_data!") - if user_settings["mode"] == "real_data": - pass - with open(user_settings["json_path"]) as f: - json_content = json.load(f) - (api_full_name, api_info_dict) = list(json_content.items())[0] - return api_info_dict + if user_settings.get("direction_status") not in ("forward", "backward"): + raise ValueError("direction_status should be forward or backward!") + if user_settings.get("mode") not in ("random_data","real_data"): + raise ValueError("mode should be random_data or real_data!") + r_seed = user_settings.get("random_seed") + if not isinstance(r_seed, int): + raise ValueError("random_seed should be an integer!") + iter_t = user_settings.get("iter_times") + if not isinstance(iter_t, int) or iter_t <= 0: + raise ValueError("iter_times should be an integer bigger than zero!") + (api_full_name, api_info_dict) = check_json(user_settings.get("json_path")) + return api_full_name, api_info_dict def get_settings(): ''' - internal_settings contain all information needed for the program. + internal_settings contain all information needed for the operator program. 
keys: - full_api_name: api_type.api_name.ordinal_number - api_type: type of api, should be Functional, Torch or Tensor - api_name: name of api + api_full_name: api_type.api_name.ordinal_number + api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch + api_name: name of API ordinal_number: how many times the same api has been called direction_status: forward or backward - json_path : path of forward json file or backward json file - mode : random_data_mode or real_data_mode - random_seed: if mode is random_data_mode, random seed is random_seed - iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter - real_data_path: path of real data - output_path : path of output files + random_seed: if mode is random_data, random seed is random_seed + iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter + args_element_assignment: code for args assignment + args_list_generator_device: code for generate args list on device + args_list_generator_bench: code for generate args list on bench + kwargs_value_assignment: code for kwargs assignment + kwargs_dict_generator_device: code for generate kwargs dict on device + kwargs_dict_generator_bench: code for generate kwargs dict on bench ''' - api_info_dict = check_user_settings(user_settings) + api_full_name, api_info_dict = check_user_settings(user_settings) args_info = api_info_dict.get("args") kwargs_info = api_info_dict.get("kwargs") internal_settings = {} - internal_settings["full_api_name"] = user_settings.get("full_api_name") - parts_of_full_api_name = internal_settings["full_api_name"].split(".", -1) - if parts_of_full_api_name[0] == "Functional": + internal_settings["api_full_name"] = api_full_name + (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + if api_type == "Functional": internal_settings["api_type"] = "torch.nn.functional" - elif 
parts_of_full_api_name[0] == "Tensor": + elif api_type == "Tensor": internal_settings["api_type"] = "torch.Tensor" else: internal_settings["api_type"] = "torch" - internal_settings["api_name"] = parts_of_full_api_name[1] - internal_settings["ordinal_number"] = parts_of_full_api_name[2] + internal_settings["api_name"] = api_name + internal_settings["ordinal_number"] = ordinal_number + internal_settings["direction_status"] = user_settings.get("direction_status") internal_settings["random_seed"] = user_settings.get("random_seed") - if user_settings.get("mode") == "random_data_mode": + if user_settings.get("mode") == "real_data": internal_settings["iter_times"] = 1 else: internal_settings["iter_times"] = user_settings.get("iter_times") @@ -244,7 +253,7 @@ def main(): internal_settings = get_settings() template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") - operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("full_api_name"))) + operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) try: with open(template_path, 'r') as ftemp, open(operator_script_path, 'w') as fout: @@ -258,4 +267,3 @@ def main(): if __name__ == "__main__": main() - print("Job done successfully.") -- Gitee From e5fb95bb631683c9f9641dac01b28121bb13b2e2 Mon Sep 17 00:00:00 2001 From: TAJh <2559659915@qq.com> Date: Wed, 5 Jun 2024 01:14:05 +0000 Subject: [PATCH 004/333] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: TAJh <2559659915@qq.com> --- debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py b/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py index 
993b5e2cf17..6ee91dd7e85 100644 --- a/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py +++ b/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py @@ -21,12 +21,13 @@ class ArgMaxWithValueUT(UTBase): len_args = len(args) self.axis = self.kwargs.get("axis") if self.kwargs else 0 self.keep_dims = self.kwargs.get("keep_dims") if self.kwargs else False + def forward_mindspore_impl(self, *args): x = args[0] net = ArgMaxWithValue(self.axis, self.keep_dims) out = net(x) return out - + def forward_pytorch_impl(self, *args): input_pt_x = args[0] value, index = torch.max(input_pt_x, self.axis, self.keep_dims) -- Gitee From 6e3e1112c233382f269c064754b48d54c098f453 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:11:56 +0800 Subject: [PATCH 005/333] bugfix real_data --- .../generate_op_script/op_generator.py | 21 +++++-------------- .../operator_replication.template | 8 ++++++- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 69a76a2bc85..d5edca540dd 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -16,13 +16,6 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -RAISE_PRECISION = { - "torch.float16": "torch.float32", - "torch.half": "torch.float32", - "torch.bfloat16": "torch.float32", - "torch.float32": "torch.float64", - "torch.float": "torch.float64" -} ''' @@ -163,10 +156,8 @@ def recursive_args_list(args_info, flag_device=False, 
flag_bench=False): if flag_device: args_list_generator += ".to(device)" if flag_bench: - data_dtype = arg.get("dtype") - raised_dtype = RAISE_PRECISION.get(data_dtype) - if raised_dtype: - args_list_generator += ".to(" + raised_dtype + ")" + args_list_generator += '.to(torch.device("cpu"))' + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("paramter_name") + ".dtype))" args_list_generator += ", " return args_list_generator @@ -210,12 +201,10 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += info.get("parameter_name") if info.get("type") in TENSOR_DATA_LIST: if flag_device: - kwargs_dict_generator += "to(device)" + kwargs_dict_generator += ".to(device)" if flag_bench: - data_dtype = info.get("dtype") - raised_dtype = RAISE_PRECISION.get(data_dtype) - if raised_dtype: - kwargs_dict_generator += "to(" + raised_dtype + ")" + kwargs_dict_generator += '.to(torch.device("cpu"))' + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("paramter_name") + ".dtype))" else: kwargs_dict_generator = "" if isinstance(info, list): diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 5e4f7cae519..714f60e300e 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -17,7 +17,13 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] 
- +RAISE_PRECISION = { + "torch.float16": torch.float32, + "torch.half": torch.float32, + "torch.bfloat16": torch.float32, + "torch.float32": torch.float64, + "torch.float": torch.float64 +} def get_device(): if torch.cuda.is_available(): -- Gitee From b3aad7c2e896d3df259162181e85309cd88ed92e Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:21:49 +0800 Subject: [PATCH 006/333] bugfix real_data --- .../api_accuracy_checker/generate_op_script/op_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index d5edca540dd..1964227f737 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -157,7 +157,7 @@ def recursive_args_list(args_info, flag_device=False, flag_bench=False): args_list_generator += ".to(device)" if flag_bench: args_list_generator += '.to(torch.device("cpu"))' - args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("paramter_name") + ".dtype))" + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("parameter_name") + ".dtype))" args_list_generator += ", " return args_list_generator @@ -204,7 +204,7 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += ".to(device)" if flag_bench: kwargs_dict_generator += '.to(torch.device("cpu"))' - kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("paramter_name") + ".dtype))" + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("parameter_name") + ".dtype))" else: kwargs_dict_generator = "" if 
isinstance(info, list): -- Gitee From 45235137d93b90df94abd86fc1f7124e35524bbc Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:24:41 +0800 Subject: [PATCH 007/333] bugfix real_data --- .../generate_op_script/operator_replication.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 714f60e300e..ad9664e1778 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -17,13 +17,13 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -RAISE_PRECISION = { +RAISE_PRECISION = {{ "torch.float16": torch.float32, "torch.half": torch.float32, "torch.bfloat16": torch.float32, "torch.float32": torch.float64, "torch.float": torch.float64 -} +}} def get_device(): if torch.cuda.is_available(): -- Gitee From c8854fc92c0acda1ddb0bc12d4cc60d234f7e1a9 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:29:39 +0800 Subject: [PATCH 008/333] bugfix real_data --- .../api_accuracy_checker/generate_op_script/op_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 1964227f737..b50437e599a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ 
b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -157,7 +157,7 @@ def recursive_args_list(args_info, flag_device=False, flag_bench=False): args_list_generator += ".to(device)" if flag_bench: args_list_generator += '.to(torch.device("cpu"))' - args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("parameter_name") + ".dtype))" + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), " + arg.get("parameter_name") + ".dtype))" args_list_generator += ", " return args_list_generator @@ -204,7 +204,7 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += ".to(device)" if flag_bench: kwargs_dict_generator += '.to(torch.device("cpu"))' - kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("parameter_name") + ".dtype))" + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), " + info.get("parameter_name") + ".dtype))" else: kwargs_dict_generator = "" if isinstance(info, list): -- Gitee From a166c2db2590998751864d1b3b0b9541dd81cd06 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 12:54:09 +0800 Subject: [PATCH 009/333] bugfix from others opinion --- .../api_accuracy_checker/generate_op_script/op_generator.py | 4 ++-- .../generate_op_script/operator_replication.template | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index b50437e599a..97d4ba1d8ac 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -52,7 +52,7 @@ def check_json(json_path): (api_full_name, api_info_dict) = 
list(json_content.items())[0] (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) if api_type not in ("Functional", "Tensor", "Torch"): - raise ValueError("this type of API is not supported!") + raise ValueError("type {0} of API is not supported!".format(api_type)) return (api_full_name, api_info_dict) @@ -61,7 +61,7 @@ def check_user_settings(user_settings): raise ValueError("direction_status should be forward or backward!") if user_settings.get("mode") not in ("random_data","real_data"): raise ValueError("mode should be random_data or real_data!") - r_seed = user_settings.get("random_seed") + r_seed = user_settings.get("random_seed", 1234) if not isinstance(r_seed, int): raise ValueError("random_seed should be an integer!") iter_t = user_settings.get("iter_times") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index ad9664e1778..c0a571208bd 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -153,6 +153,9 @@ def compare_tensor(out_device, out_bench, api_name): if torch.numel(out_bench) == 0: print("Both out_device and out_bench have zero elements.") return None + print(f"shape is {{out_bench.shape}}") + print(f"dtype of out_device is {{out_device.dtype}}") + print(f"dtype of out_bench is {{out_bench.dtype}}") dtype_device = out_device.dtype dtype_bench = out_bench.dtype if str(dtype_device) in TORCH_FLOAT_TYPE and str(dtype_bench) in TORCH_FLOAT_TYPE \ -- Gitee From 738da5a838787dcdae650b212627c2fa0bcb84c4 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 17:37:32 +0800 Subject: [PATCH 010/333] script generated not rely on accuracy_tools --- .../generate_op_script/op_generator.py | 15 +++++++++++++- .../operator_replication.template | 
20 +++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 97d4ba1d8ac..b1bd26e711d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -8,6 +8,8 @@ try: except ImportError: pass +from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi + TENSOR_DATA_LIST = ["torch.Tensor"] TORCH_BOOL_TYPE = ["torch.bool"] @@ -39,7 +41,7 @@ user_settings = { def check_json(json_path): ''' api_full_name: api_type.api_name.ordinal_number - {api_type}_{api_name}_{api调用次数} + {api_type}.{api_name}.{api调用次数} two things: new name format, what parts of api_full_name is needed ''' json_file = os.path.realpath(json_path) @@ -71,6 +73,16 @@ def check_user_settings(user_settings): return api_full_name, api_info_dict +def get_compare_standard(api_name): + if api_name in BinaryStandardApi: + return "CompareStandard.BINARY_EQUALITY_STANDARD" + if api_name in AbsoluteStandardApi: + return "CompareStandard.ABSOLUTE_THRESHOLD_STANDARD" + if api_name in ULPStandardApi: + return "CompareStandard.ULP_ERROR_STANDARD" + return "CompareStandard.BENCHMARK_STANDARD" + + def get_settings(): ''' internal_settings contain all information needed for the operator program. 
@@ -103,6 +115,7 @@ def get_settings(): else: internal_settings["api_type"] = "torch" internal_settings["api_name"] = api_name + internal_settings["compare_standard"] = get_compare_standard(api_name) internal_settings["ordinal_number"] = ordinal_number internal_settings["direction_status"] = user_settings.get("direction_status") internal_settings["random_seed"] = user_settings.get("random_seed") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index c0a571208bd..aff89c6a4d9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -1,14 +1,13 @@ import json import os import math +from enum import Enum import torch try: import torch_npu except ImportError: pass -from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi - TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] TORCH_BOOL_TYPE = ["torch.bool"] @@ -25,6 +24,14 @@ RAISE_PRECISION = {{ "torch.float": torch.float64 }} + +class CompareStandard(Enum): + BINARY_EQUALITY_STANDARD = auto() + ABSOLUTE_THRESHOLD_STANDARD = auto() + ULP_ERROR_STANDARD = auto() + BENCHMARK_STANDARD = auto() + + def get_device(): if torch.cuda.is_available(): device = torch.device("cuda") @@ -162,8 +169,8 @@ def compare_tensor(out_device, out_bench, api_name): or str(dtype_device) in TORCH_INT_TYPE and str(dtype_bench) in TORCH_INT_TYPE \ or str(dtype_device) in TORCH_BOOL_TYPE and str(dtype_bench) in TORCH_BOOL_TYPE: out_device = out_device.to(torch.device("cpu")) - if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or api_name in BinaryStandardApi: - print("compare standard: binary consistency standard:") + if str(dtype_device) in TORCH_BOOL_TYPE or 
str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: + print("compare standard: binary equality standard:") error_number = torch.sum(out_device != out_bench).item() error_rate = error_number / torch.numel(out_bench) print(f"error rate is {{error_rate}}.") @@ -180,7 +187,7 @@ def compare_tensor(out_device, out_bench, api_name): bench_finite_mask = torch.isfinite(out_bench.to(dtype_device)) both_finite_mask = torch.logical_and(device_finite_mask, bench_finite_mask) inf_nan_mask = torch.logical_not(both_finite_mask) - if api_name in AbsoluteStandardApi: + if compare_standard == CompareStandard.ABSOLUTE_THRESHOLD_STANDARD: if dtype_device == torch.float16: rtol, small_value, small_value_atol = 1.0e-3, 1.0e-3, 1.0e-5 elif dtype_device == torch.bfloat16: @@ -206,7 +213,7 @@ def compare_tensor(out_device, out_bench, api_name): print("compare standard: absolute threshold standard") print(f"relative error ratio is {{rel_err_proportion}}") print(f"absolute error ratio is {{abs_err_proportion}}") - elif api_name in ULPStandardApi: + elif compare_standard == CompareStandard.ULP_ERROR_STANDARD: if dtype_device == torch.float16: min_eb, exponent_num = -14, 10 elif dtype_device == torch.bfloat16: @@ -308,6 +315,7 @@ def compare(out_device, out_bench, api_name): device = get_device() api_name = "{api_name}" +compare_standard = {compare_standard} torch.manual_seed({random_seed}) for i in range({iter_times}): print(f"iter: {{i}}:") -- Gitee From 7cdd0637b593f764d31e457b444e8af0122e25d0 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 12:41:32 +0800 Subject: [PATCH 011/333] add argparse --- .../generate_op_script/op_generator.py | 88 ++++++++++--------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index b1bd26e711d..48f64fb1e6d 100644 --- 
a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +import argparse import json import os import math @@ -20,30 +37,7 @@ TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.floa TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -''' -user_settings could be set by user. 
-keys: - direction_status: forward or backward - json_path : path of forward json file or backward json file - mode : random_data or real_data - random_seed: if mode is random_data, random seed is random_seed; if mode is real_data, random_seed does not matter - iter_times: if mode is random_data, generate iter_times groups of data; if mode is real_data, iter_times does not matter -''' -user_settings = { - "direction_status": "forward", - "json_path": "", - "mode": "random_data", - "random_seed": 1234, - "iter_times": 5 -} - - def check_json(json_path): - ''' - api_full_name: api_type.api_name.ordinal_number - {api_type}.{api_name}.{api调用次数} - two things: new name format, what parts of api_full_name is needed - ''' json_file = os.path.realpath(json_path) with open(json_file) as f: json_content = json.load(f) @@ -58,18 +52,11 @@ def check_json(json_path): return (api_full_name, api_info_dict) -def check_user_settings(user_settings): - if user_settings.get("direction_status") not in ("forward", "backward"): - raise ValueError("direction_status should be forward or backward!") - if user_settings.get("mode") not in ("random_data","real_data"): - raise ValueError("mode should be random_data or real_data!") - r_seed = user_settings.get("random_seed", 1234) - if not isinstance(r_seed, int): - raise ValueError("random_seed should be an integer!") - iter_t = user_settings.get("iter_times") - if not isinstance(iter_t, int) or iter_t <= 0: +def check_user_settings(cmd_args): + iter_t = cmd_args.iter_times + if iter_t <= 0: raise ValueError("iter_times should be an integer bigger than zero!") - (api_full_name, api_info_dict) = check_json(user_settings.get("json_path")) + (api_full_name, api_info_dict) = check_json(cmd_args.forward_json_path) return api_full_name, api_info_dict @@ -83,7 +70,7 @@ def get_compare_standard(api_name): return "CompareStandard.BENCHMARK_STANDARD" -def get_settings(): +def get_settings(cmd_args): ''' internal_settings contain all information needed for 
the operator program. keys: @@ -91,7 +78,7 @@ def get_settings(): api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch api_name: name of API ordinal_number: how many times the same api has been called - direction_status: forward or backward + direction_status: forward random_seed: if mode is random_data, random seed is random_seed iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter args_element_assignment: code for args assignment @@ -101,7 +88,7 @@ def get_settings(): kwargs_dict_generator_device: code for generate kwargs dict on device kwargs_dict_generator_bench: code for generate kwargs dict on bench ''' - api_full_name, api_info_dict = check_user_settings(user_settings) + api_full_name, api_info_dict = check_user_settings(cmd_args) args_info = api_info_dict.get("args") kwargs_info = api_info_dict.get("kwargs") @@ -117,12 +104,12 @@ def get_settings(): internal_settings["api_name"] = api_name internal_settings["compare_standard"] = get_compare_standard(api_name) internal_settings["ordinal_number"] = ordinal_number - internal_settings["direction_status"] = user_settings.get("direction_status") - internal_settings["random_seed"] = user_settings.get("random_seed") - if user_settings.get("mode") == "real_data": + internal_settings["direction_status"] = "forward" + internal_settings["random_seed"] = cmd_args.random_seed + if cmd_args.mode == "real_data": internal_settings["iter_times"] = 1 else: - internal_settings["iter_times"] = user_settings.get("iter_times") + internal_settings["iter_times"] = cmd_args.iter_times internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info) internal_settings["args_list_generator_device"] = generate_args_list_device(args_info) internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info) @@ -251,8 +238,25 @@ def generate_kwargs_dict_bench(kwargs_info): return kwargs_dict_generator_bench 
+def op_generator_parser(parser): + parser.add_argument("-forward", "forward_json_path", dest="forward_json_path", type=str, + help=" Path of forward API json file.", + required=True) + parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), + help=" Execute mode, should be random_data or real_data.", + required=True) + parser.add_argement("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, + help=" If mode is random_data, it is random seed.", + required=False) + parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, + help=" If mode is random_data, generate iter_times group of data." + required=False) + + def main(): - internal_settings = get_settings() + parser = argparse.ArgumentParser() + cmd_args = parser.parse_args() + internal_settings = get_settings(cmd_args) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) -- Gitee From eb723ff50b1c71202559fee491594533f1699720 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:43:00 +0800 Subject: [PATCH 012/333] forward full version --- .../generate_op_script/op_generator.py | 31 ++++++------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 48f64fb1e6d..8fda0dfac12 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -140,16 +140,12 @@ def generate_args_element_assignment_code(args_info): def recursive_args_list(args_info, flag_device=False, flag_bench=False): args_list_generator = "" for index, arg in enumerate(args_info): - if isinstance(arg, list): - 
args_list_generator += "[" - new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) - args_list_generator += new_args_list_generator - args_list_generator += "]" - elif isinstance(arg, tuple): - args_list_generator += "(" + if isinstance(arg, (list, tuple)): + (left_bracket, right_bracket) = ("[", "]") if isinstance(arg, list) else ("(", ")") + args_list_generator += left_bracket new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) args_list_generator += new_args_list_generator - args_list_generator += ")" + args_list_generator += right_bracket else: args_list_generator += arg.get("parameter_name") if arg.get("type") in TENSOR_DATA_LIST: @@ -206,19 +202,12 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += '.to(torch.device("cpu"))' kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), " + info.get("parameter_name") + ".dtype))" else: - kwargs_dict_generator = "" - if isinstance(info, list): - kwargs_dict_generator += "[" - for arg in info: - kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) - kwargs_dict_generator += ", " - kwargs_dict_generator += "]" - else: - kwargs_dict_generator += "(" - for arg in info: - kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) - kwargs_dict_generator += ", " - kwargs_dict_generator += ")" + (left_bracket, right_bracket) = ("[", "]") if isinstance(info, list) else ("(", ")") + kwargs_dict_generator += left_bracket + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += right_bracket return kwargs_dict_generator -- Gitee From 8fc903a8cd5d58e1d2b55cb88375cae99a952e9e Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 
14:49:50 +0800 Subject: [PATCH 013/333] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 8fda0dfac12..25ee5e3d96e 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -238,7 +238,7 @@ def op_generator_parser(parser): help=" If mode is random_data, it is random seed.", required=False) parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, - help=" If mode is random_data, generate iter_times group of data." + help=" If mode is random_data, generate iter_times group of data.", required=False) -- Gitee From bf58fddd6b60e9f5421c91a64b0fcaa4d614ed5d Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:54:45 +0800 Subject: [PATCH 014/333] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 25ee5e3d96e..09548d260da 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -243,7 +243,8 @@ def op_generator_parser(parser): def main(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser() + op_generator_parser(parser) cmd_args = parser.parse_args() internal_settings = get_settings(cmd_args) -- Gitee From c1171b5c8580d785da2f0881fba89851c1d96c91 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:56:52 +0800 Subject: [PATCH 015/333] bugfix --- 
.../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 09548d260da..d9356cf7a35 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -228,7 +228,7 @@ def generate_kwargs_dict_bench(kwargs_info): def op_generator_parser(parser): - parser.add_argument("-forward", "forward_json_path", dest="forward_json_path", type=str, + parser.add_argument("-forward", "--forward_json_path", dest="forward_json_path", type=str, help=" Path of forward API json file.", required=True) parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), -- Gitee From 922873bb72c656f9d49201d237dceb21257260d8 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:58:40 +0800 Subject: [PATCH 016/333] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index d9356cf7a35..7d3e2b226bd 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -234,7 +234,7 @@ def op_generator_parser(parser): parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), help=" Execute mode, should be random_data or real_data.", required=True) - parser.add_argement("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, + parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, help=" If mode 
is random_data, it is random seed.", required=False) parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, -- Gitee From c684a2bbd12112be134d1beae10acb229bba5763 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 15:04:06 +0800 Subject: [PATCH 017/333] bugfix --- .../generate_op_script/operator_replication.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index aff89c6a4d9..7630839aa93 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -1,7 +1,7 @@ import json import os import math -from enum import Enum +from enum import Enum, auto import torch try: import torch_npu -- Gitee From 56dbcfe85d673d944921665a0b7d32b022ca6620 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Thu, 20 Jun 2024 19:27:43 +0800 Subject: [PATCH 018/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/atat/pytorch/__init__.py | 1 + .../atat/pytorch/compare/acc_compare.py | 3 +- .../atat/pytorch/visualization/__init__.py | 0 .../pytorch/visualization/compare_tree.py | 313 ++++++++++++++++++ .../pytorch/visualization/graph/__init__.py | 0 .../pytorch/visualization/graph/base_node.py | 97 ++++++ .../atat/pytorch/visualization/graph/graph.py | 28 ++ .../visualization/graph/graph_builder.py | 53 +++ .../pytorch/visualization/graph/node_op.py | 24 ++ .../atat/pytorch/visualization/graph_utils.py | 31 ++ .../pytorch/visualization/json_parse_graph.py | 211 ++++++++++++ 11 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 
debug/accuracy_tools/atat/pytorch/visualization/__init__.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py diff --git a/debug/accuracy_tools/atat/pytorch/__init__.py b/debug/accuracy_tools/atat/pytorch/__init__.py index 482e850f7ba..198cea96de8 100644 --- a/debug/accuracy_tools/atat/pytorch/__init__.py +++ b/debug/accuracy_tools/atat/pytorch/__init__.py @@ -2,3 +2,4 @@ from .debugger.precision_debugger import PrecisionDebugger from .common.utils import seed_all from .compare.acc_compare import compare from .compare.distributed_compare import compare_distributed +from .visualization.json_parse_graph import compare_graph, build_graph diff --git a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py index be98a51c5a9..1de3c2addb2 100644 --- a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py @@ -861,8 +861,9 @@ def compare_process(file_handles, stack_mode, fuzzy_match, summary_compare=False if npu_ops_queue: for npu_data in npu_ops_queue: get_un_match_accuracy(result, npu_data, md5_compare, summary_compare) + result_to_csv(md5_compare, summary_compare, stack_mode, result, output_csv_handle) - header = [] +def result_to_csv(md5_compare, summary_compare, stack_mode, result, output_csv_handle): if md5_compare: header = 
CompareConst.MD5_COMPARE_RESULT_HEADER[:] elif summary_compare: diff --git a/debug/accuracy_tools/atat/pytorch/visualization/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py new file mode 100644 index 00000000000..15d744280ad --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -0,0 +1,313 @@ +import os +import json +import stat +from .graph_utils import ToolTip, Suggestions +from .graph.node_op import NodeOp +from ..compare.acc_compare import read_op, merge_tensor, get_accuracy, result_to_csv +from ...core.utils import CompareConst, Const + + +class CompareTree: + def __init__(self, trees, datas, stack_json_data, csv_path, compare_modes, stack_mode=True): + self.tree_n = trees[0] + self.tree_b = trees[1] + self.data_n_dict = datas[0] + self.data_b_dict = datas[1] + self.csv_path = csv_path + self.b_nodes_by_value = {} + self.to_csv_result = [] + self.md5_compare = compare_modes[0] + self.summary_compare = compare_modes[1] + self.real_data_compare = self.summary_compare is False and self.md5_compare is False + self.stack_mode = stack_mode + self.stack_json_data = stack_json_data + self.real_data_compare_nodes = [] + self.fill_b_nodes_dict(self.tree_b) + self.compare_nodes(self.tree_n) + + # 获取节点所有祖先的列表 + @staticmethod + def get_ancestors(node): + ancestors = [] + current_node = node.upnode + while current_node: + ancestors.append(current_node.type) + current_node = current_node.upnode + return list(reversed(ancestors)) + + @staticmethod + def add_real_compare_node_error_key(node_data): + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + value['error_key'] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + node_data[key] = value + + @staticmethod + 
def add_summary_compare_node_error_key(node_data): + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + value['error_key'] = ['Max Magnitude Diff', 'Min Magnitude Diff', 'Mean Magnitude Diff', + 'L2norm Magnitude Diff'] + node_data[key] = value + + @staticmethod + def add_real_compare_suggestions(node): + if node.op == NodeOp.module: + node.suggestions['text'] = Suggestions.Module + node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + if node.op == NodeOp.function_api: + node.suggestions['text'] = Suggestions.API + node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + @staticmethod + def _match_data(data_dict, compare_data, key_list, id_list): + if len(key_list) != len(id_list): + return + for i, key in enumerate(key_list): + data = compare_data[id_list[i]] + if data is not None and 'nan' not in str(data): + data_dict[key] = compare_data[id_list[i]] + + @staticmethod + def _del_item_by_list(data_dict, del_list): + if isinstance(data_dict, dict): + for item in del_list: + if item in data_dict: + del data_dict[item] + + def have_same_ancestors(self, node_a, node_b): + """ + 比较两个节点的所有祖先是否相同 + Args: + node_a: NPU节点 + node_b: Bench节点 + Returns: bool + """ + ancestors_a = self.get_ancestors(node_a) + ancestors_b = self.get_ancestors(node_b) + return ancestors_a == ancestors_b, ancestors_a + + def fill_b_nodes_dict(self, node): + """ + 将树展开为dict,key为node唯一名称,value为node自身,方便根据node唯一名称查找node + """ + if node.type not in self.b_nodes_by_value: + self.b_nodes_by_value[node.type] = [] + self.b_nodes_by_value[node.type].append(node) + for subnode in node.subnodes: + self.fill_b_nodes_dict(subnode) + + def result_to_csv(self): + with os.fdopen(os.open(self.csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), + 'w+') as file_out: + result_to_csv(self.md5_compare, self.summary_compare, self.stack_mode, self.to_csv_result, file_out) + + def compare_nodes(self, node_n): + """ + 
递归比较NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查它们的祖先和参数信息,检查一致则进行精度数据比对 + Args: + node_n: NPU节点 + """ + if node_n.type in self.b_nodes_by_value: + for node_b in self.b_nodes_by_value[node_n.type]: + # 检查两个节点是否有完全相同的祖先链 + flag, ancestors = self.have_same_ancestors(node_n, node_b) + flag = flag and node_n.data_info == node_b.data_info + if flag: + # 如果祖先链相同,data_info相同,将node_b及其祖先添加到node_n的matched_node_link属性中 + ancestors.append(node_b.type) + node_n.matched_node_link = ancestors + node_b.matched_node_link = ancestors + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程比对接口 + compare_result_list = self.compare_node(node_n, node_b) + if compare_result_list: + self.to_csv_result.extend(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + + for subnode in node_n.subnodes: + self.compare_nodes(subnode) + + def compare_node(self, node_n, node_b): + """ + 调用acc_compare.py中的get_accuracy获得精度比对指标 + 真实数据比对模式无法获得精度比对指标,需要调用多进程比对接口 + Args: + node_n: NPU节点 + node_b: Bench节点 + + Returns: 包含参数信息和比对指标(真实数据比对模式除外)的list + """ + result = [] + merge_n = self.parse_node(node_n, self.data_n_dict) + merge_b = self.parse_node(node_b, self.data_b_dict) + get_accuracy(result, merge_n, merge_b, self.summary_compare, self.md5_compare) + return result + + def parse_node(self, node, data_dict): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) + if node.type in self.stack_json_data: + op_parsed_list.append( + {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) + else: + op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) + return merge_tensor(op_parsed_list, self.summary_compare, self.md5_compare) + + def add_compare_result_to_node(self, node, compare_result_list): + """ + 将比对结果添加到节点的输入输出数据中 + Args: + node: 节点 + compare_result_list: 包含参数信息和比对指标(真实数据比对模式除外)的list + """ + # 真实数据比对,先暂存节点,在多进程比对得到精度指标后,再将指标添加到节点 + if self.real_data_compare: + self.real_data_compare_nodes.append(node) 
+ return + compare_in_dict = {} + compare_out_dict = {} + # input和output比对数据分开 + for item in compare_result_list: + if 'output' in item[0]: + compare_out_dict[item[0]] = item + else: + compare_in_dict[item[0]] = item + if self.md5_compare: + precision_status_in = self.add_md5_compare_data(node.input_data, compare_in_dict) + precision_status_out = self.add_md5_compare_data(node.output_data, compare_out_dict) + # 所有输入输出md5比对通过,这个节点才算通过 + precision_status = precision_status_in and precision_status_out + node.data['precision_status'] = precision_status + # md5比对通过为1,否则0 + node.data['precision_index'] = 1 if precision_status else 0 + node.data['md5 Compare Result'] = CompareConst.PASS if precision_status else CompareConst.DIFF + elif self.summary_compare: + precision_status_in, precision_index_in = self.add_summary_compare_data(node.input_data, compare_in_dict) + precision_status_out, precision_index_out = self.add_summary_compare_data(node.output_data, + compare_out_dict) + precision_status = precision_status_in and precision_status_out + precision_index = min(precision_index_in, precision_index_out) + node.data['precision_status'] = precision_status + node.data['precision_index'] = precision_index + if not precision_status: + self.add_summary_compare_node_error_key(node.output_data) + self.add_real_compare_suggestions(node) + + def add_summary_compare_data(self, node_data, compare_data_dict): + precision_status = True + precision_index = 1 + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, + CompareConst.NORM_DIFF] + # 取npu和bench数据进行比较,用完删除 + del_list = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, + CompareConst.NPU_NORM, CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, + CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM] + key_list.extend(del_list) + id_list = [6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17] + self._match_data(value, compare_data, key_list, id_list) + # summary比对是否通过 + precision_status, precision_index = self._summary_compare_judgment(value, precision_status, + precision_index) + self._del_item_by_list(value, del_list) + node_data[key] = value + return precision_status, precision_index + + @staticmethod + def _summary_compare_judgment(data_dict, precision_status, precision_index): + max_magnitude_diff = 0 + item_dict = {(CompareConst.NPU_MAX, CompareConst.BENCH_MAX): (CompareConst.MAX_DIFF, 'Max Magnitude Diff'), + (CompareConst.NPU_MIN, CompareConst.BENCH_MIN): (CompareConst.MIN_DIFF, 'Min Magnitude Diff'), + (CompareConst.NPU_MEAN, CompareConst.BENCH_MEAN): (CompareConst.MEAN_DIFF, 'Mean Magnitude Diff'), + (CompareConst.NPU_NORM, CompareConst.BENCH_NORM): ( + CompareConst.NORM_DIFF, 'L2norm Magnitude Diff')} + for key, value in item_dict.items(): + if isinstance(data_dict.get(key[0]), (float, int)) and isinstance(data_dict.get(key[1]), (float, int)) \ + and isinstance(data_dict.get(value[0]), (float, int)): + magnitude_diff = abs(data_dict.get(value[0])) / ( + max(abs(data_dict.get(key[0])), abs(data_dict.get(key[1]))) + 1e-10) + magnitude_diff = 1 if magnitude_diff > 1 else magnitude_diff + data_dict[value[1]] = magnitude_diff + if magnitude_diff > 0.3: + precision_status = False + max_magnitude_diff = max(max_magnitude_diff, magnitude_diff) + precision_index = 1 - max_magnitude_diff + return precision_status, precision_index + + def add_md5_compare_data(self, node_data, compare_data_dict): + precision_status = True + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = ['md5 Compare Result'] + id_list = [8] + self._match_data(value, compare_data, key_list, id_list) + # md5比对是否通过 + if value.get('md5 Compare Result') != CompareConst.PASS: + precision_status = False + node_data[key] = value + return 
precision_status + + def add_real_compare_data(self, node_data, compare_data_dict): + min_thousandth = float(1) + numbers = [] + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + id_list = [6, 7, 8, 9, 10] + self._match_data(value, compare_data, key_list, id_list) + # 获取一个节点所有的输入或输出最小的双千指标 + thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) + # 可能是None,可能是非数字内容str + try: + thousandth = float(thousandth) + except (ValueError, TypeError): + thousandth = None + if thousandth is not None: + numbers.append(thousandth) + node_data[key] = value + # 双千指标都是None的异常情况 + if not numbers: + min_thousandth = None + else: + min_thousandth = min(numbers + [min_thousandth]) + return min_thousandth + + def get_tool_tip(self): + """ + 用于前端展示字段的具体含义 + """ + if self.summary_compare: + tips = { + CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, + CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, + CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, + CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} + elif self.md5_compare: + tips = { + Const.MD5: ToolTip.MD5} + else: + tips = { + CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.COSINE: ToolTip.COSINE, + CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, + CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} + return json.dumps(tips) + + + + diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py new file mode 100644 index 00000000000..8dfcfbe90bb --- /dev/null +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -0,0 +1,97 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BaseNode: + def __init__(self, node_op, node_type, up_node=None, is_forward=True): + self.op = node_op + self.type = node_type + self.id = node_type + self.data = {} + self.outputs = [] + self.inputs = [] + self.output_data = {} + self.input_data = {} + self.upnode = up_node + self.subnodes = [] + if up_node: + up_node.add_subnode(self) + self.is_forward = is_forward + self.pair = None + self.matched_node_link = [] + self.data_info = '' + self.suggestions = {} + + def __str__(self): + info = f'id:\t{self.id}' + return info + + @staticmethod + def _handle_item(data_dict): + del_list = ['requires_grad', 'data_name', 'full_op_name'] + for key, value in data_dict.items(): + if not isinstance(value, dict): + continue + for item in del_list: + if item in value: + del value[item] + BaseNode._formate_floats(value) + + return data_dict + + @staticmethod + def _formate_floats(data_dict): + for key, value in data_dict.items(): + if isinstance(value, float): + data_dict[key] = round(value, 6) + if isinstance(value, str): + # 将单引号删掉,None换成null避免前端解析错误 + value = value.replace("'", "").replace('None', 'null') + if value is None: + value = 'null' + if not isinstance(value, (list, tuple, dict, str)): + value = str(value) + data_dict[key] = value + + def 
get_info(self): + info = f'{self.id}\t{self.op}' + if not self.is_forward: + info += '(b)' + for key in self.data: + info += f'\n{key}:\t{self.data.get(key)}' + return info + + def add_subnode(self, node): + if node.id == self.id: + return + self.subnodes.append(node) + + def get_yaml_dict(self): + result = {} + result['id'] = self.id + result['node_type'] = self.op.value + result['type'] = self.type + result['data'] = self.data + result['output_data'] = self._handle_item(self.output_data) + result['input_data'] = self._handle_item(self.input_data) + result['outputs'] = [(edge_id, node.id) for edge_id, node in self.outputs] + result['inputs'] = [(edge_id, node.id) for edge_id, node in self.inputs] + result['upnode'] = self.upnode.id if self.upnode else 'None' + result['subnodes'] = [node.id for node in self.subnodes] + result['is_forward'] = self.is_forward + result['pair'] = self.pair.id if self.pair else 'None' + result['matched_node_link'] = self.matched_node_link + result['suggestions'] = self.suggestions + return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py new file mode 100644 index 00000000000..849a07a1081 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -0,0 +1,28 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class Graph: + def __init__(self): + self.root = None + self.recent_node = None + self.depth = 0 + self.node_map = {} + self.rawid_map = {} + + def __str__(self): + infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] + info = "\n".join(infos) + return info diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py new file mode 100644 index 00000000000..22bb2739f95 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py @@ -0,0 +1,53 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json + +from ....core.file_check_util import FileOpen + + +class GraphBuilder: + + @staticmethod + def export_to_json(filename, graph): + result = {} + result['root'] = graph.root.id if graph.root else 'None' + result['node'] = {} + GraphBuilder._export_dfs(graph.root, result['node']) + with FileOpen(filename, 'w') as f: + f.write(json.dumps(result, indent=4)) + + @staticmethod + def get_graph_result(graph): + result = {} + result['root'] = graph.root.id if graph.root else 'None' + result['node'] = {} + GraphBuilder._export_dfs(graph.root, result['node']) + return result + + @staticmethod + def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): + result = {} + result['NPU'] = GraphBuilder.get_graph_result(graph_n) + result['Bench'] = GraphBuilder.get_graph_result(graph_b) + result['Tooltip'] = tool_tip + with FileOpen(filename, 'w') as f: + f.write(json.dumps(result, indent=4)) + + @staticmethod + def _export_dfs(node, result): + info = node.get_yaml_dict() + result[node.id] = info + for subnode in node.subnodes: + GraphBuilder._export_dfs(subnode, result) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py new file mode 100644 index 00000000000..3249df10c4e --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum + + +class NodeOp(Enum): + module = 1 + function_api = 2 + module_api = 3 + tensor = 4 + output = 5 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py new file mode 100644 index 00000000000..193dbaf18e5 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py @@ -0,0 +1,31 @@ +class ToolTip: + MAX_DIFF = 'NPU与标杆API统计信息比对,最大值的差值' + MIN_DIFF = 'NPU与标杆API统计信息比对,最小值的差值' + MEAN_DIFF = 'NPU与标杆API统计信息比对,平均值的差值' + NORM_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值' + MAX_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最大值的差值相对误差' + MIN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最小值的差值相对误差' + MEAN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,平均值的差值相对误差' + NORM_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值相对误差' + MD5 = '数据MD5信息,用于比较两个数据信息是否完全一致' + ONE_THOUSANDTH_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差大于千分之一的比例占总元素个数的比例小于千分之一' + COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' + MAX_ABS_ERR = '当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001' + MAX_RELATIVE_ERR = '当最大相对误差越接近0表示其计算的误差越小。当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象' + + +class Suggestions: + Module = '此模块精度比对结果疑似异常,请使用ptdbg工具对模块中的api进行dump比对' + API = '此api精度比对结果疑似异常,请使用api accuracy checker工具对api进行精度检测' + PTDBG = 'ptdbg工具' + PTDBG_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/ptdbg_ascend' + API_ACCURACY_CHECKER = 'api accuracy checker工具' + API_ACCURACY_CHECKER_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker' + + +class Const: + CONSTRUCT_FILE = 'construct.json' + DUMP_FILE = 'dump.json' + STACK_FILE = 'stack.json' + GRAPH_FILE = 'graph.vis' + CSV_NAME = 'compare_result' \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py 
b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py new file mode 100644 index 00000000000..178b1b3d281 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py @@ -0,0 +1,211 @@ +import os +import json +import re +import time +import pandas as pd +from .compare_tree import CompareTree +from .graph_utils import Const +from .graph.graph import Graph +from .graph.base_node import BaseNode +from .graph.node_op import NodeOp +from .graph.graph_builder import GraphBuilder +from ..compare.acc_compare import read_op, task_dumppath_get, _do_multi_process +from ...core.utils import add_time_as_suffix +from ...core.file_check_util import FileOpen, FileChecker, FileCheckConst, create_directory + + +def _load_json_file(file_path): + try: + with FileOpen(file_path, 'r') as file: + file_dict = json.load(file) + if not isinstance(file_dict, dict): + return {} + return file_dict + except json.JSONDecodeError: + return {} + + +def _get_data_inputs_outputs(data_dict: dict): + input_args = data_dict.get('input_args', []) + input_kwargs = data_dict.get('input_kwargs', {}) + output = data_dict.get('output', []) + + input_args = input_args if isinstance(input_args, list) else [] + input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} + output = output if isinstance(output, list) else [] + return input_args, input_kwargs, output + + +def _add_node_data(node_data, node): + """ + acc_compare read_op 解析数据 + """ + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node.type) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if 'output' in full_op_name: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + node.input_data = input_data + node.output_data = output_data + + +def _get_data_info(item): + if isinstance(item, dict): + return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) + elif 
isinstance(item, (list, tuple)): + return str([_get_data_info(sub_item) for sub_item in item]) + return '' + + +def _process_node_data_info(items): + info_str = '' + for item in items: + info_str += _get_data_info(item) + return info_str + + +# 节点所有输入、输出的type、dtype和shape要一样 +def _get_node_data_info(input_args, input_kwargs, output): + return _process_node_data_info(input_args) + _process_node_data_info(input_kwargs) + _process_node_data_info(output) + + +def _get_node_op(node_name: str): + pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' + match = re.match(pattern, node_name) + if match: + return NodeOp.function_api + else: + return NodeOp.module + + +def build_tree(construct_dict, data_dict, root_name='NPU'): + # 创建一个字典来存储已经创建的节点,以便重用 + created_nodes = {} + root_node = BaseNode(NodeOp.module, root_name) + + # 创建一个函数来递归地创建或获取节点 + def get_or_create_node(op, name, up_node=None): + if name not in created_nodes: + # add data + base_node = BaseNode(op, name, up_node) + node_data = data_dict.get(name, {}) + input_args, input_kwargs, output = _get_data_inputs_outputs(node_data) + # 添加输入输出数据 + _add_node_data(node_data, base_node) + + # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 + data_info = _get_node_data_info(input_args, input_kwargs, output) + base_node.data_info = data_info + created_nodes[name] = base_node + elif up_node: + # 如果节点已经存在,但我们现在才知道它的上级节点 + created_nodes[name].upnode = up_node + up_node.add_subnode(created_nodes[name]) + return created_nodes[name] + + # 遍历字典,为每个键值对创建或获取节点 + for subnode, upnode in construct_dict.items(): + if upnode: + up_node = get_or_create_node(_get_node_op(upnode), upnode) + else: + up_node = root_node + get_or_create_node(_get_node_op(subnode), subnode, up_node) + + return root_node, created_nodes + + +def do_build_graph(construct_path, data_path, output_path): + construct_dict = _load_json_file(construct_path) + data_dict = _load_json_file(data_path).get('data', {}) + root_node, created_nodes = build_tree(construct_dict, data_dict, 
'root_node') + graph = Graph() + graph.root = root_node + graph.node_map = created_nodes + GraphBuilder.export_to_json(output_path, graph) + + +def do_compare_graph(construct_path_list, data_path_list, stack_path, output_path, csv_path): + dump_path_param = { + "npu_json_path": data_path_list[0], + "bench_json_path": data_path_list[1], + "stack_json_path": stack_path, + "is_print_compare_log": True + } + # 判断比对模式 + summary_compare, md5_compare = task_dumppath_get(dump_path_param) + + construct_n_dict = _load_json_file(construct_path_list[0]) + data_n_dict = _load_json_file(data_path_list[0]).get('data', {}) + root_n_node, created_n_nodes = build_tree(construct_n_dict, data_n_dict) + construct_b_dict = _load_json_file(construct_path_list[1]) + data_b_dict = _load_json_file(data_path_list[1]).get('data', {}) + root_b_node, created_b_nodes = build_tree(construct_b_dict, data_b_dict) + stack_json_data = _load_json_file(stack_path) + + compare_tree = CompareTree([root_n_node, root_b_node], [data_n_dict, data_b_dict], stack_json_data, + csv_path,[md5_compare, summary_compare]) + compare_tree.result_to_csv() + + if summary_compare is False and md5_compare is False: + # 真实数据比对,开启多进程比对得到精度指标,再写进已创建的csv中 + _do_multi_process(dump_path_param, csv_path) + # 从csv文件读取精度指标,添加到node节点中 + df = pd.read_csv(csv_path) + compare_data_dict = {row[0]: row.tolist() for index, row in df.iterrows()} + for node in compare_tree.real_data_compare_nodes: + min_thousandth_in = compare_tree.add_real_compare_data(node.input_data, compare_data_dict) + min_thousandth_out = compare_tree.add_real_compare_data(node.output_data, compare_data_dict) + if min_thousandth_in and min_thousandth_out: + change_percentage = abs(min_thousandth_in - min_thousandth_out) + else: + change_percentage = 0 + precision_status = True + if change_percentage > 0.1: + precision_status = False + # 精度不达标,双千指标标红 + CompareTree.add_real_compare_node_error_key(node.output_data) + # 添加建议 + 
CompareTree.add_real_compare_suggestions(node) + node.data['precision_status'] = precision_status + node.data['precision_index'] = 0 if change_percentage > 1 else 1 - change_percentage + + graph_n = Graph() + graph_n.root = root_n_node + graph_n.node_map = created_n_nodes + graph_b = Graph() + graph_b.root = root_b_node + graph_n.node_map = created_b_nodes + start_time = time.time() + GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, compare_tree.get_tool_tip()) + end_time = time.time() + print('export_graphs_to_yaml', end_time - start_time) + + +def compare_graph(dump_path_n, dump_path_b, out_path): + create_directory(out_path) + n_path_checker = FileChecker(dump_path_n, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) + n_path_checker.common_check() + b_path_checker = FileChecker(dump_path_b, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) + b_path_checker.common_check() + construct_path_n = os.path.join(dump_path_n, Const.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, Const.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, Const.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, Const.DUMP_FILE) + stack_path = os.path.join(dump_path_n, Const.STACK_FILE) + output_path = os.path.join(out_path, Const.GRAPH_FILE) + csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(Const.CSV_NAME)) + do_compare_graph([construct_path_n, construct_path_b], [data_path_n, data_path_b], + stack_path, output_path,csv_path) + + +def build_graph(dump_path, out_path): + create_directory(out_path) + path_checker = FileChecker(dump_path, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) + path_checker.common_check() + construct_path = os.path.join(dump_path, Const.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, Const.DUMP_FILE) + do_build_graph(construct_path, data_path, out_path) -- Gitee From e50c85984a04e5f9a16d0c378bcc44d5c5b8e603 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 21 Jun 2024 10:47:08 +0800 
Subject: [PATCH 019/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/compare_tree.py | 96 +++++++++---------- .../pytorch/visualization/graph/base_node.py | 39 +++++++- .../atat/pytorch/visualization/graph_utils.py | 5 +- .../pytorch/visualization/json_parse_graph.py | 27 +++--- 4 files changed, 99 insertions(+), 68 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py index 15d744280ad..f4026248071 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -1,7 +1,7 @@ import os import json import stat -from .graph_utils import ToolTip, Suggestions +from .graph_utils import ToolTip, Suggestions, GraphConst from .graph.node_op import NodeOp from ..compare.acc_compare import read_op, merge_tensor, get_accuracy, result_to_csv from ...core.utils import CompareConst, Const @@ -25,9 +25,11 @@ class CompareTree: self.fill_b_nodes_dict(self.tree_b) self.compare_nodes(self.tree_n) - # 获取节点所有祖先的列表 @staticmethod def get_ancestors(node): + """ + 获取节点所有祖先的列表 + """ ancestors = [] current_node = node.upnode while current_node: @@ -37,10 +39,14 @@ class CompareTree: @staticmethod def add_real_compare_node_error_key(node_data): + """ + 精度疑似有问题,这些指标将在前端标红 + """ for key, value in node_data.items(): if not isinstance(value, dict): continue - value['error_key'] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + value[GraphConst.ERROR_KEY] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] node_data[key] = value @staticmethod @@ -48,12 +54,15 @@ class CompareTree: for key, value in node_data.items(): if not isinstance(value, dict): continue - value['error_key'] = ['Max Magnitude Diff', 
'Min Magnitude Diff', 'Mean Magnitude Diff', - 'L2norm Magnitude Diff'] + value[GraphConst.ERROR_KEY] = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] node_data[key] = value @staticmethod def add_real_compare_suggestions(node): + """ + 精度疑似有问题,给一些建议 + """ if node.op == NodeOp.module: node.suggestions['text'] = Suggestions.Module node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL @@ -61,21 +70,33 @@ class CompareTree: node.suggestions['text'] = Suggestions.API node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + @staticmethod + def convert_percentage_to_float(percentage_str): + """ + 百分比字符串转换为浮点型 + Args: + percentage_str: '0.00%'、'23.4%' + Returns: float 0.00、0.234 + """ + try: + percentage_str = percentage_str.replace('%', '') + return float(percentage_str) / 100 + except ValueError: + return 0 + @staticmethod def _match_data(data_dict, compare_data, key_list, id_list): + """ + 绑定精度指标到node的input_data和output_data中 + """ if len(key_list) != len(id_list): return for i, key in enumerate(key_list): data = compare_data[id_list[i]] - if data is not None and 'nan' not in str(data): + if data is not None and 'nan' not in str(data) and str(data) != ' ': data_dict[key] = compare_data[id_list[i]] - - @staticmethod - def _del_item_by_list(data_dict, del_list): - if isinstance(data_dict, dict): - for item in del_list: - if item in data_dict: - del data_dict[item] + else: + data_dict[key] = 'null' def have_same_ancestors(self, node_a, node_b): """ @@ -199,47 +220,26 @@ class CompareTree: def add_summary_compare_data(self, node_data, compare_data_dict): precision_status = True - precision_index = 1 + max_relative_err = 0 for key, value in node_data.items(): if not isinstance(value, dict): continue compare_data = compare_data_dict.get(key) if compare_data: + # 对应比对结果csv的列 key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, - 
CompareConst.NORM_DIFF] - # 取npu和bench数据进行比较,用完删除 - del_list = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, - CompareConst.NPU_NORM, CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, - CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM] - key_list.extend(del_list) - id_list = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + id_list = [6, 7, 8, 9, 10, 11, 12, 13] self._match_data(value, compare_data, key_list, id_list) - # summary比对是否通过 - precision_status, precision_index = self._summary_compare_judgment(value, precision_status, - precision_index) - self._del_item_by_list(value, del_list) + # 相对误差大于0.5疑似有精度问题 + for item in key_list[4:]: + relative_err = CompareTree.convert_percentage_to_float(value.get(item)) + max_relative_err = max(max_relative_err, relative_err) node_data[key] = value - return precision_status, precision_index - - @staticmethod - def _summary_compare_judgment(data_dict, precision_status, precision_index): - max_magnitude_diff = 0 - item_dict = {(CompareConst.NPU_MAX, CompareConst.BENCH_MAX): (CompareConst.MAX_DIFF, 'Max Magnitude Diff'), - (CompareConst.NPU_MIN, CompareConst.BENCH_MIN): (CompareConst.MIN_DIFF, 'Min Magnitude Diff'), - (CompareConst.NPU_MEAN, CompareConst.BENCH_MEAN): (CompareConst.MEAN_DIFF, 'Mean Magnitude Diff'), - (CompareConst.NPU_NORM, CompareConst.BENCH_NORM): ( - CompareConst.NORM_DIFF, 'L2norm Magnitude Diff')} - for key, value in item_dict.items(): - if isinstance(data_dict.get(key[0]), (float, int)) and isinstance(data_dict.get(key[1]), (float, int)) \ - and isinstance(data_dict.get(value[0]), (float, int)): - magnitude_diff = abs(data_dict.get(value[0])) / ( - max(abs(data_dict.get(key[0])), abs(data_dict.get(key[1]))) + 1e-10) - magnitude_diff = 1 if magnitude_diff > 1 else magnitude_diff - data_dict[value[1]] = magnitude_diff - if magnitude_diff > 
0.3: - precision_status = False - max_magnitude_diff = max(max_magnitude_diff, magnitude_diff) - precision_index = 1 - max_magnitude_diff + if max_relative_err > 0.5: + precision_status = False + precision_index = 1 - max_relative_err return precision_status, precision_index def add_md5_compare_data(self, node_data, compare_data_dict): @@ -307,7 +307,3 @@ class CompareTree: CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} return json.dumps(tips) - - - - diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 8dfcfbe90bb..049eb487558 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import re class BaseNode: @@ -47,24 +48,52 @@ class BaseNode: for item in del_list: if item in value: del value[item] - BaseNode._formate_floats(value) + BaseNode._format_data(value) return data_dict @staticmethod - def _formate_floats(data_dict): + def _format_data(data_dict): + """ + 格式化数据,小数保留6位,处理一些异常值 + """ for key, value in data_dict.items(): - if isinstance(value, float): - data_dict[key] = round(value, 6) if isinstance(value, str): # 将单引号删掉,None换成null避免前端解析错误 value = value.replace("'", "").replace('None', 'null') - if value is None: + value = BaseNode._format_decimal_string(value) + if value is None or value == ' ': value = 'null' + if isinstance(value, float): + value = round(value, 6) if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value + @staticmethod + def _format_decimal_string(s): + """ + 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + """ + pattern = re.compile(r'\d+\.\d+%?') + matches = pattern.findall(s) + for match in matches: + is_percent = match.endswith('%') + number_str = match.rstrip('%') + decimal_part = number_str.split('.')[1] + # 如果小数位数大于6,进行处理 + if len(decimal_part) > 6: + number_float = float(number_str) + if is_percent: + number_float /= 100 + formatted_number = f"{number_float:.6f}" + # 如果原来是百分数,加回百分号 + if is_percent: + formatted_number += '%' + # 替换原字符串中的数值部分 + s = s.replace(match, formatted_number) + return s + def get_info(self): info = f'{self.id}\t{self.op}' if not self.is_forward: diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py index 193dbaf18e5..1f3598a442e 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py @@ -23,9 +23,10 @@ class Suggestions: API_ACCURACY_CHECKER_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker' -class Const: +class GraphConst: CONSTRUCT_FILE = 'construct.json' 
DUMP_FILE = 'dump.json' STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' - CSV_NAME = 'compare_result' \ No newline at end of file + CSV_NAME = 'compare_result' + ERROR_KEY = 'error_key' \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py index 178b1b3d281..8bf29c36879 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py @@ -4,7 +4,7 @@ import re import time import pandas as pd from .compare_tree import CompareTree -from .graph_utils import Const +from .graph_utils import GraphConst from .graph.graph import Graph from .graph.base_node import BaseNode from .graph.node_op import NodeOp @@ -54,6 +54,9 @@ def _add_node_data(node_data, node): def _get_data_info(item): + """ + 将api的参数信息拼接成字符串进行匹配 + """ if isinstance(item, dict): return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) elif isinstance(item, (list, tuple)): @@ -68,8 +71,10 @@ def _process_node_data_info(items): return info_str -# 节点所有输入、输出的type、dtype和shape要一样 def _get_node_data_info(input_args, input_kwargs, output): + """ + 节点所有输入、输出的type、dtype和shape要一样 + """ return _process_node_data_info(input_args) + _process_node_data_info(input_kwargs) + _process_node_data_info(output) @@ -191,13 +196,13 @@ def compare_graph(dump_path_n, dump_path_b, out_path): n_path_checker.common_check() b_path_checker = FileChecker(dump_path_b, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) b_path_checker.common_check() - construct_path_n = os.path.join(dump_path_n, Const.CONSTRUCT_FILE) - construct_path_b = os.path.join(dump_path_b, Const.CONSTRUCT_FILE) - data_path_n = os.path.join(dump_path_n, Const.DUMP_FILE) - data_path_b = os.path.join(dump_path_b, Const.DUMP_FILE) - stack_path = os.path.join(dump_path_n, Const.STACK_FILE) - output_path = 
os.path.join(out_path, Const.GRAPH_FILE) - csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(Const.CSV_NAME)) + construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) + stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) + output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) + csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(GraphConst.CSV_NAME)) do_compare_graph([construct_path_n, construct_path_b], [data_path_n, data_path_b], stack_path, output_path,csv_path) @@ -206,6 +211,6 @@ def build_graph(dump_path, out_path): create_directory(out_path) path_checker = FileChecker(dump_path, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) path_checker.common_check() - construct_path = os.path.join(dump_path, Const.CONSTRUCT_FILE) - data_path = os.path.join(dump_path, Const.DUMP_FILE) + construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) do_build_graph(construct_path, data_path, out_path) -- Gitee From 52ae930160301b53713a888620b4b2763a8cc147 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 21 Jun 2024 11:01:33 +0800 Subject: [PATCH 020/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/json_parse_graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py index 8bf29c36879..026bd47c472 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py @@ -213,4 +213,5 @@ def build_graph(dump_path, out_path): path_checker.common_check() construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) - do_build_graph(construct_path, data_path, out_path) + output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) + do_build_graph(construct_path, data_path, output_path) -- Gitee From b3f8020a22515309b0e5165b2fc36a194c8a8bad Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 24 Jun 2024 11:42:03 +0800 Subject: [PATCH 021/333] =?UTF-8?q?=E7=BB=9F=E8=AE=A1=E5=80=BC=E6=AF=94?= =?UTF-8?q?=E5=AF=B9bug=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../accuracy_tools/atat/pytorch/visualization/compare_tree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py index f4026248071..0ca80be88ce 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -239,6 +239,7 @@ class CompareTree: node_data[key] = value if max_relative_err > 0.5: precision_status = False + max_relative_err = 1 if max_relative_err > 1 else max_relative_err precision_index = 1 - max_relative_err return precision_status, precision_index @@ -254,7 +255,7 @@ class CompareTree: self._match_data(value, compare_data, key_list, id_list) # md5比对是否通过 if value.get('md5 Compare Result') != CompareConst.PASS: - precision_status = False + precision_status = False node_data[key] = value return precision_status -- Gitee From a1cdec6b6890a00b19f50ad5475c26afaa8aec6c Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 25 Jun 2024 11:19:08 +0800 Subject: [PATCH 022/333] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= 
=?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/compare_tree.py | 4 +++- .../atat/pytorch/visualization/graph/base_node.py | 7 ++++--- .../atat/pytorch/visualization/graph_utils.py | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py index 0ca80be88ce..3f2fb406aff 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -31,6 +31,8 @@ class CompareTree: 获取节点所有祖先的列表 """ ancestors = [] + if not node: + return ancestors current_node = node.upnode while current_node: ancestors.append(current_node.type) @@ -237,7 +239,7 @@ class CompareTree: relative_err = CompareTree.convert_percentage_to_float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value - if max_relative_err > 0.5: + if max_relative_err > GraphConst.MAX_RELATIVE_ERR: precision_status = False max_relative_err = 1 if max_relative_err > 1 else max_relative_err precision_index = 1 - max_relative_err diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 049eb487558..26713200ea7 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import re +from ..graph_utils import GraphConst class BaseNode: @@ -65,7 +66,7 @@ class BaseNode: if value is None or value == ' ': value = 'null' if isinstance(value, float): - value = round(value, 6) + value = round(value, GraphConst.DECIMAL) if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value @@ -82,11 +83,11 @@ class BaseNode: number_str = match.rstrip('%') decimal_part = number_str.split('.')[1] # 如果小数位数大于6,进行处理 - if len(decimal_part) > 6: + if len(decimal_part) > GraphConst.DECIMAL: number_float = float(number_str) if is_percent: number_float /= 100 - formatted_number = f"{number_float:.6f}" + formatted_number = f"{number_float:.{GraphConst.DECIMAL}f}" # 如果原来是百分数,加回百分号 if is_percent: formatted_number += '%' diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py index 1f3598a442e..eae5084677e 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py @@ -29,4 +29,6 @@ class GraphConst: STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' CSV_NAME = 'compare_result' - ERROR_KEY = 'error_key' \ No newline at end of file + ERROR_KEY = 'error_key' + DECIMAL = 6 + MAX_RELATIVE_ERR = 0.5 \ No newline at end of file -- Gitee From 1e099f5a5cf28b6d375dc2c06013c059f64ea416 Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:27:33 +0800 Subject: [PATCH 023/333] =?UTF-8?q?=E5=8F=AF=E8=A7=86=E5=8C=96=E9=87=8D?= =?UTF-8?q?=E6=9E=84=EF=BC=8C=E4=B8=8D=E4=BF=AE=E6=94=B9=E5=B7=B2=E7=9F=A5?= =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=8C=E4=BF=9D=E8=AF=81=E8=BE=93=E5=85=A5?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/__init__.py | 0 .../{graph => builder}/graph_builder.py | 45 +-- .../visualization/builder/graph_parser.py | 108 ++++++ 
.../pytorch/visualization/compare/__init__.py | 0 .../comparator.py} | 356 ++++++------------ .../visualization/compare/graph_comparator.py | 175 +++++++++ .../pytorch/visualization/graph/base_node.py | 32 +- .../atat/pytorch/visualization/graph/graph.py | 8 +- .../pytorch/visualization/graph/node_op.py | 9 + .../pytorch/visualization/json_parse_graph.py | 217 ----------- .../atat/pytorch/visualization/test.py | 81 ++++ .../{graph_utils.py => utils.py} | 65 +++- 12 files changed, 605 insertions(+), 491 deletions(-) create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py rename debug/accuracy_tools/atat/pytorch/visualization/{graph => builder}/graph_builder.py (59%) create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py rename debug/accuracy_tools/atat/pytorch/visualization/{compare_tree.py => compare/comparator.py} (31%) create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py delete mode 100644 debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/test.py rename debug/accuracy_tools/atat/pytorch/visualization/{graph_utils.py => utils.py} (50%) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py similarity index 59% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py rename to debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 22bb2739f95..76d476a996c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py 
+++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,39 +12,42 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json -from ....core.file_check_util import FileOpen +from .graph_parser import GraphParser +from ..utils import load_json_file, save_json_file class GraphBuilder: - + @staticmethod + def build(construct_path, data_path, model_name): + construct_dict = load_json_file(construct_path) + data_dict = load_json_file(data_path).get('data', {}) + graph = GraphParser().parse(construct_dict, data_dict, model_name) + return graph + @staticmethod def export_to_json(filename, graph): + result = GraphBuilder._get_graph_dict(graph) + save_json_file(filename, result) + + # todo 吧两个export归一 + @staticmethod + def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): result = {} - result['root'] = graph.root.id if graph.root else 'None' - result['node'] = {} - GraphBuilder._export_dfs(graph.root, result['node']) - with FileOpen(filename, 'w') as f: - f.write(json.dumps(result, indent=4)) - + result['NPU'] = GraphBuilder._get_graph_dict(graph_n) + result['Bench'] = GraphBuilder._get_graph_dict(graph_b) + result['Tooltip'] = tool_tip + save_json_file(filename, result) + @staticmethod - def get_graph_result(graph): + def _get_graph_dict(graph): result = {} result['root'] = graph.root.id if graph.root else 'None' result['node'] = {} + # todo 可以把这个遍历删除 GraphBuilder._export_dfs(graph.root, result['node']) return result - - @staticmethod - def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): - result = {} - result['NPU'] = GraphBuilder.get_graph_result(graph_n) - result['Bench'] = 
GraphBuilder.get_graph_result(graph_b) - result['Tooltip'] = tool_tip - with FileOpen(filename, 'w') as f: - f.write(json.dumps(result, indent=4)) - + @staticmethod def _export_dfs(node, result): info = node.get_yaml_dict() diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py new file mode 100644 index 00000000000..2227710b743 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py @@ -0,0 +1,108 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ...compare.acc_compare import read_op +from ..graph.graph import Graph +from ..graph.base_node import BaseNode +from ..graph.node_op import NodeOp + + +class GraphParser: + def __init__(self): + pass + + def parse(self, construct_dict, data_dict, model_name): + self.graph = Graph() + self.data_dict = data_dict + self.graph.root = BaseNode(NodeOp.module, model_name) + self.graph.node_map[model_name] = self.graph.root + self._init_nodes(construct_dict) + self.data_dict.clear() + return self.graph + + def _init_nodes(self, construct_dict): + for subnode, upnode in construct_dict.items(): + if upnode: + up_node = self._get_or_create_node(NodeOp.get_node_op(upnode), upnode) + else: + up_node = self.graph.root + self._get_or_create_node(NodeOp.get_node_op(subnode), subnode, up_node) + + # todo 这个函数也得改改 + def _get_or_create_node(self, op, name, up_node=None): + if name not in self.graph.node_map: + # add data + base_node = BaseNode(op, name, up_node) + node_data = self.data_dict.get(name, {}) + input_args, input_kwargs, output = GraphParser._get_data_inputs_outputs(node_data) + # 添加输入输出数据 + GraphParser._add_node_data(node_data, base_node) + + # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 + # 这个东西必须改了,todo + data_info = GraphParser._get_node_data_info(input_args, input_kwargs, output) + base_node.data_info = data_info + self.graph.node_map[name] = base_node + elif up_node: + # 如果节点已经存在,但是我们刚刚获取了他的上级节点 + # todo 这里要加个函数 + self.graph.node_map[name].upnode = up_node + up_node.add_subnode(self.graph.node_map[name]) + return self.graph.node_map[name] + + @staticmethod + def _get_data_inputs_outputs(data_dict: dict): + input_args = data_dict.get('input_args', []) + input_kwargs = data_dict.get('input_kwargs', {}) + output = data_dict.get('output', []) + + input_args = input_args if isinstance(input_args, list) else [] + input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} + output = output if isinstance(output, list) else [] + return input_args, input_kwargs, output + 
+ # todo 要加入basenode + @staticmethod + def _add_node_data(node_data, node): + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node.type) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if 'output' in full_op_name: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + node.input_data = input_data + node.output_data = output_data + + @staticmethod + def _get_data_info(item): + if isinstance(item, dict): + return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) + elif isinstance(item, (list, tuple)): + return str([GraphParser._get_data_info(sub_item) for sub_item in item]) + return '' + + @staticmethod + def _process_node_data_info(items): + info_str = '' + for item in items: + info_str += GraphParser._get_data_info(item) + return info_str + + @staticmethod + def _get_node_data_info(input_args, input_kwargs, output): + return GraphParser._process_node_data_info(input_args) + GraphParser._process_node_data_info(input_kwargs) + GraphParser._process_node_data_info(output) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py similarity index 31% rename from debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py rename to debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py index 3f2fb406aff..21f0ae9f394 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py @@ -1,95 +1,126 @@ -import os -import json -import stat -from .graph_utils import ToolTip, Suggestions, GraphConst -from .graph.node_op import NodeOp -from 
..compare.acc_compare import read_op, merge_tensor, get_accuracy, result_to_csv -from ...core.utils import CompareConst, Const - - -class CompareTree: - def __init__(self, trees, datas, stack_json_data, csv_path, compare_modes, stack_mode=True): - self.tree_n = trees[0] - self.tree_b = trees[1] - self.data_n_dict = datas[0] - self.data_b_dict = datas[1] - self.csv_path = csv_path - self.b_nodes_by_value = {} - self.to_csv_result = [] - self.md5_compare = compare_modes[0] - self.summary_compare = compare_modes[1] - self.real_data_compare = self.summary_compare is False and self.md5_compare is False - self.stack_mode = stack_mode - self.stack_json_data = stack_json_data - self.real_data_compare_nodes = [] - self.fill_b_nodes_dict(self.tree_b) - self.compare_nodes(self.tree_n) +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
- @staticmethod - def get_ancestors(node): - """ - 获取节点所有祖先的列表 - """ - ancestors = [] - if not node: - return ancestors - current_node = node.upnode - while current_node: - ancestors.append(current_node.type) - current_node = current_node.upnode - return list(reversed(ancestors)) +import json +from ....core.utils import CompareConst, Const +from ..utils import ToolTip, GraphConst, convert_percentage_to_float - @staticmethod - def add_real_compare_node_error_key(node_data): - """ - 精度疑似有问题,这些指标将在前端标红 - """ - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - value[GraphConst.ERROR_KEY] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, - CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] - node_data[key] = value - @staticmethod - def add_summary_compare_node_error_key(node_data): +class Comparator: + def __init__(self, summary_compare, md5_compare): + if summary_compare: #0 summary mode, 1 md5 mode, 2 true data mode + self.compare_mode = GraphConst.SUMMARY_COMPARE + elif md5_compare: + self.compare_mode = GraphConst.MD5_COMPARE + else: + self.compare_mode = GraphConst.REAL_DATA_COMPARE + self.csv_data = [] + self.compare_nodes = [] + + def parse_result(self, node, compare_data_dict): + """ + 根据结果返回数据,分别是precision_status,precision_index,和附加数据 + """ + other_dict = {} + if self.is_md5_compare(): + precision_status_in = Comparator.add_md5_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out = Comparator.add_md5_compare_data(node.output_data, compare_data_dict[1]) + # 所有输入输出md5对比通过,这个节点才算通过 + precision_status = precision_status_in and precision_status_out + precision_index = 1 if precision_status else 0 + other_result = CompareConst.PASS if precision_status else CompareConst.DIFF + other_dict['md5 Compare Result'] = other_result + elif self.is_summary_compare(): + precision_status_in, precision_index_in = Comparator.add_summary_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out, precision_index_out = 
Comparator.add_summary_compare_data(node.output_data, compare_data_dict[1]) + precision_status = precision_status_in and precision_status_out + precision_index = min(precision_index_in, precision_index_out) + else: + min_thousandth_in = Comparator.add_real_compare_data(node.input_data, compare_data_dict[0]) + min_thousandth_out = Comparator.add_real_compare_data(node.output_data, compare_data_dict[0]) + if min_thousandth_in and min_thousandth_out: + change_percentage = abs(min_thousandth_in - min_thousandth_out) + else: + change_percentage = 0 + precision_status = True + if change_percentage > 0.1: + precision_status = False + precision_index = 0 if change_percentage > 1 else 1 - change_percentage + return precision_status, precision_index, other_dict + + def prepare_real_data(self, node): + if self.is_real_data_compare(): + self.compare_nodes.append(node) + return True + return False + + # todo 改成私有 + def is_summary_compare(self): + return self.compare_mode == GraphConst.SUMMARY_COMPARE + + def is_md5_compare(self): + return self.compare_mode == GraphConst.MD5_COMPARE + + def is_real_data_compare(self): + return self.compare_mode == GraphConst.REAL_DATA_COMPARE + + def add_csv_data(self, compare_result_list): + if not self.is_real_data_compare(): + return + self.csv_data.extend(compare_result_list) + + def add_error_key(self, node_data): for key, value in node_data.items(): if not isinstance(value, dict): continue - value[GraphConst.ERROR_KEY] = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + if self.is_summary_compare(): + message = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + elif self.is_real_data_compare(): + message = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + else: + # todo 这个应该是bug,应该修复 + message = [] + value[GraphConst.ERROR_KEY] = message 
node_data[key] = value - - @staticmethod - def add_real_compare_suggestions(node): - """ - 精度疑似有问题,给一些建议 - """ - if node.op == NodeOp.module: - node.suggestions['text'] = Suggestions.Module - node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL - if node.op == NodeOp.function_api: - node.suggestions['text'] = Suggestions.API - node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL - - @staticmethod - def convert_percentage_to_float(percentage_str): + + def get_tool_tip(self): """ - 百分比字符串转换为浮点型 - Args: - percentage_str: '0.00%'、'23.4%' - Returns: float 0.00、0.234 + 用于前端展示字段的具体含义 """ - try: - percentage_str = percentage_str.replace('%', '') - return float(percentage_str) / 100 - except ValueError: - return 0 + if self.is_summary_compare(): + tips = { + CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, + CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, + CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, + CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} + elif self.is_md5_compare(): + tips = {Const.MD5: ToolTip.MD5} + else: + tips = { + CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.COSINE: ToolTip.COSINE, + CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, + CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} + # todo 这个要放在外面去 + return json.dumps(tips) @staticmethod def _match_data(data_dict, compare_data, key_list, id_list): """ - 绑定精度指标到node的input_data和output_data中 + 绑定精度指标到node的input_data和output_data """ if len(key_list) != len(id_list): return @@ -99,128 +130,9 @@ class CompareTree: data_dict[key] = compare_data[id_list[i]] else: data_dict[key] = 'null' - - def have_same_ancestors(self, node_a, node_b): - """ - 比较两个节点的所有祖先是否相同 - Args: - node_a: NPU节点 - node_b: Bench节点 - Returns: bool - """ - ancestors_a = self.get_ancestors(node_a) - ancestors_b = self.get_ancestors(node_b) - return ancestors_a == ancestors_b, ancestors_a - - def fill_b_nodes_dict(self, node): - """ - 
将树展开为dict,key为node唯一名称,value为node自身,方便根据node唯一名称查找node - """ - if node.type not in self.b_nodes_by_value: - self.b_nodes_by_value[node.type] = [] - self.b_nodes_by_value[node.type].append(node) - for subnode in node.subnodes: - self.fill_b_nodes_dict(subnode) - - def result_to_csv(self): - with os.fdopen(os.open(self.csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), - 'w+') as file_out: - result_to_csv(self.md5_compare, self.summary_compare, self.stack_mode, self.to_csv_result, file_out) - - def compare_nodes(self, node_n): - """ - 递归比较NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查它们的祖先和参数信息,检查一致则进行精度数据比对 - Args: - node_n: NPU节点 - """ - if node_n.type in self.b_nodes_by_value: - for node_b in self.b_nodes_by_value[node_n.type]: - # 检查两个节点是否有完全相同的祖先链 - flag, ancestors = self.have_same_ancestors(node_n, node_b) - flag = flag and node_n.data_info == node_b.data_info - if flag: - # 如果祖先链相同,data_info相同,将node_b及其祖先添加到node_n的matched_node_link属性中 - ancestors.append(node_b.type) - node_n.matched_node_link = ancestors - node_b.matched_node_link = ancestors - # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程比对接口 - compare_result_list = self.compare_node(node_n, node_b) - if compare_result_list: - self.to_csv_result.extend(compare_result_list) - self.add_compare_result_to_node(node_n, compare_result_list) - - for subnode in node_n.subnodes: - self.compare_nodes(subnode) - - def compare_node(self, node_n, node_b): - """ - 调用acc_compare.py中的get_accuracy获得精度比对指标 - 真实数据比对模式无法获得精度比对指标,需要调用多进程比对接口 - Args: - node_n: NPU节点 - node_b: Bench节点 - - Returns: 包含参数信息和比对指标(真实数据比对模式除外)的list - """ - result = [] - merge_n = self.parse_node(node_n, self.data_n_dict) - merge_b = self.parse_node(node_b, self.data_b_dict) - get_accuracy(result, merge_n, merge_b, self.summary_compare, self.md5_compare) - return result - - def parse_node(self, node, data_dict): - """ - 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 - """ - op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) - if node.type in 
self.stack_json_data: - op_parsed_list.append( - {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) - else: - op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) - return merge_tensor(op_parsed_list, self.summary_compare, self.md5_compare) - - def add_compare_result_to_node(self, node, compare_result_list): - """ - 将比对结果添加到节点的输入输出数据中 - Args: - node: 节点 - compare_result_list: 包含参数信息和比对指标(真实数据比对模式除外)的list - """ - # 真实数据比对,先暂存节点,在多进程比对得到精度指标后,再将指标添加到节点 - if self.real_data_compare: - self.real_data_compare_nodes.append(node) - return - compare_in_dict = {} - compare_out_dict = {} - # input和output比对数据分开 - for item in compare_result_list: - if 'output' in item[0]: - compare_out_dict[item[0]] = item - else: - compare_in_dict[item[0]] = item - if self.md5_compare: - precision_status_in = self.add_md5_compare_data(node.input_data, compare_in_dict) - precision_status_out = self.add_md5_compare_data(node.output_data, compare_out_dict) - # 所有输入输出md5比对通过,这个节点才算通过 - precision_status = precision_status_in and precision_status_out - node.data['precision_status'] = precision_status - # md5比对通过为1,否则0 - node.data['precision_index'] = 1 if precision_status else 0 - node.data['md5 Compare Result'] = CompareConst.PASS if precision_status else CompareConst.DIFF - elif self.summary_compare: - precision_status_in, precision_index_in = self.add_summary_compare_data(node.input_data, compare_in_dict) - precision_status_out, precision_index_out = self.add_summary_compare_data(node.output_data, - compare_out_dict) - precision_status = precision_status_in and precision_status_out - precision_index = min(precision_index_in, precision_index_out) - node.data['precision_status'] = precision_status - node.data['precision_index'] = precision_index - if not precision_status: - self.add_summary_compare_node_error_key(node.output_data) - self.add_real_compare_suggestions(node) - - def add_summary_compare_data(self, node_data, compare_data_dict): + + @staticmethod 
+ def add_summary_compare_data( node_data, compare_data_dict): precision_status = True max_relative_err = 0 for key, value in node_data.items(): @@ -233,19 +145,19 @@ class CompareTree: CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] id_list = [6, 7, 8, 9, 10, 11, 12, 13] - self._match_data(value, compare_data, key_list, id_list) + Comparator._match_data(value, compare_data, key_list, id_list) # 相对误差大于0.5疑似有精度问题 for item in key_list[4:]: - relative_err = CompareTree.convert_percentage_to_float(value.get(item)) + relative_err = convert_percentage_to_float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value - if max_relative_err > GraphConst.MAX_RELATIVE_ERR: + if max_relative_err > 0.5: precision_status = False - max_relative_err = 1 if max_relative_err > 1 else max_relative_err precision_index = 1 - max_relative_err return precision_status, precision_index - def add_md5_compare_data(self, node_data, compare_data_dict): + @staticmethod + def add_md5_compare_data( node_data, compare_data_dict): precision_status = True for key, value in node_data.items(): if not isinstance(value, dict): @@ -254,14 +166,15 @@ class CompareTree: if compare_data: key_list = ['md5 Compare Result'] id_list = [8] - self._match_data(value, compare_data, key_list, id_list) + Comparator._match_data(value, compare_data, key_list, id_list) # md5比对是否通过 if value.get('md5 Compare Result') != CompareConst.PASS: precision_status = False node_data[key] = value return precision_status - - def add_real_compare_data(self, node_data, compare_data_dict): + + @staticmethod + def add_real_compare_data(node_data, compare_data_dict): min_thousandth = float(1) numbers = [] for key, value in node_data.items(): @@ -272,7 +185,7 @@ class CompareTree: key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, 
CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] id_list = [6, 7, 8, 9, 10] - self._match_data(value, compare_data, key_list, id_list) + Comparator._match_data(value, compare_data, key_list, id_list) # 获取一个节点所有的输入或输出最小的双千指标 thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) # 可能是None,可能是非数字内容str @@ -288,25 +201,4 @@ class CompareTree: min_thousandth = None else: min_thousandth = min(numbers + [min_thousandth]) - return min_thousandth - - def get_tool_tip(self): - """ - 用于前端展示字段的具体含义 - """ - if self.summary_compare: - tips = { - CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, - CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, - CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, - CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} - elif self.md5_compare: - tips = { - Const.MD5: ToolTip.MD5} - else: - tips = { - CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, - CompareConst.COSINE: ToolTip.COSINE, - CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, - CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} - return json.dumps(tips) + return min_thousandth \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py new file mode 100644 index 00000000000..51f6bc34d41 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -0,0 +1,175 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json +import pandas as pd +from ..utils import Suggestions, GraphConst, load_json_file, write_csv_data +from ..graph.node_op import NodeOp +from .comparator import Comparator +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy +from ....core.utils import Const +from ...compare.acc_compare import task_dumppath_get, _do_multi_process + + +class GraphComparator: + def __init__(self, graphs, data_paths, stack_path, output_path): + self.graph_n = graphs[0] + self.graph_b = graphs[1] + self._parse_param(data_paths, stack_path, output_path) + + def _parse_param(self, data_paths, stack_path, output_path): + self.dump_path_param = { + 'npu_json_path': data_paths[0], + 'bench_json_path': data_paths[1], + 'stack_json_path': stack_path, + 'is_print_compare_log': True + } + self.output_path = output_path + summary_compare, md5_compare = task_dumppath_get(self.dump_path_param) + self.comparator = Comparator(summary_compare, md5_compare) + self.data_n_dict = load_json_file(data_paths[0]).get('data', {}) + self.data_b_dict = load_json_file(data_paths[1]).get('data', {}) + self.stack_json_data = load_json_file(stack_path) + + def compare(self): + self._compare_nodes(self.graph_n.root) + self._postcompare() + + def _postcompare(self): + if not self.comparator.is_real_data_compare(): + return + csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) + write_csv_data(csv_path, self.comparator.is_md5_compare(), self.comparator.is_summary_compare(), True, self.comparator.csv_data) + _do_multi_process(self.dump_path_param, csv_path) + # 从csv文件读取精度指标,添加到node节点中 + df = pd.read_csv(csv_path) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.comparator.compare_nodes: + precision_status, precision_index, _ = self.comparator.parse_result(node, [compare_data_dict]) + # todo 常量改成变量 + 
node.data['precision_status'] = precision_status + node.data['precision_index'] = precision_index + if not precision_status: + self.comparator.add_error_key(node.output_data) + self.add_suggestions(node) + if os.path.isfile(csv_path): + os.remove(csv_path) + + @staticmethod + def add_suggestions(node): + """ + 精度疑似有问题时,提供一些建议 + """ + if node.op == NodeOp.module: + node.suggestions['text'] = Suggestions.Module + node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + elif node.op == NodeOp.function_api: + node.suggestions['text'] = Suggestions.API + node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + def _compare_nodes(self, node_n): + """ + 递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 + 这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 + Args: + node_n: NPU节点 + """ + # todo 这个函数也需要改改,把is_matched删掉, + is_matched, node_b, ancestors = GraphComparator._match_node(node_n, self.graph_b) + if is_matched: + ancestors.append(node_b.type) + node_n.matched_node_link = ancestors + node_b.matched_node_link = ancestors + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 + compare_result_list = self.compare_node(node_n, node_b) + if compare_result_list: + self.comparator.add_csv_data(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + for subnode in node_n.subnodes: + self._compare_nodes(subnode) + + + @staticmethod + def _match_node(node_n, graph_b): + """ + 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 + 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 + 返回匹配结果,匹配到的系欸但,以及祖先列表 + """ + if node_n.id not in graph_b.node_map: + return False, None, None + node_b = graph_b.node_map[node_n.id] + if node_n.data_info != node_b.data_info: + return False, None, None + ancestors_n = node_n.get_ancestors() + ancestors_b = node_b.get_ancestors() + if ancestors_n != ancestors_b: + return False, None, None + return True, node_b, ancestors_n + + def compare_node(self, node_n, node_b): + """ + 调用acc_compare.py中的get_accuracy获得精度对比指标 
+ 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + Args: + node_n: NPU节点 + node_b: Bench节点 + Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + result = [] + # todo 写一个atat adpator + merge_n = self.parse_node(node_n, self.data_n_dict) + merge_b = self.parse_node(node_b, self.data_b_dict) + get_accuracy(result, merge_n, merge_b, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) + return result + + def parse_node(self, node, data_dict): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) + if node.type in self.stack_json_data: + op_parsed_list.append( + {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) + else: + op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) + return merge_tensor(op_parsed_list, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) + + def add_compare_result_to_node(self, node, compare_result_list): + """ + 将比对结果添加到节点的输入输出数据中 + Args: + node: 节点 + compare_result_list: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + # 真实数据比对,先暂存节点,在多进程对比得到精度指标后,再将指标添加到节点中 + if self.comparator.prepare_real_data(node): + return + compare_in_dict = {} + compare_out_dict = {} + # input和output对比数据分开 + for item in compare_result_list: + if 'output' in item[0]: + compare_out_dict[item[0]] = item + else: + compare_in_dict[item[0]] = item + precision_status, precision_index, other_dict = self.comparator.parse_result(node, [compare_in_dict, compare_out_dict]) + node.data['precision_status'] = precision_status + node.data['precision_index'] = precision_index + node.data.update(other_dict) + if not precision_status: + self.comparator.add_error_key(node.output_data) + self.add_suggestions(node) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 26713200ea7..5628e3e0ee3 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -12,15 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import re -from ..graph_utils import GraphConst class BaseNode: - def __init__(self, node_op, node_type, up_node=None, is_forward=True): + def __init__(self, node_op, node_id, up_node=None): self.op = node_op - self.type = node_type - self.id = node_type + self.type = node_id + self.id = node_id self.data = {} self.outputs = [] self.inputs = [] @@ -30,16 +30,17 @@ class BaseNode: self.subnodes = [] if up_node: up_node.add_subnode(self) - self.is_forward = is_forward + self.is_forward = True self.pair = None self.matched_node_link = [] self.data_info = '' self.suggestions = {} - + # todo 这些都在做什么,都应该确认一下 + def __str__(self): info = f'id:\t{self.id}' return info - + @staticmethod def _handle_item(data_dict): del_list = ['requires_grad', 'data_name', 'full_op_name'] @@ -66,7 +67,7 @@ class BaseNode: if value is None or value == ' ': value = 'null' if isinstance(value, float): - value = round(value, GraphConst.DECIMAL) + value = round(value, 6) if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value @@ -83,11 +84,11 @@ class BaseNode: number_str = match.rstrip('%') decimal_part = number_str.split('.')[1] # 如果小数位数大于6,进行处理 - if len(decimal_part) > GraphConst.DECIMAL: + if len(decimal_part) > 6: number_float = float(number_str) if is_percent: number_float /= 100 - formatted_number = f"{number_float:.{GraphConst.DECIMAL}f}" + formatted_number = f"{number_float:.6f}" # 如果原来是百分数,加回百分号 if is_percent: formatted_number += '%' @@ -125,3 +126,14 @@ class BaseNode: result['matched_node_link'] = self.matched_node_link result['suggestions'] = self.suggestions return result + + def get_ancestors(self): + """ + 获取节点所有祖先的列表 + """ + ancestors = [] + current_node = self.upnode + while 
current_node: + ancestors.append(current_node.id) + current_node = current_node.upnode + return list(reversed(ancestors)) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index 849a07a1081..347e8c2c88b 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -15,14 +15,14 @@ class Graph: + # todo,这里应该加入一些和图相关的操作 + # 可以把root node 的初始化放进Graph里面 def __init__(self): self.root = None - self.recent_node = None - self.depth = 0 self.node_map = {} - self.rawid_map = {} - + def __str__(self): infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] info = "\n".join(infos) return info + \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index 3249df10c4e..015a83abda6 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -14,6 +14,7 @@ # limitations under the License. 
from enum import Enum +import re class NodeOp(Enum): @@ -22,3 +23,11 @@ class NodeOp(Enum): module_api = 3 tensor = 4 output = 5 + + @staticmethod + def get_node_op(node_name: str): + pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' + if re.match(pattern, node_name): + return NodeOp.function_api + else: + return NodeOp.module diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py deleted file mode 100644 index 026bd47c472..00000000000 --- a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py +++ /dev/null @@ -1,217 +0,0 @@ -import os -import json -import re -import time -import pandas as pd -from .compare_tree import CompareTree -from .graph_utils import GraphConst -from .graph.graph import Graph -from .graph.base_node import BaseNode -from .graph.node_op import NodeOp -from .graph.graph_builder import GraphBuilder -from ..compare.acc_compare import read_op, task_dumppath_get, _do_multi_process -from ...core.utils import add_time_as_suffix -from ...core.file_check_util import FileOpen, FileChecker, FileCheckConst, create_directory - - -def _load_json_file(file_path): - try: - with FileOpen(file_path, 'r') as file: - file_dict = json.load(file) - if not isinstance(file_dict, dict): - return {} - return file_dict - except json.JSONDecodeError: - return {} - - -def _get_data_inputs_outputs(data_dict: dict): - input_args = data_dict.get('input_args', []) - input_kwargs = data_dict.get('input_kwargs', {}) - output = data_dict.get('output', []) - - input_args = input_args if isinstance(input_args, list) else [] - input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} - output = output if isinstance(output, list) else [] - return input_args, input_kwargs, output - - -def _add_node_data(node_data, node): - """ - acc_compare read_op 解析数据 - """ - input_data = {} - output_data = {} - op_parsed_list = read_op(node_data, node.type) - for 
item in op_parsed_list: - full_op_name = item.get('full_op_name', '') - if 'output' in full_op_name: - output_data[full_op_name] = item - else: - input_data[full_op_name] = item - node.input_data = input_data - node.output_data = output_data - - -def _get_data_info(item): - """ - 将api的参数信息拼接成字符串进行匹配 - """ - if isinstance(item, dict): - return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) - elif isinstance(item, (list, tuple)): - return str([_get_data_info(sub_item) for sub_item in item]) - return '' - - -def _process_node_data_info(items): - info_str = '' - for item in items: - info_str += _get_data_info(item) - return info_str - - -def _get_node_data_info(input_args, input_kwargs, output): - """ - 节点所有输入、输出的type、dtype和shape要一样 - """ - return _process_node_data_info(input_args) + _process_node_data_info(input_kwargs) + _process_node_data_info(output) - - -def _get_node_op(node_name: str): - pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' - match = re.match(pattern, node_name) - if match: - return NodeOp.function_api - else: - return NodeOp.module - - -def build_tree(construct_dict, data_dict, root_name='NPU'): - # 创建一个字典来存储已经创建的节点,以便重用 - created_nodes = {} - root_node = BaseNode(NodeOp.module, root_name) - - # 创建一个函数来递归地创建或获取节点 - def get_or_create_node(op, name, up_node=None): - if name not in created_nodes: - # add data - base_node = BaseNode(op, name, up_node) - node_data = data_dict.get(name, {}) - input_args, input_kwargs, output = _get_data_inputs_outputs(node_data) - # 添加输入输出数据 - _add_node_data(node_data, base_node) - - # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 - data_info = _get_node_data_info(input_args, input_kwargs, output) - base_node.data_info = data_info - created_nodes[name] = base_node - elif up_node: - # 如果节点已经存在,但我们现在才知道它的上级节点 - created_nodes[name].upnode = up_node - up_node.add_subnode(created_nodes[name]) - return created_nodes[name] - - # 遍历字典,为每个键值对创建或获取节点 - for subnode, upnode in 
construct_dict.items(): - if upnode: - up_node = get_or_create_node(_get_node_op(upnode), upnode) - else: - up_node = root_node - get_or_create_node(_get_node_op(subnode), subnode, up_node) - - return root_node, created_nodes - - -def do_build_graph(construct_path, data_path, output_path): - construct_dict = _load_json_file(construct_path) - data_dict = _load_json_file(data_path).get('data', {}) - root_node, created_nodes = build_tree(construct_dict, data_dict, 'root_node') - graph = Graph() - graph.root = root_node - graph.node_map = created_nodes - GraphBuilder.export_to_json(output_path, graph) - - -def do_compare_graph(construct_path_list, data_path_list, stack_path, output_path, csv_path): - dump_path_param = { - "npu_json_path": data_path_list[0], - "bench_json_path": data_path_list[1], - "stack_json_path": stack_path, - "is_print_compare_log": True - } - # 判断比对模式 - summary_compare, md5_compare = task_dumppath_get(dump_path_param) - - construct_n_dict = _load_json_file(construct_path_list[0]) - data_n_dict = _load_json_file(data_path_list[0]).get('data', {}) - root_n_node, created_n_nodes = build_tree(construct_n_dict, data_n_dict) - construct_b_dict = _load_json_file(construct_path_list[1]) - data_b_dict = _load_json_file(data_path_list[1]).get('data', {}) - root_b_node, created_b_nodes = build_tree(construct_b_dict, data_b_dict) - stack_json_data = _load_json_file(stack_path) - - compare_tree = CompareTree([root_n_node, root_b_node], [data_n_dict, data_b_dict], stack_json_data, - csv_path,[md5_compare, summary_compare]) - compare_tree.result_to_csv() - - if summary_compare is False and md5_compare is False: - # 真实数据比对,开启多进程比对得到精度指标,再写进已创建的csv中 - _do_multi_process(dump_path_param, csv_path) - # 从csv文件读取精度指标,添加到node节点中 - df = pd.read_csv(csv_path) - compare_data_dict = {row[0]: row.tolist() for index, row in df.iterrows()} - for node in compare_tree.real_data_compare_nodes: - min_thousandth_in = compare_tree.add_real_compare_data(node.input_data, 
compare_data_dict) - min_thousandth_out = compare_tree.add_real_compare_data(node.output_data, compare_data_dict) - if min_thousandth_in and min_thousandth_out: - change_percentage = abs(min_thousandth_in - min_thousandth_out) - else: - change_percentage = 0 - precision_status = True - if change_percentage > 0.1: - precision_status = False - # 精度不达标,双千指标标红 - CompareTree.add_real_compare_node_error_key(node.output_data) - # 添加建议 - CompareTree.add_real_compare_suggestions(node) - node.data['precision_status'] = precision_status - node.data['precision_index'] = 0 if change_percentage > 1 else 1 - change_percentage - - graph_n = Graph() - graph_n.root = root_n_node - graph_n.node_map = created_n_nodes - graph_b = Graph() - graph_b.root = root_b_node - graph_n.node_map = created_b_nodes - start_time = time.time() - GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, compare_tree.get_tool_tip()) - end_time = time.time() - print('export_graphs_to_yaml', end_time - start_time) - - -def compare_graph(dump_path_n, dump_path_b, out_path): - create_directory(out_path) - n_path_checker = FileChecker(dump_path_n, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) - n_path_checker.common_check() - b_path_checker = FileChecker(dump_path_b, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) - b_path_checker.common_check() - construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) - construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) - data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) - data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) - stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) - output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) - csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(GraphConst.CSV_NAME)) - do_compare_graph([construct_path_n, construct_path_b], [data_path_n, data_path_b], - stack_path, output_path,csv_path) - - -def build_graph(dump_path, 
out_path): - create_directory(out_path) - path_checker = FileChecker(dump_path, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) - path_checker.common_check() - construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) - data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) - output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) - do_build_graph(construct_path, data_path, output_path) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py b/debug/accuracy_tools/atat/pytorch/visualization/test.py new file mode 100644 index 00000000000..3bd43362a21 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/test.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +import time +import shutil +import filecmp +from .compare.graph_comparator import GraphComparator +from .utils import GraphConst +from .builder.graph_builder import GraphBuilder + + +def compare_graph(dump_path_n, dump_path_b, out_path): + # 对两个数据进行构图 + construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) + graph_n = GraphBuilder.build(construct_path_n, data_path_n, 'TestNet') + graph_b = GraphBuilder.build(construct_path_b, data_path_b, 'TestNet') + # 基于graph、stack和data进行比较 + stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) + graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) + graph_comparator.compare() + output_path = os.path.join(out_path, 'compare.vis') + GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, graph_comparator.comparator.get_tool_tip()) + +def build_graph(dump_path, out_path): + construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) + output_path = os.path.join(out_path, 'build.vis') + graph = GraphBuilder.build(construct_path, data_path, 'TestNet') + GraphBuilder.export_to_json(output_path, graph) + +def run_st(data_path): + start_time = time.time() + run_bench(data_path, 'output2') + end_time = time.time() + print('run_st time cost:', end_time - start_time) + # 比较output2的结果和output1 的bench结果差距 + for data_dir in os.listdir(data_path): + data_dir = os.path.join(data_path, data_dir) + if not os.path.isdir(data_dir): + continue + output1 = os.path.join(data_dir, 'output1') + output2 = os.path.join(data_dir, 'output2') + files = ['build.vis', 'compare.vis'] + for vis_file in files: + file1 = os.path.join(output1, vis_file) + file2 = os.path.join(output2, 
vis_file) + result = filecmp.cmp(file1, file2) + if result: + print('pass ' + file1) + else: + print('not pass ' + file1) + +def run_bench(data_path, output_dir): + for data_dir in os.listdir(data_path): + data_dir = os.path.join(data_path, data_dir) + if not os.path.isdir(data_dir): + continue + run_data_path = os.path.join(data_dir, 'data') + output_path = os.path.join(data_dir, output_dir) + if os.path.exists(output_path): + shutil.rmtree(output_path) + os.makedirs(output_path) + build_graph(run_data_path, output_path) + compare_graph(run_data_path, run_data_path, output_path) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py similarity index 50% rename from debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py rename to debug/accuracy_tools/atat/pytorch/visualization/utils.py index eae5084677e..ab02ef7d200 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -1,12 +1,62 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import os +import stat +from ...core.file_check_util import FileOpen +from ..compare.acc_compare import result_to_csv + + +def load_json_file(file_path): + try: + with FileOpen(file_path, 'r') as f: + file_dict = json.load(f) + if not isinstance(file_dict, dict): + return {} + return file_dict + except json.JSONDecodeError: + return {} + +def save_json_file(file_path, data): + with FileOpen(file_path, 'w') as f: + f.write(json.dumps(data, indent=4)) + +def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): + with os.fdopen(os.open(csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), 'w+') as f: + result_to_csv(md5_compare, summary_compare, stack, csv_data, f) + +def convert_percentage_to_float(percentage_str): + """ + 百分比字符串转换转换为浮点型 + Args: + percentage_str: '0.00%', '23.4%' + Returns: float 0.00, 0.234 + """ + try: + percentage_str = percentage_str.replace('%', '') + return float(percentage_str) / 100 + except ValueError: + return 0 + + class ToolTip: MAX_DIFF = 'NPU与标杆API统计信息比对,最大值的差值' MIN_DIFF = 'NPU与标杆API统计信息比对,最小值的差值' MEAN_DIFF = 'NPU与标杆API统计信息比对,平均值的差值' NORM_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值' - MAX_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最大值的差值相对误差' - MIN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最小值的差值相对误差' - MEAN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,平均值的差值相对误差' - NORM_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值相对误差' MD5 = '数据MD5信息,用于比较两个数据信息是否完全一致' ONE_THOUSANDTH_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差大于千分之一的比例占总元素个数的比例小于千分之一' COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' @@ -28,7 +78,8 @@ class GraphConst: DUMP_FILE = 'dump.json' STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' - CSV_NAME = 'compare_result' + CSV_FILE = 'tmp.csv' ERROR_KEY = 'error_key' - DECIMAL = 6 - MAX_RELATIVE_ERR = 0.5 \ No newline at end of file + SUMMARY_COMPARE = 0 + MD5_COMPARE = 1 + REAL_DATA_COMPARE = 2 \ No newline at end of file -- Gitee From 
91b11f0511f31754a70519df3c9b36ad2be223e8 Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:29:27 +0800 Subject: [PATCH 024/333] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=9Bmsprobe=E4=BE=9D=E8=B5=96=E5=88=86?= =?UTF-8?q?=E7=A6=BB=EF=BC=9B=E4=B8=BABaseNode=E5=92=8CGraph=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=9B=B4=E5=A4=9A=E5=9F=BA=E7=A1=80=E8=83=BD=E5=8A=9B?= =?UTF-8?q?=EF=BC=9B=E5=87=8F=E5=B0=91=E6=A8=A1=E5=9D=97=E9=97=B4=E4=BE=9D?= =?UTF-8?q?=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../visualization/builder/graph_builder.py | 87 ++++--- .../visualization/builder/graph_parser.py | 108 --------- .../visualization/builder/msprobe_adapter.py | 174 ++++++++++++++ .../visualization/compare/graph_comparator.py | 200 ++++++---------- .../{comparator.py => mode_adapter.py} | 214 +++++++++--------- .../pytorch/visualization/graph/base_node.py | 123 ++++------ .../atat/pytorch/visualization/graph/graph.py | 79 ++++++- .../pytorch/visualization/graph/node_op.py | 3 + .../atat/pytorch/visualization/test.py | 15 +- .../atat/pytorch/visualization/utils.py | 37 ++- 10 files changed, 582 insertions(+), 458 deletions(-) delete mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py rename debug/accuracy_tools/atat/pytorch/visualization/compare/{comparator.py => mode_adapter.py} (74%) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 76d476a996c..8cce30f0d24 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -13,44 +13,75 @@ # See the License for the specific language governing permissions and # limitations under the 
License. -from .graph_parser import GraphParser -from ..utils import load_json_file, save_json_file +from ..graph.graph import Graph +from ..graph.node_op import NodeOp +from ..utils import load_json_file, load_data_json_file, save_json_file, GraphConst +from .msprobe_adapter import parse_raw_data, get_input_output, get_node_tag class GraphBuilder: @staticmethod - def build(construct_path, data_path, model_name): + def build(construct_path, data_path, model_name='DefaultModel'): + """ + GraphBuilder的对外提供的构图方法 + Args: + construct_path: construct.json路径 + data_path: dump.json路径 + model_name: 模型名字,依赖外部输入 + Returns: Graph,代表图的数据结构 + """ construct_dict = load_json_file(construct_path) - data_dict = load_json_file(data_path).get('data', {}) - graph = GraphParser().parse(construct_dict, data_dict, model_name) + data_dict = load_data_json_file(data_path) + graph = Graph(model_name) + GraphBuilder._init_nodes(graph, construct_dict, data_dict) return graph @staticmethod - def export_to_json(filename, graph): - result = GraphBuilder._get_graph_dict(graph) - save_json_file(filename, result) - - # todo 吧两个export归一 - @staticmethod - def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): + def export_to_json(filename, graph_n, graph_b=None, tool_tip=None): + """ + 将graph到处成.vis文件的接口 + Args: + filename: 输出文件路径 + graph_n: Graph + graph_b: bench Graph,为空是只输出graph_b,不为空会同时输出两个graph,作为对比的结果 + tool_tip: 在对比模型下输出的意见 + """ result = {} - result['NPU'] = GraphBuilder._get_graph_dict(graph_n) - result['Bench'] = GraphBuilder._get_graph_dict(graph_b) - result['Tooltip'] = tool_tip + if graph_b: + result[GraphConst.JSON_NPU_KEY] = graph_n.get_dict2() + result[GraphConst.JSON_BENCH_KEY] = graph_b.get_dict2() + else: + result = graph_n.get_dict2() + if tool_tip: + result[GraphConst.JSON_TIP_KEY] = tool_tip save_json_file(filename, result) @staticmethod - def _get_graph_dict(graph): - result = {} - result['root'] = graph.root.id if graph.root else 'None' - result['node'] = {} - # todo 
可以把这个遍历删除 - GraphBuilder._export_dfs(graph.root, result['node']) - return result - + def _init_nodes(graph, construct_dict, data_dict): + for subnode_id, upnode_id in construct_dict.items(): + if upnode_id: + upnode_op = NodeOp.get_node_op(upnode_id) + upnode = GraphBuilder._create_or_get_node(graph, data_dict, upnode_op, upnode_id) + else: + upnode = graph.root + node_op = NodeOp.get_node_op(subnode_id) + GraphBuilder._create_or_get_node(graph, data_dict, node_op, subnode_id, upnode) + @staticmethod - def _export_dfs(node, result): - info = node.get_yaml_dict() - result[node.id] = info - for subnode in node.subnodes: - GraphBuilder._export_dfs(subnode, result) + def _create_or_get_node(graph, data_dict, op, name, upnode=None): + if name in graph.node_map: + node = graph.get_node(name) + else: + graph.add_node(op, name, upnode) + node = graph.get_node(name) + node_data = data_dict.get(name, {}) + input_args, input_kwargs, output = parse_raw_data(node_data) + # 添加输入输出数据 + input_data, output_data = get_input_output(node_data, node.id) + # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 + tag = get_node_tag([input_args, input_kwargs, output]) + # 跟新数据 + node.set_input_output_tag(input_data, output_data, tag) + # 添加节点 + node.add_upnode(upnode) + return node \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py deleted file mode 100644 index 2227710b743..00000000000 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ...compare.acc_compare import read_op -from ..graph.graph import Graph -from ..graph.base_node import BaseNode -from ..graph.node_op import NodeOp - - -class GraphParser: - def __init__(self): - pass - - def parse(self, construct_dict, data_dict, model_name): - self.graph = Graph() - self.data_dict = data_dict - self.graph.root = BaseNode(NodeOp.module, model_name) - self.graph.node_map[model_name] = self.graph.root - self._init_nodes(construct_dict) - self.data_dict.clear() - return self.graph - - def _init_nodes(self, construct_dict): - for subnode, upnode in construct_dict.items(): - if upnode: - up_node = self._get_or_create_node(NodeOp.get_node_op(upnode), upnode) - else: - up_node = self.graph.root - self._get_or_create_node(NodeOp.get_node_op(subnode), subnode, up_node) - - # todo 这个函数也得改改 - def _get_or_create_node(self, op, name, up_node=None): - if name not in self.graph.node_map: - # add data - base_node = BaseNode(op, name, up_node) - node_data = self.data_dict.get(name, {}) - input_args, input_kwargs, output = GraphParser._get_data_inputs_outputs(node_data) - # 添加输入输出数据 - GraphParser._add_node_data(node_data, base_node) - - # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 - # 这个东西必须改了,todo - data_info = GraphParser._get_node_data_info(input_args, input_kwargs, output) - base_node.data_info = data_info - self.graph.node_map[name] = base_node - elif up_node: - # 如果节点已经存在,但是我们刚刚获取了他的上级节点 - # todo 这里要加个函数 - self.graph.node_map[name].upnode = up_node - up_node.add_subnode(self.graph.node_map[name]) - return self.graph.node_map[name] - - @staticmethod - def 
_get_data_inputs_outputs(data_dict: dict): - input_args = data_dict.get('input_args', []) - input_kwargs = data_dict.get('input_kwargs', {}) - output = data_dict.get('output', []) - - input_args = input_args if isinstance(input_args, list) else [] - input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} - output = output if isinstance(output, list) else [] - return input_args, input_kwargs, output - - # todo 要加入basenode - @staticmethod - def _add_node_data(node_data, node): - input_data = {} - output_data = {} - op_parsed_list = read_op(node_data, node.type) - for item in op_parsed_list: - full_op_name = item.get('full_op_name', '') - if 'output' in full_op_name: - output_data[full_op_name] = item - else: - input_data[full_op_name] = item - node.input_data = input_data - node.output_data = output_data - - @staticmethod - def _get_data_info(item): - if isinstance(item, dict): - return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) - elif isinstance(item, (list, tuple)): - return str([GraphParser._get_data_info(sub_item) for sub_item in item]) - return '' - - @staticmethod - def _process_node_data_info(items): - info_str = '' - for item in items: - info_str += GraphParser._get_data_info(item) - return info_str - - @staticmethod - def _get_node_data_info(input_args, input_kwargs, output): - return GraphParser._process_node_data_info(input_args) + GraphParser._process_node_data_info(input_kwargs) + GraphParser._process_node_data_info(output) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py new file mode 100644 index 00000000000..d668598f1c2 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -0,0 +1,174 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, task_dumppath_get, _do_multi_process +from ..utils import GraphConst +from ....core.utils import print_info_log + + +def get_compare_mode(dump_path_param): + """ + 获得比较模式,包括summary、MD5和真实数据三种模式 + Args: + dump_path_param: 调用acc_compare接口所以来的参数结构 + Returns: 0 summary mode, 1 md5 mode, 2 true data mode + """ + summary_compare, md5_compare = task_dumppath_get(dump_path_param) + if summary_compare: + compare_mode = GraphConst.SUMMARY_COMPARE + elif md5_compare: + compare_mode = GraphConst.MD5_COMPARE + else: + compare_mode = GraphConst.REAL_DATA_COMPARE + return compare_mode + +def run_real_data(dump_path_param, csv_path): + """ + 多进程运行生成真实数据 + Args: + dump_path_param: 调用acc_compare接口所以来的参数结构 + csv_path: 生成文件路径 + """ + _do_multi_process(dump_path_param, csv_path) + +def parse_raw_data(data_dict: dict): + """ + 进行dump的原始数据解析,提取三个关键字段以进一步处理 + """ + input_args = data_dict.get('input_args', []) + input_kwargs = data_dict.get('input_kwargs', {}) + output = data_dict.get('output', []) + + input_args = input_args if isinstance(input_args, list) else [] + input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} + output = output if isinstance(output, list) else [] + + return input_args, input_kwargs, output + +def get_input_output(node_data, node_id): + """ + 将dump的原始数据进行拆解,分解为output和input两个数据 + Args: + node_data: 属于单个节点的dump数据 + 
node_id: 节点名字 + """ + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node_id) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if 'output' in full_op_name: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + return input_data, output_data + +def get_node_tag(inputs): + """ + 基于inputs生成节点专属tag,一次判断节点是否相同 + """ + result = "" + for single_input in inputs: + info = '' + for item in single_input: + info += _get_single_tag(item) + result += info + return result + +def format_node_data(data_dict): + """ + 批量进行节点数据的输出 + """ + del_list = ['requires_grad', 'data_name', 'full_op_name'] + for _, value in data_dict.items(): + if not isinstance(value, dict): + continue + for item in del_list: + if item in value: + del value[item] + _format_data(value) + return data_dict + +def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_md5_compare): + """ + 调用acc_compare.py中的get_accuracy获得精度对比指标 + 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + merge_n = _parse_node(node_ids[0], data_dicts[0], stack_json_data, is_summary_compare, is_md5_compare) + merge_b = _parse_node(node_ids[1], data_dicts[1], stack_json_data, is_summary_compare, is_md5_compare) + result = [] + get_accuracy(result, merge_n, merge_b, is_summary_compare, is_md5_compare) + return result + +def _get_single_tag(item): + if isinstance(item, dict): + return str(item.get('type', GraphConst.TAG_NA)) + '_' + str(item.get('dtype', GraphConst.TAG_NA)) + '_' + str(item.get('shape', GraphConst.TAG_NA)) + elif isinstance(item, (list, tuple)): + return str([_get_single_tag(sub_item) for sub_item in item]) + return '' + +def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_compare): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node_id, {}), node_id) + if node_id in stack_json_data: + op_parsed_list.append( + 
{'full_op_name': node_id, 'full_info': stack_json_data[node_id]}) + else: + op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) + return merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + +def _format_decimal_string(s): + """ + 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + """ + pattern = re.compile(r'\d{1,20}\.\d{1,20}%?') + matches = pattern.findall(s) + for match in matches: + is_percent = match.endswith('%') + number_str = match.rstrip('%') + decimal_part = number_str.split('.')[1] + # 如果小数位数大于6,进行处理 + if len(decimal_part) > GraphConst.ROUND_TH: + number_float = float(number_str) + if is_percent: + number_float /= 100 + formatted_number = f"{number_float:.{GraphConst.ROUND_TH}f}" + # 如果原来是百分数,加回百分号 + if is_percent: + formatted_number += '%' + # 替换原字符串中的数值部分 + s = s.replace(match, formatted_number) + return s + +def _format_data(data_dict): + """ + 格式化数据,小数保留6位,处理一些异常值 + """ + for key, value in data_dict.items(): + if isinstance(value, str): + # 将单引号删掉,None换成null避免前端解析错误 + value = value.replace("'", "").replace('None', 'null') + value = _format_decimal_string(value) + if value is None or value == ' ': + value = 'null' + if isinstance(value, float): + value = round(value, GraphConst.ROUND_TH) + if not isinstance(value, (list, tuple, dict, str)): + value = str(value) + data_dict[key] = value diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 51f6bc34d41..5426679ae88 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -14,14 +14,12 @@ # limitations under the License. 
import os -import json import pandas as pd -from ..utils import Suggestions, GraphConst, load_json_file, write_csv_data -from ..graph.node_op import NodeOp -from .comparator import Comparator -from ...compare.acc_compare import read_op, merge_tensor, get_accuracy -from ....core.utils import Const -from ...compare.acc_compare import task_dumppath_get, _do_multi_process +from ....core.utils import Const, print_info_log +from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data +from ..utils import GraphConst, load_json_file, load_data_json_file, write_csv_data +from ..graph.graph import Graph +from .mode_adapter import ModeAdapter class GraphComparator: @@ -29,124 +27,18 @@ class GraphComparator: self.graph_n = graphs[0] self.graph_b = graphs[1] self._parse_param(data_paths, stack_path, output_path) - - def _parse_param(self, data_paths, stack_path, output_path): - self.dump_path_param = { - 'npu_json_path': data_paths[0], - 'bench_json_path': data_paths[1], - 'stack_json_path': stack_path, - 'is_print_compare_log': True - } - self.output_path = output_path - summary_compare, md5_compare = task_dumppath_get(self.dump_path_param) - self.comparator = Comparator(summary_compare, md5_compare) - self.data_n_dict = load_json_file(data_paths[0]).get('data', {}) - self.data_b_dict = load_json_file(data_paths[1]).get('data', {}) - self.stack_json_data = load_json_file(stack_path) - - def compare(self): - self._compare_nodes(self.graph_n.root) - self._postcompare() - - def _postcompare(self): - if not self.comparator.is_real_data_compare(): - return - csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) - write_csv_data(csv_path, self.comparator.is_md5_compare(), self.comparator.is_summary_compare(), True, self.comparator.csv_data) - _do_multi_process(self.dump_path_param, csv_path) - # 从csv文件读取精度指标,添加到node节点中 - df = pd.read_csv(csv_path) - compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} - for node in 
self.comparator.compare_nodes: - precision_status, precision_index, _ = self.comparator.parse_result(node, [compare_data_dict]) - # todo 常量改成变量 - node.data['precision_status'] = precision_status - node.data['precision_index'] = precision_index - if not precision_status: - self.comparator.add_error_key(node.output_data) - self.add_suggestions(node) - if os.path.isfile(csv_path): - os.remove(csv_path) - - @staticmethod - def add_suggestions(node): - """ - 精度疑似有问题时,提供一些建议 - """ - if node.op == NodeOp.module: - node.suggestions['text'] = Suggestions.Module - node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL - elif node.op == NodeOp.function_api: - node.suggestions['text'] = Suggestions.API - node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL - - def _compare_nodes(self, node_n): - """ - 递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 - 这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 - Args: - node_n: NPU节点 - """ - # todo 这个函数也需要改改,把is_matched删掉, - is_matched, node_b, ancestors = GraphComparator._match_node(node_n, self.graph_b) - if is_matched: - ancestors.append(node_b.type) - node_n.matched_node_link = ancestors - node_b.matched_node_link = ancestors - # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 - compare_result_list = self.compare_node(node_n, node_b) - if compare_result_list: - self.comparator.add_csv_data(compare_result_list) - self.add_compare_result_to_node(node_n, compare_result_list) - for subnode in node_n.subnodes: - self._compare_nodes(subnode) - - - @staticmethod - def _match_node(node_n, graph_b): - """ - 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 - 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 - 返回匹配结果,匹配到的系欸但,以及祖先列表 - """ - if node_n.id not in graph_b.node_map: - return False, None, None - node_b = graph_b.node_map[node_n.id] - if node_n.data_info != node_b.data_info: - return False, None, None - ancestors_n = node_n.get_ancestors() - ancestors_b = node_b.get_ancestors() - if ancestors_n != ancestors_b: - 
return False, None, None - return True, node_b, ancestors_n - def compare_node(self, node_n, node_b): + def compare(self): """ - 调用acc_compare.py中的get_accuracy获得精度对比指标 - 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + 比较函数,初始化结束后单独调用。比较结果写入graph_n Args: - node_n: NPU节点 - node_b: Bench节点 - Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list - """ - result = [] - # todo 写一个atat adpator - merge_n = self.parse_node(node_n, self.data_n_dict) - merge_b = self.parse_node(node_b, self.data_b_dict) - get_accuracy(result, merge_n, merge_b, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) - return result - - def parse_node(self, node, data_dict): - """ - 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + construct_path: construct.json路径 + data_path: dump.json路径 + model_name: 模型名字,依赖外部输入 + Returns: Graph,代表图的数据结构 """ - op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) - if node.type in self.stack_json_data: - op_parsed_list.append( - {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) - else: - op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) - return merge_tensor(op_parsed_list, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) + self._compare_nodes(self.graph_n.root) + self._postcompare() def add_compare_result_to_node(self, node, compare_result_list): """ @@ -156,7 +48,7 @@ class GraphComparator: compare_result_list: 包含参数信息和对比指标(真实数据对比模式除外)的list """ # 真实数据比对,先暂存节点,在多进程对比得到精度指标后,再将指标添加到节点中 - if self.comparator.prepare_real_data(node): + if self.ma.prepare_real_data(node): return compare_in_dict = {} compare_out_dict = {} @@ -166,10 +58,64 @@ class GraphComparator: compare_out_dict[item[0]] = item else: compare_in_dict[item[0]] = item - precision_status, precision_index, other_dict = self.comparator.parse_result(node, [compare_in_dict, compare_out_dict]) - node.data['precision_status'] = precision_status - node.data['precision_index'] = precision_index + precision_status, precision_index, other_dict = 
self.ma.parse_result(node, [compare_in_dict, compare_out_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index node.data.update(other_dict) if not precision_status: - self.comparator.add_error_key(node.output_data) - self.add_suggestions(node) + self.ma.add_error_key(node.output_data) + node.add_suggestions() + + def _parse_param(self, data_paths, stack_path, output_path): + self.dump_path_param = { + 'npu_json_path': data_paths[0], + 'bench_json_path': data_paths[1], + 'stack_json_path': stack_path, + 'is_print_compare_log': True + } + self.output_path = output_path + compare_mode = get_compare_mode(self.dump_path_param) + self.ma = ModeAdapter(compare_mode) + self.data_n_dict = load_data_json_file(data_paths[0]) + self.data_b_dict = load_data_json_file(data_paths[1]) + self.stack_json_data = load_json_file(stack_path) + + def _postcompare(self): + if not self.ma.is_real_data_compare(): + return + csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) + try: + write_csv_data(csv_path, self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) + run_real_data(self.dump_path_param, csv_path) + # 从csv文件读取精度指标,添加到node节点中 + df = pd.read_csv(csv_path) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.ma.compare_nodes: + precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + if not precision_status: + self.ma.add_error_key(node.output_data) + node.add_suggestions() + except (FileNotFoundError, IOError) as e: + print_info_log('File error in _postcompare: {e}') + finally: + if os.path.isfile(csv_path): + os.remove(csv_path) + + def _compare_nodes(self, node_n): + #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 + #这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 + node_b, 
ancestors = Graph.match(self.graph_n, node_n, self.graph_b) + if node_b: + ancestors.append(node_b.id) + node_n.add_link(node_b, ancestors) + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 + compare_result_list = compare_node([node_n.id, node_b.id], [self.data_n_dict, self.data_b_dict], + self.stack_json_data, self.ma.is_summary_compare(), + self.ma.is_md5_compare()) + if compare_result_list: + self.ma.add_csv_data(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + for subnode in node_n.subnodes: + self._compare_nodes(subnode) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py similarity index 74% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py rename to debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index 21f0ae9f394..9d5e3aab4f1 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -18,55 +18,142 @@ from ....core.utils import CompareConst, Const from ..utils import ToolTip, GraphConst, convert_percentage_to_float -class Comparator: - def __init__(self, summary_compare, md5_compare): - if summary_compare: #0 summary mode, 1 md5 mode, 2 true data mode - self.compare_mode = GraphConst.SUMMARY_COMPARE - elif md5_compare: - self.compare_mode = GraphConst.MD5_COMPARE - else: - self.compare_mode = GraphConst.REAL_DATA_COMPARE +class ModeAdapter: + def __init__(self, compare_mode): + self.compare_mode = compare_mode self.csv_data = [] self.compare_nodes = [] + @staticmethod + def _add_md5_compare_data(node_data, compare_data_dict): + precision_status = True + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [GraphConst.JSON_MD5_KEY] + headers = 
CompareConst.MD5_COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # md5比对是否通过 + if value.get(GraphConst.JSON_MD5_KEY) != CompareConst.PASS: + precision_status = False + node_data[key] = value + return precision_status + + @staticmethod + def _add_real_compare_data(node_data, compare_data_dict): + min_thousandth = float(1) + numbers = [] + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + headers = CompareConst.COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # 获取一个节点所有的输入或输出最小的双千指标 + thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) + # 可能是None,可能是非数字内容str + try: + thousandth = float(thousandth) + except (ValueError, TypeError): + thousandth = None + if thousandth is not None: + numbers.append(thousandth) + node_data[key] = value + # 双千指标都是None的异常情况 + if not numbers: + min_thousandth = None + else: + min_thousandth = min(numbers + [min_thousandth]) + return min_thousandth + + @staticmethod + def _add_summary_compare_data( node_data, compare_data_dict): + precision_status = True + max_relative_err = 0 + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + # 对应比对结果csv的列 + key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, + CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + headers = CompareConst.SUMMARY_COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + 
ModeAdapter._match_data(value, compare_data, key_list, id_list) + # 相对误差大于0.5疑似有精度问题 + for item in key_list[4:]: + relative_err = convert_percentage_to_float(value.get(item)) + max_relative_err = max(max_relative_err, relative_err) + node_data[key] = value + if max_relative_err > GraphConst.MAX_RELATIVE_ERR_TH: + precision_status = False + max_relative_err = 1 if max_relative_err > 1 else max_relative_err + precision_index = 1 - max_relative_err + return precision_status, precision_index + + @staticmethod + def _match_data(data_dict, compare_data, key_list, id_list): + """ + 绑定精度指标到node的input_data和output_data + """ + if len(key_list) != len(id_list): + return + for id, key in zip(id_list, key_list): + data = compare_data[id] + if data is not None and 'nan' not in str(data) and str(data) != ' ': + data_dict[key] = data + else: + data_dict[key] = 'null' + def parse_result(self, node, compare_data_dict): """ 根据结果返回数据,分别是precision_status,precision_index,和附加数据 """ other_dict = {} if self.is_md5_compare(): - precision_status_in = Comparator.add_md5_compare_data(node.input_data, compare_data_dict[0]) - precision_status_out = Comparator.add_md5_compare_data(node.output_data, compare_data_dict[1]) + precision_status_in = ModeAdapter._add_md5_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out = ModeAdapter._add_md5_compare_data(node.output_data, compare_data_dict[1]) # 所有输入输出md5对比通过,这个节点才算通过 precision_status = precision_status_in and precision_status_out precision_index = 1 if precision_status else 0 other_result = CompareConst.PASS if precision_status else CompareConst.DIFF - other_dict['md5 Compare Result'] = other_result + other_dict[GraphConst.JSON_MD5_KEY] = other_result elif self.is_summary_compare(): - precision_status_in, precision_index_in = Comparator.add_summary_compare_data(node.input_data, compare_data_dict[0]) - precision_status_out, precision_index_out = Comparator.add_summary_compare_data(node.output_data, compare_data_dict[1]) + 
precision_status_in, precision_index_in = ModeAdapter._add_summary_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out, precision_index_out = ModeAdapter._add_summary_compare_data(node.output_data, compare_data_dict[1]) precision_status = precision_status_in and precision_status_out precision_index = min(precision_index_in, precision_index_out) else: - min_thousandth_in = Comparator.add_real_compare_data(node.input_data, compare_data_dict[0]) - min_thousandth_out = Comparator.add_real_compare_data(node.output_data, compare_data_dict[0]) + min_thousandth_in = ModeAdapter._add_real_compare_data(node.input_data, compare_data_dict[0]) + min_thousandth_out = ModeAdapter._add_real_compare_data(node.output_data, compare_data_dict[0]) if min_thousandth_in and min_thousandth_out: change_percentage = abs(min_thousandth_in - min_thousandth_out) else: change_percentage = 0 precision_status = True - if change_percentage > 0.1: + if change_percentage > GraphConst.REAL_DATA_TH: precision_status = False precision_index = 0 if change_percentage > 1 else 1 - change_percentage return precision_status, precision_index, other_dict def prepare_real_data(self, node): + """ + 为真实数据比较模式准备节点信息 + """ if self.is_real_data_compare(): self.compare_nodes.append(node) return True return False - # todo 改成私有 def is_summary_compare(self): return self.compare_mode == GraphConst.SUMMARY_COMPARE @@ -82,6 +169,9 @@ class Comparator: self.csv_data.extend(compare_result_list) def add_error_key(self, node_data): + """ + 根据不同的模式进行提供不同错误信息 + """ for key, value in node_data.items(): if not isinstance(value, dict): continue @@ -91,7 +181,7 @@ class Comparator: elif self.is_real_data_compare(): message = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] else: - # todo 这个应该是bug,应该修复 + # 输出件优化 message = [] value[GraphConst.ERROR_KEY] = message node_data[key] = value @@ -114,91 +204,5 @@ class Comparator: CompareConst.COSINE: ToolTip.COSINE, 
CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} - # todo 这个要放在外面去 + # 输出件优化 return json.dumps(tips) - - @staticmethod - def _match_data(data_dict, compare_data, key_list, id_list): - """ - 绑定精度指标到node的input_data和output_data - """ - if len(key_list) != len(id_list): - return - for i, key in enumerate(key_list): - data = compare_data[id_list[i]] - if data is not None and 'nan' not in str(data) and str(data) != ' ': - data_dict[key] = compare_data[id_list[i]] - else: - data_dict[key] = 'null' - - @staticmethod - def add_summary_compare_data( node_data, compare_data_dict): - precision_status = True - max_relative_err = 0 - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - compare_data = compare_data_dict.get(key) - if compare_data: - # 对应比对结果csv的列 - key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, - CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] - id_list = [6, 7, 8, 9, 10, 11, 12, 13] - Comparator._match_data(value, compare_data, key_list, id_list) - # 相对误差大于0.5疑似有精度问题 - for item in key_list[4:]: - relative_err = convert_percentage_to_float(value.get(item)) - max_relative_err = max(max_relative_err, relative_err) - node_data[key] = value - if max_relative_err > 0.5: - precision_status = False - precision_index = 1 - max_relative_err - return precision_status, precision_index - - @staticmethod - def add_md5_compare_data( node_data, compare_data_dict): - precision_status = True - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - compare_data = compare_data_dict.get(key) - if compare_data: - key_list = ['md5 Compare Result'] - id_list = [8] - Comparator._match_data(value, compare_data, key_list, id_list) - # md5比对是否通过 - if value.get('md5 Compare Result') != CompareConst.PASS: - precision_status = False - 
node_data[key] = value - return precision_status - - @staticmethod - def add_real_compare_data(node_data, compare_data_dict): - min_thousandth = float(1) - numbers = [] - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - compare_data = compare_data_dict.get(key) - if compare_data: - key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, - CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] - id_list = [6, 7, 8, 9, 10] - Comparator._match_data(value, compare_data, key_list, id_list) - # 获取一个节点所有的输入或输出最小的双千指标 - thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) - # 可能是None,可能是非数字内容str - try: - thousandth = float(thousandth) - except (ValueError, TypeError): - thousandth = None - if thousandth is not None: - numbers.append(thousandth) - node_data[key] = value - # 双千指标都是None的异常情况 - if not numbers: - min_thousandth = None - else: - min_thousandth = min(numbers + [min_thousandth]) - return min_thousandth \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 5628e3e0ee3..84bba4d17b2 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -13,116 +13,81 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import re +from ..utils import Suggestions, GraphConst +from ..builder.msprobe_adapter import format_node_data class BaseNode: def __init__(self, node_op, node_id, up_node=None): self.op = node_op - self.type = node_id self.id = node_id self.data = {} - self.outputs = [] - self.inputs = [] self.output_data = {} self.input_data = {} - self.upnode = up_node + self.upnode = None + self.add_upnode(up_node) self.subnodes = [] - if up_node: - up_node.add_subnode(self) - self.is_forward = True - self.pair = None self.matched_node_link = [] - self.data_info = '' + self.tag = '' self.suggestions = {} - # todo 这些都在做什么,都应该确认一下 def __str__(self): info = f'id:\t{self.id}' return info - - @staticmethod - def _handle_item(data_dict): - del_list = ['requires_grad', 'data_name', 'full_op_name'] - for key, value in data_dict.items(): - if not isinstance(value, dict): - continue - for item in del_list: - if item in value: - del value[item] - BaseNode._format_data(value) - - return data_dict - @staticmethod - def _format_data(data_dict): + def get_suggestions(self): """ - 格式化数据,小数保留6位,处理一些异常值 + 精度疑似有问题时,提供一些建议 """ - for key, value in data_dict.items(): - if isinstance(value, str): - # 将单引号删掉,None换成null避免前端解析错误 - value = value.replace("'", "").replace('None', 'null') - value = BaseNode._format_decimal_string(value) - if value is None or value == ' ': - value = 'null' - if isinstance(value, float): - value = round(value, 6) - if not isinstance(value, (list, tuple, dict, str)): - value = str(value) - data_dict[key] = value + if self.op == NodeOp.module: + self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.Module + self.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + elif self.op == NodeOp.function_api: + self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.API + self.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + def set_input_output_tag(self, input_data, output_data, tag): + self.input_data = input_data + self.output_data = 
output_data + self.tag = tag - @staticmethod - def _format_decimal_string(s): + def add_upnode(self, node): """ - 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + 绑定upnode,用于对两个节点进行上下级关联 """ - pattern = re.compile(r'\d+\.\d+%?') - matches = pattern.findall(s) - for match in matches: - is_percent = match.endswith('%') - number_str = match.rstrip('%') - decimal_part = number_str.split('.')[1] - # 如果小数位数大于6,进行处理 - if len(decimal_part) > 6: - number_float = float(number_str) - if is_percent: - number_float /= 100 - formatted_number = f"{number_float:.6f}" - # 如果原来是百分数,加回百分号 - if is_percent: - formatted_number += '%' - # 替换原字符串中的数值部分 - s = s.replace(match, formatted_number) - return s - - def get_info(self): - info = f'{self.id}\t{self.op}' - if not self.is_forward: - info += '(b)' - for key in self.data: - info += f'\n{key}:\t{self.data.get(key)}' - return info - - def add_subnode(self, node): - if node.id == self.id: + if not node or node.id == self.id or self.upnode: return - self.subnodes.append(node) + self.upnode = node + node.subnodes.append(self) + + def add_link(self, node, ancestors): + """ + 在节点匹配成功后进行匹配数据的录入 + Args: + node: 和self相互匹配的节点 + ancestors: 对面节点的祖先信息 + """ + self.matched_node_link = ancestors + node.matched_node_link = ancestors def get_yaml_dict(self): + """ + 输出数据 + """ + # 输出件优化 result = {} result['id'] = self.id result['node_type'] = self.op.value - result['type'] = self.type + result['type'] = self.id result['data'] = self.data - result['output_data'] = self._handle_item(self.output_data) - result['input_data'] = self._handle_item(self.input_data) - result['outputs'] = [(edge_id, node.id) for edge_id, node in self.outputs] - result['inputs'] = [(edge_id, node.id) for edge_id, node in self.inputs] + result['output_data'] = format_node_data(self.output_data) + result['input_data'] = format_node_data(self.input_data) + result['outputs'] = [] + result['inputs'] = [] result['upnode'] = self.upnode.id if self.upnode else 'None' result['subnodes'] = [node.id for node 
in self.subnodes] - result['is_forward'] = self.is_forward - result['pair'] = self.pair.id if self.pair else 'None' + result['is_forward'] = True + result['pair'] = 'None' result['matched_node_link'] = self.matched_node_link result['suggestions'] = self.suggestions return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index 347e8c2c88b..b53a691c02f 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -13,16 +13,85 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .base_node import BaseNode +from .node_op import NodeOp +from ..utils import GraphConst + class Graph: - # todo,这里应该加入一些和图相关的操作 - # 可以把root node 的初始化放进Graph里面 - def __init__(self): - self.root = None + def __init__(self, model_name): self.node_map = {} + self.add_node(NodeOp.module, model_name) + self.root = self.get_node(model_name) def __str__(self): infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] info = "\n".join(infos) return info - \ No newline at end of file + + @staticmethod + def match(graph_n, node_n, graph_b): + """ + 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 + 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 + 返回匹配结果,匹配到的节点,以及祖先树。没匹配到则返回None, [] + """ + if not node_n or node_n.id not in graph_b.node_map: + return None, [] + node_b = graph_b.node_map.get(node_n.id) + if node_n.tag != node_b.tag: + return None, [] + + ancestors_n = node_n.get_ancestors() + ancestors_b = node_b.get_ancestors() + if ancestors_n != ancestors_b: + return None, [] + return node_b, ancestors_n + + @staticmethod + def dfs(node, result): + info = node.get_yaml_dict() + result[node.id] = info + for subnode in node.subnodes: + Graph.dfs(subnode, result) + + def add_node(self, node_op, node_id, up_node=None): + """ + 在graph中进行节点的添加 + Args: + node_op: 需要添加的节点类型 + 
node_id: 需要添加的节点id + up_node:对应节点的父节点 + """ + if node_id in self.node_map: + return self.node_map.get(node_id) + node = BaseNode(node_op, node_id, up_node) + self.node_map[node_id] = node + + def get_node(self, node_id): + """ + 返回节点,不存在返回None + """ + return self.node_map.get(node_id, None) + + def get_dict(self): + """ + 用于数据输出 + """ + result = {} + result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' + result[GraphConst.JSON_NODE_KEY] = {} + for node_id in self.node_map: + info = self.node_map.get(node_id).get_yaml_dict() + result[GraphConst.JSON_NODE_KEY][node_id] = info + return result + + def get_dict2(self): + # 输出件优化 + # 为了防止输出件变动临时使用方法,会在后续重构中删除 + # 递归遍历,在正式交付中尽量避免 + result = {} + result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' + result[GraphConst.JSON_NODE_KEY] = {} + Graph.dfs(self.root, result[GraphConst.JSON_NODE_KEY]) + return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index 015a83abda6..ad7be35f0cf 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -26,6 +26,9 @@ class NodeOp(Enum): @staticmethod def get_node_op(node_name: str): + """ + 基于代表节点的字符串,解析节点种类 + """ pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' if re.match(pattern, node_name): return NodeOp.function_api diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py b/debug/accuracy_tools/atat/pytorch/visualization/test.py index 3bd43362a21..dcd0b9d1f91 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/test.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/test.py @@ -21,6 +21,8 @@ import filecmp from .compare.graph_comparator import GraphComparator from .utils import GraphConst from .builder.graph_builder import GraphBuilder +from ...core.utils import print_info_log +from ...core.file_check_util 
import FileOpen, create_directory def compare_graph(dump_path_n, dump_path_b, out_path): @@ -36,7 +38,8 @@ def compare_graph(dump_path_n, dump_path_b, out_path): graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) graph_comparator.compare() output_path = os.path.join(out_path, 'compare.vis') - GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, graph_comparator.comparator.get_tool_tip()) + GraphBuilder.export_to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) + def build_graph(dump_path, out_path): construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) @@ -45,11 +48,12 @@ def build_graph(dump_path, out_path): graph = GraphBuilder.build(construct_path, data_path, 'TestNet') GraphBuilder.export_to_json(output_path, graph) + def run_st(data_path): start_time = time.time() run_bench(data_path, 'output2') end_time = time.time() - print('run_st time cost:', end_time - start_time) + print_info_log(f'run_st time cost: {end_time - start_time}') # 比较output2的结果和output1 的bench结果差距 for data_dir in os.listdir(data_path): data_dir = os.path.join(data_path, data_dir) @@ -63,9 +67,10 @@ def run_st(data_path): file2 = os.path.join(output2, vis_file) result = filecmp.cmp(file1, file2) if result: - print('pass ' + file1) + print_info_log('pass ' + file1) else: - print('not pass ' + file1) + print_info_log('not pass ' + file1) + def run_bench(data_path, output_dir): for data_dir in os.listdir(data_path): @@ -76,6 +81,6 @@ def run_bench(data_path, output_dir): output_path = os.path.join(data_dir, output_dir) if os.path.exists(output_path): shutil.rmtree(output_path) - os.makedirs(output_path) + create_directory(output_path) build_graph(run_data_path, output_path) compare_graph(run_data_path, run_data_path, output_path) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index ab02ef7d200..6505ce29900 100644 
--- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -21,6 +21,9 @@ from ..compare.acc_compare import result_to_csv def load_json_file(file_path): + """ + 加载json文件 + """ try: with FileOpen(file_path, 'r') as f: file_dict = json.load(f) @@ -30,14 +33,32 @@ def load_json_file(file_path): except json.JSONDecodeError: return {} + +def load_data_json_file(file_path): + """ + 加载dump.json中的data字段 + """ + return load_json_file(file_path).get(GraphConst.DATA_KEY, {}) + + def save_json_file(file_path, data): + """ + 保存json文件 + """ with FileOpen(file_path, 'w') as f: f.write(json.dumps(data, indent=4)) + def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): + """ + 调用acc接口写入csv + """ + if os.path.exists(csv_path): + os.remove(csv_path) with os.fdopen(os.open(csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), 'w+') as f: result_to_csv(md5_compare, summary_compare, stack, csv_data, f) + def convert_percentage_to_float(percentage_str): """ 百分比字符串转换转换为浮点型 @@ -82,4 +103,18 @@ class GraphConst: ERROR_KEY = 'error_key' SUMMARY_COMPARE = 0 MD5_COMPARE = 1 - REAL_DATA_COMPARE = 2 \ No newline at end of file + REAL_DATA_COMPARE = 2 + JSON_NPU_KEY = 'NPU' + JSON_BENCH_KEY = 'Bench' + JSON_TIP_KEY = 'Tooltip' + JSON_MD5_KEY = 'md5 Compare Result' + JSON_ROOT_KEY = 'root' + JSON_NODE_KEY = 'node' + DATA_KEY = 'data' + REAL_DATA_TH = 0.1 + MAX_RELATIVE_ERR_TH = 0.5 + ROUND_TH = 6 + JSON_STATUS_KEY = 'precision_status' + JSON_INDEX_KEY = 'precision_index' + SUGGEST_KEY = 'text' + TAG_NA = 'na' -- Gitee From 85e6d5410b2065a54e85f1fa2ed028c30439319a Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:29:53 +0800 Subject: [PATCH 025/333] =?UTF-8?q?NodeOp=E8=A7=A3=E6=9E=90=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E3=80=81BaseNode=E6=AF=94=E8=BE=83=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=8C=E7=A7=BB=E9=99=A4tag=E5=B1=9E?= =?UTF-8?q?=E6=80=A7?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../visualization/builder/graph_builder.py | 17 ++--- .../visualization/builder/msprobe_adapter.py | 67 ++++++++++--------- .../visualization/compare/mode_adapter.py | 4 +- .../pytorch/visualization/graph/base_node.py | 18 +++-- .../atat/pytorch/visualization/graph/graph.py | 13 ++-- .../pytorch/visualization/graph/node_op.py | 17 ++--- .../atat/pytorch/visualization/test.py | 4 +- .../atat/pytorch/visualization/utils.py | 5 +- 8 files changed, 78 insertions(+), 67 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 8cce30f0d24..85634a461c4 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -16,7 +16,7 @@ from ..graph.graph import Graph from ..graph.node_op import NodeOp from ..utils import load_json_file, load_data_json_file, save_json_file, GraphConst -from .msprobe_adapter import parse_raw_data, get_input_output, get_node_tag +from .msprobe_adapter import get_input_output class GraphBuilder: @@ -37,9 +37,9 @@ class GraphBuilder: return graph @staticmethod - def export_to_json(filename, graph_n, graph_b=None, tool_tip=None): + def to_json(filename, graph_n, graph_b=None, tool_tip=None): """ - 将graph到处成.vis文件的接口 + 将graph导出成.vis文件的接口 Args: filename: 输出文件路径 graph_n: Graph @@ -48,10 +48,10 @@ class GraphBuilder: """ result = {} if graph_b: - result[GraphConst.JSON_NPU_KEY] = graph_n.get_dict2() - result[GraphConst.JSON_BENCH_KEY] = graph_b.get_dict2() + result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict2() + result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict2() else: - result = graph_n.get_dict2() + result = graph_n.to_dict2() if tool_tip: result[GraphConst.JSON_TIP_KEY] = tool_tip save_json_file(filename, result) @@ -75,13 +75,10 @@ 
class GraphBuilder: graph.add_node(op, name, upnode) node = graph.get_node(name) node_data = data_dict.get(name, {}) - input_args, input_kwargs, output = parse_raw_data(node_data) # 添加输入输出数据 input_data, output_data = get_input_output(node_data, node.id) - # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 - tag = get_node_tag([input_args, input_kwargs, output]) # 跟新数据 - node.set_input_output_tag(input_data, output_data, tag) + node.set_input_output(input_data, output_data) # 添加节点 node.add_upnode(upnode) return node \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index d668598f1c2..960e1c4f1a0 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -19,6 +19,13 @@ from ..utils import GraphConst from ....core.utils import print_info_log +# 用于将节点名字解析成对应的NodeOp的规则 +op_patterns = [ + r'^(Module)', #NodeOp.module + r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' #NodeOp.function_api +] + + def get_compare_mode(dump_path_param): """ 获得比较模式,包括summary、MD5和真实数据三种模式 @@ -35,6 +42,7 @@ def get_compare_mode(dump_path_param): compare_mode = GraphConst.REAL_DATA_COMPARE return compare_mode + def run_real_data(dump_path_param, csv_path): """ 多进程运行生成真实数据 @@ -44,19 +52,6 @@ def run_real_data(dump_path_param, csv_path): """ _do_multi_process(dump_path_param, csv_path) -def parse_raw_data(data_dict: dict): - """ - 进行dump的原始数据解析,提取三个关键字段以进一步处理 - """ - input_args = data_dict.get('input_args', []) - input_kwargs = data_dict.get('input_kwargs', {}) - output = data_dict.get('output', []) - - input_args = input_args if isinstance(input_args, list) else [] - input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} - output = output if isinstance(output, list) else [] - - return input_args, input_kwargs, output def 
get_input_output(node_data, node_id): """ @@ -70,23 +65,36 @@ def get_input_output(node_data, node_id): op_parsed_list = read_op(node_data, node_id) for item in op_parsed_list: full_op_name = item.get('full_op_name', '') - if 'output' in full_op_name: + if not full_op_name: + continue + splits = full_op_name.split('.') + if len(splits) <= GraphConst.OUTPUT_INDEX: + continue + if 'output' in splits[GraphConst.OUTPUT_INDEX]: output_data[full_op_name] = item else: input_data[full_op_name] = item return input_data, output_data -def get_node_tag(inputs): - """ - 基于inputs生成节点专属tag,一次判断节点是否相同 - """ - result = "" - for single_input in inputs: - info = '' - for item in single_input: - info += _get_single_tag(item) - result += info - return result + +def compare_data(data_dict1, data_dict2): + """ + 比较get_input_output中输出的结果是否结构一致,比较一致返回True + """ + if len(data_dict1) != len(data_dict2): + return False + # 用于比较两个节点是否相等的关键字段 + tag_keys = ['type', 'dtype', 'shape'] + for key1, key2 in zip(data_dict1, data_dict2): + dict1 = data_dict1[key1] + dict2 = data_dict2[key2] + for tag_key in tag_keys: + tag_value1 = dict1.get(tag_key, None) + tag_value2 = dict2.get(tag_key, None) + if tag_value1 != tag_value2: + return False + return True + def format_node_data(data_dict): """ @@ -102,6 +110,7 @@ def format_node_data(data_dict): _format_data(value) return data_dict + def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_md5_compare): """ 调用acc_compare.py中的get_accuracy获得精度对比指标 @@ -114,12 +123,6 @@ def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_m get_accuracy(result, merge_n, merge_b, is_summary_compare, is_md5_compare) return result -def _get_single_tag(item): - if isinstance(item, dict): - return str(item.get('type', GraphConst.TAG_NA)) + '_' + str(item.get('dtype', GraphConst.TAG_NA)) + '_' + str(item.get('shape', GraphConst.TAG_NA)) - elif isinstance(item, (list, tuple)): - return str([_get_single_tag(sub_item) for sub_item in 
item]) - return '' def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_compare): """ @@ -133,6 +136,7 @@ def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_ op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) return merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + def _format_decimal_string(s): """ 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 @@ -156,6 +160,7 @@ def _format_decimal_string(s): s = s.replace(match, formatted_number) return s + def _format_data(data_dict): """ 格式化数据,小数保留6位,处理一些异常值 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index 9d5e3aab4f1..18c6bac6ba8 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -15,7 +15,7 @@ import json from ....core.utils import CompareConst, Const -from ..utils import ToolTip, GraphConst, convert_percentage_to_float +from ..utils import ToolTip, GraphConst, str2float class ModeAdapter: @@ -91,7 +91,7 @@ class ModeAdapter: ModeAdapter._match_data(value, compare_data, key_list, id_list) # 相对误差大于0.5疑似有精度问题 for item in key_list[4:]: - relative_err = convert_percentage_to_float(value.get(item)) + relative_err = str2float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value if max_relative_err > GraphConst.MAX_RELATIVE_ERR_TH: diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 84bba4d17b2..74868f83dbf 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -14,7 +14,7 @@ # limitations under the License. 
from ..utils import Suggestions, GraphConst -from ..builder.msprobe_adapter import format_node_data +from ..builder.msprobe_adapter import format_node_data, compare_data class BaseNode: @@ -28,12 +28,21 @@ class BaseNode: self.add_upnode(up_node) self.subnodes = [] self.matched_node_link = [] - self.tag = '' self.suggestions = {} def __str__(self): info = f'id:\t{self.id}' return info + + def __eq__(self,other): + """ + 用来判断两个节点是否可以被匹配上,认为结构上是否一致 + """ + if not compare_data(self.input_data, other.input_data): + return False + if not compare_data(self.output_data, other.output_data): + return False + return True def get_suggestions(self): """ @@ -46,10 +55,9 @@ class BaseNode: self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.API self.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL - def set_input_output_tag(self, input_data, output_data, tag): + def set_input_output(self, input_data, output_data): self.input_data = input_data self.output_data = output_data - self.tag = tag def add_upnode(self, node): """ @@ -70,7 +78,7 @@ class BaseNode: self.matched_node_link = ancestors node.matched_node_link = ancestors - def get_yaml_dict(self): + def to_dict(self): """ 输出数据 """ diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index b53a691c02f..7af11fceec0 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -39,9 +39,8 @@ class Graph: if not node_n or node_n.id not in graph_b.node_map: return None, [] node_b = graph_b.node_map.get(node_n.id) - if node_n.tag != node_b.tag: + if node_n != node_b: return None, [] - ancestors_n = node_n.get_ancestors() ancestors_b = node_b.get_ancestors() if ancestors_n != ancestors_b: @@ -50,7 +49,7 @@ class Graph: @staticmethod def dfs(node, result): - info = node.get_yaml_dict() + info = node.to_dict() result[node.id] 
= info for subnode in node.subnodes: Graph.dfs(subnode, result) @@ -64,7 +63,7 @@ class Graph: up_node:对应节点的父节点 """ if node_id in self.node_map: - return self.node_map.get(node_id) + return node = BaseNode(node_op, node_id, up_node) self.node_map[node_id] = node @@ -74,7 +73,7 @@ class Graph: """ return self.node_map.get(node_id, None) - def get_dict(self): + def to_dict(self): """ 用于数据输出 """ @@ -82,11 +81,11 @@ class Graph: result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' result[GraphConst.JSON_NODE_KEY] = {} for node_id in self.node_map: - info = self.node_map.get(node_id).get_yaml_dict() + info = self.node_map.get(node_id).to_dict() result[GraphConst.JSON_NODE_KEY][node_id] = info return result - def get_dict2(self): + def to_dict2(self): # 输出件优化 # 为了防止输出件变动临时使用方法,会在后续重构中删除 # 递归遍历,在正式交付中尽量避免 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index ad7be35f0cf..a5bf8a44388 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -15,22 +15,23 @@ from enum import Enum import re +from ..builder.msprobe_adapter import op_patterns class NodeOp(Enum): module = 1 function_api = 2 - module_api = 3 - tensor = 4 - output = 5 @staticmethod def get_node_op(node_name: str): """ 基于代表节点的字符串,解析节点种类 """ - pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' - if re.match(pattern, node_name): - return NodeOp.function_api - else: - return NodeOp.module + for op in NodeOp: + index = op.value - 1 + if index < 0 or index >= len(op_patterns): + raise Exception("NodeOp and op_patterns in MsprobeAdapter do not match") + pattern = op_patterns[index] + if re.match(pattern, node_name): + return op + raise Exception("Cannot parse node_name {node_name} into NodeOp") diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py 
b/debug/accuracy_tools/atat/pytorch/visualization/test.py index dcd0b9d1f91..9784a96b808 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/test.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/test.py @@ -38,7 +38,7 @@ def compare_graph(dump_path_n, dump_path_b, out_path): graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) graph_comparator.compare() output_path = os.path.join(out_path, 'compare.vis') - GraphBuilder.export_to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) + GraphBuilder.to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) def build_graph(dump_path, out_path): @@ -46,7 +46,7 @@ def build_graph(dump_path, out_path): data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) output_path = os.path.join(out_path, 'build.vis') graph = GraphBuilder.build(construct_path, data_path, 'TestNet') - GraphBuilder.export_to_json(output_path, graph) + GraphBuilder.to_json(output_path, graph) def run_st(data_path): diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 6505ce29900..692ac63270c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -59,7 +59,7 @@ def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): result_to_csv(md5_compare, summary_compare, stack, csv_data, f) -def convert_percentage_to_float(percentage_str): +def str2float(percentage_str): """ 百分比字符串转换转换为浮点型 Args: @@ -67,7 +67,7 @@ def convert_percentage_to_float(percentage_str): Returns: float 0.00, 0.234 """ try: - percentage_str = percentage_str.replace('%', '') + percentage_str = percentage_str.strip('%') return float(percentage_str) / 100 except ValueError: return 0 @@ -118,3 +118,4 @@ class GraphConst: JSON_INDEX_KEY = 'precision_index' SUGGEST_KEY = 'text' TAG_NA = 'na' + OUTPUT_INDEX = -2 -- 
Gitee From 1744839f6e88fecb4c5e3f14d014c6d885760fcf Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:30:49 +0800 Subject: [PATCH 026/333] =?UTF-8?q?=E9=80=82=E9=85=8Dacc=5Fcompare?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/atat/pytorch/__init__.py | 3 +- .../visualization/builder/msprobe_adapter.py | 7 +++-- .../visualization/compare/graph_comparator.py | 31 +++++++------------ .../atat/pytorch/visualization/utils.py | 8 ++--- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/__init__.py b/debug/accuracy_tools/atat/pytorch/__init__.py index 198cea96de8..dcdf4cb3a3a 100644 --- a/debug/accuracy_tools/atat/pytorch/__init__.py +++ b/debug/accuracy_tools/atat/pytorch/__init__.py @@ -2,4 +2,5 @@ from .debugger.precision_debugger import PrecisionDebugger from .common.utils import seed_all from .compare.acc_compare import compare from .compare.distributed_compare import compare_distributed -from .visualization.json_parse_graph import compare_graph, build_graph +from .visualization.builder.graph_builder import GraphBuilder +from .visualization.compare.graph_comparator import GraphComparator diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 960e1c4f1a0..56f9e4da615 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -50,7 +50,7 @@ def run_real_data(dump_path_param, csv_path): dump_path_param: 调用acc_compare接口所以来的参数结构 csv_path: 生成文件路径 """ - _do_multi_process(dump_path_param, csv_path) + return _do_multi_process(dump_path_param, csv_path) def get_input_output(node_data, node_id): @@ -134,7 +134,10 @@ def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_ {'full_op_name': node_id, 
'full_info': stack_json_data[node_id]}) else: op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) - return merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + result = merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + if not result: + result['op_name'] = [] + return result def _format_decimal_string(s): diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 5426679ae88..94c6f76bb96 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -17,7 +17,7 @@ import os import pandas as pd from ....core.utils import Const, print_info_log from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data -from ..utils import GraphConst, load_json_file, load_data_json_file, write_csv_data +from ..utils import GraphConst, load_json_file, load_data_json_file, get_csv_df from ..graph.graph import Graph from .mode_adapter import ModeAdapter @@ -83,25 +83,16 @@ class GraphComparator: def _postcompare(self): if not self.ma.is_real_data_compare(): return - csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) - try: - write_csv_data(csv_path, self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) - run_real_data(self.dump_path_param, csv_path) - # 从csv文件读取精度指标,添加到node节点中 - df = pd.read_csv(csv_path) - compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} - for node in self.ma.compare_nodes: - precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) - node.data[GraphConst.JSON_STATUS_KEY] = precision_status - node.data[GraphConst.JSON_INDEX_KEY] = precision_index - if not precision_status: - self.ma.add_error_key(node.output_data) - node.add_suggestions() - except (FileNotFoundError, IOError) as e: - 
print_info_log('File error in _postcompare: {e}') - finally: - if os.path.isfile(csv_path): - os.remove(csv_path) + df = get_csv_df(self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) + df = run_real_data(self.dump_path_param, df) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.ma.compare_nodes: + precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + if not precision_status: + self.ma.add_error_key(node.output_data) + node.add_suggestions() def _compare_nodes(self, node_n): #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 692ac63270c..3dda565388b 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -49,14 +49,11 @@ def save_json_file(file_path, data): f.write(json.dumps(data, indent=4)) -def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): +def get_csv_df(md5_compare, summary_compare, stack, csv_data): """ 调用acc接口写入csv """ - if os.path.exists(csv_path): - os.remove(csv_path) - with os.fdopen(os.open(csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), 'w+') as f: - result_to_csv(md5_compare, summary_compare, stack, csv_data, f) + return result_to_csv(md5_compare, summary_compare, stack, csv_data, None) def str2float(percentage_str): @@ -99,7 +96,6 @@ class GraphConst: DUMP_FILE = 'dump.json' STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' - CSV_FILE = 'tmp.csv' ERROR_KEY = 'error_key' SUMMARY_COMPARE = 0 MD5_COMPARE = 1 -- Gitee From f1f56e778ef60787b3aed8217805920be96b5eee Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Thu, 11 Jul 2024 12:31:09 +0800 Subject: [PATCH 
027/333] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/graph_builder.py | 2 +- .../visualization/builder/msprobe_adapter.py | 14 +++++++------- .../visualization/compare/graph_comparator.py | 5 ----- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 85634a461c4..9edc260589b 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -77,7 +77,7 @@ class GraphBuilder: node_data = data_dict.get(name, {}) # 添加输入输出数据 input_data, output_data = get_input_output(node_data, node.id) - # 跟新数据 + # 更新数据 node.set_input_output(input_data, output_data) # 添加节点 node.add_upnode(upnode) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 56f9e4da615..cb39538ac9d 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -30,7 +30,7 @@ def get_compare_mode(dump_path_param): """ 获得比较模式,包括summary、MD5和真实数据三种模式 Args: - dump_path_param: 调用acc_compare接口所以来的参数结构 + dump_path_param: 调用acc_compare接口所依赖的参数 Returns: 0 summary mode, 1 md5 mode, 2 true data mode """ summary_compare, md5_compare = task_dumppath_get(dump_path_param) @@ -47,7 +47,7 @@ def run_real_data(dump_path_param, csv_path): """ 多进程运行生成真实数据 Args: - dump_path_param: 调用acc_compare接口所以来的参数结构 + dump_path_param: 调用acc_compare接口所依赖的参数 csv_path: 生成文件路径 """ return _do_multi_process(dump_path_param, csv_path) @@ -77,17 +77,17 @@ def get_input_output(node_data, node_id): return 
input_data, output_data -def compare_data(data_dict1, data_dict2): +def compare_data(data_dict_list1, data_dict_list2): """ 比较get_input_output中输出的结果是否结构一致,比较一致返回True """ - if len(data_dict1) != len(data_dict2): + if len(data_dict_list1) != len(data_dict_list2): return False # 用于比较两个节点是否相等的关键字段 tag_keys = ['type', 'dtype', 'shape'] - for key1, key2 in zip(data_dict1, data_dict2): - dict1 = data_dict1[key1] - dict2 = data_dict2[key2] + for key1, key2 in zip(data_dict_list1, data_dict_list2): + dict1 = data_dict_list1[key1] + dict2 = data_dict_list2[key2] for tag_key in tag_keys: tag_value1 = dict1.get(tag_key, None) tag_value2 = dict2.get(tag_key, None) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 94c6f76bb96..221b091ec60 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -31,11 +31,6 @@ class GraphComparator: def compare(self): """ 比较函数,初始化结束后单独调用。比较结果写入graph_n - Args: - construct_path: construct.json路径 - data_path: dump.json路径 - model_name: 模型名字,依赖外部输入 - Returns: Graph,代表图的数据结构 """ self._compare_nodes(self.graph_n.root) self._postcompare() -- Gitee From ae12e7df789067d3d80d3d57153d54f17f1b8412 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 15 Jul 2024 16:51:36 +0800 Subject: [PATCH 028/333] =?UTF-8?q?=E5=A4=84=E7=90=86=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E5=B7=B2=E7=9F=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 6 ++++-- .../atat/pytorch/visualization/compare/graph_comparator.py | 4 ++-- .../atat/pytorch/visualization/graph/base_node.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py 
b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index cb39538ac9d..c73a7c5879f 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -153,8 +153,6 @@ def _format_decimal_string(s): # 如果小数位数大于6,进行处理 if len(decimal_part) > GraphConst.ROUND_TH: number_float = float(number_str) - if is_percent: - number_float /= 100 formatted_number = f"{number_float:.{GraphConst.ROUND_TH}f}" # 如果原来是百分数,加回百分号 if is_percent: @@ -168,6 +166,7 @@ def _format_data(data_dict): """ 格式化数据,小数保留6位,处理一些异常值 """ + pattern = r'^[+-]?(\d+(.\d*)?|.\d+)([eE][+-]?\d+)$' for key, value in data_dict.items(): if isinstance(value, str): # 将单引号删掉,None换成null避免前端解析错误 @@ -175,6 +174,9 @@ def _format_data(data_dict): value = _format_decimal_string(value) if value is None or value == ' ': value = 'null' + # 科学计数法1.123123123123e-11,格式化为1.123123e-11 + if isinstance(value, float) and re.match(pattern, str(value)): + value = "{:.6e}".format(value) if isinstance(value, float): value = round(value, GraphConst.ROUND_TH) if not isinstance(value, (list, tuple, dict, str)): diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 221b091ec60..18b905f338d 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -59,7 +59,7 @@ class GraphComparator: node.data.update(other_dict) if not precision_status: self.ma.add_error_key(node.output_data) - node.add_suggestions() + node.get_suggestions() def _parse_param(self, data_paths, stack_path, output_path): self.dump_path_param = { @@ -87,7 +87,7 @@ class GraphComparator: node.data[GraphConst.JSON_INDEX_KEY] = precision_index if not precision_status: self.ma.add_error_key(node.output_data) - 
node.add_suggestions() + node.get_suggestions() def _compare_nodes(self, node_n): #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 74868f83dbf..21b1db7fc24 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .node_op import NodeOp from ..utils import Suggestions, GraphConst from ..builder.msprobe_adapter import format_node_data, compare_data @@ -34,7 +35,7 @@ class BaseNode: info = f'id:\t{self.id}' return info - def __eq__(self,other): + def __eq__(self, other): """ 用来判断两个节点是否可以被匹配上,认为结构上是否一致 """ -- Gitee From 2b3630d2f641ff34356368000f3709068fd96303 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 15 Jul 2024 17:34:47 +0800 Subject: [PATCH 029/333] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index c73a7c5879f..77bda681404 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -172,13 +172,13 @@ def _format_data(data_dict): # 将单引号删掉,None换成null避免前端解析错误 value = value.replace("'", "").replace('None', 'null') value = _format_decimal_string(value) - if value is None or value == ' ': + elif value is None or value == ' ': value = 'null' # 
科学计数法1.123123123123e-11,格式化为1.123123e-11 - if isinstance(value, float) and re.match(pattern, str(value)): + elif isinstance(value, float) and re.match(pattern, str(value)): value = "{:.6e}".format(value) - if isinstance(value, float): + elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) - if not isinstance(value, (list, tuple, dict, str)): + elif not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value -- Gitee From bfb6286b7dac55fff62b027c12ebb26054f05a88 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 16 Jul 2024 11:18:07 +0800 Subject: [PATCH 030/333] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 2 +- debug/accuracy_tools/atat/pytorch/visualization/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 77bda681404..738f8dac54c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -175,7 +175,7 @@ def _format_data(data_dict): elif value is None or value == ' ': value = 'null' # 科学计数法1.123123123123e-11,格式化为1.123123e-11 - elif isinstance(value, float) and re.match(pattern, str(value)): + elif isinstance(value, float) and len(value) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): value = "{:.6e}".format(value) elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 3dda565388b..9a26c2cdfe9 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -115,3 +115,4 @@ class GraphConst: SUGGEST_KEY = 'text' TAG_NA = 'na' OUTPUT_INDEX = -2 + STR_MAX_LEN = 50 -- Gitee From 50b6eb72ea1d567d289ad8928d6ae6f92f039201 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 19 Jul 2024 10:56:19 +0800 Subject: [PATCH 031/333] =?UTF-8?q?=E5=B0=8F=E5=80=BC=E5=9F=9F=E4=B8=8D?= =?UTF-8?q?=E6=98=BE=E7=A4=BA=E7=9B=B8=E5=AF=B9=E8=AF=AF=E5=B7=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/compare/mode_adapter.py | 8 ++++++-- debug/accuracy_tools/atat/pytorch/visualization/utils.py | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index 18c6bac6ba8..b291db60d8c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -89,8 +89,12 @@ class ModeAdapter: headers = CompareConst.SUMMARY_COMPARE_RESULT_HEADER id_list = [headers.index(x) for x in key_list] ModeAdapter._match_data(value, compare_data, key_list, id_list) - # 相对误差大于0.5疑似有精度问题 - for item in key_list[4:]: + # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 + for index, item in enumerate(key_list[4:]): + value_diff = value.get(index) + if abs(value_diff) < GraphConst.SMALL_VALUE: + value[index] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) + continue relative_err = str2float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 9a26c2cdfe9..3e07122a943 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ 
-80,6 +80,7 @@ class ToolTip: COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' MAX_ABS_ERR = '当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001' MAX_RELATIVE_ERR = '当最大相对误差越接近0表示其计算的误差越小。当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象' + SMALL_VALUE_TIP = '{} 小于1e-3,不计算相对误差' class Suggestions: @@ -116,3 +117,4 @@ class GraphConst: TAG_NA = 'na' OUTPUT_INDEX = -2 STR_MAX_LEN = 50 + SMALL_VALUE = 1e-3 -- Gitee From e3af5d50fe6a6c8562ee8f890e6d8640c6757eb0 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 19 Jul 2024 12:43:07 +0800 Subject: [PATCH 032/333] bugfix --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 2 +- .../atat/pytorch/visualization/compare/mode_adapter.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 738f8dac54c..2b77b7c5bf9 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -175,7 +175,7 @@ def _format_data(data_dict): elif value is None or value == ' ': value = 'null' # 科学计数法1.123123123123e-11,格式化为1.123123e-11 - elif isinstance(value, float) and len(value) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): + elif isinstance(value, float) and len(str(value)) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): value = "{:.6e}".format(value) elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index b291db60d8c..f4b56494f02 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ 
-91,9 +91,9 @@ class ModeAdapter: ModeAdapter._match_data(value, compare_data, key_list, id_list) # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 for index, item in enumerate(key_list[4:]): - value_diff = value.get(index) - if abs(value_diff) < GraphConst.SMALL_VALUE: - value[index] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) + value_diff = value.get(key_list[index]) + if isinstance(value_diff, float) and abs(value_diff) < GraphConst.SMALL_VALUE: + value[item] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) continue relative_err = str2float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) -- Gitee From 794ae5f397f3989ee156fc4890c3ad79a9a27405 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 19 Jul 2024 15:17:25 +0800 Subject: [PATCH 033/333] =?UTF-8?q?=E7=BB=9D=E5=AF=B9=E8=AF=AF=E5=B7=AE?= =?UTF-8?q?=E4=B8=BA0=E6=97=B6=E8=AE=A1=E7=AE=97=E7=9B=B8=E5=AF=B9?= =?UTF-8?q?=E8=AF=AF=E5=B7=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/compare/mode_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index f4b56494f02..c26c6759ab1 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -92,7 +92,7 @@ class ModeAdapter: # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 for index, item in enumerate(key_list[4:]): value_diff = value.get(key_list[index]) - if isinstance(value_diff, float) and abs(value_diff) < GraphConst.SMALL_VALUE: + if isinstance(value_diff, float) and value_diff != 0 and abs(value_diff) < GraphConst.SMALL_VALUE: value[item] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) continue relative_err = str2float(value.get(item)) -- Gitee From 298e6dc98322734303252dd9c007f4dde9f01628 
Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Tue, 23 Jul 2024 14:45:04 +0800 Subject: [PATCH 034/333] =?UTF-8?q?=E6=9B=BF=E6=8D=A2graph=20to=5Fdict?= =?UTF-8?q?=E6=96=B9=E6=B3=95=EF=BC=8C=E9=80=92=E5=BD=92=E6=94=B9=E9=81=8D?= =?UTF-8?q?=E5=8E=86=EF=BC=9B=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99=E7=9A=84?= =?UTF-8?q?json.dumps=EF=BC=9B=E6=B8=85=E7=90=86=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E7=9A=84node=E6=95=B0=E6=8D=AE=E8=BE=93=E5=87=BA=EF=BC=9BNodeO?= =?UTF-8?q?p=E4=B8=8B=E6=A0=87=E4=BF=AE=E6=94=B9=E4=B8=BA=E4=BB=8E0?= =?UTF-8?q?=E5=BC=80=E5=A7=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/graph_builder.py | 6 +++--- .../atat/pytorch/visualization/compare/mode_adapter.py | 3 +-- .../atat/pytorch/visualization/graph/base_node.py | 6 ------ .../atat/pytorch/visualization/graph/graph.py | 10 ---------- .../atat/pytorch/visualization/graph/node_op.py | 6 +++--- 5 files changed, 7 insertions(+), 24 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 9edc260589b..f623a48ae3b 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -48,10 +48,10 @@ class GraphBuilder: """ result = {} if graph_b: - result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict2() - result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict2() + result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict() + result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict() else: - result = graph_n.to_dict2() + result = graph_n.to_dict() if tool_tip: result[GraphConst.JSON_TIP_KEY] = tool_tip save_json_file(filename, result) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index c26c6759ab1..3ce2f414c59 100644 --- 
a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -208,5 +208,4 @@ class ModeAdapter: CompareConst.COSINE: ToolTip.COSINE, CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} - # 输出件优化 - return json.dumps(tips) + return tips diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 21b1db7fc24..f04f367f591 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -83,20 +83,14 @@ class BaseNode: """ 输出数据 """ - # 输出件优化 result = {} result['id'] = self.id result['node_type'] = self.op.value - result['type'] = self.id result['data'] = self.data result['output_data'] = format_node_data(self.output_data) result['input_data'] = format_node_data(self.input_data) - result['outputs'] = [] - result['inputs'] = [] result['upnode'] = self.upnode.id if self.upnode else 'None' result['subnodes'] = [node.id for node in self.subnodes] - result['is_forward'] = True - result['pair'] = 'None' result['matched_node_link'] = self.matched_node_link result['suggestions'] = self.suggestions return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index 7af11fceec0..6bae10ad3fc 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -84,13 +84,3 @@ class Graph: info = self.node_map.get(node_id).to_dict() result[GraphConst.JSON_NODE_KEY][node_id] = info return result - - def to_dict2(self): - # 输出件优化 - # 为了防止输出件变动临时使用方法,会在后续重构中删除 - # 递归遍历,在正式交付中尽量避免 - result = {} - result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' - 
result[GraphConst.JSON_NODE_KEY] = {} - Graph.dfs(self.root, result[GraphConst.JSON_NODE_KEY]) - return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index a5bf8a44388..ed06e0ef733 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -19,8 +19,8 @@ from ..builder.msprobe_adapter import op_patterns class NodeOp(Enum): - module = 1 - function_api = 2 + module = 0 + function_api = 1 @staticmethod def get_node_op(node_name: str): @@ -28,7 +28,7 @@ class NodeOp(Enum): 基于代表节点的字符串,解析节点种类 """ for op in NodeOp: - index = op.value - 1 + index = op.value if index < 0 or index >= len(op_patterns): raise Exception("NodeOp and op_patterns in MsprobeAdapter do not match") pattern = op_patterns[index] -- Gitee From eb8904f2638f9ff784015dff4841f881dc26a6f7 Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Tue, 23 Jul 2024 17:38:42 +0800 Subject: [PATCH 035/333] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E5=89=8D=E7=AB=AF=E6=97=A0=E6=B3=95=E8=A7=A3=E6=9E=90?= =?UTF-8?q?Inf=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 2b77b7c5bf9..adee140eeab 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -179,6 +179,7 @@ def _format_data(data_dict): value = "{:.6e}".format(value) elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) - elif not isinstance(value, (list, tuple, dict, str)): + # 
Inf会走入这里,确保转成Inf。另外给其他不符合预期的类型做兜底方案 + if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value -- Gitee From 40bdba690a3873b201c0505e52ecf67f53e662f4 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Thu, 25 Jul 2024 16:51:02 +0800 Subject: [PATCH 036/333] =?UTF-8?q?=E5=8F=AF=E8=A7=86=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E7=A7=BB=E5=8A=A8=E5=88=B0msprobe=E5=8C=85=E5=86=85?= =?UTF-8?q?=EF=BC=8C=E9=80=82=E9=85=8D=E6=94=B9=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{atat => msprobe}/pytorch/visualization/__init__.py | 0 .../{atat => msprobe}/pytorch/visualization/builder/__init__.py | 0 .../pytorch/visualization/builder/graph_builder.py | 0 .../pytorch/visualization/builder/msprobe_adapter.py | 0 .../{atat => msprobe}/pytorch/visualization/compare/__init__.py | 0 .../pytorch/visualization/compare/graph_comparator.py | 0 .../pytorch/visualization/compare/mode_adapter.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/__init__.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/base_node.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/graph.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/node_op.py | 0 .../{atat => msprobe}/pytorch/visualization/test.py | 0 .../{atat => msprobe}/pytorch/visualization/utils.py | 0 13 files changed, 0 insertions(+), 0 deletions(-) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/builder/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/builder/graph_builder.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/builder/msprobe_adapter.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/compare/__init__.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/visualization/compare/graph_comparator.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/compare/mode_adapter.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/base_node.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/graph.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/node_op.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/test.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/utils.py (100%) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/builder/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py rename to 
debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/compare/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/test.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/test.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/utils.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/utils.py -- Gitee From 43a1ab1c3be7e4678baf2a29a36a138eeba0f41b Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 26 Jul 2024 14:17:24 +0800 Subject: [PATCH 037/333] =?UTF-8?q?=E5=8F=AF=E8=A7=86=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E9=80=82=E9=85=8D=E5=B7=A5=E5=85=B7=E9=87=8D=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/msprobe_adapter.py | 4 ++-- .../pytorch/visualization/compare/graph_comparator.py | 3 --- .../pytorch/visualization/compare/mode_adapter.py | 2 +- .../msprobe/pytorch/visualization/test.py | 11 +++++------ .../msprobe/pytorch/visualization/utils.py | 4 +--- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py index 2b77b7c5bf9..4dfbf4c7fa6 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py @@ -14,9 +14,9 @@ # limitations under the License. import re -from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, task_dumppath_get, _do_multi_process +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, _do_multi_process +from ....core.common.utils import task_dumppath_get from ..utils import GraphConst -from ....core.utils import print_info_log # 用于将节点名字解析成对应的NodeOp的规则 diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py index 18b905f338d..3d5f2972468 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py @@ -13,9 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import pandas as pd -from ....core.utils import Const, print_info_log from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data from ..utils import GraphConst, load_json_file, load_data_json_file, get_csv_df from ..graph.graph import Graph diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py index c26c6759ab1..d8a7c8f21f8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py @@ -14,7 +14,7 @@ # limitations under the License. 
import json -from ....core.utils import CompareConst, Const +from ....core.common.const import CompareConst, Const from ..utils import ToolTip, GraphConst, str2float diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py index 9784a96b808..61f3b788f9a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py @@ -14,15 +14,14 @@ # limitations under the License. import os -import re import time import shutil import filecmp from .compare.graph_comparator import GraphComparator from .utils import GraphConst from .builder.graph_builder import GraphBuilder -from ...core.utils import print_info_log -from ...core.file_check_util import FileOpen, create_directory +from ...core.common.log import logger +from ...core.common.file_check import create_directory def compare_graph(dump_path_n, dump_path_b, out_path): @@ -53,7 +52,7 @@ def run_st(data_path): start_time = time.time() run_bench(data_path, 'output2') end_time = time.time() - print_info_log(f'run_st time cost: {end_time - start_time}') + logger.info(f'run_st time cost: {end_time - start_time}') # 比较output2的结果和output1 的bench结果差距 for data_dir in os.listdir(data_path): data_dir = os.path.join(data_path, data_dir) @@ -67,9 +66,9 @@ def run_st(data_path): file2 = os.path.join(output2, vis_file) result = filecmp.cmp(file1, file2) if result: - print_info_log('pass ' + file1) + logger.info('pass ' + file1) else: - print_info_log('not pass ' + file1) + logger.info('not pass ' + file1) def run_bench(data_path, output_dir): diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py index 3e07122a943..fb046f97586 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py @@ -14,9 +14,7 @@ # limitations under the License. 
import json -import os -import stat -from ...core.file_check_util import FileOpen +from ...core.common.file_check import FileOpen from ..compare.acc_compare import result_to_csv -- Gitee From 5961e71345bc701a187849e56efa0f3461a5d4f8 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 26 Jul 2024 17:58:47 +0800 Subject: [PATCH 038/333] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/pytorch/visualization/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py index 61f3b788f9a..165d54ce17e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py @@ -20,7 +20,7 @@ import filecmp from .compare.graph_comparator import GraphComparator from .utils import GraphConst from .builder.graph_builder import GraphBuilder -from ...core.common.log import logger +from ...pytorch.common.log import logger from ...core.common.file_check import create_directory -- Gitee From 2943fde2d13c6fac76cf598992088c4768de5eda Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Thu, 1 Aug 2024 09:58:14 +0800 Subject: [PATCH 039/333] update_profiler_pre_research_code --- .idea/workspace.xml | 81 ++++++ profiler/README.md | 1 + profiler/advisor/README.md | 11 +- profiler/advisor/analyzer/base_analyzer.py | 8 - .../computation/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_analyzer.py | 36 +++ .../ai_core_freq/ai_core_freq_checker.py | 100 ++++++++ .../computation/aicpu/aicpu_checker.py | 6 +- .../computation/profiling_analyzer.py | 9 +- .../dataloader/dataloader_analyzer.py | 30 +++ .../analyzer/dataloader/dataloader_checker.py | 84 ++++++ .../graph_fusion/graph_fusion_checker.py | 2 +- 
.../analyzer/overall/overall_analyzer.py | 45 ---- .../overall/overall_summary_analyzer.py | 240 ++++++++---------- .../analyzer/schedule/syncbn/__init__.py | 0 .../schedule/syncbn/syncbn_analyzer.py | 30 +++ .../schedule/syncbn/syncbn_checker.py | 70 +++++ .../schedule/synchronize_stream/__init__.py | 0 .../synchronize_stream_analyzer.py | 32 +++ .../synchronize_stream_checker.py | 89 +++++++ .../schedule/timeline_base_checker.py | 91 +++++++ profiler/advisor/common/analyzer_scopes.py | 4 + profiler/advisor/common/constant.py | 9 +- profiler/advisor/common/graph/graph_parser.py | 9 +- profiler/advisor/common/profiling/ge_info.py | 3 +- profiler/advisor/common/profiling/msprof.py | 3 +- .../advisor/common/profiling/op_summary.py | 4 +- profiler/advisor/common/profiling/tasktime.py | 4 +- .../advisor/common/timeline/fusion_ops_db.py | 6 +- profiler/advisor/config/config.ini | 1 + profiler/advisor/config/config.py | 7 + .../config/profiling_data_version_config.yaml | 17 +- .../advisor/dataset/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_dataset.py | 148 +++++++++++ .../advisor/dataset/profiling/device_info.py | 2 + .../dataset/profiling/profiling_dataset.py | 15 +- .../dataset/profiling/profiling_parser.py | 27 +- .../advisor/dataset/timeline_event_dataset.py | 176 ++++++++++--- .../html/templates/ai_core_frequency.html | 27 ++ .../html/templates/slow_dataloader.html | 18 ++ .../html/templates/sync_batchnorm.html | 30 +++ .../html/templates/synchronize_stream.html | 57 +++++ profiler/advisor/img/overall.png | Bin 64492 -> 49616 bytes profiler/advisor/img/overall_0.png | Bin 0 -> 56377 bytes profiler/advisor/interface/interface.py | 18 +- profiler/advisor/result/item.py | 2 +- profiler/advisor/result/result.py | 18 +- profiler/advisor/rules/dataloader.yaml | 9 + profiler/advisor/rules/sync_batchnorm.yaml | 41 +++ profiler/advisor/rules/synchronize.yaml | 8 + profiler/advisor/utils/utils.py | 64 ++++- profiler/cli/__init__.py | 2 +- 
profiler/cli/analyze_cli.py | 3 - profiler/cli/compare_cli.py | 2 + .../common_func/file_manager.py | 19 ++ profiler/compare_tools/README.md | 82 +++++- .../comparator/api_compare_comparator.py | 32 +++ .../comparator/kernel_compare_comparator.py | 35 +++ .../compare_bean/api_compare_bean.py | 47 ++++ .../compare_bean/kernel_compare_bean.py | 75 ++++++ .../origin_data_bean/kernel_details_bean.py | 6 + .../data_prepare/operator_data_prepare.py | 17 ++ .../disaggregate/overall_perf_interface.py | 28 +- .../generator/detail_performance_generator.py | 22 +- .../profiling_parser/base_profiling_parser.py | 19 +- .../profiling_parser/gpu_profiling_parser.py | 5 + .../profiling_parser/npu_profiling_parser.py | 24 ++ .../compare_backend/utils/args_manager.py | 13 +- .../compare_backend/utils/compare_args.py | 4 + .../compare_backend/utils/constant.py | 7 +- .../compare_backend/utils/excel_config.py | 48 +++- .../compare_backend/utils/torch_op_node.py | 8 + .../compare_backend/utils/tree_builder.py | 3 +- .../view/work_sheet_creator.py | 12 +- profiler/compare_tools/img/OverallMetrics.png | Bin 0 -> 66941 bytes profiler/compare_tools/performance_compare.py | 2 + profiler/test/run_ut.py | 2 + .../test_dataloader_checker.py | 65 +++++ .../timeline_advice/test_syncbn_checker.py | 62 +++++ .../test_synchronize_stream.py | 55 ++++ .../compute_advice/test_frequency_advice.py | 145 +++++++++++ .../test_base_profiling_parser.py | 5 + 82 files changed, 2236 insertions(+), 305 deletions(-) create mode 100644 .idea/workspace.xml create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/__init__.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py create mode 100644 profiler/advisor/analyzer/dataloader/dataloader_analyzer.py create mode 100644 profiler/advisor/analyzer/dataloader/dataloader_checker.py delete mode 100644 
profiler/advisor/analyzer/overall/overall_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/__init__.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py create mode 100644 profiler/advisor/analyzer/schedule/timeline_base_checker.py create mode 100644 profiler/advisor/dataset/ai_core_freq/__init__.py create mode 100644 profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py create mode 100644 profiler/advisor/display/html/templates/ai_core_frequency.html create mode 100644 profiler/advisor/display/html/templates/slow_dataloader.html create mode 100644 profiler/advisor/display/html/templates/sync_batchnorm.html create mode 100644 profiler/advisor/display/html/templates/synchronize_stream.html create mode 100644 profiler/advisor/img/overall_0.png create mode 100644 profiler/advisor/rules/dataloader.yaml create mode 100644 profiler/advisor/rules/sync_batchnorm.yaml create mode 100644 profiler/advisor/rules/synchronize.yaml create mode 100644 profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py create mode 100644 profiler/compare_tools/img/OverallMetrics.png create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py create mode 100644 
profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py create mode 100644 profiler/test/ut/advisor/compute_advice/test_frequency_advice.py diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 00000000000..a364b7d06a1 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + { + "keyToString": { + "RunOnceActivity.OpenProjectViewOnStart": "true", + "RunOnceActivity.ShowReadmeOnStart": "true", + "last_opened_file_path": "D:/mycode/att" + } +} + + + + + + + + + + + + + + + + + + + + + + + + 1716885945639 + + + + \ No newline at end of file diff --git a/profiler/README.md b/profiler/README.md index 1669e3524e5..549ffefc14c 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -91,6 +91,7 @@ ascend pytorch profiler数据目录结构如下: | profiler版本 | 发布日期 | 下载链接 | 校验码 | | ------------ | ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | + | 1.2.0 | 2024-07-25 | [msprof_analyze-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.0/msprof_analyze-1.2.0-py3-none-any.whl) | 6a4366e3beca40b4a8305080e6e441d6ecafb5c05489e5905ac0265787555f37 | | 1.1.2 | 2024-07-12 | [msprof_analyze-1.1.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.2/msprof_analyze-1.1.2-py3-none-any.whl) | af62125b1f9348bf491364e03af712fc6d0282ccee3fb07458bc9bbef82dacc6 | | 1.1.1 | 2024-06-20 | [msprof_analyze-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.1/msprof_analyze-1.1.1-py3-none-any.whl) | 76aad967a3823151421153d368d4d2f8e5cfbcb356033575e0b8ec5acea8e5e4 | | 1.1.0 | 2024-05-28 | [msprof_analyze-1.1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.0/msprof_analyze-1.1.0-py3-none-any.whl) | 
b339f70e7d1e45e81f289332ca64990a744d0e7ce6fdd84a8d82e814fa400698 | diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md index c650f40b3ea..77027110559 100644 --- a/profiler/advisor/README.md +++ b/profiler/advisor/README.md @@ -36,11 +36,11 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 3. 查看结果。 - 分析结果输出相关简略建议到执行终端中,并生成`att_advisor_{timestamp}.html`和`att_advisor_{timestamp}.xlsx`文件供用户预览。 + 分析结果输出相关简略建议到执行终端中,并生成`mstt_advisor_{timestamp}.html`和`mstt_advisor_{timestamp}.xlsx`文件供用户预览。 - `att_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 + `mstt_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 - `att_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 + `mstt_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 执行终端输出示例如下: @@ -72,6 +72,7 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 | | block_dim_analysis | block dim算子调优 | | | operator_no_bound_analysis | operator no bound | | | graph | 融合算子图调优 | +| | freq_analysis | AI Core算子降频分析 | | scheduling | timeline_fusion_ops | 亲和API替换调优 | | | timeline_op_dispatch | 识别算子下发问题(路径3/路径5) | @@ -132,6 +133,8 @@ cluster模块的分析包含快慢卡和快慢链路分析,仅识别问题, overall模块的分析包含当前训练任务慢卡的性能拆解,按照计算、通信和下发三个维度进行耗时的统计,可以基于该分析识别到训练性能瓶颈是计算、通信还是下发问题,同样不提供调优建议。 +![输入图片说明](./img/overall_0.png) + ![输入图片说明](./img/overall.png) schedule模块包含亲和API、aclOpCompile、syncBatchNorm、SynchronizeStream等多项检测。 @@ -152,7 +155,7 @@ torch_npu.npu.config.allow_internal_format = False ![schedule_3](./img/schedule_3.png) -computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 +computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape、AI Core算子降频分析等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 ![computation_1](./img/computation_1.png) diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index e0e17320b33..ada1b0bf4f4 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -73,14 +73,6 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def 
optimize(self, **kwargs): pass - @abstractmethod - def make_record(self): - pass - - @abstractmethod - def make_render(self): - pass - def init_dataset_list(self)->None: dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py b/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py new file mode 100644 index 00000000000..4f25deff7c0 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py @@ -0,0 +1,36 @@ +import logging + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqAnalyzer(BaseAnalyzer): + dataset_cls_list = [AICoreFreqDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = AICoreFreqDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + self.html = None + + @BaseAnalyzer.check_data((AICoreFreqDataset.get_key(),)) + def optimize(self, **kwargs): + if not Config().get_config("aic_frequency"): + logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") + return self.result + add_render_list = kwargs.get("add_render_list", True) + ai_core_freq_checker = 
AICoreFreqChecker() + ai_core_freq_checker.check_ai_core_freq(self.dataset) + if not ai_core_freq_checker.ai_core_freq_issues: + return self.result + ai_core_freq_checker.make_record(self.result) + self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list) + return self.result diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py new file mode 100644 index 00000000000..5ea4dbd7542 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py @@ -0,0 +1,100 @@ +import logging + +from profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.config.config import Config +from profiler.advisor.utils.utils import convert_to_float + +logger = logging.getLogger() + + +class AICoreFreqChecker: + DEFAULT_FREQ = 1800 + DECREASE_FREQ_RATIO = 0.05 + SHOW_TOPK_OPS = 10 + TOTAL_DURATION_INDEX = 2 + DECREASE_FREQ_RATIO_INDEX = 3 + + def __init__(self): + + self.ai_core_freq_issues = False + self.desc = "" + self.suggestions = "" + self.decrease_freq_ops = [] + self.headers = [] + self.op_freq = None + self.rank_id = None + self.stage = None + + def check_ai_core_freq(self, event_dataset: AICoreFreqDataset, rank_id=None, stage=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + logger.debug("Skip slow ai core frequency checker, " + "because no ai core frequency were recorded in trace_view.json") + return + + self.rank_id = rank_id + self.stage = stage + self.op_freq = event_dataset.op_freq + for op_name, op_info in self.op_freq.items(): + freq_list = op_info.get("freq_list", []) + if not freq_list: + continue + + op_count = 
op_info.get("count", 0) + op_total_duration = round(op_info.get("dur", 0), 2) + max_freq = max(self.DEFAULT_FREQ, convert_to_float(Config().get_config("aic_frequency"))) + + decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) + if decrease_freq_ratio >= self.DECREASE_FREQ_RATIO: + self.ai_core_freq_issues = True + self.decrease_freq_ops.append([op_name, op_count, op_total_duration, + f"{round(decrease_freq_ratio, 4):.2%}", + round(sum(freq_list) / len(freq_list), 2), + max(freq_list), min(freq_list)]) + + if self.decrease_freq_ops: + # 按算子总耗时和降频比率 降序排列 + self.decrease_freq_ops.sort(key= + lambda x: (x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), + reverse=True) + + self.desc = (f"{len(self.decrease_freq_ops)} operators are found during frequency reduction, and the reduction " + f"ratio is larger than {self.DECREASE_FREQ_RATIO}.") + if self.rank_id: + self.desc = f"For rank {self.rank_id}, " + self.desc.lower() + self.suggestions = "Please check the temperature or max power of your machine." 
+ + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem("AI Core Frequency", self.desc, [self.suggestions]) + result.add(OptimizeRecord(optimization_item)) + + self.headers = ["Operator name", "Count", "Total duration(us)", "AI CORE frequency decreased ratio", + "Average frequency", "Max frequency", "Min frequency"] + if self.rank_id: + self.headers = ["Rank id"] + self.headers + sub_table_name = "AI Core Frequency" if not self.stage else f"Stage-{self.stage}: AI Core Frequency" + result.add_detail(sub_table_name, headers=self.headers) + + for row in self.decrease_freq_ops: + if self.rank_id: + row = [self.rank_id] + row + result.add_detail(sub_table_name, detail=row) + + def make_render(self, html_render, add_render_list=True): + if self.SHOW_TOPK_OPS: + self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." + return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_frequency.html", + desc=self.desc, + suggestion=self.suggestions, + headers=self.headers, + data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + add_render_list=add_render_list) diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py index 4eca1c6c027..0caede4b894 100644 --- a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -3,13 +3,13 @@ import os from functools import partial from typing import List, Dict, Optional -import yaml from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker, logger from profiler.advisor.analyzer.schedule.fusion_ops.timeline_api_stack_checker import OpStackFinder from profiler.advisor.common import constant from profiler.advisor.dataset.dataset import Dataset from 
profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.cluster_analyse.common_func.file_manager import FileManager class AicpuChecker(OperatorChecker): @@ -47,8 +47,8 @@ class AicpuChecker(OperatorChecker): if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) return {} - with open(rule_path, 'r') as f: - self.aicpu_rules = yaml.safe_load(f) + + self.aicpu_rules = FileManager.read_yaml_file(rule_path) self.filter_aicpu_rules(self.aicpu_rules) for checker_name, check_rule in self.aicpu_rules.items(): if not isinstance(check_rule, (list, dict,)): diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index 86826177007..2021bcd5765 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -1,19 +1,15 @@ import logging from abc import ABC -from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.aicpu.aicpu_checker import AicpuChecker from profiler.advisor.analyzer.computation.bound.block_dim_checker import BlockDimChecker from profiler.advisor.analyzer.computation.bound.operator_bound_checker import OperatorBoundChecker -from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.analyzer.computation.op_compile.dynamic_shape_checker import DynamicShapeChecker from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from 
profiler.advisor.utils.utils import get_supported_subclass logger = logging.getLogger() @@ -76,14 +72,15 @@ class BlockDimAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = BlockDimChecker(self.cann_version) - + class OperatorBoundAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = OperatorBoundChecker(self.cann_version) + class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) \ No newline at end of file + self.checker = AicpuChecker(self.cann_version) diff --git a/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py new file mode 100644 index 00000000000..291c3a1f941 --- /dev/null +++ b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py @@ -0,0 +1,30 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.dataloader.dataloader_checker import DataloaderChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class DataloaderAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = TimelineEventDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + + @BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): 
+ dataloader_checker = DataloaderChecker() + dataloader_checker.check_slow_dataloader(self.dataset) + dataloader_checker.make_record(self.result) + dataloader_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/dataloader/dataloader_checker.py b/profiler/advisor/analyzer/dataloader/dataloader_checker.py new file mode 100644 index 00000000000..eb1886284ef --- /dev/null +++ b/profiler/advisor/analyzer/dataloader/dataloader_checker.py @@ -0,0 +1,84 @@ +import os +import re +import logging +import yaml + +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.cluster_analyse.common_func.file_manager import FileManager + +logger = logging.getLogger() + + +class DataloaderChecker: + + def __init__(self): + + self.dataloader_issues = False + self.optimization_item = [] + self.desc = "" + self.suggestions = [] + self.dataloader_duration_threshold = None + self._init_rule() + + def check_slow_dataloader(self, event_dataset: TimelineEventDataset): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "dataloader") or not getattr(event_dataset, "dataloader"): + logger.debug("Skip slow dataloader checker, because no dataloader duration larger than %s", + self.dataloader_duration_threshold) + return + for event in event_dataset.dataloader: + + dataloader_duration = float(event.dur) / 1000 + if dataloader_duration < self.dataloader_duration_threshold: + continue + self.desc = self.desc.format(dataloader_duration=dataloader_duration, + dataloader_duration_threshold=self.dataloader_duration_threshold) + self.dataloader_issues = True + + if re.search("singleprocess", event.name.lower()): + self.suggestions = self._reset_suggestions(["I/O", "num_workers"]) + + def make_record(self, result: OptimizeResult): + """ + make record for 
what and how to optimize + """ + if not self.dataloader_issues: + return + + self.optimization_item.append(OptimizeItem("Slow dataloader", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not self.dataloader_issues: + return + html_render.render_template(key="dataloader", + template_dir="templates", + template_name="slow_dataloader.html", + desc=self.desc, + suggestions=self.suggestions) + + def _init_rule(self): + dataloader_rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), + "rules", + "dataloader.yaml" + ) + dataloader_rule = FileManager.read_yaml_file(dataloader_rule_path) + + self.dataloader_duration_threshold = dataloader_rule.get("dataloader_duration_threshold") + self.desc = dataloader_rule.get("problem") + self.suggestions = dataloader_rule.get("solutions") + + def _reset_suggestions(self, suggestion_pattern_list): + + suggestions = [] + for solution in self.suggestions: + for suggestion_pattern in suggestion_pattern_list: + if re.search(suggestion_pattern, solution): + suggestions.append(solution) + return suggestions diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py index e64020fdfe2..30bd4323795 100644 --- a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py @@ -149,7 +149,7 @@ class GraphFusionRules: optimization_item = OptimizeItem( "fusion issue", f"Found {len(self.candidates)} fusion issues", - ["Check fusion issues detail in att_advisor*.html"] + ["Check fusion issues detail in mstt_advisor*.html"] ) total_time = 0.0 for candidate in self.task_duration_list: diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py deleted file mode 100644 index 
916a396b3d0..00000000000 --- a/profiler/advisor/analyzer/overall/overall_analyzer.py +++ /dev/null @@ -1,45 +0,0 @@ -import logging -from typing import Dict, List - -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult -from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface - -logger = logging.getLogger() - - -class OverallSummaryAnalyzer(BaseAnalyzer): - - def __init__(self, profiling_path, benchmark_profiling_path=None, **kwargs): - self.benchmark_profiling_path = benchmark_profiling_path or profiling_path - self.profiling_path = profiling_path - self.html_render = HTMLRender() - self.result = OptimizeResult() - - def optimize(self, **kwargs): - compare_result = ComparisonInterface(self.benchmark_profiling_path, self.profiling_path).compare( - Constant.OVERALL_COMPARE) - - headers = compare_result.get('Model Profiling Time Distribution').get("headers", []) - rows = compare_result.get('Model Profiling Time Distribution').get("rows", []) - - self.make_record() - self.make_render(headers=headers, rows=rows) - return compare_result - - def make_record(self): - pass - - def make_render(self, **kwargs): - headers = kwargs.get("headers") - rows = kwargs.get("rows") - - if not headers or not rows: - logger.info("Empty headers or rows, skip render overall analysis html") - self.html_render.render_template(key="overall", - template_dir="templates", - template_name="overall_analysis.html", - headers=kwargs.get("headers"), - rows=kwargs.get("rows")) diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index c74ae051033..8e93dbda77d 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ 
b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -13,27 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -import copy - -import logging -from typing import Dict, List +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.advisor.common import constant as const from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface -from profiler.advisor.utils.utils import get_file_path_from_directory, load_parameter class OverallSummaryAnalyzer(BaseAnalyzer): OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis" advice_map = { - "Computing Time": "if you want more detailed advice please go to att_advisor_*.html", - "Uncovered Communication Time": "if you want more detailed advice please go to att_advisor_*.html", - "Free Time": "if you want more detailed advice please go to att_advisor_*.html" + "Computing Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Uncovered Communication Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Free Time": "if you want more detailed advice please go to mstt_advisor_*.html" } time_name_map = { "Computing Time": "computing", @@ -47,45 +41,37 @@ class OverallSummaryAnalyzer(BaseAnalyzer): 'SDMA Time(Num)': 'SDMA Time' } performance_time_dict = { - "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)', - 'Flash Attention Time(Backward)(Num)', 'Other Time'], - "Uncovered Communication Time(Wait Time)": [], - "Free Time": 
['SDMA Time(Num)'] + "Computing Time": "computing_time_ms", + " -- Flash Attention": "fa_time_ms", + " -- Conv": "conv_time_ms", + " -- Matmul": "matmul_time_ms", + " -- Vector": "vector_time_ms", + " -- SDMA(Tensor Move)": "tensor_move_time_ms", + " -- Other Cube": "other_cube_time_ms", + "Uncovered Communication Time": "uncovered_communication_time_ms", + " -- Wait": "wait_time_ms", + " -- Transmit": "transmit_time_ms", + "Free Time": "free_time_ms", + " -- SDMA": "sdma_time_ms", + " -- Free": "free_ms", + "E2E Time": "e2e_time_ms" } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): profile_path = get_profile_path(collection_path) super().__init__(profile_path, n_processes, **kwargs) - self.base_collection_path = kwargs.get("base_collection_path", "") - self._has_base_collection = False + self.benchmark_profiling_path = kwargs.get("benchmark_profiling_path", "") + self._has_benchmark_profiling = False self._is_minimal_profiling = False self.cur_data = {} - self.cur_data_table = {} self.cur_bottleneck = {} + self._disaggregate_perf = {} + self._disaggregate_benchmark_perf = {} self.cur_advices = "" - self._headers = [] - self._base_data = [] - self._comparison_data = [] self.html_render = HTMLRender() self.result = OptimizeResult() self.bottleneck_str = "" - self.bottleneck_table = {} - - @staticmethod - def split_duration_and_num(time_value: str) -> tuple: - split_data = time_value.split("s") # time value example: 0.229s(1756) - duration, num = 0.0, None - if len(split_data) >= 2: - try: - num = int(split_data[1].strip("()")) - except ValueError: - pass - if len(split_data) >= 1: - try: - duration = float(split_data[0]) - except ValueError: - print(f"[WARNING] Invalid time value: {time_value}.") - return duration, num + self.over_summary_analysis = {} @staticmethod def calculate_ratio(dividend, divisor): @@ -93,131 +79,121 @@ class OverallSummaryAnalyzer(BaseAnalyzer): return float("inf") return dividend / divisor + @staticmethod + def 
get_time_category_dict(overall_dict: dict): + time_category_dict = { + "Computing Time": round(overall_dict.get('computing_time_ms', 0.0), 3), + "Uncovered Communication Time": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), + "Free Time": round(overall_dict.get('free_time_ms', 0.0), 3) + } + return time_category_dict + def path_check(self): - if self.base_collection_path: - if os.path.exists(self.base_collection_path): - self._has_base_collection = True + if self.benchmark_profiling_path: + if os.path.exists(self.benchmark_profiling_path): + self._has_benchmark_profiling = True else: - print(f"[WARNING] Invalid path which not exists: {self.base_collection_path}.") + print(f"[WARNING] Invalid path which not exists: {self.benchmark_profiling_path}.") return os.path.exists(self.collection_path) def process(self): - base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path - result_data = ComparisonInterface(base_collection_path, self.collection_path).compare(Constant.OVERALL_COMPARE) - for data in result_data.values(): - self._headers = data.get("headers", []) - rows = data.get("rows", []) - if len(rows) == 2: - self._base_data = rows[0] - self._comparison_data = rows[1] - if not self._headers or not self._comparison_data: + self._disaggregate_perf = ComparisonInterface(self.collection_path).disaggregate_perf(Constant.OVERALL_COMPARE) + if not self._disaggregate_perf: return - self._is_minimal_profiling = 'E2E Time(Not minimal profiling)' not in self._headers - if self._has_base_collection: - self.cur_data["comparison_result"] = result_data - time_category_dict = {} - for time_category, time_list in self.performance_time_dict.items(): - time_value = self.get_time_value(time_category, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - duration, _ = self.split_duration_and_num(time_value) - time_category = time_category.split("(")[0] - time_category_dict[time_category] = 
duration - self.get_sub_category_time(time_category, time_list, duration) - self.cur_data["overall_data"] = time_category_dict - - def get_time_value(self, header_name: str, data_list: list): - try: - data_index = self._headers.index(header_name) - except ValueError: - return Constant.INVALID_VALUE - try: - time_value = data_list[data_index] - except IndexError: - return Constant.INVALID_VALUE - return time_value - - def get_sub_category_time(self, category: str, time_list: list, total_duration: float): - sub_time_dict = {} - for time_name in time_list: - time_value = self.get_time_value(time_name, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, "")) - duration, num = self.split_duration_and_num(time_value) - sub_time_dict.setdefault(f"Duration(s)", []).append(duration) - sub_time_dict.setdefault(f"Duration Ratio", []).append( - "{:.2%}".format(self.calculate_ratio(duration, total_duration))) - sub_time_dict.setdefault(f"Kernel Number", []).append(num) - self.cur_data[self.time_name_map.get(category)] = sub_time_dict + self._is_minimal_profiling = self._disaggregate_perf.get("minimal_profiling", False) + self.cur_data["overall_data"] = self.get_time_category_dict(self._disaggregate_perf.get('overall', {})) + if self._has_benchmark_profiling: + self._disaggregate_benchmark_perf = ComparisonInterface( + self.benchmark_profiling_path).disaggregate_perf(Constant.OVERALL_COMPARE) def identify_bottleneck(self): overall_data = self.cur_data.get("overall_data") if not overall_data: return e2e_time = '%.3f' % sum([data for data in overall_data.values()]) - overall_bottleneck = f"The Model E2E Time is {e2e_time}s.\n" + overall_bottleneck = f"The Model E2E Time is {e2e_time}ms.\n" comparison_bottleneck = "" for time_type, time_value in overall_data.items(): - # add subtype time bottleneck - self.cur_bottleneck[self.time_name_map.get(time_type)] = 
f"{time_type} is {time_value}s.\n" # add overall bottleneck - overall_bottleneck += f" -- {time_type} is {time_value}s\n" + overall_bottleneck += f" -- {time_type} is {time_value}ms\n" if time_type == "Free Time" and self._is_minimal_profiling and self.calculate_ratio(time_value, e2e_time) > 0.1: overall_bottleneck += "percentage of free time exceed the threshold 10%." - if not self._has_base_collection: + if not self._has_benchmark_profiling: continue # add comparison bottleneck - time_type_origin = "Uncovered Communication Time(Wait Time)" \ - if time_type == "Uncovered Communication Time" else time_type - base_duration, _ = self.split_duration_and_num(self.get_time_value(time_type_origin, self._base_data)) + base_duration = self.get_time_category_dict( + self._disaggregate_benchmark_perf.get('overall', {}) + ).get(time_type) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" self.cur_bottleneck["overall_data"] = overall_bottleneck if comparison_bottleneck: self.cur_bottleneck["comparison_result"] = comparison_bottleneck + def optimize(self, **kwargs): if self.path_check(): self.process() self.identify_bottleneck() self.format_bottleneck() - self.format_cur_data() + self.format_over_summary_analysis() self.make_record() self.make_render() return self.result def format_bottleneck(self): result = '' - headers = [] - data_list = [] - data = [] - for key, value in self.cur_bottleneck.items(): + for _, value in self.cur_bottleneck.items(): if not value: continue - result += f'{key}: {value} \n' - headers.append(key) - data.append(value) - data_list.append(data) + result += f'{value} \n' self.bottleneck_str = result - self.bottleneck_table["headers"] = headers - self.bottleneck_table["data"] = data_list - def format_cur_data(self): - if not self.cur_data: - return - for data_type, data in self.cur_data.items(): - if not data: - 
continue - if data_type not in list(self.time_name_map.values()): - data_list = list(data.values()) - else: - data_list = [','.join(map(str, value)) for value in data.values()] - headers = list(data.keys()) - data_table = {"headers": headers, "data": [data_list]} - self.cur_data_table[data_type] = copy.deepcopy(data_table) + def format_over_summary_analysis(self): + headers = ['Performance Index', 'Duration(ms)', 'Duration Ratio'] + performance_data = self.get_analysis_data(self._disaggregate_perf) + benchmark_data = self.get_analysis_data(self._disaggregate_benchmark_perf) + if self._has_benchmark_profiling: + headers.append('Diff Duration(ms)') + self.format_analysis_with_benchmark(performance_data, benchmark_data, headers) + else: + self.format_analysis_only(performance_data, headers) + + def get_analysis_data(self, data_dict: dict): + if not data_dict: + return {} + return { + **data_dict.get("overall"), + **data_dict.get("computing_time_disaggregate"), + **data_dict.get("communication_time_disaggregate"), + **data_dict.get("free_time_disaggregate"), + } + def format_analysis_only(self, performance_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, total_duration))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res + + def format_analysis_with_benchmark(self, performance_data: dict, benchmark_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, 
total_duration))) + row.append("{:.3f}".format(duration - benchmark_data.get(time_key, 0.0))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res def make_record(self): """ @@ -232,20 +208,23 @@ class OverallSummaryAnalyzer(BaseAnalyzer): ) self.result.add(OptimizeRecord(optimization_item)) - self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - for data_type, data_dict in self.cur_data_table.items(): - if data_dict: - self.result.add_detail(const.DATA + data_type, data_dict["headers"], data_dict["data"][0]) + self.result.add_detail( + OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + headers=self.over_summary_analysis["headers"] + ) + for data in self.over_summary_analysis["data"]: + self.result.add_detail(OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, detail=data) def make_render(self): if not self.bottleneck_str and not self.cur_advices: return + # 将\n替换为html换行 + bottleneck_str = self.bottleneck_str.replace('\n', '
') result_for_html = { - "Description" : self.bottleneck_str, - "suggestion" : self.cur_advices, - "details" : [self.bottleneck_table] + "Description": bottleneck_str, + "suggestion": self.cur_advices, + "details": [self.over_summary_analysis] } - self.html_render.render_template(key="overall", title=OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, template_dir="templates", @@ -254,9 +233,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): torch_version=self.torch_version, result=result_for_html) + def get_profile_path(collection_path): for root, dirs, files in os.walk(collection_path): for file in files: if file.startswith("profiler_info"): return root - return "" \ No newline at end of file + return "" diff --git a/profiler/advisor/analyzer/schedule/syncbn/__init__.py b/profiler/advisor/analyzer/schedule/syncbn/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py new file mode 100644 index 00000000000..fc6dfce5f0b --- /dev/null +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py @@ -0,0 +1,30 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class SyncBNAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, **kwargs): + super().__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render = HTMLRender() + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + 
@BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + syncbn_checker = SyncBNChecker() + syncbn_checker.check_syncbn(self.timeline_event_dataset) + syncbn_checker.make_record(self.result) + syncbn_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py new file mode 100644 index 00000000000..83988c4e60b --- /dev/null +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py @@ -0,0 +1,70 @@ +import logging +import os + +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.cluster_analyse.common_func.file_manager import FileManager + +logger = logging.getLogger() + + +class SyncBNChecker: + + def __init__(self): + self.optimization_item = [] + self.syncbn_issues = False + self.desc = "" + self.suggestions = [] + self.solutions = None + self.max_syncbn_num = None + self._init_rule() + + def check_syncbn(self, event_dataset: TimelineEventDataset): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "sync_batchnorm") or not getattr(event_dataset, "sync_batchnorm"): + logger.debug("Skip syncbn checker, because no syncbn found") + return + + syncbn_num = len(event_dataset.sync_batchnorm) + self.syncbn_issues = syncbn_num >= self.max_syncbn_num + self.desc = self.desc.format(syncbn_num=syncbn_num) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.syncbn_issues: + return + + self.optimization_item.append(OptimizeItem("SyncBatchNorm", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not 
self.syncbn_issues: + return + html_render.render_template(key="schedule", + template_dir="templates", + template_name="sync_batchnorm.html", + desc=self.desc, + solutions=self.solutions) + + def _init_rule(self): + syncbn_rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))), + "rules", + "sync_batchnorm.yaml" + ) + + syncbn_rule = FileManager.read_yaml_file(syncbn_rule_path) + + self.max_syncbn_num = syncbn_rule.get("max_syncbn_num") + self.desc = syncbn_rule.get("problem") + + self.solutions = syncbn_rule.get("solutions") + for solution in self.solutions: + for key, val in solution.items(): + self.suggestions.append(f"{key}, {val.get('desc')}") diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py b/profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py new file mode 100644 index 00000000000..88e55449c55 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py @@ -0,0 +1,32 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class SynchronizeStreamAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, **kwargs): + super().__init__(collection_path, **kwargs) + self.result = OptimizeResult() + 
self.html_render = HTMLRender() + + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + @BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + + synchronize_stream_checker = SynchronizeStreamChecker() + synchronize_stream_checker.check_synchronize(self.timeline_event_dataset, kwargs.get("profiling_with_stack")) + synchronize_stream_checker.make_record(self.result) + synchronize_stream_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py new file mode 100644 index 00000000000..03d88d281ca --- /dev/null +++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py @@ -0,0 +1,89 @@ +import logging + +from profiler.advisor.common import constant as const +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.analyzer.schedule.timeline_base_checker import TimelineBaseChecker +from profiler.advisor.utils.utils import format_timeline_result + +logger = logging.getLogger() + + +class SynchronizeStreamChecker(TimelineBaseChecker): + + def __init__(self): + super().__init__(n_processes=1) + self.optimization_item = [] + self.synchronize_issues = False + self.desc = "" + self.suggestions = [] + self.solutions = [] + self.max_synchronize_num = None + + def check_synchronize(self, event_dataset: TimelineEventDataset, profiling_with_stack=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "synchronize_stream") or not getattr(event_dataset, "synchronize_stream"): + logger.debug("Skip synchronize stream checker, because no 
synchronize stream found") + return + + synchronize_num = event_dataset.synchronize_stream.total_count + slow_synchronize_stream = event_dataset.synchronize_stream.slow_synchronize_stream + total_slow_synchronize_time = sum((float(sync_stream.dur) for sync_stream in slow_synchronize_stream)) + + synchronize_stream_rule = event_dataset.synchronize_stream.rule + self.max_synchronize_num = synchronize_stream_rule.get("max_synchronize_num") + self.synchronize_issues = synchronize_num >= self.max_synchronize_num and len(slow_synchronize_stream) > 0 + if not self.synchronize_issues: + return + + for sync_stream in slow_synchronize_stream: + if sync_stream.name not in self._matched_op_index: + self._matched_op_index[sync_stream.name] = [] + self._matched_op_index[sync_stream.name].append(sync_stream.dataset_index) + self.query_stack(event_dataset, profiling_with_stack) + + self.desc = synchronize_stream_rule.get("problem") + self.desc = self.desc.format(synchronize_num=synchronize_num, + slow_synchronize_num=len(slow_synchronize_stream), + total_synchronize_stream_time=total_slow_synchronize_time) + + solutions = synchronize_stream_rule.get("solutions") + for solution in solutions: + renderer_solution = {} + for key, val in solution.items(): + if self.empty_stacks and self.framework_black_list: + # 如果堆栈源于torch, torch_npu等框架,则不提示修改的代码 + if "modify code" in key.lower(): + continue + self.suggestions.append(f"{key}, {val.get('desc')}") + renderer_solution.update({key: val}) + self.solutions.append(renderer_solution) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.synchronize_issues: + return + + self.optimization_item.append(OptimizeItem("SynchronizeStream", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not self.synchronize_issues: + return + + format_result_for_html = 
format_timeline_result(dict(self.matched_op_stacks), dump_html=True) + html_render.render_template(key="schedule", + template_dir="templates", + template_name="synchronize_stream.html", + desc=self.desc, + solutions=self.solutions, + result=format_result_for_html, + with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, + empty_stacks=self.empty_stacks, + framework_black_list=self.framework_black_list) diff --git a/profiler/advisor/analyzer/schedule/timeline_base_checker.py b/profiler/advisor/analyzer/schedule/timeline_base_checker.py new file mode 100644 index 00000000000..8bc69150263 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/timeline_base_checker.py @@ -0,0 +1,91 @@ +from abc import ABC, abstractmethod +import multiprocessing +import logging + +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult + +logger = logging.getLogger() + + +class TimelineBaseChecker(ABC): + + def __init__(self, n_processes: int = 1): + self.n_processes = n_processes + self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() + self.matched_op_stacks = {} + self.empty_stacks = True + self.framework_black_list = False + + @abstractmethod + def make_record(self, result: OptimizeResult): + pass + + @abstractmethod + def make_render(self, html_render): + pass + + def query_stack(self, event_dataset: TimelineEventDataset = None, profiling_with_stack: str = None): + if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): + return + + event_dataset = event_dataset if not profiling_with_stack else TimelineEventDataset( + collection_path=profiling_with_stack, data={}, _datasets={}, analysis_mode="fusion_ops", + build_dataset=False) + + op_stack_list = 
event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) + for op_stack in op_stack_list: + for op, stack in op_stack.items(): + if op not in self.matched_op_stacks: + self.matched_op_stacks[op] = {} + if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: + continue + if stack not in self.matched_op_stacks[op]: + self.matched_op_stacks[op][stack] = 0 + self.matched_op_stacks[op][stack] += 1 + + def _query_stack_by_matched_index(self, index, event): + stack_record = {} + event = TimelineEvent(event) + + matched_ops = [] + for op, matched_index in self._matched_op_index.items(): + if index not in matched_index: + continue + + matched_ops.append(op) + stack = event.args.get(const.CALL_STACKS) + + if not stack: + logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) + continue + + if not self._is_keep_stack(stack): + self.framework_black_list = True + logger.debug("Drop stack from framework %s", const.FRAMEWORK_STACK_BLACK_LIST) + continue + + if self.empty_stacks and stack: + self.empty_stacks = False + + stack_record[op] = stack + + if matched_ops and not stack_record: + for op in matched_ops: + stack_record[op] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG + + return stack_record + + def _is_keep_stack(self, stack): + # 过滤掉torch, torch_npu, megatron, deepspeed等框架下的堆栈,这些源码基本是不能被修改的 + stack_list = stack.replace("\\r\\n", ";").split(";") + if not stack_list: + return False + + final_called_stack = stack_list[0] + for framework in const.FRAMEWORK_STACK_BLACK_LIST: + if framework in final_called_stack.split("/"): + return False + return True diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 592f9d421e2..52e3e07554f 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -12,3 +12,7 @@ class SupportedScopes: BLOCK_DIM_ANALYSIS = "block_dim_analysis" OPERATOR_NO_BOUND_ANALYSIS = "operator_no_bound_analysis" TIMELINE_OP_DISPATCH = 
"timeline_op_dispatch" + DATALOADER = "dataloader" + SYNCBN = "syncbn" + SYNCHRONIZE_STREAM = "synchronize_stream" + FREQ_ANALYSIS = "freq_analysis" diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 697430ee6ca..87245a43ea3 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -26,6 +26,7 @@ ENQUEUE = "enqueue" TORCH_TO_NPU = "torch_to_npu" OP_COMPILE_NAME = "AscendCL@aclopCompileAndExecute" OP_COMPILE_ID = "aclopCompileAndExecute" +SYNC_STREAM = "AscendCL@aclrtSynchronizeStream" MAX_OP_COMPILE_NUM = 20 ACL_TO_NPU = "acl_to_npu" TASK_TYPE = "Task Type" @@ -111,7 +112,7 @@ HTTP_PREFIXES = "http://" HTTPS_PREFIXES = "https://" COMMON_YAML_DIR = "modelarts/solution/ma_advisor_rules/" COMMON_ENDPOINT_SUFFIX = "obs.{}.myhuaweicloud.com" -INNER_ENDPOINT_SUFFIX= "obs.{}.ulanqab.huawei.com" +INNER_ENDPOINT_SUFFIX = "obs.{}.ulanqab.huawei.com" AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" FUSION_PASS_YAML_NAME = "op_fusion_pass.yaml" @@ -138,4 +139,8 @@ CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" CLUSTER_COMM_JSON = "cluster_communication.json" BOTTLENECK = "bottleneck" -DATA = "data" \ No newline at end of file +DATA = "data" + +FRAMEWORK_STACK_BLACK_LIST = ["torch", "torch_npu", "megatron", "deepspeed"] +DISABLE_STREAMING_READER = "DISABLE_STREAMING_READER" +MAX_FILE_SIZE = 10**10 diff --git a/profiler/advisor/common/graph/graph_parser.py b/profiler/advisor/common/graph/graph_parser.py index d4c67fc1918..ef4dc4d681e 100644 --- a/profiler/advisor/common/graph/graph_parser.py +++ b/profiler/advisor/common/graph/graph_parser.py @@ -1,11 +1,12 @@ import os import logging -import yaml import itertools from collections import deque from dataclasses import dataclass from typing import List, Tuple, Dict +from profiler.cluster_analyse.common_func.file_manager import FileManager + logger = logging.getLogger() @@ -344,9 +345,9 @@ class QueryGraphParser: if not os.path.exists(rule_database): 
raise FileNotFoundError(f"Path {rule_database} does not exist.") - with open(rule_database, 'r') as f: - database = yaml.safe_load(f) - self.parse_yaml(database) + + database = FileManager.read_yaml_file(rule_database) + self.parse_yaml(database) def parse_yaml(self, yaml_database): fusion_strategy_list = yaml_database.get("GraphFusion", []) diff --git a/profiler/advisor/common/profiling/ge_info.py b/profiler/advisor/common/profiling/ge_info.py index 9996ec611a2..4fd5846d88d 100644 --- a/profiler/advisor/common/profiling/ge_info.py +++ b/profiler/advisor/common/profiling/ge_info.py @@ -17,12 +17,13 @@ class GeInfo(ProfilingParser): """ ge info file """ - FILE_PATTERN = r"ge_info.db" FILE_PATTERN_MSG = "ge_info.db" FILE_INFO = "ge info" STATIC_OP_STATE = "0" DYNAMIC_OP_STATE = "1" + file_pattern_list = [r"ge_info.db"] + def __init__(self, path: str) -> None: super().__init__(path) self.op_state_info_list = None diff --git a/profiler/advisor/common/profiling/msprof.py b/profiler/advisor/common/profiling/msprof.py index 9453986b822..750c5481e67 100644 --- a/profiler/advisor/common/profiling/msprof.py +++ b/profiler/advisor/common/profiling/msprof.py @@ -33,10 +33,11 @@ class Msprof(ProfilingParser): msprof """ - FILE_PATTERN = r"^msprof[_\d]+.json$" FILE_PATTERN_MSG = "msprof_*.json" FILE_INFO = "msprof" + file_pattern_list = [r"^msprof[_\d]+.json$"] + def __init__(self, path: str) -> None: super().__init__(path) self._tasks: List[TaskInfo] = [] diff --git a/profiler/advisor/common/profiling/op_summary.py b/profiler/advisor/common/profiling/op_summary.py index d79439dbad8..4744b5029ad 100644 --- a/profiler/advisor/common/profiling/op_summary.py +++ b/profiler/advisor/common/profiling/op_summary.py @@ -16,13 +16,13 @@ class OpSummary(ProfilingParser): """ op summary """ - - FILE_PATTERN = r"^op_summary_[_\d]+\.csv$" FILE_PATTERN_MSG = "op_summary_*.csv" FILE_INFO = "op summary" STATIC_OP_STATE = "static" DYNAMIC_OP_STATE = "dynamic" + file_pattern_list = 
[r"^op_summary_[_\d]+\.csv$"] + def __init__(self, path: str) -> None: super().__init__(path) self.op_list: List[OpInfo] = [] diff --git a/profiler/advisor/common/profiling/tasktime.py b/profiler/advisor/common/profiling/tasktime.py index 3ce09a78385..732ff0f3679 100644 --- a/profiler/advisor/common/profiling/tasktime.py +++ b/profiler/advisor/common/profiling/tasktime.py @@ -17,11 +17,11 @@ class TaskTime(ProfilingParser): """ task time info """ - - FILE_PATTERN = r"^task_time_[_\d]+\.json$" FILE_PATTERN_MSG = "task_time*.json" FILE_INFO = "task time" + file_pattern_list = [r"^task_time_[_\d]+\.json$"] + def __init__(self, path: str) -> None: super().__init__(path) self._tasks: List[TaskInfo] = [] diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py index 8637befd1ab..64cc849295f 100644 --- a/profiler/advisor/common/timeline/fusion_ops_db.py +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -1,13 +1,12 @@ import logging import os -import yaml - from profiler.advisor.common import constant from profiler.advisor.common.timeline.fusion_ops_rule import OpRule from profiler.advisor.common.timeline.fusion_ops_rule_handler import TimelineOpRuleHandler from profiler.advisor.utils.log import get_log_level from profiler.advisor.utils.utils import get_file_path_by_walk +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() logger.setLevel(get_log_level()) @@ -241,8 +240,7 @@ class FusionOperatorDB: logger.debug("The rule yaml file is successfully found in path: %s", os.path.abspath(file_path)) - with open(file_path, "rb") as file: - db_content = yaml.safe_load(file) + db_content = FileManager.read_yaml_file(file_path) if not self._is_version_supported(db_content): self.is_empty = True diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini index c56c1dad9f0..06e99316010 100644 --- a/profiler/advisor/config/config.ini +++ 
b/profiler/advisor/config/config.ini @@ -9,6 +9,7 @@ tune_ops_file = operator_tuning_file.cfg [THRESHOLD] # operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker operator_bound_ratio = 0.8 +frequency_threshold = 0.05 [RULE-BUCKET] # region : URL of different regions where can download rule yaml file cn-north-9 = cnnorth9-modelarts-sdk diff --git a/profiler/advisor/config/config.py b/profiler/advisor/config/config.py index 12f4526f8c9..4f36dfedfc8 100644 --- a/profiler/advisor/config/config.py +++ b/profiler/advisor/config/config.py @@ -97,6 +97,13 @@ class Config: """ return float(self.config.get("THRESHOLD", "operator_bound_ratio")) + @property + def frequency_threshold(self) -> float: + """ + frequency_threshold + """ + return float(self.config.get("THRESHOLD", "frequency_threshold")) + def set_log_path(self, result_file: str, log_path: str = None): self.log_path = log_path if log_path is not None else os.path.join(self._work_path, "log") os.makedirs(self.log_path, exist_ok=True) diff --git a/profiler/advisor/config/profiling_data_version_config.yaml b/profiler/advisor/config/profiling_data_version_config.yaml index 4ef76105a07..b8c92fe074d 100644 --- a/profiler/advisor/config/profiling_data_version_config.yaml +++ b/profiler/advisor/config/profiling_data_version_config.yaml @@ -1,18 +1,19 @@ versions: - version: 8.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: - mindstudio_profiler_output: - [ op_summary, msprof ] + mindstudio_profiler_output: [ op_summary, msprof ] class_attr: op_summary: OpSummary msprof: Msprof file_attr: - op_summary: ^op_summary_\d{14}\.csv$ msprof: ^msprof_\d{14}\.json$ + op_summary: [ kernel_details.csv, '^op_summary_\d{14}\.csv$' ] - version: 7.0.0 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -28,13 +29,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - 
op_summary: ^op_summary_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 7.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -50,13 +52,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 6.3.RC2 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -72,9 +75,7 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+\.csv$'] task_time: ^task_time_\d+_\d+\.json$ msprof: ^msprof_\d+_\d+\.json$ ge_info: ge_info.db - - diff --git a/profiler/advisor/dataset/ai_core_freq/__init__.py b/profiler/advisor/dataset/ai_core_freq/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py new file mode 100644 index 00000000000..c99baea6564 --- /dev/null +++ b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py @@ -0,0 +1,148 @@ +import json +import logging +import math +import os +import traceback + +import ijson +from tqdm import tqdm + +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.utils.utils import get_file_path_from_directory +from profiler.advisor.utils.utils import convert_to_float, parse_json_with_generator +from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser +from 
profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqDataset: + + def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None: + + self._profiler_step = [] + self._ai_core_ops = [] + self._ai_core_freq: [TimelineEvent] = [] + self._previous_freq_index = -1 + + self.timeline_dir = collection_path + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) + + self.step = kwargs.get("step") + self.op_freq = {} + info = DeviceInfoParser(collection_path) + info.parse_data() + if not Config().get_config("aic_frequency"): + return + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) + + @property + def profiler_step(self): + return self._profiler_step + + @property + def ai_core_freq(self): + return self._ai_core_freq + + @property + def ai_core_ops(self): + return self._ai_core_ops + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', maxsplit=1)[-1] + + def parse(self): + + if len(self.timeline_data_list) == 0: + logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) + return False + + if len(self.timeline_data_list) > 1: + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) + + _ = parse_json_with_generator(sorted(self.timeline_data_list)[0], self._add_event) + + target_ai_core_ops = self._get_target_ai_core_ops() + self._get_op_frequency(target_ai_core_ops) + return True + + def _add_profiler_step(self, event): + if event.name.startswith("ProfilerStep"): + self._profiler_step.append(event) + + def _add_ai_core_ops(self, event): + if event.args.get("Task Type") in ["MIX_AIC", "AI_CORE"]: + self._ai_core_ops.append(event) + + def _add_ai_core_freq(self, event): + if event.name == "AI Core Freq": + if 
self._previous_freq_index != -1: + self._ai_core_freq[self._previous_freq_index]["end"] = event.get("ts", float(math.inf)) + self._previous_freq_index += 1 + event.setdefault("end", float(math.inf)) + self._ai_core_freq.append(event) + + def _add_event(self, index, event): + event["dataset_index"] = index + if not isinstance(event, TimelineEvent): + event = TimelineEvent(event) + + self._add_profiler_step(event) + self._add_ai_core_ops(event) + self._add_ai_core_freq(event) + + return True + + def _get_target_ai_core_ops(self): + target_ai_core_ops = [] + if not self.step or f"ProfilerStep#{self.step}" not in [event.name for event in self._profiler_step]: + target_ai_core_ops = self._ai_core_ops + else: + for step_event in self._profiler_step: + if step_event.name != f"ProfilerStep#{self.step}": + continue + + for ai_core_op_event in self._ai_core_ops: + if step_event.ts_include(ai_core_op_event): + target_ai_core_ops.append(ai_core_op_event) + target_ai_core_ops = sorted(target_ai_core_ops, key=lambda x: float(x.ts)) + return target_ai_core_ops + + def _get_op_frequency(self, ai_core_ops): + ai_core_freq = sorted(self._ai_core_freq, key=lambda x: float(x.ts)) + + op_index, freq_index = 0, 0 + while op_index < len(ai_core_ops) and freq_index < len(ai_core_freq): + op_event = ai_core_ops[op_index] + op_end_time = convert_to_float(op_event.ts) + convert_to_float(op_event.dur) + op_freq_list = [] + while freq_index < len(ai_core_freq): + freq_event = ai_core_freq[freq_index] + if convert_to_float(freq_event.end) < op_end_time: + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + freq_index += 1 + continue + elif convert_to_float(freq_event.ts) < op_end_time: + if op_event.name not in self.op_freq: + self.op_freq[op_event.name] = {"count": 0, "dur": 0, "freq_list": []} + self.op_freq[op_event.name]["count"] += 1 + self.op_freq[op_event.name]["dur"] += convert_to_float(op_event.dur) + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + 
self.op_freq[op_event.name]["freq_list"].append(min(op_freq_list)) + break + else: + break + + op_index += 1 diff --git a/profiler/advisor/dataset/profiling/device_info.py b/profiler/advisor/dataset/profiling/device_info.py index b58930777f9..110cd0794c6 100644 --- a/profiler/advisor/dataset/profiling/device_info.py +++ b/profiler/advisor/dataset/profiling/device_info.py @@ -54,6 +54,8 @@ class DeviceInfoParser: config.set_config("device_id", device_info["id"]) if "aiv_num" in device_info: config.set_config("aiv_num", device_info["aiv_num"]) + if "aic_frequency" in device_info: + config.set_config("aic_frequency", device_info["aic_frequency"]) if "ai_core_num" in device_info: config.set_config("ai_core_num", device_info["ai_core_num"]) return True diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 46d4a4fe8b1..ebd90951abf 100644 --- a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -10,6 +10,7 @@ from profiler.advisor.common.profiling.tasktime import TaskTime from profiler.advisor.dataset.dataset import Dataset from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser from profiler.advisor.utils.utils import join_prof_path +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -42,14 +43,21 @@ class ProfilingDataset(Dataset): self.build_from_pattern(value, join_prof_path(current_path, key)) elif isinstance(dirs_pattern, list): for item in dirs_pattern: + if hasattr(self, item) and getattr(self, item): + # 避免重复构建kernel_details.csv, op_summary.csv的数据对象 + continue + file_pattern_list = self.current_version_pattern.get('file_attr').get(item) data_class = globals()[self.current_version_pattern.get('class_attr').get(item)] - data_class.FILE_PATTERN = self.current_version_pattern.get('file_attr').get(item) + if not hasattr(data_class, 
"file_pattern_list"): + continue + setattr(data_class, "file_pattern_list", self.current_version_pattern.get('file_attr').get(item)) data_object = data_class(current_path) is_success = data_object.parse_data() if is_success: setattr(self, item, data_object) else: - logger.warning("Skip parse %s from local path %s", self.current_version_pattern.get('class_attr').get(item), current_path) + logger.info("Skip parse %s with file pattern %s from local path %s", + self.current_version_pattern.get('class_attr').get(item), file_pattern_list, current_path) else: logger.warning(f"Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__) @@ -69,8 +77,7 @@ class ProfilingDataset(Dataset): logger.warning("Skip parse profiling dataset, because %s does not exist.", config_path) return [] - with open(config_path, 'r') as f: - patterns = yaml.safe_load(f) + patterns = FileManager.read_yaml_file(config_path) return patterns diff --git a/profiler/advisor/dataset/profiling/profiling_parser.py b/profiler/advisor/dataset/profiling/profiling_parser.py index bb4caeb29e5..51996617c2b 100644 --- a/profiler/advisor/dataset/profiling/profiling_parser.py +++ b/profiler/advisor/dataset/profiling/profiling_parser.py @@ -12,10 +12,10 @@ class ProfilingParser: """ profiling """ - FILE_PATTERN = "" FILE_PATTERN_MSG = "" FILE_INFO = "" - FILE_PATH = "" + + file_pattern_list = [] def __init__(self, path: str) -> None: self._path = path @@ -37,15 +37,20 @@ class ProfilingParser: return False def _parse_from_file(self): - file_list = get_file_path_from_directory(self._path, self.file_match_func(self.FILE_PATTERN)) - if not file_list: - return False - ## get last file - file = file_list[-1] - self.FILE_PATH = file - if len(file_list) > 1: - logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, file) - return self.parse_from_file(file) + + if not isinstance(self.file_pattern_list, list): + self.file_pattern_list = [self.file_pattern_list] + + for file_pattern in 
self.file_pattern_list: + file_list = get_file_path_from_directory(self._path, self.file_match_func(file_pattern)) + if not file_list: + continue + ## get last file + target_file = file_list[-1] + if len(file_list) > 1: + logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, target_file) + return self.parse_from_file(target_file) + return False @staticmethod def get_float(data) -> float: diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index d3889e4458f..1504e65f54f 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ b/profiler/advisor/dataset/timeline_event_dataset.py @@ -1,14 +1,16 @@ +import json import logging -from typing import List +import os +from typing import List, Any +import traceback import ijson -from profiler.advisor.dataset.dataset import Dataset from tqdm import tqdm +import yaml from profiler.advisor.common import constant as const from profiler.advisor.common.timeline.event import TimelineEvent -from profiler.advisor.utils.utils import get_file_path_from_directory -from profiler.advisor.utils.utils import singleton +from profiler.advisor.utils.utils import get_file_path_from_directory, check_path_valid, singleton from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -39,37 +41,76 @@ class OpCompileCollector: self._total_op_compile_time = 0.0 +class SynchronizeStreamCollector: + + def __init__(self): + self._synchronize_stream_count = 0 + self._slow_synchronize_stream = [] + self.rule = SynchronizeStreamCollector._load_rule() + + @property + def total_count(self): + return self._synchronize_stream_count + + @property + def slow_synchronize_stream(self): + return self._slow_synchronize_stream + + @staticmethod + def _load_rule(): + sync_stream_rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "rules", + "synchronize.yaml") + + sync_stream_rule = 
FileManager.read_yaml_file(sync_stream_rule_path) + return sync_stream_rule + + def update_sync_stream_count(self): + self._synchronize_stream_count += 1 + + def append_slow_sync_stream(self, event): + if float(event.dur) / 1000 >= self.rule.get("slow_synchronize_threshold", 10): + self._slow_synchronize_stream.append(event) + + def unset(self): + self._synchronize_stream_count = 0 + self._slow_synchronize_stream = [] + + @singleton -class TimelineEventDataset(Dataset): +class TimelineEventDataset: - def __init__(self, collection_path, data: dict, **kwargs) -> None: + def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None: self._ops_with_task_type = {} self._ops_with_stack = {} self._ops_compile = OpCompileCollector() self._torch_to_npu = {} self._acl_to_npu = set() - self._aten: List[str] = [] - self._optimizer: List[str] = [] + self._aten: List[Any] = [] + self._optimizer: List[Any] = [] + self._dataloader: List[Any] = [] + self._sync_batchnorm: List[Any] = [] + self._synchronize_stream = SynchronizeStreamCollector() self.timeline_dir = collection_path - self.timeline_data_list = get_file_path_from_directory(collection_path, lambda file: file.endswith("trace_view.json")) + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) self.dataset_len = None self.analysis_mode = kwargs.get("analysis_mode") self.task_type = kwargs.get("task_type") - self.cann_version = kwargs.get("cann_version") - self.torch_version = kwargs.get("torch_version") - if self.analysis_mode in ["fusion_ops", "all"]: - logger.info("Load fusion operators database for cann version '%s' and torch version '%s'", - self.cann_version, self.torch_version) + if not build_dataset: + return - super().__init__(collection_path, data) + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) if self.analysis_mode in ["op_stack", "all"]: self._task_op_names = 
list(set([event_key.split("-")[0] for event_key in self._ops_with_task_type.keys()])) self._post_process() - @property def ops_with_stack(self): return self._ops_with_stack @@ -102,36 +143,60 @@ class TimelineEventDataset(Dataset): def aten(self): return self._aten - def _parse(self): + @property + def dataloader(self): + return self._dataloader + + @property + def sync_batchnorm(self): + return self._sync_batchnorm + + @property + def synchronize_stream(self): + return self._synchronize_stream + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', maxsplit=1)[-1] + + def parse(self): if len(self.timeline_data_list) == 0: logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) return False if len(self.timeline_data_list) > 1: - logger.warning("Please ensure only one trace_view.json in %s, there will analyze first timeline profiling data.", self.timeline_dir) - self.timeline_data_list = [self.timeline_data_list[0]] + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) result = self.parse_data_with_generator(self._add_event) if not self.dataset_len: self.dataset_len = len(result) - return True def parse_data_with_generator(self, func): result = [] + timeline_data_path = sorted(self.timeline_data_list)[0] + if not check_path_valid(timeline_data_path): + return result + try: - json_content = FileManager.read_json_file(self.timeline_data_list[0]) - for i, event in tqdm(enumerate(json_content), leave=False, ncols=100, - desc="Building dataset for timeline analysis", - total=self.dataset_len): - func_res = func(index=i, event=event) - if func_res: - result.append(func_res) - except Exception as e: - logger.warning("Error %s while parsing file %s, continue to timeline analysis", e, - self.timeline_data_list[0]) + with open(timeline_data_path, "r") as f: + for i, event in tqdm(enumerate(ijson.items(f, 
"item")), + leave=False, ncols=100, desc="Building dataset for timeline analysis", + total=self.dataset_len): + func_res = func(index=i, event=event) + if func_res is not None: + result.append(func_res) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) return result def _add_ops_with_task_type(self, event): @@ -169,12 +234,40 @@ class TimelineEventDataset(Dataset): "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur })) + def _add_dataloader(self, event: TimelineEvent): + if "dataloader" in event.name.lower(): + self._dataloader.append(TimelineEvent({ + "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur, + "stack": event.args.get("Call stack") + })) + + def _add_sync_batchnorm(self, event: TimelineEvent): + if event.name.lower() == "syncbatchnorm": + self._sync_batchnorm.append(TimelineEvent({ + "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur + })) + + def _add_synchronize(self, event: TimelineEvent): + if event.name.startswith(const.SYNC_STREAM): + self._synchronize.append(TimelineEvent({ + "name": event.name, "ts": event.ts, "dur": event.dur + })) + + def _add_specific_operator(self, event): + # for analysis of operator aclOpCompile, enable jit_compILE=False + self._add_op_compile(event) + # for analysis of slow dataloader.__next__ + self._add_dataloader(event) + # for analysis of syncBatchNorm operator, prompt users to replace source code of torch_npu's syncbn + self._add_sync_batchnorm(event) + def _add_event(self, index, event): event["dataset_index"] = index if not isinstance(event, TimelineEvent): event = TimelineEvent(event) - self._add_op_compile(event) + self._add_specific_operator(event) + if self.analysis_mode == "fusion_ops": self._add_event_for_fusion_ops(event) elif self.analysis_mode == "op_stack": @@ -190,6 +283,10 @@ class 
TimelineEventDataset(Dataset): self._add_aten(event) return + # 检查cann层同步操作,根据时间窗口索引到host侧的aten算子并给出堆栈 + if event.name.startswith(const.SYNC_STREAM): + self._add_aten(event) + if event.name.startswith(f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"): self._add_optimizer(event) return @@ -215,7 +312,18 @@ class TimelineEventDataset(Dataset): # eliminate sub aten operator of the first level aten operator by 'ts' and 'dur', # keep the first level aten operator contiguous formated_atens = [] - for aten_event in sorted(self._aten, key=lambda x: x.get("ts", -1)): - if not formated_atens or not formated_atens[-1].ts_include(aten_event): - formated_atens.append(aten_event) + for event in sorted(self._aten, key=lambda x: x.get("ts", -1)): + if event.name.startswith(const.ATEN): + if not formated_atens or not formated_atens[-1].ts_include(event): + formated_atens.append(event) + + elif event.name.startswith(const.SYNC_STREAM): + self._synchronize_stream.update_sync_stream_count() + if formated_atens[-1].ts_include(event): + # 使用aten算子的索引,用于查询堆栈 + event["dataset_index"] = formated_atens[-1].get("dataset_index") + self._synchronize_stream.append_slow_sync_stream(event) + + else: + continue self._aten = formated_atens diff --git a/profiler/advisor/display/html/templates/ai_core_frequency.html b/profiler/advisor/display/html/templates/ai_core_frequency.html new file mode 100644 index 00000000000..d0451420373 --- /dev/null +++ b/profiler/advisor/display/html/templates/ai_core_frequency.html @@ -0,0 +1,27 @@ +{% if data|length > 0 %} +
+

AI CORE Frequency Issues

+
+ Issue: {{ desc }} +
+ Suggestion: {{ suggestion }} +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in data %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
+{% endif %} \ No newline at end of file diff --git a/profiler/advisor/display/html/templates/slow_dataloader.html b/profiler/advisor/display/html/templates/slow_dataloader.html new file mode 100644 index 00000000000..ae3a22f283c --- /dev/null +++ b/profiler/advisor/display/html/templates/slow_dataloader.html @@ -0,0 +1,18 @@ +
+

Slow Dataloader Issues

+
+ {{ desc }} + + + + + + {% for suggestion in suggestions %} + + + + {% endfor %} +
Suggestions
{{ loop.index }}. {{ suggestion|safe }}
+ +
+
diff --git a/profiler/advisor/display/html/templates/sync_batchnorm.html b/profiler/advisor/display/html/templates/sync_batchnorm.html new file mode 100644 index 00000000000..0a4cb3e7302 --- /dev/null +++ b/profiler/advisor/display/html/templates/sync_batchnorm.html @@ -0,0 +1,30 @@ + +
+

SyncBatchNorm Issues

+
+ {{ desc }} + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ + More efficient code of syncbn forward as follows: + {% for item in solutions %} + {% for key, value in item.items() %} + {% if 'efficient_code' in value %} +
{{ value.efficient_code|safe }}
+ {% endif %} + {% endfor %} + {% endfor %} + +
+
diff --git a/profiler/advisor/display/html/templates/synchronize_stream.html b/profiler/advisor/display/html/templates/synchronize_stream.html new file mode 100644 index 00000000000..fd95b486151 --- /dev/null +++ b/profiler/advisor/display/html/templates/synchronize_stream.html @@ -0,0 +1,57 @@ +
+

Synchronize Stream Issues

+
+ {{ desc }} + + + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ +
+ {% if not empty_stacks %} + Please click on the collapsible box below to view the detailed code stack that triggers synchronizeStream + {% elif not framework_black_list %} + Suggestion: + These operators have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to + Ascend PyTorch Profiler to set + 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. + {% endif %} + + {% for api_name, stacks in result.items() %} + + {% if empty_stacks %} +
{{api_name|safe}}
+ + {% elif stacks | length > 0 %} + +
{{api_name|safe}}
+
+
+ {% for stack in stacks %} +
No.{{loop.index|safe}} code stack, called {{stack[1]|safe}} times
+ + {% endfor %} +
+
+ {% endif %} + + {% endfor %} + +
+ +
+
diff --git a/profiler/advisor/img/overall.png b/profiler/advisor/img/overall.png index 6d5da107a3f7f6c8c655922bd80d193708fe71aa..1883d4c97388b1cfb774d05fc9e0d368d0c66901 100644 GIT binary patch literal 49616 zcmd42Rajlk(lrVz$ijjINpN=w1PSi$?yztI1b26Wy9ak)!QFxc2<`+6?j*Q#X7;!D z`(^*n|D2n1b8hGC*;2E*yK0P4;YtdUXvp};FfcG^(o$k7FfcDjVPHV)U=Z-cihKzH z28I+yT1;5Y9rmc>QI$Zmm6_*xx?Dwgwq=SkzE}nOjcO15q26oN-|5{p1ovujcG^l7 zjHy9oqmx(e&WT?`;?Q}`k08}B*wvM(y5pqJQq0BANQQs$uh)te|vg z=6NKktE+o{I=y)T5`qE4z>>ni`TI{}-o!300xvHrYOWlq<@eOoRptG7xzXD@UDJ7YHo~&Ivom$`KOMqgz<`9h zhrr)QM}2O$Qx&`JtQ?GtwhwD^o{#O;{GJ~L4cc!$xTQYdTD&MS4d`AP+HGoT?EJBC zGv3gUT+`*-SYO`^efvD$HTHa2SUU%;%gybVflo8kZI1_l<7Olej%c*6^- z7fhT1`s)P|T=^X_<8OF0o`!Cw{~r0EV<-zS6N#Iv@4~7prl%$HpC9&bcW}ng($l)R z%CzHjY$0iE@>^cy?t4~`p@lX!H9A`b=>Df2uo8Pr5X$-aT4m+d)6?~F6%&{d?+vaz z>_1&ug~7mS9783C{`tmV7BlVn&``7U2_KU&H3M%H^uLV*fW}NplIj23xB}C^+p)o8 z^8G*4&&3757lmZK@whwAd4}3~8*qDc?sEBxe1kn-bc6pz`p-Jn>EQIppikR%c3thh z#g!{hhhDLoY&qFs-Sm+dzW=*gM)7VGkSr`JCL=tG9xUvC1_}%HkA~N{d8pbiveVS) z^vTHZtnW(!XKqrI-EJoXk9J6W+~*XlAJPp_{1l&^Gva>T3i5l>d+TT1*LP{doDAn^ z?nqh&%VKJI4!uM(HARK{&!2Zkf&GD8X=r$RuCg;3_KZ#bG~a4hq$w6~`2)SUOaJa@ z*zSSyLeVe7AiZl=ezb@Y^XYCyk|AC2Id4?)q03`;Ek&dFDa5ve9!INVQ#QS;or3cI z>&(<}*JF*L*T|72(?3SAPXjM#KrR)l7=4x~II_gEpY|2^ll|`f(q1RHBzp@#bUg{i za)tGkJZL_V!&l3|Ym8TwtQFH#KNybIV4TU^-1zxKAH4Pc&(vXHNg+s}MjS1_G#$*8 zhhPo10GAy_!(o)bjed(~)mJ^_Piv1qpSn;`B70l&n%T7Ip`V`JPbG6knHYWAYT4aR z(VyXb%#bqUkp7t?DJeuZKwTj4auAE7Pz>5?$FG08vH7kGH{n~WOeaJ{9?5y~a*#!H zN8pL(7wli#OM?vfu&4|1@cZDtU5>k4By>z{_3YHA%mejoPN zp630$LT>yX*TSAYmMvqEt(0^4)pU(2O3@+*nBk}Q=p z#QAVOv6zti`Udu&-wqStLjf@fXgKBkDf}NV13Y~O8x~xd(Hq+v@b7~G29}E)CSY5l zB47TWjUXfsY=m;U2U@~^)^AS%!qBp$Q^IXU%PTV1+yu-M(r@umOe-O^>4bl>yc=LtBtmlqd-S2>T5{v`jb6$WH0 zzW#7~K8vd*Xk%x$<(SH>@3~);x{S=Q^uI@;`>O!Mr?b?qXOid55tc5!zYj5Vu0Zwm zTi`bH{{0mlxZ8jmZagI=+xJqPC+i8g`WF43-5&mT_X~CJgC7QuYODM`Yo+I0;ltz+w8pS@gs1#;o;!|JQ*&LA;9tJq{2~DOCD`I)h^zaFW4}0r9yi-Uw*~_vs!n`zr&k*4F7KWdp}Ia 
zoG+8)|BRo@0~Ue^L;n8j#&D!iMud8v*4ITau>Wc3DggsR0ipcmjkpvr0cNbZt{T&& z1pmxXi3s?Z6#AFXLs&tx7+OsdlK=I;GASTN_7yUqkzRZX20RLTYv-B%D$(gmaTuE~ zxQckY2<6QS&2yZiI^{wTZ9=U2xdnrfQ5{DwH2#_fJe%Jc{nsl7N@i;l9>>DgWuO4~ z05b}>N~)%Ly`cFelt%Z_jeA>`(ptN{_N&|mwSey0`1W7;$+ADJnObDe2Z^}IU>ci; zGQyT{m9~|Bx~T5VslWrP8<7BO$=Y(cc-0qS!$OO&UoB6H!>CP$z;nrBLR085v^W#+ ziUzKhh^&W>IWF4g450nixNB}+!pT0OyA6m{MaXcta4FFk(3E8ZCo}_VmW2(D$kVot z+~Ll=^QE9agN#U=^;)Yjf2?WPVjd-NTMmX<*r2v1w$c?!77dw~PuG>j0W*Rb3CJK! zEh{s}lPM~2-6<+^_L6w8O5%99%fLnb$0UnV21)ootlj=BdE#8hs|UA6+k5R8Ui}I! zYo541gBYA%WgbN9jW)l^cBJ!iwH!-IYbWoe?gD{sB$~ zT3KeZ+plBi=uNI2mV1xgRGS0LR{beic2kjhvsJDz5jug51@9I~)Mr8sIvpZja^_pC zdMCWcB@ON?E*A}YSX%CVBJ7{r6uT#xzOU8aDM*(y2w9x&|)2U854~XPU10E6b3Np5vx4NtY2RBJEuDYU-4Lf-H6DQVNzA{ zg6Z&2EHhjmQ}#UNO8aPA)t7vx@KhEmH2Vx@f9OjQevo56NtSDJ9$#a(51HYXGFpF_ ztJBpdoARY-B6-0Yrw%%V4CI3%9n879_&n*?5AoNSQ+EUCh7bJW8r#;jUzCGEvbEpF z%*~IgFg%=03&31f4O$T^GgLv3w5bFh?@S%vGFfc&k3>hGGA7;6CSOOF_JSduKd-QW zpg*;xje~1J+N8Fy^Y_fRcL)9|{Drs6MPr)#v=zwapL|T5Y^A_I6Yjcyi^-sh%k_Vo z)LI+DAp3%SOjd$rjBiLfm7~*65<5X_=d+oPNeB9pPK-52h$R2ZG%}y=eRBZj7wQ0$ z@DC20LYg$0F9gU7<)l^9XTu!GY~?`leVh5~5gW80UffIef$Ns*druDBV<(n`C|9Qk z1`s-52kU1jZuuBVJD=N@u*_U9B)#TP{IpcePTfQfh0C|ov!k$6(xi0?)yN)F<!ME2oRlV#+LK07Na0rMj~UIH?{~L)gD=;QZ74T{OVWywS6OOz)E=J#F0(Hyd?~_! 
zyY>N<^pkqaF8!}|SY5S(tXzF!Lu9z%mS*!p(+(B;mp+f9mw^}YyhMlJ%F+nAal|rv zn^tC}CpS8U&eJhjGpg5mwh^Xk)4KDyR9UYsA=pebllD2}5Fwm~?);ag*V2Zh6W3`1 zRBt?GzWAB$JrnRy*9CZ>_B5N(4e}+d>!i2Ln!KvHxn)Z-QJg$TFJ3-_P}+acq0H|7 z#UCoTc&dEe@4CWBO|UIz`m*h=_Y-|G*Rlb~+TvEj!rwW17iC;>$tQ2&aTJte*tVn) zLx~69ny56tP`5llt&Wp4!5Ao>iX!^u_wH@)$)LB+UAXjFPVp&9`(19 zu`m(46`g`u7Sx8g#zXEUCQ(W@p~1~6$yEGEi=ywzBULw{8ZHldLNbJwDIITz zbZnh0&TUh-Jm7fmjOmQR#YS0gJzH7CbP({GZbf`uj;fS$!>%NUr% zI8Q1t*=PwDi>0~3BPu=yv5M8m5`whiJ%ki+sHOP~&;yoxsXR6K%p3~T?+V{NApH($lGw>s6)eL?)+SMIP$R1&u)|j_I=fmDhJ3mG zTz-{Qxup1@%vzPctUM}KMEII!<~y@-n$<|lRHO#UKrG^3dEqb2mo&AN!Jmt;%4nF% z>gb~w`JXyH4XYAjRX&qLlMLzHv>RN6bCJ&6+Q1*$%E#8ewY)DCe4VJP2$}3rL037J z&#orrn#M8uP4V&z{eqe2+h0@@g*D|#5#3YENmbN*w7g(_x*JQen4x%vO{~R`-F#m~;VbOGo6%9gH?tEi)77e- z4OK}}>UN5YpQ5frsAXy&4&42`&PQhW!;M}};xf*y;{?7@7B{m^6l*B!&hH`MZXePq zQW9=1z|vX*$2ETvsG`M*=wC(El_DlP)_ljCfdh+%ryZUF)-du)WY!jlV3=gnFiJ*C z@@B7!;|Xefstnr;RKm#b%iH`I9k8&~85_1YGlBa!n6@kqvL5HxI=km}9ccR&FH2|S zSZo|!>W$i0+8q6$MB{M)jY#fzwFNm&cemvYLwy5&1j zY`HRDy6IPf)cL$E<-dD>&>m*>WA^WUUDVmawY{L~4WINb0~GcRU^=w zp`1fC!Jm5TCVkC0Ab=yIS3DRqECF>lLf43ehA?!!ol#MA%d#uk$Wyv^X+186cD!{w znKD&4K1;&VcId6C3>DdBTns8g6fwF}k&4rp_Q_LHlvxO`Vg9x_Kd_|%H)reX!MFNd z0aKb7Bf36pC2k*hIyn{i3l{qU~Ye0@CIkVb6tg4&OJLbwOHw zLr<}Iw~^B9-<{k zG3OAM3eQ8mg?*4k7o>}gGkZVCGi=#PjEAyET2Py$ZjM$i9lVgohVv}XA$q^2&=1O6 zP@8lRx473faa0YU@~J=VSu%L-sjBsiB8Fv`1E5y$R|+~RYp=a_Rm2t;M8~D|>`KC( z#xi-uzZuY%V$|$mlhtM1S*B%(jF_;-LoTC_rCrCIJ!k_}gUhp3U;}{7|FKU%9O(9S ztaM4@xL!nfrZW4OkyESTd*lO@`mV&dm$bhg#tLLF%2LqhXEv={DD=-NVz*y1x0yRM zo3RvQq7ORzwt7G!H2i-&O}JjDBB)`)rF_S^1Uynl`zyw*&tj!9M0ZT${Gj1s!++gk zFJMD9s|XaimgpGKP45)}O>*on0c%HT@an4#6j(GFZ_wS`4O|IEgzbeSlYAyu({7nE zP_hni@h{h0QY&l{1WEW#gsSCvrZQb0)p?>ww=>MYBH*Z75mjivsVbbY*R?e+J=}VZ!Ln zSgtR@z$*2_5Iz)2{3{s~hWx9ysuRv;R}v5;^4`W%iV39t5=raY>|etHy(~-4$>^Fg zwJ};Hbbto9p~?2*!Z9P z)5o-`UVJX0ZE7lhRU7QjG*+vLW{pxlgW+v~^YPEBfprJ_1H)(xN_=DhGV(HP*70IQ zbw2Am83J1-^>Rw6ZFI~A`d?Upt3M$?R*2{en)`&P6{Jvi9l6|30SimE)nccPaPn_24=MHZUA?qx6ajvO}vF}V+gre 
zq~u2M9(#hT0=s$Ol;Vr{G5ksD!-zgBqd@!LxYi0Y3F8pF?s=mQz~iA=N+Ui%zs=|q zeD~mm4ee>oC3~eJl|H^qV$eukv76u5pNK1VCWkuUMy;@-5hswko-O1s12MpiDCOIb zIWX|jchbh{mkY+$=L=3`Vy6l4axxe73A6wMRW+?xG+iISox~dShjBAiwcHT zm{IcOt>nAK&kfup64`Te6Rs^i=u2aioYfc$#wME!6;`2j=| zW3et_kL=3&qLOErZ55+fYcFwNMH|%=U}82M_kSj$&EVWV$kT*PZ$w4%ltR}e<&W+5 zv#{l|d?tRV(JVY!-6{R}^qon#JC1(&PR8@B<0%H$&sxP~^s>g*wF&M~fKzUwT+UgQ z8al3g{DQEgIV4rbi^Ot8Bqrv4#O82u0{P+rKF?~Vc2i;2x2J6f&TU&H#K$WSz%>ayUcKJASzA$?pfzqb2Q4Dw%GLm4o94(}YU!cqWRZ2>9J>^hIGmlv>kfQBA{b zP_~2;R%lO3_$p2CxX{>@e-k-uc^cZE$|S#e5%^m@I(^?yUecuu3p%na$0D1q5SlDa z^Uh&M!S7m~=MI^~-13U(SpyO9Hl+eJDVV&|*e|WI;?ky&>`W3{yFH(7TN$_sisD^bA9R9tAr%HikkM#oxX2*k%E>~}hCxd{zsCT`Jr@R^Z=@E(N^am^Ed z8oHog42_xQ|Ckr!X#FK^_<{V3V~fm(Uy3dm_Nm>*6dJ6qEko{c!8PA2t>FPvio{$M z2iy0KG7FOZd>Ria+n#+nH6bg>*9yTqc|dbVP7+u-RP6-~{iSUuFkX&{bgY-{b z5(<7BlZ#PbNZMD#b=nF;xV7&OR=3!`(nvvtm4=3~R&8qO7BI~=+BRQ|I{Ie>uv=Yb z8abS`_}1{2MnK@jFIh^)s!D2H;V531ohPVn^jg71EdZ`+Ilxyn9ak?0_O!PIZIkN@7qomR6E|>xk8LD^(9AOr}Ro^5?=s zjtqgf#X1Qnd7n4_qXj64e`_=tB_Uh2#wV@d9XC0pSx{8SuQhBVTGje~j)c0^=jp~G zA$nv^TeaQ_fnq+|JWiprx^k~>N6z^^=^HXby=PLbqI9TlT!zF{=de}QiF}N1=9|v6 z&ssXWj)(6wd&Ts63pRM&?@W~UsYh5O#xJ^dVn>8gY<+4a!3J0=c+$mcQ>nu~BH#SD zS@r6(Mg;s@Td5^%7SUl%$4PTFmb3aVxA(-9l}tF_Fo&17;=4$4j(wu96#h&K-r_@aiRf<#TWH4cgetcUiMY=?BOb7pe@@8-o=+tgymyqEMHB;ol9Qj zXY0qaJHxvbdwoi2{qo$|gVi{4-O8v-)D;_t)}uz$l$XysG5%z_Gezl?{>pF+_oS;^ zc0>?s9+i`p^i`>gD;ZC%1wo;L@@?1M(u$zNatcT&eMXNsu~KmP0n5)x+Wl~HH=#re znoY@vs?*xst7V}Z%>-smB*u(N&4m}L^AnTL_Zns-BC|ch0akF!RM5^ny9pq1!tcRC z;FKp(icNL&Q(D`qjDW}wJT8F~ZD-_Foq?n+yIZAZup;`n!CF*Ms%r6$_y-3)65~qd zXV+M40z<*mk4nc9EEG_-uU$rU>Jb9`QN@)D%xFysujVx(&T@dmBwWUG)Re=f23Ge< z9LDO9_D4{IZ-MASkob?LCvi4Cnj@?EM%Q8%LZT7J>(>4ou><25(W#pzf&gGZuBq{^ zxpL(QqbU_EI0CZXv2uGRGB?%_5Ra9dDdQSZ?5j_-xRd9eVKh}N(1yg=d|EPSmW4R< z()Es6f7r&(gR<&%AX@Ji6Xwlcz&7M6qcG-Jp{FP=c+2?VJQP4p23UwDOL}5sV*6>U z2@9sg#I}$SS14R^`L!Hlr?$p3>I5)NBQT5u~jZx}zU<;ye_(-(C+RBW-jyUAs)jUv8yvUTIArV zHrE*z-Ey8bE4_6CUwfY&{@>mucgbq4TxiHp;6~rr9tT{95w)1(rNsP6qCmq4m=WIH3>% 
zF6~VxEpk((g-b(|@=pzO$c%U8FNb!gP1w%vPiyIZ^SxJY?|Eo5nqrfa%$RZ`FE^Yo zSqu^%DV#Y&)NKphUOtY<2|L*42{5ZVj!=&gZ>d;y6tf`HZ(Fyhc_{`mt=8QN=%SV% zy?TGjj}TyHdHxks>q5L>>OS$Iir5#kl%@?7lsL4t5Hjh zEe;pTZ85YO*%0YXsDRfR`TJMx-MCG<3$<4P^eMvoS5c(UWm_ZkCUoD4uLCvB`w-dw zIFCyp!mmiFXIDjdC-z(=@N(q-jRO!z{hz#@` z2C9&K2d}GVrFEDIU#=BfChHX3?O0|s$ruwv0*CUQ0;Vx-aCnnW7$^2sxA!?2^t(w& z(o0bo-w7jxiVrS5DH*+|g}uT!G};x#E-r=LtsFcNfAgvfZvsH$x|i1|6>MzZ26dz@ zl`woAc|f@l(@a{UcR@_EVm~I$0_bBn4LKO>{-iyO)8(_20lFi#Da*FvNXJ6nVMJ(3>7q`{b{+k^$5tWQ851__k@R~nfTy%i#j(EL6f zNCH`vEb*@(0sl1%8sti~D6!BK222T6R!1BA#n$V!)5O5#-es;^9|7I-Va+1YTmpeo z8v!h|f90wH>XI7~0%_%Cur4!p3P_H?JPtc3{EDSTF#khped?lGrnM}2@eC5&r;>bt zI;-lGD!R2*RVCx}OXZILTSq!E&$N~zxD23bI-v#P<{qRXGN|<~g1EF4g8(o}AV`dS zBV{Ag=uUs4`dgl17_HW^Y4FR(!{NH3KD#7)h_x+LHGirsUfg4;K-9l#Fnq@|z_H-X z50QX~Hxx?Q^eSs>8AL~X_@$cXTZ^B*VPILFwCAz;__{oZ=+X*FDVEnyG~EA|^1q9_ z8&DN>$}|ql8l7#q5BklO`C?Z+_S@=rJWGLhwUxwf_1RVR*vs5}a94#g;^MqQkg@^6 zFYgEwx^<+8KiJq%>TlLIZKI?hS4N<2j`oFajm36}i|_-i>ur{utxE{#3te(iHkw2! z=M??=XxJAFGOH3guWE~+_`97OcXI}o3WiST{aXozXuFD&Z~JJq3DfKOt>D+S;%0sO zvUwCk;w~$Vr>?gmU+y#E1-dvT%9606t3~~1V|BZsDK4i1-5!iIa(PLe& z9#y@xYG74wEH(yU#R+FyNo8)7Aqs%Jv2fsx48k-taa4xP*qWCXb>2R#Y|AH~$Cq(r zg5edSC(R-r21>s8v+FDNzZgBnLkJt=C}-EU-IB-JSF}4m3pozS!^JvKv#3t zJGYT45gR$G%>tClSZqYl%u`plpfr0nzIHP5>{DS?I2sSWbvzBdxfHNH?CZN#MDXEH zY#B+bFTwAE-0I)Omc1s+tNt=el)Z)@3-A2}`;eQ>rFiWnduN?euBz@VLJb>cWi-Jt zC3fZ(z|371a3T4`m4G7aDR=fpL$+A4-eBbqms5!p;Q6SxF=hOS zvqD{6TGZZv{_^VLAA@0vL&}08M?~W%Zf9bU4?yG7+8;*3?ofu=l_qg{S-`dpDOsJ^ z;9z`s2jcg*Ii@G$v#XzvN=|DemUVwQPNSy_vwCC3`km%&zE6AvM-6gG!UDaL%(isQ4#YDDxtr z=Px1CqYKcX*|+_f-ztL&njVDjq5P%$n9Jr{<2xh5BWOluOc#yOW8CI}%~Zr!F6heK8q`8osl7z=5T!1GO;97l^V4Oant-iXQWVOB+L7&v>iwac2&7u&dK z3YJE#iEx>OO!@7%0(zB-(h_OI$~i!)TELq za;<<)nhcmIjcWBA(+2uU_LdK(fe5oC{j(yP?J+BNN{jaczUH2?(50&tg(>RO!g%<( z4NE@m{(PeR?mMfRO8QgBhTb`!?UNzg4=Za666_hX0tp3u6J-AOIZw{qB3%7~5!3ya z**S-HeHk%9uK|{~1nH8zmJ+HV7!~*WM^G|}AjL^fKAwzFy%^-fHDvOqG!c;!0=&ex zs{1cWvmvpn<% 
zXk797%fhKz6g%}!ADXTjeHQ+!V*HU)$pgc#r2LAw+vqYe8%rZehVZ1$LSLEHt?H$q zhVvvMWo2nrF=*85_AKekrrEH1mYMYi*7f=08QXz;t7ef^)*{fW8o2}{rG()Jpx|jk9Z-_Gu)md zOQ%ryBI`GE>%6A>Vji@m(I*P&LS%@T@#h}-jQZs=ywj#vlBHBKKOp7rodiW%IQU|@ zze|5Fx~D+x8FQx*l|^pzTwq_XMD&SX`(1`FFUHj>q0Fy(RQQWZnX_5kBI1aQrnfY#IGa`%6PKO+L`i2{ZB47$ORJSj z9TkVRwxLaKFTG7Qn9Xy%id?}*!pX=C z=1u|IX)hTshma=+RmB0p+e!ZS-?L@_55W9x?u;$CIb59q!PDa*ug6RPqEx_o9QLF~9{ z++fda&$+*2Z@RAao`FCnV)nbI3?Q{a=Rz0w581=_Z6}{v_y5f#-L?N;O(K&FH&->O z{9i)bF|ZabZ3QLty|=$V5*)y;WJrZj3i$(}xj!N`CXBiqD8w`-3esaE)Qt+HPUcD? z_?WOzc)7RXFr~&=w3I>JAVj@LAm_4{_X4B6(0vK3=4Osm-aK6$HbD=r6otXjUU~t0 zBkZ-%^QHkvON?j;X0X)01Ymwvh6giREFd*3BLe;2!;IF&`ZHy7_!OZbs?&>quiGbs z@>zRca}oMZJaRTxhd?4-v|9OSl=`<38mm1pwC0S$Kih-?1COE_A0GHKHFCyg(Ib{G z4B=e_N`a`NHBWXIGH+fMqb9|O7?pt#@TwEOb`4lsks*~s>b6$t;`L)+l z{aVHPG$^4mDY&_o4z!A;wQt&dBIRuy{xaauX7Ea|BpdmrjIWN6`Um)eFa}& zq8+1~^Lztv30Yd3@0p#Bn<%|mlSdtvm*k$6r|t|S5^*{gP+?y*R{A++z!fxUzGqJD z@Zpr=v9szJkbAbK^cIJV;tfG-@}5y-;{Kv1qNg^-cm_w&#d?RdV@3=Pe+|-b$7IT9 z@1;-dn7`9g%EJ01**Qbdb7jQ8$m*2t2so40N4--t$SfhIcB&gWedae4=2<+j+u!r6 zX-W>p2`GAx`&gMI=U}D@*G>s_e5GtCdP>ocy>FV<&MG&^@1a4~$5c49lSRwD#LGCQ zcZeTsFK&LrRk#_th5qieA*ES|%1LjQ5SBuaR@0XGjRu?sk4KW&;DVirGS-_ki?zKj zbWeh%utg%W-$XHO^YR9Ywf15>w>am1k zhVQ*~L)1C+TSE??h4Owdig>g|2Crr9Hiz39zM56(rST~W@1u@@*I>@F;bv_&#e-8q zPXxbK5~qA|c<@TO0g~ThL5;OCU;2bA;QT3GA92uO>B24FGbV&T(l5*3KNT*;oObrL z#hFOl0Mnc$>V}TRgr7#5>S{O^7T+UlmW`=k_?^#WIAQATIvh9MnBlvxcBC-H0h6|d zQ4hI#nrIB{LFAf8Pg@qjkw>F3JVwqlH^R0Xj7Swe)Pq$l@>j%Yu4|&7MNo1_JCiL& z9)!fG+BkwSmvWUC*kGyi#W1zlQVXd>456D=JXtpUJ}(XhI5z{r50Xld2dm_r5&PX6 zFuTO}Y(~jh!Zc=etadW1Yw^${(_Y%1y`yti-EdHe4ikzOZ)T_WUwPfJg^6RP& z)=9;Zd+8mxSsEf;Nk!@X`=f&%MTSed$~f)Ms=Nla=!qM7?$JMN({xT{&D!3Cse#XQ zUSG!_=5cBoUn4CNvOV>+3k|{K)!E*+tNb~R&quM(p<<_&){VtYS+bdMYgREr9*svV zf&C7PL#qYOk&eKz@O-MvaYD{DO%T3nAdTKM{bj+g-c6-VwPeRK7lZ)*WAIV5;KSab z@v`dtAME|4O|}My>6!iaw_D1g0{-9>W}C6red>S)yfgNx+&07t_CS}wlkZb_T7US> zL!PiO#k)=PD|*bXdF+2}0yzpOyC~i4gt!-U&y~M*l-pfHS<_MBBG6HeG}aXzcZI5I 
z(I(FWYGDyIQ{{q_23!4#TSE%tvd49wGh#z!#=VRnazhU6^ zR>F;JXedOu6dI9r(?)xF{PYLZ9fVHPUHioy#UAWkUgrpSKG6*cgtdSviiS3{J4D%y+gGEKe2z_Jv1aft@kIX z&{iLX0n3R1#86)BPUmI3?=F%`Ev*4K|A6%M?DBqiWR=T7$9tz2M8-Gg25!(pS6tAD zuj$+Bcv$u3t_uVo8iY<`Ovc~2tf}&A>4ljd{dGlI041Qu>B1`K>m@(Avc#{|kBC*Y zIW(aDwdP8sTNe~rP*WVg8uaH(zkbb&p>c(s*RHSqsy*wWV~r)c;EIjLszBn8!|&Gv zZBKiw^6mJ}ZZpSLu?-ExEsB_We+p-4-N$dJID0HTSIq5sO?iE^ykdCO!Q}fpS;rkc zC93Ho3+iG*R{H||Lhab|58t7I^A|>j70$)ZuFadDe)W?3sa_zQ(-W@zQh4Z@t2V-6 zQiIuoij|6d6eDa`f8=K)z+ck#7E-nZJ`1XOU}5C(tdR)*R<+z z&hNTN))^E2UdM5n(S25#*Rf!4BDg0mHnDRCgpk``dRXw`gmBOZZ&ZkZlCHCbb-XYN z7{NXJ7a9$B5Lxw~u8u~z5s6irv#p{KbsSr44SBeA#ES67=@F3bXWO3gSrWA}{22un zFlb~SSL|^F2mrKj@gI}|VHQ+CNVaIE?S<`*4@%Y$V9))`LMFR^T zJ2>Za3{2^UGm1vt6GWC{($!fzH=ne-1i2?ZU+a5{) z_Lk0#`~FuLjArrE7I^3OW4?DTfF3t+|D^Jdg6I$Q+ZJwJ>;lC7a^X^&dO(}qrRrf0 z06#`;c8aX~8$WJJEZ8x}fYJS)8_W)$ji@g4A`wU^fC?6ZA(TW^dj~ckEilfVV2{dJ z_xFHX*ZOX<{X6MzwNtDY@;n!bjYCIxl@)2JF-}7D_~5_mTbi@myf`Ox98ZDrpZ9>+ zsef{(!hBme*((2dul8@PQ_RV3=|eW)d|o2KaO9v|zwj$QULGKW2V0Gzi{<$cyc`A>f;DFseT`^ z+fS}*qjvmWY-fY2A6c5vm{Nb4o*OX=k&C8pSRL+;;{h3Sr~*fq9!nzqYymc{!Ty#j z%pHy}<1w_T zKOW_p0{C_GV~p6{v~HWXkr$aqRK5QAWbv^;RJ`iu5Bktw4`)ic>o;Xvmtq9rN#M%S zA*V|x6W#x40mwEOaJ0*jXz zf5p6)zSO(ORK*XBQ$9Oh%WPyyqd@@d|Uw_`L1!6RcBE0yvsP_9UMwfB9Iq zh~X#5Y@ri=f|t>e2wQTZW$tTBkGC6)0&QldR{g=WRwBsqW@rlt$9vTBYmrM7Ujwg9)u zg@aTfA7m``7@)52LRNREPqJjyLAi!ZhZ!v_*6tW{p^k&=(@UcOctHX2P6 z+4aJ7>~#M+H0F-?AVKHM(PffAPkp!#6TordUKm}^`V2r-qD$3+Mp5Jc!)AO?3u+@k zf!cQKSp}5%1dW<~SO|hM-W)#TBA%4c zl66$2nsawr79<5xFV?!Zu+Cf;lEygIc4P60I$-O?%JRl;kf{Uplrn(yC4@)v-H7N% z{hTQ!#3!3bf9~?6{gF&J-oteNITNW56&xnW08j=Gs^Of1x1L=M9dar>{#*-TsxeO8 z3FQSqXF~PwuCDhej98iZ^fEf=o&r-lKm}HDMR2~;@=r%ykFtiuT`k6leM)7>kgc;v zuD15E`3Lr_fR*6xa_W-p+AHeoGlCb;UW7;6j0ObBYoY8g z;=!rwy)Ev?>jI=wK2L8tQX3aT?-JHuSBaqUlXGvQOQVNo=nHlG02M_DPni~^Y#gDM zJ!7ofRs@p18y7-1n&9PSR2uS!w9xxgbypt?H4_V<875}LkFb2Bhl_j9F2O>CrFLtr zf?LC&%(BQ4!-PHS&LJ}i1r>PUtW29bfu9*_OtNWqx&a?a<2V%3o{RuR zl4~I%yQ+5tFt0^rswy4um*ze$Thb(nl$%K!p&L3a`n+t|*DH!X`$qIit*o0IfY-e= 
z)NoZqeqAY-vyh%pYpl`yscjH1pq`e-&>B0y3Ei4IiqD^x|Jl9%2NR|`a@_?Oxe--K z0Nb^~{EAWjY!#D{GW1+qx({ECi^N!bp9NoQZDsup*Vn?~tqO~c_lU0e!cR*$}YvnLZ2uiLA`du@Rc=2zKF1~X`&OS$LM3O*Es5ntqAtJoPBBZ_4-9V z+V7_^4kk+(_(S)ijCBHv92n&w;a>W$b+ge#AdA8JUaCb2)yLQY{eNoaE-!7kC2+MA z??3?yB`pBEq}@4-qr!yANxf5gFeQi0?m$P~yEkeg0x{uz=Sofb@<&EeGK|%$l=t=c zp<4z(Wk>u75c)nGETjvS3hJ-lMo+pr%0<+VUrN3f6{{)1)xsAnJ@d~TQB~Y?cxNE! z*J`;J`Twx@mO*j7+q!R%;I6^l-3jh)!JWo~y9a{11b2cv1b3IJ%*Uv*b80DI8SX-A1}F12lHxcHfOR#F3;l&IpAp$cvpUeqq!E1xW^QBfE)Tjh2l zY|EI4k{qDsB@WN$EuDDPlVdR5AFo?Ihv?20mdg=-|O5qhaGKt{LO0Fhi-|b@mh=`W;=+wTB z6%G>zBwozs`SjMR=B`qGWO@-C11o=k{7b zy+D}1sp$JiQQZ&QTKie_WVO08{o_dApxlc8ACv!ofXTbo!5x~w0_t>9*Y8-lF07aj4;y2UYDm#J zl8%4Ab)`MEe+TgPWtV3O=<@+8r$evcdAHo1nkb+Cle*8}$&P2Zd(s{*kccyy# zxu|mUf1@k>npFQwx?dHwyaDJY5qKS0nO~iG7oBp4)+=)L{;Irr$ZsO=qUa)YyMz{| zdDPh)8Bmk2c)GeeHg)D*7pPBO;Sb1XFc`guO+H6FpfeQ7rA3+;HXq3KAw{Km%x)}+ z{?-y4UG|4@PG{}{KdLyFQ7(p4*Y?FVCK~!%X4vcJvp%>HQ`aU@`%iuAGv~ruw#Ch= zb4+7MZuqEeiGnNqoa1m^ogopvA&^+NS}SL~-cIsR>}*d^7E+^w>$e@gT}yamcm>Q? 
z0W%Mq5hd2q+jC}dsAz2|TimvklR~6JZ@_U8m&@;SExtQ#L5eZ+^X$&;B@K3qz4*@) zZd=+C(>Gx$od;2;JBgnIEbaAPci3Si?0(qmGI~8)e{){b-RHJF*|RWU=8>3H2h`e` zD%E(Z4tzpzC~Ijdoo5Du=d{FOau z(#;+E0n;BYciZtjWK^D%)GU9hDZrsrKh0|-bH-!)zr@e)y>E25wEp&_ZjWbh#dX&+ z#5C8e%Q=f}Hhq`+GB0%(z7f?S73^&aQ@E-8vD1W+&gI0x<~_JRnkGKQ;(=nywl=OU zMMbYHc=%;wYmCgHK9V>f#RH-{1pZ$szr2jHe5;KuT_(I19yk2k9zJlRTz_uk*F09N zV8*;C#sCu)T)!JTTD=wJnU7ly;6xWW%C}1xpfZOKtb4soaKA6S*XEX4lQn z%lVTUnuO=MA7?nb!Q+s`u4l+Lxo8rv2xI$cX>c;e`NCz&JH|BfYgrHg-uB5P)H~it zNUjd0)8m(vIsgL-kge+{YEHh}HR$F6n{M%uY}$T_vQ#<>Qa;j&=!_=;zObdmbY+EO zPxfA-XAn1Z>DA+%c$9jL1+C4?pU?_@B<%?r7r{sVLcm8+zahkRY4?+7Tu|Cp2-KmU zyi#D?Wk3$j`1~||2;7X9`gHeW%!oV_i$(082G=6ELeB%5{p|IcZke=b1V}2$YhJn> zboT<#$v?C==Vv;t{FRp-c;bgx3EWcVMAw30Qf>lNIWzBwdmR9Yh!R&8MUd7>*6oe+ z)l+$CYb4P$tI#5=0xmTz*nrPelLpdh%%;KWSNSS&ewh+FuIbk_LB^Q0QiG`>duGn+ zUkD5@aqZ}c5>Qhaap`qpzu{eV<%{Agir&BfF$|Rq$YzjgGnncK6&jqs#D?IrN6yoUCZ-qVgZJ8U<#ibD+NUy_h}!rW z!^8kE9#dgHo?wHq`7l3shi&|5q2Myd?@p5qHvKiIHmvY7=1ynbw9Mzi``*8VdJ~FN zla;ZdmAM;hH8$?X+jQG+s{BeQaL6_Y;t0K(I6^dGCn^0PO3dy@e#ikK#FpR^Z;AB> zwTphWF;r?rOjCq7WYOQW2gd>K@trP0NJ&n*4-kgEVdF#{3wYP?tx7kcARNb%L8dnk zAz)IG)%j?CYV0%fhu88wYR_D7TQto=1WE-iDm28G^;$%BZNiA=&t#fMi*l~~Kv`LU zN-tPI51B+mrb_e4Ly%g{_mn!BD)uT%;@zZZAs|%8civ*3c@*Jo9xb?Q6@D7tkU^L0 z1Ey^S&TLR_K^7mlrYJ5azb)jR*&Z7mI9e3%OI@x0O!mKTqejll5=N+MchYW`OvIg z1Ltrh!l&O`HHxhcey0YEY8s1InbMwOQZJw1s{^X|tJY?fCh+rXLM7d%iXLR-BI!(l zd;U(-=pVaKb*My?Z1+@6+eFUBX20kZ6sh*VM0ikFoI}le(A%d^1;smj^KcCeA*@Ui%`y^~QBMlu*ah4py++T@T6K_vF7O>)X26|s z_-fNB#t|Jqq<%XU_96*j7HSCZ2#enVgG8bC#lbA+P~K{ilbgY4;3*iIj`9HMolsqM z@b((5Ds!R^*;{see!J3zk-NOiD_)fvC>OM~mC|^M@`!MwlDY6(-;2{qss(l!Y}Ft? 
zD2D!+QI2vr)ZThvM){N5#e;w!TZD*OJb}Da^}bm7`mN9osRzD>)bMS)4%LXj%RDvn z@@WAb9Jt*G^zaw{5f-y${))FKn$es@w041jB#g>)Cm-={A-5l5eV7#BEP|i}KLeR( zQ(Jy^j3R*0E&h}5s?e7q1gTmF?w#6-(U7PeG~`^4Xe790(7Pv4#Wy#jW>5rrg?c(oUU_9|1ZB5-96>w7(z-9^7z}>6BSxixE1VP4~2M2VzRp^$@$iQ)L!P3l$z;ymj zhXPD{c#8S7D9B^(>eP<=Df`jJS`WS(>!`}vLm@kS1kqaWlDMk87wzLB0`SD?IDK=o zU^><|guH4Yq73fR3`P5*a7`5%Vbb}P*yvwnwDg>;G= z8&IE{8_FEVgcE5YPFJ@6v(5N*vQylQ1GE`z)FkqL`@)zLUF6{Z=32-hbTdY^9a}$8 zvm@qFa)SuLxUnuZJ@^X#(l!4=Nw%|kuE(HtbnT4FaA8U{gYu6qtN(@8RPLWN7_y*aL`B=3W;CnyJB$46tRB?MWvYd zM#KY5IyOX5p&>HX|8^EraOGoEVO;S}bL}+it|oc_iS^W5p=VQOGp%nmWW)Q%CZLx0 z8<@_D;Q(zNxM~Ub;3;*FcIkpXsNxMPoIw});q@Tkr6?8!EnT@J9FQ=`CvG)eLImROizzVDm*lRiR@7c4f?3NGa*N7eX;rgX znv2IgSftY@<9R?F0Nt3=K?Oa!^NnylVC8N2Tc6=N)DMerO2~}6Z`v_#x|8nA%ProJ)J$ywE4YJ1OzR^A* zC8u^|9rF=>R{7}Rjon?eHG^3-UfpfVaN<%MHzWZ*mdW)YLUoPOadR_GkE&gNJCX&! zXnNPct$^qs>WPnOi;KJPt?5q5WV!CS;Ic-pK1Yicl_(aSvFO^2Fy*AXb&vT>LXQF3 zAcl_Vg@pyFwU*YrLqa^RB~gBw>r7STlETWkC@R{vu4P;t_UDF)NC^1yo>P%6sW4BK zO=69_2kf6a3Yp04oj63ep&X@<@uxr*5Dw2$io3P~Lcx~b-VJCGX(H^-B`eOT}n>|Q98v;Wy{3gPiw?zaR#p7S&#q;OnX6Y|< zEbqeipW0#FJuPW!P@~A&V5^vJOL*~LBbO1?iv?$$2aV}u`pHi7y%C!}!rK%(4;wuV z0P;{!#kX+dsIyNbiDp;a-d-880pE&jyffnttCJsmWOn@}e-&GL)E3L#Xa4}yhb=P- z=$>3Ry#34MF|iFtTPW(IqRS$ObKy8ZO3SCIHSmaEG-}ps^4A;yM~z9Ud05iBE$as` z?mQ4u{Zjcy3{j4`l{Zov^DK#7xp8GI=XbVL7I+?v=saxB^b=eRUkkAN!L0z%G9}v) z2%s`vGK2%>8R%I^eS!t0P3UX+;lLPmjyd{k7}XLw|7&RxP;yG-bAoI;I#Il6*vv}5)7+G`LlF;f1t?}k z)1WnrHx@y)ikKHW_dRbU`Z5cfb3z}HMG2{#2&7*nCXo1+vwcEal;d6M_^LyYtrQNX zW?~=S-VT!AgHFjwh9(o&onmXDEc=(vUo-lR9gQJUPpZ&SxuL({_b$ymI(A67FHUL7 z58w9pSbDW&*OJ@DP9JgmjOy~7XCj@P*$K3hAm=8wV>!h??hcFtX-|qDDuU#IkgN&Q zzNb5#QW{x6)$OBHD*)n;a23d6{el2V(bHL4SrGU@5YaO(yV`T7--sx1z3;gy5F+NR z%;dKN7q@QJmWB~aA0hzECxwq@39OP^yCryN&VsgJ^;K-K!HnjlA>n;Lrp=s@A_T} zFA8E(O{76g>Z{Ecb3Z6+3;PJ0g(*490{x-`s+cV?R0?a-Tv!X{%*GV!EAfXyRvl+O z3#Wh)>_7%#NB0uq;E60aFy_R(u^{vEDb!J^|2WE*WymAb*0Qva7#OS-Cjhlp>3g15DZg~M@CBHpyYSw#cCxZ+Xt;PUgC zurwVZWl@xsBg2cV44tXssoMe(4sKi^({z&AA(2C0Jf-uYjaXg{!-!j=Ryk3z(t3 
z6As}+wPjE>3(`cF-dR!ltKr&AQIk_vHE?lp2DzWv`kZINSlrvoQ~6UM+@}p-vZs7Q zdv8R0?J9KabL?XQDT{`}yRCO^7ZsTit4bCH%JeD@sHl`M6cDHKuJ>UTw_P2!va@}g zd|{WOc>?C_8cqj9a#CdU-mvz74A;mfpxnW@=ple;$p2M#1LBuw$30HZnWVpFrV)>b zcn3HhIq6$(X!rrrZS%{|EzBLu>)71z%`%*~30}dd^&Jx>>!8d93~%CO~s1B?J9rwK#K z&0~JP7NwHD3M)=>;V|bnN)c0-2iDC>Kz(L-h{(x2_HWy&lb~3qWJ;$k%G)iNq^b*d z>4l$+@bvoiRx+A(XvvhfK$c=lpO(Q{Jm)r>pBg1s7SnN|(Y2Tss?H0u^7;x_;&Kj2 zm+IEahLQg7mdf>je}<~On+5op zU+Yz{{}N89Va8DN4Gr4;{#)~5hrX@1JYY*yN1>-EnkjyEJ z=K5~h5*Z4v`O|?Q4r1kXcbTP~jt9fOdCuIW>+f;-m0GHYMF*S{)IKW`6dl@+Kc6cU6)}M{EoPZ&;VgJ*^A6|{sELn`&JJ|8b1fo`xq+^muceSkDUw~< zKfwJ82w4x(R~`vlpPG&@l=AOu+61LnpOy@O_@T>8IGW5*fQ|p_%w)^RgWQaA-uevL zGTJ|{q~!;va6q@ycN)c$4mdP_$3^{V38{SV_x@5r6jnnN zTgze9{tsK^f!4%P0F0H=$$U-*Ud~&i@=lcn79&pUjT`z`^6e)5N)+%?Z6CE4c$E4@ zm{&6s|CQov&*d;y_e*U2a1y>j zc~y0RUBYM{{m;=wU2V9_Y^I5jSRu$L%zj+}n~z@d4H8BPXzKm|&D=laR4gie3L;F( z8*YXmBSrQ`=EMG*%tryhA}as}bFQ#J&M^dykoX?{q8G+=Te?Qke(&c3lT9k?^+u{A z^K$25fg;$BO#-z2X*rqw*-C+oa|GU;q(`B5KK4Y}9yh2}|0F(T%M^rqaPmVYnaiKO z1otAhtpA*=N3pI(l1<)+ENxopn&UtQAK{^ojURgpMM@R6)+4CupWCZe*%iX%)#{yd z0YZ=KJ(QXQfG2iGxoW!sMlb#|j2J~zzqTo43109wi7`=A324NRx0u!9> zk$TZVjQZz3vYTGIVgGMUXmD9*3@h7{O=zdLq#>6;6hH&ovFFjy{d^<*LoRU*TI*nc zaT95iWXtaG&KK5rHNF3Sv$f)XQTchcz3WrFLrvLrlF9EpAx8v$+mw>!17toeSxg*! 
zWrm+Gi5jy&H(ehe*G~&OZUsbwKFGeG zvu4$u$LNgDsj+16u3!Vu_oPH%h&p7BE%JFoF4s>iPW7>JoSZD5ge9+JaA|D11LPbT zT>yl`Ot?Ml;R00b^N-j+n439FafSw>uh_e`maM(lk7S^(q-#Zmc+k}i+`w*4<9L#* zxt_YcCuBNNXkdITescTVW{jdEK36*5B-^LCj|)T`igOdbv>6@iN}#FTxYIHIT4mP$ zL_O3$ab3gmVnBKdL?R6!W4_#xBWhKTFTyH>f+z%0wlIK<9@6w5zt&F$Xdw%p3~91} zi>4p*b2&h)HJ=+WE_CH+`X>++5zM(2@(L(($ulz0Z9CR7k5TsqTo7fH^6R)MBz03o z06x{xKcCR}KJsrMk9Xdy^7gQ7S*3< z9e<)3_n-CY1i8bg?Kw4KQa#>)yQTA*D@ac{DE~7+`8TN-(;D$lwo@@!*sK@s%pnss zCnYuJ^1LOq9j8IQu|zvSzKlG360r|^VNT=TI%0g~^0B$uwAU)_ zY`=U4#imKxbo)?NU#EgFfRO_x205+OxiVL-;yZ7DY1zOI(7>!S&!Xpp#DQa;%DYQj z{t=ae$aSUc|F{q~Zx_OILazvs!vPyFDB9=nprZPb|Rbq)YKCpyW@=Y9I) z00q8VQe(a#{m*-A7)R5iEeVkFy;5sd2QU)@p3TgIys8qi zLuvgBtlt^oY_G$izCB%$gXIwyPdFgd(+f;56CeMJIJfDIEbyxBDJAt7Lj`UwF}Zr^ zqsz76Tz<_5NM_OnIfIm}?@jXrk&^_cB@u|nb@F3|Lho-Rx_oJu5FtkGifd;6wJ5vc zvgxE76n^~Qgr>?@qSlk7yyLRRTBH z#uEVby0%A{B(2syn7T2mV`=mCb=m86o-?}M=IQ=BRv_XTnM1fLk_1Ig^q{}DrIEt% z7Wcgp9m=A8={KG|j@&C$hM$#>42t5q1A&yTQ%xxvBvh`jtVIiju@=>csBrMEKHpf% zeV&;gy*0{amk2A&+?b5xW-VR!vW_}#vaHNcwXCF1b@`srY1)c*L5rN`pTR0AAB4=~g90;f3NAI*u}=i@H+CQFTQ-cAdF&#ZPe!!VS}Q(4Q)|+~ z_><|}sGnU)QZS0Weq>mEGC^X;Jouh`rlAAAfI154f+-tF$@g^89ZTEbU~I}a#ZhSu zYTS+kc}+FYcKSl{!h##3YyR;JMX`?#AOQ#0bMXUe3~+J`5+EI8!%r3f!*#)fj9x_F zIZS*NILhb;$?jL(YK>%vkK@mM1Y%WqLin|9*}y{sY@oQ6W&;@f9L;%oAxi7eSA}K~ z{V=KBa@iyxQJ|wfZ!F*dsxl7Imoi|W^0U%L7Y-P}jeq%55rZm(e^q_S$3y)*O=(qcE=ko*|@XzfcJTND|48nhMsl4B*@%ep(i4p?6?~eE#zyLgPL)At; z15o!c&~q;WO7K)D?5_o2D-~0Ccm*ghu)c0$_sKE=Uj}`WlSfw}%=OlZG9)38M-cnc zIX+mQ@Hok>LjWiuCUd{yAUnL#{jrGyhMtHQxsaF4m-}}lat8-c`Fxn{s)hjSA_(rg z;m~)GNvMeU&~U(^g_>)}GQo6~5bW(sq1F7iM>uc$lEII>`tPx;r6 z26eQK#UYjC&Yxin?g@bWm@~qwDW9r-**fo4fW5Z5^|0X610qvCO_%D&5f|G`Qn)F7 z66rJn$};9|3gGEwvy~zl|Hf5*5bG(;lKq!kt0NU`3I4Tgz zqDw0D&oSVW5n$|DT6U0_m44$&$lG`EgTv6DR>OHnx4aF?b{WBd0e|$!hxu~boz&Hu zq7E&LsVxQ7LHPD(BPo5ztX2kIspJ)gbWtz^Alc?_$y*;6xzs(Z;m30BKb3DVO17-6 z!TLUP2L(7tRyI^gNaCF;82bNw3XVWUdkaHc)igl6Lvir7u|#_p?YQ0^tJUdF8}+0)wF)j9VJ69iyv6u}J`8{I;q#>kURRDdt9M;I^-_uMek 
z>RVWLt^3IPU>FI^0itc&@h_CPXls;!Y<;peBna_Ki1b!JTH%~t99KFIX)$UnkqU9V zn9TJhxlBC){qKhdO8Q&RK?P8I)^4j;(Ol_3uI5(_{Ri@H?~4@t$E$hb zs?W~dNGy^jzvI#+u)iG1NM;21AuOOF2zpMyEA>Ns3z&q|E3Klfay9p0WOIAfuF*-u zN%JWg?aN4j=khd0y*D}k9T@r51Su?GyJa13kYBJoV(Y=YdS`z*ywhMMN^kHi)acpd z>ow;OeL6KM?PgAn)#b5sr&ibZQoHvgtb((dZLtLM+Q~2x=(>(A`REa<+^WS(I%>&j zBV!hsw5NPy7c#p+dQ;fBQW8apr+tk3;k@HD$-8Lta6hE$`(dNM)6p@MdWL3XFz$>} zxb3^!=29o}>3$~+m4>0P!$|np&^xt)2wN`|nY_WE@VQn25^;Msy=F|;?Kn^O(>Hc# z3m%P>q(8?B(Q6v=tWihmex}7LrG2U?89B1Ht4xT9n`Q5a-!(vXg{?V*sh&gazpE2T zrlp6ckA7`)b|xh}+wstP7goPj6xK(DjAUu_yn3Tc#P!Fx{-ux^L)s*12!Cp_44)`A zf9hdXlT}hIk-Ac`Kh6NHx?Ui0k3kE*@`IV@Q78x>c_z|-Ak@7tONb5 zBX*Y8@k@Koajn98Kht388Ce>xG%9=?o4(Ld9?{B=9sBgU-R^R}uhuVPR3tb0-d-M2 zzpL2P*TDZL-b`Ta#R|o4ee_~|Dfm%^^Ukk1GOk5nb17AsL z>hsxS)2NDCUmHz@026vUwQ9_abM>swVR?#n4pOWXV{+ej)n3@{_;*&+G@HZ++DBrs zCA`5rYR2WeECI6^sUS$EmiBBsxPztOr4*5Os)oY^^mwAlv>j-gc5o2Gs!0M!C9EPJ z^SoE?S*0l}ea0yg%WX)5s9N9%wkaqTwKdlm5x1>BwpGx8Igjd%2^eUoUFr= z2;5oJ@bOq|j6m|8>Om9ylhHf9QfF<@bC&F(lhjmh9N!P)IDw@Pc4^(EoxESXG_5k| z!sXk^J649Mn0Da5Qw_0_;@phR4@2aXV77uYm{kpmq}w1RwW}5G)hVEsHkM zobusrVcg;F9!~kvrcRDJcH<>>Qx4Ja0I+X@PqQ#54incnsK0Q|rJKq)-2$a!%0tBs zAy>X;S|MVBqR~Wjy5Ud7I*_^bbSgZ445SqYES(`v7RVI6pBow}fgJKDEs>(CUoEH> zGqJwUq&Ai0U__PqsEMw;rflO?`6KXaoYr+MsB;^T|L^rdxha zCJ~#8V%eVhF^4i+oCy!jowbvkVWJ#O4HxDV_Y@fCw7nSnT;(nJHhk*Jy|jEB zX@o{&Q837uy^)am_;M7|V#Mpo!Rtv=c>JwuCLrcSvP+(t&oeCyXXe$frb289<FGB5q?mT?HGy=daYCR-R?si*<= zPP>Y3xyE{*@d}>v#rY+DBF6>(~n&6602mDwI0 zMEG8*)Haq@kI%6V!Fquvij+(&Lb);I^S1vi|clO z?fg+#f_jCNeoU&mma?1K#p98*S&8~waj%Nu)*(mhg&n%Lw}XgiLUS3HlH<@Qnd{WdPYkf}p|_Ei9+`x|9$8 zGdt|S8u0`V2BE+RIky73=8+qEwUr4|sHRtIhBT0(mXFxkQtt}6oQW6e?ac6M{OjBI zs#k9!#W-PPqR5}8l@U9_R=*prOiT?i1Zw<*tV%;e${Nvh#Uc<)qQ8G%q>Rbtt^FeA zWBY^Ad-QzTBTuA;IK@<4993 zr4lFX}*QSBdk{E0EvWnblt(1w78j zO)J0n4x`ve$ZUoocB{_IMkDi-k zD;Q>iH^KGj;^1}Q+EEg{aREJmu#=96tTXe_- ziS>H~XL#h_slkitlG4aWx>EfZvq{4WNP{h()miCcxF0jo zJ$C%ngiF1y%KM!P-7$FmXmPEtKC+E{@rnb40OnPiy^-AW%Q`k>R4<;H)qUqym({^d 
zkMOYW(?l?6i9)QV3|v+<35G0evj5_ArE6iF(*OQ_*Pn$0xx@!UmP@ynM?D5?^>V>+ z$K*O3&t8v2+5tQLJw~dU@#~V)J@k9rAgN!KY3THG1cOk11zX4b;yOyZ3Bkf3i{k;| zebmpwr(FF{R}gV>l!n#P~4Ktp4s?g)g z%{*v2>nKxbhCgZs&->!2qwpYS-<}r-Fz{6G>aoxvrrz|f8M4Gnnatq=RzNrV_cX|) zwWImkHkUb-R!Hb_;tes(gX?heOpSGG6RDwZP^v0@#D^hIS8M79Un>w=j)hGZgx!K63*0U8-M zxV<-7h3F?#KG^IKXAme&5$}-1f_=HUW3*I>G4h)#6YG(M0*Z5~0aexs zz<6z#fW`8+PoVm=?zna53bQZ6X3H#Mw~iIuIc*n9!I)Sz!k~ku`Uo^m{cpG~*)p}V z_`d*V=|m9Pe1p#1@r>T=(&|E%tFHwuPL3&u`{h0@BjDPV;k%ydt9o3W=~PfxDZ@Im z+y)*bR%+g1Qe0a7^2=?;xgiF48QF}@aMJS`E~jnD(8$H%;2~1%>bo!>#zLUV^ zRn-e0-N`5>S5ds^t(uReYBv}J40}PiOKUYdmy3sMvXYb+G)EXh)JM&M5#!b#yNp6t83ce616P*4mlb``BbLiRCA+(kLdF*?9 z@0f+YHfh}{sp=ks{%+l~5p=}7(a%(mZ714h3uL!>{W!r6^6&wx)RYHFzbgun19|+V zbz~)ekva4sO0tWhj5>LVjkqEh?ppEd1jB`R*RWDHowO1jFl$&dl^FaZw@$q0I;jRR zzHv%heqJ<^Dt4(Zi2|{&DK4bjAbE;Sqk)k>>GvG7u`z}uo~CwSJ>oe+IUr zkbSH|)Evu}>B3*?IGrefg8-L(iEteLjx?daTl6xSg7wuFXIjJWt%F{6L(etC0_teZ zKmJVC@6^6*Qes8s19dcHk_C1!Z58f?a=}TBpE4$WRX8dh(6YrUbVqUm+f3BKE^Mg5 zmqLyg*&L$kl`=C`jWT_)LN6>%LWTT4K5PxMHmq+VhMcAB#kSS{=(O~qk#jkasq};x z=*L&Pqq8l5J!y6L-hKQa?@QbLCl9(EDC_s$Cs7}^!nDTcIZ<4g2|ht#0KGSq;{VY7 zXJToBVZwD6f*w63lUGh^(K%{;-!1g`V}CJ(DBgnSss@q2DGY`JktA>D8MKVaS%(Zv zCV4MFm76J1fRJnq+MaH_C5)8lidEvn~I zAbw5VKBoV*J?J|~O^FTb;n5q~7|0}FBH|5jKj?@EMwuKK6k=fD>rG)3!b}#*NntXt z+TbMKfC3~bF6x}f$Tz3332+J>U@^jlfizk_@plf)Rh2fP16@g+zPO{b!ik`Vdc=1B zE=!2w;y6S=CR_XyAXpBx1DjM(<&bJ0?EMAhe7i0}zJqpgP=xULAU@}`Lt4XD94Rh4 zDDBpT1~u?Os(;oadWt4Pf?6NA@`P!a=w;pD@S_EDV{Shc22J*&aj*ox+afkbGF-y1 z3bMGZGQltIiQ`IX0PYZr+peZdVqb1|!mA!vh6OavnD5P<*~6(h@XJ9cnueF|p$`$< zA4tu=x0ZoMnn@63-)GBc@_C>7h=4NjL{z(I0qL&XGg!Q}3k$2gCtp4rF5cE~k{YHw zNx#IJe|@2>fj$^~l*E^NFhlkOCyce>!bj^=sM4xk#m6bseaR&*SuVJ#g zvcp9ePsHt+CY_8RyQB~o z{Shh2{VeHlyLOoQLQS(Y%NECUy69YU@jd9g!=1gSfT(2M^4jeh;cI&i%8!<7ds0ja zd1vuQN{y3JAiw=Ji*%zeNnO9}*akECVq1ezf{m_aa(3!zP<-o1ik;)!UY2xWjVT_x z6(%P5t5QIW4_Ne*pD(%^q@k4s-#(b4z`t1lObU|xBfIDL&SJN_cnAJc`@;$20Tg&h zuU96)C&Ri3*IMO$HL95?^C{`f-CzWw&q>+Y1ocxYIeWxRKYjVH%!3H?vFL 
z#k31!md{-gvFa`1TBK86nw&;N`*2YUWAv-)l1gtTu_~5C^W9j8ms-xncdz;F$t$vp z)x#5a=bbfmaTMC!pmc}R@3$CGr5u~D3M3m{z@q5j#OUK^4DJtjHl??8Ee&4Vf61ag zf+NvXv=v2ZFuPNINhCgA#Y-}tEPLAM3;&g-;6iwk1qB9O3e#z}J-O-i8hiTZ`_%bh zwqpoW0bM%VPB{H9-vW-d6^VI*NpVprcrKPYXp9TAf$ftzkG=b@|-X64o!j z-+>#VY0f4(?~sPs`YSaHj;`V>N+#qu8<$df>cr^>@B7XWc=#AnF!1x2ugqi#U4?`p zxg*H8NOaL4!Ij1H_g?DA=$W{x5aM!OmJButNXe`pt;6|)$ZF`|^YnUeXC>7Nh7%nU zt^J7;I+_M!o1-f3bFzjEW{?(C3w(lJ`6aVI>a2K<^`Q%EHkT-b?>mGa z!<@!jW1v>aCNgMy<{EqX-4cr2g_V9Hp@Ch{(154lRvU*_cm2GA=8u2n%t8&M+Fl7+ zqLra}CE-D*AM3QB zH^+mVRrOCN?a*4gXT6eh#2aC_`-Gh^z2M38SXlxMY9~LpNv3Va=;MB+rl}f^KW{RG zqGPmjsDNWVnEn1s3jS+sv2Ze-Wfn%=v@)Iw|6FMukHf}CaK^>04GuMNn!@A`8J7b# zO}Op^iXN#icOl9&>t9z(NBGY;aryYc8B0Xw&eF9}{M&+(k1tw4i7@?0fp|!H_klU*s?8C$%9+G6O zy*Ag<+4bP26!aAVxrArz+svEd)9(%m74Lwat69DG*M9o71l}SCVV9vhc!1YNjAJv< z@uc5l3=|aR&;Ar7D%@VV{w`hq8)W-kw+FY^=Q75C_3{_L4 zp8abO`Kbdw_UkL5_PoErt_m6apVvi#xn{amCGiCv>4EJ7$?%Yom_n+r3&pBI!$j~0 z-1D#XV$kNnd@@eDPY+naLW-`Z>hjU0M{GIRC}bN2jipvG^d9 z4D7JD9ICEPKu^yOUOA#@*P$2cgzU(WIogYNq_YJDr$b#Reu;DHn6tA&!V5Z?ZDNHz zEuO(eud{ZezZM@UyjYe5$tu(zF;}`4SK}BM+Rd;u6BFh5e4KLVO}FV+BGb+2%JAa9 zJZye1xF;c9$xEix7_<25C(5NnU@47hXvg72vh~+uzd*PRl`KW z`z%?SCaj~(+=S;5xwjHm*go9M@50xP2c6t+7u=cEEwM#lDeshsewRs4Zhc05fU8h& z1Ik?1Q6i^B2nl$ICS^ojWPq3K4#{+JC8y7a?>_mzIHPuUXNCl#Lox>|nsuaOMjm2c{_TBVtaas=emJdKg24{xod1b1!|> z@a@ac^^;YQwMzZ*ht&+6Us9znL+)RWHwhuJWZCVFBhb}K3;66lX_zFhw|_15N_wSN z(2q-oNoO`GYr05ptD zSP@p&QesuU@K=Z9B;9m8neO;Zume@7eN`L<6Y=DT+o;{=h4%!0Z_{Gm#c7%OiO+=={g7g!x4e0%qfLTY-XG_jI~KyM$?U%%D=PVLTU5p* zUTZoyr$JMxxVrwdAD&^>C9bk9dq^$LN#!_s`#5c2xBeCwG6pn%Eq_`r>{`8}z_Xx1i zA+7>`i{wQHYL$IqChK{B2E)s{)JH%;!fC0}gYm=Uj0q|33=JT0ylHNQYskRssZxDc z8rVO*e@X>8*GwYzzMK@OU`ow3>)@>oC+XytAWNAKu&|;418tJ7Dhv)duqp^angS#% zh41}h>rcBsz)a2|1Pbb#G(4u<0)1%rnG*|e)dG7<8P z0IXGCKau&j%_dG!fkrb%ZBTaVzZwa*K{D};R^$fAwf;IDn9>Om)<4%kCkcrqdO`YU z#b!~sWyf|AAMWlgrEC85a>`xl9~1YjU-?%<^Y|bTa?Sv#^V%YT;H+;UUH|hbd_9gC-U6*QI+*<2lAVi(6@B;{)u@T+Q{&-3pA@ zU*C^^tnC@2Mo0AB9c)d8^`r(AX$eR${`RpWzTKLD8#J^ym8ePol8OK9po2Q6t)}aC 
z@$-1kP*iQm$|)UDZE}~yvQ`xM#*EG~Gr|{aEOS5kE&dA=jFjc&rWPWRlT|TNjo?jJ zm*vwElT)gyUz2xbJW|CmFa2#O+@9>VkGnN2R_#*95(3j+3kVU)x|>GrdAA%r7`3RP ze_Hrj>B6t$9XBO@pf%xsDApK6YXGYqPNgk6p&tR4ZM6%Vx=__voWuq{s$1a^_0ZVy z;U9_bB8R1^Em~V6I(*qVuckizlDhL?=4NT6N4@)?YTD+*h1&Y&$uZ8YF`A>r<`k6v zWT|GHOA_2fLm!V;=-cijjHED1t=HDW;3`6|#(Q%W+tu|tFeAKg;;xR=6n-4;%h9{K zt@3^No^#f;QwLLLHnLeO=fBPFcYS)UNgnGVGxKM1T8@;p;pvx@@8Q${zMvkdw7;VMbuH~lX?E-5U z)K3G3r3GMp{&>`oeKBDn!YwPbl5=fk*}J2Zirn zl_rC4b}|#c-C!w$&45Bp4I={QL|WA67dQU|l0cURE_lU|w{q5ztV!q4c@+4O39lPT z>Z4wH#N+y{Lu%0>{~z|=Yi~E=aOm3~6h#ysVk;V>pSLTw#sJYuRzm(@`&5qU)}oqG zJ;J~&dB>MJ{JI{23U%W|TcEYLzvE}=OksDtE9pcyLr8xZ1W4I*-Oa2QowFx zd|dfma#FB-pIh2Ig#T0QlWAcAZEY71&A2zas=IMH+<-O7hEU?%?ZsL1PS8s9F04!p z=M9O7X*AzRmX;cmL3{uC`f$(Pl26qka?h@Psg9UdB?(P~S!GlAP zpuyc8g1ftGf&}*raGHEsYkh0g-c{$t0n!$p%q2KKURaSR6S^U6zrpHd?zjRY;ElOpz z$RjDGaEDu*U%V537nNf9$gd)q9sJNHM_n)PoQg4sJlQwlxQuotVEyST)Y0U@jxp(t zEIwit&@1p&9zW*sX%M(lFsp3-a7?(T)YjHCv)nV$;()giPG6pVTGXjb0R0{!y@nuGChi}Cdd_@;-YxszQHq41`WNAL67)5a?q=XVNVY{(s)O>2&_y?te`AKe)Dqo^!2DawX6ycM9!EBzG zueLr#5+4?UgrXi6YNi?7nhIVKrB2GrAM?adSG?|ZJaeP2C8?2S2Zwcf+7*c0KUDXbPOQsy#Umwz8t&In6<3x0Yh7aXIVr z$L5s>C$&g;zlyP-(rv!qvzLf9v5G0T87Dg*e$6ZAzFzGVshuJS+*4)KU-n>`567(P zlc2S(o!rT_{b<1*l&?n>?poc+u3CsuO+`)Ik;oxLgAqo@l;34R6TSQ~fWNqrE2{=- zI6tv?B^KY32zW3I4ouv%oXfQn$V9<#7OC?4Fbwos+53<6?DX1C*MxZSSxVYYDq%w> z!LhA|t&G>42W6ZYxE_kvIfPh`h$SMg-Ld0OZzXV^m7}zwTvF6%O@Ukax{PVQBsg`9 zcoI^EsnzvlEBHv_2Y$6#d{Xe$ogNfRw<9>Hca|w2VyuRB@pNm@Qd4GJx=W{_yBM!A zS^RwT;f=9;e-N!>DD`J$Nvw?HU7irgLk0LMX6>9P-;Wbflvv8ix;;zh$W(CF=4@#O zDjE>jfodaGbf2>GL?D+EgaJy? 
z_7+jSSI6aTlxfg)-g|mYz*lUUDs35|ANfym0lt;=y`~tFR%}W3d+NSC=U$_~Y|xAj(mZh&SEH1*qV4AOBFx?vT8 z={{B6ie{o#bGam^F~@T0;7QO}(>EgxP#W>YqCkLiJLzKo>YOSf)P+HIsE8GOThhMNs_PyO>{0-n4V~WZPV50Qk*ZXjpw4KHsn*fuqt<1e6K0hV4L49yT6*B zY0`d~H+4JNSS=>xbBFJHJJ8}g#{Tg`1z_dN1*Dxe$pErt4isZkf+2DU1Edh3+H3ob zuUlGkl|~AH-wL^=-gcjpT0E2fY=xjbi7gz4% z!j}Ergo8cNPmutU`yu2hUF85$uJ@uZkvr!StQ6dq%nv2U`kF3IM9&E46 zD|`A@Ezi|l4>u*~2{w7lOUx+^R;*dUv*6PZ+H5IKuJ&5fyXH&Da0e;AmIki^;icJD z`r+Q&AJ|X|Hbv^!`ekE^s5eh#!X1k%Sylk72POm*RM)XilEfU)-1w!oE=YQCg2jtG zC2vEYpV`LjKGvZ;(bD_aMpU)}qR}esx%EkXDo*fCoK5EiML5}7x)^})OzTLu#M4CI zo1d5#g&bgMCLjpL5(vycO=J%Z-de(wX|hwDr-n}7-jJpCIvetMjBruJ98c$F;wu8U1dP!ZvQo|qA*wm zt=JpYzK^fSPu!Bo4G9viKH>afvVC*Eg67xk=ER(AP98;b5(a=?)?L|ZfZfLncN_jm zx}(hyqV#v+Xw+*fW{q#ZSOVpzaeYg|Q>FR&`kr&2B0#-K ztI!=QuZdz>Iu^OI7L}WA?>U&*yz2icO$rTl`1bQJ){Ix-3_&Y%-DjX7lkOdsA>E#} z3$R0sKnbhB(-4YlNvF09Lw1rt4a)jk<{16Yx_9zL9VOCbvT08rB~;vZ`jJ|w9iq9R z%D1v+Yl_ilqXtoTHZ=7Iqt>T6r*mDzZ>I-H;h{Fg?_D)Ky!k<$$1k(n1A-xkE zypdTc$CCE?HI7ts=n3X~8{fpOK4N+URg%|w`TbNzyVUMyh-zVNLk5wB9A{RD-Zviz z@e2JuJPC@D5MGF?*gQGaRoXR9stv7??yNmty zzTFjXPK*g(X?8yp;_yip3YyJFP0jbA&(tcy`&=l1A46zX zxz)fFZI>`^MTPfLcNLWf9uXc7ugPj}T$J=)mvwtXbo|{-Lfft7Qq(r~_s{*ub zE$+(O#R6s885Bs*Jz2f5ylOV_AEssvlGIl{>hh z@vmlphQQ@pS&b($@wNoFB~;03!czZA7<5bG`6+v*OcG~(i@*(B4t z$YTSyX(C3Gf&fxJcgJL1cRND;e0unPm2Cbr9pr-c_CB#&s(e(w8d`X}_Z{l*LfR!X z=^jp8tCw;{-=~qX_EAkwyhCB%Aqc|aPzyxWCr1+qoc$DJs{7H?M4nB~w@%VH=gu4~ z6NFgWU5R5Q&&U(3A*!@wJ7BewHjdB38G-p-v{Q|!)+>oq(=#D@Nx?*Y2r(*51&+Z) z{h)w-uo)SHB{WF6SkcPT4OJfMyUT82&;_MyU<7a@39`p6nZz0rp9B%IqS)?6wLEx9fjIP3`nzFCRB6 zN=cjRn2n!%LVlNbnFySi(F~ftYakbK)0-*nvu!@8(e11*tP*n=QQGmRZoIS+KFtn! 
zo%pRUk%Y;K8c(y>9V%Fvk!h0rcL{zH#yP}Sk0~eLVNbN&knrWYvy=av$&vQpLZg>t zA!8)oSK1LM>v!~$P8gYy@z^HS5jp0kIZ>9+`0d{9rJNGq41VCgp6bFLL1X#E!WtZ`8d*J zsarE6`}>K3?lwNeWp6fO@tcQR|M;^Xr)^K4ihh=ngyt?`Y?53&pbM%@PwdLx-GQ%e zT2$-WG`wnMjYGiiqCivRS!CH9UMkDcvUw@mVP1TXC(ux2n3^Wtq0JGMCFYvG`LXp8 z6W4-LJuIPY%1-e(Ryr>kaqZt81aEgwe0@G!8_&mZqbcWmdwF?3#UEoUHW1f<&E*y_}ybGHx5^W92>y#e)@*XHdZJyU-*y5aM%)R3H_6 zVpPTK>R^3^VhON;n)sL^9CKlgi?nwOHUg4W+%KR}+g+l+L-v*7esd%5k-RPO?9F^* zT)rJ{@&FJ+OFoc%;w$U;ax!nDi|Oq5jVquY7GwxBA^Kx#5#50VZhQ^49O)$>)hG!p zUi$^m+kW7sMFzq*z41S!UjXnL`O}7k3IJ=rPHZ4@01*{@R3EEw2xxT`i_?(x=U`6@ zTw5)P%^8NRK?6+jUm=jdU8*IEVHLnw5DZ!V9u>$W!j>@-Bcy(KWVlJg(k_<9}cFLOqE76~yWnc`?JgY5U407~r^>Ma0lyfar!QP{;pH zhCFX51_e-aLqomg(C9!WTL`WcwfFR0m)3+{FoCnMN^hwMfr_en6V6`~vi`PP5lIuF zm~{i%KLtoMqY8HoaXh;auoGh52n!h6^Iz(>vxY3u5p6vUUX}vJ$1Mz$2n0L??u>Z(3 z@8xxQ+Hyg5JWM%}{?GRKA%6@LV2{JDbc_N>85#$m+|PEGzrB;@F)RSMk!5LBa3z&s zIR7kp03k=0owT#}M;w2rNVYW|SJ%G>a}WW02v{)j@5GXyuEOa+ofvDsf>yI2!@u?V zqZXli9M>O_>Nz#opuqS%=iYxup9;6AmfGzi{$>{W`hMxYO#)U`Ok`q^_>f94a{u)q z40J)ktxyDXZV{J!0%l|ay3b!K&+#Ye@J@FU8-&&fOII}2>=2WULqO!&gPD7*Ig+lj z16+`*HNu)SsJAa#oCOLVdxVIF1XnfM^hX?s+{rrLJ?e<@8&LB_50~O6^rU@*z)kZq z;(gaHQHBO1AejZz1TZ5)nM97wX%;7+NY>n6ZGkHh;=N%2*`AWrEp@;2TimDQ z-C%$wT6Ag@8to?!My!f)I{ctsJY%b6Bwd+{+q7mEeW(!4T)cCJEemg2kb3kSruo#D zcDxg{C@)56DfjjR;l970T|=M(29=w+NAxh3WK}zV3F_OSx+%{pY$nk%1&R$kwWEh) zhlnbsN(FrdD&yRU-EGV&AcbncqLUMdq* z{?_oPPpTYs{0dD>4F69Bxb@&uzzN07Ug2p`!91+{*(Ah#w_A^p;~vvb9yX$qQv#rW zjM%DOcVoSkF@ufU)SvI(3aq8>QVfC>YT^spJ>|-R-T>ucXVUgA8*{7{_L~*Wj5nMs z`}L)0KjG^E(_8pPyOcx-FKSf}xA*>VF?(6E2iZS%J&?XHx;7_D)1V{G!XrS#%0qA% zQ9$^FkH@ZEW;d%SI z&R0yCFI2DQr;+AwRs-3H%S;x$biy%pPaUIfNI%8M{7fPAX z+Zzcf6v=>~5wD*@)HkbLZ3f;`Vn}Hx+?rT@_GD7;Vlwb6pKj-d%pz`3(C%kV|4gnz zW{i!G{kDqlA^gMvgXOy8gu}v7Mj;hCNf7b~;VUD$6bA6AT5SqsTRxk$pD1bw^WuS7 zeFShKnFClLjaANv`m6rufTcm2L>kX&HK#dQfPXtuOiRQHY#PwDQo%Rmm`=1ALdDOH zHRpv7x3e-=_S_HpIWmi5STpiz_Te+KJI-cMA*h*zvDQc>On1gi*H0tF?CK=2c1+GUX;%yqO{YpBCEy2H>4z3 
z$u}sUYw~ARyDZHru~-%Boqbc)vUTyuUN{^%*=3mAZqAdya~6aP!H)h6Z&^PDj+FnT z=U&FvJoKcUeq2DtcO_#tu~q?;JiLBG#cn;-Y!s9rtUvswgzB8C%mU#xTL7SjYl4pt z-kHOwBVw*I>w}`PE(Ox!&Civy)C^OdDtA)p_nS679qFm9={(IMfbGNUz@)PoklMrV z5-FPGH}`M9Th9?OP8T+1fPmo8uXX%t+W{}O!LDk-K_qy*dOdRB7T+oeB`RHamI0vv zACq+W`U^hAYej+axJ1&oE9nZwgx1>7Y{|Z&RrTZ>BG}?B!4U6;xpt8}u}HMzuFK{n z9Up2^Ts5MI*fPzTw!@72z3Lo#m9lB;v#7+-UcfizhA+2dS_Z25tKfxG_9gA;1p|R5 zHc#smV0(hK@^O?@cl04ppuIoQMsv$-uf4os?5gksK zI^3Rvch1rk5r_UVY?IXytVv|?*K>NoPUV@Vm|D}m^QT;Ow8Ood8K;VQu?L$03dOTX z?ecQup=N5bkYAXdvNyC^Mc4@h6*nI7F9X@2m?y8&R=9}PacY$XN{*^Nt-`(=L%Jq) zlg8OhV%0#&z2+T9jTVLh@lYA3z~c;Y;Bhl(9qu(a90feH!usK++COX+=-${;;a>fa zwA&Co&n!K@2|dS98?ZfXX1&k8%!$^t-!s2Tw5+G$pLa}dU3dozkk5gAonoad=@w+x;efQovsAW*WQP^@NN)80<-QlLt1|e+J7X14+6^2?Y|5`8+vjjL0K=E zC|?+$>J@uXRuVHZZ{hxW10*C5P{lTjBAEVLtQ-jnkqDjx!Mg(3W*I*n<|(A56$t{dF5 z(fi{zJaXJj$dnqsKYvhTw(J9Dkwx%!V|iw*k7EVosK!M%ZqCk>g_V!x@VV=LM1nHv zow5Cuf`4s^r&?0P*(6|`Ehv{AwP|O3AwGW6@%O7~5}j3k#OPmDU62@3tH+_f-+ueB z^G|D7M&<;N}I7?-iP3* zHUP@VnlE$HGgMmna9;-|`t2ZBTI65A^Y(Wo%3W?Ho`G@qiM1rT z7IYrnDy?0z$>DqX&wdQFm4%b44k5mq{L!I4YWnQzDI3^)I`JPaAQznOz;E|YR75wW zM&J@H1iNHJ6p%d9!OymR;Mq=B?a1at;SU9>BH6n~i}lmX`S^z=R!ej0w^QhPAcR@v z%#-F6E!v-IGg~?wKJ-trXxGQc(lxi3`_G?Y~XQDC}1vfmG_C!>oG@qXKp7! 
zqAxUETjN`l<8%k#=WPUD`m)I;3^~z?)li9C;wl*5{UP*7X zZ*+4&lOcLJM&t(!PHHXQ*jm#9)jGry2c&1KrdlQi;8q$a@6p&~GA*4@);a*tpjT83 zLh>O+yU0$?Z-V2_UKDMJq3O)~vl*a~mtuZlu;lW336=I0^pVkZI0TXb&Zj5q3?4}$wejU&ahCSBvCdmOFd08>ea%tc_bJIyW}!uR zrBkHN5`2OHg{EnuM$1*#thQF78Zx`=cy(q>^$XJdp&~;qPUcLKIT0;d}sfpp)VShAWKb?r;@5fB^ zyd=O$P4S)=$q&ii%SgTg58Qotr~8yreT>=0Nhe%R2MUh8Jw?q|ZhyM>t$|QE?0#Z= zX1(Ee-Z4{-eqSjqMq-IOIRi~UB6;~g2`*xemluOR+sE1Gr@^dxPj4!(~g_{D9md=gVZ1pmG$Z0G=5i%Sh7Mkt-o=C*#BtP9~^7L_lI;*RW{tXmp*hq2p zPEKB(n>t-qcP821q~J5Ga2c#8^v(eICxx|{&jEvLId@|%ywo`Gp~Hks@Y5j9&n1y7 zDPDq*%5R-W;dQMPGt2j*kB1Hy#+f?vxYl5AA3L?m;^cgLAKud+^+L2zoxA;wtDa>F zB}Z+jI19Q%7mjZVC%#mIiw?2YtS zg6XD7KC4I)iYniY4#`rNQ>PH7s4$HV?;lLg?5MuB8sS;c8P)%ItwS^5N-MjL(zi~M zoci?(l{X#1HbypyR$!N+G&+1-rxgOFQAbTDL5waXU18ssXIXtA)JUR-ryqcq*@&=L zxtUCV!OJF;Kk(8SmaG20f;sN}y@@XN{{7RwH02BN7%%^k8|R>#N7c06r&p4lFgi%x zd0jJ!k;leYv~A=-{Gjo%Vs>AyrgJGpHb40q(Be&CQcW0cG;w{AuC_mMLF+0>0Dqt+#~v=CI#@t0E}^A>rWMM z?s?*?kYn}97-LR^vGg-OJ&X#k4SFV-N>QmlZ&0*(ae)?zVQ%L=6(QHI$nPvfVQoZZ zvqo$D)+3$mS&Aq>kSf4p)>vuJSkKEV)FweKqq?{D*-kc z$unYzU_#89sehYmy!scNgdGAT<*E&!2PUBNrea zSXEqFB=)=%C2i8Bc9kryi~;5GdP)SFG~8xUkdReILX`{>#?<~EMdW5Ldhqf4gazpN z+b!rmmY$M7=PC@YDQc(>h^ZWUSJpoZx}MhKYb?`2Qy{B>Ur_}3#x^4En9J7=!pmv5IPXS z705un3Crs6BN!lh!6@w6B*0=H>s6_Eg~T4V=!W7B`Q|JDcNcC3{Al8C^?n<=?|~ z&|zI^>__Z~>7jYOVhKGIg}iO;E|iXQnHDdP|0uo6YDc*%&DyhFJk)z;b3L$rPT+Jg zy*BPq8=`bVNSgX|fd6$cg2(FuZ(uKJ$-;jl%iz%C8(1e6ET|a+T!P%FF4GT((*VHY z_q=6yFVe|b30if&88d|VHt&hoDSUvSJX;iF{Z$Y-kn`C2+N z#_c#q!#E2=5+f9lGf$pqIX~b1_H~sU-cJ+CzI8vhfFj886AJU<^y_jHp#S0$7rB@A zO(}T1jePUu_69Iq2inNN)GtyfJJMgvB&bV~_zJqm#M`od#fBQ&VfG_9Z2O#%Hb{xg zTrcR^D$qy8Uh-a3m=BmU;_yX)`s{FZyXp=3MZ)R94@+uvd>ucUxwKF67u)x$3xOA8oGX(${(|pMF%)ye9yN-EIambm(Jc7#s$VKxi zLLO(RmC8 z$qDekMhBPxYtl#4IV;zy=1aDI#pHpw;g+9ZQ@f4+;v%Pab-Z?{PztJiz7cXSOL3ca z-fW8O%XuDGpF=oO0|px{deRKX>oU2>-ajLo2D@$C&$Tz}h@en-zL(RDWAVIf@$ljm z{4$1*q8q!cE}o`%Ja*QMV=(NHY(5zIz9>>QlG;RtzuEv#W>3G6B3M0<^5)L|de+zq z61yt*ONHyYhfkiLjXKiz@#ij7p?=bX+LU?wwx^V>|4@ILW_R>pSi 
z%D~q*#zz~Q=TV_0M-*~wg!>sPeXktdzb5~x)x>Q`w)-BTmM^A)R)ZH1V=E3|Rr4&8 zc&OoztDD=C$JyTqzcpPTGP=^g7a($|8aYqXe#oom%h<8|gdg2-^C7`9X6f-pqiH93 zNzc4B#9>7W8n_VL&8dpS!sFCP9GYQ8$vbNv8kFXB_@%-3g~ey1BHYW~mftV(loK_X zbaydiTtofKP3DxB?+~l#RZvjfxrJ{QLp57QZ%oY!($(vW6c>KUr z7FhN#vY_r*JmH5am!P!VD7$e5p(6Q!MK`S&)?TCn#M0)?LHY8Zx1+-=yAs!)@)axf zV(CCHm7%!NE&(RcRJV(p1vm3~TuYsY`KV)+9&@d-K}4mMKw=h|BGS_C%O;r#q6aN1 zmlxZjE~sb@l6EB#`MlPGS~?vWpTcHWd0L7T&Ph53=*^$fzAUTyGOaV2RdcyrTQC80 zG{R+3)NQ}}+1ZuJ&Ls}e*UKoxUga{tcv4kW5vON|JohIXgvr7@WkrzT_xRgH4U2kB z;%GT*1TFDS5R%K8O7^{K?<2g%B1J&Ov_ZrEsMtHL`db#!PrBM=n3<*+D+|?Rx|+FB zM_$zj-)jra!&(1f39}EE%hkqD%z7`E9_XSTm`fqxDzB%FyzgX>ppTNG+5OfZ)>9z) zb8-{Rp{ZS-LpMW@o&+sF9aD<~G3FU2GR*rXvMS1MFa94qbwFi*3~Ak0Esh@$mRG?m zCzMJUYI$Rkajyvn%=WJ)BU;+30#9b?e1{tjRjN`1WTX~AA5C8N6XZLJU_;{CupyMz zj~f=XE#IY&3ka^X`7@z!HsXEJ;fWlz!bc1_ttx|W%nEQ`|JpI~MZw1c?#b9rSC=qP_;{HX4&s_`9Y$?qt3|JOh%8WlQ+?r!mhe=9EOHY}Y z#ZIIWlMXb^yta?ISR#;sQfgvw;)$^H+T(Z3XLek}f^E#E64fSarZa89{H~6u(U>hs z*Z(crMv7JD*qNt-O72rdaWD8MoHl)ljA2_NVU0=Cd(cl&pxTy$DzVc>UKyjxCn|k7 z@nFu~Gn`Gb=iD*$wHoNU7feoYt~^qpPrnoFU-xpO#Hm4`>-ObHPPp>*7GcG}?jJ>S z$O`9!i#YNi?e1ym&s_;f|17pAqz7MmHEt|lBdaZe$DT%&e+Urg0g1+RftPz89|K;f zeXbhb6i9*b_VH-Am0D7$P8uF5(lHmjEDtLAcACiG)PSS8wdeA_ z<^I*;Jq=J^@5sL*Uu4F@zt6P6Wj|J^qgviPUIIBa(asH;k=Hbn`_S%UR898!R!iK@ z2Ti8bOVI9f=W%;G4FiIQmZ6p#dLMhQyYXssgi@x`Jc8jOD_$BcMnPIb(GX2Oe=Ydo zjk>lpvl(XUtaud=Pei~^@6LUpMyU}k(*oa{tHCG4UMn#*V%;02X}oo|{8`hzM&kA> zE81J95NyyjsP`40^cwfKLKV`Jq2B#KJ1P>@r3v~9(A;&hT@S`I)px&5ode)Gv5RC| z1*BgOog`}F(PzLE2%gp`IOZOLVQ#28(gsZ5K2K2}CBgSL7ycyQF;1>2`E?+er4}8; zO|cQaqK*r)t4)Ky7q~*NJ+Cy6#S@ zASa-y<}l`D%MoA>5Bb>E1EW*BY|xZ^g`3?5=(GmGQfon1*_Upm&(PS@yig&ac{9VR zD(2#VfA$i$3}4|hIcyUAVs;XRniapW{$b*Jw3!d8x@}8VTY+p#*Y2-*3|ooV){$Kf zOPVOJp!lmFF?nsL*XW^2Aw5Z&#vAIQREe6n;Y~4lH#eJtn)GqRH3PBqUkO*DjDVRH zub?K8Icc{ApJ!6GJy=YF0H*q%oo`nw`GfqL?l~Mirw_<&l~=en2x%UiiB!9J(X9P`I*S0Hn7{NzC5^YYhxa`}#VTELojRWaZtzk=s{! 
z%f!Bx&*ZiT+S~?+sM#2G4NCok-A?{Es?5j+9am=K)<6DxB=F5MA^ro9jS!KI4q5(v zGQ#tF^nPlECrl*#1xj9ty4sfe^gPFJXSDtrM%7YxGL{_z0#Z>%LR6KZRkS$$Z{6@9 zm*rfRC}52fm9i1Z1h^J|4LtqR)+Yc4;&8YOzT&?8HfK;tWkW3N>@S+gA&y&*`0OG7 zqbYBbO=j}|qw{U=jN=AZm5;;4sO1y0ngGUsk?qSWq-#Ud#2S@U`lR!U zv_&yR$$6@Tl5<^AKJ>k398Moj3LUGv1(v(JoPeMxu{DyeJMU281AI~6$Fe3KU?1Y+ z{u&MQ{dwGv-S*2Kx%L&#-ER{1ISnY9PS4`^J>W+^zg@e26Z!7Lr+@tOL*H-L`3?Wz z`apu{fu~>Uy-2vOA^LaU-RlA&-iG=SWdGUr_itP+QjfhE5p@6bkEhdU&@a!})&u@I zvY+kA9$y!@7jyT2j9|G6<5=Mxf<`219)j-qUp4=4y`VpO2zu?SpU6sJeH}qJSx(oI zePulnw13wS`C785AglGXj}wjPe=PF%sZe0w^+y)EQVPHWZr*qA`m;V~k--b%bVJb1 zQ0*VG+uw2#c{`G;)Uqu9!=CSGhr;s`@U7V;#yS{DFe2qGg3;ibe-qPNEXT>pQ3 z_^S1kiUjoNAK(6*7M{RUs)4)R8pm^qsF+tM+~rioZh4Xj+(V=i?;qXtBth@*3iH;L zzPf>H*WWPy8-eDZTgco(xmxU>(g3YWMn)j+hkxqzvRKn&%0C;BzgzK{EI^s3 zTz`-_?p%0fC$(97)Q_E=?)*ivs8m1{BYz*4k6mxo=;HOJQAXzqc+o)#HIB*B?PQj}Y(QrU?!b}+05 z-!rhs9@m^&g2%DXf`kwAKgro%Xy9^#|ESv+nQ?j3?VlOvFLjWbPU~6<>s!mesB~t@ zM)%J?Q+~aLWZmEE!N6YsIXv=*Iz6O*2->au@h6jzo8cJy{g({qbvB(z1v0xv!L}2I zj=ck+)dlAkk=_$w#Kf6g7UN%f@oF8G1D6^&?8GF}<+3xd;k&zs4+sv4YUB8-UNwhh zD;SA*^}{Rg(*U8m zj`=YtZGXUmh~NiDi@-f)6}~IQmlwBEUe4}a>soE=@E9dN6qY}69ypU``f{;A2%hP7 z*~7<+onPV4?5_H>T8c4>w25{2c0fdG9m5t~Iyo=1FJZYk+;?2t+=WFp59Hs~LTIoR zYX^sR=q2c_apwuL3<~P!j|CU=>{nJ2XR5oD)T#YY5`EdHRPzhef2svAwii^PCz>by z$?Ge}xt*|3*SX%{D)aSZ(y(GT}R(#H20cwh2Qa(DjpwuXn!cHcc8!C zPlzAM$N<{NPB&)TJmYXF-}6aYbvM_#zr|5OU5_R_IGDo*qb7Vi;WOMj{a7jCvJE&> zLQ-C6H!(P=egW7FP$L=ZL+skKx~cltruWDq#Cc0?PQBT4K#XcI>T))PixDqPE-N~u zy4*UC=GE!^!E^RJ^sKW=%ii-_cPjT}?5#>4&t{cm4-F7@FMaXQ zL~^H)PLGTfGF<9;_A1JJOg_12?zGuV{v>kMvydcWgBml4d8H>rt;BYu0gAk@?{TiD7}!hMF>l(+>d>e&&x5vUB2=S20|Lpikh zG?CqW(LKgwWn6=-=P%Rq2Ow7RvKyP-Wl*?|Uc#N%888PdhI-vBG%~kw| zt!E+y%oVZl4`U_|%3|J7zhC7a*h>Dpmo-13__ijtFh&2nnXpc~cxvMJ!2%&~)<>HY zOTWt6)x{U$3q^^i%e6o5o(uB8_jme8*(0u4_u9nd06f~PUz?@`TT|OfX34$SlPZW% zHdqVVw<}1RDxz~J9|~LN%l+8iA-f(|SP9zARN&$NuonsVm$(r^bhHP?9SsMGE4r@z znz2e7Rd>mF`%jyqj&l7o&OHs-0R`W$3w^wff<#*Yr;h&Zoza9VMMvTF7AWDMlRy?R zoG;zt|SgTMyq 
z(@hoc@z4mDs#A90LdhRzC3N#uxRTAl+>5PdqiSPJN>$S5Gn*Oj2c9US3T2$*`2}{$ zWD+Z_(O+(rX04;R-U1nim|T_gP&dQdWI?hUnxp&-5J8orxMiCPT$ep`vg6!-W6F+l z9@VPa(}56e_}QMUp4#@C@pTc}gPrRVgZoYcyUj~FrD)UQ%pX;5L>%oSjc>%W+8x@J zDHx3#C-p3{2`HW^?sbznLagI|Sh=q0cGbUW>dP<&!KbyBfMJ!>h=tMvD}Dxz)&{7@$v76ODb8aPF|1 za)lHutWGae@#n-|`&>X!ubrzLg+41YHebof_o^V|B-#lY)W3iVV{$$X=dFP4dauS* zR?6jQO+zx)mo0|*mbnqY5Dmzb;t2c%OuO_M9R1z?xHeti-z zH@sFm>V@VD8$99%_tujJ&Pc(P56mh}%;^~49?q$#hR%s>#LKyHzTR3**aRPEqHV{g zviCwM(ioj4#cFms9etC2Mk-ygf5?8$l=;*4egfV4aQpnubnV3wcj0W2%oa8CPz`T1 zr0)QlI06+Nw-{}mGoOum^}C;Jnu*%e!YoB3Ve2G3pI8W1&pVZ>^pD5`tFp7{9xvpI zY-G4NkMOFsUHdbs&`>N>zGlivtzXdAm&vUK%}bOg)kWeD*G|`v5}-KmC5(JF7_Wx7 zrgJ>3hILZKy>=zN|CMg`F*n;tjQFi5cwhZZ^=A&uj8%H3JPd7^hf9o;E}&KIuJ zVrT`6V`Dj44JYn<1+G_1*We7j#PgWOok5Rvn%87VIm;+Z5+mcXTv6cgdHCX_Xzz1u zflWLYI|b1Vx(9SYJ|ut>#SykDAP)LH!&;sdb^ku*>2qrP;iY1dMS&aSkO;TW0V&-&3eEh7&v+*LVR(Src{Du$KvcY&qc2lsD z>Dj`HCbrfe%|t}T>)GBUFvtvM9VwmD=0d(RWWC;o%o(@iT9hY=9se}u(PFx~w9OLu zL-`=FH>3ZQb?Apd=Yjy5(2B!u40eX;kB49U_8X*m3K>`}ty}S_#ub;dDx2oXts45# zwiz@_Uyp{cMD1+{7JikS;)RC)*o4|gKxF9$;qN6Xp~d1wG4pG|l5cnGpNhPl<*on@Mp-M^j{EM1ebS}f^^*5*i5;Tnh~7?jZy zoxAU7Md^n$p3I`|5!dQ7T0EZ5pQpN;v*{z9R?yly@56_o%sgR1!sbE7)~4P}W)$+2 zE+hQ@S9)F+rP)xNx0)}P8uC?au_wzjSi^h|eZ(i^QH6A1)~q90a8PG&)t!7~jWCa% z;bV{(o1IQ&n!iLkd2era3u@ARXc5>w`suk zjO3UdpYhccLLd_dbLnI2`860fMJUX+>~E0YT>em*I-fVyuMvHY8D5jSrkmFDGk1u# z29m1Y)IYNkH)_<+!Z}SZ8WueU-s}{QQ{hvol2hYl(mwbStr7fcs|1J5WNle2=6ryncAlEf^->ThvQ*I zFZ$K}_*W~f!cJJUBF#aREl-PIJb@5^-6$(wul@Flb)=rBGFFu%n_RI`(a~$QBSdbk zU`AwRF+62=Z~*6R{>K!(@q86~c}xt+6isXzrMFK+bmR?EnA2tqgJ{%YT=KEhn3H1Z zduIxkDaq9?nz+)?*XQ0At%sq3a(k2_ZzHtm6jh17KEWimPEzB4t92*jWlfM*D?5Yp z$+EX0cuF5&6dl^QlTXH(ccRVvxDNyDzdg z+H2jM&xQluv@cP1u&UoRN?O%X^o=6jl*YzEX?-C zk5up`a+lj5vScT>HDR-q)_u3lo_&wgx{%yZT5xh)Q+$PYF+KGB>d8cCa_-2t_~|q` z1J>j8uD=G{P_RU=FL>`9Nu$b^VoE-ZO)$jY<0CfT(-t;0#%VgT8L@>M580;&8gTN^n5qwlNv^AVf*T9PRieJVNlvXW7MkKX%LZzt02ZV5tpWsJ8+8SD*hJdp7;|} zF4b3=o?}{fq;|9KSJH~~q%{|o5>SkV_2K>HL;gIPJCFDQ&}8droe}Md 
zbad9F=FWsSvn)_L+pt_5T!hUfK5LgvJ4|4b^{>Yy zmOQYEZeJ?SfNSBh?yky$mG^zB*)<<@^#{Aj zdH6~^gk@rN#+2n8sOw8V0k%&BXvjkDz4KVm)aKwHs0yc>VOqt(aYD|y!lZM>nL*u$ z`f5UmIGvT~owpRYagJGmBUtq?!s^!%UuD=qemyc|cd^jzl<;v1Ys)ArKr4rY zam^Fm(fBjtw6D=l8%FJ{DQT}3Ac2auK;Y1D61*48eI(*E(OUMk{Y}j(cKFzJO8;=V zY`w=H%u4vCYDn8m_2eJ`)39$n~*L8GI9bB z^{OR9uJXb5Z*kpv;_VTu7{gR|PBbAM!K&2$f|X41s4z!gUZJ8w=)=+t_9Sqx2wKx9 z|144739%i%(S_8*qs0$)^)+nqxYe$X=O5DX%iz1B`v+y>L`Kut(-eome`V|rv5Nu$ z%mKVLd-r@Uc-D|ey3B+}vyJ{y$*%U9?bf~#pAqO~$puNbz=?y2ya3KOy*PCspxLdyu@=BrB*kGQ=n zb6jh-qjvVWfv_N7t&~0C1R@fCmdkC)a*e~%TG{lykE2AcW9e5wn8i7#s~4N~YE?_m zb^KL^Fgk7XWE5%+@*cjhGx#e~e-A|J22y`T>NFVV(r4|t>-8xyH|BQC4&JjS`J$uq z>rsWeSinTXAUfw5}9GAqhI%Z~bu zHtt>&g^VmtMSU*#q)eRP{H|8@6~+olWAJRv-8k=OE*Vps zK9;2rivG^%=T5RX7pB-5EQr^wO!B&@+R3zI>9hFIENbqA;U%~H4JjLL;JnS5@WHGKy4ZhfBu_H7l_@ch2mc2w!1m+@jh zaR+MvokM{?kfcrQ-Fw1(u}A6IA^hPCi6J6-+4xpr}|P12KOe8Zx(VT>`Z z)$D9>&u+h|o4osR|Bhv$yW$7_X<4#3soYEuC~r+f=2kriaZ*!AYc^X3;ER$!d93mF zB1PTak~_H=YVpu~_4@g2%w{1c`Yt8iv{jK!(+XxSDx`a}yoO>zhR?NjYT*)xv|Q?L z(M@jaogKsKzJrqgF_)}|TETWzz~n_LG+SQXjRuuoXYRR9nYeSVGs#ZJG-;Kq?8U(L z?l}bx4w{Vf*KF;H=i?W&eyY#CD4|e`Ar-9SuWNO}AA%}nKFjTWmA-iMalNnQs|(q1 ziH@?hS*-Fnw`8+&pFIBTcwvUG5}yB}oLg|`%v}Ke;IjGxzOiC2h`Yt%xcOXlH#V|< zf6nRUT}3>9p6C%L%|m6wE$kv_0&0$bb5!baC*`4K(5&VeejG?Xa>YmG>ccl@Q8i&IOE(Q{@kwzM$U3HP*upSRh%9*K;bWy0R z^scUhZj4w?W|SQksSdpbI;OBp8&>QLE(MU<=O#ZDrhVT|1EEojR&NO+vaL853jix0 zd2)J|5u3x(^|KaIls$yB(h-n;AHg=$<15#W&i zu}`0B3tZ+>D=c(<$Nd>o1ld@)Rc4>ru<)v4E(+c>T(_3mE~7AaTv(9m{GJ-j-Px)1 zBNcQh+#JU`9}#{2Q|FMohygBZZx9ZBU5Nsx&y)K(@O_1+RLnB?`L+SP(50h?P%70L zx|8+Gh#?*79b1k&dWP0R4Xd+}sj|)4m=*u|-c1UC+B%_<`8n42wm=?UI*9W|J~VlR zJu1>0Z*De8!!C;!$ahcZ3+MK^BfVC_`3%*LGxH4xW4};iL5l}}217TIhk^j2%j_aM zmE>n1Zyr7bWqSzcHq(S}uFe2D5i13O)~uTy4$M^S%;ih*Ll`D*)vc{9@k8FgWAdv@ z_c7GSe3Z;NrKl7Tq3-VP78Z7NYg_-!6aTqcw2y9?9UdOe)~W-qRZ|+zsX`qzraN5_8Nf|sy?l+z-Dx{Wy>;ftwJJa? 
z65Kt(!GV`klWw9#`RADaoRuM~n>iO(zGkgsPQmuf7wwk@A*!gih!$QE!zDu+y-7%$ zngT~^hkgbYzb*#gIzKISMmbegpPS7P5Zu52fQX0~Kc~JO@NAt12&Eij!!@1IKDDt) zxVnw&-!(F!s_#D}eLIWnAJ*qjThRCM`r6UamSl!96?0Cf6N(Vt(XEX6tDlwSzI?VT zg0^-RVxG}vB8BDft{YXzD6HHGan*#oc6%v-(f}^}p!}aZ<%eBmp|~8sWC2Slq|9z%fKo1dNF$9A zp)xZ8>E6BMwi1Dm%7jp6YeJZ%RI?TMY7d-6cxj}cQ!&$DqrBR~esTujmo6XKwBn@` z^>t;w;`bj-+Zqj&@*(Rx<7M_qIjn9ApVu4sNDGqQ%2a0ALUUw}OjWDtDmt+OqVk|i zlOh)r_bBWsn_MK^*O%+ePsw8EaA}Lg>SF|(Df$)=_I3M^bf1IiKHuJ@B8ei;bt&NN5+Zs6 zTH`5vGtM7A^?qlRTwNMu9pWb#GSUOsH#xyeQbi5dG_6{2Y zNa_9eJcatBYXPSuZ&-;Po0Kc)?SvYBldUcW!Iov2yoIytidE?LwG)!@$$S5%nfPJ`M{HXJ@X z3KgeU+3n0b=i_*jPd_0=R`sSC{|ynEExu|cqgp@hAQmAQrX>LsGRTs>AY~M~S|J}Y zodaNn+8rUjah#3)!lhJ!lNX!y!8rbU6L)Gsn>+WGNoDA?YG2J#<~2{HJDy<%a5b(( z@=)~{`Xz9x27VYGvP97E&O4yt*gHb*5AJ7c$K)DyLw%D)Gbmk*R(f&5$sRPfxT&Qg;i{1Nc|(@A3hRan_;vSjhpMW+_c&AE+ANzzn%{p-(ccF)=u zHPU*PfnA_puVXo$8v+@h)#5202u#V+g(VwGgnl$BMCy;pId;M3PNWkyDP>ssXX}4T z!|eeO+YVV#4eZUt>bQnXxuq_&3s82}3EdH0NEQ zap*H^GxfusAZub1EldI!aXqccCoSq6!qJ$nd-=-|V)$Yw>1&6c1L9akDPZCS5 z;H&+rwi!@b_=M2^z{a^~Gv=W=75oAtXO@;h0Q(hozZPLak_pPuYetCCg(wr8w5k(} zvBv2G$2r-w^Ww11Nm1JUT0KYR{+aD&GqRMNqpoIc&W22=4IbJDtZJEbv}4;K(+xsN=DFJcxU~Y6wJ{jUZ40Xp{2o5P_QNavvS?5L z_yh%bA!H#P7S3gePvjCJl{UKcP5;J?s?n@+{N4Ee)h#Q496bLo<*S#K!1sVMToLp5 zczLO-b?iM*BJ!`=D2PPmLJ*q-ev3{1MOcAPxuPI{wN=Xc=Gy;78#(FjL03EIrwc%B z;eSCjE~{Yg0fo&r2QBTK!$0dNe)T*bzNPXALyEuJFv@~!PIvSJAN{71LX1NeE)7iFBNsD_UUm1?JRWD99&+eWnxI|HKsGbxeQ^n!Y`T8rc*=;=P#W$C}A zbdmcj>Wmr{2Zy7xHZ~g7j)zL%+1iE%ph2M72QhdryLV3A|v#m0)ImEc8!_{xBSlxyL%K@zXnb}OucpQ8mc3fNc zmsR=IV7Wa2t!~ziPfptHF9S(o{cI&m-z<36Pc)A%>F{A(fO_L8Nt2);u*TKw92^`> zOntN4|4n!eZf=0sfyeQQ@IEYXl6aA&P#l4%LF8RREy5zL!XhFf!ouD@E(o)?Vj3nc zX1V(@#6YIksaV{2Kc!3{M1UcbkeIOx?*TFXKH_CBd-_tZt_~|?LXD61w$VIBqy7#e z5#5cpbUTT5p61W5j3FVPXPK#DKy(rU+4DMi)7~L$bOKdqVsgTDHvkOX@!^*|6-3l{ zSJ5X7$Vn487Ytb&NdLWE)QAf7+T0k+r_1Wt*5PXV_)wN+r-}GtXJ8Z@92hiV$jfcO z^6Ij$@G_73tL-7MJ&PH41|%5K|AH7;go+jxGW@z+3v7`Pb@c3gcJ}M{^$mD!GJYUG`#79P)j)PRSb`gLXOQ`R!B-Q{oe)#^@zn)Axirn%0Bx7Y#2bb(E%(2 
z7+qkHerxNB2_%rSIz%>2po#lAg0;hwdI;>{!0o!r9Svv^7GO?-))#bjQ%Onx2U5UJ zNI06WRXaRtXKwNFV{u!&)R*XDeBO%~5-)TiK;?`x=OPdtiD8Ys5@^6K@gxZx$r(n3 zPbl0K%^aML>FS=Dal3F$ZZn}zdX*vSmA``M{7@3iFdAUKp#v1IxiA6}?IJV8hM2Si+d7Tr{>Mbh)oJWAFD91UF=$6+`Oe2uzQy`P{ z1>{g}fe5)DO1caiz7YLm-Ef4NqAYJ~c+lxRX3D9bii@2hCuZ_gb$D$|gIVx%-j zDgNmR50`hxlkHF3a16+;VesWu}dHij;K<&Xj zojd)G2Zz|tjveM;ksZ}h^|rai(Wdd^u&~B)G%aXT%olCY>jORJP0Cqh#`|bO6KZIF zqH|o}4Yg45betMzGS`px0aL?V{7&3ezOrP74-v@WOCcZzBOZ_{Buo5A*7vCzMn2uMmdoI+feg`ITVNVAD zK&Qew)gfd1DLw&SB=@{UuGVH*kME$v3?Cr{F{I6O@!mOBrv#r%wY_r#{#?zwg@UCc z*vWJ|uoykTT8xQU0P4U=MULUW&!c0I=)zv-;Hcp5)tfuLH!?#?*%M!VUy`*QcT6&* ztqlS7mGuB9;e;X!r9?kk<>KZ?qX==v@g&I}T23`>&f}9dlM;F$W)c;mGV~(U%M(J6 zA=I&5x07j30H?X<=_#|_uEIxaNQPkH-bY?ivjqO0-0pfkQh@2DiEvy}ae8Sbb~YAi z+F+!Hi-A;Km?;MUQ_z`SiVeU1n8H9o)LxaHvt2{^Fi9Ep{qfXuySg&|u{BL9=0~~2 zKMQt#iIP6)Vt&IXiuP)kwYl0rN1%4Q_YatMIRHn04N}HU~^8n%aB~niwg4do5`a0-=qbb zqL>pUm}iLk&i^dc`_;(|z55Qk4Y@}Jm)r~!a+exyicX^p;95kh(L!Ul zj9V>UW(dB|kn?2sR_eP{lhfQ)R6>K}JaD);y&CMOohs9h{s_B;G6t}@X7v{y^DUIT zyKidUlg<$2k~j6mzGI7ksEz{dXYR^8SK}We$k)VPCq_%*9b<_pu9jd9QJ^W+UTDzP z5@p6+gC1;R&v{WG3(4iAzNwv8UZq=RPav+fUS^oQ(TsZw>E=+k=xuZ+=BUHS_2Ls_u*|L}X2A>2$T1cqohUElK4)(0%hG zUHC=S05N?(^%`c2`Bwq&uaCGne&Z{@CKKn}xEeXQ8x0`1K+$MP7ze`RxgOs(?ut9Wc1>=ZGmb$q=57Jp_s&?)eW1@+%>V zSeddfhsyk#(h}i;XQXcd4bfR4*RM+7AFiy&)@tnDiXqpF64UBL_8u`@B-no%-&42pGQe< zi}xm3AIXa|Pk8QtAk+co2IcSNPck##?$L8}azZ*g7xDIKZEXRv1&ku=5Kz~+rYnf$4FyO`T7l6qBET{g5B-|Y6c0Oat@W6|cTt99iCavmUk0bY>K99%oHMV$S2p`{H1k6mP>k^=9P zj~2{+UB_SDCMLqe1Tv*PRWDN^VG&{Pt=a8*r(*y&0Wzs-+f6k9o&l1Mgm!?%-9?-~ zzkmAjW?g3W`9!JlB_5E(VTfz%&&q=#8vFUTqcpo}kw0z>li{)2VCDfwpzY{_@ii3nHx zaS4ONfJGNV8^86h?mu`#yp{d%=NkO_;kz@snvwyY{PPk9p4S}92uRGY-@G}J*-O**;ldAA1AZUPM=wD=`ET9!5jug&lg#UeRu2mdtn2A06X11pM|?R&GnXXStxhTr>o(%r&iHL z3SBe+M)FvG@zVv28iH`gsp6b05W_jxxVZS(lte|uQ-G>~TpyB^o`kq13WSK;)Av+6 zOXI~r4a!Sa%-%~R0N-D)wY9wFqAC$Vk_;TB3j9=GfA$r?qX8?l2NcdQuP3*~J~3X- zh?(|6w7B*crz_i2b-Wx*vTc_D-JBjUKsxX5#XXOceU%aJ92Pgl93>aXLQBGA`8N8U 
zpxYR@N4S8^5(oh*;sAd#TC>v_Us{5X08VxL9rt8S2I5$nnOR<54&Ry%1!`w5Uj>|m z7I1^h$-2zp)ggoSom&^DN$|>?;{9s#`daJrBfx9uyO2w#9(JEIy2I1YR8YMr#z8+r6&`F7+6GnD+4jtjSaitiork zXRksv{{)eMmnFYc!Vl{QHa7tv9w;e86C$Fx2ao~McG7{edOc_52$veTi(~KIGv64J zm&AJrk`m-Dv~R(mmW_fJqC0Pbzuo@BV??}%s4$m_@mtjG(EkEE6oJO^e7KC5_p)5t z5Y@DPs`-{fEDMP!iXxEUPS&N-ZA(3R)QhM(U>~?Y2S_t;2^Ak9kmRRS)Ju>pc)}+G z22IFxWR8K3W+ya+jY9gAc<;pv5iodN%ml0Bqjb*YZ zId*FX6%)H<^VHNUe?{BBYR&&2R*#S;xQni^DcV$FrrsJ^rr$V;qOtxgBv6Ze@xswIx=$ue=1+^F2vdR&9_2D7tO zR?l)>MhQ&lwgze5tcBHi0Cx}((Qb-N^A&3&EywNT>>Ic|R1nL?OH6gv^e z;7knMSbaYP%&%F|ml9>0U}CmTN6@SHGiennoJ^A|zUxm1U0ZgcjGY{8o?hWU9O29*LNoF zV4^$mGMgG7ttQV6)X+AE$NSIL@^SZZRZa%;MCz!_G6o!WyF$PPbkr6!5wWPRBWyX3 z;jmeq7^k|iQZljK#&i1MBEQbYBs1QFs7QOka`q%H1+|NE8C)kd0Cc0c?KD82d2Ff` zLN@@SRUc4!Av6(WDy?NPXU@NN+pYiwOo*mrHgnuw8Q>;K0pC0Q5-1ShiAz-}Vd30= zR(>bRbj_D0(Tv+pXM5SeBkBAqFyW~w5FIUR0!ZqD+p=~Rd)sfQr1fho=cpcsm3-v@ zj@q1526;DD>D(#OU}0hM-g$1K*1*FzokK~hLVuF7J^=m1+haR{p1zQ|gb+e**NM$7 z(|o6;bHqwdkb{C3fI=J5k<^>+XK)~E)i|vqF}CV@(~wSMH-!Rh4xFY9qcOlPe&Td6 zYS#D;na4sxQXSzd3;<~w!7C9moID+$q-EdWS2milJj+|XE>D>^!LUt?rncC%mGpVm zKV|C@GdjV@*t4JWZhE$eW4M$&5?~INGy%bfj6VOxP1hqkA4@fFRVgb)V&ToV=MgTm%R3jZLcf=(!jY30ugE z`?1>`=RTzpRtpmWT4RJX+K?*8HMZ^S&@)Ayju^PTTdVb1n}Li?xw~g&V4FUqdj(IZ;qeIiQdlnY4IR+dP#| zY6MEF6}S?Sh1B>b&KlJ?Ufj) z-Z^%OxBbK0JvHPDOdNED2st9*LdFKocf4gcMc7-<#AQ!{WZVzZ1f+3h%J&(oQ_{bF zpciIGNg=)pTi>wen#&Jpy~g*>EDoFE_F4&IoeJaL`g+e8e$!i%~$R#|E@ML7h?!2yvhtr#`6 zOW;T^!I@_-t!A4nHxun0Xo!QoNWg@*S2}We`UoTm);LZlKcb}c!!5%3vAvOEBu)H2 zL^QAlNXpPfeK5pibl9n2UVFZYDTRun+~+^iBQ8mdvbB467T0Sq^n)i!;YFml5So@y zvS@moQof0o@LLKm&udyk!yY|J)lFsE&+;@#ZOh_$s~{`J#!<~hwIA;<+4((j$;d3; zoXYe$Hg$3ka;*|Op}R6m%(A@12~mZ#c6h4C``p@a)-HOhbUNZShVa^fU0-s(QL;#__;Er_r+Ha2wK{6-$=tdX#%c;JIXa7k zc$-FB>VT(>(PW7oH`(L%j&Dhf)YVzf&V@ljeS8uo^aEkC5V`ZWaVI3aRxFIM9~Ajpn~9 zS{teZYNPqQNU(o5YGT0YmN4Yr>)bJ{m{Ky+;Oh|8K|=*L{wjnCiG*`GuKhp(xHLpv z1Y}ljxL?PzsmR8Y)GbREp|)dXa+)35NLhA~|GUbX{pZlJ>^iv=d57?NtAMnW-$-S< zcNOb46vR@EDGWY=@F-&E-+}39HMnUXJCSZMhw-a?+_P&{^GJBzaV7~=20n0A=y)e5 
zpJ$TZBA(t@LONTXnoktB6jm_q9*C4u+9QrSd%8AB>;6@_rqh4cRj$+r@`kBS#>wKr zl^jJDz#d+TI;DRzyBj2yE;n~w3Baz2da^{{?zIBrygw+~xf7S@+CT(HgUnDOMX@gw4QpO*y{swL8EWr!eD z%x?xzCdiR+HW{n|qM*@_baz}}Shdr&Xs1+nv+y;}X7}k~$DNy#Rtl?gpbbkNw9; zUSn^qUCC~j0x)ijOtR>~`c6#t26gDR_MDoSDg86;cPg?}5o(vz*83gsv3ET+xX@0C z*ct4Um2Mw$!Zs8k5@|pQLP4uph2#UY(#0UIhxn_MY?c=en7YR=rSF25*9(0}1|R0l zFx-j&9BVQ0P3~GseKaroF#fUA*S|UKjxAfT2Z@`iIb4_8y0gS0l<{Ib>)wKxIvv~? zzPi({!-iv)DwnPm5LJ6JF}dx6M^5lkOJ$Rx-wTIMb=O?|m@u%Ln_2=jHH`NvQ13n4 zcS|PL%!a*f$X8_g#*EVAl5)8l*ws=V(Q{ll)-&TZYh*q{JUjC!LR<(-mN{l5xUB9S z{+iZ7W0`JV^Q?QYt4rP47bno8&#vr{XEv_R(5ivo&K7n1hP9aHs>EUzk?l(y75AO^ zY55%Hs^>htCD-S=C2$rG`A}_*#5PmY`JWOX!xx+ zbn3)(j;CaH(ZSQaW9Mlb4E^g_i{vPfdXc8E}H3YK8j>229K9MWg~- z1%yc^0k&`k+e-54RI>M!VPbcXR(e(MqT?u6=vXiN!n30 zylojzb=zwN`Ad3~I9A^@>u*``@{j7mL6wecJhr%3mKelxuB8^<**kMaR`y}w?Y#6O83XVq$xy5HSY6+?{|;+E}{27(bZjIApqX( zt#hfGsGBA9+sl*#0yw3*u;&?v#VpM)VZb>fIeE$i9>8{_Fp-9>ovzP_tD8PY=j~5@ za^5S{jc2^BQv;g)K=#!>47l*?up_b+`{nBe>qJaW4SQju(;gXGn?Ia!#w;CI_I5st z+o^th+@&C1R3e#i=n;kMEpv2Ls_Le8c@&~rmTWWv2<;qK2bYhu<#&4mx-)?Ur%3K$ z=(c@--QX)U)Ml>b`NL`%)^eXU0g=f{tm+Y;IN?Q^c>5if=17nJ(c;x3*kdYH%xi1G5o{gUks{oZ6E06I}X!Yi{m|IR5eDui}29 zdMj6udTw|)z&dowrg=MsrwnI(Y8-8pC96CY4~#rc_CMU1v2NxhFl_}pWYOAV{w?|Z z>FeImf1|eEAeo#hT|At?Prd8!TxcwGO1|ts z6-q-52SVfytE4?~?yC~#r#HTh?aA1t7YWM<6(f-$Lr>Y736Q#t24D}mhsr*Bj7!O> znnq#ZpXPcjX1TK86Esp!FtxYwcCGSCPx9tlsqm0$loH`pxi^3U%Bp^2*``6{Bs(*e25PVUCYl&N10C)ARqM1jMVb3DD zZMCfpfPK|nT)^?1#V(U&f%xooH7}dI!T+TaH0{%RLA~n+9GZkiPajoxQMF(WskS}* z3Y6Lcq!&O(MfS$pGB8ieEIYDFF7?pC3);_w*bcT@Htyo_?kGf3TY| z_c$PR>FG-V_vqy0q-?NaH|y%Oa2exHl*m zHzfopj0b)~=3XMEW4;xQH*t_nfqCPlcKh~;8)=6oJOF}n4_<7?C}Bll;GT6TT^e%0 z$|9oc;l(<=e=`sLEqKa z9@)R*hGO$8Vzl5LG-gv1XaY8v&st#v}_LpwD7j?i5`YAetkBt%Z( zrua3n5KZ=$;3lo3*87&Mg4C}J?fG%@@W!%@oP^%O}IAn`hMKk}NX~Icf{0`V z$qYkAa?UgG_PDouKj$1c-{0?D>;2=Jg?IPeU0q#uT~*b6`)sdN78=#(adFo5tpyqI zr)3}ev&#Fa&FtiA_tHETsG~UJNK*}#v9QnnW=qIcr2~6+4?Fw)OWiwbR$Q_X)gnxr zzu;>%rrp=i8ZfKBL&NbQc!dM9R=~^Xk(!y7Y(z&0UicgGr93x-)0_Pw_1Iq@?#lwh 
z75?2u6w@XoWNYjA{Cvk*UyW$oz^)e)@;;mnf7}xy1MOsHaz5V8hn<@70sDX~v2oBC zvV$HTF3nOlZt!wF-d!oKH+ZD>NNwi1?R?<%6km{UuY$lkEE0BjS zFDB0TRph?Zn|&(=dG6$UyxWx+pUjwW#sLm4!u<}*RLDUKDKa&=&f~ZiWf3tzhkD{m zaiVcjaScxq8f|=Ub8ctpR??k>Gwc}GjVkxG@p%HO6cJw(YPgiI(AYRBm1_10k!(b~ zk54S@TPIe)+^I7#QNjXEAs$|b`#&-)l~d)QwnzNm5>`Z*ri3KUu!N0O*sH#qZOwgs zk0WX0B&qs!qHtujWub3vKJ+j}WwV`bRvdZ!;i*2(hZCPO*6O!lG6}s^$2PJ=vS-~i z!Eqnh&!u@WFZp-Gw+6)<^t!(ro!&DCBY1K=`s)s<98~74Pnq~T;mSY_#7=(bzl43O zx^$TIAb#XDh5_uU_KkPM3yM|-erd4avlS-p1?=0-7F8X+4AhqI`^iovWAe%Is?d8}(mT_{@PubuVxzSy2vNCi73Q@HfrGuZ-uySf9w8-OM*$v+h{h;mu1A&V0<5)(61wDg=C;U$3cE5<>AZvBvg8u=r}R0 zNZ-Q&zr!p)d4|F$L#3&CCv*og8MR1tlFgcXFYACcFFgr)9G+R`5xqVn=Q^X}TPp9A zr_y8J0T;*lB|xnxap4X3>{M1X+pW}y@Izb}#lf_#NHaxqf zq9-g@As-{Rz^175@-Kh+Z%E~@Ep_caHB)aQ&`1f{h-5k`D7j?3!!xzp{jzHF@)DHk zZI0ZnY*asG8nR&RFU=N0`?f;{`V8cYof4k5?dC+$CTB3h%a)R2zW?N!vtT{F_Q)R% zeD5Cxbi2UkJ(AIeKOh^Sw3>$c8u1PoUaUp5kzq7#%sxC9Pj5T zEq>fdrj4Y9I6V$K{RWfAMDK{JeUCFLv1KFP`;+l9jw_l3I9!oU&Aqot$hfgp(loMp zw@A`ed*sb?ZGjS5BWm)F92(dg7 zO3iq5`EbD1t?=h7iV15xGYa8dg@>o~zTZl&DM;poA^l=derKakH)$8i4 zOHs59hpm1&EKwvR;cBc3k+fM^sX@!~t|&87V6tD!cb%p~CW`=QM-Sqd9KmJwt|J{!MS^lp)-ybK&YH*&81Nh(n zYZm@@Gyi9J1{csKR??SG3vvNm_y30XKji$|N&c3exSip|IzJVq{WfL|0osWwil&LnUW&vwfzfVItQbMdVaBn+Nxj4 zv&f|;zmwsfc0X^Cb{afmFSVIMcEEG!u&=LweieSQodtJa6BQPLurfJ& zR*y!vfwA9^OMGD(sEgKPa_K zAs&8WWR@lyk$RsU;9O+A=hoE0)>=<@J!&eX!Dn~4)$gz`Ib8Ixl0Xc#PT;qGR)~mP zX96jTTJL^L1wmzrp=V2bFg<{djD-C#*bv}Y3WiI8q_fkdfuf%t_+|ra3$V3uy~oBN zl`jTb0)&adcd25>`{Cf!_`$EcXWb3g=zsCpV;+>IAMb-Y=PUwBpQA25KlEf!9J^6c z1H?O;o%mp3(;20XZoVNr8`}X0~1d@2%6TH8lmj+7tXfTTr4y5UDJm=th&|#dif95jFPQ*3rw>bEsmk3VQgGhT) z8^ENymIg`Z=Ucs`VLlI<-Jw^zLKLJHqr^^h1!rU!tD4X7i29%_)GSagudPGZgU`C~ z!=X>PF^Hb z+39V55$(6I`%Vf;RmizsMS4`rMr`U2cn2m*ow0!?K4Ja69vOavHQ_x8J?>BgBLj?{ z{Zw69+bjmJl&NWPM`Pq5fcr zK&;khLqg4LKg-l^%()%7ouMG>Ydb(SI}<6xBfe{8(aKJ?KL@OE6DYA}>U1mI0; zX3v?=I=8*7G68lKP)Cp5{f#Zlp7B~=h?B2xwDR{z;9rL)KWUv6p7}*o<-NBG2vch~ 
zrHJ>ThMHLr!DrU(=~7{)UYAJS(~>0sr%n@A9knqMC@WLRLJh7#G)zsV0AnV+rvr+hrD<*Cq+&KcTT0b;3kWU zbW#;vKp;kHGkTUCMX+zk+LPA4p38JnkvTF@Ix?eEH>1fDfT1o#k&oIlFgfRQ$%4i{ zZl_}8bN-?LMA?;6T&RpAUbtg#6G!sqUElgz_*z}6pO4nYf%a_9Tyr7RQgo{6L>EFm zAcpFxn%Um!6Kgmz&YjK&CTIPFEbA9b>--gRmQxpu<1%=IzigNtUlj51>Nd`;2vL!M^cYip#jt^;ieIm5Tfttots#^W9yhK1SWA8Q)&QknE8&GKVR|OeJ@%ibpjD zD^B0#9J2p0&^e!b*U2{S7B%@w1!gEf>8e9>zR6D5kIms*^DZTYBOc_4?9d_f)> z9Ul25!am!a5SCm5v?UiL9c^$tD>0aK`fJrJ7eE3+m%on~>! z|GVPDb3VFT$+Rn)jEstx=oJIZ!e1n57?;(rPM7NK*!a=Q+LRlgx~SqN7?}RIij^SR zi0W}*UOKQtjoihMfewP|oZdT(lUs#TcPXsY=xW2@0bgV{s&!`LD)%5hTE|jLh@Ty9 z{gphYJ81>+f45o%CAy0gcCt~ox5TbT&;m0)CgP%fBDL}J+roVRUID3R*#1SjlPxNV zl#H<|b|z=FIqj+4EZt)LLBtuy3pvjJyUg*8zqHS|d`>4C0QZXFF#l0`DC?qdug>ZI z=lu)tSR-9)MWQTJ!DKu4jPLHU%KbZ~o34~rtpCf*|F#|s9EX~JzAyLRLTESX0hh9? zFrCrd%d+IL*B36HPWqSAB(9<&w{B} zuOA@7i~P%VdpZ@{VND#4o*eb~Y8}IURqrxA0mMfPg*-U|7wcpdw9FRdr*RK6$2 zM}D;~^Zx~62s4#5*M5_Tq7^eVGD=EqfNagRY|Ue!ItUp19W0k(*pCnuWZMCN-bt$C zi8H}^QDXySe-cAj_Lcn(|4YajwUWpOP^yr^)vN67t(N!@6?2qY94YC%1G3&{W2zAF z8-UuAL_Hh=BZ0rzDz-S1Vw7rE{_eH2Ion6&hXU9W10**)*v0(z*8v3qz^v~w0qWEx0yyk4 z;l25fm`6L<8&H6c{he0;`3P1qp$@P%E5Ggx#wB8?)#m0@dUuGh_u)21YJgGU%*Y#&^-3TNy#(;xm5@5SBg53A(qq;5tK3#?MRoN@0BHOHm%AZHMfhGj}tc;+<#1+2hY zbJKH(WwQ|67CPO~ode(0>vYILlI9aAZYem`=5U=cw5k%|mcA1=pI`&?p@ckt4+g<085+)5E z=>WgypMitZ{rHKy3UKy$M1=8!6FT55xW6f`0t>%9)K-Zm`mU9dTHo!AR>3F=%#~M{ zZxSXETwQepyM&CEnx8eVf?5Sqv zl3=N4`h4PzB^P*^f}yx$iWfnS7`U*Em@vt5IRCcPi$>VRM{~07sqHtYIkf$)E?UY) zbTI`u!@6ayLCb;&2{?Gc_v?g7`qaL_%{G!@W6yJLzr@_{KutmzuZ~{`9301W&~jct zaVPEq;EaiV!eI1w1&TE>R~H_E(eJV4VNTH*JQR1V$4W2+Jr-%7kMJz4y#h|OHTGj? 
zk|fbf@n#XWJ8>{(TZR6}O%B=xe7A81ysXVN{>lyNBJA^@J#p*uLcuiT#m!Q*{Ic23 zW8o3K*K$nh)9@2${4H^=e9oG8jcM^fQiMsN7`1(?BJN1}B4d6Dus*tzIt!Z(PFn0gYuZ zw~{gsJ0>T=`dFm^8ObK?GJ`KJfv7W1_aL7J=1LB?`_%kvNgDoO`c;`PzWFGM!@-#l z6Dq8}E8UT_iRw7B_u`{|mVNyt+O1{|Hor*#*2^s%A*mQJMQZ*#7cdHz_~M&z1*Iw# z5CDiB3FJ=*XJsEd@Bh&y4IL1FZ}Xk2+;NlK#b5aLW4PbRQdE?2gI|U7 zE4vMMqlpTVB9Zf0Tf}5!Nj=90e2kM~l^OOG;59`K{{9i3$bKDtUpE`C`L&P*sE|Hl z&-duqK}pGXt0#Pr(mX}mNRdI@Bppi~r(1C-n%IhUfS^i1R?+5d!D8H00d2A(V3>=S zI=%wkQZ}Ld?5X*Em9=O5p4-5+D#0K=Xxq$mIUYIkUDcHkE$~Jg|A(K4Lw^+vU&N?X z9uXOFd^mYriGE)lgOr4pZ^zq?2&YS9i@7ua`x3vGaI~N5!7MXNCZJp6hOPvhK%>)1 zhAF;6?ReFPFi?G(=)IJqsL2G+MS;)onT;fSYYCI8#&;9864q5&Y#3oe{AnT%4j)$= z;SNW)7)8-)GmY8N0rl9W=6F3Zrl2bW<-WPZ?(vrb{$T&zd7xVX0<)1X`RiiOU$|r> z8zC~*5;plr(VQusnfgubHvOw0=afsl%8NPw>P?6l zekSW2*cwKgAbhjEnv?kAvnjVhONa7fC9zM7n;BC}LQ{UjmIhkt8f{^ICqI*l=^bX|}%*uyZ`Ch9}- zPv!!}5hKOX7 z8fjG)y0^1OK;6S3e7SLAEukH2J-G#V$$@3#x9Q5!h-61A8J76GP~)rwBhe|Id(rVm ziBsm!`hMugY_NcpsvsewOsZt_qyoH80X$u9RX97s{iY^ zKesO0#(O$V@+ta7F^1NSUb%Xun`VG#Elkv>p`AX9lOynpSLq^!2FYH#$(?cDor}A6 zysH6I?}CC`LoY+)B2}D|QQdoXhKh?J-9Zf@A7`CJD>%O%bi2|J4=s-}i-FTjU)swC zyjlS$@PA|?-T{+&a5RweVhQ&Jr_pR~q_wTLVlmjK*cPlDbPwDa@Z0tKoSV{a z*ft|fTutK<+Cgmr&n2d3=wzda`95*JsI=EAl?)i2;2-=!HQKjmIN&#?PU^ec(~!O9 z-7Lj8-Z~J!pE=apaN-Kd%e&1*c|AcImLbYeTT1;4Uw`G`S@vu$ zdyOUJ=N`*d)>Fp43H8{pcQ7#A&&~C9yX*WBCX*Bxf*Z3~rTe-WBB^uz_~`ycD3 z(<1mF$rZUfg0@<%ySwJJY;+JqlE{lE-52l8CUgb&5(2|b%R$XwdF+;@0R18~J~du) zBN}>7!@kHuljYK33|Jv-X8+INE~1uxQ_tCCYd6dHXk3e-BAbA>F=mHMp}a`N<8X<|gMR8oY*(M3J>iK)EUM^x0S)4gsG}R8zNc$6t1(fX#Y{A8Bz&i{pm2&VE_HGSzOBZ#fzRU$JBNGyNiT1ExHv z$X2hzp9r9yDAsuI?0W8WSywn6U4q{j>Vt3OR4mll?A+2#GipJ&dZ{B0j$`B43@L9G?2nFihsrpKbg%L* zhPPaOEL*W=P~D7>M#~xuQ%Dy()Icgzx<LyRzE?E~0H@H_V^K&h@c^ z+LxBFbN-rGiLbVbU^2dacbYwJC+vE@(d`%Pz&I}{|ML+7)m*meHaa?%a?m?XPXp7c z2=Y$C7^`2eetX_-i*(7n11p4khIo(!yV@ZATeFgKj}~|9mTF)!@K3w-E0XRBR+ucU zfhySI^-tT5f;BLB9sea>n1fYSis=o2XL{i;}Y}im~ZLsYI z!cb^+=h6_11GzivV$^G-(ueNjPi%&X^-2=kpRaK%8K~m&On*%LP zwh8HP8k2=OcUPWyl_uDY)!%Vw&+{rhx-9raRx@gM0oPi< 
zHn|+Ge!7Qh-t#-LBZ9ZjDw5-ImL8X*d07KKd)dX@2~%~-4eiLHl>9%E8EkZ-av&i|IGUbuve@kW9VTc0?|Ig_f7_9MIpbWwLANU#?Y0!x5#CAer|MEZ?=4g77uAO5yA*8@;{a;lln$(1mNIBG@?O z5+*v1qK_0Kf7NU{VsvSANj4%T*^nSO&6L|HFeE%(fgQE5sdb+wH88h+w<2X`(OV4w z-LEWOOZQp~B}x|8#ID5c&LJ#HEAsTcel3U5ynxwEBOhp|-Z}E@(GR1Dg)E?EzZfth68k^)L2-l@38*o#_Q>a zIqoqFEg$QD(S@>|gt6CA#yIGjYQbHq${O*tS(Kqwy+qVSYGC>85t$Vk_MYG-gEH=D zl1Zm7=E_W3*#3Tlw@c(6&5MGV&ztagRcFze*c= zsX({pB-qyn(ycCb_pPs|8e1Xbu2RszMYv*sd&j!?^?K6lhoBYM41jPMMPMjY1YU6G z-8Jg|yW%GC80v$OdTSye6UtNJ=5NConAA}H(rE3rHtz}ZK#qI{@d+I)Q{3(*YRtBw z1C*%ven=RGsA~cO#(WE7S{xt!ON}|YWUX0Ew{>JA9FQl9kyuQr5<46VM6WU>Z&0gT zc}vgFsP11Ah;iE3mZq36x9JVS_|}2D06m}c2Q?E}f6>QlV!+iXZM`*rOUc3%>9?9i z(0#oMmyqmDqt46Hzltj{W05I{%mG;+e?60$2mzmJ88kg(f5`r8pF~C%>+_i83x~vC zLrc?6{!LTD*6FnAF}qe0(5s-F2bj?-p8tH1@WKs4wz~n{PmW@K2gWr>`Zs$Xb_X>b zNptZ7WOc^^*_g|}6eK{X;1ORopbheXvBeEckI27h+y`wp_98$HeUbRT@*wQ_9V+mD z_$UgOP$%=nL~W<ZSk0k)(2c(GM*y}#trqRp4t@;CJNPH#{lnWO&3^NCAu%Zu49Il40%-FxX*sR9 z$s~|k)8!X)fO<;+>K%}rj`^zSpFqLxpFja~CJ14e(13>}1ogN8$Bun5A)J4#rVp?O zHT47bw7Bw!bN+4rMr9-&f4?yb?>w^tcw;UuPPev8Dds%NE;AT-jP@jWP1Q@lyDxIX zF?}QdOx1hfPf+AY)HDWsbIv~kgn;fY;Phj-1ZbRf+XzF{uY#G9c|F8j1p58@KMY#P zZ*&%7=8+4AIhfLaAKdB5M$NBWbEJ`S&R-V@xO7=76ik2d&s!gd{Sj~$CDG4R zu;3R!P{Lp3x1ps0KP9b&Zi3MV;skd}M%!Yv^TNOKz3@y)ZLMK1<*X>C2_x2JrDlWq z-!_fnpLmzZ#_u@$TLlny(GB`^4fKEI{O4hQ45Apqel%CS6O1Vs|4?gJuM|7gcm%4d zLBPxK0V5M1p#%wdS{j5x!i&^3-hlDGEak+kdNFQrr|A;W4u}qbfTMquAfowqIHDCl zKm`*BAOdTDcbQldV2l?NwpTHtw*`_M@_Gcp1U8s@yVS@%-hhhbU~ypJ{bjFsxbwBy zt6O1V&jA(fsS7FxD!Kq((znFL161@pR{b+{qwc$bNjfPIb|tjn28tknY1L}f&j94k z4&+{s#{>Er2CYh$)bN4auYrV4K5L7=Uks+CP?G$|B}`mr0)fC1G6WlgtO2asv8b9P zP%j3cUiq!%abPSULq*Py*DII=hWCvQ>$MyR%Y0y)Kp5(OSp%z0<_}uER8qMJ_M{mW zt;^R*Qt%6t|A)nVZGV4#u{|SO~U@weZpFopYcWf53|g9~|>mjMRC7D0uZM zFtIy-kioi^bkT}J2~)f;o>kL~kyRzuIAUc$-3qiDdIPrI4e8gIKL>$!*EoMiS`0zD zt*nIj^06jpXQn-0;tb^KsB(U7^;-Li31*lKAS~%pf?2<7j36x8JraSjOgiFfGyll( zQfeMA25`T}u7%%Gcld+{9%crrq4M8UW1=mJ@joO<6pZOf7QBmL*p6B8MdKLI-0Hoe 
zT_S1@5wN|zAejJFv4U*wzpwiL(UJ{@cBYQkI5|5{BaU>_rjKTKg%}wb`5YYbhA#7k ztcVU(As+IP=^ZR-SV!YGD80H_w?@?6endk!HeyN5-$?V7$C^$`PZxUOgc;NUJtznU z*B}6S-*ZH`yI&wM#e{VUhopg}m+rt+9PkG;&V3`#>`?YPhC40wF)F)pqrFZLcP4RI zHg8VedsT9qr897jWMJk}jv$Qi;RpVw?+fw=Yoi#9?Mba-Q4RLJ^cw8QPpB1X&JN+S zO_mYSXB!EDg+pR)x`r!yrY+Z~#)NUZ+O!!(MQl1Mw1fEdR@v4L?e}Kc1e+Yv*dtvm zA6uFaM3Sei?!12_EdF)scF;ZM)O|DjT)(#Nsj|4fv=o9Sv+Y>g;cDYJpB9h+x!nD- z>>4AOn{$i$pgm>^1&wp=?w~w617i{+fPJQg8)kqFRNlxNA)s)(!U7E@bvu*7VXW%M zNwjoQ^ca7Bb)?rbqbG{?Ca&@!i#)7PQp)0y1P#14+5<&;L}8Ii-|R;wpDp3_X2#p( z65ZBRxLsM%BP#^?8EN1hT$g{fyghJs#l6IM&uxb7%IbH!;x!UpMEX*p%3+v$_SQ|C zjK>0xj=K9E>}zD1i9YR!UO(;z$8)rR0c*eL8rothMl+3EtR1G&{=vq^P9Ml{e4}s86^+4B#pKzIRdl>)0joJmB<6F%qteN zf2BspCgC{QAwJAKm-*`X>CrEA5l3d+%l=UPJ?%X!x9{q5~Axp?fhCD>5D{azKZgQh68_SRtfZEs=hA& z8vU0(Mth?6H+Yhe#V@jX;>okt$#TPmx&p=Po0c`qwFMViGC99bwTIZ5Rt&^ehh7oW zC|?NX$>26FpQ5v-%GrU#zP!OqzI7 zB}U~Ay+HcnA=mONE`41v%WC(7lai19#-(vwt&riDDiT;~T@M(K==0?}l*4*+#Cyx! z>a)^=6+`-Ft=eHJ#|2|v=)QR5vLoWQmb_eVjqZZ#;T{t-G-|H@=I5LO~<2RpWC(B3I^OXUE1Yh zEqCF2rFIEU@3mQnKMoaArYpC1_Kk3f2#KGtN75e2dt7DG1r8T>(piQ%a(I^%V_m)_ zrh`lp;rnm^3{vZVf&+zfv_*<$*ZShxz8F9Go}6qu$1T*CoZOS1O%er!SjeBM-^mtR z7=@p+FsC&~tuPnJ)L|b}BG6u5ex)>NC9*?rmqdTC!H0->IN@-u4OzC^?u-A}N|QQq z(|)k1k4bxzJ*%l>S}XsV>vOG+0rs?O=o`abu@J5WO>M307oUwiFJw zpSvU_*X6L3rhi!#hZ4Y}pX+aHti4n6sxZ_znhdlJDC=pr7r|@XEVEu48n0|vfIJvh z9cgisc2$?yD_gWBXNjPE3g>($6|Q4|=K9GiYZY+i=dG|>w#llxEa_hvUWF^l={15~ z;8ZeAb_m+;E~Yp(Dbxt?1QQ9Y(OP(-N~s1WVA=<=j#&slYvmV>-rXt1-U%6)#!CD2e4xa`&|aO`s0)&Hhx;JB=Jp1`R)`h4Ubf%N!o@B>p*>3~ zXb=%=kr-WC*7no7+---|y22Y}E;$P?oljO`N>EA9x3dy9+2L~V;88U22~$J%apXfn zjQRLV@3}}?De6nOTUv$N%BnD5)?o4fsb2noWRTLgxS@R((l^%jeu1T2teG@Hg_nyl zpYpj3!Ov}tN_|t`j}4zF=%m^gj#89{Il3h+_nw>guCrdg^-D0LM!>!|#+5XGA62kS zxN~`rBsHXRVXLp?*N=rx`^S9pf;?;Z?B)@}g!zOLiM&^jlwMC^$wr{*g#)8mfv~^@ zyO@$!4knB%LGe#x~j#L_|Z^5cRaG=)i%i;0^{x?%c%X~oFQnH<4(^iovqfv>au5+ zK)K{e9vUjS$0Phc3)xcg6#3LC>vpnTqUU;dPSuCSy{W-;(Ss}ko)u^!!4cM5ucdtB z?RdJ^0SuM%@Ebw9t8FEx&j_m9#u(4XIT6sFX2rH|)2Tm&c9_QmF 
zCdNNk(NaUpYmGM1$Uk^pJ{(zOnswmnB;@f!2oD)Os{XO;Xats=lUea(+BQdojTu4_ zmO3~6-0x2<0R9^rI;k2(9CqqV45MnCSGyHrq*e&j+M5^pG} z9bhD-W~*|y(B8`J=kodJ(KYVAR<>QUtWmyFdqry<-fCU_mFUh_txrYkTuYCom)zi+ zT5;29J`)TG7V_LL1ahv;(=-J~T<7L`^2U6#C_N0vXYMwQu<4DeKbbD4h{MsoQ(41g z<;|{Plc(`q)}*z1Y#xG(NqQ{tm>kCOgQF{f*S?>9{?iDybKmmqrBZ&*iFT%Y#7 zBQZ76bZ`%Hq7;veoW8u#au}f5b?%!N>KM+KAU&H;=5{?l$D!On%}i$Tso$s!z>T*2 zQXu*SYRxZV{A2gcPZ$Da0SItUkvh+tbN+AefPZX8nJh(pC|MXZA!4|kSl#3dtv#%H zVK&a|zSOCAvlY4(*^-%zpW}(z7r_SBT(-w~rHf$m)^2*O^ZhD=)aB6vAGHpCWAbnv z)YUG95IHR^6%m_0H~Pys?9Hmz>r@7F8Ktx;%Umb&CasbaC=ztJ=NzVV+~T z6D{(Rf>xai+(qzbKMp!3Ip&F`mfpRbY-_bm0NP3+OBNRFhcyp`V3kLQeOu=_4-wIjQO|;nzM?eQ37Wj448ZLdK5T#9~Mho4dFsN zW~qdRrb@dCXPQ%hdXRqEAP!r`o{fQWKv8(|qn*SCuU`NhJ$x%^J ziIwhNS`PP?&mr%-zL2zi2I~h88iIjG@8f(H7D1&MBO6?&XJ)?U9|;MqcH@m1lwNo~ zeEwQ-b#jGH3Z{_eyi(K!@~55|3L&_JLrNxk5uWf03g;#BF0N(NuE+dE`_-ch1}_O5 zgS%GBr0zSuP#W&1p_2-P-QTSYg?$T)qMiK0pM5+!>!aukHAYvC@+m9X*N%4HU0GC6 zvWcd7^mfV;*ZGCent< zP$BPbO{BUi+|A8<=Xz)9#F%-!v}A{#(UFLz_f-RUBW0b6#qGVN!8Kw(Ck|eUuxMxO@IylTrHQ-iUu=j=#I}gGH|5t3>(} z0qUDy^ObLu%=o(Q*jfGxu`(u#Wbl@K(Rc-9&OBREF5D#D$hspvvZr_9%Iy;d$+=sR zL;6*)C!Q$$PHT^q(dk{XDmQ=KopxgeuNY;AfPrz`tY)UM$})WE=MBlc)**M^yMn@#*!EUFV&P|Ha1{2ZSzC?WV#ud((L+CY!=jPf?TvQ*<~>58 zW%3~I#ZjgGPwj%uE@`yQ*NCvnqqV?Rw~{^7PGb5o!c@1#A_&g-=JAdT66?ZC7c!c< zwUs@+>b9gey1iv<#7i8n-^Y$y&w1x#6U;j!Z7t?;Zi4M=1tfB?xT4>U-i^ZNdpBFB zepqng%Sj)NxNC^;^zyc!7;$~Y;1WaS)sP%onUY;wjBqHd9UgL3o}X3BL~dEPBL72W zB56g4n8P+y(ZY@*&6B&kS$^)hLBG@=PT=Ss_3}w918R0w_l#~XSs6K z(|yoSTA7g>KMVJUJ`2MK0`h|(?ZaUGo16l;KaL989?=C49rx=E~tbYyV7SYAAywy71SIw_dZoEb8seEt~ zgwZ^IPu0$#8|}s`i@1bWDQS9?G2}*1aOCZ@;M7ppR=bB~xEzf%skF~;^=MdGwO2RZ z%a0=-Hf}PNX^W(V%R;x2m&>GmJc88yJfyh{jr; zemBrg!f@12=Z9_e++hMGEoHH-C;UOOHR)`nUb3xvp4}m>t||q6z8wvcyx>!BPIX%U z!~qdX8pPE(W)Oxaw~}9HlJO=MMI}|wZfNCQQLk}T$yZ+2F;N?|-Q>fES(RmDX;&rY zWwhpmrV4vT=0R1<3M8GEH?K!g-qXM3Co5-D=Z&knH@9N;s$cul{$STD-c<=6(!52x zM&QkkHsD8r^yUt9p^8WMx(OKc16LB~=!8T))m$HGnr#+GZ7|11N3D>__N7X(^x{1# 
zE_lhtm+K|Q#ARR;c~~jPH(a9b44GLb?AOnJ#yt~$^C-u#R*HoZziqKkP({~&)xx!< zDWnYE!Sm)IN8ZjT0Qr>()QSYvuP)76vuZN>l3U2iSRrBgy8ZnNcY23^QMASniMW@@ zxs7Pr8cTJMSz^h%km@a~y`Ptda;O=KmQc?rEEDQu&emL@-Ri%i`{fhs+ko^EapF~d zt?|K_AR{85UVh}4Vj!Uy} z+y4#s70p7J4+C3Rw$D)O$F3eQ#g8mXWfpNP4Oi$HFO=W^bh~(R*=GC^T}B;+_&{C` z9v}wiwH~up;8V z3nL3Rx*3ZSYgNz+h`Z&Ro76v;Q=}b=TFBSmI!t-R8;QhSRxS z#}SZB_l)@Ag9Dzg?}~J9oa?J=Iv8J$M8_-N3@H7es#KoU%G^h(yIz>lck{d_Wa0{4 zzGDI@WrEh_uirEqB0BWS(6f2=s;UuXIUC|=bY$OaLGD{ig717CxfWf=rOFZvPCh>r zc(f_4d61 zTTuo&rOZsFo&phE#o}a+P|cMqI4x+$ks5`}4jtC)p6@8f?k^U#9#Y_P9gOxRCI&_5 zC{ZLsFD|VuM|U2|;L zn3U}^Z~-w5!opSmsT%CsaYP5&4RT^IHRwLgWQ(pLv`2>BM631a⪙~lKwWjjh{Hj zUM6ucd@m=Q%m@~Qli0sTSm@f2Y&g(y*TS?s=aKK1T8p-F(So5R&2fZbn=lM%8ib*R ze@Ks81Tk)Lwx3)5?M0CvH2BGruGi~3Oc0My+am@>)Jy#OaW@gqehDURSD;-_2o=iBsZ3Z2*7zx4V>=MMFR!-c zrMh`c^XP|8+xsDpZbTQ!?<<9m0+>3r#P0`380^%1gs9_et~) zYCfb1d>r7W+#c)&Ag{^ihFmaft$EoHSbDn)M$?dIGZ6`KB&F7;|2U#>A7AMR`k z+Gw_!TG?TB$~zvFVutqQ8->GbwDs8k_3{tFog!&jIy#2C3G&{azR%~1B9GoGz5P}& zhA8(Ww?zn=S%ohWa8HHGtHxMJXOx;MoTGE+&5wdZ7>Tt(P~>S;Y~acbXoZ^Y!S*!< z%CyeaXnZCIgUF*dUKz)vD_3CpuiJmt-MlzeCqEwBov)*=qWjT4y1t#)}2xBDs!N~K%vL_2m4|*8{*4%QV+P60f zma@0?;wiZx$sPkCrK&&mQ@8U$3{R~;C$ESO!B2Ttt#{P)RK#P1*pJe^ceTEkWBZUR>7P+-SR8>4j(eJWW6rjYX6mF zpor@&eB*M;(*sw=4wvY2v%1FJN@FN${FsF$VWues10D}nMh>Un2!?I>jUMNV*6P>x zoeEg06hr!&_ay8YFBH>Wq?)CIHTq=HGyCsx?H z;??zkMQG7&ifH8yWOnG=<@&KzFdgS+JsZhaV(yD^2#w#ZZyVEqG!BG(DK+A1%Jng< zT&I1%jc_m)V{%?UTH!c1hM1+O-QfF&j+KK;j~2OoODSt{H+~`JM(b8t9dUe#QP)uTK%A+-t8-LayUUO?4=7Q zMn{X(o57zx z)rdX3f2?O`t)BL7V#;8f$TOnYT)aolC0VL@dWn|{9@N4TV+un;OAob07k+S+6m6sx zsCfHD-Q*f;Ybj99AjYXvxi~nQ?+#&Gd2Z+6&t&S2OcA5&tf4*{eQlvQV7~G|@N%@ttmdL_(iX5oy9%?q5E5eyM{uaw6OUml!|I3P zI3(LaSau~=Xi;i@z9$v^Wj^0QVl8EZ^6rz99IdpS!Y)q@_V$bc{_ZaHY&29Q-Ud!z-=dVs|7ggo`XcZLUYL=amDJ>4R{JqUM zrE9Qnpky;&$IE@NVF2y6bFJ9YjC|Es=xR8sRt6dVWilq|qLb=Qsl&NePIA3i((z>q zx4coK(8clBU80jQaQFw_ZHU%K#(zSKz=y~(s_Q*TLzVB1=(6TDecvgtUPukxG44-T zC{iNpx`xQM6y5%*bf-sG|iMUz4*DJ3KgF|UwG+;Ag?*`H-qMBsO9Re9DZwH^^kdOwZMm6xCohh430bhO^<|R 
zp))toR(aV*dyj%_4Er}_Yur>fp6ZedW5X?0^LA{OZfFa}Ezb0g+ezp|4#E)IjY#8iFL6!dYnPPG>H5wdqm@gWns`c%VqIHi zIY#^0v2ir8UX3GoDl0d@4wHJi_p5vthD8BPugmo89$IRy<(a_%%A;4 zSkJ?t?v7^DqFWojlt(`s+gMM_i-9ohw(gG&Vt5$uFCXcCFxReg&ZrPS(J^$5*6fJ7 zlE-gf@;c7+CK+Lpcazd^diRGFD#It!Jh@uy#FCW|s`nI5(?d#fWE_#X)@DI6ITa-V zq5U#ftc)03ZEk`u)ajD+=bD;UWN~p$ujBd4;sLKe_{`WC)h1~WETQ{JmK}RxsqD5$ zhdxd2emSI}Zdb`F@!V+W_!HSBHgUtrl#zj@90j+n%nv$I6(1q2d!s8UdgExfLvBqn zS+I=w1>>)mj`_21MYja6RQEXW4(;~0tA7xw?WlA(SRf>*SPi4Mk2+Zn5gR5NHO+Pj zh6QM*9MFUyOC;%XgW^Hgj2(Zt!JLPFt8pzec+9X0ty=6>pd)v##tfFV{hZjw*9 zyF(jindY5V<6N;!8H-<=TmprfZ(;rL)=hC&)wKW9-gkvXl`Y|##*7FeIf+P6k|LmF zktPaAk_-xnk_E|8L?nk65y?W6bC4Vr5y?sx1Oz1KoZ;4X&diy6ru!Tp?%N&a`{tqD zyY>pTs@AIY*I&&jqYWF&-9jB3as`th>I)GEF5Mxn5qmqdmi`QK3JJ_BgM$)lWlvdb zMHy_J8%)u1TU1a|VP?KAs7Tg;pJo(urxL>2_Gh9qHkUh_Hl@3&bLt6mO~WQt{NER*Ffb*tN_k)01ZSVpgdkc(sqMfa>G0-Xf3Lk9!yKiBMis`0TOhG#<8_ zqJ2L8`-}6Pch{pd_TxpuSI6z zoefCyN$lHvV_lwh+&q(gCQVMxjmtbbqb^GqRdZ^GA;UqZ@B26Q=kkz^xFWSRyPIJf zKDN`wcBwFZckf+OVn@gKz^)=%VzhLNkus@}MP7W-*@nlH4qNzU+=0*Os~P-~ft0$` z;W9VGIPBMZM<@M^#S92b7fD}uMtV45kD# z%0i?@6$Fcv#j;n{k{5(|de(X~3g>IS=*^qgZpbRPrQIU4oHFus?%J_%Hr|SJX^QPC zOrS~*c8tx{^TE@VDd>SQ%1ttwoYhiuaBJ>*)s&UxJzLliP{&Yf>sYB3MbbfK`9R`j zZHpP9Zpdc;;B)Is5X^GvatJvUo&ILkYv#Gva+xtI&`SA^(RM<&VDxtjHJcB&?=Fd9 zPKvKc1eo^@Yqjmp&gzaV#{7A4tPh!-9NEN^pVYNwada|&; zDvJ7oNzk1vCQj{XUy+^*N-7JvGa1i$lqK-x`w5j*1@gF?v8!GFW zrYX-9MRxK}?I`6ZGLdAO+)ZnBxsWa4HaIsclI@dZFjpKF)iASFY_+zWek=5WTiRs6 zqR;1Jd=vS!0%pWTx`CNJ*G{ZdAFw1oxgzAKClJVFoW6l(iFC(Y!KbTB?$)-Py9pZ2N$th-kegOcf?(9fRf$6ryi z&!gs=JiK;aG_|;S)11V?TlcrA7%pjf8+A9!H3P}yd&+M5cJ03@NA+rT6tjAl#xp84 zQ4kkQ3{krTC?N?za%1UdzuL=ZNj8tzWNYQ6_LsX{j^fM-Zyt}9nR8x$zuWbri1jixAQ#{*6o{h8n66->lj@AUVgIIFSThRHrP%zUB*17zgUZbw5L=hK}o3ddi;_4xs%n}by|wo%pBf; z*DB25*5Bodf1c!HyeSXjOwEYdJ9h*vuGK z6*=GuKb8b_2N(t$&ZB}?Gnt_^T>ByWW}llg+|+K}R%SqcK2Li^N4%v1u3)-@pCC6Dqi;W0 z_>0*bv}g=$krHyv-W6>S@Hkw97^Kyvlng+d zg8+SM!MH5|x!o6r59{98FR2Ky;Q#<)L{%s#0@{-vvUrp8xI*CO4luQ6CYU-vfEwUJ 
zA^EnAu$Ig6U{3n$c%a?8B?`AX65=V)$OcHQ4(%9#&sGruK9+g;1uwv>5thy+A z6_)yl_hIP$i^Fgu^&42bMchZFb z;~$50z0r~p0Glg^_HD8$+(1&QpnW-A`2dl(0EqnXEKNSZzha?%wNdjxYCihXkHX92 zFk=&7V7~$Gu4qeq1`;BZ0TcQ;pvwfXxh%9-0*f9i`R{WZIsWqkQ&yU?>l5Aw%i&;1 zE#uII_I-%L7(x3~pnYZo#@7M;ewLInY;K^$f^ZLkB>K-3J1;>hFWk=5JzpCz^(or8 zkxS>iVR)^Xudwa@PaL%2Uz*hTox&?2stn-gkRlJDwc2)DXvxuZNbpvvQbmB+m}4h> z8_%65>uNziOXbvKa2Ws;6-LL%VPMr!Qj94niZ3|D;K1GV~rM z$18Ost(k}?1pQEjX)KGZ=u`lBFHECcG6ExU^Ei6ar;+9x%OXWn+qoJ=T4!eehj)JP zn|&)n)1yR)>79A$`MFx_(w`kX%O~X{Dh<#I0KorSOE||5>xLy|-GR&U?{+Yv3)55x z!SN-4u!dZP^kCud5=gSiLXf!AEqWzR$4Bvs3%WD@wsQ|0^V=@UMFXSJi--1$KP1t{ z#Tz%6_@#ZG$qii;D!HW5e?9j%1P0{$Z?%5@O}KHysr?j;4$wC}ioT_i!UXVr!c$N8 zdH#U~09|YE?U8y?bro@TX0L`MSJhPQZ%&B|^Gc$FmnG4Vc1*89SZGeg=6Z3&UBjOG zf>6N#vLZ?TqLBtwcpo%n#b*%Yt`%I|O)Q6mgVNH%^xufeNI~=)cH(CDnSGvjgaz7M z3T711eS!H#Pt@vkMsj6dJc>c`cq4vSUDVsaafGI?5opfQF!Lv7>=4cT0FlGT(FsRQB`rw}dvlX`7vm++lEv1T?UU)jLv2NZ zOfG3K`8)wXJWr%?yJR`4ANxsBaPU7maI#rc2I;^D=zzEN>q+RqN$3FCPI?G*KoUC8 zoJSOEfqIXzpT?M&JLbtM&_6JYX0B6UR-UEbnGKrHmXJt_*IA7Y9pynfA^m@M!dd}A z2_8xWxSL#TXgfg2Yv69wcghkEI{Gi5+^0Rl24}qCrDy!Sf?CgR#LT zOPiq-bNzu4p*oG)=_x`MX=)=su>p*<%$DDdxgwb(btjA6UiW#Y+yFHoErmtCO057? 
zFvQHT$a8zSAahXu#7XbQbLMElgf9*Sm`XPR?_h&!t_yEtt{K`m@pN>K~!#yJp)iOGKfNFijcp}6CmvWEettyX~9ZVs$?a3iiZ|kMc~KONZ9au2U$Lyy(m> zlKS$(;$Y4?$8XS{%b;j_(`T&#(7yl+b_iAu*ge3s2WCnud4X8D=)V}gXA=KA|6m@V z_KTm$_QRab?)%jw2MZdv^@&9VSQVnQ$n`iqVEZHV3sbHF&IU*(#`>#!#Gh0xgTt6u z2_px3jD7oo?eSX~|t2Pd@ukuTB)EljW_fd=2y_sKNA|BE>SwjQ-GBT1yYEt-F9*qY`VW_V3FHv$?PvD0fKk|Fe_<7 z`Og8<2@}dWA<+bLE(WgCk5^IgCtC4O?+aFnvHv09H8{cZJ!lUL7kL6pQ^KW$q zZPsZ2LJh434Q`2};DVdgV(8%GDfyXm$PF0X((=~r6VSfczZh+B?a$rBgW=Z-SOPnO z?Faq?lmo7PPPpSISne;b{c=k;Mj0%JCbZA)NU9*hKmjxLHA@{d@FiqUF|S3JfCg&( z#SVdMH>nPM4tKEk!SvLZC(S|!?|=Z)E+bN)(Ex%K#$M}-kW?J`Z`ZyIv@#pw*!j~S zhw9Ncvbl_a-QRazY2_cwau`t`qhF9Mjmslu3Y^Y=zWO=`djFHh$ACYb%O8{++hbP0 zjq-HN0l|=A^z6fITuh*ikn^Gc@c2C!1y1Tt$iT-gtmMx!Dsr}0RdEy>0Ac`ytjGh% zipcPdWpHs&X}N94tiVR;VR#ZBISz(-P&Q&fpoELOj%oMv%ufW1gso+6nMkb|0r+TB z8xE-c&rg6db;g-HEkcI*9*#_Vs{LC6M1H+LvJ7 z;i^Z%p$A$T%lvFZvdK=>7;9;FXvO$Xo~U5Dn{E7kU+y9hENt!nN4@?vEWfMQ+HSmr z%;;}81`A1IH>=Q(1HLa8?wW<9W~@UB8^^l`&d9h~rLltR!?k{nrk@!%AYmxc*O5ef zTPAbk{GxFKl|CdLHr9I2+3% z0+xfIuZ)4!0NLtbmEeW!c5xM|q0K%D{XkhQi6}UQ0~QUdkWNUr!%309|9~&>PpXOX zWJ&s}foI1mffzjCFLWb_Csm!2bWD5^k`SU}I;)Ne6jol|Fm4*k>byT;*rEtUw@l^!3CS5)xR! 
zMGk+uJFSWYpGI(VwHLVSJIO*fE{L=e`!g{wZgyOn2r55i<^hLiY)cWWDZGf#aBk1^ z`YtU2?c!Bmj=Zk(Qa(WPU1}zWb6;_ETH|{!l!F9b&0Ze!(I^r!Y1pCgRv#%c{EZSRGSh(~L;8kei+6|rVuB=*n@IcOW+oIndokNdm z9;gT*!dAiL=Qjf`{NJSlqAH#F8rVsI_fep8-V#RU{|N-0V=d9;_B)$H+fFX{P*!YxodvrQ2j8^}%%uoL(24=bI){*G#TRiB)oJ=EfoP|(k!nY+N8+kNI%HNdQ~Ox9ig13-)VWBHxC z_<-{mNEcuLQq_d`Pj+Hu;aJ~zQl$&qu+`TmQep%l@o)NA;vjaL$3cx8E(mloATbc@ zm=K@)c^36@6O87M$Mvxs@Ri7a*v;ziz7)La<b%Xoiq?J-?*Kj>a(ALA-cnx zc8ftqIGk=RkTR>?QTWj;v@;pMJqIPV#;jTW4;l#E#(7#hIX2I@9Czt5^$3BhTUk)Y2>Exx_`4Y6x$+%Fl%yW85G-e#oPzL{jb zW;dAj3O~Ze)0lemN7_>5shTcL&LoL@JMJ~CbB4Uj8xs|=)^7BipO@fm=Te`DZyn3` zq-(=2Rfk!p{q}CYV|EpjpfzX2+hemj`PnWc)uLD+(jb0E!RdU*E0qnqjq^3mTyv%@ zCTW9Fqk1u}y^lF7xaP2{{uAM|`{m;cFcPk7IRf{@R99@P(V4q;Go&rcF*`*s+3sY7 zO3m=DYTa*ACEA$W=J9NZy|DSAqM#q$t###5rnSy;UVX%KcgC$DXqkVebp-OX#kCa2 z`*ABq3mA@gOwZz^JiW8?&OMS-854PPmy(L5J_g|`RCk*QM((;#ykkIUZcXXT*x@TR z3uM(KJ>m8Cx~^k;V*Ahc^%AgUe()Xd+iliqFuoHfv0U9@9$r~|hwXUSZYGmax^@zW z;P9N#N%2Y%6*;rqxgqghhlWckzBC>*${O0NKKi>ItNkVJ$4IV5m5{sH;aZoS$GzzC zDN%7skFNC#y;peQFxMVpSyj~M%yRKyZA+g73ER?q%K$aA;N8^E96juzoqpnI7zc;V zHjfb=z4Ic1lhiDS{Te+w7q7keSRExq67w!Za$r4$Ynn#>g?GKo#g3xNVG-AJ{psqQ zAw30Xx}7MRmo0hB{7nnJD!f7$X$-3ue8ZSJa5nJHiJuM3jC9~Jp-OpPPLppFUF@`D za9JtFr4-ci zp%d6s)P6J`lSVClmQ`p`%TPPGf$E;a_11i&#NZ0{ILtsIv@fX|V>0C(HEQ>^Y(exI zeS)RYh}m29MiFf51=w}oJYiC3kf$H#JA10pYe?c^NKannNWWi~=75)LWz+v}y%LBh;yFFoA;MK7N8T+?L>?aIKhV?@&lu(7`cd zw^8uv5hg;vK+Rb@SNG%DSs8^dC8hMvU)gye6hNJpfAQK?Dj^=O8HFQVy_etGttXM)!^<|Yx((mnC+G(%qP2` z*_mYCrR_|;feZ(`u3n0ZRB(BQO@*d|U@i8_(=!SBOg4HWt|v6@U1nk>mFZjwrFFo1 z)MfBU_2G`;;eiF#TI8zvA}ZImJ`J|H=P=5c5=npLJSd=3KQ;HzvK zwUJP|^KgQe;`o`c$l_PW*gV^kYU3mZ7j#G}clG!Bb;-TCyD2hDl!=&MmJg=&&4@=t zNWDw6Fkkvokf*{Eg(ql}Xn}{rE78h>x;2Rt=#1mo%ENu~Ia(htI_2opSIHdX`q-Tt z?YRSSUQBd2%b1ED^`eWDpfg*3CMuhRja+e7Db*B#pO1Gj_#KiC6fH$Um;1ld>*?939lzG z6kc@L6a@A=v;@dlz3`$CIEQXC#2$yJV)(`JlzIP#274evSmY24g(%JwjrVC7E41jh z@nhbRvN)<4asTh}w9k#fu2}n!#o^A^91@G zX62x7ca7rEnKNhF6cv3>oM_nCu3uc7yOf0?Dk~`8?lSIe6XR1?)7EYtBOxKyhO!zF 
zn)pI+6NA;29I_Fi<>R(lYmS2Y?wZxG)zU^nP0iP^um{E8+@WH9LYwW}H?lHo zD6^iLjpAEtoj9^KC)6-v@W9{|F-hpshAA1jyds7D zQeexb^Zonx?XOq9y~@M41;;?fK$;_ZdKe7mCKT|Xrlc&hZuoB>>2-I92Ll2E4(sX3 zQ46_1g@td^CN734y9`7d&i50Z$9;N_)ZK2+)13WM?fI^|yWGkFx~UI;cHOuoySSmz zU%q^CoUr2HUL7B-hW>+)_uFMmx$c227iBKxzAd^Y#KQx{D?oDHa4nSgB5g6qyAg!< z^sY%GZoy)1tvvI?U#H!V;79YjY_o($IBZzDm}M?cvz$k1E0TmohdV$g@-*3a08k*m zy|g^>4B>gh>Q1x$51cs@eCAAXT^%&gcdR8TDRHvYtG#|W&qas%$E$~BNbkQv*}cZb z#^vSZ_LsOt-8{hA)|SV~`8Y4{#L|*AzgWByosq6Ax;0i*i@x&hOC5D}bxlo8)p^{S zsC3uRB4@d2+fq;`RIbVAil`|Htw5rtrp@M0uGTEM)@r+NABB#j=nbW@a;*IU9e0Z& zW^(4#89>DsP{*A02w#sFzk+k;deo#;c$;sd#8v0t}Pp#5*%`R>BaxA3EG`YXi~?7jX6m=u2YE1n z8wnTAWMq{Ups$>fi^;4c#C4qmdC08uKfYi*HjrhuIZKy+MM3|*{Z|fjR7H|0LYs$e z-jzr`8Nfu3j*U*1`nHP_!2pA;NYgnS?EAOl<{!h%$3Fj4)p9FJB{D?O+pRBfI-;Xx z%x>ZkZ)zAxOiCmk9b1x?zSPi=W$NPJRd8OO-cGSFbO*at-{;mtaP;FxM6Y2bx5iJ! zeUg@QClzZbs@+ zV3W`uP&Gb2ZpR%F5g8d4t*xo5+B`O8p6$gnv;Z0S*oQ=z7G1!20?VeNDMX zSDBR#C#=V{d^oPWB+{RNSz)Cocc$~9UnObu#=ch(p82_hNU_G%CpyF<(i8AF%ved_ z>WI12ulfJo#8eE9Y5rF`a4OKBI~n7oo4h!>Y^R8902K7(k>CWX_iKdfYVwtv&RQ`p zeH2^k2?a_UavJ8RHD(2{PdWW9q!&iXXHIHgvwtXeF@*QqN>bdX`Hr@j@_`_!gdX-9 zSt%mP?!ESYrF#WfH`hiwhzQ-C?@ca_E=$Yk#h?Ho5)@mZ=S>k&bkCkn;CZYCz^bN5 zkbQ+iAf0)^ayK9L=C%9%e$HwXZV6@eT%Eay_pG~o?sh}ySSG%*H(A)OZ&VD8sY}xQ z(G;cO{!x65iVBWwhWwT1!^{Czczy$A>nn^~Yut$na2h|NQR*TYdWX8~(W0t_E#BN! 
znysOsy9^as4~4}v%ng#~ZfO`cfs}(WP91rpIVuzaB}OxgZkg4COo8149_LwhQeWCE z@AeOQCn%(7$ zYGPI4v2dwcrL;GYJG7=>$YC;R_g2&Pm8{~=yEyi^#)m}F;em8JeYMBiOwzaV#>T(y zcCT&SD4A>D%_d7%w_mFr(56XACtGnJ4GiEWv6PsW8y?Kuja}(ATu*k)UhHrsIwojp zZtmk;v|f-?JgiWhPGpwv|MWx8NQ4;rmTQ-|atljGyGwC9eO$1*HGhOSdY4A9x6o*1 zQHVUEx5eXf;-k(l8{CE47@tRq;v1x$+eRA*QU}YLX%GMXFymZ|nIu_Dn41$?Uv;>q zq&r_)-BEhOqRFl!xgS4{H#^HvYg{5hVJ^Ca{C)@%p=gZSPGZhTZSP<`@P&&t@c7ap zK@!=S@)lPnKef8}tUXTNuZz>?JLHUlnR2h2q^4kar1&(V%u`2d^WCN2x23SXkkz!2XjEUO!8;(HzZHxnc25XrAa|Ngr6NDIGC|*pth*PKaz6g`InRnSanO zD7oD?Bkogvb7e@NRTHz{-sX(kqFmg&%OOpfCC4X@3b>r?22XkubCIOlBBLnFso`yYcQ7`zbaPWEfP)~+0us=vfUc;9`#d$)u5 zc}PKP#qNA0T@9;Jz&UOv`b_~ze9cO$%&CTghuX`n6}meZ1$NyNY5qiH*E}EhiTvgsRx>2*4^c%>lvDxw)cPo;3CaYut8qcM zO;eX9FR9=Y!m_ZQ(A_BtT9KBjy<=^^qIdM8C!6hV`SnJ@dp5`R0#*X0%Dq_AiBa4= z@`K%-F^h*I`~}2JI%bE~`*C<5)?+`v7>B0I7>l#7OcO;{C=NbO(>9EkD0`zccWCOw z4X0!^j3()y6z;B{x^#Hnq4}>nd+E*HJ`_ei?G3|pfk>e$gOgXgH zkGrgI%}ej9ge(5Y5<|C7VtksI7vZ>;#~?`sA77=s@TGlI(e)uhcYf&fs`U3KN@;YZ z&iw3Rzdc#ER=;Otw3BjeX=A1#>C~NOuGPCa^<~Q=a-08%NrXR= z$gIfJk8URPX(M~z49yJdbe6@On_R_Yij9&xNrlmMRZYhHw2A6hStmS+&U)Ap9s78M z&1I){e?G%m(Ubk4CynX*rT`A*M+z(F68I!FqiMlt9?2=sjb;^2R}omiLi zKDY3n!5Niu0u);1Y#!hhu?cH`hQyi5CjFUw%2Hpi;Mi?5Ae-KJ2D1;W4=QtzN@Raq z0PxGY?=QxiDoOF3Be9<|CWU`(=d68Xe;YG)m9Nr)wFu4qp8>qR8oTjftUaNHs#q|< zzYvP#J^EVK4SQF1<9;oIx5NJNCJl}$%J90|NtcG$kR1@Kngi$bgErIJ2Fp<3?6jL6X1C z+uX7;IC8SFl&!b8PJXYjIF3W&_Zh-Z0sejEk00pk(^DJ@j{77gpr`RlTwdS6!C`2k zt#C@r;w{d1&m!+`tyRvQJcY!msHmt~3?2suLl8yGXZriTun{L0LkkNVgHbiMwePz- zjuXp$e71+p`^eX43J1*kmvM(tB<@0V4W2A<9BVGO+I+XO)nZu;F*pbjfKZL+V(sl> z{ZLBWMMn*e{>|*HO-;=ecDvgUg@X7N9ECskl@=a3a>TrE@xOfJ`|9`a@E}Cm;HBW& zKplQpK_0#c9re`8V<-@v63dj!0q_~F6_!xP<@tLY+&1h{)X+^b_@hQjBx2XC0-d}1 z`c_uU+PqYL{sIo`^J;870$lkVLhLXs!+BC|J}j(k%`Htb{>?JpwjKA@_jm3PC4_KdqIN}OOvEJtz6J?&S33%HLYv=pmnF18Mpjmq zXpi5ScMunn?x+c->cfX0A4JV87h!@RTrrW7vdKo5zqp=0pmMzU0y#UGw7uVUq6#En4)F?qtr%~`Bu`Su&N>qM3ZkA6%PqrZnh+R? 
z8`W7=bxjs$&dSJ^t`Wt&xRdf4^6~AcJFh~eF=DTl?2(#QZi~E zpkf(z8A?v#@V78sy==;t=|ziUI=T#~+tDVh_E8Bx`VrOmw*3 z7lud#T2yniB+yY7srG^GITL2&+6CLcARYZf0_*VWv)Ft7M7R&mN^v+5naE8BJwa&k z>~QoM?4BJd9QF$m>>I8bATTg x!$0cqm$m(mI{c#!|ER;ChWr18v-rNZ?_IJVUXWL=`#$)SM9GR~irjtre*n3g-NXO@ diff --git a/profiler/advisor/img/overall_0.png b/profiler/advisor/img/overall_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f74cf2dcf131f36df9901e20ea327d509c6fee67 GIT binary patch literal 56377 zcmeFZc{tSVA3yp$tx8nZvR9T$sD!e&ptA2Wwk#3GIt&K$Xdwxe5JE_{5Ms>O%9?#& z$5^wDnXwPXa&GGR{+?=n-{)M{Ip_La=bT^H{4tr&=llNllc1O|boH-4 z{@k^P<_M4cPhX-KR!}OPidMhdnB`{Mk%Krn9lc$g$D0nJGfIq$BqdGy zU02Fg^xjNV%OaAJMyqhfrY4QI!wPdS_a+Bzws#c!jnagpw?ygtq)&5N{#=IqSnqwJW7fE_M!70s#(cQ1s ztW;g=t5e7vTu~)b*UhKHtn0`2U0rBG4)}>Tacn{_4@8?1(Ne*fJ9Xm4a36z|f-Fa3 zS28@$FnrcpmR2crTX(%zuXAr zjh9`S8P4fb!ZrQnW^cNlSsrH0I2_Q5)vxR+3RTFoC9#z)2pK9eVoaZ;cdXx5=TFz! zQ?}NOlT$`!Gw6!a7@AI%B1~t`Rm?62KgSLuYXas7sg@OK>F2*Il7{I|Gm{8)c*tXe zUT|edk?rMpd{y1uxS^WqtG$){l3YA)(<{ygasBeB*mOikSOfv5k6e4)QX(ULKl6B( zKEfS(T5SdDfF#eTi)2(E=P&CiNkFg2T&0*22iA#|%%~ECb9@}3uekIKO>|dTj|_UH zlezbz#3#z|$N1@W+yr9n3#?6c!YTM!b}&56O%`fB)ea#a-qB^WP=y9Og~pZ z^ZB7)G{pyK>a=vMjR;I+u^`X*F2@XHO&BJ!KER%5bf21ZcXL-7d78nDyXUCV#N{hK zy%;eFqf^&ivs&)=vmPCOobU`p7zU*xq9+Fsj!R)&dL+GK&o%FL=w zsd47<2ZMB+{hb3|NGlhIg@;hARFO#2xizHK8>+gja6}b{^2R8}O!x63-@5h6&MQd690V#c#uXvj^M-(h2TyN=bTl+?=U}X`w zlXgv@Y^JaDLh)_`S362p(o#g=X?>Am86E9~=03QHGo>&phuD`h5S6ROm2pK=->8O9OhXa#3)Qi!K4oEgjiWQ-TkbvWUy`OcPg z^P`O=OQX~X1-)0b5sxh(i3BHp{9ryc_xMO$+9G+m63!f$$QrIC-5FZ4Cqkx&MUZam z%=GM1(SEH{Th$#m$uE-P!@1HZ;Z;JaJ||#dUE!GAHfz0=lD$LJL9}9d=Qup)oJ*1VH|8!ONsaCMj`!rJvJWow70uY9-Sme;!yu`7Ae>Q{Cg zw;3}HZ(@LDzsBm8@f+yg&Z}+TSVd4>M)dhsh$U#_NabB0Z1tr6hl-Jif!v#XhFMC zZuC|}e&mf~eVhuOQV(Wa#g2lac-j7g>U0g-8Li(M`YJ=p|Yd#r5g|D z(rLw=<2o3U1A|V=O3$715qg8uPYHN1^db`RQgBxd>$MTftIrP|XH-a0KEvd&-lJ@` z`Y^LDWqy=ZM5*23VZ^dbjncgg5tPF2GY4$PM)!XFyA`QS&Ay6h*7 zKJmh0r}tjPFo%aYG$3QsrGGe8J|2wmVsA$Z$4+!=`MN6*i4_?2Z`&e`dyPJK*dPvD 
ziZ&C17Fu|0Fi4-v0-u;9m=|2fd7(wY&wZUz{Qwrq(;j{?XHFBpieK9^nDItR%!7Q=esg zM7#irmDQC`ZtEFb=6Hb!=VZ+-o7JWF;~l9q4;lj!(T_%P2q{Su`_OAc<27+Y(*#1? zlMEe+1<0`pkuM+e$n-F*T4PHBHoc0H z!?S0L4VQF8th`jGo5EyflrkD>yIK_>F12Y*pfwBGn(0IcZANaf?|09cLmZ}rGA70* zF?MTrRxu8Cfu+wqJk%GvE;)OvgFPj*Li?j7q#-lyGPqIA_%6O-C9BYQ&K978@&b>u5dc13r=>-Dcb0hhrgHIgT^C!IdPwlv8vAqm;Z=dCeDv@-V zMQM7!P}cy>$L$|8+a@nax~=mrW);-CT-lP)bHhY}YY{zz~}Bmv7ILpD{LLTA}sJr@{}a z_@brMR@)Cu1(!3V>D>*~XJo#~1oJ?=3f$L7AnDyZE?`m9L1TInlX(x_S0aO9;u?_g zZoiW9xbo@ep!tzXW_^ouojyw`M!V){OwB(&PLh}zV=C|O8UDsOojBK5oG|K?$*er0 zENf+w?N&P$4)I02&7HhGy-PiILeac4WGDyc6OLcB9gH8rOKga{*g4*DmSHr#!9 ziUs%u{}ACRnkNpC>qb}_<4KF-JEnW{VC7bE=*a6Tb;AO<0Foqi;eu%FzUo%X>193V zkai9}uUbi_cmakA*tpiZ9I2}8Mxcrpm)(e>?UbATXfzXYf0<%eM9+!i8vGf~yqod9 z*UlIv!qc{loEqK4QwYcpg0!)ev6btp$&^?0B>C z?MrB=L;F$aVAgqU`gEeJ?&0)vrsU4c7*%E_(%@QIVDj0KVOH3iClnaE6?(BSdr4}l z^Awsl>e%_t)fom6+|7+R-|5fsumikG9wlE`=>A$$=MOjxD^SySvr_0v3kkf&baug)$MGJ!WT3t3}Y$r?;kWt za^HVBajUKpc3{CP&mrUBQ+BOn)_}~k28@;-PJeXn`PtN_P3NgJU8_h|9Dn2)n_gdO z!`Ty=`aUtayoj@^kdtqO;5O#(lIfqttZR$-N#1tedLL8ZLFu^Jyon=(R%*Wq%EZmz znihxUc&69jKM6bMzt0-^Cf(lQX&$^N-HvG`WR_a_v@Axy7 z4MJ>l79q9@ned0x5q$yQ_3UdYsQmFc*f28^Ic$+g}WhM}dEKzkSh zbdD}vjMRF{BydU&A?3FF(>3#bHfOCrg!xoP>BMqQ>m+4qndjGavC3YN@Xh~p@@fg} zgIdB8Ljxn{U9(Tm?x04E3Bv>U_(A<5ZMA3p-d`S-tBmZ^NXv%_ZiMsPRy|;3P8@zg zxZ9atZ6K36&!FWGu?+x%LK^2bX(vilX@Eg;4d z$yFVwCx^U5$By)QiK(2n>Cg;bb4F+0>^>pc&-fioc>86~4HRA1UD-!RcZ@c9FT>W1 z`r~uToZ&YQWJKr+afyj)``E`;p2&xkG$g-Ds0PFo-*O3%>`YBz z%5IH%`g}Us!tG!tE3ONIcP~b$&j~NjFF zz>=VkC}yn{+T1i5iOs#$hDmkej;fx?G{j%I4@n7S*gd{{ z1BD`i-FjuOGw*2*?H@|pou+&AAlRwLG+K-eK--FL%S5b5Vy@fa2Kpdf4c}H)ZcJUc zQ+mpL4hk(6DBa(;hTENnJlb*mxdeMP$}SVzWTQIs&8%ol@7=)jFpJNwKlUX7>`U5V zTiKBv#KljM71(0hbE{f+zH!VxAp#}fUIk)C#dp;mQJ=_n760vW+R16l_u)RJQ?|4z z;^J~>$HF%$4aOVxN^i1C+QQb6u#)GO`cj(c-O@GH&xF`VqmV2Ewb1#S>yYI%X}n%H zMwK|2yOBq#3y(j(Ie0ql7GHu)_6@eHctITF-RVn+;jj>;=r*xH4qMqaGMsN8r+_)JhbE2)=dybkXV&-4G0^M`pcJS7$*r!!jS2(VM zxRH?6G&LQJrE4u|z2`5b>>dHDc zRp^Vq+=eKdzC<%)Q8^FG 
zg8gA@xHHbDvWIeJ`R}{xQ$}F<>Di7`LV_j?yi1)5Z?yFf2UdPXu9>aK9tjCs8v+3c z4|fm9R67(WMlc@aRlF(vQk%`Kx0pW^(KTN7nE17q@*LmRL{ye+TtZy zi5y&UG#_tEiJ?9q&kM`aM5DN~qhH7@JG|KlZ;nw}oW5t`d=<`*QH8;+>;vrfz1s4{ z2$~QsUFf|+6U}X8%B%jYfv0El&PtV{)p-H)MwghdtFDQYyb8mL!|ukr+;l7KGBFTG zozRStYdy8j2KH4J6fMCh6`y%~h6Hr-G=?Uffq+iHIj2L!v(F|>HfQ=l3pne@ zKgX0mG_l_=x!{_SnJV>1Ic-1v54l_iX%(x_o2C2yJKp+IuKY;+hVJlJ)y+tWUQ~)u z-Nmz>3S-$3tQhU9wm>dRtto2v*hr!hZA&pW2Tjt8-v?8glEYrDow(nTDtX2>*0w`8 zn*CNo_946c_i1P9GPS-q$6muLp{j00Bhwm`8);lY*6ptPxjp; z7;!YaZmFn*3UlQYPZl_hE*pWZ9kqkbr5=;&o6Ad8&=jl?2rcf{mg&5Ne&<*SIr2$1 zS9bkGZ)oUc*wpH*8JUBe`MPO zoUkM7(pYl_8gr1!=3+grdWtH-dED1s;?b&!Xq<$8F|05v{cs)`GE8q1`%9FHK9I ziL-4U4<1gd8`>Xmwyc*EmEq#iKOJ8t;|;vm?B{W%K6p3r{fK4T@ZswvBbSD82eL3x z`nhi_Et|K)X0~KghHKmniRAWAvnkVpR?FoAMe+7R>IG0t8I-a)P@Ctdv>=X^yV;+c z{cPEo9Otz47R~2L$*#Pa@pQQEtK0?%4WCjaGHP_Ertu!RVjKZgXjF27uH)$@BkAIz zjTd#F@O8(YP!jd+f)l(KT&X?1Hh6&jrM^xRoY8 zVl_gy59zWZp987*idp?q(CElep-(vB7`zD6hOots`vWsds+-_rp+5BfT5zBlN5Qp4jUv3Hs`L00OhLB!{o zL@5rnnNXG$)yLuCnk6<{?17yYc1kEsfBb>(AcMSFIH*7)ms%GbHbrmU`EGlE_?-jB zm)AlYY#5M%!*bhuE}Kcqc)NR;B0lNNS0MZ|Hl1~iGTxz~j*im%r9Ig{i8XTx;~?3Z zV(DynrSDpvs@^UKFv@U&K;A3ObH3O`tvivm2DMx)3N_7T~zzZdq;3v+pRFw8iHUpSiY z$-FDT=^ET_u`u|!QTyG@>ZgtW=cqK6YVxocwD6tYbEZ>2g#GjHSLI=Xm~BY>r$2A6 zSAHDiF;V!J=Xlqqsr~-B4rr1BirT10zP|ZuQry)zH}_6y+uxh_&kMgk_-j%RrcC9j zfA}>Vc;X=1&3EVKVyEBlv--m?ocRdusROl9d7fVZmE<_a(kjn8d8d;6Wl_)f{`qg* zRPOW1jNQN7wzGOc!G9A_ZQZ@q+X7U!iih@J^}_N*tNwm!dZDK`1Tt5P%^w>0&rJj} z`rOk)IOy%D4nq{#j!OUX{Bu{-|Ia04@PZnuA`aBpM#?4ObuXyUV zcFWH?&hwu8y?ey9rJ^?j1BA(fj~~tIp!`a2Z|_uuzgUsR^|xth_ibI~1E!|7ukYNx z{^NN$)1)PFYS+v2WbBF*;Lq~i+AvmgB)P++kxPF^iJvS&p3V6y$eD<(LlaKjzDpq>d>d|qBJ>3|q775o27p7*@;rAuDJ>f@?$m&J2DY$ROgcN%uP{ z?o>ta8OH=*C{?HxRbiUl0YS{_LT@0|y@hMb!w?er;osTNf5ucWbw4W*5hANB1*orC zb$iVndMrOr^@oN>Y?CT1+NWj~`{ulTKP_I~v`sFBVD{BP6e(oK`;Wv^Pf@$i3XGY4 z{6HdBw>_L~hc^A$xAGjpWkp{FLyH-wo%s2u9nsxDWmq42;-If?YD-O|c@FxG$F_&a z>NrAWQTK4dSN3(ulrEGsBa~divB=;>GRO3~F&0yI0dFFshdyB7JG5av!YH#iL3Bcp 
z5QO1nH+DG8*E?NK-}8)w)3nA!NKJ+7-m6DqGm%@ap(IHd=94vPQ#F}UdbY!k#7#KP z@sz%KDRyNxGlh7#nQM>Vq`2SkncugJMgUeg26iqi#)qf>{axP&a(V`sv;#zZ_(?N|7b#vQ-G)R5>Ow;L0ZCxSS_g=p7_bYrZ(>$4_~5LcLa~e zX+!oC4c-!^-HgHx&y5b8SY2P}eri?OO33Nfwqikg3`I%}Di6Fr=j>6Kn>p$JBASkh zHL&T)!yp-WhE@Wl`JMZdB0I;y8A{*C6lU1=z?TRz1!)U|kp1r`|Dg|ynLZ)+ zlXL$*a|33e3x>GYAcGgji=*#dBTHrb8s3={H(|fqPARzyL#&OC%;yJjZy6hgB@BGP zdZ_xZe+_IB+A=9k>m9jL5QH)~pP)CF59p{hqLK#z!} zZrEjV6>3*KFdapgVa)#3b(`i=Q@&6{;eutctM4VFeG>i2Vi@?H3&S${l30z)%o6cZ z^KB2SWJaHe1CC?a$62+B?v1q9jAgU@#WS+26Kxx9yYd{aJ0hth*f!oE|HjFFK)8M! z2$kZ>l$=hmye!p|pFBPG)(T5WVXSpLIl*~l&Csgxkyk79BN3f7V5tXL@}7MNO&+;{ z>YVna;ssg?f#8$Z9B`vjgOp;4Xh*5GLD$p;cE_}}R%-<)yXdaqCBmW$2k zubDA&_&CvnC>uN`9It`wg3d9K9laJtwTG`^PNm5C;L`$3wxk;#AeQr)$wzVgA7!YN zKP^Mxnfplcoz(JqGci#4=v!plGXCy5{@g_;3@Y?ldL17so_&vA5{RH;%jr~;lK=hC z9p~}CYJ2+yTcO(8?Y1+rP-+Q|8S}vp#zo5}fesRm1H}iWG{SxgRX?1HLGZ4!^)Zov zefveeEsPHON{Ls$Kqqf?SbVNxivD9Ktqu6QT+Mf$ieFQ#ep4`fwe_@GL~ zb?U`@?#s86TboQm@L@{C#g?jS!^$?7vb(d4i~78@5LBnYyvwdTYom%$okQB`ZCM%8 z!^UQ@{BhvioSn9IhEo?I_}bmOzP=!BpR(&NE%Nr9!Dly*yX>}ZT3lY<9E>#e^<4Y3 zpPFj#$vriTH1+MpVx^qM8+wf#z@24afVIASG`^d4d?E}_*Pk!}2X2Y|F?h&bkcNY7 zUr;~G+jUB7bA5FXa-?ubaIi_C(~0=9=gkw%@#J3$ zr|%L2NXVj0NaJ7%?RNGi!AQ)k*ViD$<|48>TzM%NtwO0dbs%j=^m$9PyNP+4a zkji&G{5>Lrzw{n`eT;jHI6nqAynjLpVWfa-$aY710%T>d?X*Dn9T8h_H&ACOnA&Zq2)KR@`piGFzU zuh^3y!?55hD$?;&0Wti)LtZb_Y)q-PR2v-qSL{iTw_N}9Piy%hpD572`rjchUhUS> zI%?AVc=BJdrw=zDWou9~mW!Zx`WH;{eWFh|H8VC-00R0SVDL|i`WuscL1UUgO+c2n z{}q!|QJ|dt8{_?_f_#1owC2AHDEH*i(reUe->=kqrPAuR$O3Uh2RckB}4i5eo2=p@yi_UpJ2qPt|d?J#Qrp>4PGw{d%i9wP)zi z_QMB#fP0*n7-t!u08>wrR@l`nz0fez(9pkxr;8jA zH`zv#&X2jb(tM(Z`T=BKRaI3xEDZP(2_rA8St}H{Y7n6m5BJ=l`T)ZpK#WgCGAfuA z*VNqDIgbjaVjzF+65F%sHa7x1U|MZ$Z&Va;HHvo@^B% zml47U^-mMpBG6mq>+4`S8TVRjXnxY3A4(249I_XdSYGNLG%l6)AsBjAVXfZ35G;b6 zYQzYMzz`s?s*#fys3fU8A8q&e2ly+B7y(CHCNAdoJv^W2QrWJ^kET+q=V>~Q9Ao1s ze%>64q!2p3y_;MPIrrME3mWWfO*IEp1_>hRrcsQM+BI0wH=0GNfQ;ClePwHBk(HQ1F(5OT#pVC`aYTBE6V%t``j&{)6Q3P^0UOTgM-|%Z+!Ar=NymFEXbASksfTy3apuc3HJNAGG=jFd!v 
z|5{GSKW%qMoi6P-X9Rn|v|Y`@YHQo*N=A6Q!xY^WHtSW(Y6E}V-%5>|*r4rnE`)JPFE%^H8LH1x{0 z*MVSHLQgJ>n~N`}5iMctpGw!JhS2MiXur{|h4|pccgw#TuHV|WgN1IiI@f1|(lp9G zpUX)p3ETPfz433C0zvA*VmJ=V&4FhK?8=I(P(Wp*h~)oTE*hKwbyQap7N7~gsaVd*3K?LezJBtEh*g%AE`jaVdSI{NA{pZFvh64WO(RO zr--21zJ(f-P`e}94XC81at7$RzMSrQ-E`OuhuO-_5Tzg`N3YRQ?cK~_FWPGc1Y<>|Woe#{ zbH_=?vR`2(56!{hiZTZMdE%6m_&9ofm?~}W_$D1~UGN*C z?Ckja{R{|?+rRoZ8y>LL?~|}y`k{}n=)Beq536kFj&P~qF%U+0)6&{mAJYAqHd=cTx-&Z9@O6*aAmqDHgzrnQ)d zAB!2yla%0`$5VAXy@}uACBpG+@$S8?1hXyqW7bLnB%iI|4WDo)@xIq2{6U40Nq4jJ z7%{z1NYZ9^k(b>YQE5pJkBNjny4ujVfi0s{E+y3cWgL=lKt1m37&lS+xSrYUXHo?0 zP|*+z#ew&BL&XEdh1~(}k2;M_l1Av$zcopP-Z(YA&|U7jS4#F^*@3U{vL&Yr2a&{V zH$_8IP51@wJ#(`mVD)lbCjDhqvYc01?;O8W|0VbEuNK843JT&wN~mUD?W+E-8p6>C zh{~cGEP|muU_kQdy^yUZ3^|wiX@3RbocSVy60w)_>RFSpH{_-J27T5ib4HblDE{Rx zt~Dki{thU+E&@hwQ|8%%OG&^0j?xuF38~DvukOj<{N|9b6hxs+ zRg3J%imJ;PAu;ht_l(m*lGevm`Oik-DK$BX*ucvshbHz4+{5YR$mNi93i)Pg-VR9P zF^*?Y1MvI=wHcx2K;wH!Be?Q26(XD=n5Fi}vz@^cH$o#?`^XBn)`sMk%NE85;PjQZ zh+l_e#~ki(;P~$?jjz!Y5ctLt(uT}?*(H6rXoi%0@vktyCBV31dMCwS)B<$JW#CIFXu#myc@EjCmmY0uQc)3;S)DL<8?kf(-B_f6=KNRq zs6)A~HG@3{!ta-Yi7i6#)MAFKCou$y8DvbK#2q|Q5>jp4zP2V;ZsRgJx`sc-`(Vn6 z@2N)@R8}t99C@0)UmX_Vb;8->SfHL;eXg%jE=iG5!#OFjRX6Fc8=2+6+?YB=xgt(D z=CUR0GIf<4r61os98XL8)Oy_iV^9)yLYC%X-wN%sDzD+=8>+wA*B)SBdP&$y%i2)ZMJD7;xdNd znNs*1--cw_pC5$cz1VfS9J2P|DhDp)&1C;P>8@9d>{+#ak({{dJfkpxE}cBYkY^-F zD_hwLHLrcZ6DKIS@}(fv1Uk32uXYTc)Geq!60O9u)XQkc64}l8WI{VdNx|)m1jW-? zy$bSD!#N__B};Z=L{VsfVb5HPkjT=JpfLdWrNuK=Epd5G$>D{x*@LmIPG25f#x%N> z+iJ&jXVcy@^H&00+WspJ#i z@vl|2W@oij9;F94Xh*2k1>T6()aY_4`bVx7LD10s(_aVSy5(H~ zaL&Wl5A?gBmvKJt>>O)E!r&FkM!W99*x%`~)`pI|Je0xhAHdxaSt~pKy8E}BL=f~! 
zKtUJ4Q8pGO6%L$#uPLa4n@bBkFb=PXRhkxW*!drHk5tDfpxhi^>q)8~>kxh_tGxgr zJKiDwhSRGQq@LE`GKjq*R>OBZY4)>N{;tWXaClRv0Vy*)hyr314kX>VtO~4GUD}E?1S4& zR7qt2qdilh0OfZKu0ck2S*AvwsGevRV7c%l5w%VTaZv8QS&wutMsIx$9O!85oyU!Z zbBtKd3MishEyCTrf~iVl|VDYilK3WYkv!qu+%$ zBYUq4(srza4MVoTLy==irCsN_+#K`1= zQUeBhABr%yn48;5+{-}R>Qv??KhD#uXLG)|5|)JRPTD7wwWSK2!iQ!BJKD-XPb=LK zt*g$BpvQh+;EN;6*P0RPmEonec@R<&=q4KzPgfMph~{>;F+Y}#nA{4-Yz78rhO^i0 z#c%r5O9_m$QnaRcL7bsL>Mh0tx1Q{4++Ei1d3von(b^QN`nveHvT3J3C8HI3GMKZG z6R|Nj536vC86X^3DC)?L5+GXZ#&y0f&eZ!zJUpz^@9q0-p}Vi#R2?~#))aPqwKNMF zPPdeZ^+0^0xjy?D9K|~J-Vx>6MIa@m^E-yK3>JMLImSGV)HF)=yDoLM6FF2S`;1}W zJn|W95DkhcG~{8G&V147XPdWLp+>Oc2s|d(apTmXlaa@9`|u<0au4|`U$s1x-(RYH zjqgpp9Zndfnd8*aFkfVK2;Vvz%r^3r9@wHguac(nUq_{G6HQKY%;FmxMLL6>Ml8xf0zBhyg9-{fjdcU6;RP`aLfm(hBNz z&^2NCRO))tNc2QJH=qR|>&j#3Qz9}3(swu(_=vYaPDv>(mFJmQ+{uwYlHMAdl`*D8+_$vxJm}XgdJmO+>GkHVk z4G+hMAbr0l@F(pA4IcV~6+%_f)n7(lAvTx)?gij{=J0T+wL3(_;VYWvBV}p6p$#*0 zmo*#f#w)At()iRVAbGQN3@8LxO62ykB{6iqOAgWzFO_e-VryNisnAs6z8W5Ph!Y{h zu>gWHkT1FP$;0^$1DTR7w9P(}mWIjqBWM3GcuQpvqrfU?9(*q}R`iTdZO8;jB3Uy% zo^z-MiZN&^;E*i=fLGt^9`ZeAwF;9BoQfsD!kC&YfYg@P)B*P*6 z0@^Yu;)w%eSfOjcS;o*XRfFzj?}>yszZd66xu!^0Hi=E9k241MT|KfiZ{w0V%p+60 zfPT>qqE)|^n9WN*PqkmOl&`rUQsG;n5lYRWsSplKm9|w_m|(^|%H|UWy|EK>ucn=# zPj8E|x4G8Tgo_+bu7+Ze!hrzAK@ z@1T)>gxam3*v`!OReswz!cOC$0xF9A)mdsN)^_QQKl>Zt0N^AZ&|e8E`0;(ozpn#q z>-KCg2Y=gNU`N|g1l05Bt7+H%VO9S@VYio_V2Cbmt zJ=Ss3zfe{8h$l6uJ^47JL6JK_iQ9)4Q!;wSjR|KG6TLR_t=4pFtsC%BjXh=>oYW?) 
zXJSU^%P;d734FMpevwQ*e3%YUF6S@E`a*5%K47JoN5QO`jmu;KqXAm4LO}->V4eW3 zIxsM>MVYS`QIB7yCf2{??+k5~uIHjqaBy)Ry1%E}6bWCQU(B}y^oB@7^I&8C9$(m< z;7?}d!PgSDThS!Q6^o1Eg8HRs*cs&J`fN+f=P1#QjSU|kpAzao?^_fqJ3AY2Y~T@q z+*)5<7&OAb#_Gbk-=_c+m(4r@3z)F$irdr;KwJUW1mIC~z=VPMfZ@URgAV|OJ^!OM z|M=o0Li;MI91NR>HUhW&v%yUfhDc#iIV}R0f^Vz=o=3lIYZEYG5|P5j9PI2RPQ%w^ zktgDtz)=4OStV}dfc2o|??27NIBY{C07M!L*$%kMnCR%}H;ex>DfM-90HFXh zy1A+0QkOT@>}{?U(B$^`Em_)ro}Z;h&y!|$P%fWx$)M6I z1YA@EXvr5EAU9BFh*I_PiZ(Y8(gXjlH<+|7<|{a@p5wmtEG?I(|Vx~%*CXwg3` zY+Jz0NB;}C=7*5P2%S6U@$)_3KvtPRHU9O`LtCh@T)$wU-Y8}4N~ z23iM7obH-?uLEzi)Y}WV?@r9U2b1D4r!;=NMd-WKfy(TBNj-7?(q#`&AZ;yU(`rE} zw_9X~5o(1}VTAnr!J!3lCqM{AU4vmicE5c}g_r^({@tGoT>*V?5DOTVN+$F6E~rrG zxced|2A(>wFSF@5Q;Di#+w?1zt)iT2qp%(TD+Mvey+>22o<-R{C8K~6^M6dOBL#F& zY};B9qb$;bkBCfZR`pHvMhur%8ouX+oMTZ{xo^KF zs%*LG=I^Q8P{>aHx!SrJDizn$vs_s{VbTYGCnx8XMc#nW=^z_;a4Su$tF0oESO@o+ zh%CR>%?-%eA|Y~WOl&8fO15)RXgi4;XZ&Jw)6;>*q98oZ#v=1|g%n|cG zVdLXGI5%MA%<1onE-fo7NGs+q&!|ycz>&6Ad$viWiR``Gt&a8T4|qHX98f2``I6PWWs-H*VRoVEwpgN} z7nMlGQ?Z@I5)vofadrX=tEFYovmw}d-hMv|$Bpjlf~{%ksQS>j$#aj>50|)|l#9mu zPLzok$Sjp31YD9<0Zdk}G()PN*dI#tAqED=nDimfMsi=l;1hUgp0+}LVQ(koCk9a9 zt#$1u?9U|1bR#A^p-^>u1b*_f4OmHY=2a&oo{?QjmhH62)GYT_sj!dEY)0AgvvJoH zjJt!E`KYTS7kk451yg4y^GbBkXfUBW4q>icQ-nLU>OEMr6*VlV*-6D8yxY-ag;wZ& zdi(oN?OM8eMWabV*11(Y&#`M+paS-C5q4jfhgJ>szG?v%Sqs4>dRC6Y+CImAdtynACUsddGq zd%3=CrVmP?_Qpo8{(JA+=uDi%JLJ6y9ipe{BMkGGqH!`GninsAXn`6 zKFw|21+8i}{oVZOcFcKPi6Tclt(K1Km>b_5-B@kiA@NA0rP-`eV4I(FP@8rZSVpXg zT#M*S(wTvLdmNA#Zvm?{j{a3xzZ>?cw&~y2v56S01;Fz{x8cJWB~%hRZg;kWV1$-5 zWb<2m(EhzwH4Mq_L!Xh3>z$*All8yS`RW=mNWjzWCf*B(%aX@)zIgl2%CBZwX}&T- zA5lOt(;$BjXy2Le7=uRfNIb=RP&qUJ4fQ-Cw^mLO*rzyOg;VqjEg_rpY0|yKj^;ZA z)qE-~-1;V;RVYqG`)LQ5V0B~%8@n(vtEldfMZFNoRbs*ZMCHlh*nqX5|1axjK&H@Iq#D@IWTI-CTz`$hp~R50njFy4r|xNnA9Mh(t|`mWZS(*NT~ z()`-Y%=he6@oxCHs9OJZ$b3L2`8VJ|OZxfH>tZP=h3B>7EE4WJ3s zB<}SJ=u`zqw?Ln6#r5mgA3g*ha29ZkiWUAPFTS_#!~j4SdX0nuit*3n`xFK`vzM{1 z9@k`Cwh6F~PEuK!^o2P90kGCogN~@e=gp>HC%|J6R0|$eKbI#G;22v~PR@3IE`7my 
zi9$lYjuXy~FzX^r-`YWujL?ALs%IP>1>Mhhs`Fdk%U@js`i58Q!UX{+LcZpJP(Qcb z;@S|Kgx*F<{TVJ~Ci2gz05>HF$b9Z|0Q#jYZy zxIsvs1L=iYrwTP4U^51%Vu}N!sCQa#6OQ57-+0LFr)}ev+vmamKh=Br-|P2L;i-Y6 z9jkc>3r5366aG9s1IACtWmSiK-z`jBJ#wjRULja?amJqhf!XzmuR?Ko8Xp!yh{>Vp zkcCmK-6!&dW4vJg+{g>?LM|pd<`4HBaP#hioge#@xpcE?+g?gE`Q%cnP>4jvO^>#Z zx95&4(vzkq*GHz?c*IO%kK+d@i)*ZQx4L|%Y)vQHGx_4)qTO7rNtQ+dYa)%oJ_Q*i zO`3rP=kg(*Onj8gOZ;~3w~yG%#a)IPU1z6~`pAPfbHLkXI_CoC=0`v0U2R!YbX*zj z>_uZiQGDAlMb^_@^};hq;ADYYbEIa5vU+I@TJy9D*Wdudr%xn;tc0v@&IDl@oX*O% zOMJ7ztXScZCOr9 zisuaEDyi~~LbHW*-gb;bWXl*V%nYYV`OUofJ>!CdenN;g`q*>by=iU}S;Fv(7S5%X z0LmMYhc36Ye~Dhgck4&KezIVzms>a|(2NmMJ8h!V0$xtM_lu)k6kUwohr1t#aXD>q zRg(qdnnBIlg_@}+(5Z#d`v;PPaZ(T7FM(HzaN`#8rgcg-mHFZ)LKW#&XI6PU><6n( z>Tg6=FGFb_SRZB$n`^TepN*JIy&@B}dM7$>Dn8 zZtjJ4uAZ6x<6Vv1yeuyY`>^Qb{Ql)GA>#_sAY~PJKOBdMJGZ3Q(ovivtxOIu|a=&UBqOrw)O7LE-p+#NN2eYW<~HA#hoM$Cc~x3%l@gS z+UNqCHBR>*8_1Kj!LPkH(z`!I*mx)S4m&&28YHS;1kcy4##iI!Mwmsf?~fM=&H^u_ zeb>X9)-J4>NM%39iaaVT3+8m%-?=e+@!jni&tyJ>_CDv! zYmI9Wgqr#jeJW&Hp)b+k&2z{p&>2FTS>?u3{`R9mMV;oU{YWgs2hTO9`!c-{@FF`^ z{$&(1JA})-c4%q5;zDQE(_8b0swS?sSL1NjHP<^MBKFs390X@7b7X0{NXLactsUMi z zWMX$$IeEJe5$FOh`vyjqGe%?>KAmxty&zr!X|8X)>WXhx_JldpjYO|5H4G2(3#dj; zm+|KpUsqOE#{VDez4upBUAqNpM@7VfAR?fGAVhlaf&x;c_fV87y@p-{M5U_`dO%9( zp+g`LkS?LOAcWA78hYsEZr<;FcieO2jPv~g_ZP=-jO}t~?WfK;pS4Iy_Cp$pgzZ1? 
z=lcACQ$a;V#2r(Sd4eBD8V`}4uQPB{ntvr1f86OgC6)<_=RhLS7kb_^nM}%Xy~@j{ zQArY~ZmtZH;+oelMOS-H*~XpoU3YS(kfprd=ru$_zDljc{(S9?Hu#o}IEAwN^Gx*Y zj7v?P7_o0KP&ufVuBy{B0itr zcb1q3zCMRN7R_+4$Rd|PJ_qGqNhA--yB>`tdB$;7Wc{7jt!_rP(P!MBn1j=QzjAC= z6m?7~m_qleMH%)_n*0`7&q?!F-Gs;28o)hzt-yH8v(@X&y6uRPb_`n+L^b2 z^WM}?ymy)-0%u1>M}@*E1^$u%Oq^J(uK3}8t%#1FsFFNxpy%gX`*H4K?A?pT7tITb zDIb(FiGC|!&tC|-#=selGnve-N)FcwtTpZ_3cTI=LHg0BxO*A8@8)@5nUbwH(2#x; zM1;At4o|tqIB{!4vmbk|ixFLNci-;iBvymJZ-#EM~uKugyC^nfrHCb76&cO+BeY_ zHXTZ~NEU^l_B7(%u}|I<^(z=D{3w+S+ZUCe-s>fu5xHuCf?WLp92XnzemhPKN|f$< zHt<%F+a{=&X{wU)EZ4xiOT@Ejix-#<`+lqkGrT>J(i-G%daWn+eh;k@-N<;@8sl> zq7|JJCel=tkP;m<^Ost%9XH3?Mc3zR(kA+B1RP~vrsI@|R^!7g(1V8T3(sM}0z<&GlML1j8u%9WY33c8&1oh3rAiS`48pw z+r-Q`d3lyY%69c`LiXpe1lU8Hj=9npl`NjUAlcr&)fWt7ZCAhU-2NZG_4Kb}CFgnx zFM8ZBWN+q}o_ipTyc2G=VpC4rdMG9@{;K`;xg+q1h|C+cbZK3Ph$atJh&bzhDCp^Y z7~8M#LKG}lJd~Jvq>oi%@}g_8HUIdamKuFO8s}@>ivU>&ml|J3(q`EUA?okLzIYvW z6prfg4RO=Z+|bvf`3P7PDA7@|qX!cmMuuFD785x9~R^>As$0y4dp!1%Xo%n&Da+RzLcE zv3_+`@#hvr`U_{6p$P~N@B_A5Lj;(XgbEvG_Pra%fQOrQ2fEvB?duMMprF<5y&A@~ zH#YW3LqA<--)FJL{h)_x{pQ4;(24OVVI|xLM%QJyqFC%cUUN6XP&1Yqe(vm*OOhIw zWhMsbYW*f{RhMQt;G9a&-v211dB=9aZ(o}AiVf2qkzBiB7S&ol>|TE<=k_}v)S#ag zuzHb~8#j+`L1=-{kHl&qZ}+k;lm8?_~WR+lvfZY=<}g0n&8`kJ2mwGUbMiCC|jK_W z;J_qS53aras`m7v3J(f`AFKCVFM)1beKcgTeYi!>Pj_=MAtPR7tP%4aIKz{`+pg~0 z*6SSA%$J_F)K=euNP?=dn$6lF#;fUMkOft-B~C%Jw)P(8C3E%E3z|ru%cjJO`KAzy zlStqB;VJiL6YoEk1(P}aCk zG)fXiy}7FVs@x7ozG3x3#qK6=;s5H);48EgMtzt5-kElHl;(ve4g!zSwi0e!4 zKkp>)FaLY&{V%ikw}0_rK4zZs3@<-@{=UfhG~5vF_b%7j{S2tqhKX5LJ5H9l(CgWb z0l|H=5}agYiS}sKQxNh=yLdb&gPud#BT{`suvQCLwe-!9SIn?KPVY8q;2H&^A*&+G z>77zj%TBZC`Sz%HUYBX@pd{BXBT^pPjBZ#c?2no{5*`VcC}t!<`!?S{sawmw>qGG{ z$?bopjnh}}`f;w;R&+?jOb44oWdmmlfwmaphOM7dm9m(PXVb%+cHEHhGm$}_4u{); zHS)80UDhxYhg(&wN+{Wger#PQy#)}SuAJDop~QxKtjXfTJqzh$ZGfOxhha%thH|i& zDaXVh^_1p*D;9kAV;r{5bg;MeVc10=X9l(w$R*zXo}{AfW&#FKb6*17b{FlpP zY;wA1qf6!)12+&_O~JEFtdXY2f$Z!Pzl4Q-)4K}GmemwI{)4uuF6 zE(A-l3BFy;P*2&hn^U$hkICJv`n?|5<)t0q(XlqdITmdtwZsYV(CXfG 
zMEldyeaWj&=fsP&J|r2axc-Dh(y`5meJkVhus-V1S6w2iM|Rn;>2O-R`ZmcHfW#lL ziL#tD|3ZJKA?c(YGKWH{C*73O!0CqpbSxYwg6l{zr2Y2sSSF=ouSlH7eGys@Rr_c!(=0jz&UJ zDkHtB6JO`CV!kV7d8NL57Y$l}zSe;7#;ajx)qDir?@!BvQ#6*GtAhMoJq~(=-bcf8 z1j6hzF%o*t!xGKQo%e)hThzwX7DpC~?RMUmzCBPO2^SH%2ljigme5P+l9I_8r6sHO z5Q$Z*NO8JCbFiettkCW&1?y6TCQ1@@bf<)87>086RTrhO?d2vy8`zH_%|Yk2;LHlD@t4~x2#3b#r<=K7 zRo~t1OG0T_EsGHF`$-g`SJh3XcZ=yZ#LozrKP!PyR=a;SJi?9$+xR24N|=>{Z`?C* z&?*^F%%RS|H~6^FvF`9L`j48nB(Y)uss(! z1mVR8*v36B%R%|(W>l;=NoQ57T9p*@92xABQMo3okp~klayNU9PNP5%E8C12(Mm5a zaTvVmJoC+qEMC+|US^qZ(QUaY=eWl!1Ljgt^rx_ETOvI9LM)ru%^^|+7ZC$8Ckt&> z&TMK4;z|{MBuq`)Wj8tQ>&9DT(0(G7Ak(7s83wki1?1Sz8TJe^PrpS!tr#`Hu!1Tw z`*LOi2Q(XGOMm2kQ;-j_cqmi_S6ZnVtS+R-GbLiL<=d6*K69G47T%wa?zOGNRLz5LcN<&79{03%L)`+!z2s1a=R~H`IrqrEV{PRy>1lTZ z$>T=bS7nf{Y5zx*RsnTy!kD4{@zF-YQm4#g`@3dxVXDl zJS>daNBA6B0;xFnShR0~aj}M=5hY;=6w3G z->w$YhGuX1OXT-RQl5?V+rVie zxhE{vpE;g?y!<4C+$ZZ%gW>BVDT&Mu!3eI(!aMjYvVy)9231UaPjV>~uPPo2HA?(l z@A&V$0g^+AoihD8uXuU1K|YN@7QI~XU`Im)&dU-3=DGLgqi2h!q1;Z`ono&NmPE#%}{6|bdmGQu@OGFmg$AuKCe4(4Q9>BjCQYxc?$i18u9}oR4qEu~v5An^* zRlv@STVMz+n?we}_w+01XVG7IYK+#xprE(uJ%pTil5U_b`D?@8HW?miAA}ALMoFMou&~R`1Wh4d3Ov6Z7{e+mD(KBLY!)3A@w~KW|4CM~Saprh z{PG~xrdsRIWmE4fiLE!HB&R#osT3Uy3Ju5?i>lO;RJOCg@3@!fM_(f$;wz2&OhXN= zuF{P|aiyB90?4W%AaAsrU^1;}Px>e67g}>*DTw*=2zdnhIK-*&f}uIIRl9%aZM5ElOgVGFg4>N{VWHDDokMs+CDS#|?cw z@zRq1SzC@ipV|3s<9}5IRXVv6``W^pGV(a#_dI-V<7* zf>-e}z5`;}`0JQvB_NU2qwu#$qMgvLTdz@3S#VO!zKiwG%k~dNfw$BV`Etj~NKUNwT4rhP#Nj%Xi1@yUoDH#dGreRuKi(&()|5Q1BIXTJ?VcKe2qNiUi&s2e)NieKC=L+9O!Hcww2I%>6HIM&`L*wqCX~ z|F5f$I9-7#Y$CvDUeaa#_b)$n7s15;vq`Dl|5_H~B!7>8TEvbr8UqEd3*dJaQ=AN6 zDo;8fqEm{Z4>J($%_F`$V*3Ttm5xZkx;|kKg~nbzHoTsd%UDD2qCnaFhm+om>)Q_& zY4Yu+LydD#Lp#S`qDDYQiY{d;I~MM479Olioi{<+?rU3u3409NeYOzt#ZdKlFfn#3 z!%HQx+oBr|y-owgr(4FCl<+8YHvPCUy!6yo8b+U5e@0aPDFT3QK9H$WDtrY-2)=wc z<>@>wWK+PQ4`h4oF&_vpPE27f@heZGcCp6w^U`IWd_VmXsDbCw!VM zChNg8VIl=KeHXa{ygVrgwx|g&C@3>}_v+HSjjsI`z1?a8!B;;~6O(K(CJqFM_R8e7 zz0_pGd67>Z+!UAkJB+o+12c<*fr+J*c=0?%Oo*{b(4W(09$tX>^ms(4bHS}A-reQk 
zy5rn}MJ(^_m%hx1q*@6daIFrEe{(6@OKCPlO=g=_A-M4>WZuy3=##v~27F|{LADrw z@JX5eq)%-;Z6VMr);MZycQRV7%8SIX`!iwC&9FCk?e^cc#c2o_O`$j)N3b2_%8rN6 zv{siq3$&y}K#M#bOcd#yl~NklOBNT<)ff2^kQ+#xG6NyD*e4O=ooX^bv(%u3AmgSaqz~_Y6GI0%d?~l^sR8{tNKsg^Nw~AbPokQSOLO>|_A8WZC){#a3C^QF zYi0Si{t|k|HFx{Ta$wDSwmBXuc`}aeMH?VkIhCMd8a}1Ks>HOJlgSsKuGaL$FKle9 z?pQYCbXFV+FL{jSAQh)bzr?yP`cmdjQp4Igczy1!rcbQ+?qut-=x??BOsFZR3-3C` zF66|nuzu`2_!M+u3%?&cb^oQ3-z&c~$w;S0$zH_pS32xs`ge(`_#CHF=Nk3+?OYw5 z;e`X43OZLghE*36lPvHMoLz zpZiej%iFol`TLc#f^XgRvVx#!uOtr->l!Xu*Y)0BVb2m^r0o8{o~*2CNn<$_yU7d$o~u2OQ?(+Z#V z9B^(p^F9URbksA~R8(zmv}?G`PVSHmG$GgHsU+CVnZ?W~?=|(MRo_j{J#MC?=V>*S z1KeZRUJ83SmhdqPlu`kJmzQ6j_n?+*>PqaxL5hX8zCS>RpLg0@+u-UN(V8IBXv}l* zL0J)EjNIWXm%P6CVfoD;XmCn(Ml_^4^`$e1G%+2AhwICm*%y3r8^#GNdG0x6@iXik zY0ti@0A1h%`4dBxaNn%1lxk)X@CbXR+(u<10pUsst{3!Lm%iuOb$+UOht|(s6|Ns1 z-rhS5s6gOvDM(C741Yzp5C$BR)rkO7SGJXv4e9$M;_xfE?yv`TF~HjniRc7DNAPkq&#a1ccI=cm||Sg;tLFE zR)vCV4Ky;4uc9xi4O2Q%lLW09o^Qh@&B}fZuX0Z>X>j{lg6Zu=%$U8STz*;=Q}U+< zQZL12KsLlF-*3ePlsM4rfx6P;0Wj10-Cl&_)SU|)V{)pNoRsJqVs9jd>q)Fw>^1xQ(+wvtxbRtj);QvfEVLOUAr zKrDFdhD<%g>^P4z7&~rtLor4quRrJg`sx?7sXjN{+ySw`*HAx1SAg?U!(1AVXtJmk z{f*fDd64;dFkF_C#SKRDy-<<8CEWe-tW8|B2Z_Uyh4_Q2pv?Zg5Rp>Kl9MfPu&f^ z-%I$?FYsWHTgLCotWTRI1MWUq^G1*9&fy4!;Ra}v(1&katJ&9X#>kq#&HXpIGiAng zh1gF&o1RM%!-l-0vftly;|6eRRIg!gqPg;6U?Kstpc53%Sqkjf{Yc6FYIo9b4m{b= zr^w$5EzSDUc28R0p*4|Fo1o64`Qi8G{EB{)o z!tq<|`iJk-Ne{8@pmf^+^x-*BRcPI#A!VVbx>1**-|P1V?|4Qkxb~puPclQ)?mnVR zqHTN7QA;1Zoz~Nz^6q7c^g7x|u*Gxs=nqaJf{$EpD38C_XumN@rbjq-EAaUHaqhRZ+cE6_-#BS$o}Mz@vp zXI}|W>a_c$YA?F3*c3Fbf|ncVND@uZOS`HDiU?X(hb*#zBVzBDW%YzhFh-n{9~el|vw$0i6I zyCu15r$`xhrjNe0yN7KE02EnO(`>sD#Vo>Hol8$p)D5bfEV6DZm4u-+Y>o@bn5W6#XFo5PDV8a@OS zJ55b(H$vAOEl_bEv<+WU?es)RrYHcwN^UBbci}nb*IynMRf^>-S`VZRmU1O&1SDa| zo=I_ga}58>>9p`G_ya+JyQ)>;%C!FNVff#X{(oZCstQm8LL4IbNh{Qb6QL~y{bTst zQA{S(;P=*r64NWp!OAKVn#^y8joG}-eTs%k<;DNaY5{kD@)q9`EAJS5S4{o3%5Et% zNzJvk1~M=Bxf43|u|xtdyaN2he|W8p2N1nPQV$UwhG7(l^je{2!ol%@FR|}^P9WM#^8<|?AplC3co9Pf;DOX9DMo3t=pK3 
z*!-ou&j%4UmY;kYl}`&VeeN_RC7R(Do7aZSUF^%rx}5a{bT(DvZO84!P!ADkBprB& z3ctN|vG%YExr#u+5J7TwzZ+X@Pw6n?!}kCgcFNUx9cI5jP+$L1C29@IE20*au-!F0 zFXg1QHEL(Vl~R|^Tv}F;{?4nSPjrnVe8S&%pIcFRd_0tmYdiJ|IbNYVb?lgWQ;0iM5bIY{^+8yTA^QPTH)6^@9^#6&g*g}2@4 z3bXUF%hCk^cTc2ZZJD{oBQYHgLoA7+?r=QZd{>_d29xXj_}g~6kI+~?H6v6=8h?1W zpQ_#e=-YbRifTFrb1w$jSRMsb$O~Z)AH(8VwG}SXt7kIguDoM=4>B_=jJ7(E0O`kpNQRqQCnZuKqNy_YJ9y)c@6-#SV56+Ad{Ccp7QW@_RRofxY} za@x8({A&o^d#op?Syr?W$xlU%&ndoXz4kVPh%jy5Cm57YHg97a_02b;oMdy!^5EFd zSQJ!fwOTg{8L*c*XOXA(Yk}+AH7!2M_th(3&|3o`JFhIt)e3gxfjxMRb4k%EoXd|+ z&T4@_*48>vUgK*kLMzeZTR4)Qm)jBDgVeuJ{gtTKx`2j#T!_nqo!^tNtqP%q3M+WqreTRbXBuurwa4N_U(an*{&cuuNX%EG z3<^7KFS_290$~ASORTKx#p3nlhlf}@kKUGopr+%(dHpPwVgO26s_h3ptK)eD+@o_9 zpc$UWKIGH@PS%Kts101>9ft3d!P(i%uWT_^%?k9ypMPURsUaMdVuasy)MDX5y>Tr> zi{>oj$l5Aln0s*ZwEXSU9$1}E4sKHS*=Q!3)#hXIuAXGhDEgvvVpj{K9ijWsG?-i26yeeupVF!~e|+uN!eBzI z`dEi?m5eE8TgCn<67Y_STp(Of*|4Ep`aF1_9gk7X{DJvhYu0ShNv9b#`p`$hjVbLc*5<{8!Kp@u?8)c;C5QgJlHpvY8WFBiCP}#5B zmIYkBfxbJ;t%ITwj_XBKRSXf(zfo>^I{0fqQn*iz#|S;hT3!WZW%~6==v&Gv1e(Z- z`|4w^bS+LvwYw+iE^Q_O{Ngv->0ZRrO`L*oJPTi7#nxtL*yy1gOL~6QCznTE?8^2a z@wK~aZzqh8X5O5M29Bkj+HHot@MxGsrlN#!2UMr$nLKEC9A5o-vSgGh&sPkKRa43V z6gdkp5o+hwrgN=4uk1N^G~Q&ym~XSv$W}AwkPF_Xc~NBxG@gm zk`c32v%74P`nknxG#f{Tr8mBILDELt8tRm^^qqUotzSsqJbcG5Y4Z^B6#_u%* zB{K}cWvX{ZU%B9N49OyIqks80_IT-+qzeRS=NN1B@`?AUZa+Yu2=@}c3ZWCn#HQtDHy>A^U_&;2Lm1lU0$KNtYTGrE7rSHsghD zt!7s%9t^6ME(2XW@49Ok%(Z9@C`kX+HYm>8HyT$=3e0R{P z|9`6|&IKP!-VvBa#xg@Se{&{ZblpNOiV1-FlCet0ZfD!mc}%D&(U}_tJ1^FM126c2!55WIE@$$6T(5X9?TR`48O%8vO9B z&Z`39PXY1K-O;g7&)!o~2nRV0L0tA(8)NG2dO*xC*_=hZWSK<$i?M0Eo_q5;Sto_2 zq1UV~k@2-__ir?~&N5E}(0!4DI{vHzI#-ir-;FYt`uP)mC9+&?niX+D)SwW=oD1^; zO%U@Rp0#CDi7(gOAIa4|ny4)PTBl}J)AO>w(iT*Qwe@l<7fg+&6}!rABfJ+^GU@en zig-KE4t~zj^l6^mL3hLaPisX+*E2bSx7mQtsw7P2B;B?HBB23tXqdg*6cIhzW^eFZ zBcV5B=(KLED)au@H8VFv4usvNUxtmgh8!r!KZWOp)etL4TW9R1Rr4p=h!oiHN$^k( z#OqX~BF9k_5#>s5o%(^#19ay-fR#U*Ua4B?orYT$PS{{DUC;SaJ{#~EF>WlSdcnj| z;b5~I2o1gX-IO!aqP-E&zz`C@JDgoIox5QF?35e%UwDqI|0LDlWNkE`)|VsxhC5F? 
zCi)Nm2^m8Flj=g!g|lnNn$-Vf1^fFgr6Ol9;C=2tsXe{V8C`X_erE0PXzkN~vO>+f z``s)5%L3gGip9w7bljc9FUO&APt^QxCSCU8n$)_Y)3p$r|X8HU7 z8^gc8%zt1E+hlD3UEwWm&9Wy4jzVgYSNq^9$G1fggMA3N%v@)6v36~Cx698HibVhm zwI)M*Xphg{21I>O5YiBNZP?%*zmc1pw#;MgF|WqecZRhGiQAj^lpRWcL?=t97sR4$ zwnkk33|jpo4tzOc1$7PWd(2o%`F4osOQo6F^p1$XY;r9XFvph7 z=qU8YQ-FqR7}-vB?$HPT!pR%x0T)0E98i9eNOlloAI7kY`I4fAfs&Q>=)nDP-hd6? zZdSjLD}D@T{XU-|OB)#ef47tQ5Li41O$ZSkL377;`;oyfg;9UaL9`)Yi*>J5#?ixx-C+> zG`~Gz$L?zDYRw~WfbtUjp|GD$LzJXiE{fd7Pz474PP`0p%mIYUks%enT=Rm(b&NJ= zSbt5`xn2Zy<^1A#p0N$o+7TKvsQ9NV#@?sMu*RnC%>WBXq;%kh0_A*k?tV6}hmyFW zMJA@RYhB6AQ%c*gsHA@<5I0GT*};Yg9R(xdZVo=v6^?-(UzuC%=dm=uBoFo5aCsy8 z30MKQtnOK=1ek;Q!+3+-7#Zm0`}gro4^XE9z>Qov9jZ_o68!w`9Fu~P3}IU;yNplB zNa8__O;!JM64^wc4Vm|@sphi)E$yTH=Dbc-)J`d0o`5E_-c96``@5>_?Cu+DHScO7*BVqa@`jga~7Nd5|8_aCPqdL z;;YM5+yeYicFtql+}QWVyId<@Af1iki(rTskkTKkd*0z28oEw@M%uDh2W~hXh^DaA zXzR6rddm-M$38evI!)*A2Gn{S4fmaLYM08~mYib&UhAO74YXrOkdes&bTxLM3b^=d ztK3Y6w;W-c>FhZ?G!$=rdM!mj|B?p2xM?hdWQD~ec*z4Y?FDKk?6s|mMu2A6XRfH2 zaVLtzi|jQ_=`Nnt%r7kxo8K`aKU^WEn$J%@asZwM#Q<%!;k@&J5R3%!``N4-tMiU7zSns3y3g$*&5{JVE^D!a4uHsA9T zYk@*SUw1JnEWCD zKS@uUPbJYe-sH_#ZU>%C>Yi_{|C4gl@QieJTDD+$edJtly93uvUS0d-k~A~t`CaXe zW}|dvI4-8je64xWCGOlJ+tC+wEe82pe`cG7!)S^YwLYtOIvVftG7Ej7#w%$BG{D3x zM^0XcxL>KtdYGY$xkTR_$QJI9Fwd`UbtO*{x3l>!?3rv7=1xo(m^dX1`VwiN9h>@W zAdvPb69yR98+HK`mClg%UF%GcqD{I&33hOE=&lW>T5Sfh+x)_2XX2s3&irJw{A@bu zSU&;k z+#L-4Q|uOy!{j|K^FF0j$S7d-{)moM-m5Y-RuB;WMe_<7oQa;-Dx9sc9N3Be*sBa7 zX;RJ#wo6y+AFNMuEPKCG>-uz%uVPDz9VR|5a`kBXsVpQ7p~k8=_l%^i1#|1*-SFC< zPyD^e(8tpNoD68;SC%e!1u_T;N!{W-_y(MK+naUgQsSUCT_&?&i-Wb!8ms^r1J_n!92JiXpFB@bUZQw! 
zDZRz#=PTl85z+iYi?W03y|z0=l)JmkbAHa8&rq5R?RzzY+AdEX)b2J#kd+zwi612ElodK=VY=*V8ndXQ`$-`!vQd1g(gvCy7LQHtS#{$9EH@NO-wmJ!Q=#kMR6h)YZ|m!jdM3Ws z2wYaY`F^I9ffaT0OVanfbURRQPfi+G`v-uprcG7Awo4yHkvR+GP}tnV3act7i^v|c zqrNmjF{zAk_542Nb~J7!@k<-O3R64D?2Ris#?X;GQM1{Vwl!O}6NSJ5!p|hXvxgX) z$>;`=rFSg#@=QmkJM=t%Z#{q_%AZRdBo|f!f1YRrtU3ah55YY%;}Ld#IM)4x-PR$X zjV&EobM0%omCE=}g{UZ=rs`+H<Kb1o!2h$>o<7>;t1{~_pqQ3Ft zf0(c8q8t;IRbCQafwJJ++-i#ai_m;j=W`i3yRS5h4trBl{a~rYhbzxerxcqlFo08T zkVQC^wr+orwOo+?f$8GFg1aP3^*S=shhAJa)HQMUrf?}>vOtDx&Vf!pdz!FHExP(Jf} zzc!E3T)<8qW}QC60tRMoN9W3bS-0D`d2J4o6@JRiJ&_A=h6#ia7|szplZFFEEy~kd zqhU8rUT=$K>&I`<`?TbO{eE4ExNQFyX#Eoh$N~vG1iAscH+q%^&Hb1^bK2yc9qRxS zrt25?kFwB#Z#E~2!`@>WU2fdgxKR=r_UCze9u~}hbz9o*>eP#jC0oV%O@G8779t01ccQloB7WTA`$toF<)U9gRsL1Gp z5d+}MRcv)$!W*Q=!`G0?fEtf4-FUg@Pntm*lgkANK+?oCz>m29un#{j2QEyNVF7u~ z&B$5H2{WLeJR<{=~Zr$BI9n4|>>SAu5qsx{(vF_LhH?5?9g)G3u%t6iMJKt9< z^$%UXOE;h}W(Mt2FIoU3- z&G3{BYqwwkYiYG9!1sDaUl1w8jKgs)L_LLU5&=dcq1P7u zL!+RE%EXp!7T~QC64CaBvlLFV*a;0Bu%iDIjAW!0NNM z7nw`66FFk)mTUGSd}zOZ?j<|!y}L}{;~l%GQI|RpzHjY?S)q#6EA)vN?_oGka)fs$ zh}U_yqBojrCDhx{N~LecGx%;g4dhd>FG`RC=4zIDJorSV#>-O;?3wN_4Vt|Q5o2MvR-g|eWTqkBE zI{7cU4cJ$#{83}|axR}01ZJE@pTgOnFYo49qu+uK7S&9~|0~@i@42cSC!1d1!D0mh z#y#^+@_wMbVxtqDVO?Pr|BPFE7hR_Kw-x}Z`@5HrybLAE0<;sLdBk@B4Hn;)EWnQB zsd%kyCl^xYzdc|k-J@BOo%Xr*QSdL7{sbRDadaL|g-IiUDVAW(7G`Ays%TV&r?*z$ zR{@1erL3*Yz3Bx#kMPhFn%_b5+q92T%=?nfX*QLOmLULR5&u195M{RtP(wg|C<*^q ziY3St^=$+7x!PXH^lGg$^X7(TUpUUm#fLY1QzOE6E~my+$jnBe*1SPeoXR?&wZzThJBOQpK`~_g2##W{EUU9fcn_4iKl;-kiOF2 z!!M$4d09@kI~+v;d+nEgv^h8P!%3Qoe(feP&9Dp#5z{Fa?67_1eDua0f6K~;qbB*@ zPJyg*`A$vkM0f;y=cLh9)j6}=R}!d>hF_pATK>J4d&#w?xsme6eBa|)1x$AS98hI; z#9)+$s;_VLcpH*;Kk+gla&Aco{f_HiqRz|@2EYH)|)Wfv8H_~xBHNy1|3d+!I4WCZ9G4apy-yN%P1*aNcG`2>?iJ1FB&Q5;v07 zQ=t)tec-9uGC-VaG(fyg#jmrgA{*nAu=$voMj4ja>oFM%*<7%Az3c*eUbFV}H&t#z zr}I4k>pW9y;>VU)J3d^i^^N;#IJ^d7yl2j7jn;oo$CB{C&ZJ8zN#pCSg=jXjZgzWm zVSoevR%9#U<=kxFdp&j~*;fJPciCT~8BGSu9<;S}q30Lq-m8Q0of$PWiWNxBIpmWKTy49w~$n-_0*X&MUvQ{=*WO)cI+q 
z{2vND;GmTaSFsriOazo!1%;`_OQV^3!JG5SrRdzGUm9#-S&f-wpMM|rmNq1sln8T= z4*eEi$&A&xY18%lDfS}{F&>>j* zN;O+8-DWnZ9h=EuhpB-}i}P9q;#KGUkOfQrGobFRj%zEOvAAtFZPP0ULOZ1Qc9;q%y+ z&+?x-fhACbfsr(O<{KEx_Be^dzQ3& zo)c31hgaGUx!Y=$ z4Pax}LkD^Ib%;GNe)QGyxZ7wF(ggAA_Llh_b*26Ks1DJbSSz0glnE}Z^aot=qpRMj zWp@j#tz)>~)mkbJOMWwUz!u>8XDKr2Wpch|h+=KgkF+o3C~Dl*lI3rq#om!ZZ^X!X zFC_1L1c_a`Qg})Dcsep{p5GDVro7#!a5<%rk~_XiwmyfzZk7%SaC*`b6&)rXCU|uF zh|9gv+v;m+@7J4knR%(DNwDG~RoO#Nc488M21(x({>?4CnljO|H;qDcJ7ZV}+(3J4 zUIDe>2?pe3Cz-8OVLk7kuwtve%7|Ao$s9-(a=S8eL=B|G>CJ*sDvf(HS;}E=>ft=L z($p}&EAw#DPU2T<0n;_!bm&lV`(lU?dEn}e3@>hvt@7vTLhe-P&V_<#LQrV@q0`#A zf+)!Jw_vBk_oI#Tu5K(;NlMH4jkVH#4@Y6mifcnMOmRj(ehwA}a$LDX(!1nA! zslm9;vP9G~ie~GV=SPM>%Mmgr(g7Q5?HO-hq%&=K_sDj~(9wXr!`WuLUmMk~ z(!QHhViF9RK=HeV=}rD6TNrabp9bEnJM4VH<~|Ozq|`|>pM+6Ry|2Um$~z;KD;AaS z5*}48Hb2!R+c$>j?a6(uS!i|^j$LmT@pM+WS@05GB(&;-FE);Qjq)5!eyGc_wR!!O z4ETzIdiY*~DD5=E0zt1n-=6+bpIfy1@8dbmuis35Kk%D7m;?Nv z7g1ohKo-*LNr7Mdvbpa$FS-@P_2bxQwpYz4ot!~Q!u8|CpRM^v^&Qks7?g?3OW+&| zV9Xt>e)C@NLKK>6aQ+L&=yu`~&5^_LF98x%j3cXlwHuZoRObE4_n& zroy~E(Q9AQR_>`^&CJ!Yj^LDPH`mFmm&|^LMU&g&N{mdoz^vpIlMD`-!hl>MUP<~b zmS1jcxka~q8SbnsAw>C@UhqPU-W|7k(TQZbuyDO}O40t)@tsQ5qGtO3(3hQWTx5)Q zh_&yz#vVGLYre`7X!*6bE^I%ovufpOI|^)#W}E3&*w@oha4|gDGzGH`?x&9<>*aQS zebZbUOdR%)l-Z)BIM`ul>?$aaE)69Qfe~F1H@b74vuyX~ZVU!5qfwd!>*ujVy6f&rkom)Yae= z4KCH|Jc2>z@e0XG-gjtHtzIxvn9$%CMIB!&x%{INhth1L10Fs9<{*9d?Z5(!x>T!7 zaNs3<|G7tV*^gIU4}M70BUW01KUUMXN%hv!^Y}$mnz6Z_DBoZABH;>oP7a|N0}$0a z>I3MiZfPB1g%HhWhiNiX>$GPM~m-(5<>DEZ|nF#;@^JRoa(VTRqeoL35 z-*UJScgJ_`^Ge?i{=@C2B_ESkZ@N8q7^zk%W<83B(QK~p&nGr{g8k5;2o|TiMkm5V zU9cV=8kuze+wS;_oPJ|$udj=Ct)ErXa3BNYXAXCXv7R5)f>x5n>Q&!kpm-RfPdcX& z))mLsO0vT^_5}Ao3Ej0ZfPG7NS+0Hjt6eQzfOteS+<+=RX?XjZvXIOOLIWaOz}hcn zNiR@LSMq~)0u{dB5r-QDgmu6W71MEbviRD0tqVxXXSJmFy;`&~bzo3B0f*Q@TqQJp zDCr$H9o4wMG|v4$?R|GtQ&|_UA_|TL1QkcBqJjlb5KvmghEf%j&_P8|Ne!cxyfWa3Q(5XP>?I z*=L{M*(H9@?ETz?o>K4jwoPAlw1v5Vz~XJk*U5VlR2uXfCnn~S;l2l)_03)hYuK;G zz06F2X;mjs1A~`8VM<&W?QXac5>p~;q$Z*@bfr@HwN-FUKH}opqW0W~p|fIp-G{(J 
zlydumwvxNG$D-sqE`BL6JCDiA#x|tYyj+|FH>%>?r-j#Cc+U@qlILOZNqZ>9$}-sYvlqGwg2hv z28%6jKfOr@s!o&)DiZfJS!7*1j^}K-aD1OJ-d8E5Ow|G=e|p?nefh}^8f_W}vr6-J zZ@;Lv0^}WtyX0}@jclF)n}U=*SXi2 zU34dN-Ral^3&`DsL{-~DcT4@CIL;TUybi|oe0P!g%={S2nD$$-+?ZAc2xqRw1^ks@ zZT974^+)yg1id}Rw;!mSUauQIUGrGyVDD`^eEs>db9#gHkz_cc`@u5K;Z<4oq+g!1 z!}aFh=QX+d-6Sh+9An`k%u-M92f*N)-m2?o@q8nR_r0&Gmn}OongZSx@Ct0lwdRWN zTkex#;BN4pcg{=~-Wy}>3!uKfO1|?vqCF)b>&x)-BHJhJt69^=uA9bd7G!BA>n@x^ z!Luax^-E5!7;fRbx>(BId;FjwX1kx~PWK`1Ief@JA@VA&8i;wWSi`yMGpE$*hw~h4Ucm2G-osdeMWx0GP0r@>?*$H~ZLVL=wf82=O*gEsGtIUW zpnp;v2gg54YzU5@qrwi+&sinqv6QHx-q$bSX;!eHg5fcdXO0e+*4p_VoSikAt$Dsi zvS9GV9q)$lQ%*|AXl)_23v9%K+kvh8_bURlZe7;%;-6^9t#H3yE6D22Z7kk76SfZ6@_S); zr;n~6PPneIHe)$p`OxONT$u7j5BBDcoSv0nZ5@4%=M&tMnc~DJbM1o?tn=0(yBt5& zo#{wHrt<_T@k%ZYs(LL0`Nb{)neEqAR2VWMR0|mxAuWY1*YN(Sy^7l{yE&2Qe+Hb( z-vzH#reRm&+3=f5-X{06caHO13j@Xb$+}xljFqLih3wj$BXw2>%x;i9TAW?%m_GFj z_6e8}9Xqnms`W*?sVL*XT0oZv0*pO(Cy(c7z>WE;#}OOkcs|$zAx+f1VEX$eSDC`l zdmWaiva~{q^zmUftKx1K6~T>5(27U3EXMj=_vz$WiCDTdJZ)Azw+XA$UUI5lp{hPUIZHgMV?lydtv|EZdWN53Kwmk6UlqVoU}R#C$9Dg*n{O74En4 zLcsH?WZa{vt-f`k6Ajl^2>TmaD-Ud->W|$OROq4`YS)(&KWs^bafUb}m43*O62MjO z0{3Og!Tf|qruL__>BZt+4hAuXs95VWy#&4NPHZ`RAyc(8KciR*#&&JIeJ%l3p;Ms- zfBJ3&-XK)oay9SS&8x;U7xNLcgGdo+{9`|2ZZ~Zfej^Obo=yLNF?{AwXszaKS%(Sl zyYj09h{rMNGm{hNIG&U*;HjNkpHSpC!<<_I;*b@m6%Mxz1kVQ7GE4k z{+qx5S=q~F0vx&qaiX$>7JOlzBDDFNs2f3CNvhe;yrXF-NHq0Zpmx!bcK&xuxnNZ$`}(Kn`@kOk~O9=L-68b2t2Rw#Y9cwSb> z9U{|Fz=Qw$^%cNel+;Fs_<`?KXxIz76fp>B_AH4k1b0xUrh3|na%&eY5()zrk`S=`;4w|(|6ur`ZIdm;F<;*b}FjiBPe9} zLlz!Pp>FZ%gj`4pEp}5hAu6a5VxQqd`ozZXAzP)!5Sf>g#A>ajx z!DHbmRKVcYgXY7bVb+hyFjV>PrHuNV6A*QZK!qBSn#)~Sz;G!Hez4y(Qyv(JSYV1| z^rt6|bb@1YMN%Lu3WBysUt*3917>{jVbjsD)T^^B)1&w=UCzaWEPaB|=q^YitA(9p zWEaQ`3}$01SqvtaKW7}q_w7D016AcrFe@V=pcN6s?*eqder?Iu5dcW0oS6Xu;_gUb zUaE!pr0QzSWxTVDtZYy@M(^PKm7Dz%ln6~PiOKJbaVw$R_?bEmYM1@7Ucby zOP_EA6UMI*u;dv5%~I;p8WO*269rtL5sWN_bPlY*4`0R-`Po}Xm61D6>=Q*F~R=y-5pAB@dq}d*4l}g zvb8pA06yn;r{ETjz3Hze@du4)&MyWp`MGKqPv4K@^*Ww6f)rT)DuMMR+ZcxtMoRfT 
zzt=@rV@B5_`LLvpAXUIMWkl;5&r)0KKc?z`rgCxg`@1SBBb^mhHUXXCNFg0BR-vB7kXM&;AQ_(w4;C75!7-kn+$=J+vKPqWyWnXaHD8Zgv_0TKX3E|D=u#;}~3ck*)nRNh4m{`#LtF!{kY9)X$4 zC^Amt-UOy+oDte`5rOj-iNAPK_+$KrjHmty84IQcgv7IB;l2y`R37J~KX(`tV2B0) zPKZCZHod(NJ>6CzGoS=Kr|_o1kR)a*r^%T&d%5VuZ|f+0PA*)M?95LA4xNKE#pqt4uxdGM+_WGa=@&&*gZ?+r8p1?|B6Awa`voCOed z{rSAuM1`=0=y}F&O@GW$6xZeWuV4@@e*y;7ka)G2Re@raXGMEyXh_L63DUKC;t=wU z?undjrj8_zapya8ftKJR+Sn~a*u5RGM);WLXBh|72~2AW|B(%UUM*uTXM?B*v_J8u zyR~MlV@wz<2j{7s6MXpdur8)-Ys&U4GEt4O5m;aodrSJ|6v3O>h&B%%C0-Q+gcZ?) znw|N(aW@cKg@!U9x6ssci(1H_1aDF~5D$hp>4jC!`IMJ=^9_0u^0&)T42WDQ>ggV7 zv4~Gn9?%L1L<%R( zSrm;h)%f+zm@-G_`TTmzZLjhJxQRf^pgW99W3ki#&-nzH6()4SJ+y<)qVmFWb{II4 z`}rMhxQH356y{NThSS1$H1NTOOSS-_R4$4i#}d33&{G(zv1a~sE~t8aENeM_Ag<`nKGO=4K2Wp4Qk53>v+

1s4)RPwK<#{pTPb$MA zrj6=JUh`#R{naVdBz!SYwuQm-pgqC&1T_Qm?j_R=K|IYe%Lv29k?EJ5rI%ifQ}yQO z-9t!PU)cOxcKGZLNJ(y6bfyygw>B`pfmr^tG^zI&vuM6omQ0YzqtZg}HmgI9hpp~~ zTuv_Gg2UB!y|%>zPz-nkvW#oimY#(uHUXZ>kS0H2TmvU|(C|$C74&ZcO$3wKYw(jc zzGgUOP3Ui5o2I{*SB2j$@Sp!qY@$6rmA+(&^vjb+4bKRP@&G!8>S;gxs%mP%V#mkb zR$fBaT2^mfG4-hN*l`=@Kf>-*R=l|9cH609-+;<~%CQyWhYt&>sI7W7er)r@0P*|n zurs?8C1vN0Cf?jl9E*z)z(6qSaGLUN0pfzPuoD8do((7!YqNc&Xn!&m=k{%zkEQSrHTLSjQhcZ-XYmF>{Hmp000 zw6w=d?6RCNy$2NZi@AmoEekLaMnIMmuFE5_u~Es>6ZSdlBOK~oqDK`H%$3~SvdPOV z^T?y6XZ9F4+1kE$RUbtu$ZO&azna=#bF7*Xb!{ExQX_OVG&S}0(Q3)5&8|iH`8nrX zo|2_>2Rc1VJaQ~m6FdFN-d??0c4pT;4UK~b=b4o9AV!}rRLd^$%Brp)hPk=<4mY zdOFy;dp7>RV)%>JC6UjUKPl}ZgD?KFJ>j$tv8V$4ZdX0ct|au7HUZsxN#;JPysYr_ zp7nk`6V*96%4z8)IpCwUJw1aH6BF9h1CU&^di3iQ_>_O%nS16i2fMn9#cvOv^S^L} zfG*TFy@Uv|$DZk|r!ms?YA+V=uCF(_d2=S&bjJ<#)9Z8E)80gx9mAdYWb4nLQgE_U5Op`%&wFaNaCH=M|$M6-U?&d(U&k1K72`zR;Fl;duKTz3#zbP9?;)W zh;-}IT}@$$dq4Pq=%A?eYS5SeEw-^lIe^{42AQr!G?)p$PsU1=36SH+ox>en*~yQlsA%EfdryqQXiF zloi_B4Rg@iN1!<2{6l)3B`OV}SRrcsBPIzavM8&+lIh>9Mz0fC(to2DeR6IE6e|{m z`map-|3bZT=>IvPMxJT~?M4vPvN)t56Uo2d$Im9$y?Zy7Wd!Ewgs(pcWeOFIbPOhUaPNKhCYE$j9t^B^+N117ph_FN88A&rf$SDPtWEv?XahD?4o|w7D|CZ z=>G+MX;$w^hd0cDTtc$#>lb_Wz0Rjan`O)^$d-Sdp*2W}l@1N64z z7fA8_pl6Rj+4uB?M?QIjh3x(K(MkPaMy)FxZu0j0MgL-_e5OmY@M!0?m4C1MuYZ}_ z5;-{Gwd01hJWO@tFRgU3leN6BKmXgm8cDu_+T8^?qQZg59%N-q>*ahzRLX<&w_(e^ z?NuzUApQt^LGO27uoUzccdpz!S-u_09EI*tpcvyJ$r3d`L|B6`4wYf7F@j+Fp(_G+ z5~@e-5`nWw{`h2DRTTqLX+{b`dMV$fH;|M&m@pODNdoKlvQ!$V4})n&anl`draF<8 ze*FAPBu$452(5`61~-X}o1;qG;G==0tBw%QR4rY98^I2<%posq3`9flL*&#U0)=rh z5y7_aW1XCwaTDr_>so? 
zHI=A|!E!02Iar%4bRU++tZAk^W?GVcW8g66nlms%H($9h{kry3 z8Vt#&)+3!Q_T&0a{&B=8+8&KxkT#0LOoA=UPlN}uy671J#7#2XkM&KLi@V+#5h7kP zr{7PWaCi`J(bro^Y*oVM>MitDlLh!OZRumQJh*)mc%!_9v?a(bXErQQYKOwqx5 zeu#{WJkTEaE|xkk)=F=r#+M>5PUe8GZs}D2h&2r#v&=VUm`lOpGN2T8asU*#BOPJJ zdzz?PqgJD|4aWCQ)xdCvvEGtj02;Fs-`8;5#3)SBLwp@J)jMFE_Fskt{(p7ufN%$lyxGuGUj zPB+o8ZSD0_w#%^k?z*cSG5+VCdcL%vdc~^GHaQPx{h=|EPsycleq|Ggz_8WSZKN~9 zt5-VU>1n>rAqf zSd3>)s=oVS;=(K3fF&n|UEow%R6oV8{c;vx!)IG}J??Tx4jrtas8G^l7_KN@lp|xX z&7MrHDjlB*O@Pw;3`1}dYxX5Rc$nI!^8{ZR=fyo?=7?bgSBSC)<&}cGbjidnZ`6PV zfF8nP2l{do!WaSEg*B3oo3xf6(G7e+*{px(L9D-zJC58YAH;0S(4YjOC40M#f&w|) zB#YBj6a)FnmDM&477&vapPLeDr6U5nsB!R~bdJ?y^x@(1`Z(YdD||f#s2|7D@<2Tf z@6qvQO$YPJap>8MZ1OyS6w}!|J(L)@g$Oj?#L3HgA>%)EoQZ7 zH>IJqsr;8`BjmRgR34O3%kxI-+YQ&(^pX|&%Jy!|*grWwqit1KHS5mE*d!A&lMrZR zeHn_zZY6Lt`*K%zV{CsgkmOsTr}H24kUEFEFlC>`z!Waniy>0IGqm;|*sr4#y1%Iu z3|K-&6cKl^LbO#M#7iWZ*C?)Dy&u;V@aj`g0a9(Lc|hQrsE9>YY2m6~RhHMNKhE z+UUHrRZ0W0a=p3nGTyKbu^jTeNKC$auX`ybZ2axV7=T-szEbxvASb_N7{Z&JTM3G> zpP~}LZHt*IWIgQ6{8ZRBth$gz7(e!Q;EJ_E3^5RuD_mX8BjDV8_%ER~V zq=Gbg*gn1XX9ZD5YMk_ETBvr?iZ6g(A0R`X6!5s@df*7nN)lrdE{98?XOop5lF+n? z{F;YJo5L@2-&N|#Be9cFqk&vSP43&bSZAa`&z;OWlDYjrJ-uK};DyMH$$3xM9VsYZ z1GVY|o&(utUidf3+o0=n_e;?iO{k?GZS|uwnoJXEORxvTUN1~o{-cv@kS+Tm)LO0H z`6I7s?J!kb50YZakKCs0@@HXY5Dr(>C(wEM_d{j$3{o^5l48UUkKT@*L@0Opqt7oO zqNoDJ`@dvUJ+Ccn0Y&j4{J;8npkF$wI_@O-wK^oE?#(PVtnc*ni$G{E6ofgTIYKq3~cSg*15meha9b27{mq7yA3_hCR-NH1z)_l zC80LEE!z8uz8ciT)qX#;Bdrpk<29_T+S=OKcWdWOca#(Cc5M{<_>`O*dmJ3mdKb;| zvPzP@gUthz90?7LgoFfVDlv@RBnt^s3BO)9$N563t*w32(BQDEPtLIM5HMO-upSX5!%{X zH?AI5O+mkW0YZ8HdF@13^@F-8scC5oL(C= zA<<6-@wh{IHhYqc$7$0;?~bc=k+pO^EI_PqZBGFnWR^A7dUs;o&=cqhWUd4;eYbbd zx;%(IN|~~!xe16AX6b31_82o1JM>@G{>l$O^gyWbRu4#I#u~}qo|oKX+<7AOgQ7x| zrNwLenfA! 
z+DhspD9sbz{v#&+X3RxggV6oRO2M7YzgpPJZQ)T+q7w&&`Y#7*!NLFk3l$V9u)=Gi W%r8Mxf7cT5=j1W-qXmaue)}I{>dlJ) literal 0 HcmV?d00001 diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 59bfee77f60..1d3872a1783 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -13,23 +13,31 @@ from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyze from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer from profiler.advisor.analyzer.schedule.dispatch.timeline_op_dispatch_analyzer import OpDispatchAnalyzer +from profiler.advisor.analyzer.schedule.syncbn.syncbn_analyzer import SyncBNAnalyzer +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_analyzer import SynchronizeStreamAnalyzer +from profiler.advisor.analyzer.dataloader.dataloader_analyzer import DataloaderAnalyzer +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_analyzer import AICoreFreqAnalyzer + class Interface: supported_analyzer = { "schedule": OrderedDict({ - SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer, - SupportedScopes.TIMELINE_OP_DISPATCH: OpDispatchAnalyzer + SupportedScopes.SYNCBN: SyncBNAnalyzer, + SupportedScopes.TIMELINE_OP_DISPATCH: OpDispatchAnalyzer, + SupportedScopes.SYNCHRONIZE_STREAM: SynchronizeStreamAnalyzer, + SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer }), "computation": OrderedDict({ SupportedScopes.DYNAMIC_SHAPE_ANALYSIS: DynamicShapeAnalyzer, SupportedScopes.AICPU_ANALYSIS: AicpuAnalyzer, SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer, SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, - SupportedScopes.GRAPH: FusionOPAnalyzer + SupportedScopes.GRAPH: FusionOPAnalyzer, + SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer }), "communication": OrderedDict(), 
"overall": OrderedDict({SupportedScopes.OVER_ALL: OverallSummaryAnalyzer}), - "dataloader": OrderedDict(), + "dataloader": OrderedDict({SupportedScopes.DATALOADER: DataloaderAnalyzer}), "cluster": OrderedDict({ SupportedScopes.SLOW_RANK: SlowRankAnalyzer, SupportedScopes.SLOW_LINK: SlowLinkAnalyzer @@ -66,7 +74,7 @@ class Interface: if render_html and result.data: if hasattr(analyzer, "html_render"): analyzer.html_render.render_html() - analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + analyzer.html_render.save_to_file(f'mstt_advisor_{Timer().strftime}.html') return result if not output_dict else dict(result.data) diff --git a/profiler/advisor/result/item.py b/profiler/advisor/result/item.py index fa0ffb5b1c7..02db7fdd004 100644 --- a/profiler/advisor/result/item.py +++ b/profiler/advisor/result/item.py @@ -15,7 +15,7 @@ class OptimizeItem: @property def headers(self): - return ["problem", "description", "suggestion"] + return ["category", "description", "suggestion"] class StatisticsItem: diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index c7d7da8663c..0d0602ee56c 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -93,6 +93,9 @@ class SheetRecoder: if data not in self._sheet_data[sheet_name]["data"]: self._sheet_data[sheet_name]["data"].append(data) + def clear(self): + self._sheet_data.clear() + @singleton class OptimizeResult: @@ -110,12 +113,12 @@ class OptimizeResult: def add_tune_op_list(self, tune_op_list) -> None: """ add tune op name to tune op list - :param tune_op_list: tune op name list to be added + :param tune_op_list: list of operators to be optimized :return: None """ - for op_name in tune_op_list: - if op_name not in self._tune_op_list: - self._tune_op_list.append(op_name) + for operator_name in tune_op_list: + if operator_name not in self._tune_op_list: + self._tune_op_list.append(operator_name) def add(self, overview_item): sheet_name = 
"problems" @@ -148,6 +151,9 @@ class OptimizeResult: logger.info("Save problems details file to %s", Config().analysis_result_file) self._save_op_file_list() + def clear(self) -> None: + self.data.clear() + def _save_op_file_list(self) -> None: if not self._tune_op_list: return @@ -173,9 +179,9 @@ class TerminalResult: def __init__(self): self.width, _ = self.get_terminal_size() if self.width is None: - self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"]) + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"]) else: - self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"], + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"], max_table_width=max(self.width - 20, 180)) self.table.hrules = ALL self.result_list = [] diff --git a/profiler/advisor/rules/dataloader.yaml b/profiler/advisor/rules/dataloader.yaml new file mode 100644 index 00000000000..2bb7a4c0e70 --- /dev/null +++ b/profiler/advisor/rules/dataloader.yaml @@ -0,0 +1,9 @@ +# unit is milliseconds +dataloader_duration_threshold: 10 +problem: "Found slow dataloader, cost {dataloader_duration} milliseconds for one step while profiling, normally less than {dataloader_duration_threshold} milliseconds." +solutions: + - "Please check the disk I/O of your data directory. If you are training model in ModelArts, please move data to '/cache' or mount a more efficient cloud disk for better I/O." + - "Please check if there are any other multiprocess operations in runtime that may have affected the dataloader, such as training process core binding command 'taskset ...' used for launching the training job." + - "Please check the format of your data, avoid file format like tar, tar.gz, zip." + - "Please set 'pin_memory=True' for your dataloader." + - "Try to adjust dataloader parameter 'num_workers'." 
\ No newline at end of file diff --git a/profiler/advisor/rules/sync_batchnorm.yaml b/profiler/advisor/rules/sync_batchnorm.yaml new file mode 100644 index 00000000000..d65bcb0d4a1 --- /dev/null +++ b/profiler/advisor/rules/sync_batchnorm.yaml @@ -0,0 +1,41 @@ +problem: "Found {syncbn_num} SyncBatchNorm, which can lead to slow python task dispatch and frequent communication between devices and finally reducing training efficiency." +max_syncbn_num: 20 +solutions: + - enable batchnorm: + desc: "disable SyncBatchNorm by remove the code like 'torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)' if possible." + - enable efficient SyncBatchNorm: + desc: "replace the 'forward' method of python script 'torch_npu/utils/syncbatchnorm.py' in your runtime environment." + efficient_code: | + @staticmethod + def forward(self, input_tensor, weight, bias, running_mean, running_var, eps, momentum, process_group, world_size): + input_tensor = input_tensor.contiguous() + input_shape = input_tensor.shape + input_tensor_ = input_tensor.reshape(input_shape[0], input_shape[1], 1, -1) + sum_val, sum_square_val = torch.batch_norm_reduce(input_tensor_, eps) + + count = torch.full((1,), + input_tensor.numel() // input_tensor.size(1), + dtype=sum_val.dtype, + device=sum_val.device) + + num_channels = input_tensor.shape[1] + combined = torch.cat([sum_val, sum_square_val, count], dim=0) + combined_list = torch.empty((world_size,) + combined.shape, dtype=combined.dtype, device=combined.device) + dist.all_gather_togather(combined_list, combined, process_group, async_op=False) + sum_all, square_sum_all, count_all = torch.split(combined_list, num_channels, dim=1) + size = count_all.view(-1).sum() + if size == 1: + raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size)) + + mean, invstd = torch.batch_norm_gather_stats_update(input_tensor, + sum_all, + square_sum_all, + running_mean, + running_var, + momentum, + eps, + count_all.view(-1)) + 
self.save_for_backward(input_tensor, weight, mean, invstd, count_all.to(torch.int32)) + self.process_group = process_group + out = torch.batch_norm_elemt(input_tensor, weight, bias, mean, invstd, eps) + return out \ No newline at end of file diff --git a/profiler/advisor/rules/synchronize.yaml b/profiler/advisor/rules/synchronize.yaml new file mode 100644 index 00000000000..ed105b345c6 --- /dev/null +++ b/profiler/advisor/rules/synchronize.yaml @@ -0,0 +1,8 @@ +problem: "SynchronizeStream will reduce training efficiency. Found {synchronize_num} SynchronizeStream, {slow_synchronize_num} slow SynchronizeStream cost {total_synchronize_stream_time} us." +max_synchronize_num: 20 +slow_synchronize_threshold: 10 #ms +solutions: + - disable ascend launch blocking: + desc: "please check your env 'ASCEND_LAUNCH_BLOCKING', if ASCEND_LAUNCH_BLOCKING=1, please execute 'unset ASCEND_LAUNCH_BLOCKING' and then start your training job." + - modify code to avoid synchronize stream: + desc: "please try to modify your training code to avoid synchronize stream between cpu and npu." 
\ No newline at end of file diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 84419b67087..83f304c2d3c 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -1,5 +1,6 @@ import inspect import json + import logging import multiprocessing as mp import os @@ -11,7 +12,7 @@ import traceback import types from functools import wraps from typing import Any, Set - +import ijson import click import requests from requests.adapters import HTTPAdapter @@ -43,7 +44,7 @@ class ContextObject(object): def debug_option(f): - return click.option('--debug', '-D', + return click.option('--debug', is_flag=True, expose_value=False, is_eager=True, @@ -413,7 +414,17 @@ def format_excel_title(title: str) -> str: title = title.replace("(ns)", '') title = title.replace("(%)", '') title = title.replace(" ", "_") - return title + + # 将kernel_details中的列名转为与op_summary_x.csv中一致 + kernel_details_col_name_map = { + "name": "op_name", + "type": "op_type", + "accelerator_core": "task_type", + "start_time": "task_start_time", + "duration": "task_duration", + "wait_time": "wait_time" + } + return kernel_details_col_name_map.get(title, title) def format_float(num: float) -> float: @@ -550,3 +561,50 @@ def get_file_path_by_walk(root, filename): file_path = os.path.join(root, name) return file_path return file_path + + +def check_path_valid(path): + if os.path.islink(os.path.abspath(path)): + logger.error("fThe path is detected as a soft connection. path:%ss", path) + return False + elif not os.access(path, os.R_OK): + logger.error(f"The file is not readable. path:%ss", path) + return False + elif os.path.getsize(path) > const.MAX_FILE_SIZE: + logger.error(f"The file size exceeds the limit. 
path:%ss, MAX_FILE_SIZE:%ss B",path, const.MAX_FILE_SIZE) + return False + return True + + +def parse_json_with_generator(timeline_data_path, func): + result = [] + if not check_path_valid(timeline_data_path): + return result + try: + with open(timeline_data_path, "r") as f: + if os.getenv(const.DISABLE_STREAMING_READER) == "1": + logger.debug("Disable streaming reader.") + file_parser = json.loads(f.read()) + else: + logger.debug("Enable streaming reader.") + file_parser = ijson.items(f, "item") + + for i, event in tqdm(enumerate(file_parser), + leave=False, ncols=100, desc="Building dataset for timeline analysis"): + func_res = func(index=i, event=event) + if func_res is not None: + result.append(func_res) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) + return result + + +def convert_to_float(num): + try: + return float(num) + except (ValueError, FloatingPointError): + logger.error(f"Can not convert %ss to float", num) + pass + return 0 diff --git a/profiler/cli/__init__.py b/profiler/cli/__init__.py index eab13571c58..e768e4cb86c 100644 --- a/profiler/cli/__init__.py +++ b/profiler/cli/__init__.py @@ -1,4 +1,4 @@ from profiler.advisor.config.config import Config from profiler.advisor.utils.utils import Timer -Config().set_log_path(f"att_advisor_{Timer().strftime}.xlsx") +Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 2e173dc8708..f400a265b7b 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -83,9 +83,6 @@ def analyze_cli(**kwargs): help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_all(**kwargs) -> None: - # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 - if not kwargs.get("benchmark_profiling_path"): - kwargs["benchmark_profiling_path"] = 
kwargs.get("profiling_path") try: _analyze(Interface.all_dimension, **kwargs) except RuntimeError as e: diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index e794578da8c..f9add948ea9 100644 --- a/profiler/cli/compare_cli.py +++ b/profiler/cli/compare_cli.py @@ -32,6 +32,8 @@ from profiler.compare_tools.compare_backend.comparison_generator import Comparis @click.option('--enable_operator_compare', is_flag=True) @click.option('--enable_memory_compare', is_flag=True) @click.option('--enable_communication_compare', is_flag=True) +@click.option('--enable_api_compare', is_flag=True) +@click.option('--enable_kernel_compare', is_flag=True) @click.option('--disable_details', is_flag=True) @click.option('--output_path', '-o', 'output_path', type=click.Path()) @click.option('--max_kernel_num', 'max_kernel_num', type=int, help="The number of kernels per torch op is limited.") diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index e7e2d5adca3..380192f87be 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -17,6 +17,8 @@ import os import csv import json +import yaml + from common_func.constant import Constant from common_func.path_manager import PathManager @@ -60,6 +62,23 @@ class FileManager: raise RuntimeError(f"Failed to read the file: {base_name}") from e return result_data + @classmethod + def read_yaml_file(cls, file_path: str) -> dict: + PathManager.check_path_readable(file_path) + base_name = os.path.basename(file_path) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return {} + if file_size > Constant.MAX_JSON_SIZE: + raise RuntimeError(f"The file({base_name}) size exceeds the preset max value.") + + try: + with open(file_path, "r") as yaml_file: + result_data = yaml.safe_load(yaml_file) + except Exception as e: + raise RuntimeError(f"Failed to read the file: {base_name}") from e + 
return result_data + @classmethod def create_csv_file(cls, profiler_path: str, data: list, file_name: str, headers: list = None) -> None: if not data: diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index 78ea5d89717..b40f19e92fa 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ -145,6 +145,8 @@ python performance_compare.py [基准性能数据文件所在路径] [比对性 | --enable_operator_compare | 开启算子性能比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | | --enable_communication_compare | 开启通信性能比对。 | 否 | | --enable_memory_compare | 开启算子内存比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | +| --enable_kernel_compare | 开启kernel性能比对。仅针对NPU与NPU比对的场景。需要使用性能数据中的kernel_details.csv文件。 | 否 | +| --enable_api_compare | 开启API性能比对。需要使用性能数据中的trace_view.csv文件。 | 否 | | --disable_details | 隐藏明细比对,只进行统计级比对。 | 否 | 说明:以上开关均不设置的情况下,**工具默认开启所有的性能比对**,当用户设置了以上开关,则按照用户设置的开关进行性能比对,示例如下: @@ -174,9 +176,13 @@ python performance_compare.py [基准性能数据文件] [比对性能数据文 MindSpore场景仅支持**总体性能**和**通信性能**的对比。 +比对结果分为打屏和performance_comparison_result_{timestamp}.csv两种形式输出,其中打屏输出为概要信息,csv文件保存详细结果。 + ### 总体性能 -总体性能比对结果以打屏的形式呈现。 +#### 打屏结果 + +总体性能比对结果以打屏的形式呈现时,字段如下: | 字段 | 说明 | | --------------------------------------- | ------------------------------------------------------------ | @@ -196,6 +202,54 @@ MindSpore场景仅支持**总体性能**和**通信性能**的对比。 | E2E Time(Not minimal profiling) | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | | Other Time | AI CPU、DSA、TensorMove等其他算子耗时。 | +#### csv文件结果 + +总体性能比对结果在performance_comparison_result_*.xlsx中OverallMetrics的sheet页呈现时,示例如下: + +![OverallMetrics](./img/OverallMetrics.png) + +表头字段说明: + +| 字段 | 说明 | +| -------------- | --------------------------- | +| Index | 指标。 | +| Duration(ms) | 执行耗时,单位ms。 | +| Duration Ratio | 执行耗时占E2E总耗时的比例。 | +| Number | 计算算子的数量。 | + +Index列字段说明: + +| 字段 | | | 说明 | +| ---------------------------- | ------------------ | ----------------------------------- | 
------------------------------------------------------------ | +| Computing Time | | | 计算流耗时,计算流所有event耗时总和。如果有多条并发计算,计算流耗时对重叠部分只会计算一次。 | +| | Flash Attention | | Flash Attention算子。 | +| | | Flash Attention (Forward) (Cube) | Flash Attention前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Forward) (Vector) | Flash Attention前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Flash Attention (Backward) (Cube) | Flash Attention反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Backward) (Vector) | Flash Attention反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Conv | | Conv算子。 | +| | | Conv (Forward) (Cube) | Conv前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Forward) (Vector) | Conv前向Vector算子。Conv前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Conv (Backward) (Cube) | Conv反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Backward) (Vector) | Conv反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Matmul | | Matmul算子。 | +| | | Matmul (Cube) | Matmul算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Matmul (Vector) | Matmul算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Paged Attention | | Paged Attention算子。 | +| | Vector | | Vector算子。 | +| | | Vector (Trans) | 转换类Vector算子,主要包含Cast、TransPose、TransData算子。(仅针对NPU数据) | +| | | Vector ( No Trans) | 非转换类Vector算子。 | +| | Cube | | 未识别出Flash Attention、Conv和Matmul的Cube算子。 | +| | SDMA (Tensor Move) | | 拷贝类任务。 | +| | Other | | AI CPU、DSA等其他算子。 | +| Uncovered Communication Time | | | 通信未掩盖耗时,包含卡间等待时间。 | +| | Wait | | 卡间同步等待耗时。(仅针对NPU数据) | +| | Transmit | | 通信传输耗时。 | +| Free Time | | | 调度耗时 = E2E耗时 - 算子耗时 - 通信不可掩盖耗时。Free的定义为Device侧既不在通信又不在计算的时间,因此包含拷贝时间(SDMA Time)。 | +| | SDMA | | NPU为除Tensor Move外的拷贝类任务,GPU为所有拷贝类任务。 | +| | Free | | 排除SDMA的空闲耗时。 | +| E2E Time | | | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | + 可以采取最简性能数据采集的方式来减少E2E耗时的性能膨胀,示例代码如下: ```python @@ -300,3 +354,29 @@ MindSpore场景暂不支持。 
步骤1:查看MemoryCompareStatistic页,找出内存占用差距TOP的算子。 步骤2:查看MemoryCompare页,搜索内存占用差距TOP的算子,查看具体占用的子算子。 + +### kernel性能 + +仅针对NPU与NPU比对的场景。 + +kernel比对结果在performance_comparison_result_*.xlsx中KernelCompare页呈现。 + +按照Kernel(Kernel类型)和Input Shapes(输入Shape)分组统计,统计信息包括: + +- Total Duration(us):总耗时,单位us。 +- Avg Duration(us):平均耗时,单位us。 +- Max Duration(us):最大耗时,单位us。 +- Min Duration(us):最小耗时,单位us。 +- Calls:调用次数。 + +### API性能 + +API比对结果在performance_comparison_result_*.xlsx中ApiCompare页呈现。 + +按照api name(API名称)组统计,统计信息包括: + +- Total Duration(ms):总耗时,单位ms。 +- Self Time(ms):Self耗时(排除掉子event),单位ms。 +- Avg Duration(ms):平均耗时,单位ms。 +- Calls:调用次数。 + diff --git a/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py new file mode 100644 index 00000000000..bc5810068b0 --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py @@ -0,0 +1,32 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class ApiCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_api_by_name(cls, ops: list): + ops_dict = {} + for op in ops: + ops_dict.setdefault(op.name, []).append(op) + return ops_dict + + def _compare(self): + if not self._origin_data: + return + base_ops = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_ops = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_ops or not comparison_ops: + return + base_aggregated_ops = self._aggregated_api_by_name(base_ops) + comparison_aggregated_ops = self._aggregated_api_by_name(comparison_ops) + for op_name, base_data in base_aggregated_ops.items(): + comparsion_data = comparison_aggregated_ops.pop(op_name, []) + self._rows.append(self._bean(op_name, base_data, 
comparsion_data).row) + if comparison_aggregated_ops: + for op_name, comparison_data in comparison_aggregated_ops.items(): + self._rows.append(self._bean(op_name, [], comparison_data).row) + update_order_id(self._rows) diff --git a/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py new file mode 100644 index 00000000000..13c0f776af6 --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py @@ -0,0 +1,35 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class KernelCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_kernel_by_type_and_shape(cls, kernels: dict): + result_dict = {} + for type_shape, shape_values in kernels.items(): + for shape, kernel_data in shape_values.items(): + kernel = [single[1] for single in kernel_data] + result_list = [type_shape, shape, sum(kernel), len(kernel), max(kernel), min(kernel)] + result_dict.setdefault(f"{type_shape}{shape}", []).extend(result_list) + return result_dict + + def _compare(self): + if not self._origin_data: + return + base_kernels = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_kernels = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_kernels or not comparison_kernels: + return + base_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(base_kernels) + comparison_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(comparison_kernels) + for type_shape, base_data in base_aggregated_kernels.items(): + comparsion_data = comparison_aggregated_kernels.pop(type_shape, []) + self._rows.append(self._bean(base_data, comparsion_data).row) + if comparison_aggregated_kernels: + for 
_, comparison_data in comparison_aggregated_kernels.items(): + self._rows.append(self._bean([], comparison_data).row) + update_order_id(self._rows) \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py new file mode 100644 index 00000000000..55e08a86be8 --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py @@ -0,0 +1,47 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class ApiInfo: + def __init__(self, op_name: str, data_list: list): + self._data_list = data_list + self.name = op_name + self.total_dur = 0.0 + self.self_time = 0.0 + self.avg_dur = 0.0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for data in self._data_list: + self.total_dur += data.api_dur + self.self_time += data.api_self_time + self.total_dur /= 1000.0 + self.self_time /= 1000.0 + self.avg_dur = self.total_dur / self.number if self.number else 0.0 + + +class ApiCompareBean: + TABLE_NAME = Constant.API_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, op_name: str, base_api: list, comparison_api: list): + self._name = op_name + self._base_api = ApiInfo(op_name, base_api) + self._comparison_api = ApiInfo(op_name, comparison_api) + + @property + def row(self): + row = [None, self._name, + self._base_api.total_dur, self._base_api.self_time, self._base_api.avg_dur, self._base_api.number, + self._comparison_api.total_dur, self._comparison_api.self_time, + self._comparison_api.avg_dur, self._comparison_api.number] + diff_fields = [calculate_diff_ratio(self._base_api.total_dur, self._comparison_api.total_dur)[1], + calculate_diff_ratio(self._base_api.self_time, 
self._comparison_api.self_time)[1], + calculate_diff_ratio(self._base_api.avg_dur, self._comparison_api.avg_dur)[1], + calculate_diff_ratio(self._base_api.number, self._comparison_api.number)[1]] + row.extend(diff_fields) + return row + diff --git a/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py new file mode 100644 index 00000000000..df96addc4fe --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py @@ -0,0 +1,75 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class KernelCompareInfo: + def __init__(self, data_list: list): + self._kernel_type = None + self._input_shapes = None + self._total_dur = None + self._number = None + self._max_dur = None + self._min_dur = None + if not data_list: + return + self._kernel_type = data_list[0] + self._input_shapes = data_list[1] + self._total_dur = data_list[2] + self._number = data_list[3] + self._max_dur = data_list[4] + self._min_dur = data_list[5] + + @property + def kernel_type(self): + return self._kernel_type + + @property + def input_shapes(self): + return self._input_shapes + + @property + def total_dur(self): + return self._total_dur if self._total_dur else 0.0 + + @property + def number(self): + return self._number + + @property + def max_dur(self): + return self._max_dur + + @property + def min_dur(self): + return self._min_dur + + @property + def avg_dur(self): + return self._total_dur / self._number if self._total_dur and self._number else 0.0 + + +class KernelCompareBean: + TABLE_NAME = Constant.KERNEL_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, base_kernel: list, comparison_kernel: list): + self._base_kernel = KernelCompareInfo(base_kernel) + 
self._comparison_kernel = KernelCompareInfo(comparison_kernel) + self._kernel_type = self._base_kernel.kernel_type \ + if self._base_kernel.kernel_type else self._comparison_kernel.kernel_type + self._input_shapes = self._base_kernel.input_shapes \ + if self._base_kernel.input_shapes else self._comparison_kernel.input_shapes + + @property + def row(self): + row = [None, self._kernel_type, self._input_shapes, + self._base_kernel.total_dur, self._base_kernel.avg_dur, + self._base_kernel.max_dur, self._base_kernel.min_dur, self._base_kernel.number, + self._comparison_kernel.total_dur, self._comparison_kernel.avg_dur, + self._comparison_kernel.max_dur, self._comparison_kernel.min_dur, self._comparison_kernel.number] + diff_fields = [calculate_diff_ratio(self._base_kernel.total_dur, self._comparison_kernel.total_dur)[1], + calculate_diff_ratio(self._base_kernel.avg_dur, self._comparison_kernel.avg_dur)[1]] + row.extend(diff_fields) + return row \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index 9c4825c0e8e..c15396e9c59 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -12,6 +12,7 @@ class KernelDetailsBean: self._data = data self._op_type = "" self._name = "" + self._input_shapes = "" self._aiv_vec_time = 0.0 self._aicore_time = 0.0 self._mac_time = 0.0 @@ -27,6 +28,10 @@ class KernelDetailsBean: def name(self) -> str: return self._name + @property + def input_shapes(self) -> str: + return self._input_shapes + @property def aiv_vec_time(self) -> float: if self._aiv_vec_time == "" or self._aiv_vec_time == "N/A": @@ -109,6 +114,7 @@ class KernelDetailsBean: def init(self): self._op_type = self._data.get('Type', "") self._name = 
self._data.get('Name', "") + self._input_shapes = self._data.get('Input Shapes', "") self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py index fdce23c6ab4..3106527c419 100644 --- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py +++ b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py @@ -17,3 +17,20 @@ class OperatorDataPrepare: else: result_data.append(level1_node) return result_data + + def get_all_layer_ops(self) -> any: + root_node = TreeBuilder.build_tree(self.profiling_data.torch_op_data, [], []) + level1_child_nodes = root_node.child_nodes + node_queue = [] + result_data = [] + for level1_node in level1_child_nodes: + if level1_node.is_step_profiler(): + node_queue.extend(level1_node.child_nodes) + else: + node_queue.append(level1_node) + while len(node_queue) > 0: + node = node_queue.pop(0) + result_data.append(node) + if node.child_nodes: + node_queue.extend(node.child_nodes) + return result_data \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py index c89e8451930..7bac2b03353 100644 --- a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py +++ b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py @@ -31,4 +31,30 @@ class OverallPerfInterface: def _generate_result(self): overall_data = self._profiling_data.overall_metrics - self._result_data = getattr(overall_data, "__dict__", {}) + + self._result_data = { + "profiling_type": overall_data.profiling_type, + "minimal_profiling": overall_data.minimal_profiling, + "overall": 
{"e2e_time_ms": overall_data.e2e_time_ms, + "computing_time_ms": overall_data.compute_time_ms, + "uncovered_communication_time_ms": overall_data.communication_not_overlapped_ms, + "free_time_ms": overall_data.free_time_ms}, + "computing_time_disaggregate": {"fa_time_ms": overall_data.fa_total_time, + "conv_time_ms": overall_data.conv_total_time, + "matmul_time_ms": overall_data.mm_total_time, + "page_attention_time_ms": overall_data.page_attention_time, + "vector_time_ms": overall_data.vector_total_time, + "tensor_move_time_ms": overall_data.sdma_time_tensor_move, + "other_cube_time_ms": overall_data.other_cube_time}, + "computing_num_disaggregate": {"fa_num": overall_data.fa_total_num, + "conv_num": overall_data.conv_total_num, + "matmul_num": overall_data.mm_total_num, + "page_attention_num": overall_data.page_attention_num, + "vector_num": overall_data.vector_total_num, + "tensor_move_num": overall_data.sdma_num_tensor_move, + "other_cube_num": overall_data.other_cube_num}, + "communication_time_disaggregate": {"wait_time_ms": overall_data.wait_time_ms, + "transmit_time_ms": overall_data.transmit_time_ms}, + "free_time_disaggregate": {"sdma_time_ms": overall_data.sdma_time_stream, + "free_ms": overall_data.free_time_ms - overall_data.sdma_time_stream} + } diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 292e3128154..6fe693fb067 100644 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -8,6 +8,8 @@ from compare_backend.comparator.module_comparetor import ModuleComparator from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator from compare_backend.comparator.operator_comparator import OperatorComparator from compare_backend.comparator.operator_statistic_comparator import 
OperatorStatisticComparator +from compare_backend.comparator.api_compare_comparator import ApiCompareComparator +from compare_backend.comparator.kernel_compare_comparator import KernelCompareComparator from compare_backend.comparator.overall_metrics_comparator import OverallMetricsComparator from compare_backend.compare_bean.communication_bean import CommunicationBean from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean @@ -16,6 +18,8 @@ from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean +from compare_backend.compare_bean.api_compare_bean import ApiCompareBean +from compare_backend.compare_bean.kernel_compare_bean import KernelCompareBean from compare_backend.compare_bean.overall_metrics_bean import OverallMetricsBean from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare @@ -39,8 +43,10 @@ class DetailPerformanceGenerator(BaseGenerator): return op_compare_result def compare(self): - if self._args.enable_operator_compare or self._args.enable_memory_compare or \ - self._args.enable_communication_compare: + enable_compare = [self._args.enable_operator_compare, self._args.enable_memory_compare, + self._args.enable_communication_compare, self._args.enable_api_compare, + self._args.enable_kernel_compare] + if any(enable_compare): print("[INFO] Start to compare performance detail data, please wait.") comparator_list = self._create_comparator() else: @@ -97,6 +103,18 @@ class DetailPerformanceGenerator(BaseGenerator): comparator_list.append(OperatorStatisticComparator(op_compare_result, MemoryStatisticBean)) if not self._args.disable_details: 
comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) + if self._args.enable_api_compare: + api_compare_result = { + Constant.BASE_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.BASE_DATA)).get_all_layer_ops(), + Constant.COMPARISON_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.COMPARISON_DATA)).get_all_layer_ops()} + comparator_list.append(ApiCompareComparator(api_compare_result, ApiCompareBean)) + if self._args.enable_kernel_compare: + kernel_compare_result = { + Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).kernel_details, + Constant.COMPARISON_DATA: self._profiling_data_dict.get(Constant.COMPARISON_DATA).kernel_details} + comparator_list.append(KernelCompareComparator(kernel_compare_result, KernelCompareBean)) return comparator_list def match_torch_op(self) -> list: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 6ee07a65696..9daaa55ef16 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -20,6 +20,7 @@ class ProfilingResult: self.overall_metrics = ProfilingInfo(profiling_type) self.python_function_data = [] self.fwdbwd_dict = {} + self.kernel_details = {} def update_torch_op_data(self, event: TraceEventBean): event.is_torch_op = True @@ -43,6 +44,9 @@ class ProfilingResult: def update_comm_task_data(self, comm_name: str, task_event: TraceEventBean): self.communication_dict.setdefault(comm_name, {}).setdefault("comm_task", {}).setdefault( task_event.name, []).append(task_event.dur) + + def update_kernel_details(self, kernels: dict): + self.kernel_details = kernels class BaseProfilingParser(ABC): @@ -57,6 +61,8 @@ class BaseProfilingParser(ABC): self._enable_operator_compare = args.enable_operator_compare 
self._enable_memory_compare = args.enable_memory_compare self._enable_communication_compare = args.enable_communication_compare + self._enable_api_compare = args.enable_api_compare + self._enable_kernel_compare = args.enable_kernel_compare self._dispatch_func = self._get_dispatch_func() self._result_data = ProfilingResult(self._profiling_type) self._memory_events = [] @@ -80,6 +86,10 @@ class BaseProfilingParser(ABC): self._cpu_cube_op = cpu_cube_op return self._cpu_cube_op + @abstractmethod + def _update_kernel_details(self): + raise NotImplementedError("Function _update_kernel_details need to be implemented.") + @abstractmethod def _update_memory_list(self): raise NotImplementedError("Function _update_memory_list need to be implemented.") @@ -112,6 +122,8 @@ class BaseProfilingParser(ABC): self._update_memory_list() if self._enable_profiling_compare: self._update_overall_metrics() + if self._enable_kernel_compare: + self._update_kernel_details() self._check_result_data() return self._result_data @@ -291,7 +303,7 @@ class BaseProfilingParser(ABC): task_index += 1 def _check_result_data(self): - if self._enable_operator_compare or self._enable_memory_compare: + if self._enable_operator_compare or self._enable_memory_compare or self._enable_api_compare: if not self._result_data.torch_op_data: print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") if self._enable_operator_compare and not self._result_data.kernel_dict: @@ -300,6 +312,11 @@ class BaseProfilingParser(ABC): print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") if self._enable_communication_compare and not self._result_data.communication_dict: print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") + if self._enable_kernel_compare and not self._result_data.kernel_details: + if self._profiling_type == Constant.GPU: + print(f"[WARNING] kernel compare between GPU data and NPU data is not supported.") + else: + 
print(f"[WARNING] Can't find any kernel details in the file: {self._profiling_path}") def _read_trace_event(self): try: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index 7b1ae1a5a12..0aeeba83efb 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -33,6 +33,9 @@ class GPUProfilingParser(BaseProfilingParser): def __is_sdma_time(cls, name: str): return any(mask in name.lower() for mask in cls.SDMA_MARK_LIST) + def _update_kernel_details(self): + pass + def _update_memory_list(self): if not self._enable_memory_compare: return @@ -171,6 +174,8 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_memory_event) if self._enable_profiling_compare: func_set.add(self._picking_flow_event) + if self._enable_api_compare: + func_set.add(self._picking_torch_op_event) return list(func_set) def _infer_compute_stream_id(self): diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 457a3b6be5e..cb25c252c6c 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -53,8 +53,32 @@ class NPUProfilingParser(BaseProfilingParser): func_list.add(self._picking_kernel_event) func_list.add(self._picking_hccl_event) func_list.add(self._picking_flow_event) + if self._enable_api_compare: + func_list.add(self._picking_torch_op_event) return list(func_list) + def _update_kernel_details(self): + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except FileNotFoundError: + print("[WARNING] The file kernel_details.csv does not 
exist.") + except Exception: + print("[ERROR] Failed to read kernel_details.csv.") + return + if not kernel_details: + return + kernels_dict = {} + for kernel in kernel_details: + if kernel.is_invalid(): + continue + input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A' + kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append( + [kernel.name, kernel.duration]) + if len(kernels_dict) == 1: + print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") + return + self._result_data.update_kernel_details(kernels_dict) + def _update_memory_list(self): try: memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py index 4b5947fa7bc..ab9fb43a968 100644 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ b/profiler/compare_tools/compare_backend/utils/args_manager.py @@ -69,6 +69,14 @@ class ArgsManager: def enable_communication_compare(self): return self._args.enable_communication_compare + @property + def enable_api_compare(self): + return self._args.enable_api_compare + + @property + def enable_kernel_compare(self): + return self._args.enable_kernel_compare + @classmethod def check_profiling_path(cls, file_path: str): PathManager.input_path_common_check(file_path) @@ -119,11 +127,14 @@ class ArgsManager: raise RuntimeError(msg) if not any([self._args.enable_profiling_compare, self._args.enable_operator_compare, - self._args.enable_memory_compare, self._args.enable_communication_compare]): + self._args.enable_memory_compare, self._args.enable_communication_compare, + self._args.enable_api_compare, self._args.enable_kernel_compare]): self._args.enable_profiling_compare = True self._args.enable_operator_compare = True self._args.enable_memory_compare = True self._args.enable_communication_compare = True + 
self._args.enable_api_compare = True + self._args.enable_kernel_compare = True base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py index ab9bc364f44..9e6291e89e0 100644 --- a/profiler/compare_tools/compare_backend/utils/compare_args.py +++ b/profiler/compare_tools/compare_backend/utils/compare_args.py @@ -6,6 +6,8 @@ class Args: enable_operator_compare: bool = False, enable_memory_compare: bool = False, enable_communication_compare: bool = False, + enable_api_compare: bool = False, + enable_kernel_compare: bool = False, output_path: str = "", max_kernel_num: int = None, op_name_map: dict = {}, @@ -17,6 +19,8 @@ class Args: self.enable_operator_compare = enable_operator_compare self.enable_memory_compare = enable_memory_compare self.enable_communication_compare = enable_communication_compare + self.enable_api_compare = enable_api_compare + self.enable_kernel_compare = enable_kernel_compare self.output_path = output_path self.max_kernel_num = max_kernel_num self.op_name_map = op_name_map diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py index e2002588024..252aa536e1c 100644 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ b/profiler/compare_tools/compare_backend/utils/constant.py @@ -39,13 +39,16 @@ class Constant(object): # compare type OPERATOR_COMPARE = "OperatorCompare" MEMORY_COMPARE = "MemoryCompare" - + API_COMPARE = "ApiCompare" + KERNEL_COMPARE = "KernelCompare" # sheet name OPERATOR_SHEET = "OperatorCompare" MEMORY_SHEET = "MemoryCompare" OPERATOR_TOP_SHEET = "OperatorCompareStatistic" MEMORY_TOP_SHEET = "MemoryCompareStatistic" COMMUNICATION_SHEET = "CommunicationCompare" + API_SHEET = "ApiCompare" + KERNEL_SHEET = "KernelCompare" # table name 
OPERATOR_TABLE = "OperatorCompare" @@ -57,6 +60,8 @@ class Constant(object): MODULE_TABLE = "ModuleCompare" MODULE_TOP_TABLE = "ModuleCompareStatistic" OVERALL_METRICS_TABLE = "OverallMetrics" + API_TABLE = "ApiCompare" + KERNEL_TABLE = "KernelCompare" # memory SIZE = "Size(KB)" diff --git a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py index ae808863e77..b6be0ae2ebc 100644 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ b/profiler/compare_tools/compare_backend/utils/excel_config.py @@ -57,7 +57,7 @@ class ExcelConfig(object): DEVICE_SELF_TIME = "Device Self Time(ms)" DEVICE_TOTAL_TIME = "Device Total Time(ms)" DIFF_SELF_TIME = "Device Self Time Diff(ms)" - DIFF_TOTAL_RATIO = "Total Diff Ratio" + DIFF_TOTAL_RATIO = "Diff Total Ratio" DIFF_TOTAL_TIME = "Device Total Time Diff(ms)" DEVICE_SELF_TIME_US = "Device Self Time(us)" DEVICE_TOTAL_TIME_US = "Device Total Time(us)" @@ -71,6 +71,14 @@ class ExcelConfig(object): DURATION = "Duration(ms)" DURATION_RATIO = "Duration Ratio" DIFF_DUR_MS = "Diff Duration(ms)" + API_NAME = "api name" + TOTAL_DURATION_MS = "Total Duration(ms)" + AVG_DURATION_MS = "Avg Duration(ms)" + SELF_TIME_MS = "Self Time(ms)" + DIFF_SELF_RATIO = "Diff Self Ratio" + DIFF_AVG_RATIO = "Diff Avg Ratio" + DIFF_CALLS_RATIO = "Diff Calls Ratio" + KERNEL = "Kernel" HEADERS = { Constant.OPERATOR_TABLE: [ @@ -193,7 +201,39 @@ class ExcelConfig(object): {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, {"name": DIFF_DUR_MS, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 10}, - + ], + Constant.API_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": API_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": 
CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_SELF_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_CALLS_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + ], + Constant.KERNEL_COMPARE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": KERNEL, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, ] } @@ -201,7 +241,9 @@ class ExcelConfig(object): 
Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], Constant.MODULE_TABLE: ["E1:H1", "I1:L1"], - Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"]} + Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"], + Constant.API_TABLE: ["C1:F1", "G1:J1"], + Constant.KERNEL_TABLE: ["D1:H1", "I1:M1"]} # overall metrics index # computing time diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py index 690c46cd51c..69ee92d1232 100644 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ b/profiler/compare_tools/compare_backend/utils/torch_op_node.py @@ -64,6 +64,14 @@ class TorchOpNode: def device_dur(self): return sum([kernel.device_dur for kernel in self._kernel_list]) + @property + def api_dur(self): + return self._event.dur + + @property + def api_self_time(self): + return self.api_dur - sum(child.api_dur for child in self._child_nodes) + def add_child_node(self, child_node): self._child_nodes.append(child_node) diff --git a/profiler/compare_tools/compare_backend/utils/tree_builder.py b/profiler/compare_tools/compare_backend/utils/tree_builder.py index 34c1fe1a1f4..d5aa787ac2c 100644 --- a/profiler/compare_tools/compare_backend/utils/tree_builder.py +++ b/profiler/compare_tools/compare_backend/utils/tree_builder.py @@ -23,7 +23,8 @@ class TreeBuilder: tree_node = TorchOpNode(event, last_node) last_node.add_child_node(tree_node) last_node = tree_node - tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) + if kernel_dict: + tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) else: event.set_name(last_node.name) last_node.set_memory_allocated(event) diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py index 
dffb7549fcd..58bad621b03 100644 --- a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py @@ -12,7 +12,7 @@ class WorkSheetCreator: self._work_sheet = None self._row_id = 1 self._field_format = {} - self._diff_ratio_index = None + self._diff_ratio_index = [] self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" def create_sheet(self): @@ -47,8 +47,10 @@ class WorkSheetCreator: self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) self._field_format[index] = header.get("type") - if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): - self._diff_ratio_index = index + ratio_white_list = [ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO, + ExcelConfig.DIFF_AVG_RATIO, ExcelConfig.DIFF_CALLS_RATIO, ExcelConfig.DIFF_SELF_RATIO] + if header.get("name") in ratio_white_list: + self._diff_ratio_index.append(index) self._row_id += 1 def _write_data(self): @@ -56,7 +58,7 @@ class WorkSheetCreator: for data in self._data.get("rows"): for index, cell_data in enumerate(data): cell_format = self._work_book.add_format(self._field_format.get(index)) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) @@ -76,7 +78,7 @@ class WorkSheetCreator: if index == 0: # 0 for Index field cell_style["indent"] = cell_data.count("\t") cell_format = self._work_book.add_format(cell_style) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data 
self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) diff --git a/profiler/compare_tools/img/OverallMetrics.png b/profiler/compare_tools/img/OverallMetrics.png new file mode 100644 index 0000000000000000000000000000000000000000..b130d3607344c983a9304440e38a45fe96a4bb56 GIT binary patch literal 66941 zcmdqIXH=72w>GK|3i^maK`9z~Rhk6pT~NT#M5P8I9So6<5C}-ofb`x$q$tv)geFxW z^j-p?_YyjU651D@x9s=X-`?Yl^W*$EW3aeqa>KgUTC=QcUTfZgS{lkX|Gf9-rAwD? zs;VeyU%GUq;?kwd``5@x-#GP%tdlO6owSvoTq@`Wu8=-lu~JZ1xOAx~?8b@7Rnq6{ z_A2^Lmo5PsFWTi6yYJ?gE}dtoDk;2jGhVBa30+X>=e#GCbCt5!^fDuiCttMp32U@w zzKmza<6AuNaJL6FH=S>!*D?l2TXPchn|QWQ#3xjooOb0fr>vVYC~*hXbD{T%NeNN3 zV#3B}J?BSqY18Ms=O=R8Q5>o})aGieR;o!CZ6!*Q?eF5sz(8Ne!RC})9rAepsO}7V z&ZZj4@$a9u0f$p^I)Q7xECL_n;^H)!q}{fr9hTUeW^YB1{-{mLF|hfHP$;vUSEle+ za!qA5``cF>?mlX0_M@O;q7aXtCiPY=l`zd_*>FXd`LDaej~ZMe=h$z-2(bk)iq*ai ztEyN<;@nH-LZ4xBB}_h50yxO}rIZ#op#l|K@P)axgy=@4RQbO3uwT>pPko)bs%|Fj zZ}^%cmHXaFv624a|qXoBi4P?G&)N)okBgNVW?W8jjnvmfID#}7M#9BR+Ap<>t*aGog0?+ zKo)Uu7K?!%bEZ5qGCW zGOfV56GNE2nd#XTAu_HC;v3%}mVl(o_m2?%s8Ba}AS%v~F^)-2#x37*D-iX}-M(}w zPF6Z)Hl1o=zuqsslU0TTa2vkx6YJ=9z~iLoA%O}tw?%$YJGkrqr6x^>vHQj1&?uQ1 z;y_!9g{q15fThfAN$`NzqF=|hi3w2)^0wh6eakLcj~+&8VPf_{P$NO7CUHlB(>>{R zY56|CR_uDs(Y2?t0Ei*7-N!K}N)Z=8nViwd03@mr1Z?mO(9IK zaPaQdM%Hh(hP#+>bFk34bDcJMO*)+a_8r2Xqi@elHJ|J$AlGQ-F`^Y3ONpR@(zqi_ zf=%c)Q~wc8v+HHIFqcfg$y#ml(KU4Fum@lXB-a-dHyD)^(&!r|)Sau5ZdN2}j`*+O?W$!1S2 zORCbo_4yBgJ!yx07Y=2_BO^K39i)m>=ZkKk&}9hocM69Qk0%}x4_rWU9)(pJ2T=ri zR!odU=q)(m1b!Zy7g3Z4$L%}9$J$sA3a|=$*G^R!0u-Ge-Z|P2uPsBunTX*MffGZR zaK#>703?GMFbHN`yn;}+OEzRCGCd2s(8Z%zueL}!Mb&F6e`FKBo~xA#Ht+oYr7K6| zl*t9xYI*e4(l=IPIi5ZY61~;F29}PzRh81qb>*u@bf|x1ll&8V^+)b%#PI#zb(~o6 z94A9@WrD}&^`|ZA8inaKx<+vXFka|MFs~EP)H-*GxkoW!StZRdG}r<@Q05KBzP_(P z8u{gI*6s((%G@^T%6&On^M)Qzq&la{n0K-sa~DZfa!X;Ji$J&0opCSHRzZfKwb0$G zPXrV?KU^=6C53})vzGPGYMEvl*o@~`RRQktP22#&rPrlsmwpNg^NvOuS%-DT?=5~o zKv#`^<}{X7Sh0(g))aUhf@uV1*1Zq=9)$oJe3D%|aab+iv>%T7H#NdBj&H%z+f%Q?ClN$ 
zr5#xxayJ9CFA)uEY)M(P=6FG#Djc!Z0bdy0V+TiovR=MoeEOkFFw2;AdY&)48@>> z2Sxik<+RJuOT1Ru*~wCmc5%Ij;v)Hi@X8nj=xvd^Tr}a--wg69_xUf@AKg zDzO82LYHvsUGJaKN3HZ3f$ZCi$NXsD=ie%ZVI{=kM5qba2luVm+^FA>z~l=Nn^dXm zHC}ZH)%1{z$nUM z>DyR`rxI$CXzi{OTN6uW!9tw1CPX>0Vtz~eS842-ECtFE4K{A1A~{m zBoYl`1>!0LJ@?QD*@0(l(8WI(;;CdU-honqlhQC$svJPdN}WNsVBwaL$K2|%VhvE6 zt4Z2^`sIf5BG+jqKOlnF1w_9Bxd)D%xQH-AZQ!{>^(|cOtFqwZ8YvTZ;Iwam~b}y@#pzocyh?N_ZWQGvP;IsGV&>hODh&tAE}w>zp6kOHA46)7tFPqJ`pAL@I(0JQx@%pNuCn8}aB{ zUkGX$|UU(nd9c1hjuoG;Rmv_XIaNaOl%LMuOA~jViUk72%Ow>V7sDi1 zU@hUtB|Fu}jaG>i#+Yil#~o{&H!WtIkVjz0^&6!eH>&BX(fP4{c=>9j!YQwCuXG5v zBpX3oRU?`amv=s;^~Py~C_RYYd(#4i=0!mar+~UR)N!!j0MYpEQb;AP2!Us?nZJHR zh8r^vUN*zNJWx(a;a&92^chF(L628FD6yaeaq@X7m;GAvcc6k!C=Tqc&JSKJExVD1 z_G0%x4fN~;Q;ThWh~JIgCgXg-VMccv#)OWta(2bQK#|U-SEbo7%u;HFs9whdyqf?&UrwH-X8h*0DKA>JP ze835ENm>7fY}g+lvIa0bY8axRfm~4-r*sgGW4ArozDIS%vGr3JD5r&tvVFni>VkN` zU(@qJkA){Pp+I9PlIuJb07URR)u)e519zKQ4}5!Yl|w$`>?Cu5lmmDRb+{-njnaEQ zM;#UYx*Mth4`y-mjb$^u`G$5O6|#re>qO)Jr8G_wx$How%%@>sS29EfXz=BG5l*tG z3Sg%Sty`3xo`u>kf~}E9 zU&cdOcf(~TWv1&T+O^$1+y3M;Zl|mvirkJ|G=<{kv!w8_RggrwY^@sV_^GL)@jZ)9 zwrx}Mwi^YHxT>DGJ8jsgZ9aXCvqVx(9IIfv92TU zNUu31$P*YU{%-ZxWYnX*wzKP&JRQTsgKgtj1ZQrU?CAJ!t7K&s+e=HXll|`)b<>h0 z8Jq&r;i8|Gn|fEplDt=G3VBa;8H{E2(iZIqxtpB1;Dzm%h0w?}q#rM#<(`teWlaeI zyfiBLDH!Q!tplxWw*l+bIgsv-pDRe7>YtH`3+-jCIF+BjLoE zsyOtk?%=4`^y)!&8pQ8EQ8eqjMMr;%4{jk8e;~h5Qc?O{sBSm=p#1m3@ASay zhFQ&q0ETuJV;=Pqi@u6X##{PQ<);Lh(8)U4JZYm(+iO0qRr~;(G^R(`<04comBc(Q zH|jW|=bnYM-8*rmvgN1D2#6Fa!pk^?gcoR>VqA)lgp;S=Ky(sHbi|)W4eXE<}Gz2%ZMZEEX`yd1cM979;T?I`%OAF$AfTbX&LwKA94_Ft( zS(F1D8R*s>LKHV{z-?33x#u#*_oHB*v%N?|NBe2WMnf54@kh(0eCY{gC^xe0M!eSw zq~lIdT-jD-?c?m){Va8J3nbi-vn4bgdd~q)qk@*0TktxS`|+EkdQu!16%q;Bz6nP< zL2Q^8-9xVE;$?O2xmR_r-f5!OH++O{K`Nzmoa(I}t5{T=&oaH7K*+6>v_DRj5_{BU zjIGacT6)ymlm9XppUbd#?_4!)fTZdEoMBX{!j!GL`dGzAuWDk(BV0gm&SKrFev0yE z#`_ZbDNKT-C{Gi-yGP_U?N*6ju`^ydQt<_^KD9ESas=pCK^Ey4h)?g|B~REkm_Y46 zZEY}EuKc3D_;RJzyvv}LXRf6zJ8hWiKXGC}QYFx2E)46&60Ruf8J$J(QI89*kc@W~ 
zuP!SCEbPlmFloF+<2RK_qBMfZ!TuccVHOG0u-1<}jmOnjcn zSq9J-D4Utjg^BP$6C2aZi-fs~#~3q3fd$Yb#C>LbzfmHP|?=T8DfZkTPL4Vkpn{a;8u&B1?DWP zNa$EPKWHdsnd8^NperL)llY2p{u$fr6jF43ucl)vYt6g!+bZ~aI%JKP9e0&sRJz7J z6I3ZTA-nou#bijoN?BiQ!g#Dwpaq%d3Mcv@@`GTVR7=@uOt4GGbb3sUZxDn#ZJ8z^_#45Jiw*pC? zPRH0fzQDAgLZ$p7yI=scH?#7prw@;UAzN!qq!|YG8!l`hp?o+*_fz9!iEJi>=xbd_ zWn_gVTXK{u17O*15GtrDxq;sm?*n<_oFsZRdaf436IvrBb9obfk#Il)s#86#L$3;u zh5+|-*ZO*md_n8*+_K2t{ItAb!nf9^HKaiAjz$34Ue2pZ z04n!aS!fot!(hO#R1Dw(8Kdd%Z4In&1z6XvC{D3t@u&p49c~9s`ftJ4e(jQkJ3Ju(Hh^G>M|Y4ZC{63WTwaO zcn9P3uBuqaRK1aB;fzb^?K%_Lq_yf(b=}NfmkfJco(I7^ciA~lT9Gh6NIy>3#OqZS zCoUb||F#iGV*u`l4T`j(d*Sb66q$zQgJD$ey<~l~s7}rDIFw{>_iJ;1RLxp=ksb1l zm!qH~v!kXXGikSvz9D1ePO+hngE8uV)hLf~EV?{KxA0YWB}f9sICsy*QH7ZwI)%a- z&(DQ>At)cvNc1ZMTAM@yBt29-_OOz4M%SL!M#J>k6r+?+uMqZ{d{uFBRIkPeX|i(*RHI;8kcYOkTjTYD!{7< z2U9qegu#+3UH|(OtB2U)VfTlVxa`M5vVU_UDjI1F72zieh2hJozhLD&!Va}v%(CMQ zH=AQl?zxy=^7yC@h~RKn6DW#j!Tc9Z*MzeB2irHrUi&J7Lf zdqmPGj#B6bPK|l+HtF&oq8f8*=wF1v!#7%7mI_I%q_*NG$@X_~rE%c)1x$oorEG8F zvlhN+NG;|6CL-M=O)ogO+z$!~@mJxWNG+RcHQ?`JHr2oROCFk%FI~4tZBK#m_TNQP zKnb{@jtc%Dw=ktGWgxW#+3)Os7ZYTq8HFyA+t>A9)H;1d6-jM?!bR!t;sEL!`(NqK zpY+jz`n{F`q(=M_dU5#=k+|jDbcIggmhNRg(BD_+|3`~e!sY=XLZQY_M+diRlnboIPcYLMkMJijNZfX zFB&=w52S3Dwxq&QhK>4(4#>?mGOLD0i7IwSiVkoe@G=chz_Y!ZMxsc3AgO!E(i^SC zi+Y^2 z*2^{NYmu((ut!y12eQ8**toneDBVsY;blB~bRk@zHBCX+PC;{9a%G+06X#mu`71m* z#^xQf@dOQZm0lrplOpt`3(74&C(WV1Cf@_u59Pp(z$6fkf~3G0ACzn+ov^F>kI}l8DfU#Bm+72Cix{+q z(eK|>!$#_tZ(~YGEH){8U?rUS!l!fmIDL1}vsdTYn2dJ9LuUF>Lmk4*26bU?gh`w= z+i{jx>vu+M^4KU9oQlY*o(gTUQ{IzCv#)CYa@V)fMYatWCSP`~(T zRQOJC;ptKTZZo83=c^}NHnA(ud1HaSc{ZY{5ykyipKmk#&Ac`f4Mbi`WKBvKl1*bZzQ(JrhriQ;51ul>Q_ zEjO6#JY`w{ij(&!>`_}bsqwm$q(AyA&K|j4DRbRn3h&at`y*E?ma`&q1CkUWiH?1p zrUTCsFdV}qy17>F|Dv9gp1(HN3|oF0>MXbYGOa`A+B#bH*}`*V|0!Y`(t(mLje%(q zS6#)ibJOxr8$V45Sfc$=*&zPtIU^t=q8JR@9(_RXy5)u~{BYP-(I`aC%j2?au6S8I?s5Jd*$Vh% zX$&2TM5 zd5=+gg+Q^Ynot>0ffe^riB*Y_LyILCo*>Rf=dZ?K2pBs^pjuV{{9Z zzUH%E{*!@p7NXwSP;kWgmka0ukmtSZ-(5JWXik?2#V!Ut;s%a;6;+G?UW%nuBjep` 
zW(9)!CdT_$nIX{e0na4CkJfPo9}iudiRyv!?nQ;1S3Y0)kQ$eeaqxs!^{N!qFfAl% z9<#5osL5bxZ6PO=jw0alP+N~hGZJuZ@ew9~P0KDp$#Ra}zQAK>;m~P|xfzRd)hf?Q zw~^q>Z-ijH8n!I|Ub!9(`!~xtEC&V}_NdS;#C?^$t7Agy3s_eY}=b6}fPOTwZV zfv!55^at9a_HFE~r4_cm-~vKrR_%ib&gU}?el2srEx1JWm+TIXMU|RRVR8F{Isd(w zOML5`v?O$xstSp870=91$w>9v^P>d1)*?v1fR(hX9!15eM<1Bxf8tF#@+S8XGxe3j zC3`h7UCWJSjg{}-hg8aR_#DS5;`R49sLZ}cf$VB*Q-*~B`7lEi88)1Io$skYw_S~_ zNhj&(p|j3yd)Q)SGP5=*PX(E7#c@6J)cJ{gxro!CQ@Gboq(Sxc4iTOG6&7n|e=d+;akB!!*Xv5&4AVJ&vix&yta(=c^agktSGlp+pCGCCExN;@g23^_n56mo-5qo;2r{s=0OH))$NbhG8wbU3PqF7S_7$jH_PJ-i+-D6@ToSo ztZXurono~}!L4|n_)m+jqO78>amRloTRJFZK#sVwK{3BDJM*?b1yrkVe`epyCP zSb1~dJu+Qn7fmppWsOB4jy{FPppE}!#R5T9&k<62UDk> z_yBx&nauy18@Uk$TQB zmVsH=bpb=vu0cwTs*t43(u3rSZBmAVcKYY`r50K+&@ zn3i5x8W-1Y@I)S?V6^Q`n?QQ>K}_6??Zywg^^b=M7cp69$>&=!gZcmpdW75~d}!G<|B589a=(?<3gW^91r@F)U z_bnc`s~{pIJDX2$SlNW`n7)m=YaD5^Y(bArhfLL-fjx#YTWP2fdW8dsfNnNTOJQF1 zU#*Wnb#Bi(c~H;RVqwZXk`V#;A6*Og$v7V&FRi{sU1E}(JN%@O0lJJ?hn#<7K4Sd3RC?$ZzCN_7gT5z~aw*JL)fW=NluHFn8l!Kob5u#f5eDsRp?IL++^$pe4h!pu}Cz+uaY1Ut<)`du(bX~5MJR_7; zQNgCVBdOH8-L6{v?4P3>yiFS9KL$z?JKhM-P>jAAj3Ax-a;O@ku1Ok8s1B?Ak9A25 zwp=GH_k$%t*4!0mONMKuJKoiC@QvnkvCaj~&Yo271i41!50A0~@4h0H9iQuIZSAFY zHRRBK7p9lT=Q-_P&jW)$Gw*xu)%_mHrDW$0a^KrcqCv#%nx;@UGd;7tY@2`Zyk*!6 zyk3Iru~D9k(_=@bj6Q|uOX5e0^KT#3&^yCSQ}su!`QDYnh(jMvz(->4j88U+#9Tb% zQ4yhhlZL;}luwqT=ozsJovnCW{FUxy66ju;tEQ=}apfH`FB5L9X93ooQ*I$ML$ol~Lo+Yi_bN7h&_x1IgKb`D+COa+!-T3Y^7#C| zNlv%Ok6~$3=JXA%eRdMwbvH26m@lI1{Y3ZxSr8;+vR#kn)y=x0-QLnOz){^tA(#6* zRiz{}G)ig9ninR$0ai749Z-s4C?++-sm9?g`Wvjkk6me+p?eVRE(&lPWQKQs6JC=Y=0q$RdHQ`vCa=s zYv)V0HpW}~IXx)sI*$xCqklyW!937O{BnDY-`A-{=~wT0U6T!2(Sv-A#MW-NLk3aW{aIJS6cp2b@tNvyi3ApaJD`SOIwY;r)SqQe5-N6hu-65 z@ElE<;$$e!a>?C@e{ckRV#HS?e%*%~96WoVB9*|}GnJ>!ZPg;S%qH(QTEBWj05+O-cp&PS6f z=^{0W23QZrYV*7Eh`>VQlvd?PFf}B9Kh7LDR}n`%)n`T5 zXX5il)iR5saENl^EBKAl9hxMeWx$~1y^!WE*PC_sVhStA>e_N|!^(}JZ)&n840N^` z4Zj@=(K09Lc^67+7rt*yee2I?E`(+^Yaf26);YjiPO5x)bl*GaWcFV8fr(zyR%4dB 
z{fxo+uKaO6f0qGcJKAWOLoZhYgYldP!ZXhjgXic6X&@6_WZfoyA>yIZO3zbjT$62G z_=bj^+$xzQ1Pj)a*Q+V>^S;{d%feG4)FZ=`4$;ZVbuYX^@oSfrZc`Ns-KO;JFKTG| zv$VtWVc+gpp2mTL>4(mtD0SmFoTUzKB&VZD!$-t$HUIt4@E;2ep!Qg|d`vV{C;WhO z_&0By$Pj+eU^AGkY@v;=%(PdAl)-H0(*+r>n;JEfe53Hh*E9{Fani5jiUpIYq_(L_7AEQ6B0NnQm zT$vieqX;zwe1#k|R6@#EAB9<$_{5W8V(kLc^F${(>0QjHxhqQ9hv-ujCCnCq!!Op* z08x$U221j9WoFCx2&Y1UUxlHHhWbxO*O2dm>V67|s-~v=bdV7AEZnJ&K>NvEWhia{ z*BthEUu8%=M|h2WZA$uD08-vT42rVzG3k)a-OEJ>PGJ}!PBnF zqI33$nimD0zlp5@P#cEe=9Ua}Lb`}L1Y7buBK`-w*aCK7-c|TnB9!>rRNea@;T})t zRZ)6ffFr{8cZV@H^0ml@s(qWZbIn;9HjBZknyxVYXBovJL$43=nMm%5cvaN3ymidm4! z?K~Q{YiKg_o}L}ezuM_GPq2Y!2jm1gguHeSUg%CNAC}5Mi##1kXj&&ElQjD_DbV$W zeoJr&;oa!)_Y)n;DvATltii8T=*YGECM~e^Ci>IhKDcLHN;$_P+?>jwz^bQq*CKbDgJqqH2KN!{v$h|7oqRy z0_r);PUw^ri^E2P& zV#X$wgsMK3e!kXxiSebu_9s$+d$OsY5R(3R^ehwCnRl_%%eS_#sg3@WmX0EM`IGJv zv<@HBRu{rBDTlNYp1hlrO~8`kK0SyHh3M(qXSM4|dWK{>qW+TvLi4Z`dceV@sgNb8 z=y0UTpw-<@9k%}}D8=~byZUhI-A#qg7T))pzh-r(1J3pfF1z!6B3!>%2)Y2J$h=&R zjIGLi!ZTGj8;P=fjpyi9|8UMZUl6mZvyor8U7v>igunf?l!Oj{cqGS>3)`~W0+V+J zbEJ>3sAA-FmhEDM*)2062{6XgF*djv*KL6j7XwsuaB4|?pTYWc4h3fa+1*`7fvyb$?O8$TC?HNVMILhB-d0t<&<(7(TY{O3c35YLnL+ z;T52uE){@@ty6Mj>|ej5VLP%kvOD$ww}G^8eVs_#?osDQD8%O2^_`tigm6;LUPt)~oSCEG;i(SW8=kczPrWM90V6Rp8lz)Xs?pVj>r`GH;Fz`B{i)KD z!bvY+5A$mI+=WatH~c%D2X_MV)SH(KTq5%6)z2kw;E&7A1b=74MTS$04q3&Sf(&8D zBly(g*StE3RkoUM39v^|MS036`&g*Eggf7F%im$r2^j(YksK+HYm{O%tVaL%jgRP$*XGJGw)P9+dhH)%mifEo>%ZAI^SxeUqiwdbMrKxN* z1GMuVQ;w6VrvBww%V%6|`5k6Y^6qa1Y1c|Wo+;TkeKwht)kT?i-_**IrrnYp&mmC0 z7AehT=#cG}tpyBIShC8_h}CSu$M%FA&+T_e^4~X8J$izfH3O9}G-umAhUXUT2RBnS zBzExuOFvSC-L6O@RNdt@tQ`z4a?wHlxijeT(4EgTq%rIvX*Q>>cf}K@iwD;YK$#k~ z4GUc}1|;M*zIi@P%?*>J1sbe6+dnP^Jp|2+>;KV~q&SQX%~>3{SMorU(6?i_qq5`g zpGLRPS8$sQ2rU?H(9|Ap74<~jWk}i-Gwy8tHGu2vt-4!B+3Vm@#JEDoycG?fws>2@ z8`zUXVg;_c>s2{$ITj_D-hAKu#Yq_ZVvO5BR%|{@6$N7~XqGce8RnmtjxAvRSPNYl zSuHX#O!>?4P-YGtv`o^^%q2e`7zD=TFD2^8h2! 
z|Ct1Ngzj4}sU2IadO;E29?!lMA^Kf+K4Sj*b?@zVx*uItujx+Z(T&-1^K{QJRY!sj z^++N1&F)UiXIOq1#l5l5qNPon(6f{G1QUJZ$?~|v*pNu&bY`oct~GJ9!$jF(9382W zPN`!_U?J?2+oYMaER#{)y`#9Re%rKp`7&nP5|wha<#YAy%>io4`FfS*@A!B1Z0L}& zn@P%?q#(%?(DZ`e#$Q(QHm8!u;0#S*j*f3fR2bgC7fb`s%y0dUuDL>jVSq!;JFi(_ zgn|3{@Fjv`!e~79OU{@TQYfwcFkc9@kn?6BQaO^_yevWri1=dTCG|}`qUeOy{rZQ* z8VZ4({idqcqr<9%3S4jsI=(i>&NavD@{3aLuqPu*I<(k2Cd0=l)9{z?MQJ75D@MIK zmaOCQxBCZsGCT57)Rns3Jx z{@!Z)Lv=Cw9XO?F=#Z`UOpd0Kao*+ri-Kdpr8%mQ`P2AOwP3otQlt0%hKqMW2f0FN z^xKsNz%JvjKe@h+J?RN=TuNpy4sO=VP1SsC3$rIh%bt$YBE8YyeiyNFNRMIj*FI!m z^_zHAVKdqkD0^XvE$upvH}W_mdzIU_yB_lDjzor?MM~$k^-ket#VTGIInO>GrQ3}| zO!vZ4VyzbF=7#>r6hEd`p-mt{aINtVA3bh)1^QfR?T$X{7Ew(N{^@{BIVu}9H*|4` z+FuP0du)*h@f<)dvu$eRrRW;x8S)n3C}iMrlZq_ncd2~99_|$!VPLqhJLL$EYl#$j zN^-@wyu2lvhPTuRZeh9_*>3MXXe&OM~?I@c;F~(qLjp%w0E+ae7zuTGp%st(_`qAcXm~IPuQR{UADQWXKhWj zm+A7|s`IQRr|a$juiCnYr>~tocur0{y^%+rer%7Hum@V&EeLPlu<{cosZ$Zz$^Hsq z`uMqw%Ltd5QTsBc;o2E9cKqPm@{taIRZ0K=2fEAblKz$m(!{P@~`>PuzmAuQ)H}~ z1%1zOu+g#!kpLlnjpXSc;&0#YGS&wN-;$o3oZOiY-0;R9tzDk)P;5rGRMGEHFkd-( zORX^lt#0}hK4s=vanEyPGG_Wu>+bE#Ggp)0C9jEMqI6iR*Y=g&hAGc}8I@ValR@(U zfhI{p^!utc`ws=aoUbNX3v&dE|KXUVAIYr`)I(03fF5E}mQ-BBj$eblmqD{um8nh) z8QaMfVgl4bF4ZA%caKf@yskwTRnSu0E8Cj>>4olq8uWq^dxo3zZ^1D$)Iq!RG~KW^ zjuh)ij@SaaGV^O$b?;E-_uKoJm$Z%VFI});(q6v2#)4wZbc*i{#9+Pq3bQ}@?XEHn z->O$+f%^6De5h!<``OdT4nql=3=&d5o}IK>h-vk;W}W&fV%)2LqTU1(F9!_;Dc$~% zB4N2>4is_yF}88t<*a&F{g3tPxO;?YxXYV@jOy|(SJ6@Vr*gLm60&Sx$Zjt4pM6?w zT*k5ul=meVCEd+;?a4Y2HcRVL0cym(t_P?ApLYjQ)GcJaeMReV$6S=G4tiiJpI>QU zr%)>q;A2@qRn)vSbIhIJ%_m_c3Gdtd7gH zt2N6Die#TLX$7wj;_+7(&we1Zeo8*V(zh5TU6N_BMbex|$Z}PGpfMr9v<{z^8DlK3 z+w9jeIHxP0F$0X9iI3Lik8n@ePu>vy~@216q@UJNrPNv3JgwCE>jsCpMhgM>ox3 z)gQkt|HuNs337}|DRG`}+b(h1Tt(~%F-s_W=hSh{iIa!zdfNvMl!;E!KgtxKs%6Lv z{;YN-h;L@Lixm%Rd9`a3G8+Aj*o5b(WGAHhZWy!Y8Jk{Ia%`^mXwJrh>>MmOXhX*)T(!d+<_(C>M(J7j(Yl}e^oqv zNTAIKONrGtoaKoxV_+G!N!l_EwiDZ$pr89H6Hqtn^uU`IDkDttjbRN>8QETsZ8lu% 
zZr)}ERAf(V-paQNRa_x`uP5-A*|_B$QzMJQ^${ww?V}esHBNw9b7aet>}3Uo*zQ+S)P8sWOY)73;Bym>(!au555)3IAcPuOL4wQQZ_@<#83ZeKMbPS`@Z(j zQ1JD)zPDlz{VJsP$J7A#?|&AbmiKhpeSvFZ&>B%Ijp012t@RccysD==DAV&>^LXdVW#R4G-a*3!r{4Lz7#h5x6 zHg!qN-F#E9efy8=9SSlF5?Z@02+|Q!JU|?`*S-fwxmSzqG{6$(m}m?geY+(Fp z-GuEc?p?!{+oe^%KGI8!l-S(Kafxh#!~9({nEqw2Y0(3^pPd=eHKU5PZ6o_d`IP31 z`a_z&axE?XboOj(I9fzM5lS79aI^AH+wzXR(Cx0OTP^iJyR7f*lNt8Hj|G)~(!r%V z+>cG7#K|3#98BNS=B9C|qe2Mz$mOf}UQ;6gA?vy&uUll|~m|8n|>HjOT! zt8}*Bn$KNcua5Ymj!!;ij`<@cv5(<4Jo`yf<`ctHetwSVOeRvuk2H@d9k6ZdRjTAD zd`u`W+mpb78io{Mej+a0mmf}4pLTd$;s`fiiz2%2`@{(=68H1Oa(mxO!`U0CriTB3d zwycnh|7FHcQXpK${r3CX+sEYXfb7Q3l&l^_d_dp0Ajjf33j9UWOGgb-oBACQAb-*U zxVg#4}n;fJk101Bo54~Q!u;3YSdoLcXF zT^)rATet6hiT|Z!Hpp7oJSr9X&m!HBaoYEPkYt@rua(fU|<{PYvD zzZIuSgS<};BO<0Q-fVs-Ur%~*?Qg}q|DV5HUWwQ6kPL0-#f@)JoghO`*s4tf!#QF;ydMRUPVsV_lgf8?u3?G12(!#4YEn@0*`2E zO5X5sSvKg6Qe`RExm?51GVa0oRvZmf*Ssa+ti392T07BCV%fARgWhc}vxZE2ZFtA&wWKi~Sn&fG`Q3!Zl%gDuw>|KEdUX z@`>*Kn^|sY?~Kfl!O~yo$8xaP8=a>gM=b#;hrxT6PR1Y8!y@?);LajH?)%71wJ6Zq zN2k)oqv=XWv(uA`k%K1K2IS4m4uLNv1UL4o?_q8UEu^36>s zI;sZrYC^`e;#)?`kz9pRU{OKrmj+W6BMXPiycujx;K3^_=`(*vD=-UN5%+sMSn~U0 zubri&Cf}~5cl}!a7Sjh`R>VGy&wMFg#`K4W_vNYy*MbesuOe~&dE?!hZMl2zmyYE> ztmpM!ue+AJ>96za1w)e!$p9Ok0nH$`ev*d|WaD;}_j{BZ1d`lA|J^t`Eg!G_THsup zzX94{Xp~`;X+Cd2Ka|lPYA(|*pv7Ha2x`86Q#McR)aJ@CK#4G|wv1Tmo3Ni~*ZNMk z8C&6vt;%8W5&3&$p0t(mGFILGE`Z^ZevumYm?X^1;u1om`NW_~f9uutf+win@@3=n zp?9yy9Sp_?ESs3|s&=yhBNoD%!FG@c(ED!FMI z{v)o&a?utyZr--xn~=o^9lgTXg-GyeGH8KQ$=eCYLEKXk<8X_x&D2g1D<8IVyu1w6 zPkWNecz=hS8nS=J)+^T9zMAKGb?N`G_ntvfb?eqB8IdR`IVn*xG&C6$5hMx*az>J5 zu#qMQB?vS@B-lhz0f|k{Ip-XjoDpb{oZ+ta4QKCj&)MfxeYXmi@D268&&m)BToZZu*M`91Wov0N|CPLQnA@?{7O0 znEA&Jpqs6F#!c#c+)G;MHD%6GfiBCp?dm5%iSu$Uava+Cn6ckSh?^kBzqYL>#K*;( zj+jspMdGK)P$!oJ_c_&W^d_52-+vLFXMt-6`)az)e>Xq%bTfX`lTfz!)#skx9%p4y z%quS@d}xF2n}o>#&;2~N455(XMI&&a%Z#PC;?}&%=pdrsGKKJu<$`{~<3=6To_pmq z?-ytG$ldlJG@Rr1ar>KXPEQp{ub|-(nFb(Vo`;%@P)V zP5r7D6OIv3J}CdSg(XA`(qzHx56L?B^n15>h>+9?d6*k)y1(Wq9C%bW*ri=VcJ~gO;$p-p< 
zoOPZ+M4o}{uZ#~V#XiJSj-5=O9!}rUNhMN)Ph!k{0uzqXioR-iY7Vt@<#Hek#vki{ zY;gR!Ky<8Pawlm(g3H^&G@DRSF~b>HNYT|JGfS0z)R%;SIP zYHIdc7EdGv|0&7zM}_@Ki}!2z<`T5N;Jr3B7zWS>iPkLmsmO)x`@Kl7)`xNoXb{ol zNq8yKAk&GwNZ;jk9sbRGnCCd$1_?D;UL769aVWgG2F|0T%*I$>MYVX&y8<}__LC_L zDsKAA=yMR~1-gf--5r1Ag+VuM_m-^!>$9sA&7Pk*d1Ip5r_F;RF8&>=&1n(q--D)G z0)Ncdb|j|hS{w8|l&ncZIU6nV{bICRq-_gk^qn(UtTqQF`&hltVZq#<3=RGUJ8CtX z;@cnXnrOK+wYRjMG4q=7;>f>5W+VDRl(IO}Vj%M8AkWv*q$x~l^*@+5n8R9^Bz|;&h{m%6lq_P|&M2~YJG~+ssRd+5+mLt2Ew_(okL4jSR47Nw33kwd=?AIOlNJ4uo zu?p*)cTNQa^~jd|XXxBtBfJdfW9(<;JI)SmqT)0S@ebav@wM^(Vt@U8$u z7>fy&M>3t;9!3X5*knmC#ckH&hs5iSNS9LsFFbj;4cSh6%&(^MZx5#MgjM3|dR`~;HF=3bo*0_fyoua&$9yl7zXlt{?>(2i3ktK0wN0XT*()c|!o=UzkNsWzPby6|& zuRE!u`RC?~T&w?POvb@jKNkm{X%pVS)KYlNjwKhTy1N>6SZe;`w4}|h>$&$LBxWuq z)8B<*#7r_nzmWEX_PEfDV5s2kgMu@xkqIV8)7hObj|esppB~yu<&d8H!T4VH;%MP} z@Y)ud!+(<}qqL7F#I;x5Sz2nfwAG~+3kCllmj=Bp2m6UV4rj~^fRqf%Y42Jp|m_e1RN&uK6` zEq7go`FPAa>~#!3GP+kRvFai6B8R=^I# zpi|Ejeb=Y_G_~l_V&N})_J*)Q+ob!)C3ni^up34-cVli33=LY-+n07PV;>+g02*ax z^E*iVN&=mw z%TlbGFE=x7EW$_bHog~Jrm)C^`g}?;mTr{QBvKxIm_GItUvH{!GgycsVZR|7`425F z=5h4r;?4BQ4g7FvqXeu#)Yn~Ox5wV)O?pY$%8Nq7F3^&$cPzi4+r^Cz8YEAwlw?`u z>;djNbV5ANY0X{kHYUdgr*R~%0*79FE4GHcJ=W_zt5;-iTPu1?qEF|d!h7+`>-4AZ z!3cL6+2z`!-A`$!^s3Guawwz{S3D)0sH{Ey4pUgNE;bYcbOX4B+jK_-QTgL0{x2BeD8z#e$k3mg`rg z3mfO>cSA@m*`x&gJ3Zpm-=_m{4xcFmF!qn(-aZ>B!Wx@TXJ$OPhx}KfmY+u-X0R;Lsd@5XS;Zb2`NnN0ji7WCd z&Q<1DGG=10bxUc%Y2{>&@aN0E&}WL4Om3#=%~TRqbV+RAd!y=}sa{ub+=bm4|Uu^LWylIfddaAgg|Cumsn1iNJ|rh1oH5v4wt{ zN{{%&ZP#f%N}w6J=HQv%UN5{?sY&yjlBo_d#p-5Q2hIZKtttW`qxECS`23^x`HVsbh8 zeba{bKU`lt>xAu%)qly_=Ko@b4N&kY#1*m+%Yr`S&rIO2*)e!YZgdrNGLzuuMy6GM zZ#&*En>sr}@nIBwh%fq%3VK6*1XL-=< z#^ny;;NnTc*>YCylnIZ~z{DCRfIzV~mzyH8^QcD27kIzj(g0i3lsN6z+@3KUVAQ^YweR0Nc{iK5FFgEZvM^3#4SfXKnsPAF!oxRYY}FX+T<_rpExZj$G9`blGM2M*716oqTC{(WXqVD z24Pg9i=}9qpzi}f6YN3ldU6WhC#Y9TkRDC;xc+CVU>W?;Z-TnZxahcBWujA^<}Lbk z%P8A;r{KAkpuq6{Yv#5mG2x<}3P>OB!igu-Km$`Lfb)rNF`VZAh3*=t$E)OIn2+zr zTR;Dpv&Q26=DPKvkx`(}rwkqd-s#W{+Su2)M~2aB*Sm%5Snb$9Ob>)eV@h9tsAJix 
z^|a@{%sF1{KPGCBb`UrwA0MeM|4kz3IzNYN((!{gEFOUoFfpnBdKiIDp~o#`bh2CM zldxM?g)33H1Qw%IPE_ zyf{t4)@{&~FfbW=Ie>0Ti19kS2K`XK5IVL2xTOD%RWPcrMo)d02IM#d{o>aEO%7X| zL~`yx!#}VzO*qWD|2<1XGZT=gX)C;K+8QI^;r+6c;X%aZF6HVapM)^MccMHXMNxI= zKC?c^fApcBLwPdyr2^5krV0Vf6H)FrLi|k|LVuAVNXx=oj=RK=Uk|R8$Uoti^9Dm`JssCVXLeqaUHV)N7n#&uvy+KP&1bIq$1o(c*A_tNtB|1%~pqCnPyu$Kd5pT|O z`B$IQ)QhYZN?APMMe|_Cnl|NZA7~}=B5r+Exn~7B!qwdNTrcNzuqpTXNSHv=zhSPF zo9B(ab#g1pe9Yx1-75=-R-GJbw%BEgY5dcnveTL;`7&nvw5OVJ?;oUz54gB=$}Cy= z0Yz}@f6bKM{!m)?F4;hSOkXocKoc^GN6iFMQ4S{D;uY(ve!+9Cq> zy<7Za2U!ajCn?Ylkezd7CKU_wDT25;=Kkiyw5Gc)HS(u;*0{EB{r69K+y?9#;}&TK z<+^#Q)g1%s=OMFgoeraiwI%ICnuB)82#QJ+3JqAWhL!@^AAP&1;<5JbY0 zim+aw?L*rP299Q9x6B2jHIS-urYsF4l2AWtlE2EPX;dUZ@=HFa2Moe%o>=aHW<>8_ z2aDWW-0F8~455xmJl`h2FY1@4mvS%kr<4wm=jF zYiCXro%lZ*y#B%KrSg|>7MQQKgU8LvA^<@F@|GmBTHQ5>ofiaaN0hAtO`KKD~ks}o7{>=e2V?cr=cRGvRYT+;HZ5TG6)dtQATf(C(~5^ zz$SS2YmEq4=9}swVBgQ0ga_V$Jz|V@ zNFp#u5g+MBKu-#$hL@bBC?n4dnhQP)CP@(%P=s;2HR@cD(MN*1CBk^6C|ntcWrkr} zhBH>Sd$}!DK}_7yPd3=XDfK%(>a4R|vTA;#(3DG#ig9~3PD|GbDp90MQmIV77NKqz zsL%}PmLkgQ5q&g?AvoBhG`xBYzX;+GIMReKviH*kvv%rJ`1~k{pBuotzTlqA1yRc6 znFZ^&_g+LNOoi(j??;J_^g$o$G3CBwV5E+Z=i0t&l@S4o7Sy`fJ+(dZGSw-~Y+rKc zVLYgeGk!dyZC3I6DI=UJvX!^m1klgG+7s6dr&Ja5pSt^4SYJ4Qz+Wj#H7IY31J%4Uc1iTvnTdcP(4fVa0zbfpj(;x;m6wc%U({Xb)2(8 z3gIu^r%uwtdZg`(_l<`RE`MZopG7IIwwmvnFqrQ`#jkpTOO1_`-;Wy0OHl*tk&p11 zs{UbwT>ml)8K70qmB&&(SKp+2=?NIR-0~2B$T!xtB>K!&2xjh7jidcGBOqf6BMU5h zP8OHHupV2vtfg`{Z#-A-OQ2NiTGbs$7 zC@9G($2R07xhq=a{Emrn|Y zQj8Td7EIgdtjaXk7;B8*_iCyIb7jdrVb$_VqsPuN0`;uGIPP{Zl9x0(FP>E?+>Rio z%Y8Xrr#w+Pz!un;cbs?_4=ZzLcXK$$Ll>%uq1Hxhl%z;pK-4NuZ$*EvQ8#ilvbm|C z#E?{W7<>`g-$JSgY%-eP0H;l*nv)XEveFl2j+{4ey@2i_F!(Ht-s?*)+gc9bAr%|A zZhQ|$3ch68ESZET+bzs7TG4R>-^Q{wqZ{}~Md9WY?s6jVB~McPa2vLvD)1!jhlisc z!s~^_u}gb-=5FdTx(SEjs4hOQdtpNN}8&=IE`ww%+?`xJ_f&kdUrkz zS?OFW6Y{#fT-`Q*1t1H_RAQDF<^H-B`p-3`%Pxg?@`&fatVyLF{>p*+j3a zRJVl$t@n3^nd%Sj>M~epDQ#TO;9Q~HP>0Hc$(GyXYB45qzTDa&;JrNVDTh~fmwCM2 zzYogv#13)uAHw^pRau2xb;#7Y54)7|hO5S>FP`i@dB^0rV(2^O+|(Ar6Ev{K6!EP$ 
zOZwqdKxv|K^$9uK%7@7?aX?%esUis-ycemG`%&A7wF1l4(>>)xb5wId(PVC|%4hV$ z!t(u)((dj$$}kpBV2AU1ZI)c)!rv!A$uX8<6VVrRu(F;gC5u=Ewg)8%0$U}P9k_N? zs`?9Pr$E6>$3)b(^AMgtg``uzg`|)EevEg-2`eMPx&8Oie52rr7jdFnFfn?d)#YT~Vn^gb6e~eH^61FR98!7G`Rw=% zTA|3es4`&>VnNZ*y#jOuz;Dj)BR^dFuyJ~0?bsr&l{e0!0p+B|8iR>!l|yM}Lu$5a z*17jrp~yTpkyC55^#v(z^qG2iv4kMMH9eWwh4X>ets7%PeKuHc8E=Q-#0AyWyPt{} z?kGFq3a>q(XM{G~N9}wwza2IsURKPmUl}S(do-zM9ye~!6UY9ZYjn!RT(<7}`D9{zCaqu{!KVRwN>iZtm@9|gQ!rwT6~ zl2Q?1c6@Gxw=b@>%G4`|2u{jS4L@aM00xQ@AyR;d6OBCq)=JX!wa(1 zPlx4TMJXINU=C+Yl3VWb=j+byFXuxA%>N zfM0y4_wpHFZAlVs96dAuq zo9?t_eP|Wm7gb84nYobsT2IY!uu`cZx3@gHeefg+OSj=Z`8PL|sGV&e&UmqDd~XR7 z@`{LWjOAqcZHR=-0CB=%)?(1@0 zwV+S~X#VPYT|@Y0Lg90)J)HPDDgnoCoN=iCp@HUvrUgqsGBLuS8A-oOc2d=g@na?{ z93xajzbCt#4HgJIr(JkL;d{kqhx(xvcQBc^41zBe7d=u4@;#sKF(=# zbq??zXX%Tu29fMu*?m-~$Y)E_$oyy+m2+L0UO1Z1W(;^$cxCkM`5*EhN&P~Xc8Rnf zGT;fz@%seteL*R>DL8adxw%9Xt~Wv+{*;bwr+c!mQ{Rc3RzGd*7pK`#Y##q`P>pf& zJj_L+s>~d|DK$|xmi10jaq)HsHcWT3$%x9NcCde_bQp#rTf-jnXin%Ynd64|t58gDF8Z%Eb`oI(Kz7xoZ3=|4T;M1b(qREkoT|LA1&m#Z@tg+U~4 zF61QomJ-V{ww-v_o1vSiWm*@7nCIwK^LdD*MZAkS5NNgVkfh>ciIw^=Hy_es95jC> zzPG9u5XvN_AbN(|?257I_@w+uC{jeDa$g-@$5KG8-+l8}U|MCpUtRv7mFLM{)ccQA z9A8)qO%#3-8zOPi?X4+9Xry7EY4HG%ALY533;jNVLk(D9p@35N12uvJPa4nd^pj5= z#v(|*?yaK(CGvAETuhXYDeMmI`7x#Mp}6@jjR0d^H`-n>Z93HOnt@41t?Sd~H=yH{~b4(X5#|A#t?jm65qTjD>~wm1|wx z>Fj1U&n)$%CPc80=jkyd^Py=tW*E1jX9YGD`#M;rgJ7)vy)_j3CyA=DeGhNn{#T07 zO=HaxkI}Q;jSF$|cKk8nU_AiQzGM=jt5g`@2?3ea_W~luk_{|oZtgi`cOZwe0a};V z76Rjy;#6iJK6_cVQ-0*owg`NJ@a)oICvT7+;$(!=o;|&6_Gt*Z0HuZ%A2+pCb33cc z!Cb!0{OaePNKj7E?`4!cUq$sn?Zj)s z2edlRd_11S;_Ut}&iQ|vJ|vwtmb)KdZEhbQV<@#);r~y1h(Gg7|0hA?KLlz1&R+cw ze*6#n3QPU(SW-Vf;JSr>%|!gWB2F`0IVxmk7uE7=7K~@~ue+G9kf|w;7I&|j7W?~; z$K(6UNt0k#cUmL!BHL!`ov0y)M>egH#`-LGXa8Yz|^_G-A5L9on2xlK4(5+`Z@oF;_qyUe`rk8zr+Z*5I*PAj|SkTSghzykrxnH*eS%JY_Jvkcs()OJJ= zP*AwQC&|VoMx5_&O6Uo!Tdf*)m2JvAqR5o91of6Gy1Zb^G1LE8qugP~I#o|6L_(hl zXk=^xx_KHtNQv>FewnP+Pt*kC>TNt@Ds?AhE}-WCg;$*9;jfI*0~*XGuV`uSKfna? 
z#4I_nJhtEur${$sslU3S{)al(F=hJZz^twQPvcrD-?|n-f5?o<7(WtGo>JFOU;-aZ zk$HU?infQ8OuYnjja}|fbikWsm$%vtZ6mseY z4Wbc4v4)_)l=CWptZ|5=au;v&Mm5RhuraGE1(WE0*7a410Pu@o+Qn2+xq(6|S&s|UvQmMp!JRChGq zvzeR_Uo6SY*k`Y(y4m_=9bc&U$o9h~e+*-i{z467YQy;07FIkO=`ZRTVANuKP+bgvB9-)|oKaDAh<8FQ}U%`ZL zZ(GQ7^Y~uXNi7G^#8>kk*d-hLg6n;&8gUsR_@H%b2uBzb`uTqp_}8`Eciz>Rjs{Lm zJ`*}M(#4=gwnB+60Xn$3L6C127@>M>Q~QYA%Fh-wi?q`5>WhG9?`%X?^lhnYbcwxA zI(qY_4@Vm!czY3R17B!mxlG--posfnBVL*r{rg1mcg4LLO>27mD|A}eVX6?oQ^9|* z$zJgRgAPOYy*fB>Hfy_ApGi8?%!hY|$0T6Wz)vlg(+=xs=_DT8w*1x|y1RW5=qboI z1(6$C7S5&ORfu><}0JLF$`#=u>pEy8`4! zU>h%_TqFSCIVB!={e0H&#(jx=-c)I$2l$Y!kY0jqR~I>eO_o7|&BsKMs*bgR$))55 z8A2qtGo=}lg&Wx#L2tb{b+5s<#%%>g>upLNHXEpz9AWPoV9=&u`+11u_ppc>NV=gl zmG(q#AZ21#UxI}0sVV*MA2(_rv%FokUlscX#{gEYW;A|ME4$Ft5hbXY1>;g#KWp-a zv%^+V;-DKb{2?Hb5*LNbI6qkiXAFF}^^{Wf==4)S4u40wuM`(h^xI$A?P{ljC{r;< zb&|&Fs4P=3Aq<#-K3Y;7+}G6J=q1ntWPvap{}hz%p3NNO_q^Wj7Sl^Qy4V1YWH)%; zRAhge{DQ4fHo}cxPHMXi@s7UPY%Rk{+J{-V)OOxI)#AsocVNu!yoSK({_@+cFr~&? zDKU5qe#WsM?ac9pV(M(44Mvb_OHw+E)b91L0vqi=8dnG@HZSOXF=aDihqt1esupKA z-{K7Ti{!W!q;xMPcC-l`#9zHw1UY+bl_zTPIE>;xpfumL(L{#(5%4`UR16T5wUy=| zl?HZH%rVW3vO4Igf~;cNR_<$UI?$Z7NmAPc*+A1bGqfe`boMa@=pnuerFIi%w9p6=?3o5mnyl(+$m-> z53?{<(qLe2e9sMU7#ffWi8t1dG$hFPTcr=JzDFwd&A#>pWsL=)C+eLTKDx$NA1-LL za^7h+(YB?3s82g-GcP?+u_=8kkde3&7UegQ!vBSF+S^0v#j7*DsTH9w{N6tkgT-ip zWk%=L4zTysk*)nZ1;;V2NYvW zt#=NGW5FIgyx8(ZD*4VZ8ET5PAv~ew3+`|iBK6PnLZsTCo5#FtG$9~JM!~Q#BmO&Ew>Xk&3w^hIRC+QLUc&}z9spWp0mPvG#Dw1ql3~q4e z@iE%^&`POxozX8v3!Esr%G0BynyTy6Ui`G})si*Xe5a*C5^`jXt(++OG&^1*+`mq& zUFwb87I*j_G5QSKqrp;PSx*onBPiw$v3$fEKq^o;XmzRD(1|p!F~Jy`Obo6IwqG*t zL!=G8&9M!q>K_FpkFY-^k0!6Ab5nXC(>ERmJ&&I$PmGdPT%7H|A{i4djz%hk@3H9+ zl$Y9%-ipM?{rN!?o@irbuCc)(*cQg$f`|r6mw7GKh}po>-EmEyxxrOXEvxF!P3W6b z9-t05OeAl#>v$OvwJ&nC=Oe3mYLSbx>^`Qp|DvDDE_xQ&I# zlWgeAz^j;f4{ze2w03QC-jFM!7DBPd(=(*X3*c6#*TMd!%amP%RBL%SR2yFzLJ%c(2agiNv!}N2zS05Q zU$`7hH*qA=kW>~ge{&P@R@MGgOBmhQscI7)i}}|*4h-FLjMQ_ob44DtKZF0F$6v=5 zM_JLy@)UpDoAWQ5XYkD{?)@@VCk6_goICF$!X4Yj*hTapEvJRumX^pw_sqM_ufjN% 
zz{=C4zejr|M z`B@n81}(WtLb(h!mv0|gol<`KKK``4IRT7`_xiLHN85{Cz~XZZ>u_fgQcCe>YYP6A zY{h^A{>|#aA5~itY-PV3+iFex%c^R~E9?U-y$Qxq+}`8j?;idsI{eZw;&91MB>+kp zjUJoBlJvn@4wu@ph63}N%)xaawMDN|;mW!VlbTO}@Yo@MG|&|Mb34mI!{Jph4P4FM zIYErC1|J?`R(sr@R?IT2xqA<%HhiKoQ?_kRu@&fX70%cT0SS-q(fnqGD$iIQ0%P2Z z)Z-7cYN5LP!lm!i07CPFvvfuj51SHhn-vxWb?cCtF1Cz1j36I}RmAQ)9Z7!R=leHOMRzZ7r=o^P`(0g!%RyX+*VA7ISH?;!hC8=VD4p8nxT~=^8AVz43Slz=9S5EvR+%_v=0h&)36o8u z@xaG%8lm>1^yG)LOrp?i*Xw8L2OkC13WxTKU0t!vAX1*Gx4N;FydBCeu|%^+bV;QP zPWt&jZE-qYa(~b|PJ!VZijyDXj&TR;DcaMu@qx}oHfMgY^pWP;!UW;jWy4NO#u#12 zGH@QCVFC*qvLv5AWGn<0VtR&MftK^ghHClqv#zCIZHtM1V!VYZ;(TObwr(7NN>-#$ zoo`y_B8l^jFm>2ef88$%8B$rUFTF29E&|o$^L9lGfG2wPLGv`|1yn4ZD#~xN5PWm7{ z11rNPZeE6v-=}!}l5r=dg$=;S=H80l4r{YMe={3A59&BIcUdqOa{Mp>+~X8#VNAd! z5GTn|;dF;W=aUjAgFEW8xBjmK@sJsFa_xtVPRdgr+47H0oIx7-{?OE9vUWgcL}R@+ z8UDevuu{Y=&3+gol%)~fMZqzmAE`TA@jjE9SkboRFGE#1e-;x#Lmmjae==rix-~16?M2G_;)!tZ$a%J@{r9@MQ;_=T+R-)&A2ys~3>=Lt7++{FH=5eu zuN@oP%{F84Lb4c(8{3(;>7}o60`kC>6eFWKnYn}7$BD0r?3YV- z7cY5Tz}{KAC~8FC>{Ngp{>$2zW6Vvn)#GJD^&b3JUZGRiwq=dEY9*SEd0wd$4clMx zyhhF_tl9u(raT83j*LY#^1~rhS0PJx%M%fYl&{VwumP%r<`rt4BNFx()4^O!lk0e7 zY8S`PLMh?bQqAgl^}`7fci}p9|DSmsVY)>z6p})bL~CCCE2*x`gPG|JU4A*M%262 z+5T%be{0{`Sz=hT2})sL_}(n~pgB*^ouB!uEl-?!M{egGf4%vT2~p9Yjvt!Y&`D^n z4!J@>0aqn<7D9HmskSDJQ8nQHBfwY;~z1)|^fL;OAaf&N%*jk53Cx)#GI zT>9js@WQfavhkPk+IH2JvMt$&Zp-g>+~qWOL;5vw?t%T^k`T)$9d-&CG%Rto+qz?K zm4*+k^(3 zac93nBh6eSibSYFZ)m{4U`9*zn*#9|EB$;zL_e zZVrEr=MPTggATp0_ElDM9m<9AHI;9B+q6qQ!xZiyx(RgOnqBONQR- z(%JSbrL~t&eoq(nHIspL2Gtm?Kf#Sni1L^DIT0&`Sd)nEF69X#tR=3cfjA%#FS5>z zTg8cuZh)ummEBQUtLb6|;48aVa|p9gmx4MOh){BrY=rDD>>=EZ@PNPwjfmD;mY1ow zpG(k&_CAI)XT)LLRS&&KIX?EbUlfC4_1LXLMjcHrd?nyJ5w6?*EGZM4OGPzqTl9#( zhWN&+N3`KN@lILREeDKe3M()+JQH$dTYY=hHWM=iH#oefG&VY|y%+*u{*qk36jNIh zK1WB##MvLV8r5$>?F%BRzt&Wyu>l+Em}IfOW4aI>=0JhL8uE-{t&I;(V?SOHi7c=)(Ez%aGji;rm#I3s7%cIUVb_c}-6 zZ+!!hwM%+Hns0N++3!6q?or;iqnE{TiIBP3te*=?&d@wH^c=iW4_&En1&;v_LX zp#5&$_9>!O<_O4iSUvIJVf--MV&f0#bq>{;4jl#(y%bDAXAHgiI~)M$_+44zxM_@- 
z7GH1z=es&xBY9MJ{PYwlb*5lhYbAxju^gfzl7JdIszaI;xat)OZ~pHQ1NsIw^nGin zmS&FUf9M@QvNH7J{5x({%ZT5ZtM~OeKjLMt81Va#76_1VAKCMNTUseD#mVKI?y0O# z`&V>cDVHnFIcW)eLF82oYh}?y2OA0N)mi)xm5crU`hvsHwo|G>yK`E}s|`}rOxurg z6}67WnKmWv*dl+ZY)wAbt?$%5%4|TVmG-|l0s>iOQ|mAD%s|VAAtGH}cw1O@d6#W2 zwe4pNVF^}RZb}Ry<`(V7T>I;%yijksIbB`bIr?c&_|Zrdz+#e^MJ#)D1?wLTXGwbU zY;R;OxDFpPT1=TJdCOOHw@6*X5N5BufU&4J#nH{pNpanAAJFGGW(fQb!v95 zb}-a^g}S|OX{aTEQdHIH@^U|Z?r`;Br{DORvP%IgW;4Nf`S(!CF|DdeuJT0(Zc|jz zaT)oI1tSdRP?*dG!}JO5XG5(gPPUd{bF()F&rjKua`IbyQsSsGH?ZHn4hGj> zoTh%Gzf^3y>(~aQdgbQVnr#&<-V__!v99?HlRyj&&vnr-fqhhrQWJZHk9!Ae%PpQp z)FvZNgnR7U3OAa0&uu0i&;K+>SO7_x{c5wGZ0c4*)aIlrKyIC;lkPAe{FjeWH@G{= zHV(+-)Yff3YNx3`NjHj|`_Zh!*(@d&?=rKh2jqE83p5_KuNBm<^_^|*_`V_)Q>nK2 zGQa4q@=L#KfRbj7dF`q5M9D%PNiMiS*nv;ha7rSEm0$T}(S!I}m^9;(7&$p#OiX>) zl^P~7sy&g2-?!vgbY{_?d-5u=jq1eV09$0X=XBi#EDtPWWOk07lZE4Jv z-5omX*j&`N+l-MC`mMb@{eC+}?TF66NMGs6!t?UH!LniVl8sk(td*>d@5Pk^ zP@lui>86Dn4@N7WB=Q6KL@S(P!j&DFFfEfMWrP&c{nI>6O$$l)bJ^`-u}(ncer2V1z`viDZeuSk@8O|uvpx3En**(n2Fp6zgWqol%_QJ2H5;xYRSgJpD~nU|s={H?t#wsA zb1HFjULTUvWw%U=>&q@<$`?y|a}N(o6)$wg-iF@V?0%FvrwJuySV8US=#HkguS(I^ zvpLF*Ti%NonOU5hpPeere9=0o-5 ztmOqv3J;i1AlfwMHIS&3qQO5drYNUuy!q-A+0tl?PM$;$%b5)`GL6LIL_RU48}0Zi zS95A4Sh{Y$lSC8Bx9Q}+sX&>yQ3P~aP^C9-Yt_}cwXrMhxx6EdU+HdEKK8$D++PN_ zKhRy-w0?eaOfKnjx<8jpTn{2`3$7k0r!n2UTlkV{ee&a@E!o-bikd=4{_W)54!CoD z@3O%-V_iM6YtL+9@?D`NNO7?h^0nJOH)&)}!HO{uSga|}&yPoDs|EMPYmeLIAMkQfGQZvvrNA`j`=-8#c^*xt@hihU5dTEn@v-C)ded^2SBE80|hl zKG9E9%tN7b^_`Y*sFDQ422y=_`aX+KFSg_aZdBzm0GE&AN zlkZcrE2By$Ek)hR&`-r@!zFvp(nFCoQV2`k$zNA96Q0E5hm5mRG+~iFKpNbdx6Wk$ z2*{X57S?f}K}`lEI(4VN7h?VRh9;yadii!&sj9R>j{0+? 
zznCITo5woNc#Qv3^{?G|`>nQ_GpYgIVq1IjFKHrKJHv#gIx^}bWV~K5>OfQ*AmMC_ z&9DN`>(s_|bmeFQnmDB8&q?dOXcd9y>(0n`xm)R^MR7;ljs~4@3fuGo zMbww-qv-D9U>K?SCG2s}OJm~~ioH5E2cm5C2+}BRjxqVZeF(B5f3sm$ze~_|3OV(# z1Gw%*7(ofuZ4I{X$(oGJ0~G}=4mXBWu3FT=JY=+MkM*S7x<#4x$h(#WL(mcM_n@o< zd`e=v8(%5=zD={Y9h5rYS3aA~!TztqPN$b-eb!c$`zDE$0>VMw&lHz=Xu>*mh58PZ z>hD?oryx`MxsL1o)2Y-xSsUh57;wvvRj1f62RghRk&wi;q?&rb?GpEww*qK@#jqC0 zIV>;p*nrUge*f`*_z{?(-#7C#b|3G|$<}+HQ&JdMKlrDdE5+4FCRrg%bk0>k#ceXD z8)kU_bx-V%^p5|fTIGKkzXkC8XQx-EALu;JpWqV8%sPE^Ooe6#PWYzH5J?F6*46ve z)m^J3fSvIvR$=(@K8x9alheZ02LRtXM|!yC}W2jRo?J@;jK-rHIxPWu|S9sg&+uJ;us?_Vc&Jt%1` z4lpnd1az(~1o`9rrnZ^@RGP z+p}PoXw26%>%p!ijg<~8YvX40&GWzj%lpgAy_&r}J-e}*h8!xLot&3jv*U$IS_Ac> z_W2q`?9*%uU()>`5SuK`d)|Sq!x$@?h2J(NhN)>(p`AF#PFHxG=`Z06!*RB0z|$5Z z%@~dFe%z~#@m!RPsV>%ju~bmcoMIdh(ojZF>ldi(&KR~UBo}c=o$z`-P@#0@-zueP z2|j`EkWPI^JgwTz7&>||fzqbcm2{n}>KoPUceI1Mt{+At9IqX6A;WKhfKyCByO^gl zRFb)LZ+!3_gVouTYeAc+eia+Zdm!cX33EEpT=-CDmQgZm+A?h=)+KQ+Z7Kc1ppb|~ z3N^O36Uixyx!0#kV_q>xz$S~GS5f1YmoKPs(AhG(8w(dpRilrclTXzLbx3GEnkV3Y z0}i2lL0{5F5CZq_aN1zKYX!RUyVj5+iw~x90w0Jz@AtvYtHQl=`YYaFt(HHv0#6TI z+|O88*S*Ry{aIuqHu;I_H#K?+opRcsv7RZ%W@LSPJfB8;u#}PfY3=0&#z_$jtH>=+ zOoG~6*D34|g8QKc;!suR44m(G0 zW|^&3GQ2m@a!sJNvRci8MAQZ_(>exX;}jj?@ZD19<2zDP2M_d!7XI7;q$-(}!S& z3y4n{@Yj zmy&zsS;YoJ_qv)$6etXwYlkaYQ}qCq`gRv+3#ZgX_#+Koo_7ZZMD-Dk%g)72d#@(c~$iKH9VR* zA1f+b`*(LkHUX_9tNXeA>b0_hUC#1dE%)=Q#$Of;kEx|c-ryXV`nSAPIDFgJReh3G3)=t z01&pwwRqVC`@{?sK|Wiv*zZoh_9XD|f$ZT>U4X@w&!>t8v-esTUZxE|bgQ{JDl}wn zhX^=)cC=0MlfwAnqge*R$3+h?{f5{etQ0kdNmnpwQ|cgh6*OdpUdivvm25_su8w?4tU}4FHD)bhn!k-cd;rpmc;R7;$)KU#sT4QK7#HwDaUuQfP7JO2WpMd@{B>f)nL1f-HUb{A=*Z3jEj!-q+QFzYV+a;s(#(dyRY z;H2^DhyttDo(=1>bMb@KSEzCfC9?6ylulogVMwD*|I%loJafBwxfD!sJ3yq69%*3) zY58NxT*nXbc@nsnD*mV%Ax39cAZ~}aZ#EhAF^eh@krxxZt2zg`l7SjD&plPFz^Ld7 zwFn>0YMyA<~V_vT8>J-QFz#_FG zH;|xt&-zl0ugg}6C%F(yFvx*$!_oeyEAQG+&jr`Q1wlR%T0=wvcer4~i6}pWt2?>J zX5>M9JW)9Qtow(T(j%wlwiQa>sKbY+7cchaUodbm|Be|YHb^MwI7CZzTfSk^-F{V< 
zMSc0@*Z)J^dxk~1WbMBa1Q8Sg1(7IGi2~XL2_gz2K~W?J$wGG%1<42~L6U$Zn;@c~ z7l zx`tZ5M0q+383-kUeh$xpRtof%Vx({I$eq$|kem8xU{DaSPY6vGvZCW1T!SYB#iYWP zonTY1tMzUPjNXn5g)wF|^xef~olNK4>tCl!m|%m6P~Q0$*s{^v!41=-n7iX_$l}fN zttO<{bFm|}8ffez0p9g$aU^ZA!(3^p7^|Ceb^ zfczk~yJXAGZ7%yi5biU;t6HHWu#>adH5}b>u2VUGFx%gETkR4g23a6Gh>r}n^f2@alJ1;E})vG=3 z3;u+()|avB1IklCVOv)-Y!pY(2Td!GOwH7;XS7r(6wKXpTJWz&zIFuH^I8(=_&i6c zr*SJ%kLQ3=2;(8MV!;EoGlvzdo0^<=3UxbEf@hglfXn4<$PLL46@GcqDdgpM)}9rZ zeine^&hX?+S$6U=)Y2`PY{wD{oe>;?A>h_U&c*to8U=QSnJZ&o4Wgeg=9=>{h>Fgw zgYwZqvRHOT**E^O7fp}t=(mc{a(yfz(>I%7=!HN>1lc!q+-r>?vT!^#U2UeJ&stJJ zduG^Oh<)iDV4a#{`E~dGfY3sQSPwxq(-1^)4iRW?WhXxWN)Paf=YNrI+J8TMU(EZt zT}%(g^+@=t_7@t>2V%{)aI>b=dk^yzxj#PEV0bEa8`5!=ZqZHv?m8YU+#)1q=QF>! zzQ{k!2T-lXyK|Mup~4G6hU_;U(BsCu1)5x9PDMhnw?@7|tBHdaaG2|+lx{C#PKV$0 z^dqMLxAWUcGaIiRsF-VD(cHvaC-CK9afm#)kVm$8C<$nV-ugLW(F2IibBE>k; z?YKS_CW77c9>a^-7k!3oB*qIbBo%IN#|4TJrb9%EtvzIn`8;oafQW1>Yh=4Hdl`Gs zCeg+>YoHI)9hXg9k>6%#mG|86?LN8D0n}_YU#jlET{koAh*bL4ef;wAc7~l3?C@N6 zxJ_XDkM#^t(O;oYpso?o*Az7%exn|Piaft;+;hK_ zXWuLRS!kJ93gEzR;(7UtLIxTBEPAg*=O?`t2bRq{O(>t%uL1$D>Q9Bf%d<6o_Z3JC zQg2NClsCR69b=Df9Ip!zxt_Yy92LzJ($}{O>O7H0!WT^x#$8~V(lHkr@ATqXPSXmL zN9{nctc8ek8eJLhF`GG*PLH2kh zESvNDTOonCkvx+W{es>*!9=^)Xc#c?Z9Gh1kW7HMedpNrH0k|`mxT4Dmh!0}#Y33m zWbKv}Kiq=FcJxs-5Sg69p@pTQ&uZ}Y78{&`hUS=RPN9kE>{P1#^tIgh7Rri71u7%@ zt+|K_huvb?LIc{&1+3}r==;^z@e6FB*+XAj#RuAAehfr{+c!-ew(Cn(FuG4IN$&PC zm_%Z~Z5v7z8T*9}&}kC?ZF!ie(-ifJvtsFm34Ga9$gM9&XIuI+9bG6(g0fd<0Jt}T z!3m*13hQA(TCVhp1sYQ?CY)HnK%aBJGe5D)v7Q3Dw{W8zG0aGvM4>J~too*+dUS+3wqtZ_@eRYSu- z12!$63dsCH1?y&Q_3LXTZ^TH|i9X?PQ&z%NK;HbQVXj*Gg zK$CPSv>J?;@eIf{<%(JO%)_oh7Wau{!zd|0_;H%i=Y{t_-ZG5QK-r=!Jgs=Aip~6Q zHaXUB5-)=*~*CPMwMf4Y2B4AWR#-#m+U*RWUx%hV{#eev- zoBO3HusOn39qsNaU1p^DaWL6=yt9~7>*aN{x3=9meDt+ttQ=nrlp1bTY6V_}G}552 zfHd;>crhu-%iQrJ9+uoBvW9`I1Ln!vvHQ_hQ4A=l;c(%Nlb#3{<|S;GbQN1?r|tj+ zxD1sHn*56HTn>0gW{ z@qU#h?Hp_nzqJ^?Y5cF@@$8JHxy2?vyqY~fxy+6Uv1<KB?UK3@HrsDZ63bu+lkSi{}V{|BfQ~|S`Vpg&G_)? 
z!yAi1uuoPQ^J~f}Y!lzxj1$$B#&4qe@;=Ch_q%ggLI7y}>31*%BIGT(>uuQ~oxfi~ zG@`eq(eQY0nrzPUq_X=s7=5}1%e5z!xLaR;l$*flz&;{zwn*^SW zw;Rkiuti={K%Ses;I>-wA!nb&MQbT9j+^~29-;W>XoJajPmv24aT~t3>%`NDULA11 zoI@V;(b2j&%i`eDRaJO^vS z(&Z5%-0%&o>O!(df_E8gs`;PauLz>69yf4Mqq#V7x6o<+@_8n0Mo=PT$T{e)(MsC~ z@&;~tb-t#E2SFyYcEG>_=k~q%MN1t@IQAoOUK3jX1atpnHT}ng;z3rr+y1bet6?qw zdQDxU#e?!oGJ|y+Y-gIrE|Ur9vgC-~zay(eazk&N*FH-kvhD7ctx7yu*ZE^*wJ_+^ zjXPrk8SetkpA};lNF$!q_o{WROO@KGX+p_bP@RHnFNip zLGCpGOg8_!dlnlq3RaNOQJ}H9dhB!41Es(@27NW5?@QZWB&VUA-%IbhXi?bcscqzG zwaRD%dL%l0K>5LS`UWZJABJonayQGBcVDaCD{^n($Q*E-=y#z*cxPt1Lj@5xUG&Zm zw9zgy) z{>A;kwz=r@8|?l&2E~XNc3}R^pvkuQ&aNoow~L05aksP=F7k8gc8V zd6V`}^Sv*j`2YOn&%HOHe6b^JifcTkkGcMho0}qHmPR7)5BQX9+&`GW{uNl2`UtEH z60+>c99E#6xPFY4k@4CckdZm=F=M}16(b{kJlYF<<1W+6?&ioC_t*MSiVne$w1~p9Kh;86c0}kurZ{lqpn(DGxQ;0UyfV2EW7T8+nc}-I{}+c1!)>08 zG!@2oCG8aT-kYQ4TYrpdvLKX=d&$CrFZ3mLytvezb_x>Nx0z7gM(d8=q_8*jK!72o zjRv=?C)G)IpaU*D6c#GHW62KXEAzy>Lw*x+)jqV7&MrN2emo@2lhxjnWcf_H+Y_A> zBr+gj%ZEPH8JtSG#`BfW%r_PU zq1?;Nblv8cTIreKj1?>ePd$bBs!+_0y_5Mr&|`MPv-J-$&8Rx@TGh1C;`ZR2;rqt* z-edmz!6{-B7Fm}WJes7j0omN+>C<5b2;Z44wK#yllB-)4)Bg_b$9S6vfulX;*thlC z9dXu++}hQWG{Ms5k#a2K%B9CRl4+!V-F3x?avoiV-5DE?Jg4i{2ed@aXUBR!rHeZ1 z3LMg5jO%9@n_H0u^!>NnJU?x{|Eh0Rfq+DA2hL}0hrJYVqgWh`<%7%T8_}Ui%cJ3o zS-P!Dbwd(Yijyi0DQ0T28@(NO`7&|A`n(O5{OV&a8LIt+CaXwsYmt>{J1=kjaG6Vt18lbiz2pw4m zKI94YhVXrq4pYwy0%9B+#hrZ3%G^ril=|Yk*pX%};}+%j_p1dU%S~Xh#Tc85^t&Sb zGq38NfuVxj-pu*>SG6QwrF0vRV}Xxf+Zb2oJ`Xs?k}MlA^G7Fx!Kf{JzN6!64|P{5 zV(a?NGrgn0_TI)BlkVA38GsJWEDaqad~ zz7Jhmr*Z^%iw>jgb%Ov=syFVmng0;!$NDV&m1T{`q0%y3PxEhu;YXBQNDup1TsR1M za%nhYwbDqWuBv?yjawkt@ArV%3v%Gc4bqs^4^mj##pD9F3B{b}XyZv;% z(%oMyBOv+CnkDMTp%W9vcA^AjDZ@lBSBUEWNo9=xit325V>hdgjufUi9=EwBRM^I5 zBKe}V3#^W(@GB2)F6(kL?uTUz{L9K}+b}7l|E}l5@-oLL@d!3>>AvG?;t;@C+ZefOE%3k(Vu=L!PzQJdR)CwD) zThrDGX~d?NUw!Xd-Nf(=cfML3*=>qykbiqs9Jo5%e8yL_3Vy8`R5KVHK6eW z(Z_va6z#4}A_~dT7`VT8l(^l0*>~g3!B*b{iVPe*4RRV^@=jOO&>^ex6kZE`=XC6F zmD5 
zeoj2KLz_RKF8|OjY5!LT`p%r00OYQCv(_L8foe|RkfY9zk2u`dZn>It#CEdPQx5pf+1wA-90SCP>lQ>zBRyIz%+5#i76C4rV zOO;kHUF8FZ3-*=3K{{K)9-&x$#c$2kn6oJE1^K*VPDfuPO=IYZZ4zKkRZK+bVR3zX z3MNaMl<4$yYlz`{@gL#l6M3IpScQN?Rb6i+%Jg}OgkU_ zh(qr7nY*a28iocZiu~|rCMY{-LGfE+fjai!LKB>%_1&w;TtAjeZUj9za?JL3jiCpO zw^{M5;Cl$QjR;M4_{+nTE;RGVmt)cxJDp1$7Y-(hWQ67`xaij~(Qz9G{KQQtdn!smI2-C>MDO>}A%N?pO z)aBCr;#48x7T7HVr58+8^;zMk!Ysc{3uzIvYzyX+uyZtJDo-Zq7ij1KvM z-*D|EOA%_pqGH8;i4zC3tKC$mq#CA4%5RKOjyoUybou`Ahe)E0o_M_x@;2?fy(Gi@ zv^MBqmZ!2H?h?%sRBOVOe3ZU0X1+p&~}b4 zN-rEdc}m_T3lZJ2MS{o2Yj4DO@Z9nmx1E99S&B}I7f@|!g*Vdf-(6ok9}=b5d$z8t z<14HVx~v_p0>>-^#?Z~E%U$WM&ll!DeVe;xTt3~Xa`3HG`e3Wp;l*oN2uo*BS3I zG6QyrOci17+fDjtdc#P06y(NXX$|K25`u>Zf?SpNM)TwodL+m9jYTAmd4%60-RvTEjLp-)Ztx8J?x zbAn#)6QLfLD9i3%7#P0YX6$HwGqO!MGZsp~kt?u8KbG@p$IU|>v<|T_wCf~Lk3LQr> zwZbdp-_Vvo`+|&f4Q?0unVk!)Q@VlP&o2pe#jT*>o750Jh}9(%EN^aNG5uP4@I^Bk z=v_TxjQJc?tQ11DXyu8(G4a!acBKwLx&Huas@H?%vPtd_pR!R!CN$l2I4j{sxz0EDx;Iw#HeFl3h>XvT*Zj{$`}6%X z_3O?joSL^Gl+bv21B}?%7S4sv=Ins~C7hT3j1T9hs$df|g>r)waqTb1PIZj`I__Fq z{5|SS!*jDWo0OeL(8)U)0#`##4iS}3Ho#^Exz9s_b1AIWa=)_hPUH^1MyXI!{Iqi9@0|uKekvB7=%wBaI-9a**zE*NtXR7bxg%|+u z%DTmyoj}4F<^e7=N2No(YJQtgMfFJ zI5nL=Dvp0C*1u3w5X<$X{bQaw+kI8X>0XU4vz4mxT-oo85P?{teS0uhw1oJtbXF`9_W3XqmF2)} zc-k}GvcFqOtvCF>ip7;@htFQp4p7$jE*W5SX~rU&V5wz(AZzra16h-T9#d8zOH;AE z-=J`MzTp{pp>S{3;<*h%+Mw^Ix5+DNNr<|g&K0DXxU8Ua+AMEXH?=q~p}OK~3$-uQ zUw*FSSgpyhRe2ZX8)7cK0FnjO!CAG22mVQ5yfHv?+V9FPU1;!`2ij{zXOcMQE4Ebs z^v>8YEvY4gf;WqWgdJ;kMHNOL(f{<7g}RBI66Kvt!$fj}VVM?-4`cTqBtIiH-4i}q z{KT9V2nC<4{9vCbswIn09-f?MKQJueC|7u%2wZ@>DyParnL$cX zfCw%UAii4DX*=*hVNAQu{GFJ^f@K2)V@`QJl@1=LO9Tlw;y7QhC4>4!r9A*LrT3HB zYkC;l4^LOKSaqHTYR<)B&dWhIZQ z8N|nh@D~wJ3B74PH0d2)acC!$$+q^SO`aCM4uyr+qz@)rkJ9FfE+C8*^^gUFosA4U|?69NB(*^44RoN~3q^Szho44Hwd*Dz&^A zOo&)@!}8<1A)!8P=JMlrV4B&Rzdu{SbkE)%IqUJs!x2xY^pEuQ+!^aj!=T+#<%Wq! 
z?`7x>Oh=zj?Qhzq8Zb#q>C{0c@d$3sgo`~bN^|>k6l^lG)Bi5|NJoMpG$W{Eup+aJ ztNv?vzcTck0@ge|p{h^;4Aw9~qUIUj6XJ_}^!wqZ4dF!%z3`q|2S*)A07lQ`2wG(<5Szx&9sqf+bR`aMHGH#B3r;t>ul&m*#bTd- zcM-4FbKM@5`og38gTa%Ej=>IMz3^l@Qxp)rBb40w2Lpj-@9GOQ!8s^MLo{Pp`gC8( zJeL-5b^UQP`U}c0uQ7L4iu~cRIbWrqADruMzej7L)uOO7Quqq9*-YZ-59xu_p|=4u5a z`8yIuB&gq=_F}A#+c_oM1~3)3+{?vfKR8RY^-;*4iph&p2$lVdi|pzpd_sX6STL`= ztS82vUVR_*~?s{Og)^;Ue($#C&-{97O&b}2Sg*gtqDDPsc{0riaOzm}ark?MG}v(wN3)UXva zJpWQ`=BwERt%8&^O>}HLDc>hTKuLX~B;a+v{)t|9Rwl zI2j{zod02*PN*QG+ZOLue9{$jk{()$1l=xX)*^xPbbc=lCaInBo}v>>#9kWNr4Z8z zdwLxoEE*Q-U2ipKYj*^_rw#UFUl``lEfYV)%yQs39zw^k znRnIX(b^Wz+BeWs*?Avd$i`QZLG}q>+>IM(i=cFoRh<+p!^Gr9CI~Mv)+MtC0Rsx# z@d_I&Z8OAt@h7&r?~yR0L^y}ynn9b7usbA(;LHcrkrOAc#NTyew=>&e>vni!p?@BY zxyVN?H-zYJdcs0f^4t>D$v@9o>?%q&`I1@>NJ<7!`W$+^;=nuNt@dMqh{32496mOb zBp0#o>&NB`I`ziTIpEi}Tixo1E4QJz%`YZC%$cIV$cl_@2GwO@n_9bKy6I5rs+FCy(Sq%&Ny!@(P6e@Xp`R}GdVXb)6 zp!0asAm#EC)1Z&|aFY>l8Wg96Q#LkYANkWX2y|YCbL`%{SH0Szm?w{;0QFhrq#mk& zEYsp^v!_A3qN2fDik5%Z(gm8l{%gTXx%=_a;rc+4jZn?MWd=t~JPUX|9u;XC@<03d z{MWr^fT@We0m%6NV;%T!CE;50TFSf{kdt8GDn2^gg?_BB1%=zIj>mh`(}LPVwT8z} z4!^c9AOC70JYKzxnT{2G z`{!X~8ppvk!KXr>EDisrq{**;tb3GHENHq<2}Bh|annZMQ|fnMYA$}Zb1Q=vH%s%@ z4@R3~rsMAI^$P`}np;T@d31;2i%J99*XBEcg3uXjz`fQx^T#C?Ev8(sk6ULKy)DTi;`X9X6z130~>+xqodu{W!H6b z_z`UjY`6++pqtw=(q6mkGP-eGrv2w|33ak!4$E|Gn6z}62u4=I4yOc}Z}P{YWCNb~ z&s5t5nA`22GRVd_(``}WdR>Y^1}3N^l-g)IdC)^By{G2Uy(0Zm;T63!&)wKWxK33t z$8w#AXOv7)qZ7*X@`79y5fJqycH5y-3WHLm5=5ev=f6Dxv_i>x+*=mjzb9iswh&IO z*JYPE&NFR4|Fq=SDLI6pS0|G&S6*IaUqBaElOd6js^|^%>EdewB%_2aYE@cxIKjE<1|kWANC}~%7Kx_D?^>WS(&TGQ`4!8Qd6tmXqjT09=LEz|V()N{s(YPzR-=p~ zPP<477Lm-uv}XieuQZxoqV1<%AFUz2EQo3#vv&ErU2RGZG7UdP$89PJ4~+;rFIU27 z&V+al?kjCpQHCqAaKI3FM(q){*ZO;1Qhfo+ zh}xxfStU;n?DV(6Nlo9ycVZcPfW{M3HWqB$DiEEgQ7)#y*Z(!;^ZD%-tpmRLJ>cyG z(qZ3@zq8TkCGSAUvhQ)||GZ}`ug&;<&Y4G?9lkL_n3@QHutTm{w)ngE-2N-V7E;#5?n_N+9RH?{tPs;d7( z0t7|Ww5kt=88h@;B^d+k;e*@Kulm~}g}TG1nj93IvK%cvU8aznGFxe3Oi}v>R)Glv zg2uhb5Zjt{m>J?OaG>3%lxL+0v@&kvwWk-~=CxQ$iZ>?ie}>)DPW 
zZp+P~+nFso+cBmEWkvMqLi}ywQL7s+xW0^ksjIT1S|74*+V=7b znTKQ2?fBCArK0i-;amatWC`I{`_J^&C*$mQGm^*FvFYRZ?CUh#0nssCZ_=zxIM&0?n$UD~ zLluFNL=b`G2#I$gyh~Gv9H)_rE6^2I*Wdi5o5zWK>-AGBK}nITxGCqxa8pRdv&9O0 zI^9L%SEqTQ(?DXf*VQ}BQFw7%?EC}d*Cc|WMc41|%o02&hFs{Ygb#s-&*Jn$^jN;V zX1wCU!0DO((Gp`a{0Rpvl8|{(<*tuReRwYpPci77d58&o91Y>ax~Q-WktnjK2~rEc zO`&bRMZ@~G1GxX{Di&Vy$bX~ZBE|Q#yCr8Pi|6)*Qg4Epb6s~^u?Vy zYq8pezVS}VW_;3uuNvZ-KI-3GnEir$gYQ~~aUE#&D;XqHc*KsBD*BM$`=rB#e0(~8 z;)U)|ZQE*rL+SJ_QbAF9QM(kg>>h{0^=I(*TN5_hT@l(b;cD344tCx=Y@rj;_rYNu z%{WTM^B0_=Vy?S&BOF>oReqxM6%rHvlJsSc!Joo>jvusk)XD@ZpTD!#?YwvQY2x;9 zQ%vSK)qF(EIrP|$l2)2NhiRXM%6)wQH@^}>u>kUo`0 zUn^$C#7wZ}tyg-0w6g6GUyUPcZP)I|0EZ6?%eAJQf2}UHfK9PfX-0@ss_tDnnJifd zsJpGilV#uGvb|zT_`dv9N%;RwZOT{_hgW<~I~;Y}{louPaMI|l^nakydyV1bbOr@o zaYf7n#8n{3nvIlT`>QVNpE}YfKT-jy0c&|4CW; zkb9L}h`fL&-!vL7xhZ-D4sRy1EG+~3SnM5_v% z|679kTZ4R#H)k|gwfRuwJ<>)=(*`?jTaW8MPMHC19pWo+aMX>KFy`YLsQ(ByRYM|~ zo?TrD8&vb6$#uD*6nrOvIa&71>$r6R)k7e(($0<>VQcnh`+er3oma}XEk6GtBUFKt z>?`9USL{(s_q`5f%Ot2A5M=7}AERW1ws$WjcJ;Jcx(f_skBYGQx=a367@mYLC(C}8 zGiRa{gT}~Q`F$twagZR(nE7%|lOCp*wq`gfD&bwS{&7p^*YMq{3pZkGxSIE=k4p3r zmvi?jd&vLx1oFV%r^eppdo2;S*SI^^6ojTJX3PiFEW5_H*zU`?y!;oxE03E2h1GH| zyougOyfv+%Sq{pi(P3SevDfg}FKvha@l2yKMDbTrzx?^(HsezeCyS@ncu(R#BbkSX zqTeRefOd%03!!fxV|b}&(N>_!RPRB84DG9aAU}R8k{@>mC>_cU(mz@IJAhI&7v|=H z78vuL!$+6mM$R^fD%n1Rq~AJK+m%nO$)5oyI+e`};)QYsDqGAvTeE(D)5#FiCbPgx zrbO!jTFE}Cbj;dj4Bv!NTt9^uViEkXnVu(o$)21$=BLNEZH3>G*fD8*d_^u$zW??30zI2SOS?LMn~xyGdf=J{6&R`iJ>#YdO4#Ab_SP|n@2%;T6>dB zNdHkF#o{~#Y=DAnX)PnOGU@q9(Jkhnzl4SOZ$Se?t@prsw6u3hjULp_-yi#C*cs?) 
zntB(qC`P<8DGb-G%G$H~<3^Lz4qGhrmpkIxplE}o?zI+7Pf1rrlpxFRz21SmEJEv-BcW>+I2TsLbJ|P7PaD5V5t1t{p&o zg_v(v_RbcaNVT`~=s(OuTE(}nRjM;3;LLZPFFV8-q71&h^U@)TbKi31_~$k8w=Eb1 zThyVkT!d2{rDCN=yzfxNF&vGw;#S=z!2Od_ZpXqP^+3a>iT}6*vSh&I;`FP*{7?54 z7d>l;uf>^4Vzi6DT79Up8h>MGkN z6a2N=Qjs}M#`B;l*28yx@^~L`#mXtDUY6TOWQPAsp}+7yxS(RM`uq<9SQ4;;nn=Lc zf1-hvxcB4VM}V!*|G;&4CynfR4aD6Sdb+1lOw;rE4yPIU7KXOd59ea^>SEkm( zZ85YXCp%`zs|)pL4=aHw^}KxB&E8n5Vr^;9b7E@*wQ5?Q*OS9pb#mcgy1Q)P@~VQ! zf6N80k{~r&)We-$)A398SE5!*CIxik%7R>slz3L?5$}w}CRy$jU5J(>TiyCV@Sb*c zX8-$Ppm^9=igFXOy-B-_q6ekt3(yZyLbP;zU={Q`u+xUsw`lMQ90;pT36~qb*EIU! zvNO6SwbK5ff}}+UxV(V|%D1t6+>w4Q&|t;VEknVzgKr z<`a?ovza}<71g7ZM@IH7{2mRpRzwdq{S*JA2U?&Rv*lJt7qbUwzqDGf;uyb7vQMK& zUHp7U(Rt}q-^LeJ8>iC2CY}A-xUXT|o{L1>J(E#|h2Va0J_zEPeV0Zref*2dvF5c% zR~Q;ke7KEcOv@uohtlDDJJ%Rcy4%%=NrN$V7YKtsmGfu3DX= zM;7gx#)s+&sIZR|$lIVU{~<(lfKxn6VhzHGqTi|V2#_1l$Rx;;Qj8m-DR7s(fxaAR ztcH|%|LQA>z7vv|5Em-H*jHcG_DVu{tA3AGs){p5XJazma<*Q9w41Y1XcDfqhUOnUps_ z1FH?*eR3(QB)fH?p)buk%`A8;e=;{5Wtb6KPPW~rYznV97Zm$?$H((}#{>7CEe@f# zs}r#W5Z}!_xBYyPZYp<9J*exR6}pFvzCtT(^PTXOBcOjj_T#4ENP}3lEi$C z7Bp!UEq3(~Y!(Zxp`+0=r%CV4dXq9^Kl#O$Cm}XqI{nv{Ao@i?dq>SfR$1ZHGD{t8 zS3NTICl)!c3KO~_osWJ=RJORGe< z*98d>S>U5NZw`5?J!~<}Ty@1oV5#9@XUrD}`AyIuJ!k~USymB|r+RgHAWy2=5-j-}bVW4_w-d0yf3M4lw z-Gd973jef?c#A8)rMvxDtg-@AWAZtR>8W8DX0+VVX6+P}_Uv+kLVC$S>F2Cu&7`RI znF1psHbgm7vR6*I8O^tO_{7f+6ry11_|l%4)8~P2_L5=&@fu7n45|alYrCU#jAoDC zm*B39J$yf2S#Y{VZf5U>4pM1ts|OO9b-g5SO0zb3Iytl*YwCUr^J=~+BX>>{r4@MV z?4(F@sy2cEuqKqi$B}l(vqFu`o7Q|D{vhMWI({YK5BYna&yaD%vp0XC(R&gmy#wP! 
zZpaA2sNk6tA5hd95pTMi#U6Eh1>wO6cd^cSHndH`eM3zqGJD4oX3`F(j@9Kinxy%Q zmx6?@z=KM4Si9D{dKRhWv(6q`0P)uB(BM7uINTg+ct zP2c|&mCKt`Zb@T)4hna3dY1ndV^b^dsU&X@BLbmmEf@QhEEzv~n(UB2i0-DD(!6g% zh}^_bz^zD!og2rqPZYD*!(_rq(tXdg0SrSf^FtDV)7D z@UYM$>WBAz&YDYSh&p^|AmNXr;V*9aE5REGUM4uP?%%!Mky+u^;D9^K;C7$B5O2n)qD5^C^25 z%R@N&BZIL3+i?!FR6}ycEyUAasvB~FKm~mu=(wEacv+u@|8{rC4Bv-aX5ZH^?vk#X z+WlV=t+pS41D`0T9LjEWj(k{YV?lmDYsq53{0I*F0y;#KfR@-A`?!%$r9R7i0QWIf z%VL|jiHDgvlCOd#Vw)YqmN-!1tyeO44LL<~Uk_eCMpc!!rn8p6R(%zm7xFVhyhX^h zpUC1|s%LmwqcQ)OxQSgCS^Ce}~n$KYazze7iO-Up#+ z_eodYpLeI<*GB&{5(X|cNlB|RZ^8GPHkRA|-jofV`UsyS#1qsC;!fg+|0kETzp>N` zIf|dK|5btpVuZXS`>%Rp9V3B*&p_++fI{ghV9RIv zE-$mXrgOU!8&lNT_b`cu*iP&N!Swh#|LM;$*zL}w?*+egvUuU~a~qykFJKP*@i0(~ z?;)aQ53n&r#4>?$E-5Kukvi#xs{o*?>&( zPS|7UZIKH#vEAhf0&cIzAq++PJtw6vFVPjB@Hzc0qZyjhdMrJ>r?jLP+BZAmF=nOslrJa zcQV&{dt>-@Ky%>*iv|@qJ|-WvcKI&sMrG$b0imgm86%7fWjurl%AVWzUp5uIgD%qO zX(7_1-zZ`?9yGWABn1Iddqf@xik7}cU$HH7&}EL`8wYr%T)$T8CH`kt%W9KDJSC)xPAAStbBd8qAST^YU~b@xNE z0ot~rP=8NJ-(ARrLWaW+o=UP!UttXN@eHp&Ut6qmx7(!NO%OSqS|vkv;aJk9=4|efUO98F*i}_x3B}(+U^T8l>Rz zTgh;l&mIW`aKzQu$_l*Chv(j=`z+9f<5=EHZY9>ZO4Cj9bvorNv$&Hwts1i#&7paOB}FEGF!2k};0 zzgv|M|B{`*c*aNU;@WG_?R}*xZj;6HkCpmI8|*LF%N(JEaFGFWq2%!0XY+5tSn2{eg$%e29c&ws9!*?>DV!@WzzUQ7D=)ec#42hzgsInx+7K($~ zO({L9F?bBrxN$zcB+UwEUtKDDY?*82lMju#<-cY`Dj$eD@_Q9@zR)2t*)j6oq`C=D z)!HY{DL0oPM2|fsnlE^bEcR1TV5;H)e6OTST1o$qT;Rhypo-M|do($Cm3fMtIbb3G zA7mx{CnWiV=!u}@|7EhK^?N|p1e^9DLvwJ>a`M*m9l+V#Tie<->Ll}tC3GLWkIey~ zOIM-VNUBzj%9|5K_9(Af>e+6d#ypsd~6|bBmV=qcv zSqaqOTSCu{is9Bq8?dzWdY!sNu9Px*m88Z)u((Wdz!v`%Bl9G#Q3vqFm7mV7UN?K; zFZ=^B{xwjD-+KR?z%u7#lh}Ye+1Q;G!j8iJP^m$5`+#&!sc>|=`24i=ihX`?+lShk8Y+usgB-6eS>^B;v~ zM`1f6LBadV4chI|y{P9+uddkTY12K98eTJcsxZh1?CaPbeU4OP?U-bE79H_A0$P9? zULn+7T$$Y@FiV`_CCk>>SNgf0VZu>dTff%+MEu)QB+k65Mw{imJ$IYP4<#qp3RU|x zsFa_B>V^7?=?pHwpk8$CrwNGDMJ9z*O@+Q_C=3E8D6Y?izr@!(2;S3uH4E!23~oTTo&oz?l0{Cp_}|nZx5`C~LG|w@Ul1N2Tf0@(^q? 
zj(c!;i@s4v?14|l_E^Lmd|;sz^}PihClCZ3Z$qt~@ww~=cu+c?QJh)c?+odLe+Uo{ zs9z9Wvy#-pp-&}*0J2m76d1eKJ2OMny^wVEM_SlHuIJnXqJ)rrf)(4!eu4$Uaht6! z#;{^~dj!($HUXgO^m~x?mG{n$ksdk^thE!+Ujpke%m_h0lz$prUC6=JCDZW!!SP~3 zyW@B$oF~Y{EVSGFS-cGvYq{P$Fk;mk zxwUa?O@gTv9o=)O+3~0R9(RbnK*cmW$D*!kSq)0%g&MRZYo0j``jSmI1=R?N6xO1)I35lqYv^j^gY5C$E>Jjw-0-_)WqzMQ?V?aU=3Q8|ZCqOs?0qG@xgeoGS^iF6ZT?j>b3n0=7M5&=k4TL7W zNKxL#?|$ET-@W(UH^v*|<&Qlw##(Fdx!0a+{^tCxwdPtF=WLKbi6S8Oe~oT$-R$>@ zKIxV?LKX`^N;GPkdAqXs#7rDkJiqgnSyY1%dn32d-+ig>skw1GF2s{(A4I%B4^+h7 zWxe- zi4!_CksadED3g+M?F(`gP4D2}`Y4lKWO9Vm@ic(N4`R^++b~NkjATQ=Nn5jeN$rkJ z@hur)Zkr}=q2(&DuozjJCH3rK>coV&pEJsi?L0Bn*)Fan|cz)R-{2e6unIngA9 z+xHf0_a17NPeq%w{QW)Pu3n$a2pUtzD#%zT|F=Ci!TmP=GVhYOX7%e4Ngix`#L4Lq zOmk}&@yZL|&TPj&7fC`}#8(kX!!6C4xykzBHKreJa~`!gDx%AW+MOmpKMm{XnNAI7 z@$=f4g&QK(4~%<5164*Xh(j*oAl1$NLUr-v>mt{>f}$t?Dg$WWmx14+D#vUPznWDNT>WRZcyQ>ycf&nejzAvEW*j`4JN$F@( zQTWpAZrdy4F+t`xsz%n(i~GR(zGmV|)W}h%*=*y+Q2ROkHb*O$^5s$gQhZ=ml<)o~ zzD(F^u1z*X#&To!h8M>j9kf|{C;BZlrM z0LkZ<>MA|A5Q!L&o~O2Y#X*JS3w!0GgmDL_GoK=xMN)69e^E=m$09;?7CLYN5@gs3 z#s*@S0$5svpq5Xxk;u~K^Hjn2;L(1*4fuQEEowbfLJT548)n!*{42vZNpKHn&lPiO z(fRpnAY&@0i$d;yF`H3aa#BduK1Z=K!C@D1yIuvsqT@M3QW zb_u)_9Y}Zg^;hWUxW0cY`MY{jqa26f$h6!=b{;iWpv~VU|LI0;Q`3j*sHG>ksdP+$ z{?q5%928`x-{+$@>+_?vm$_VRlrQIzn$ou>G!C`F`VuGn_&uUgoRbH2;#O|`n z6hDuqv^g|hw=QAZGmAyT9F5@N#4J^FJ^`8AOQprVEgqZuQQ@rknz%%LMAokl+g9#H zadDLy#P9|2y6`eAblWyHku*0WF=nTvyK-J8L^Bv(*5l~ZX8OeUSsInCORaLBpvcx* zN?Pp0OXoJF*z#aG0oAXKBdb^V*TNaVd@8%q7lSl zSR3kMnSR;FqSh`zRnq81uB*eCL79|)kuZ3@aa3Zd^DEMZry+6Gz(f}9`1-4xZc1mU zF>Ogsae~!wrp-;UF$UA9E?JGq%&+YYz1I(7HkC^TYV7BQ+tW(R(GR$Wf*fRfuk|oR z^0SL@vl5d1j&(-iDmL?~5SCXpSYy0EaSp$+c(u}(z5x&Ea6I^ti-a5iy9o+BMClTKkHpkw%6Rjm`%9gI7 z;yc#!I9`6tcCPYJvVEl@IU-4ScA!rTWhW2voQ#Lr{#N&Hv!Dt0_}%EfC|S=*{6g0T zqRUo3KXGocs-V4RxpWv5OB!xBXnY$lb8g72yY^QhoPEu{b^~Nwr}onzy zOB}VR?(lrTxBgpiD66%f4ZsDyTX|@F)?ko_5~-(8z`~sGu*u`a*Y$f24d=_nf*%!a zKeXLar?(tGcf)xL&@64TWJnn%UWbs#m0nC%w9`&YteQwQ*4_1V)$eUr6C9sa7^c|R 
zg-+Q003(mFpF@xOed1s=k18FT4g3l*=pI64(Ig_DvR=fdSe+P=q(1vrcz8ClN+^Fe zg$F5JMlBwz8Ql2M*g620aIr{Oanh$8opCM?R>SvO+oiP=y6UVuaR_>uEc!=A4b$gk zL=-7eQqlZ$9-eNOm)gQ?yh1@zP4Bf;I>Lmyi5qSQ<@pKv$i_Fn+VesKojV*Af3khk zD~IyLTwt#y3d$VQww*^Fe~c;BDA_bKt&3E!`03o4qI(&$T-n#{?CLd&Wkn1WyHNC* zI3_(_%&l6$*$-P>0_lo5zcIQy4+xfTjS-2X#PW{*nPrcKQynS5cG@=<3%Y187fG1L zC9!GRRC0~ciYrN+=mQH)^uI2V0+^FhW2*r0FRhI7%vA^puqCGgYgBhsq5o+>%FuM% zOaX=SHbp&$k8$BWG=h^sYY@OxvvEX_oCla**hkp3zm7q&b&9rS{#BI7=j4j)5^p29 z%e;iy`&f_{`p(1c$NMl+%aldqn9WV%c&WR&(yfE*W1Tl!B~KN2WDKy^#WpKhA$2Pw zDovM5;&0gB(na6+_E^%=({`*+0^w!5P@7-K$YZs$Mu4le?@55Rvez0V(~xQ~a| z%}|q$NNA69TYSti!6M*vh^eOGXeOmdj&rC>BSg?zQ&!M2%m7emA)USk#Ld_)ihKV zfB&g|ulsvCagNDb*E)n#A+nuH;-TW#t_vGF220+x&@ePrO|x%|k{tT1Ec;JcE(6%+(5CYq&gq$v!Up{Emyi=31l+itlX7q^$iSAne*TPogl!td zYS;RKIEz?WnJsEMJHpO4x`t|>TZAo*z}V&I*utGYZDMBBOcVEzLR)bruT?{fz_0vF zXbOye)Y{rFsV~$~DNgg@k`dnz6u0KKqBI@-gVK*P88w{ir@h0sRWL>^Ehq+H_z$C( zEc*h7N<0$HlB~2I&K!^uUn&{ zth?(UwrkR|$#%k(os<>oJwo&=r>IlomuE{P75|8%K(H8|xQ5p7GPiOe-TUZ3HF7I) z?wNTIrs>wosRYzp1>@AxLVyIiK`gR&f#;4ZN??$s8q{YXYjg8M_9vbARoWo2QYE@K zt{;vzVq$cV#ct%L(ySai89h?>10ku1GjJKI8c@#6o43UW>uO_3U3w$i^)ToC@t51P z>RQpuY>W))j_Si1y>~Ro21D_eydxEBHP%WymSBmWth>w=idg^R`7O+EWeJ}Dhq3@4 z1|KQChyAk68%+tnutHvc%q-$j8k(qI{cXCx^+IH&JdPexFiURJcf2t(H@kX7{ z+PNj{Ah9J>Lr#}s^6N5*aHvS$t!#Ur??v@Vy^A&v`$RQ1&e()@P?6T%i1zl9t*8B` z+UQN!{9Z=Jx4gOuLv$#4ri00LxK-!uhBuQ0*#fF~1hqAg)LfZ1F!BKZPFYki^ffSU3UyJTPu8ewj6P z23;uz*^;;UD(@P_?Nh zHMRINMR|!|D$i1Ri>b669D8C^4o-IB^G8a@WxLkU(I9+UbfPc>#QnJ?6 zd`)WgU`1k0JXcbcJCPky(M>fi6qjpEAgL1W=f2xxLv_<=ky4IlCM}?I5h*M*;iV-= zrYEswYO@53JF!xwn@U${KsQ5Z>^Qn+@B7vr6{FkwH@k$sT5@C{8cy=cy0=;6|)X=n`o37=zvOEt~RFK$n!-Ru|cML0sVV*PNm8js=1^K(~! z!a*JJ{-us>32eETK#BRyPIF6EVDDSUUSAOT0m^=u5a@{kJLU+@&vLgfry5@op;_`* z{Do_0f&AJV0s2n008`mVOpg;Dwn8INH;hVL9BaVeu}{rMwnlCYe=%})2acybT=ew| z0K2{UKEXXh7LTt6I!(~G$S){XV)f@Or(!I7g9fE2&Y?ygBXx<+7ti#--FkLJ? 
zhqy9;~ih7X!t8Bn9MD< z`I|~Sg$6^;m`B$0kqqdP%7}Pr)=>+m<=r*97d!-9_~i9J&ou(aZyp>x0$FWYmqcpR zHR~GDR-X17If%)n+9o+M!J3SRT6>dcI?ZyomHRbpSLH`7Ku71Qg|_f@PJAKQXS&E~ zNYuG&>2Yj}vbUCupVCskp9L9HEMS&@V7^AL6jlXu0Ima%d4oSoy@ij(b4n!YORQ24 z;S8i`Fr{8>;tFeU*s?*SXJO_B8d5CXB%7$o;&W9Ta0dGgjkJ>5v~aXk!~oIM)rSgO zljL1qloWwqqYE@^(O?pn;*F%X;tov`7yHnqbv&7-h%lFBmURhl^u@@06n|NtWzGk( z9ob;^i^iJ6@&rQ%`@(U0lg##=4T84vHDEM!uJlJ4Q4#;cD}ZG`e?iy3H@#gFwm^R1 z7deb}VOdjT7DISOVt zxart8EcBbGSn`1ZVf;ZQo%l6j_+9tHZ`ZHjgDvnsq{=&Jk;Ylc`sX!%=?s_F=BO5#;DOapO*7m> z0Y3cS81%Pae(2%TO3gyWb~cw5@Xh~)LI3XZ`!+N*_%t*&w$t6sfB7$Z?)BHC-ObHt zfg@^C$%zjaz<1orol8^vz-LB%O2>u5J{B85nsl$X@v1U`Wh!a1 zOe6&#-=BvaIF=pF=R8&0h08pzZ>q!)4#l^F6dXu+UX`Pu-K*OC^&0sHaD_3&VCBPa zQtQME>nq~tyHcUXxo*!uC!Q23=irFwFnM>4cK?Z9ot?gdVM&}WpUuIdli}AW&2i5k zJ2NqjyTp2Df0isx*5Fk(q;cn7UwV|PK$VfN)GP)j<`?W~G_C^76d%LDwhfA|Z{(VS zIf~!Cn(VF3PJC~LcHmGvsr$ld!mm7L9UsV2+Tg0O+$|Bk4==t`o0n7CJoKkolAX@0 zw!;}ejzC}J;MXYKH78F436+_jT86K)G^C9FTuM-q-)*8?uchqKDSpZ7T->Ea#- zikBQ7JWAT#S5E`pZSzJ1EB<;>84xQa_WNA*QJ>cmm3`&;jq-l@8Ne(h@EsGr?*!Z0VEnLK|4P=?^=CD90fZE3 zt*zbSU9w4RS}d?lZD`#I|BKt1d;E&0%w(}Cld#QJ1Lx~vp01`H#Fj+LRbIuk2|JAH z_ZvkX=}#0PZ$e#;;5&~ZOeb_DEguQ*3B&z9V^QYj?Sd%~1PZ}Ym0_;K?B%>>Ncm9l z;8AsKIO`XA-@2gf0__Cy;Uc11cX;PTPE*>2uTwM-g>rzjDf7t|P3F+!dB;Bff&7@@ zI4`41>RIIb6UyXkr!o~6v3yQ_(Kbsz?d@`_%9NLY+TDRaTc_J7jHDa>HOOvh!%bpjWb_^S0Bl*z?vH5ao$f01!^El#Lcei~G+K5Pt4xFTS zQBQ8i%E*Ysou(J?6#7?WrVcy1xb^DOH}V@$Ks;}k)Z$xL|5g7^6B-(!&nyyiL^L>Y z4PO{~ZWtG`FQo5@QpRb%K(`IH&znXgMD!)&_E6t|b`A~& z)KN$Yn&S1h`*`-iZ1QOpZdiQNz=-_&JBWSC_j%f2oF_km`sWq4bo9te`e@J`*nK%ri*5jhzF+SJZzBMZ~X zJm775UeS~$AFR^eBW2pd=n0H5oZ3paz1<003sH0N$r6?^eI}hh{ss}(T$Yf&kB3*K zxa4HmG<>}Y_rY(N-3Q}qN)Sk1`%>JsL+93Qn_4dWz2VQBi1kT5K90W4Kj*Ia*7Spn z%aX2ke7$Xd&oIYPf2bQl8tRV8Hp$Oh=Snf6OikPH%B>?bCq+jkq?AX^V^<}ULfe?- z7WuA3DwUYSacuGU9q-KfJKQn{swQI9Z5%fC*Gr>`L!Z%#yq_<=Vp zqDZ(1T){#RP?-r#{Lk6QBEwDoDPqmm()Nm8#`iD?G!0h zX`QjH9D}06raMQsnJ@7Gqy5&;wY2?(i!ype&V#7OQY;nGx9m|A-Jd(}1A*c5!dvEJ z)Jgu>VP|JAP9;gE_yB_|tampaZ|atQ`Aen0dJ-dMd3U2PGmU0UDW+;;_DJ(* 
zvtNP4_zx6ikXv-*1wed%#}V~eK~_}+8W8mcyR?Gvd98WE7OnpqqeXmPPjNr}uP9C9 zGT)rCq yhx@@o^{J8};0)IP|J}p+zmS^RdwnF*$(aZJv9lZL_bF7sPx-ONqhh(gUi~k None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "dataloader.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=True) + + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_no_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_found_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") + 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertTrue(checker.dataloader_issues) + + desc = self.rule.get("problem").format(dataloader_duration=dataloader_duration / 1000, + dataloader_duration_threshold=self.rule.get( + "dataloader_duration_threshold")) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, dur, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["dataloader"] = [TimelineEvent({"dur": dur, "name": "dataloader"})] + return dataset + + +if __name__ == '__main__': + tester = TestDataloaderChecker() + tester.test_no_dataloader() + 
tester.test_no_slow_dataloader() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py new file mode 100644 index 00000000000..d1df810a0ec --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py @@ -0,0 +1,62 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSyncBNChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "sync_batchnorm.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_syncbn(self): + dataset = self._get_mock_dataset(1, is_empty_dataset=True) + + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_syncbn_not_reach_threshold(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") - 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_found_slow_dataloader(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") + 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertTrue(checker.syncbn_issues) + + desc = self.rule.get("problem").format(syncbn_num=self.rule.get("max_syncbn_num") + 1) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, syncbn_num, is_empty_dataset=False): 
+ dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["sync_batchnorm"] = [] + for _ in range(syncbn_num): + dataset["sync_batchnorm"].append(TimelineEvent({"name": "SyncBatchNorm"})) + return dataset + + +if __name__ == '__main__': + tester = TestSyncBNChecker() + tester.test_no_syncbn() + tester.test_syncbn_not_reach_threshold() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py new file mode 100644 index 00000000000..360363ce371 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py @@ -0,0 +1,55 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSynchronizeChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "synchronize.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_synchronize_stream(self): + dataset = self._get_mock_dataset(1, [], is_empty_dataset=True) + + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def test_max_synchronize_stream(self): + dataset = self._get_mock_dataset(100, [], is_empty_dataset=False) + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def _get_mock_dataset(self, 
total_count, slow_synchronize_stream, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["synchronize_stream"] = TimelineEvent( + dict( + total_count=total_count, + slow_synchronize_stream=slow_synchronize_stream, + rule=dict(max_synchronize_num=10, problem="", solutions=[]), + ) + ) + return dataset + + +if __name__ == '__main__': + tester = TestSynchronizeChecker() + tester.test_no_synchronize_stream() + tester.test_max_synchronize_stream() diff --git a/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py new file mode 100644 index 00000000000..51acf3b8e24 --- /dev/null +++ b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py @@ -0,0 +1,145 @@ +import os +import shutil +import stat +import json + +import unittest +from profiler.advisor.interface.interface import Interface +from profiler.advisor.common.analyzer_scopes import SupportedScopes + + +class TestFrequencyAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT" + DEVICE_DIR = "./ascend_pt/PROF_000001_20240415174447255_OAANHDOMMJMHGIFC/device_0" + interface = None + err_interface = None + + def tearDown(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + self.clear_htmls() + + def setUp(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.TMP_DIR): + os.makedirs(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.OUTPUT_DIR): + os.makedirs(TestFrequencyAdvice.OUTPUT_DIR) + if not os.path.exists(TestFrequencyAdvice.DEVICE_DIR): + os.makedirs(TestFrequencyAdvice.DEVICE_DIR) + self.clear_htmls() + + @classmethod + def clear_htmls(cls): + current_path = os.path.dirname(os.path.abspath(__file__)) + for filename in os.listdir(current_path): + # 
检查文件是否以“att”开头 + if filename.startswith("att"): + # 构建文件的完整路径 + file_path = os.path.join(current_path, filename) + # 删除文件 + os.remove(file_path) + + @classmethod + def get_basic_trace_view(cls): + # Python pid + py_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 1, "args": {"name": "Python"}} + # ascend pid + ascend_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 4, "args": {"name": "Ascend Hardware"}} + # ascend pid + cann_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 5, "args": {"name": "CANN"}} + # ascend hardware ops + ah_event1 = {"ph": "X", "name": "Slice1", "ts": "1699529623106750", "dur": 100, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + ah_event2 = {"ph": "X", "name": "Slice2", "ts": "1699529623106888", "dur": 80, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + # flow event + flow_event_s = {"ph": "s", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "200", "args": {}} + flow_event_e = {"ph": "f", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "1699529623106750", "args": {}} + return [py_pid_data, ascend_pid_data, cann_pid_data, ah_event1, ah_event2, flow_event_s, flow_event_e] + + @classmethod + def create_info_json(cls): + info = { + "DeviceInfo": [ + { + "id": 7, + "env_type": 3, + "ctrl_cpu_id": "ARMv8_Cortex_A55", + "ctrl_cpu_core_num": 1, + "ctrl_cpu_endian_little": 1, + "ts_cpu_core_num": 0, + "ai_cpu_core_num": 6, + "ai_core_num": 25, + "ai_cpu_core_id": 2, + "ai_core_id": 0, + "aicpu_occupy_bitmap": 252, + "ctrl_cpu": "0", + "ai_cpu": "2,3,4,5,6", + "aiv_num": 50, + "hwts_frequency": "49.999001", + "aic_frequency": "1850", + "aiv_frequency": "1850" + } + ] + } + with os.fdopen(os.open(f"{TestFrequencyAdvice.DEVICE_DIR}/info.json.0", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(info)) + + @classmethod + def create_non_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"ph": "X", 
"cat": "python_function", "name": "aten::slice", "ts": "200", "dur": 100, "tid": 2, + "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + py_event2 = {"ph": "X", "cat": "python_function", "name": "slice", "ts": "199", "dur": 200, "tid": 2, "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + # with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/msprof_20240415174455.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + @classmethod + def create_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"name": "AI Core Freq", "ts": "1699529623106000.061", "pid": 682820896, "tid": 0, + "args": {"MHz": 1850}, "ph": "C"} + py_event2 = {"name": "AI Core Freq", "ts": "1699529623106770.541", "pid": 682820896, "tid": 0, + "args": {"MHz": 800}, "ph": "C"} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + def test_run_should_run_success_when_msprof_not_contain_frequency_data(self): + self.create_info_json() + self.create_non_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(0, len(result.data.get("AI Core Frequency", []))) + result.clear() + + def test_run_should_run_success_when_trace_view_contain_frequency_data(self): + self.create_info_json() + self.create_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + 
scope = SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(2, len(result.data.get("AI Core Frequency", dict).get("data", []))) + result.clear() diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py index 44d97b248e6..80734635929 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py @@ -24,6 +24,11 @@ class ProfilingParser(BaseProfilingParser): self._enable_operator_compare = True self._enable_memory_compare = True self._enable_communication_compare = True + self._enable_kernel_compare = True + self._enable_api_compare = True + + def _update_kernel_details(self): + pass def _update_memory_list(self): pass -- Gitee From 46a6ec6b2c531e28dcc0e4809227c490ff04da25 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Mon, 5 Aug 2024 21:14:08 +0800 Subject: [PATCH 040/333] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96ut=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/graph/node_op.py | 2 +- .../builder/test_graph_builder.py | 52 +++++++++++++ .../builder/test_msprobe_adapter.py | 73 +++++++++++++++++++ .../compare/test_graph_comparator.py | 32 ++++++++ .../compare/test_mode_adapter.py | 61 ++++++++++++++++ .../visualization/graph/test_base_node.py | 64 ++++++++++++++++ .../visualization/graph/test_graph.py | 50 +++++++++++++ .../visualization/graph/test_node_op.py | 28 +++++++ 8 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py create mode 100644 
debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py index ed06e0ef733..1629caabd19 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py @@ -34,4 +34,4 @@ class NodeOp(Enum): pattern = op_patterns[index] if re.match(pattern, node_name): return op - raise Exception("Cannot parse node_name {node_name} into NodeOp") + raise Exception(f"Cannot parse node_name {node_name} into NodeOp") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py new file mode 100644 index 00000000000..66eceea4b2a --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py @@ -0,0 +1,52 @@ +import unittest +from unittest.mock import MagicMock, patch +from msprobe.pytorch.visualization.builder.graph_builder import GraphBuilder, Graph + + +class TestGraphBuilder(unittest.TestCase): + + def setUp(self): + self.construct_path = "step/rank/construct.json" + self.data_path = "step/rank/dump.json" + self.model_name = "TestModel" + self.graph = Graph(self.model_name) + self.construct_dict = { + "Tensor1": "Module1", + 
"Module1": None + } + self.data_dict = { + "Module1": {"data": "data for Module1"}, + "Tensor1": {"data": "data for Tensor1"} + } + + @patch('msprobe.pytorch.visualization.builder.graph_builder.load_json_file') + @patch('msprobe.pytorch.visualization.builder.graph_builder.load_data_json_file') + def test_build(self, mock_load_data_json_file, mock_load_json_file): + mock_load_data_json_file.return_value = self.data_dict + mock_load_json_file.return_value = self.construct_dict + + graph = GraphBuilder.build(self.construct_path, self.data_path, self.model_name) + self.assertIsNotNone(graph) + self.assertIsInstance(graph, Graph) + self.assertEqual(len(graph.node_map), 3) + + @patch('msprobe.pytorch.visualization.builder.graph_builder.save_json_file') + def test_to_json(self, mock_save_json_file): + GraphBuilder.to_json("step/rank/output.vis", self.graph) + mock_save_json_file.assert_called_once() + + @patch('msprobe.pytorch.visualization.graph.node_op.NodeOp.get_node_op') + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.get_input_output', return_value=([], [])) + def test__init_nodes(self, mock_get_input_output, mock_get_node_op): + GraphBuilder._init_nodes(self.graph, self.construct_dict, self.data_dict) + mock_get_node_op.assert_any_call("Tensor1") + mock_get_node_op.assert_any_call("Module1") + self.assertIs(self.graph.root, self.graph.get_node("TestModel")) + + def test__create_or_get_node(self): + node_op = MagicMock() + data_dict = {"node1": {}} + node = GraphBuilder._create_or_get_node(self.graph, data_dict, node_op, "node1") + self.assertIn("node1", self.graph.node_map) + self.assertEqual(node.input_data, {}) + self.assertEqual(node.output_data, {}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py new file mode 100644 index 00000000000..12ae24279fd --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py @@ -0,0 +1,73 @@ +import unittest +from unittest.mock import patch +from msprobe.pytorch.visualization.builder.msprobe_adapter import ( + get_compare_mode, + run_real_data, + get_input_output, + compare_data, + format_node_data, + compare_node, + _format_decimal_string, + _format_data +) +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestMsprobeAdapter(unittest.TestCase): + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.task_dumppath_get', return_value=(True, False)) + def test_get_compare_mode_summary(self, mock_task_dumppath_get): + mode = get_compare_mode("dummy_param") + self.assertEqual(mode, GraphConst.SUMMARY_COMPARE) + + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter._do_multi_process') + def test_run_real_data(self, mock_do_multi_process): + run_real_data("dump_path", "csv_path") + mock_do_multi_process.assert_called_once_with("dump_path", "csv_path") + + def test_get_input_output(self): + node_data = { + 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.int64', 'shape': [5], + 'Max': 2049.0, 'Min': 0.0, 'Mean': 410.20001220703125, 'Norm': 2049.0009765625, + 'requires_grad': False, 'full_op_name': 'Distributed.broadcast.0.forward_input.0'}, + {'type': 'int', 'value': 0}], + 'input_kwargs': {'group': None}, + 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.int64', 'shape': [5], + 'Max': 2049.0, 'Min': 0.0, 'Mean': 410.20001220703125, 'Norm': 2049.0009765625, + 'requires_grad': False, 'full_op_name': 'Distributed.broadcast.0.forward_output.0'}, + {'type': 'int', 'value': 0}, None] + } + node_id = "Distributed.broadcast.0.forward" + input_data, output_data = get_input_output(node_data, node_id) + self.assertIn("Distributed.broadcast.0.forward_output.0", output_data) + self.assertIn("Distributed.broadcast.0.forward_input.0", input_data) + + def test_compare_data(self): + data_dict_list1 = {'key1': {'type': 
'Type1', 'dtype': 'DType1', 'shape': 'Shape1'}} + data_dict_list2 = {'key1': {'type': 'Type1', 'dtype': 'DType1', 'shape': 'Shape1'}} + self.assertTrue(compare_data(data_dict_list1, data_dict_list2)) + + def test_format_node_data(self): + data_dict = {'node1': {'data_name': 'data1', 'full_op_name': 'op1'}} + result = format_node_data(data_dict) + self.assertNotIn('data_name', result['node1']) + self.assertNotIn('requires_grad', result['node1']) + + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.get_accuracy') + def test_compare_node(self, mock_get_accuracy): + node_ids = ["node1", "node2"] + data_dicts = [{'node1': {"input_args": [], "input_kwargs": {}, "output": {}}}, + {'node2': {"input_args": [], "input_kwargs": {}, "output": {}}}] + stack_json_data = {} + result = compare_node(node_ids, data_dicts, stack_json_data, False, False) + mock_get_accuracy.assert_called_once() + self.assertIsInstance(result, list) + + def test__format_decimal_string(self): + s = "0.123456789%" + formatted_s = _format_decimal_string(s) + self.assertIn("0.123457%", formatted_s) + + def test__format_data(self): + data_dict = {'value': 0.123456789} + _format_data(data_dict) + self.assertEqual(data_dict['value'], '0.123457') \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py new file mode 100644 index 00000000000..bece5380f04 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py @@ -0,0 +1,32 @@ +import unittest +from unittest.mock import patch +from msprobe.pytorch.visualization.compare.graph_comparator import GraphComparator +from msprobe.pytorch.visualization.graph.graph import Graph +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestGraphComparator(unittest.TestCase): + + def setUp(self): + self.graphs = 
[Graph("model1"), Graph("model2")] + self.data_paths = ["step1/rank/dump.json", "step2/rank/dump.json"] + self.stack_path = "step1/rank/stack.json" + self.output_path = "output/output.vis" + + @patch('msprobe.pytorch.visualization.compare.graph_comparator.get_compare_mode') + @patch('msprobe.pytorch.visualization.compare.graph_comparator.load_json_file') + @patch('msprobe.pytorch.visualization.compare.graph_comparator.load_data_json_file') + def test__parse_param(self, mock_load_data_json_file, mock_load_json_file, mock_get_compare_mode): + mock_load_data_json_file.return_value = "data_dict" + mock_load_json_file.return_value = "construct_dict" + mock_get_compare_mode.return_value = GraphConst.SUMMARY_COMPARE + self.comparator = GraphComparator(self.graphs, self.data_paths, self.stack_path, self.output_path) + self.comparator._parse_param(self.data_paths, self.stack_path, self.output_path) + + self.assertEqual(self.comparator.dump_path_param, { + 'npu_json_path': self.data_paths[0], + 'bench_json_path': self.data_paths[1], + 'stack_json_path': self.stack_path, + 'is_print_compare_log': True + }) + self.assertEqual(self.comparator.output_path, self.output_path) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py new file mode 100644 index 00000000000..7883a09a341 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py @@ -0,0 +1,61 @@ +import unittest +from unittest.mock import patch, MagicMock +from msprobe.pytorch.visualization.compare.mode_adapter import ModeAdapter +from msprobe.pytorch.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.pytorch.visualization.utils import GraphConst, ToolTip +from msprobe.core.common.const import CompareConst + + +class TestModeAdapter(unittest.TestCase): + + def setUp(self): + self.node_op = NodeOp.module + self.node_id = 
"node_1" + self.node = BaseNode(self.node_op, self.node_id) + self.compare_mode = GraphConst.REAL_DATA_COMPARE + self.adapter = ModeAdapter(self.compare_mode) + self.compare_data_dict = [{}, {}] + + def test_add_md5_compare_data(self): + node_data = {'md5_key': 'some_md5_value'} + compare_data_dict = {'md5_key': 'expected_md5_value'} + precision_status = ModeAdapter._add_md5_compare_data(node_data, compare_data_dict) + self.assertTrue(precision_status) + + @patch('msprobe.pytorch.visualization.compare.mode_adapter.ModeAdapter') + def test_parse_result(self, mock_mode_adapter): + mock_mode_adapter._add_summary_compare_data.return_value = (True, 0.5) + self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE + precision_status, precision_index, other_dict = self.adapter.parse_result( + self.node, self.compare_data_dict) + self.assertEqual(precision_status, True) + self.assertEqual(precision_index, 0.5) + self.assertEqual(other_dict, {}) + + def test_prepare_real_data(self): + self.adapter.is_real_data_compare = MagicMock(return_value=True) + result = self.adapter.prepare_real_data(self.node) + self.assertTrue(result) + + def test_compare_mode_methods(self): + self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE + self.assertTrue(self.adapter.is_summary_compare()) + self.assertFalse(self.adapter.is_md5_compare()) + self.assertFalse(self.adapter.is_real_data_compare()) + + def test_add_csv_data(self): + compare_result_list = ['result1', 'result2'] + self.adapter.add_csv_data(compare_result_list) + self.assertEqual(self.adapter.csv_data, compare_result_list) + + def test_add_error_key(self): + node_data = {'key': {}} + self.adapter.compare_mode = GraphConst.REAL_DATA_COMPARE + self.adapter.add_error_key(node_data) + self.assertEqual(node_data['key'][GraphConst.ERROR_KEY], + [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO]) + + def test_get_tool_tip(self): + self.adapter.compare_mode = GraphConst.MD5_COMPARE + tips = 
self.adapter.get_tool_tip() + self.assertEqual(tips, {'md5': ToolTip.MD5}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py new file mode 100644 index 00000000000..544950f3588 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py @@ -0,0 +1,64 @@ +import unittest +from msprobe.pytorch.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestBaseNode(unittest.TestCase): + + def setUp(self): + self.node_op = NodeOp.module + self.node_id = "node_1" + self.up_node = BaseNode(self.node_op, "up_node_1") + self.node = BaseNode(self.node_op, self.node_id, self.up_node) + + def test_init_and_str(self): + self.assertEqual(self.node.op, self.node_op) + self.assertEqual(self.node.id, self.node_id) + self.assertEqual(str(self.node), 'id:\tnode_1') + + def test_eq(self): + other_node = BaseNode(self.node_op, self.node_id, self.up_node) + self.assertEqual(self.node, other_node) + + def test_get_suggestions(self): + self.node.get_suggestions() + self.assertIn(GraphConst.SUGGEST_KEY, self.node.suggestions) + + def test_set_input_output(self): + input_data = {'input1': 'value1'} + output_data = {'output1': 'value2'} + self.node.set_input_output(input_data, output_data) + self.assertEqual(self.node.input_data, input_data) + self.assertEqual(self.node.output_data, output_data) + + def test_add_upnode(self): + self.node = BaseNode(self.node_op, self.node_id) + new_up_node = BaseNode(self.node_op, "new_up_node_1") + self.node.add_upnode(new_up_node) + self.assertEqual(self.node.upnode, new_up_node) + self.assertIn(self.node, new_up_node.subnodes) + + def test_add_link(self): + other_node = BaseNode(self.node_op, "other_node_1") + ancestors = ['a1', 'a2'] + self.node.add_link(other_node, ancestors) + 
self.assertEqual(self.node.matched_node_link, ancestors) + self.assertEqual(other_node.matched_node_link, ancestors) + + def test_to_dict(self): + expected_result = { + 'id': self.node_id, + 'node_type': self.node_op.value, + 'data': {}, + 'output_data': {}, + 'input_data': {}, + 'upnode': self.up_node.id, + 'subnodes': [], + 'matched_node_link': [], + 'suggestions': {} + } + self.assertEqual(self.node.to_dict(), expected_result) + + def test_get_ancestors(self): + expected_ancestors = ['up_node_1'] + self.assertEqual(self.node.get_ancestors(), expected_ancestors) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py new file mode 100644 index 00000000000..19d09874345 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py @@ -0,0 +1,50 @@ +import unittest +from msprobe.pytorch.visualization.graph.graph import Graph, NodeOp +from msprobe.pytorch.visualization.graph.base_node import BaseNode +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestGraph(unittest.TestCase): + + def setUp(self): + self.graph = Graph("model_name") + self.node_id = "node_id" + self.node_op = NodeOp.module + + def test_add_node_and_get_node(self): + self.graph.add_node(self.node_op, self.node_id) + node = self.graph.get_node(self.node_id) + self.assertIsNotNone(node) + self.assertIn(self.node_id, self.graph.node_map) + + def test_to_dict(self): + self.graph.add_node(self.node_op, self.node_id) + result = self.graph.to_dict() + self.assertEqual(result[GraphConst.JSON_ROOT_KEY], "model_name") + self.assertIn(self.node_id, result[GraphConst.JSON_NODE_KEY]) + + def test_str(self): + self.graph.add_node(self.node_op, self.node_id) + expected_str = f'{self.node_id}' + self.assertIn(expected_str, str(self.graph)) + + def test_match(self): + graph_a = Graph("model_name_a") + graph_b = Graph("model_name_b") + node_a = 
BaseNode(self.node_op, self.node_id) + graph_a.add_node(NodeOp.module, "node_id_a") + graph_b.add_node(NodeOp.module, "node_id_b") + matched_node, ancestors = Graph.match(graph_a, node_a, graph_b) + self.assertIsNone(matched_node) + self.assertEqual(ancestors, []) + + def test_dfs(self): + graph = Graph("model_name") + graph.add_node(NodeOp.module, "node_a") + graph.add_node(NodeOp.module, "node_b") + node_a = BaseNode(self.node_op, self.node_id) + result = {} + graph.dfs(node_a, result) + self.assertEqual(result, {'node_id': {'id': 'node_id', 'node_type': 0, 'data': {}, + 'output_data': {}, 'input_data': {}, 'upnode': 'None', 'subnodes': [], + 'matched_node_link': [], 'suggestions': {}}}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py new file mode 100644 index 00000000000..1a340ac8b3c --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py @@ -0,0 +1,28 @@ +import unittest +from msprobe.pytorch.visualization.graph.node_op import NodeOp + + +class TestNodeOp(unittest.TestCase): + + def test_get_node_op_valid(self): + node_name = "ModuleTest" + self.assertEqual(NodeOp.get_node_op(node_name), NodeOp.module) + + def test_get_node_op_invalid(self): + node_name = "InvalidNodeName" + with self.assertRaises(Exception): + NodeOp.get_node_op(node_name) + + def test_get_node_op_all(self): + test_cases = [ + ("ModuleTest", NodeOp.module), + ("TensorTest", NodeOp.function_api), + ("TorchTest", NodeOp.function_api), + ("FunctionalTest", NodeOp.function_api), + ("NPUTest", NodeOp.function_api), + ("VFTest", NodeOp.function_api), + ("DistributedTest", NodeOp.function_api), + ("AtenTest", NodeOp.function_api) + ] + for node_name, expected_op in test_cases: + self.assertEqual(NodeOp.get_node_op(node_name), expected_op) -- Gitee From d4be82829454a900b49c92d55aacc010bf6734bd Mon Sep 17 00:00:00 2001 From: 
zhouxianqi <13165993773@163.com> Date: Fri, 9 Aug 2024 15:08:34 +0800 Subject: [PATCH 041/333] update_precision_index --- .../module_visualization/graph/prof_node.py | 19 +++++++++++++++++++ .../graph_build/prof_graph_builder.py | 4 +++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index cfcdabbb991..7d96a49691c 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -23,6 +23,7 @@ class ProfNode(BaseNode): def __init__(self, event: TraceEventBean, parent_node=None): super().__init__(event, parent_node) self._kernel_total_list = [] + self._precision_index = 1 @property def node_id(self): @@ -66,6 +67,7 @@ class ProfNode(BaseNode): @property def data(self): return {"Input Data": self.input_data, + "precision_index": self.precision_index, "Host Self Duration(us)": round(self.host_self_dur, 2), "Host Total Duration(us)": round(self.host_total_dur, 2), "Device Self Duration(us)": round(self.device_self_dur, 2), @@ -83,8 +85,25 @@ class ProfNode(BaseNode): def is_root_node(self): return self.node_id == Constant.NPU_ROOT_ID + @property + def precision_index(self): + return self._precision_index + + @precision_index.setter + def precision_index(self, precision_index): + self._precision_index = precision_index + def update_child_nodes(self, node): self._child_nodes.append(node) def update_kernel_total_list(self, kernel_list: list): self._kernel_total_list.extend(kernel_list) + + def update_child_precision_index(self): + if not self.child_nodes: + return + max_dur = max((node.device_total_dur for node in self.child_nodes)) + min_dur = min((node.device_total_dur for node in self.child_nodes)) + diff_dur = max_dur - min_dur + for node in self.child_nodes: + node.precision_index = 1- (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 diff --git 
a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index 83331b62502..a1bd6ba000e 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -29,7 +29,7 @@ class ProfGraphBuilder: def _create_event_bean_from_ops(cls, op_list: list, name: str) -> TraceEventBean: min_start = min((op.start_time for op in iter(op_list))) max_end = max((op.end_time for op in iter(op_list))) - # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了+1 +2处理 + # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-1 +2处理 return TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) @classmethod @@ -69,6 +69,8 @@ class ProfGraphBuilder: matched_node = matched_node.binary_search(start_time) all_data = root_node.find_all_child_nodes() all_data.append(root_node) + for node in all_data: + node.update_child_precision_index() return all_data def find_bwd_module(self) -> list: -- Gitee From 553dccdf77570cbc2af339a040f063a456a7bdd1 Mon Sep 17 00:00:00 2001 From: qianggee Date: Wed, 21 Aug 2024 08:22:47 +0000 Subject: [PATCH 042/333] merge from poc --- debug/accuracy_tools/kj600/README.md | 128 +++++- .../kj600/kj600/anomaly_inform.py | 1 - debug/accuracy_tools/kj600/kj600/const.py | 4 + .../kj600/distributed/wrap_distributed.py | 32 +- debug/accuracy_tools/kj600/kj600/features.py | 4 + .../accuracy_tools/kj600/kj600/module_hook.py | 382 +++++++++++++----- .../kj600/kj600/module_metric.py | 71 +++- .../kj600/kj600/module_spec_verifier.py | 7 - .../kj600/kj600/optimizer_collect.py | 83 +++- .../kj600/kj600/unittest/test_monitor.py | 145 +++++++ debug/accuracy_tools/kj600/kj600/utils.py | 27 +- debug/accuracy_tools/kj600/pyproject.toml | 6 +- 12 files changed, 725 insertions(+), 165 deletions(-) create mode 100644 debug/accuracy_tools/kj600/kj600/const.py create mode 100644 
debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py diff --git a/debug/accuracy_tools/kj600/README.md b/debug/accuracy_tools/kj600/README.md index 1782e58bec0..6ffb45274e9 100644 --- a/debug/accuracy_tools/kj600/README.md +++ b/debug/accuracy_tools/kj600/README.md @@ -20,21 +20,122 @@ ### 2. 安装 kj600 -方式一:从 git 直接安装 +方式一:下载源码安装 ``` -pip install git+https://gitee.com/xiangsen2/kj600.git +git clone -b poc https://gitee.com/ascend/mstt.git +cd mstt/debug/accuracy_tools/kj600 +pip install . ``` -方式二:下载源码安装 +## 快速上手 +### 梯度监控 +模型训练状态的异常通常会反映在loss和梯度上,通过对模型各个模块梯度的监控,可以帮助快速定位异常的第一现场。 + +1. 输出目录 +监控结果写入tensorboard的event文件/csv中,设置输出路径(默认为`kj600_output`,通过环境变量配置) +```bash +export KJ600_OUTPUT_DIR=/xxx/output_dir ``` -git clone https://gitee.com/xiangsen2/kj600.git -cd kj600 -pip install . + +2. 在训练脚本中使能工具(Megatron-LM) + +``` +from kj600.module_hook import TrainerMon +hooker = TrainerMon("./monitor_config.json", process_group=None, params_have_main_grad=True) + +model, optimizer, opt_param_scheduler = setup_model_and_optimizer( + model_provider, model_type) +# 模型、优化器初始化后使能工具 + +hooker.monitor_gnorm_with_ad( + model, grad_acc_steps=args.global_batch_size//args.data_parallel_size//args.micro_batch_size, optimizer=optimizer, dp_group=mpu.get_data_parallel_group(), tp_group=mpu.get_tensor_model_parallel_group()) + + +# 可以在任意位置获取当前的梯度统计量, 不同调用位置不能保证reduce已完成 +reduced, unreduced = hooker.generate_wgrad_metrics() +``` + + +| 字段名字 | 是否必选 | 解释 | +| ------------------------------------------------------------ | -------- | -------- | +|"grad_acc_steps"| 必选 |梯度累积的步数,当micro step=grad acc steps时,会触发反向hook获取模型梯度| +|"optimizer"| 可选 |各种并行域reduce后的梯度在opt.step前获取,数据写入在step后进行。默认patch pytorch的优化器,传入其他优化器(如MegatronOptimizer)可以调整工具行为,如clip_grad发生在megatron的优化器中,pytorch的优化器之前。| +|"dp_group"| 可选 |训练过程中的dp_group。dp域通信后,group内所有rank的梯度相同,落盘数据冗余。提供dp_group后,工具仅保留每个dp_group的第一个rank的梯度| +|"tp_group"| 可选 
|训练过程中的tp_group。tp域通信后,group内部分参数所有rank的梯度相同,落盘数据冗余。提供tp_group后,工具仅保留每个tp_group中冗余参数在第一个rank的梯度。当前适配Megatron core_v0.6.0, 通过权重属性`tensor_model_parallel`判断是否冗余| + +3. 在json文件中配置工具 +``` +{ + "targets": { + "module": {}, + "module.module.language_model.encoder.layers.0": {"input_grad":"tuple[1]:0", "output_grad":"tuple[2]:0"} + }, + "print_struct": false, # 若不了解模型结构,可以打开print_struct打印模型结构 + "module_ranks": [0,1,2,3], # 需要监控的rank + "wg_distribution": true, + "format": "csv", # 如果不需要落盘文件,设置为 "api" + "ops": ["norm", "min", "max", "mean"], + "eps": 1e-8, + "ndigits: 6 +} +``` + +4. 结果验证 +训练日志中通常会打屏一个训练步的grad norm。提供了脚本校验落盘数据和打屏信息的一致性。 +```bash +python kj600/unittest/test_monitor.py -m kj600_output/Aug13_02-27-5 -l logs/train_gpt3_TP2_PP1_CP1_monitor.log -d 2 -t 2 +``` +`-m`指定落盘csv的路径前缀。`-l`指定训练日志。脚本通过关键词`grad norm: `匹配训练日志中的grad norm,根据实际情况修改。从落盘数据计算的grad norm和日志中的grad norm相对偏差超过1%,会有警告。`-d`、`--dp_size`声明data parallel size,`-t`、`--tp_size`声明tensor paralllel size。 +示例输出: +```txt +rank 2 is duplicated in dp group +rank 3 is duplicated in dp group +grad norm in consiste between training log and reduced gradients monitored +grad mean is in consisten between unreduced grad and reduced grad monitord. +``` +需要提供并行相关参数,具体参见: +```bash +python kj600/unittest/test_monitor.py -h +``` +### 梯度异常时序判断 +0. 训练前配置相关参数 +工具支持自动判断训练过程中的梯度异常,需要在配置文件中设置alert相关字段。`AnomalyTurbulence`会将当前数值与历史均值比较,如果相对偏差超过阈值,会在打屏信息中提示用户。如果打开`dump`选项,则会将异常梯度相关信息落盘,用于后续时序判断。 +```json + "alert": { + "rules": [{"rule_name": "AnomalyTurbulence", "args": {"threshold": 0.5}}], + "dump": true + }, +``` +1. 实例化工具时传入流水线并行group +```python +hooker = TrainerMon("./monitor_config.json", process_group=mpu.get_pipeline_model_parallel_group(), params_have_main_grad=True) +``` +照常开始训练 + +2. 
进入工具路径启动异常分析脚本: +```shell +cd kj600/ +python3 anomaly_analyse.py -d $KJ600_OUTPUT_DIR/anomaly_detected +``` +支持以下参数配置 +| 字段名字 | 解释 | 是否必选 | +| ------ | -------- | -------- | +|-d 或 --data_path| 指定梯度异常落盘文件夹,梯度监控功能输出,一般为$KJ600_OUTPUT_DIR/anomaly_detected。|是 | +|-o 或 --out_path| 排序后的异常落盘文件地址,默认在--data_path路径下落盘一个anomaly_analyse.json文件| 否 | +|-k 或 --topk| 指定保留前topk个异常,默认为8| 否 | +|-s 或 --step_list| 指定分析的step范围,默认为[]| 否 | + +## 已知问题 +- Megatron中使用流水线并行时,完成当前stage的计算并将output传递到下一个stage后,会调用`deallocate_output_tensor`释放output。当工具使能后,部分功能会给一些module注册反向hook,hook功能可能为output创建一个view副本,导致output内存无法释放。如果工具使能后出现如下报错,则需要跳过deallocate的步骤。在较新的megatron代码中,可以在`megatron/training/arguments.py`中将`kw_args['deallocate_pipeline_outputs']`设为False,或在`megatron/core/pipeline_parallel/schedules.py`中跳过`deallocate_output_tensor`的调用 +```bash +File "~/Megatron-LM/megatron/core/pipeline_parallel/schedules.py", line 117, in deallocate_output_tensor + assert out._base is None, "counter-productive to free a view of another tensor." +AssertionError: counter-productive to free a view of another tensor. ``` -# 快速上手 +## 详细配置 下面以Ascend/ModelLink训练框架为例,给出kj600工具的使用方法。 @@ -54,8 +155,10 @@ pip install . "cc_distribution": {"enable":true, "cc_codeline":[]}, "alert": { "rules": [{"rule_name": "AnomalyTurbulence", "args": {"threshold": 0.5}}], - "inform": {"recipient": "database", "connection_str": "mysql+pymysql://username:password@host:port/database"} + "inform": {"recipient": "database", "connection_str": "mysql+pymysql://username:password@host:port/database"}, + "dump": true }, + "format": "tensorboard", "ops": ["min", "max", "norm", "zeros", "id"], "eps": 1e-8 } @@ -80,6 +183,7 @@ pip install . |"wg_distribution"| 可选 | 若为true则会监控指定模块的参数梯度, 默认为false。 | |"alert"| 必选 | · "rules": 指定自动报警的异常检测机制及其相应的阈值。目前实现的异常检测是AnomalyTurbulence。 如果统计标量超出历史均值的指定浮动范围(threshold指定, 0.5意味着上浮或者下浮50%)则在控制台打印报警信息。
· "inform": 自动报警需要的配置,若想关闭自动报警删掉inform的配置即可。其中"recipient"指定自动报警的通知方式,可选值为"database"或"email",默认为"database"。
- 若"recipient"为"database",则需要指定"connection_str"字段,即数据库的连接URL,默认为{"recipient":"database", "connection_str": "mysql+pymysql://username:password@host:port/database"},若有特殊字符需要转义。
- 若"recipient"为"email",则需要指定"send_email_address"-发送方邮箱地址,"receive_email_address"-接收方邮箱地址,"send_email_username"-发送方邮箱用户名,"send_email_password"-发送方邮箱密码,"smtp_server"-发送方邮箱对应的SMTP服务器,"smtp_port"-发送方邮箱对应的SMTP端口号。默认为:
{"recipient":"email", send_email_address": "sender@huawei.com", "receive_email_address": "receiver@huawei.com", "send_email_username": "username", "send_email_password": "******", "smtp_server": "smtpscn.huawei.com", "smtp_port": "587"}| |"cc_distribution"| 可选 | 其中"enable"字段控制通信监控模块的开关;需要监控通信算子时,务必尽量早地实例化`TrainerMon`, 因为监控通过劫持原始func后挂hook实现,部分加速库初始化时会保存原始function,避免监控失效。"cc_codeline"字段指定监控的代码行,如:`train.py\\[23\\]`,默认为空列表,不特别指定;"cc_pre_hook"字段控制是否监控通信前的数据; 模块会在第二个optimize.step之前打印通信日志,包括通信api的调用栈、输入dtype、通信group。 "cc_log_only"为true时,仅打印日志,不监控通信的输入输出,并在打印后中断训练。可以根据通信日志设置"cc_codeline",规避与训练过程不相关的通信,比如一些时间、metrics的同步。| +|"format"| 可选 | 数据落盘格式,默认为tensorboard,支持可选 "csv"。 | |"ops"| 可选 |与ur_distribution、xy_distribution、mv_distribution、wg_distribution、mg_direction、cc_distribution配合,监控所选张量的min、max、norm、zeros值。其中,zeros代表监控所选张量的元素小于eps的比例,id代表监控所选的非张量本身,默认为[]。| |"eps"| 可选 |若ops里包含"zeros"则需要配置,默认为1e-8。| @@ -115,14 +219,17 @@ pip install . ``` from kj600.module_hook import TrainerMon - hooker = TrainerMon("./llama2_config.json", params_have_main_grad=True, opt_ty="Megatron_DistributedOptimizer") # or opt_ty=Megatron_Float16OptimizerWithFloat16Params + hooker = TrainerMon("./llama2_config.json", process_group=None, params_have_main_grad=True, opt_ty="Megatron_DistributedOptimizer") # or opt_ty=Megatron_Float16OptimizerWithFloat16Params hooker.hook_modules(model=model, grad_acc_steps=args.global_batch_size//args.data_parallel_size//args.micro_batch_size) ``` params_have_main_grad: 若为True则参数权重梯度为main_grad,否则为grad,默认为True。 如果不是Megatron-LM的训练框架, 可以设置对应的梯度累积步数grad_acc_steps。 - 如果要监控混合精度优化器的动量和方差, 需要在混合精度优化器构造后加入如下代码。 目前只支持Megatron_DistributedOptimizer, 使用bf16或者fp16混合精度时开启分布式优化器。 或者Megatron_Float16OptimizerWithFloat16Params, 使用bf16或者fp16混合精度选项并且不开启分布式优化器。 + 如果要监控优化器的动量和方差,需要在优化器构造后加入如下代码。 目前支持Megatron实现的优化器: + - Megatron_FP32OptimizerMon,普通优化器。 + - Megatron_Float16OptimizerWithFloat16Params, 使用bf16或者fp16混合精度选项并且不开启分布式优化器。 + - Megatron_DistributedOptimizer, 使用bf16或者fp16混合精度时开启分布式优化器。 
``` model, optimizer, opt_param_scheduler = setup_model_and_optimizer( @@ -171,6 +278,7 @@ TrainerMon.__init__(config_file_path, params_have_main_grad=True, opt_ty=None) - | 参数 | 说明 | 是否必选 | | ----- | -------------------- | -------- | | config_file_path |自己写的json配置文件路径。 | 是 | +| process_group | 传入ProcessGroup对象,用以确定pipeline并行不同rank异常间时序,megatron下通过core.parallel_state.get_pipeline_model_parallel_group()获得 | 否 | | params_have_main_grad |权重是否使用main_grad,是就为True,否则为False。默认为True。 | 否 | | opt_ty |优化器类型,有两个选项,Megatron_DistributedOptimizer:使用bf16或者fp16混合精度时开启分布式优化器;Megatron_Float16OptimizerWithFloat16Params:使用bf16或者fp16混合精度选项并且不开启分布式优化器,也适用于常规的adam优化器。如果使用的不是adam优化器,使用None。默认为None。 | 否 | diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_inform.py b/debug/accuracy_tools/kj600/kj600/anomaly_inform.py index 301ac769217..485c06d4d24 100644 --- a/debug/accuracy_tools/kj600/kj600/anomaly_inform.py +++ b/debug/accuracy_tools/kj600/kj600/anomaly_inform.py @@ -1,6 +1,5 @@ import smtplib from email.mime.text import MIMEText -import sqlite3 from datetime import datetime, timedelta from kj600.database import Database, ExceptionMessage diff --git a/debug/accuracy_tools/kj600/kj600/const.py b/debug/accuracy_tools/kj600/kj600/const.py new file mode 100644 index 00000000000..e4198a99422 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/const.py @@ -0,0 +1,4 @@ + +class Const: + vpp = "vpp" + vpp_sep = ':' \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py b/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py index 4e2d5e175ef..49e81ec5a57 100644 --- a/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py +++ b/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py @@ -106,6 +106,13 @@ class ApiRegistry: dist.Work.wait = wrapped_wait(dist.Work) +def get_process_group(process_group): + return ( + process_group + if isinstance(process_group, dist.ProcessGroup) + else dist.GroupMember.WORLD + ) + 
def stack_filter(stack): for pattern in StackBlackList: @@ -180,33 +187,12 @@ def create_async_callback_func(context, ops, args, prefix): catch_data(context, ops, args, prefix) return store_data -def get_tensor_dtype(args): - dtypes = [] - for arg in args: - if isinstance(arg, torch.Tensor): - dtypes.append(arg.dtype) - else: - dtypes.append(None) - return dtypes - -def get_group_members(args): - group = None - for arg in args: - if isinstance(arg, dist.ProcessGroup): - group = arg - if group is None: - group = dist.GroupMember.WORLD - return dist.get_process_group_ranks(group) - def create_hooks(context, monitor): def cc_log_hook(module, args, kwargs): - all_args = args + tuple(kwargs.values()) - dtypes = '|'.join([str(i) if i else '' for i in get_tensor_dtype(all_args)]) stack = ';'.join(get_callstack()) - group_members = '|'.join([str(i) for i in get_group_members(all_args)]) - monitor.cc_logged_stack[module.op_name_].add(';'.join([dtypes, group_members, stack])) + monitor.cc_logged_stack[module.op_name_].add(stack) return def cc_pre_hook(module, args, kwargs): @@ -235,8 +221,8 @@ def create_hooks(context, monitor): if (dist.is_initialized() and dist.get_rank() not in monitor.module_rank_list and monitor.module_rank_list != []): return [pre_hooks, hooks] - pre_hooks.append(cc_log_hook) if monitor.cc_log_only: + pre_hooks.append(cc_log_hook) return [pre_hooks, hooks] if monitor.cc_pre_hook: diff --git a/debug/accuracy_tools/kj600/kj600/features.py b/debug/accuracy_tools/kj600/kj600/features.py index 7810188f7d7..09b48cffdaf 100644 --- a/debug/accuracy_tools/kj600/kj600/features.py +++ b/debug/accuracy_tools/kj600/kj600/features.py @@ -11,6 +11,10 @@ def square_sum(x: torch.tensor): def get_min(x: torch.tensor): return torch.min(x) +@torch.no_grad() +def get_mean(x: torch.tensor): + return torch.mean(x) + @torch.no_grad() def get_norm(x: torch.tensor): return torch.norm(x, p=2) diff --git a/debug/accuracy_tools/kj600/kj600/module_hook.py 
b/debug/accuracy_tools/kj600/kj600/module_hook.py index 3b600b2b7f2..21c326ac019 100644 --- a/debug/accuracy_tools/kj600/kj600/module_hook.py +++ b/debug/accuracy_tools/kj600/kj600/module_hook.py @@ -2,21 +2,40 @@ import os import uuid import json from collections import defaultdict +from functools import partial from datetime import datetime import torch import torch.distributed as dist +from torch import Stream from torch.optim.optimizer import register_optimizer_step_pre_hook, register_optimizer_step_post_hook -from kj600.module_spec_verifier import get_config, validate_config_spec -from kj600.optimizer_collect import MixPrecsionOptimizerMon, print_rank_0, OptimizerMonFactory, MegatronDistributedOptimizerMon +from kj600.module_spec_verifier import validate_config_spec +from kj600.optimizer_collect import OptimizerMon, print_rank_0, OptimizerMonFactory from kj600.features import eff_rank, get_sign_matches from kj600.visualizer import HeatmapVisualizer -from kj600.anomaly_detect import AnomalyScanner, SummaryWriterWithAD +from kj600.anomaly_detect import AnomalyScanner, AnomalyDataFactory, SummaryWriterWithAD, CSVWriterWithAD, BaseWriterWithAD from kj600.anomaly_inform import AnomalyInformFactory -from kj600.module_metric import get_metrics, write_metrics_tensorboard, get_summary_writer_tag_name, TensorMetrics -from kj600.distributed.wrap_distributed import api_register, create_hooks, op_aggregate -from kj600.utils import print_warn_log, print_info_log, get_param_struct +from kj600.anomaly_analyse import AnomalyDataWriter +from kj600.module_metric import get_metrics, write_metrics_tensorboard, write_metrics_csv, get_summary_writer_tag_name, TensorMetrics, squash_param_name +from kj600.distributed.wrap_distributed import api_register, create_hooks, op_aggregate, get_process_group +from kj600.utils import print_warn_log, print_info_log, print_error_log, get_param_struct +from kj600.const import Const +from kj600.file_check import FileOpen +try: + import torch_npu 
+except ImportError: + pass + + +def param_is_not_tensor_parallel_duplicate(param, tp_group): + return (hasattr(param, 'tensor_model_parallel') and param.tensor_model_parallel) or ( + torch.distributed.get_rank(group=tp_group) == 0 + ) + +def param_is_data_parallel_duplicate(dp_group): + return torch.distributed.get_rank(group=dp_group) != 0 + class ModuleHookContext: def __init__(self, module_name) -> None: self.step = 0 @@ -35,9 +54,6 @@ class ModuleHookContext: self.format_by_arg[key_name] = target_config[self.module_name][key_name] elif key_name in ['input', 'input_grad']: self.ignore_in = True - else: - raise KeyError(f"Missing key: {key_name} of {self.module_name} in config.json") - class OptimizerContext: def __init__(self) -> None: @@ -71,30 +87,50 @@ class CommunicationContext: def aggregate(self): self.data = self._agg(self.data) +class GradContext: + def __init__(self) -> None: + self.pre = [] + self.post = [] + self.acc_metric = [] + self.acc = {} + self.actv = defaultdict(dict) + + def reset(self): + self.pre.clear() + self.post.clear() + self.acc_metric.clear() + self.acc.clear() + self.actv.clear() + class TrainerMon: tensor_metrics = TensorMetrics() # opt_ty: "Megatron_Float16OptimizerWithFloat16Params" or "Megatron_DistributedOptimizer" - def __init__(self, config_file_path, params_have_main_grad=True, opt_ty=None) -> None: + def __init__(self, config_file_path, process_group=None, params_have_main_grad=True, opt_ty=None) -> None: self.module_fwd_hook_context_by_module = defaultdict(ModuleHookContext) self.module_bwd_hook_context_by_module = defaultdict(ModuleHookContext) self.optimizer_context = defaultdict(OptimizerContext) self.cc_context = defaultdict(CommunicationContext) + self.grad_context = GradContext() + self.process_group = get_process_group(process_group) self.params_have_main_grad = params_have_main_grad - self.config = get_config(config_file_path) + with FileOpen(config_file_path, 'r') as f: + self.config = json.load(f) 
self.module_rank_list = self.config.get("module_ranks", []) + self.format = self.config.get('format', 'tensorboard') self.eps = self.config.get('eps', 1e-8) self.ops = self.config.get('ops', []) + self.ndigits = self.config.get('ndigits', 6) self.xy_distribution = self.config.get('xy_distribution', False) if not self.xy_distribution: print_rank_0("> module input/output input_grad/output_grad is not monitored. ") - # backward hook cause megatron-lm pipeline parallel schedule assert exception. # TBD: backward hook cause output tensor is view of some base tensor. root cause invesigation pending. self.forward_only = self.config.get('forward_only', False) if self.forward_only: print_rank_0("> only module forward is monitored. ") + self.backward_only = self.config.get('backward_only', False) self.ur_distribution = self.config.get('ur_distribution', False) if not self.ur_distribution: @@ -121,27 +157,69 @@ class TrainerMon: api_register.redirect_api() alert_setting = self.config.get('alert', {"rules":[]}) - self.alert_rules = AnomalyScanner.load_rules(alert_setting["rules"]) - + self.alert_rules = AnomalyScanner.load_rules(alert_setting["rules"]) anomaly_inform = AnomalyInformFactory.create_informer(**alert_setting["inform"]) if "inform" in alert_setting else None - self.optimizer_hooked = False output_base_dir = os.getenv('KJ600_OUTPUT_DIR', './kj600_output') cur_time = datetime.now().strftime('%b%d_%H-%M-%S') unique_id = str(uuid.uuid4())[:8] + if dist.is_initialized(): - if (dist.get_rank() in self.module_rank_list) or len(self.module_rank_list) == 0: - self.summary_writer = SummaryWriterWithAD( - os.path.join(output_base_dir, f"{cur_time}-rank{dist.get_rank()}-{unique_id}"), self.alert_rules, unique_id, anomaly_inform) + rank = dist.get_rank() + tensorboard_dir = os.path.join(output_base_dir, f"{cur_time}-rank{rank}-{unique_id}") + pp_stage = dist.get_group_rank(self.process_group, rank) + group_mates = dist.get_process_group_ranks(self.process_group) else: - 
self.summary_writer = SummaryWriterWithAD(os.path.join(output_base_dir, f"{cur_time}-{unique_id}"), self.alert_rules, unique_id, anomaly_inform) + rank = 0 + tensorboard_dir = os.path.join(output_base_dir, f"{cur_time}-{unique_id}") + pp_stage = 0 + group_mates = [0] + self.rank = rank + + # 初始化AnomalyData工厂 + self.anomaly_data_factory = AnomalyDataFactory(rank, pp_stage, group_mates) if alert_setting.get('dump', False) else None + + if self.format == 'tensorboard': + writer = SummaryWriterWithAD + self.write_metrics = write_metrics_tensorboard + elif self.format == 'csv': + writer = CSVWriterWithAD + self.write_metrics = write_metrics_csv + elif self.format == 'api': + writer = BaseWriterWithAD + self.write_metrics = write_metrics_tensorboard + + if (rank in self.module_rank_list) or len(self.module_rank_list) == 0: + + self.summary_writer = writer( + tensorboard_dir, + self.alert_rules, + unique_id, + anomaly_inform, + self.anomaly_data_factory, + self.ndigits + ) + # 初始化anomaly deteted文件目录 + if self.anomaly_data_factory: + self.anomaly_data_writer = AnomalyDataWriter( + os.path.join(output_base_dir, "anomaly_detected"), rank) + self.anomaly_data_writer.init_detected_json() + # A HeatmapVisualizer instance is associated with an image self.update_heatmap_visualizer = defaultdict(HeatmapVisualizer) self.ratio_heatmap_visualizer = defaultdict(HeatmapVisualizer) - self.micro_batch_number = 0 + self.micro_batch_number = 1 + + self.weight_hooked = False + self.optimizer_hooked = False + self.param_registered = False + self.vpp = False + self.dp_group = None + self.tp_group = None - self.param_name_list = [] self.param2name = defaultdict(str) + self.param_name_call_id = {} + self.call_id = 0 self.mix_precision_optimizer_mon = OptimizerMonFactory.create_optimizer_mon(opt_ty) if opt_ty is None: @@ -149,9 +227,13 @@ class TrainerMon: raise Exception("ur_distribution cannot be enabled with unknown optimizer.") if self.mv_distribution: raise Exception("mv_distribution cannot 
be enabled with unknown optimizer.") + self.verbose = False self.print_struct = self.config.get("print_struct", False) + if self.print_struct: + self.verbose = True self.struct_printed = False self.module_struct = {} + return def __del__(self): @@ -160,7 +242,7 @@ class TrainerMon: @staticmethod def set_wrapped_optimizer(_wrapped_optimizer): - MixPrecsionOptimizerMon.set_wrapped_optimizer(_wrapped_optimizer) + OptimizerMon.set_wrapped_optimizer(_wrapped_optimizer) @staticmethod def adhoc_check(target_tensor:torch.tensor, module_name:str, tensor_name:str, rank_list, ops_list): @@ -172,32 +254,67 @@ class TrainerMon: TrainerMon.tensor_metrics.stat_insert(target_tensor, ops_list, module_name, tensor_name, rank) def hook_modules(self, model:torch.nn.Module, grad_acc_steps): - # fwd=0, bkd=1 - # targets is module name list like ["xx.xxx1", "xxx.xxx2"] which can be obtained when first run. - print_rank_0("> module names:") - for name, _ in model.named_modules(): - print_rank_0(f"\t{name}") - self.micro_batch_number = grad_acc_steps + if self.module_rank_list and (self.rank not in self.module_rank_list): + return + + if not isinstance(model, list): + model = [model] + + self._register_param_name(model) - if not self.module_rank_list or (dist.is_initialized() and dist.get_rank() in self.module_rank_list): - targets = [x for x, _ in model.named_modules()] if self.print_struct else self.config['targets'].keys() - hooked_count = self._hook_module(targets, model, fwd_or_bkd=0) + self.micro_batch_number = grad_acc_steps + for vpp_stage, model_chunk in enumerate(model): + vpp_stage = f'{vpp_stage}{Const.vpp_sep}' if self.vpp else '' + targets = [x for x, _ in model_chunk.named_modules()] if self.print_struct else self.config['targets'].keys() + hooked_count = self._hook_module(targets, model_chunk, vpp_stage) print_rank_0(f"> {hooked_count} out of {len(self.config['targets'])} are monitored.") - else: - return if not self.optimizer_hooked: - self.optimizer_hooked = True - 
print_rank_0("> parameter names:") - for name, param in model.named_parameters(): - print_rank_0(f"\t{name}") - for target_module, _ in self.config['targets'].items(): - if name.startswith(target_module): # name : language_model.encoder.layers.0.mlp.weight, target_module:language_model.encoder.layers.0 - self.param_name_list.append(name) - self.param2name[param] = name self.hook_optimizer() return + def generate_wgrad_metrics(self): + if not self.wg_distribution: + return {}, {} + + unreduced = {} + if self.weight_hooked: + for metric_name in self.ops: + unreduced[metric_name] = get_metrics(metric_name, self.grad_context.acc, self.eps) + self.grad_context.acc_metric = [unreduced] + + grad_dict = {} + for param, name in self.param2name.items(): + if self.tp_group and not param_is_not_tensor_parallel_duplicate(param, self.tp_group): + continue + if self.dp_group and param_is_data_parallel_duplicate(self.dp_group): + continue + grad = param.main_grad if self.params_have_main_grad else param.grad + if grad is None: + print_warn_log(f"grad is None: {name}, maybe something wrong happened.") + continue + key = get_summary_writer_tag_name(name, 'post_grad', self.rank) + grad_dict[key] = grad + + reduced = {op:get_metrics(op, grad_dict, self.eps) for op in self.ops} + self.grad_context.post = [reduced] + + return reduced, unreduced + + + def monitor_gnorm_with_ad(self, model, grad_acc_steps=1, optimizer=None, tp_group=None, dp_group=None): + print_info_log(f'grad acc steps {grad_acc_steps}') + self.hook_optimizer(optimizer) + self.micro_batch_number = grad_acc_steps + self.backward_only = True + + self.dp_group = dp_group + self.tp_group = tp_group + + self._register_param_name(model) + self._hook_weights() + self.hook_modules(model, grad_acc_steps) + def build_tbtag_tensor_map(self, module_name, tag, tensor): metrics = {} rank = dist.get_rank() if dist.is_initialized() else None @@ -233,27 +350,29 @@ class TrainerMon: if not self.xy_distribution: return for _, fwd_context 
in self.module_fwd_hook_context_by_module.items(): + if len(fwd_context.actv) == 0: + continue if not len(fwd_context.actv) == self.micro_batch_number: print_warn_log(f"fwd_context.actv not equal to micro_batch_number: {len(fwd_context.actv)}, {self.micro_batch_number}") - for metric_name in self.ops: - write_metrics_tensorboard(metric_name, self.summary_writer, fwd_context.actv, step) + self.write_metrics(self.ops, self.summary_writer, fwd_context.actv, step, 'actv') fwd_context.actv.clear() - for _, bwd_context in self.module_bwd_hook_context_by_module.items(): - if not len(bwd_context.actvgrad) == self.micro_batch_number: - print_warn_log(f"bwd_context.actvgrad not equal to micro_batch_number: {len(bwd_context.actvgrad)}, {self.micro_batch_number}") - for metric_name in self.ops: - write_metrics_tensorboard(metric_name, self.summary_writer, bwd_context.actvgrad, step) - bwd_context.actvgrad.clear() + self.write_metrics(self.ops, self.summary_writer, [self.grad_context.actv], step, 'grad_actv') - def hook_optimizer(self): + def write_grad_tb(self, step): + if not self.wg_distribution: + return + + self.write_metrics(self.ops, self.summary_writer, self.grad_context.post, step, 'grad_reduced') + self.write_metrics(self.ops, self.summary_writer, self.grad_context.acc_metric, step, 'grad_unreduced') + + def hook_optimizer(self, optimizer=None): # in DDP by default use params_have_main_grad def optimizer_pre_step_hook(optimizer, args, kwargs): context = self.optimizer_context[optimizer] if self.print_struct and not all(value == {} for value in self.module_struct.values()) and not self.struct_printed: self._smallest_rank_print("> module struct:") self._smallest_rank_print(json.dumps(self.module_struct, indent=4)) - self.struct_printed = True if not self.cc_log_only: raise Exception("exit after first step when print model struct") if self.cc_log_only and context.step > 0: @@ -261,10 +380,15 @@ class TrainerMon: self._smallest_rank_print(json.dumps({k:[i.split(';') for i 
in v] for k,v in self.cc_logged_stack.items()}, indent=4)) raise Exception("exit after first step when print cc stack") - - context.param_exp_avg, context.param_exp_avg_sq, context.param_adam_update, context.param_adam_ratio = self.mix_precision_optimizer_mon.fetch_mv(self, - optimizer, self.param2name) + self.generate_wgrad_metrics() + + mv_result = self.mix_precision_optimizer_mon.fetch_mv(self, optimizer, self.param2name) + context.param_exp_avg = mv_result.exp_avg + context.param_exp_avg_sq = mv_result.exp_avg_sq + context.param_adam_update = mv_result.update + context.param_adam_ratio = mv_result.ratio + for param, name in self.param2name.items(): if "params_effrank" in self.config and name in self.config["params_effrank"]: context.param_effective_rank[name] = eff_rank(param.detach()) @@ -272,9 +396,8 @@ class TrainerMon: if grad is None: print_warn_log(f"grad is None: {name}, maybe something wrong happened.") continue - if self.wg_distribution: - context.param_weight_grad[name] = grad - if self.mg_direction: + + if self.mg_direction: if context.step == 0: same_direction_ratio = torch.tensor(1.) 
else: @@ -282,15 +405,11 @@ class TrainerMon: context.param_mg_direction[name] = same_direction_ratio tbtag_tensor_map = {} - if self.wg_distribution: - tbtag_tensor_map.update(self.generate_param_metrics('weight_grad', context.param_weight_grad)) if self.mv_distribution: tbtag_tensor_map.update(self.generate_param_metrics('exp_avg', context.param_exp_avg)) tbtag_tensor_map.update(self.generate_param_metrics('exp_avg_sq', context.param_exp_avg_sq)) if self.mg_direction: tbtag_tensor_map.update(self.generate_param_metrics('mg_direction', context.param_mg_direction)) - # if not tbtag_tensor_map: - # return metric_dict = {} for metric_name in self.ops: metric_dict[metric_name] = get_metrics(metric_name, tbtag_tensor_map, self.eps) @@ -299,6 +418,7 @@ class TrainerMon: cc_metrics = self.generate_cc_metrics(k, c) for op, m in cc_metrics.items(): metric_dict[op].update(m) + if not metric_dict: return context.metric_list.append(metric_dict) @@ -308,7 +428,10 @@ class TrainerMon: context = self.optimizer_context[optimizer] rank = dist.get_rank() if dist.is_initialized() else None + if self.anomaly_data_factory: + self.anomaly_data_factory.set_call_id(self.param_name_call_id) self.write_xy_tb(context.step) + self.write_grad_tb(context.step) self.write_adhoc_check(context.step) if self.ur_distribution: @@ -317,20 +440,43 @@ class TrainerMon: for param_name, _ in context.param_adam_ratio.items(): self.ratio_heatmap_visualizer[param_name].visualize(get_summary_writer_tag_name(param_name, 'adam_ratio', rank), context.step, self.summary_writer) - for metric_name in self.ops: - if not context.metric_list: - break - write_metrics_tensorboard(metric_name, self.summary_writer, context.metric_list, context.step) + if context.metric_list: + self.write_metrics(self.ops, self.summary_writer, context.metric_list, context.step, 'other') context.metric_list.clear() context.step += 1 + self.grad_context.reset() + if self.anomaly_data_factory: + 
self.anomaly_data_writer.write_detected_json(self.summary_writer.get_anomalies()) + self.summary_writer.clear_anomalies() + self.call_id = 0 + self.param_name_call_id.clear() + return + + def patch_step(func, optimizer): + def wrapper(*args, **kwargs): + optimizer_pre_step_hook(optimizer, args, kwargs) + out = func(*args, **kwargs) + optimizer_post_step_hook(optimizer, args, kwargs) + return out + return wrapper + + if self.optimizer_hooked: return - if not self.module_rank_list or (dist.is_initialized() and dist.get_rank() in self.module_rank_list): - register_optimizer_step_pre_hook(optimizer_pre_step_hook) - register_optimizer_step_post_hook(optimizer_post_step_hook) + + if optimizer: + optimizer.__class__.step = patch_step(optimizer.__class__.step, optimizer) + + else: + if not self.module_rank_list or (dist.is_initialized() and dist.get_rank() in self.module_rank_list): + register_optimizer_step_pre_hook(optimizer_pre_step_hook) + register_optimizer_step_post_hook(optimizer_post_step_hook) + self.optimizer_hooked = True return def _smallest_rank_print(self, msg): + if not self.verbose: + return if dist.is_initialized(): if self.module_rank_list: if dist.get_rank() == min(self.module_rank_list): @@ -341,7 +487,35 @@ class TrainerMon: else: print_info_log(msg) - def _hook_module(self, target_names, module: torch.nn.Module, fwd_or_bkd): + def _register_param_name(self, model): + if self.param_registered: + return + if not isinstance(model, list): + model = [model] + + if len(model) > 1: + self.vpp = True + self._smallest_rank_print('vpp enabled') + + for vpp_stage, model_chunk in enumerate(model): + prefix = f'{Const.vpp}{vpp_stage}{Const.vpp_sep}' if self.vpp else '' + for param_name, param in model_chunk.named_parameters(): + name = prefix + squash_param_name(param_name) + for target in self.config['targets'].keys(): + if param_name.startswith(target) and param.requires_grad: + self._smallest_rank_print(f'>> monitoring: {name}') + setattr(param, 
"zero_out_wgrad", True) + if name in self.param2name.values() or name == '': + print_error_log(f'same name {name} for different param. Current param is {param_name}. \ + May be error of squash_param_name') + raise Exception("param with same name will be overwriten.") + self.param2name[param] = name + break + + self.param_registered = True + + + def _hook_module(self, target_names, module: torch.nn.Module, vpp_stage=''): if '_modules' not in module.__dict__: # nothing to hook return 0 @@ -352,8 +526,6 @@ class TrainerMon: self.module_struct[context.module_name].update( {"input": f"{get_param_struct(module_input)}", "output": f"{get_param_struct(module_output)}"}) return - if not self.xy_distribution: - return if not context.format_by_arg: context.set_format_by_arg('input', self.config['targets']) context.set_format_by_arg('output', self.config['targets']) @@ -390,11 +562,11 @@ class TrainerMon: self.module_struct[context.module_name].update( {"input_grad": f"{get_param_struct(input_grad)}", "output_grad": f"{get_param_struct(output_grad)}"}) return - if not self.xy_distribution: - return if not context.format_by_arg: context.set_format_by_arg('input_grad', self.config['targets']) context.set_format_by_arg('output_grad', self.config['targets']) + if not context.format_by_arg: + return if not context.verified: if not context.ignore_in: context.focused_in_col = validate_config_spec(context.format_by_arg['input_grad'], input_grad, context.module_name, 'input_grad') @@ -404,32 +576,62 @@ class TrainerMon: tbtag_tensor_map = {} if not context.ignore_in: cared_input_grad = input_grad if context.focused_in_col is None else input_grad[context.focused_in_col] - tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name, 'input_grad', cared_input_grad)) + tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name+f'_{context.micro_step}', f'input_grad', cared_input_grad)) cared_output_grad = output_grad if context.focused_out_col is None else 
output_grad[context.focused_out_col] - tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name, 'output_grad', cared_output_grad)) - metric_dict = {} - for metric_name in self.ops: - metric_dict[metric_name] = get_metrics(metric_name, tbtag_tensor_map, self.eps) + tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name+f'_{context.micro_step}', f'output_grad', cared_output_grad)) + if context.micro_step == 0 and context.actvgrad: print_warn_log(f"actvgrad context of {context.module_name} is not empty when first micro_step, maybe something wrong happened. Now clear it.") context.actvgrad.clear() - context.actvgrad.append(metric_dict) + for metric_name in self.ops: + self.grad_context.actv[metric_name].update(get_metrics(metric_name, tbtag_tensor_map, self.eps)) + context.micro_step += 1 if context.micro_step == self.micro_batch_number: context.micro_step = 0 context.step += 1 return - hooked_count = 0 - for name, submodule in module.named_modules(): - self.module_struct[name] = {} - if name in target_names: - submodule.register_forward_hook(fwd_hook_fun) - self.module_fwd_hook_context_by_module[submodule] = ModuleHookContext(name) - if not self.forward_only: - submodule.register_full_backward_hook(bwd_hook_fun) - self.module_bwd_hook_context_by_module[submodule] = ModuleHookContext(name) - print_rank_0(f"> {name} is monitored successfully") - hooked_count += 1 + if self.backward_only and self.forward_only: + print_warn_log('not enable backward_only and forward_only simultaneously') + + hooked_count = 0 + if self.xy_distribution or self.print_struct: + for module_name, submodule in module.named_modules(): + name = vpp_stage + module_name + self.module_struct[name] = {} + if name in target_names or module_name in target_names: + if not self.backward_only: + submodule.register_forward_hook(fwd_hook_fun) + self.module_fwd_hook_context_by_module[submodule] = ModuleHookContext(name) + if not self.forward_only: + 
submodule.register_full_backward_hook(bwd_hook_fun) + self.module_bwd_hook_context_by_module[submodule] = ModuleHookContext(name) + print_rank_0(f"> {name} is monitored successfully") + hooked_count += 1 return hooked_count + + def _hook_weights(self): + context = self.grad_context + + @torch.no_grad + def param_hook(*args, context_dict, param, key, name): + param.micro_step += 1 + self.param_name_call_id[name] = self.call_id + self.call_id += 1 + if param.micro_step == self.micro_batch_number: + param.micro_step = 0 + if self.params_have_main_grad: + context_dict[key] = param.main_grad.clone() + else: + context_dict[key] = param.grad.clone() + + for param, name in self.param2name.items(): + key = get_summary_writer_tag_name(name, 'acc_grad', self.rank) + setattr(param, 'micro_step', 0) + param_tmp = param.expand_as(param) + grad_acc = param_tmp.grad_fn.next_functions[0][0] + grad_acc.register_hook(partial(param_hook, context_dict=context.acc, param=param, key=key, name=name)) + + self.weight_hooked = True diff --git a/debug/accuracy_tools/kj600/kj600/module_metric.py b/debug/accuracy_tools/kj600/kj600/module_metric.py index e09536b072c..b85e82c4829 100644 --- a/debug/accuracy_tools/kj600/kj600/module_metric.py +++ b/debug/accuracy_tools/kj600/kj600/module_metric.py @@ -1,15 +1,26 @@ import math +import re import statistics -from kj600.features import square_sum, get_max, get_min, get_zeros, get_nans, get_norm +from kj600.features import square_sum, get_max, get_min, get_zeros, get_nans, get_norm, get_mean def get_summary_writer_tag_name(module_or_param_name:str, tag:str, rank): if rank is None: return f"{module_or_param_name}/{tag}" else: - return f"{module_or_param_name}/{rank}/{tag}" - + return f"{module_or_param_name}/rank{rank}/{tag}" + +def squash_param_name(param_name): + name = '' + for pattern in ['(?<=layers\.)[\d]*.*', 'embeddings?\.(.*)', 'final.*', 'output.*','norm.*']: + match = re.findall(pattern, param_name) + if match: + name += match[0] + break + 
if name == '': + name = param_name + return name # 用于存储所有metric实现类的注册表 config_metric_registry = {} @@ -28,7 +39,7 @@ class TensorMetrics: self.metrics = {} #tensor_tag --> [] self.cur_idx = {} - fun_map = {"norm": get_norm, "max": get_max, "min": get_min} + fun_map = {"norm": get_norm, "max": get_max, "min": get_min, "mean": get_mean} #get stats and insert into metrics dictionary def stat_insert(self, tensor, stat_ops, module_name, tensor_name, rank, eps=1e-8): prefix = get_summary_writer_tag_name(module_name, tensor_name, rank) @@ -75,6 +86,19 @@ class MinMetric(Metric): summary_writer.add_scalar(f'{key}_min', min_value, step) +@register_config_metric("mean") +class MeanMetric(Metric): + @staticmethod + def get_metric_value(tensor, eps): + return get_mean(tensor) + + @staticmethod + def metric_tensorboard(metric_name, summary_writer, metric_value, step): + for key in metric_value[0][metric_name].keys(): + mean_value = sum([item[metric_name][key].item() for item in metric_value]) / len(metric_value) + summary_writer.add_scalar(f'{key}_mean', mean_value, step) + + @register_config_metric("max") class MaxMetric(Metric): @staticmethod @@ -134,12 +158,12 @@ class IdentMetric(Metric): return tensor @staticmethod - def metric_tensorboard(metric_name, summary_writer, metric_value, step): #metric_value is a dict, key is parameter name and value is a list of scalar tensor + def metric_tensorboard(metric_name, summary_writer, metric_value, context): #metric_value is a dict, key is parameter name and value is a list of scalar tensor if len(metric_value) == 1: for key, value in metric_value[0][metric_name].items(): if not value: continue - summary_writer.add_scalar(f'{key}_identical', value.item(), step) + summary_writer.add_scalar(f'{key}_identical', value.item(), context) def get_metrics(metric_name, tag2tensor, eps): @@ -150,9 +174,32 @@ def get_metrics(metric_name, tag2tensor, eps): raise ValueError(f"Not supported this metric, expected metric: 
{config_metric_registry.keys()}, actual metric: {metric_name}") from e -def write_metrics_tensorboard(metric_name, summary_writer, metric_value, step): - try: - fun_metric = config_metric_registry[metric_name] - return fun_metric.metric_tensorboard(metric_name, summary_writer, metric_value, step) - except KeyError as e: - raise ValueError(f"Not supported this metric, expected metric: {config_metric_registry.keys()}, actual metric: {metric_name}") from e +def write_metrics_tensorboard(ops, summary_writer, metric_value, step, prefix=''): + for metric_name in ops: + try: + fun_metric = config_metric_registry[metric_name] + fun_metric.metric_tensorboard(metric_name, summary_writer, metric_value, step) + except KeyError as e: + raise ValueError(f"Not supported this metric, expected metric: {config_metric_registry.keys()}, actual metric: {metric_name}") from e + +def write_metrics_csv(ops, summary_writer, metric_value, step, prefix=''): + for metric_name in ops: + try: + fun_metric = config_metric_registry[metric_name] + fun_metric.metric_tensorboard(metric_name, summary_writer, metric_value, step) + + except KeyError as e: + raise ValueError(f"Not supported this metric, expected metric: {config_metric_registry.keys()}, actual metric: {metric_name}") from e + + if not summary_writer.header: + if prefix in ['actv', 'grad_actv']: + summary_writer.header = ['param_name'] + ['input_'+op for op in ops] + ['output_'+op for op in ops] + else: + summary_writer.header = ['param_name'] + ops + + for key in metric_value[0][ops[0]].keys(): + if 'vpp' in key: + summary_writer.header.insert(0, 'vpp_stage') + break + summary_writer.write_csv(prefix, step) + summary_writer.header = [] diff --git a/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py b/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py index 395aa82f17a..66ea2805907 100644 --- a/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py +++ b/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py @@ -2,15 +2,8 
@@ import json import re import abc import torch -from kj600.utils import check_file_valid_readable -def get_config(file_path='config.json'): - check_file_valid_readable(file_path) - with open(file_path, 'r') as file: - config = json.load(file) - return config - # 用于存储所有validator实现类的注册表 config_validator_registry = {} diff --git a/debug/accuracy_tools/kj600/kj600/optimizer_collect.py b/debug/accuracy_tools/kj600/kj600/optimizer_collect.py index 285f17ca6dc..61ae9de64a7 100644 --- a/debug/accuracy_tools/kj600/kj600/optimizer_collect.py +++ b/debug/accuracy_tools/kj600/kj600/optimizer_collect.py @@ -1,10 +1,12 @@ -from collections import defaultdict +from abc import ABC, abstractmethod +from collections import defaultdict, namedtuple import torch import torch.distributed as dist -from kj600.visualizer import HeatmapVisualizer +from kj600.utils import print_warn_log -def print_rank_0(message, debug=False, force=False): + +def print_rank_0(message): if dist.is_initialized(): if dist.get_rank() == 0: print(message) @@ -12,20 +14,29 @@ def print_rank_0(message, debug=False, force=False): print(message) -class MixPrecsionOptimizerMon: +MVResult = namedtuple('MVResult', ("exp_avg", "exp_avg_sq", "update", "ratio")) + + +class OptimizerMon(ABC): wrapped_optimizer = None + @classmethod + def set_wrapped_optimizer(cls, wrapped_optimizer): + cls.wrapped_optimizer = wrapped_optimizer + + @abstractmethod + def fetch_mv(self, monitor, torch_opt, params2name): + pass + + +class MixPrecisionOptimizerMon(OptimizerMon): def __init__(self) -> None: self.fp16_to_fp32_param = {} - @staticmethod - def set_wrapped_optimizer(_wrapped_optimizer): - MixPrecsionOptimizerMon.wrapped_optimizer = _wrapped_optimizer - # parameter tensors we want to monitor and their names are in params2name_dict # base_optimizer is pytorch optimizer, wrapped_optimizer is a normal object with base_optimizer def fetch_mv(self, monitor, torch_opt, params2name): - mix_prec_opt = 
MixPrecsionOptimizerMon.wrapped_optimizer + mix_prec_opt = self.wrapped_optimizer if not self.fp16_to_fp32_param and mix_prec_opt is not None: for fp16_group, fp32_group in zip(mix_prec_opt.float16_groups, mix_prec_opt.fp32_from_float16_groups): @@ -44,8 +55,12 @@ class MixPrecsionOptimizerMon: param = self.fp16_to_fp32_param[param] if param in torch_opt.state: - exp_avg = torch_opt.state[param]["exp_avg"] - exp_avg_sq = torch_opt.state[param]["exp_avg_sq"] + state_param = torch_opt.state.get(param, None) + exp_avg = state_param.get("exp_avg", None) + exp_avg_sq = state_param.get("exp_avg_sq", None) + if exp_avg is None or exp_avg_sq is None: + print_warn_log(f"exp_avg or exp_avg_sq of {name} is None, maybe something wrong happened.") + continue if monitor.mv_distribution: exp_avg_dict[name] = exp_avg exp_avg_sq_dict[name] = exp_avg_sq @@ -53,15 +68,15 @@ class MixPrecsionOptimizerMon: exp_avg_dict[name] = exp_avg if monitor.ur_distribution: update_dict[name] = exp_avg / (torch.sqrt(exp_avg_sq) + torch_opt.defaults['eps']) - ratio_dict[name] = exp_avg / torch.sqrt(exp_avg_sq) + ratio_dict[name] = (exp_avg / torch.sqrt(exp_avg_sq)).nan_to_num(0) monitor.update_heatmap_visualizer[name].pre_cal(update_dict[name]) monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) - return exp_avg_dict, exp_avg_sq_dict, update_dict, ratio_dict + return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) -class MegatronDistributedOptimizerMon(MixPrecsionOptimizerMon): +class MegatronDistributedOptimizerMon(MixPrecisionOptimizerMon): def fetch_mv(self, monitor, torch_opt, params2name): - mix_prec_opt = MixPrecsionOptimizerMon.wrapped_optimizer + mix_prec_opt = self.wrapped_optimizer if not (hasattr(mix_prec_opt, "model_float16_groups") and hasattr(mix_prec_opt, "shard_fp32_from_float16_groups")): raise Exception("megatron distributed optimizer should have model_float16_groups and shard_fp32_from_float16_groups, \ if not, please 
check megatron-lm version") @@ -73,18 +88,48 @@ class MegatronDistributedOptimizerMon(MixPrecsionOptimizerMon): return self._fetch_mv_in_adam(params2name, torch_opt, monitor) -class DummyOptimizerMon(MixPrecsionOptimizerMon): +class MegatronFP32OptimizerMon(OptimizerMon): + def fetch_mv(self, monitor, torch_opt, params2name): + exp_avg_dict = defaultdict(float) + exp_avg_sq_dict = defaultdict(float) + update_dict = defaultdict() + ratio_dict = defaultdict() + + for param, name in params2name.items(): + if param in torch_opt.state: + state_param = torch_opt.state.get(param, None) + exp_avg = state_param.get("exp_avg", None) + exp_avg_sq = state_param.get("exp_avg_sq", None) + if exp_avg is None or exp_avg_sq is None: + print_warn_log(f"exp_avg or exp_avg_sq of {name} is None, maybe something wrong happened.") + continue + if monitor.mv_distribution: + exp_avg_dict[name] = exp_avg + exp_avg_sq_dict[name] = exp_avg_sq + if monitor.mg_direction: + exp_avg_dict[name] = exp_avg + if monitor.ur_distribution: + update_dict[name] = exp_avg / (torch.sqrt(exp_avg_sq) + torch_opt.defaults['eps']) + ratio_dict[name] = (exp_avg / torch.sqrt(exp_avg_sq)).nan_to_num(0) + monitor.update_heatmap_visualizer[name].pre_cal(update_dict[name]) + monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) + return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) + + +class DummyOptimizerMon(OptimizerMon): def fetch_mv(self, monitor, torch_opt, params2name): - return None, None, None, None + return MVResult(exp_avg=None, exp_avg_sq=None, update=None, ratio=None) class OptimizerMonFactory: @staticmethod - def create_optimizer_mon(opt_ty:str): + def create_optimizer_mon(opt_ty: str): if opt_ty == "Megatron_Float16OptimizerWithFloat16Params": - return MixPrecsionOptimizerMon() + return MixPrecisionOptimizerMon() if opt_ty == "Megatron_DistributedOptimizer": return MegatronDistributedOptimizerMon() + if opt_ty == "Megatron_FP32Optimizer": + 
return MegatronFP32OptimizerMon() if opt_ty is None or opt_ty == "unknown": return DummyOptimizerMon() raise Exception("opt_ty should be Megatron_Float16OptimizerWithFloat16Params or Megatron_DistributedOptimizer or None or unknown") diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py new file mode 100644 index 00000000000..ddea3244f5c --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py @@ -0,0 +1,145 @@ +import sys +import os +import re +import argparse +import pandas as pd +from glob import glob +from collections import defaultdict + + +def parse_logfile(logfile): + grad_norm = [] + step = [] + with open(logfile) as f: + for line in f.readlines(): + if 'consumed samples' in line: + grad_norm.append(float(re.findall('(?<=grad norm\: )[\d\.]*', line)[0])) + # step = int(re.findall('(?<=iteration)[ \d]*', line)[0]) + return grad_norm + + +def parse_monitor_output(output_dir): + reduced = {} + unreduced = {} + for dir in glob(output_dir+'*'): + rank = int(re.findall('(?<=rank)[\d]*', dir)[0]) + unreduced[rank] = [] + reduced[rank] = [] + for file in os.listdir(dir): + # step = int(re.search("(?<=reduced\_)[\d]*", file)[0]) + # if step != 0: + # continue + df = pd.read_csv(os.path.join(dir, file)) + if '_unreduced_' in file: + unreduced[rank].append(df) + pass + elif '_reduced_' in file: + reduced[rank].append(df) + else: + print(f'unexpected file {file} in {dir}') + return reduced, unreduced + +def valid_reduce(reduced, unreduced, tp_size, dp_size, sequence_parallel): + steps = len(reduced[0]) + world_size = len(reduced) + errors = [] + for index, row in unreduced[0][0].iterrows(): + param = row['param_name'] + is_tp_duplicate = False + for step in range(2): + # sum reduced + reduced_mean = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + continue + df = reduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + if step == 0: + is_tp_duplicate = True + continue + reduced_mean += value[0] + + # sum unreduced + unreduced_mean = 0. + for rank in range(world_size): + df = unreduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + continue + unreduced_mean += list(df[df['param_name'] == param]['mean'])[0] + + unreduced_mean /= dp_size + if is_tp_duplicate and (not sequence_parallel or 'embedding' in param): + unreduced_mean /= tp_size + try: + assert_equal(unreduced_mean, reduced_mean) + except AssertionError as e: + errors.append([param, step, e, is_tp_duplicate]) + if errors: + print(errors) + else: + print(f'grad mean is in consist between unreduced grad and reduced grad monitord.') + + + +def assert_equal(a, b): + if b == 0 or a == 0: + return + if b == 0: + rel_diff = a + elif a == 0: + rel_diff = b + else: + rel_diff = abs(a/b-1) + assert rel_diff<0.01, f'{a}, {b}, {rel_diff}' + + +def valid_total_norm(total_norm, reduced, duplicate_embedding): + steps = len(total_norm) + world_size = len(reduced) + errors = [] + for step in range(steps): + calculated_norm = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + if step == 0: + print(f'rank {rank} is duplicated in dp group') + continue + for index, row in reduced[rank][step].iterrows(): + if duplicate_embedding and 'word_embedding' in row['param_name']: + continue + calculated_norm += row['norm']**2 + try: + assert_equal(calculated_norm**0.5, total_norm[step]) + except AssertionError as e: + errors.append([step, e]) + if errors: + print('total norm errors: ', errors) + else: + print('grad norm in consist between training log and reduced gradients monitored') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--monitor_output', '-m', type=str, required=True, help='path prefix to the output of monitor e.g. kj600_output/Aug12_07-16') + parser.add_argument('--logfile', '-l', type=str, required=True, help='path to the training log file') + parser.add_argument('--tp_size', '-t', type=int, required=True, help='tp parallel size') + parser.add_argument('--dp_size', '-d', type=int, required=True, help='dp parallel size') + parser.add_argument('--pp_size', '-p', type=int, required=True, help='pp parallel size') + parser.add_argument('--untie_embeddings_and_output_weights', '-u', action="store_true", default=False, help='whether untie_embeddings_and_output_weights in pp parallel') + parser.add_argument('--sequence_parallel', '-s', action="store_true", default=False, help='whether sequence parallel is enabled. 
Add -s to store true') + + args = parser.parse_args() + + assert args.tp_size > 0, 'if tp not enabled, set tp_size = 1' + assert args.dp_size > 0, 'if tp not enabled, set dp_size = 1' + assert args.pp_size > 0, 'if tp not enabled, set pp_size = 1' + + total_norm = parse_logfile(args.logfile) + reduced, unreduced = parse_monitor_output(args.monitor_output) + + duplicate_embedding = not args.untie_embeddings_and_output_weights and args.pp_size > 1 + + valid_total_norm(total_norm, reduced, duplicate_embedding) + valid_reduce(reduced, unreduced, args.tp_size, args.dp_size, args.sequence_parallel) \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/utils.py b/debug/accuracy_tools/kj600/kj600/utils.py index 53d47d99886..3aed6911c44 100644 --- a/debug/accuracy_tools/kj600/kj600/utils.py +++ b/debug/accuracy_tools/kj600/kj600/utils.py @@ -107,4 +107,29 @@ def check_file_valid_readable(path): def check_file_valid_writable(path): check_file_valid(path) check_path_writability(path) - \ No newline at end of file + + +def make_file_safety(file_path: str, permission=0o640): + if os.path.islink(file_path): + raise RuntimeError(f"Invalid soft link path: {file_path}") + file_real_path = os.path.realpath(file_path) + if os.path.exists(file_real_path): + return + parent_path = os.path.dirname(file_real_path) + if not os.path.exists(parent_path): + os.makedirs(parent_path, mode=0o750, exist_ok=True) + if not os.access(parent_path, os.W_OK): + raise PermissionError(f"The path {parent_path} is not writable!") + try: + os.close(os.open(file_real_path, os.O_WRONLY | os.O_CREAT, permission)) + except OSError as e: + raise RuntimeError("Can't create file: " + file_real_path) from e + os.chmod(file_real_path, permission) + + +def create_directory(dir_path): + dir_path = os.path.realpath(dir_path) + try: + os.makedirs(dir_path, mode=0o750, exist_ok=True) + except OSError as ex: + raise RuntimeError("Failed to create directory. 
Please check the path permission or disk space.") from ex \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/pyproject.toml b/debug/accuracy_tools/kj600/pyproject.toml index 5df96856334..dd5faebc38e 100644 --- a/debug/accuracy_tools/kj600/pyproject.toml +++ b/debug/accuracy_tools/kj600/pyproject.toml @@ -7,7 +7,6 @@ name = "kj600" version = "0.0.1" dependencies = [ "torch", - "torch_npu", "torchvision", "tensorboard", "matplotlib", @@ -16,4 +15,7 @@ dependencies = [ ] [tool.setuptools.packages] -find = {} # Scan the project directory with the default parameters \ No newline at end of file +find = {} # Scan the project directory with the default parameters + +[tool.setuptools.package-data] +kj600 = ["distributed/*.yaml"] \ No newline at end of file -- Gitee From 823d12ea7524208de02eb6f35ab91b8e402bf0db Mon Sep 17 00:00:00 2001 From: qianggee Date: Wed, 21 Aug 2024 08:31:19 +0000 Subject: [PATCH 043/333] merge from poc --- .../kj600/kj600/anomaly_analyse.py | 248 ++++++++++++++ .../kj600/kj600/anomaly_detect.py | 169 ++++++++- .../accuracy_tools/kj600/kj600/file_check.py | 324 ++++++++++++++++++ 3 files changed, 729 insertions(+), 12 deletions(-) create mode 100644 debug/accuracy_tools/kj600/kj600/anomaly_analyse.py create mode 100644 debug/accuracy_tools/kj600/kj600/file_check.py diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py new file mode 100644 index 00000000000..f6069db6fb3 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import ast +import fcntl +import heapq +import json +import os +from pathlib import Path +import sys + +from kj600.utils import print_info_log, print_warn_log +from kj600.anomaly_detect import GradAnomalyData +from kj600.file_check import ( + change_mode, + check_link, + FileCheckConst, + check_path_before_create, + FileChecker, + FileOpen, +) + +ANOMALY_JSON = "anomaly.json" +ANALYSE_JSON = "anomaly_analyse.json" + +class AnomalyDataWriter: + """ + 异常数据写入类,负责将异常数据写入到JSON文件中。 + """ + + def __init__(self, dump_path, rank) -> None: + self.dump_path = dump_path + self.dump_rank_dir = os.path.join(self.dump_path, f"rank{rank}") + self.json_path = os.path.join(self.dump_rank_dir, ANOMALY_JSON) + + @staticmethod + def get_anomaly_dict(anomalies): + """将GradAnomalyData列表转换为json""" + anomalies_json = {} + for anomaly in anomalies: + anomalies_json.update({anomaly.get_key(): anomaly.to_dict()}) + return anomalies_json + + @staticmethod + def update_data_in_single_json(json_path, anomalies_data): + with FileOpen(json_path, "w+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + json.dump(anomalies_data, f, indent=1) + fcntl.flock(f, fcntl.LOCK_UN) + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def init_detected_json(self): + """初始化落盘文件""" + check_path_before_create(self.dump_path) + if not os.path.exists(self.dump_path): + Path(self.dump_path).mkdir( + mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True + ) + file_check = FileChecker(self.dump_path, FileCheckConst.DIR) + file_check.common_check() + + if not 
os.path.exists(self.dump_rank_dir): + Path(self.dump_rank_dir).mkdir( + FileCheckConst.DATA_DIR_AUTHORITY, parents=True, exist_ok=True + ) + + if os.path.exists(self.json_path): + file_check = FileChecker( + self.json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {self.json_path}.") + os.remove(self.json_path) + Path(self.json_path).touch() + change_mode(self.json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def write_detected_json(self, anomalies): + """ + 落盘异常数据 + Args: + anomalies: GradAnomalyData对象列表 + """ + anomalies_json = self.get_anomaly_dict(anomalies) + print_info_log(f"{ANOMALY_JSON} is at {self.dump_rank_dir}.") + if Path(self.json_path).exists() and os.path.getsize(self.json_path) > 0: + with FileOpen(self.json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_to_write = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + else: + data_to_write = {} + data_to_write.update(anomalies_json) + self.update_data_in_single_json(self.json_path, data_to_write) + + +class AnomalyDataLoader: + def __init__(self, data_path) -> None: + self.data_path = data_path + + @staticmethod + def create_instances_from_dict(anomalies_dict: dict): + instances = [] + for values in anomalies_dict.values(): + try: + instances.append(GradAnomalyData(**values)) + except KeyError as e: + print_warn_log(f"Missing key in anomaly data: {e}") + except ValueError as e: + print_warn_log( + f"Value error when creating a GradAnomalyData instance: {e}" + ) + return instances + + def get_anomalies_from_jsons(self): + """遍历文件夹,从rankK/anomaly.json中读取异常数据 + return: anomalies: GradAnomalyData对象列表 + """ + anomalies = [] + check_link(self.data_path) + for rank_dir in os.listdir(self.data_path): + rank_path = os.path.join(self.data_path, rank_dir) + if not os.path.isdir(rank_path): + continue + json_path = os.path.join(rank_path, ANOMALY_JSON) + if not os.path.exists(json_path): + continue + with 
FileOpen(json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_anomalies = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + instances = self.create_instances_from_dict(data_anomalies) + anomalies.extend(instances) + return anomalies + + +class AnomalyAnalyse: + def __init__(self) -> None: + self.sorted_anomalies = [] + + def get_range_top_K(self, topk, step_list, anomalies): + """ + 获取前topk个step_list范围内的异常。 + """ + if not step_list: + filtered_anomalies = anomalies + else: + filtered_anomalies = [ + anomaly for anomaly in anomalies if anomaly.step in step_list + ] + if topk >= len(filtered_anomalies): + self.sorted_anomalies = sorted(filtered_anomalies) + else: + self.sorted_anomalies = list(heapq.nsmallest(topk, filtered_anomalies)) + return self.sorted_anomalies + + def rewrite_sorted_anomalies(self, output_path): + """ + 将排序后的异常数据重新落盘 + """ + file_check = FileChecker( + output_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + + sorted_data = AnomalyDataWriter.get_anomaly_dict(self.sorted_anomalies) + print_info_log(f"{ANALYSE_JSON} is at {output_path}.") + json_path = os.path.join(output_path, ANALYSE_JSON) + if os.path.exists(json_path): + file_check = FileChecker( + json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {json_path}.") + os.remove(json_path) + Path(json_path).touch() + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + AnomalyDataWriter.update_data_in_single_json(json_path, sorted_data) + + +def _get_parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--data_path", dest="data_path_dir", default="./", type=str, + help=" The anomaly detect result dictionary: generate from kj600 tool.", + required=True, + ) + parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, + help=" The analyse task result out path.", + required=False, + ) + parser.add_argument("-k", 
"--topk", dest="top_k_number", default=8, type=int, + help=" Top K number of earliest anomalies.", + required=False, + ) + parser.add_argument("-s", "--step", dest="step_list", default=[], type=str, + help=" Analyse which steps.", + required=False, + ) + return parser.parse_args(sys.argv[1:]) + +def _get_step_and_stop(args): + try: + step_list = ast.literal_eval(args.step_list) + if not isinstance(step_list, list): + raise ValueError(f"{args.step_list} is not a list") + except (ValueError, SyntaxError, RecursionError) as e: + raise Exception( + f"The step list must be a resolvable list type" + ) from e + if args.top_k_number <= 0: + raise Exception("The top k number must be greater than 0.") + return step_list, args.top_k_number + +def _anomaly_analyse(): + args = _get_parse_args() + step_list, top_k_number = _get_step_and_stop(args) + loader = AnomalyDataLoader(args.data_path_dir) + anomalies = loader.get_anomalies_from_jsons() + analyser = AnomalyAnalyse() + top_anomalies = analyser.get_range_top_K( + top_k_number, step_list, anomalies + ) + analyser.rewrite_sorted_anomalies( + args.out_path if args.out_path else args.data_path_dir + ) + + print_info_log(f"Top {top_k_number} anomalies are listed as follows:") + for index, anomaly in enumerate(top_anomalies): + print_info_log(f"{index}: {anomaly.message}") + + +if __name__ == "__main__": + _anomaly_analyse() + print_info_log("Analyse task completed.") diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_detect.py b/debug/accuracy_tools/kj600/kj600/anomaly_detect.py index cbd7b6daa2f..46b6f32f18f 100644 --- a/debug/accuracy_tools/kj600/kj600/anomaly_detect.py +++ b/debug/accuracy_tools/kj600/kj600/anomaly_detect.py @@ -1,10 +1,16 @@ +import os +import sys import statistics as st from abc import ABC from typing import List -import sys -from torch.utils.tensorboard import SummaryWriter from collections import defaultdict -from kj600.utils import print_info_log +from dataclasses import dataclass, field +import pandas 
as pd +from torch.utils.tensorboard import SummaryWriter +from kj600.utils import print_info_log, check_file_valid_writable, make_file_safety, create_directory +from kj600.const import Const +from kj600.file_check import change_mode, FileCheckConst + class ScanRule(ABC): def apply(self, history, cur): @@ -59,15 +65,101 @@ class bcolors: BOLD = '\033[1m' UNDERLINE = '\033[4m' -class SummaryWriterWithAD(SummaryWriter): - def __init__(self, path, ad_rules, job_id, anomaly_inform=False): - super().__init__(path) +class AnomalyDataFactory(ABC): + def __init__(self, rank, pp_stage, group_mates): + super().__init__() + self.rank = rank + self.pp_stage = pp_stage + self.group_mates = group_mates + self.micro_step = 0 + self.vpp_stage = 0 + self.name2callid = {} + + def set_call_id(self, name2callid): + """根据当前GradContext信息更新call_id vpp_stage等信息 + """ + self.name2callid = name2callid + + def create(self, tag_name, message, step): + """如果检查出异常, 调用当前接口生成GradAnomalyData实例 + """ + param_name = tag_name.split('/')[0] + call_id = self.name2callid.get(param_name,-1) + if Const.vpp in param_name: + vpp_stage = int(param_name.lstrip(Const.vpp).split(Const.vpp_sep)[0]) + else: + vpp_stage = 0 + + return GradAnomalyData( + self.rank, + step, + self.micro_step, + self.pp_stage, + self.vpp_stage, + call_id, + tag_name, + message, + self.group_mates + ) + +@dataclass(eq=True) +class GradAnomalyData: + rank: int = 0 + step: int = 0 + micro_step: int = 0 + pp_stage: int = 0 + vpp_stage: int = 0 + call_id: int = 0 + tag_name: str = field(default=None, compare=False) + message: str = field(default="", compare=False) + group_mates: list = field(default=None, compare=False) + + def __lt__(self, other): + if not isinstance(other, GradAnomalyData): + return NotImplemented + if self.step != other.step: + return self.step < other.step + if self.micro_step != other.micro_step: + return self.micro_step < other.micro_step + if self.pp_stage != other.pp_stage: + return self.pp_stage > other.pp_stage + 
if self.vpp_stage != other.vpp_stage: + return self.vpp_stage > other.vpp_stage + if self.call_id != other.call_id: + return self.call_id < other.call_id + return False + + def __le__(self, other): + if not isinstance(other, GradAnomalyData): + return NotImplemented + return self == other or self < other + + def to_dict(self): + return self.__dict__ + + def get_key(self): + return ''.join( + (str(self.tag_name), "_step_", str(self.step), "_call_" , str(self.call_id))) + +class BaseWriterWithAD: + def __init__(self, path, ad_rules, job_id, anomaly_inform=False, anomaly_factory=None, ndigits=6): self.tag2scalars = defaultdict(list) self.ad_rules = ad_rules self.job_id = job_id self.anomaly_inform = anomaly_inform - - def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False, double_precision=False): + self.anomaly_factory = anomaly_factory + self.anomalies = [] + self.ndigits = ndigits + + def get_anomalies(self): + """返回已检测到的异常列表 + """ + return self.anomalies + + def clear_anomalies(self): + self.anomalies.clear() + + def add_scalar(self, tag, scalar_value, global_step=None): new_avg = avg = scalar_value if tag in self.tag2scalars: N = len(self.tag2scalars[tag]) @@ -76,11 +168,64 @@ class SummaryWriterWithAD(SummaryWriter): self.tag2scalars[tag].append((scalar_value, new_avg)) detected, rule_name = self._ad(scalar_value, history=avg) if detected: - print_info_log(f"{bcolors.WARNING}> Rule {rule_name} reports anomaly signal in {tag} at step {global_step}.{bcolors.ENDC}") - exception_message = f"{bcolors.WARNING}> Rule {rule_name} reports anomaly signal in {tag} at step {global_step}.{bcolors.ENDC}" + exception_message = f"Rule {rule_name} reports anomaly signal in {tag} at step {global_step}." 
+ print_info_log(f"{bcolors.WARNING}> {exception_message}{bcolors.ENDC}") if self.anomaly_inform: self.anomaly_inform.run(exception_message, self.job_id) - return super().add_scalar(tag, scalar_value, global_step, walltime, new_style, double_precision) - + + if self.anomaly_factory: + self.anomalies.append(self.anomaly_factory.create(tag, exception_message, global_step)) + def _ad(self, scalar_value, history): return AnomalyScanner.scan(self.ad_rules, history, cur=scalar_value) + + +class CSVWriterWithAD(BaseWriterWithAD): + def __init__(self, path, ad_rules, job_id, anomaly_inform=False, anomaly_factory=None, ndigits=6): + super().__init__(path, ad_rules, job_id, anomaly_inform, anomaly_factory, ndigits) + + self.log_dir = path + create_directory(path) + self.context_dict = defaultdict(list) + self.header = [] + + def write_csv(self, prefix, step): + if len(self.context_dict) == 0: + return + filepath = os.path.join(self.log_dir, f'{prefix}_{step}.csv') + if not os.path.exists(filepath): + make_file_safety(filepath) + data_frame = pd.DataFrame(columns=self.header) + data_frame.to_csv(filepath, index=False) + change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY) + + check_file_valid_writable(filepath) + new_data = [] + for name, metric_value in self.context_dict.items(): + if Const.vpp not in name: + new_data.append([name]+metric_value) + else: + new_data.append(name.lstrip(Const.vpp).split(Const.vpp_sep)+metric_value) + new_data = pd.DataFrame(new_data) + new_data.to_csv(filepath, mode='a+', header=False, index=False) + self.context_dict = defaultdict(list) + + def add_scalar(self, tag, scalar_value, global_step): + super().add_scalar(tag, scalar_value, global_step) + + name = tag.split('/')[0] + self.context_dict[name].append(round(scalar_value, self.ndigits)) + + def close(self): + pass + +class SummaryWriterWithAD(SummaryWriter, BaseWriterWithAD): + def __init__(self, path, ad_rules, job_id, anomaly_inform=False, anomaly_factory=None, ndigits=6): + 
super(SummaryWriter, self).__init__(path, ad_rules, job_id, anomaly_inform, anomaly_factory, ndigits) + super().__init__(path) + change_mode(path, FileCheckConst.DATA_DIR_AUTHORITY) + + def add_scalar(self, tag, scalar_value, global_step): + super(SummaryWriter, self).add_scalar(tag, scalar_value, global_step) + return super().add_scalar(tag, scalar_value, global_step) + \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py new file mode 100644 index 00000000000..21f9e351a2f --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/file_check.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +import os +import re + +from kj600.utils import print_info_log + + +class CodedException(Exception): + def __init__(self, code, error_info=""): + super().__init__() + self.code = code + self.error_info = self.err_strs.get(code) + error_info + + def __str__(self): + return self.error_info + + +class FileCheckException(CodedException): + INVALID_FILE_ERROR = 0 + FILE_PERMISSION_ERROR = 1 + SOFT_LINK_ERROR = 2 + ILLEGAL_PATH_ERROR = 3 + ILLEGAL_PARAM_ERROR = 4 + FILE_TOO_LARGE_ERROR = 5 + + err_strs = { + SOFT_LINK_ERROR: "[kj600] 检测到软链接: ", + FILE_PERMISSION_ERROR: "[kj600] 文件权限错误: ", + INVALID_FILE_ERROR: "[kj600] 无效文件: ", + ILLEGAL_PATH_ERROR: "[kj600] 非法文件路径: ", + ILLEGAL_PARAM_ERROR: "[kj600] 非法打开方式: ", + FILE_TOO_LARGE_ERROR: "[kj600] 文件过大: ", + } + + +class FileCheckConst: + """ + Class for file check const + """ + + READ_ABLE = "read" + WRITE_ABLE = "write" + READ_WRITE_ABLE = "read and write" + DIRECTORY_LENGTH = 4096 + FILE_NAME_LENGTH = 255 + FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" + FILE_PATTERN = r"^[a-zA-Z0-9_./-]+$" + JSON_SUFFIX = ".json" + MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 + DIR = "dir" + FILE = "file" + DATA_DIR_AUTHORITY = 0o750 + DATA_FILE_AUTHORITY = 0o640 + FILE_SIZE_DICT = { + JSON_SUFFIX: MAX_JSON_SIZE, + } + + +class FileChecker: + """ + The class for check file. + + Attributes: + file_path: The file or dictionary path to be verified. 
+ path_type: file or dictionary + ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability + file_type(str): The correct file type for file + """ + + def __init__( + self, file_path, path_type, ability=None, file_type=None, is_script=True + ): + self.file_path = file_path + self.path_type = self._check_path_type(path_type) + self.ability = ability + self.file_type = file_type + self.is_script = is_script + + @staticmethod + def _check_path_type(path_type): + if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: + print_info_log( + f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." + ) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + return path_type + + def common_check(self): + """ + 功能:用户校验基本文件权限:软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符 + 注意:文件后缀的合法性,非通用操作,可使用其他独立接口实现 + """ + check_path_exists(self.file_path) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + check_path_type(self.file_path, self.path_type) + self.check_path_ability() + if self.is_script: + check_path_owner_consistent(self.file_path) + check_path_pattern_vaild(self.file_path) + check_common_file_size(self.file_path) + check_file_suffix(self.file_path, self.file_type) + return self.file_path + + def check_path_ability(self): + if self.ability == FileCheckConst.WRITE_ABLE: + check_path_writability(self.file_path) + if self.ability == FileCheckConst.READ_ABLE: + check_path_readability(self.file_path) + if self.ability == FileCheckConst.READ_WRITE_ABLE: + check_path_readability(self.file_path) + check_path_writability(self.file_path) + + +class FileOpen: + """ + The class for open file by a safe way. + + Attributes: + file_path: The file or dictionary path to be opened. 
+ mode(str): The file open mode + """ + + SUPPORT_READ_MODE = ["r", "rb"] + SUPPORT_WRITE_MODE = ["w", "wb", "a", "ab"] + SUPPORT_READ_WRITE_MODE = ["r+", "rb+", "w+", "wb+", "a+", "ab+"] + + def __init__(self, file_path, mode, encoding="utf-8"): + self.file_path = file_path + self.mode = mode + self.encoding = encoding + self._handle = None + + def __enter__(self): + self.check_file_path() + binary_mode = "b" + if binary_mode not in self.mode: + self._handle = open(self.file_path, self.mode, encoding=self.encoding) + else: + self._handle = open(self.file_path, self.mode) + return self._handle + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._handle: + self._handle.close() + + def check_file_path(self): + support_mode = ( + self.SUPPORT_READ_MODE + + self.SUPPORT_WRITE_MODE + + self.SUPPORT_READ_WRITE_MODE + ) + if self.mode not in support_mode: + print_info_log("File open not support %s mode" % self.mode) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + self.check_ability_and_owner() + check_path_pattern_vaild(self.file_path) + if os.path.exists(self.file_path): + check_common_file_size(self.file_path) + + def check_ability_and_owner(self): + if self.mode in self.SUPPORT_READ_MODE: + check_path_exists(self.file_path) + check_path_readability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_WRITE_MODE and os.path.exists(self.file_path): + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_READ_WRITE_MODE and os.path.exists(self.file_path): + check_path_readability(self.file_path) + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + + +def check_link(path): + abs_path = os.path.abspath(path) + if os.path.islink(abs_path): + print_info_log("The file path {} is a soft 
link.".format(path)) + raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) + + +def check_path_length(path, name_length=None): + file_max_name_length = ( + name_length if name_length else FileCheckConst.FILE_NAME_LENGTH + ) + if ( + len(path) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(path)) > file_max_name_length + ): + print_info_log("The file path length exceeds limit.") + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_exists(path): + if not os.path.exists(path): + print_info_log("The file path %s does not exist." % path) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_readability(path): + if not os.access(path, os.R_OK): + print_info_log("The file path %s is not readable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_writability(path): + if not os.access(path, os.W_OK): + print_info_log("The file path %s is not writable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_executable(path): + if not os.access(path, os.X_OK): + print_info_log("The file path %s is not executable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_other_user_writable(path): + st = os.stat(path) + if st.st_mode & 0o002: + print_info_log( + "The file path %s may be insecure because other users have write permissions. " + % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_owner_consistent(path): + file_owner = os.stat(path).st_uid + if file_owner != os.getuid(): + print_info_log( + "The file path %s may be insecure because is does not belong to you." % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_pattern_vaild(path): + if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): + print_info_log("The file path %s contains special characters." 
% (path)) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_file_size(file_path, max_size): + file_size = os.path.getsize(file_path) + if file_size >= max_size: + print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") + raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) + + +def check_common_file_size(file_path): + if os.path.isfile(file_path): + for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): + if file_path.endswith(suffix): + check_file_size(file_path, max_size) + break + + +def check_file_suffix(file_path, file_suffix): + if file_suffix: + if not file_path.endswith(file_suffix): + print_info_log(f"The {file_path} should be a {file_suffix} file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_type(file_path, file_type): + if file_type == FileCheckConst.FILE: + if not os.path.isfile(file_path): + print_info_log(f"The {file_path} should be a file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + if file_type == FileCheckConst.DIR: + if not os.path.isdir(file_path): + print_info_log(f"The {file_path} should be a dictionary!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_before_create(path): + if path_len_exceeds_limit(path): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, "The file path length exceeds limit." + ) + + if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, + "The file path {} contains special characters.".format(path), + ) + + +def change_mode(path, mode): + if not os.path.exists(path) or os.path.islink(path): + return + try: + os.chmod(path, mode) + except PermissionError as ex: + raise FileCheckException( + FileCheckException.FILE_PERMISSION_ERROR, + "Failed to change {} authority. 
{}".format(path, str(ex)), + ) from ex + + +def path_len_exceeds_limit(file_path): + return ( + len(os.path.realpath(file_path)) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(file_path)) > FileCheckConst.FILE_NAME_LENGTH + ) -- Gitee From b040901cd019d2d8a52a77a12ee204bc8509b4dd Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Mon, 19 Aug 2024 19:48:59 +0800 Subject: [PATCH 044/333] add_kernels_and_communications --- .../module_visualization/graph/prof_node.py | 46 +++++++-- .../graph_build/prof_graph_builder.py | 71 ++++++++++--- .../prof_parse/prof_data_pre_process.py | 99 ++++++++++++------- profiler/prof_common/base_node.py | 4 + profiler/prof_common/constant.py | 6 ++ profiler/prof_common/file_reader.py | 27 +++++ profiler/prof_common/kernel_bean.py | 43 ++++++++ profiler/prof_common/trace_event_bean.py | 24 ++++- profiler/prof_common/tree_builder.py | 6 +- 9 files changed, 268 insertions(+), 58 deletions(-) create mode 100644 profiler/prof_common/kernel_bean.py diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index 7d96a49691c..3588a8b81b1 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -18,39 +18,49 @@ from profiler.prof_common.trace_event_bean import TraceEventBean class ProfNode(BaseNode): - MODULE_TYPE = 1 def __init__(self, event: TraceEventBean, parent_node=None): super().__init__(event, parent_node) self._kernel_total_list = [] + self._communication_total_list = [] self._precision_index = 1 @property def node_id(self): return self._event.unique_id + @property + def node_type(self): + if self._event.event_type is None: + return Constant.VIRTUAL_TYPE + return self._event.event_type + @property def total_kernels(self): return self._kernel_total_list @property def host_total_dur(self): - if self.is_root_node: + if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: return 
sum((node.host_total_dur for node in self.child_nodes)) return self._event.dur @property def host_self_dur(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return 0 return self.host_total_dur - sum((node.host_total_dur for node in self.child_nodes)) @property def device_total_dur(self): - if self.is_root_node: + if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: return sum((node.device_total_dur for node in self.child_nodes)) return sum((kernel.dur for kernel in self._kernel_total_list)) @property def device_self_dur(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return 0 return self.device_total_dur - sum((node.device_total_dur for node in self.child_nodes)) @property @@ -64,6 +74,22 @@ class ProfNode(BaseNode): data["Input type"] = input_type return data + @property + def kernel_data(self) -> list: + if self.node_type == Constant.VIRTUAL_TYPE: + return [kernel for node in self.child_nodes for kernel in node.kernel_data] + return [kernel.kernel_info for kernel in self.total_kernels] + + @property + def communication_data(self) -> list: + if self.node_type == Constant.VIRTUAL_TYPE: + return [comm for node in self.child_nodes for comm in node.communication_data] + return [[comm.name, comm.dur] for comm in self._communication_total_list] + + @property + def overall_data(self): + return {"Computing Time(ms)": 1, "Uncovered Communication Time(ms)": 1, "Free Time(ms)": 1} + @property def data(self): return {"Input Data": self.input_data, @@ -71,12 +97,14 @@ class ProfNode(BaseNode): "Host Self Duration(us)": round(self.host_self_dur, 2), "Host Total Duration(us)": round(self.host_total_dur, 2), "Device Self Duration(us)": round(self.device_self_dur, 2), - "Device Total Duration(us)": round(self.device_total_dur, 2)} + "Device Total Duration(us)": round(self.device_total_dur, 2), + "kernels": self.kernel_data, + "communications": self.communication_data} @property def info(self): return {"id": self.node_id, - "node_type": self.MODULE_TYPE, + 
"node_type": self.node_type, "data": self.data, "upnode": self.parent_node.node_id if self.parent_node else "None", "subnodes": [node.node_id for node in iter(self.child_nodes)]} @@ -96,9 +124,15 @@ class ProfNode(BaseNode): def update_child_nodes(self, node): self._child_nodes.append(node) + def reset_child_nodes(self, nodes): + self._child_nodes = nodes + def update_kernel_total_list(self, kernel_list: list): self._kernel_total_list.extend(kernel_list) + def update_communication_total_list(self, communication_list: list): + self._communication_total_list.extend(communication_list) + def update_child_precision_index(self): if not self.child_nodes: return @@ -106,4 +140,4 @@ class ProfNode(BaseNode): min_dur = min((node.device_total_dur for node in self.child_nodes)) diff_dur = max_dur - min_dur for node in self.child_nodes: - node.precision_index = 1- (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 + node.precision_index = 1 - (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 diff --git a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index a1bd6ba000e..331e0cb050c 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -30,7 +30,9 @@ class ProfGraphBuilder: min_start = min((op.start_time for op in iter(op_list))) max_end = max((op.end_time for op in iter(op_list))) # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-1 +2处理 - return TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) + event = TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) + event.event_type = Constant.MODULE_TYPE + return event @classmethod def _trans_flow_to_dict(cls, flow_events: dict, end_events: list) -> dict: @@ -48,6 +50,31 @@ class ProfGraphBuilder: result_data.setdefault(start_point.start_time, 
[]).append(end_event) return result_data + @classmethod + def _create_virtual_node(cls, root_node: ProfNode): + virtual_nodes = [] + first_level_nodes = root_node.child_nodes + root_node.reset_child_nodes([]) + merged_nodes = [] + order_id = 1 + for node in first_level_nodes: + if node.node_type == Constant.OPERATOR_TYPE: + merged_nodes.append(node) + continue + if len(merged_nodes) >= 2: + virtual_node = ProfNode(TraceEventBean({}, f"Operators_Between_Modules_{order_id}"), root_node) + root_node.update_child_nodes(virtual_node) + order_id += 1 + for op_node in merged_nodes: + op_node.parent_node = virtual_node + virtual_node.update_child_nodes(op_node) + virtual_nodes.append(virtual_node) + elif len(merged_nodes) == 1: + root_node.update_child_nodes(merged_nodes[0]) + root_node.update_child_nodes(node) + merged_nodes = [] + return virtual_nodes + def build_graph(self): self._prof_data = ProfDataPreProcess(self._prof_data_path).run() all_data = [*self._prof_data.get(Constant.MODULE_EVENT, []), @@ -59,19 +86,17 @@ class ProfGraphBuilder: order_id = name_dict.get(event.name, 0) event.set_id(f"{event.name}_{order_id}") name_dict[event.name] = order_id + 1 - root_node = TreeBuilder.build_tree(all_data, ProfNode, TraceEventBean({}, Constant.NPU_ROOT_ID)) - kernel_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), - self._prof_data.get(Constant.KERNEL_EVENT, [])) - for start_time, kernels in kernel_flow_dict.items(): - matched_node = root_node.binary_search(start_time) - while matched_node != Constant.INVALID_RETURN: - matched_node.update_kernel_total_list(kernels) - matched_node = matched_node.binary_search(start_time) - all_data = root_node.find_all_child_nodes() - all_data.append(root_node) - for node in all_data: + all_nodes = TreeBuilder.build_tree(all_data, ProfNode, TraceEventBean({}, Constant.NPU_ROOT_ID)) + if len(all_nodes) < 2: + msg = "Failed to build graph." 
+ raise RuntimeError(msg) + self._update_kernel_details(all_nodes[0]) + self._update_communication_details(all_nodes[0]) + virtual_nodes = self._create_virtual_node(all_nodes[0]) + all_nodes.extend(virtual_nodes) + for node in all_nodes: node.update_child_precision_index() - return all_data + return all_nodes def find_bwd_module(self) -> list: bwd_module_list = [] @@ -102,7 +127,7 @@ class ProfGraphBuilder: pre_status = Constant.FWD_OR_OPT # 通过连线匹配正向module,构建出反向的整体module关系 - root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({})) + root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({}))[0] fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, cpu_op_list) for start_time, end_events in fwdbwd_flow_dict.items(): matched_node = root_node.binary_search(start_time) @@ -115,3 +140,21 @@ class ProfGraphBuilder: bwd_module_list.append( self._create_event_bean_from_ops(module_node.bwd_op_list, f"{module_node.name} [BACKWARD]")) return bwd_module_list + + def _update_kernel_details(self, root_node): + kernel_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), + self._prof_data.get(Constant.KERNEL_EVENT, [])) + for start_time, kernels in kernel_flow_dict.items(): + matched_node = root_node.binary_search(start_time) + while matched_node != Constant.INVALID_RETURN: + matched_node.update_kernel_total_list(kernels) + matched_node = matched_node.binary_search(start_time) + + def _update_communication_details(self, root_node): + communication_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), + self._prof_data.get(Constant.HCCL_EVENT, [])) + for start_time, communications in communication_flow_dict.items(): + matched_node = root_node.binary_search(start_time) + while matched_node != Constant.INVALID_RETURN: + matched_node.update_communication_total_list(communications) + matched_node = matched_node.binary_search(start_time) diff --git 
a/profiler/module_visualization/prof_parse/prof_data_pre_process.py b/profiler/module_visualization/prof_parse/prof_data_pre_process.py index 9dc820e4ca5..c16daaecd71 100644 --- a/profiler/module_visualization/prof_parse/prof_data_pre_process.py +++ b/profiler/module_visualization/prof_parse/prof_data_pre_process.py @@ -12,10 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging import os from profiler.prof_common.file_reader import FileReader from profiler.prof_common.constant import Constant +from profiler.prof_common.kernel_bean import KernelBean from profiler.prof_common.trace_event_bean import TraceEventBean @@ -23,13 +25,23 @@ class ProfDataPreProcess: def __init__(self, prof_data_path: str): self._prof_data_path = prof_data_path self._trace_path = "" + self._kernel_details_path = "" self._kernel_pid = None + self._hccl_pid = None self._result_data = {Constant.CPU_OP_EVENT: [], Constant.MODULE_EVENT: [], Constant.KERNEL_EVENT: [], - Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}} + Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: []} + + @staticmethod + def _check_trace_data(trace_data): + if not isinstance(trace_data, list): + msg = f"Invalid profiling data path, this feature only supports performance data " \ + f"collected by Ascend PyTorch Profiler." + raise RuntimeError(msg) def run(self) -> dict: self._check_trace_path() self._parse_trace_events() + self._parse_kernel_details() self._check_result_data() return self._result_data @@ -50,53 +62,68 @@ class ProfDataPreProcess: msg = f"Invalid profiling path: {self._prof_data_path}. The data path should be the " \ f"folder that ends with the ascend_pt collected by the Ascend PyTorch Profiler." 
raise RuntimeError(msg) + kernel_path = os.path.join(profiler_output, "kernel_details.csv") + if os.path.isfile(kernel_path): + self._kernel_details_path = kernel_path self._trace_path = json_path def _parse_trace_events(self): trace_data = FileReader.read_json_file(self._trace_path) self._check_trace_data(trace_data) - iter_trace_data = iter(trace_data) - for event in iter_trace_data: - bean = TraceEventBean(event) - if bean.is_optimizer(): - self._result_data[Constant.MODULE_EVENT].append(bean) - elif bean.is_cpu_op(): - if not bean.is_step(): - self._result_data[Constant.CPU_OP_EVENT].append(bean) - elif bean.is_nn_module(): - self._result_data[Constant.MODULE_EVENT].append(bean) - elif bean.is_torch_to_npu(): - if bean.is_flow_start(): - self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(bean.id, {})["start"] = bean - else: - self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(bean.id, {})["end"] = bean - elif bean.is_fwd_bwd_flow(): - if bean.is_flow_start(): - self._result_data[Constant.FWD_BWD_FLOW].setdefault(bean.id, {})["start"] = bean - else: - self._result_data[Constant.FWD_BWD_FLOW].setdefault(bean.id, {})["end"] = bean - elif bean.is_kernel_event(self._kernel_pid): - self._result_data[Constant.KERNEL_EVENT].append(bean) - - def _check_trace_data(self, trace_data): - if not isinstance(trace_data, list): - msg = f"Invalid profiling data path, this feature only supports performance data " \ - f"collected by Ascend PyTorch Profiler." 
- raise RuntimeError(msg) - iter_trace_data = iter(trace_data) + iter_trace_data = [TraceEventBean(data) for data in trace_data] for event in iter_trace_data: - bean = TraceEventBean(event) - if bean.is_npu_process(): - self._kernel_pid = bean.pid + if self._kernel_pid is not None and self._hccl_pid is not None: break + if not event.is_meta(): + continue + if event.is_npu_process(): + self._kernel_pid = event.pid + elif event.is_hccl_process(): + self._hccl_pid = event.pid if self._kernel_pid is None: - msg = f"There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." + msg = "There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." raise RuntimeError(msg) + for event in iter_trace_data: + if event.is_optimizer(): + event.event_type = Constant.MODULE_TYPE + self._result_data[Constant.MODULE_EVENT].append(event) + elif event.is_cpu_op(): + if not event.is_step(): + event.event_type = Constant.OPERATOR_TYPE + self._result_data[Constant.CPU_OP_EVENT].append(event) + elif event.is_nn_module(): + event.event_type = Constant.MODULE_TYPE + self._result_data[Constant.MODULE_EVENT].append(event) + elif event.is_torch_to_npu(): + if event.is_flow_start(): + self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(event.id, {})["start"] = event + else: + self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(event.id, {})["end"] = event + elif event.is_fwd_bwd_flow(): + if event.is_flow_start(): + self._result_data[Constant.FWD_BWD_FLOW].setdefault(event.id, {})["start"] = event + else: + self._result_data[Constant.FWD_BWD_FLOW].setdefault(event.id, {})["end"] = event + elif event.is_kernel_event(self._kernel_pid): + self._result_data[Constant.KERNEL_EVENT].append(event) + elif event.is_hccl_event(self._hccl_pid): + self._result_data[Constant.HCCL_EVENT].append(event) + + def _parse_kernel_details(self): + if not self._kernel_details_path: + return + try: + all_kernels = 
FileReader.read_csv_file(self._kernel_details_path, KernelBean) + except Exception as e: + logging.error(e) + kernels = list(filter(lambda x: x.is_computing_op, all_kernels)) + if kernels: + self._result_data[Constant.KERNEL_EVENT] = kernels def _check_result_data(self): if not self._result_data.get(Constant.CPU_OP_EVENT): - msg = f"This data does not have any aten operator, please make sure to enable the CPU switch." + msg = "This data does not have any aten operator, please make sure to enable the CPU switch." raise RuntimeError(msg) if not self._result_data.get(Constant.MODULE_EVENT): - msg = f"This data does not collect any modules, please make sure to turn on the with_stack switch." + msg = "This data does not collect any modules, please make sure to enable the with_stack or with_modules." raise RuntimeError(msg) diff --git a/profiler/prof_common/base_node.py b/profiler/prof_common/base_node.py index b7cd6780003..1e122943281 100644 --- a/profiler/prof_common/base_node.py +++ b/profiler/prof_common/base_node.py @@ -47,6 +47,10 @@ class BaseNode: def end_time(self) -> Decimal: return self._event.end_time + @parent_node.setter + def parent_node(self, parent_node): + self._parent_node = parent_node + def update_child_nodes(self, node): self._child_nodes.append(node) diff --git a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 87bc51b56bc..90ec6d006e5 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -23,9 +23,15 @@ class Constant(object): CPU_OP_EVENT = "op_event" TORCH_TO_NPU_FLOW = "torch_to_device" KERNEL_EVENT = "kernel_event" + HCCL_EVENT = "hccl_event" FWD_BWD_FLOW = "fwd_to_bwd" NPU_ROOT_ID = "NPU" FWD_OR_OPT = 0 BACKWARD = 1 INVALID_RETURN = -1 + + # node type + MODULE_TYPE = 0 + OPERATOR_TYPE = 1 + VIRTUAL_TYPE = 9 diff --git a/profiler/prof_common/file_reader.py b/profiler/prof_common/file_reader.py index d8a9c8fb4d6..9a225131f94 100644 --- a/profiler/prof_common/file_reader.py +++ 
b/profiler/prof_common/file_reader.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import csv import json import logging import os @@ -57,3 +58,29 @@ class FileReader: file.write(json.dumps(data, indent=indent)) except Exception as e: raise RuntimeError(f"Can't create the file: {output_path}") from e + + @classmethod + def read_csv_file(cls, file_path: str, bean_class: any = None) -> any: + PathManager.check_path_readable(file_path) + if not os.path.isfile(file_path): + raise FileNotFoundError("File not exists.") + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_FILE_SIZE_5_GB: + check_msg = input( + f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") + if check_msg.lower() != "y": + logging.warning(f"The user choose not to read the file: %s", file_path) + return [] + result_data = [] + try: + with open(file_path, newline="") as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + row_data = bean_class(row) if bean_class else row + result_data.append(row_data) + except Exception as e: + msg = f"Failed to read the file: {file_path}" + raise RuntimeError(msg) from e + return result_data diff --git a/profiler/prof_common/kernel_bean.py b/profiler/prof_common/kernel_bean.py new file mode 100644 index 00000000000..cbfa10c0a92 --- /dev/null +++ b/profiler/prof_common/kernel_bean.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from profiler.prof_common.utils import convert_to_decimal + + +class KernelBean: + def __init__(self, data: dict): + self._name = data.get("Name", "") + self._op_type = data.get("Type", "") + self._core_type = data.get("Accelerator Core", "") + self._input_shape = data.get("Input Shapes", "").replace("\"", "") + self._input_type = data.get("Input Data Types", "") + self._input_format = data.get("Input Formats", "") + self._duration = data.get("Duration(us)", 0) + self._ts = data.get("Start Time(us)", "") + + @property + def start_time(self): + return convert_to_decimal(self._ts) + + @property + def is_computing_op(self): + return self._core_type != "HCCL" + + @property + def dur(self): + return float(self._duration) + + @property + def kernel_info(self): + return [self._name, self._op_type, self._core_type, self._input_shape, self._input_type, self.dur] diff --git a/profiler/prof_common/trace_event_bean.py b/profiler/prof_common/trace_event_bean.py index 2d4b96e4f6a..0aee79907ba 100644 --- a/profiler/prof_common/trace_event_bean.py +++ b/profiler/prof_common/trace_event_bean.py @@ -19,9 +19,10 @@ from profiler.prof_common.analyze_dict import AnalyzeDict class TraceEventBean(AnalyzeDict): - def __init__(self, data: dict, unique_id: int = None): + def __init__(self, data: dict, unique_id: str = None): super().__init__(data) self._id = unique_id + self._type = None @property def unique_id(self): @@ -35,6 +36,18 @@ class TraceEventBean(AnalyzeDict): def end_time(self) -> Decimal: return self.start_time + convert_to_decimal(self.dur) + @property + def 
kernel_info(self): + return [self.name, self.args.get("Task Type", ""), self.dur] + + @property + def event_type(self): + return self._type + + @event_type.setter + def event_type(self, event_type): + self._type = event_type + def set_id(self, name_id): self._id = name_id @@ -62,8 +75,17 @@ class TraceEventBean(AnalyzeDict): def is_flow_end(self): return self.ph == "f" + def is_meta(self): + return self.ph == "M" + def is_kernel_event(self, kernel_pid): return self.ph == "X" and self.pid == kernel_pid + def is_hccl_event(self, hccl_pid): + return self.ph == "X" and self.pid == hccl_pid and self.name.startswith("hcom_") + def is_npu_process(self): return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "Ascend Hardware" + + def is_hccl_process(self): + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "HCCL" diff --git a/profiler/prof_common/tree_builder.py b/profiler/prof_common/tree_builder.py index b7d3e1baf6a..b6311c1a937 100644 --- a/profiler/prof_common/tree_builder.py +++ b/profiler/prof_common/tree_builder.py @@ -19,8 +19,10 @@ class TreeBuilder: @staticmethod def build_tree(event_list: list, node_class: any, root_bean: any): root_node = node_class(root_bean) + all_nodes = [root_node] + [None] * len(event_list) event_list.sort(key=lambda x: x.start_time) last_node = root_node + index = 1 for event in event_list: while last_node: if last_node != root_node and event.start_time > last_node.end_time: @@ -28,6 +30,8 @@ class TreeBuilder: continue tree_node = node_class(event, last_node) last_node.update_child_nodes(tree_node) + all_nodes[index] = tree_node last_node = tree_node + index += 1 break - return root_node + return all_nodes -- Gitee From 8ca203d1ea616b13784a91c0895ca8727b2a029e Mon Sep 17 00:00:00 2001 From: qianggee Date: Thu, 22 Aug 2024 08:56:57 +0000 Subject: [PATCH 045/333] reduce circular complexity --- .../accuracy_tools/kj600/kj600/module_hook.py | 36 +++++++++++-------- 1 file 
changed, 22 insertions(+), 14 deletions(-) diff --git a/debug/accuracy_tools/kj600/kj600/module_hook.py b/debug/accuracy_tools/kj600/kj600/module_hook.py index 21c326ac019..1b9f752cf13 100644 --- a/debug/accuracy_tools/kj600/kj600/module_hook.py +++ b/debug/accuracy_tools/kj600/kj600/module_hook.py @@ -486,7 +486,26 @@ class TrainerMon: print_info_log(msg) else: print_info_log(msg) - + + def _is_target_param(self, param_name, param): + for target in self.config['targets'].keys(): + if param_name.startswith(target) and param.requires_grad: + self._smallest_rank_print(f'>> monitoring: {param_name}') + setattr(param, "zero_out_wgrad", True) + return True + + return False + + def _register_chunk(self, model_chunk, prefix): + for param_name, param in model_chunk.named_parameters(): + name = prefix + squash_param_name(param_name) + if self._is_target_param(param_name, param): + if name in self.param2name.values() or name == '': + print_error_log(f'same name {name} for different param. Current param is {param_name}. \ + May be error of squash_param_name') + raise Exception("param with same name will be overwritten.") + self.param2name[param] = name + def _register_param_name(self, model): if self.param_registered: return @@ -499,19 +518,8 @@ class TrainerMon: for vpp_stage, model_chunk in enumerate(model): prefix = f'{Const.vpp}{vpp_stage}{Const.vpp_sep}' if self.vpp else '' - for param_name, param in model_chunk.named_parameters(): - name = prefix + squash_param_name(param_name) - for target in self.config['targets'].keys(): - if param_name.startswith(target) and param.requires_grad: - self._smallest_rank_print(f'>> monitoring: {name}') - setattr(param, "zero_out_wgrad", True) - if name in self.param2name.values() or name == '': - print_error_log(f'same name {name} for different param. Current param is {param_name}. 
\ - May be error of squash_param_name') - raise Exception("param with same name will be overwriten.") - self.param2name[param] = name - break - + self._register_chunk(model_chunk, prefix) + self.param_registered = True -- Gitee From d0e1854e796f5acb49c8db60ff0d87bc02b31904 Mon Sep 17 00:00:00 2001 From: qianggee Date: Thu, 22 Aug 2024 09:06:28 +0000 Subject: [PATCH 046/333] split PR --- .../kj600/kj600/anomaly_analyse.py | 248 -------------- .../accuracy_tools/kj600/kj600/file_check.py | 324 ------------------ .../kj600/kj600/unittest/test_monitor.py | 145 -------- 3 files changed, 717 deletions(-) delete mode 100644 debug/accuracy_tools/kj600/kj600/anomaly_analyse.py delete mode 100644 debug/accuracy_tools/kj600/kj600/file_check.py delete mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py deleted file mode 100644 index f6069db6fb3..00000000000 --- a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" - -import argparse -import ast -import fcntl -import heapq -import json -import os -from pathlib import Path -import sys - -from kj600.utils import print_info_log, print_warn_log -from kj600.anomaly_detect import GradAnomalyData -from kj600.file_check import ( - change_mode, - check_link, - FileCheckConst, - check_path_before_create, - FileChecker, - FileOpen, -) - -ANOMALY_JSON = "anomaly.json" -ANALYSE_JSON = "anomaly_analyse.json" - -class AnomalyDataWriter: - """ - 异常数据写入类,负责将异常数据写入到JSON文件中。 - """ - - def __init__(self, dump_path, rank) -> None: - self.dump_path = dump_path - self.dump_rank_dir = os.path.join(self.dump_path, f"rank{rank}") - self.json_path = os.path.join(self.dump_rank_dir, ANOMALY_JSON) - - @staticmethod - def get_anomaly_dict(anomalies): - """将GradAnomalyData列表转换为json""" - anomalies_json = {} - for anomaly in anomalies: - anomalies_json.update({anomaly.get_key(): anomaly.to_dict()}) - return anomalies_json - - @staticmethod - def update_data_in_single_json(json_path, anomalies_data): - with FileOpen(json_path, "w+") as f: - fcntl.flock(f, fcntl.LOCK_EX) - json.dump(anomalies_data, f, indent=1) - fcntl.flock(f, fcntl.LOCK_UN) - change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) - - def init_detected_json(self): - """初始化落盘文件""" - check_path_before_create(self.dump_path) - if not os.path.exists(self.dump_path): - Path(self.dump_path).mkdir( - mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True - ) - file_check = FileChecker(self.dump_path, FileCheckConst.DIR) - file_check.common_check() - - if not os.path.exists(self.dump_rank_dir): - Path(self.dump_rank_dir).mkdir( - FileCheckConst.DATA_DIR_AUTHORITY, parents=True, exist_ok=True - ) - - if os.path.exists(self.json_path): - file_check = FileChecker( - self.json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE - ) - file_check.common_check() - print_warn_log(f"The existing file will be deleted: {self.json_path}.") - os.remove(self.json_path) - Path(self.json_path).touch() - 
change_mode(self.json_path, FileCheckConst.DATA_FILE_AUTHORITY) - - def write_detected_json(self, anomalies): - """ - 落盘异常数据 - Args: - anomalies: GradAnomalyData对象列表 - """ - anomalies_json = self.get_anomaly_dict(anomalies) - print_info_log(f"{ANOMALY_JSON} is at {self.dump_rank_dir}.") - if Path(self.json_path).exists() and os.path.getsize(self.json_path) > 0: - with FileOpen(self.json_path, "r+") as f: - fcntl.flock(f, fcntl.LOCK_EX) - data_to_write = json.load(f) - fcntl.flock(f, fcntl.LOCK_UN) - else: - data_to_write = {} - data_to_write.update(anomalies_json) - self.update_data_in_single_json(self.json_path, data_to_write) - - -class AnomalyDataLoader: - def __init__(self, data_path) -> None: - self.data_path = data_path - - @staticmethod - def create_instances_from_dict(anomalies_dict: dict): - instances = [] - for values in anomalies_dict.values(): - try: - instances.append(GradAnomalyData(**values)) - except KeyError as e: - print_warn_log(f"Missing key in anomaly data: {e}") - except ValueError as e: - print_warn_log( - f"Value error when creating a GradAnomalyData instance: {e}" - ) - return instances - - def get_anomalies_from_jsons(self): - """遍历文件夹,从rankK/anomaly.json中读取异常数据 - return: anomalies: GradAnomalyData对象列表 - """ - anomalies = [] - check_link(self.data_path) - for rank_dir in os.listdir(self.data_path): - rank_path = os.path.join(self.data_path, rank_dir) - if not os.path.isdir(rank_path): - continue - json_path = os.path.join(rank_path, ANOMALY_JSON) - if not os.path.exists(json_path): - continue - with FileOpen(json_path, "r+") as f: - fcntl.flock(f, fcntl.LOCK_EX) - data_anomalies = json.load(f) - fcntl.flock(f, fcntl.LOCK_UN) - instances = self.create_instances_from_dict(data_anomalies) - anomalies.extend(instances) - return anomalies - - -class AnomalyAnalyse: - def __init__(self) -> None: - self.sorted_anomalies = [] - - def get_range_top_K(self, topk, step_list, anomalies): - """ - 获取前topk个step_list范围内的异常。 - """ - if not step_list: - 
filtered_anomalies = anomalies - else: - filtered_anomalies = [ - anomaly for anomaly in anomalies if anomaly.step in step_list - ] - if topk >= len(filtered_anomalies): - self.sorted_anomalies = sorted(filtered_anomalies) - else: - self.sorted_anomalies = list(heapq.nsmallest(topk, filtered_anomalies)) - return self.sorted_anomalies - - def rewrite_sorted_anomalies(self, output_path): - """ - 将排序后的异常数据重新落盘 - """ - file_check = FileChecker( - output_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE - ) - file_check.common_check() - - sorted_data = AnomalyDataWriter.get_anomaly_dict(self.sorted_anomalies) - print_info_log(f"{ANALYSE_JSON} is at {output_path}.") - json_path = os.path.join(output_path, ANALYSE_JSON) - if os.path.exists(json_path): - file_check = FileChecker( - json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE - ) - file_check.common_check() - print_warn_log(f"The existing file will be deleted: {json_path}.") - os.remove(json_path) - Path(json_path).touch() - change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) - AnomalyDataWriter.update_data_in_single_json(json_path, sorted_data) - - -def _get_parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("-d", "--data_path", dest="data_path_dir", default="./", type=str, - help=" The anomaly detect result dictionary: generate from kj600 tool.", - required=True, - ) - parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, - help=" The analyse task result out path.", - required=False, - ) - parser.add_argument("-k", "--topk", dest="top_k_number", default=8, type=int, - help=" Top K number of earliest anomalies.", - required=False, - ) - parser.add_argument("-s", "--step", dest="step_list", default=[], type=str, - help=" Analyse which steps.", - required=False, - ) - return parser.parse_args(sys.argv[1:]) - -def _get_step_and_stop(args): - try: - step_list = ast.literal_eval(args.step_list) - if not isinstance(step_list, list): - raise 
ValueError(f"{args.step_list} is not a list") - except (ValueError, SyntaxError, RecursionError) as e: - raise Exception( - f"The step list must be a resolvable list type" - ) from e - if args.top_k_number <= 0: - raise Exception("The top k number must be greater than 0.") - return step_list, args.top_k_number - -def _anomaly_analyse(): - args = _get_parse_args() - step_list, top_k_number = _get_step_and_stop(args) - loader = AnomalyDataLoader(args.data_path_dir) - anomalies = loader.get_anomalies_from_jsons() - analyser = AnomalyAnalyse() - top_anomalies = analyser.get_range_top_K( - top_k_number, step_list, anomalies - ) - analyser.rewrite_sorted_anomalies( - args.out_path if args.out_path else args.data_path_dir - ) - - print_info_log(f"Top {top_k_number} anomalies are listed as follows:") - for index, anomaly in enumerate(top_anomalies): - print_info_log(f"{index}: {anomaly.message}") - - -if __name__ == "__main__": - _anomaly_analyse() - print_info_log("Analyse task completed.") diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py deleted file mode 100644 index 21f9e351a2f..00000000000 --- a/debug/accuracy_tools/kj600/kj600/file_check.py +++ /dev/null @@ -1,324 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -import os -import re - -from kj600.utils import print_info_log - - -class CodedException(Exception): - def __init__(self, code, error_info=""): - super().__init__() - self.code = code - self.error_info = self.err_strs.get(code) + error_info - - def __str__(self): - return self.error_info - - -class FileCheckException(CodedException): - INVALID_FILE_ERROR = 0 - FILE_PERMISSION_ERROR = 1 - SOFT_LINK_ERROR = 2 - ILLEGAL_PATH_ERROR = 3 - ILLEGAL_PARAM_ERROR = 4 - FILE_TOO_LARGE_ERROR = 5 - - err_strs = { - SOFT_LINK_ERROR: "[kj600] 检测到软链接: ", - FILE_PERMISSION_ERROR: "[kj600] 文件权限错误: ", - INVALID_FILE_ERROR: "[kj600] 无效文件: ", - ILLEGAL_PATH_ERROR: "[kj600] 非法文件路径: ", - ILLEGAL_PARAM_ERROR: "[kj600] 非法打开方式: ", - FILE_TOO_LARGE_ERROR: "[kj600] 文件过大: ", - } - - -class FileCheckConst: - """ - Class for file check const - """ - - READ_ABLE = "read" - WRITE_ABLE = "write" - READ_WRITE_ABLE = "read and write" - DIRECTORY_LENGTH = 4096 - FILE_NAME_LENGTH = 255 - FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" - FILE_PATTERN = r"^[a-zA-Z0-9_./-]+$" - JSON_SUFFIX = ".json" - MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 - DIR = "dir" - FILE = "file" - DATA_DIR_AUTHORITY = 0o750 - DATA_FILE_AUTHORITY = 0o640 - FILE_SIZE_DICT = { - JSON_SUFFIX: MAX_JSON_SIZE, - } - - -class FileChecker: - """ - The class for check file. - - Attributes: - file_path: The file or dictionary path to be verified. 
- path_type: file or dictionary - ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability - file_type(str): The correct file type for file - """ - - def __init__( - self, file_path, path_type, ability=None, file_type=None, is_script=True - ): - self.file_path = file_path - self.path_type = self._check_path_type(path_type) - self.ability = ability - self.file_type = file_type - self.is_script = is_script - - @staticmethod - def _check_path_type(path_type): - if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: - print_info_log( - f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." - ) - raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) - return path_type - - def common_check(self): - """ - 功能:用户校验基本文件权限:软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符 - 注意:文件后缀的合法性,非通用操作,可使用其他独立接口实现 - """ - check_path_exists(self.file_path) - check_link(self.file_path) - self.file_path = os.path.realpath(self.file_path) - check_path_length(self.file_path) - check_path_type(self.file_path, self.path_type) - self.check_path_ability() - if self.is_script: - check_path_owner_consistent(self.file_path) - check_path_pattern_vaild(self.file_path) - check_common_file_size(self.file_path) - check_file_suffix(self.file_path, self.file_type) - return self.file_path - - def check_path_ability(self): - if self.ability == FileCheckConst.WRITE_ABLE: - check_path_writability(self.file_path) - if self.ability == FileCheckConst.READ_ABLE: - check_path_readability(self.file_path) - if self.ability == FileCheckConst.READ_WRITE_ABLE: - check_path_readability(self.file_path) - check_path_writability(self.file_path) - - -class FileOpen: - """ - The class for open file by a safe way. - - Attributes: - file_path: The file or dictionary path to be opened. 
- mode(str): The file open mode - """ - - SUPPORT_READ_MODE = ["r", "rb"] - SUPPORT_WRITE_MODE = ["w", "wb", "a", "ab"] - SUPPORT_READ_WRITE_MODE = ["r+", "rb+", "w+", "wb+", "a+", "ab+"] - - def __init__(self, file_path, mode, encoding="utf-8"): - self.file_path = file_path - self.mode = mode - self.encoding = encoding - self._handle = None - - def __enter__(self): - self.check_file_path() - binary_mode = "b" - if binary_mode not in self.mode: - self._handle = open(self.file_path, self.mode, encoding=self.encoding) - else: - self._handle = open(self.file_path, self.mode) - return self._handle - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._handle: - self._handle.close() - - def check_file_path(self): - support_mode = ( - self.SUPPORT_READ_MODE - + self.SUPPORT_WRITE_MODE - + self.SUPPORT_READ_WRITE_MODE - ) - if self.mode not in support_mode: - print_info_log("File open not support %s mode" % self.mode) - raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) - check_link(self.file_path) - self.file_path = os.path.realpath(self.file_path) - check_path_length(self.file_path) - self.check_ability_and_owner() - check_path_pattern_vaild(self.file_path) - if os.path.exists(self.file_path): - check_common_file_size(self.file_path) - - def check_ability_and_owner(self): - if self.mode in self.SUPPORT_READ_MODE: - check_path_exists(self.file_path) - check_path_readability(self.file_path) - check_path_owner_consistent(self.file_path) - if self.mode in self.SUPPORT_WRITE_MODE and os.path.exists(self.file_path): - check_path_writability(self.file_path) - check_path_owner_consistent(self.file_path) - if self.mode in self.SUPPORT_READ_WRITE_MODE and os.path.exists(self.file_path): - check_path_readability(self.file_path) - check_path_writability(self.file_path) - check_path_owner_consistent(self.file_path) - - -def check_link(path): - abs_path = os.path.abspath(path) - if os.path.islink(abs_path): - print_info_log("The file path {} is a soft 
link.".format(path)) - raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) - - -def check_path_length(path, name_length=None): - file_max_name_length = ( - name_length if name_length else FileCheckConst.FILE_NAME_LENGTH - ) - if ( - len(path) > FileCheckConst.DIRECTORY_LENGTH - or len(os.path.basename(path)) > file_max_name_length - ): - print_info_log("The file path length exceeds limit.") - raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) - - -def check_path_exists(path): - if not os.path.exists(path): - print_info_log("The file path %s does not exist." % path) - raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) - - -def check_path_readability(path): - if not os.access(path, os.R_OK): - print_info_log("The file path %s is not readable." % path) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_writability(path): - if not os.access(path, os.W_OK): - print_info_log("The file path %s is not writable." % path) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_executable(path): - if not os.access(path, os.X_OK): - print_info_log("The file path %s is not executable." % path) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_other_user_writable(path): - st = os.stat(path) - if st.st_mode & 0o002: - print_info_log( - "The file path %s may be insecure because other users have write permissions. " - % path - ) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_owner_consistent(path): - file_owner = os.stat(path).st_uid - if file_owner != os.getuid(): - print_info_log( - "The file path %s may be insecure because is does not belong to you." % path - ) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_pattern_vaild(path): - if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): - print_info_log("The file path %s contains special characters." 
% (path)) - raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) - - -def check_file_size(file_path, max_size): - file_size = os.path.getsize(file_path) - if file_size >= max_size: - print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") - raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) - - -def check_common_file_size(file_path): - if os.path.isfile(file_path): - for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): - if file_path.endswith(suffix): - check_file_size(file_path, max_size) - break - - -def check_file_suffix(file_path, file_suffix): - if file_suffix: - if not file_path.endswith(file_suffix): - print_info_log(f"The {file_path} should be a {file_suffix} file!") - raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) - - -def check_path_type(file_path, file_type): - if file_type == FileCheckConst.FILE: - if not os.path.isfile(file_path): - print_info_log(f"The {file_path} should be a file!") - raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) - if file_type == FileCheckConst.DIR: - if not os.path.isdir(file_path): - print_info_log(f"The {file_path} should be a dictionary!") - raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) - - -def check_path_before_create(path): - if path_len_exceeds_limit(path): - raise FileCheckException( - FileCheckException.ILLEGAL_PATH_ERROR, "The file path length exceeds limit." - ) - - if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): - raise FileCheckException( - FileCheckException.ILLEGAL_PATH_ERROR, - "The file path {} contains special characters.".format(path), - ) - - -def change_mode(path, mode): - if not os.path.exists(path) or os.path.islink(path): - return - try: - os.chmod(path, mode) - except PermissionError as ex: - raise FileCheckException( - FileCheckException.FILE_PERMISSION_ERROR, - "Failed to change {} authority. 
{}".format(path, str(ex)), - ) from ex - - -def path_len_exceeds_limit(file_path): - return ( - len(os.path.realpath(file_path)) > FileCheckConst.DIRECTORY_LENGTH - or len(os.path.basename(file_path)) > FileCheckConst.FILE_NAME_LENGTH - ) diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py deleted file mode 100644 index ddea3244f5c..00000000000 --- a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py +++ /dev/null @@ -1,145 +0,0 @@ -import sys -import os -import re -import argparse -import pandas as pd -from glob import glob -from collections import defaultdict - - -def parse_logfile(logfile): - grad_norm = [] - step = [] - with open(logfile) as f: - for line in f.readlines(): - if 'consumed samples' in line: - grad_norm.append(float(re.findall('(?<=grad norm\: )[\d\.]*', line)[0])) - # step = int(re.findall('(?<=iteration)[ \d]*', line)[0]) - return grad_norm - - -def parse_monitor_output(output_dir): - reduced = {} - unreduced = {} - for dir in glob(output_dir+'*'): - rank = int(re.findall('(?<=rank)[\d]*', dir)[0]) - unreduced[rank] = [] - reduced[rank] = [] - for file in os.listdir(dir): - # step = int(re.search("(?<=reduced\_)[\d]*", file)[0]) - # if step != 0: - # continue - df = pd.read_csv(os.path.join(dir, file)) - if '_unreduced_' in file: - unreduced[rank].append(df) - pass - elif '_reduced_' in file: - reduced[rank].append(df) - else: - print(f'unexpected file {file} in {dir}') - return reduced, unreduced - -def valid_reduce(reduced, unreduced, tp_size, dp_size, sequence_parallel): - steps = len(reduced[0]) - world_size = len(reduced) - errors = [] - for index, row in unreduced[0][0].iterrows(): - param = row['param_name'] - is_tp_duplicate = False - for step in range(2): - # sum reduced - reduced_mean = 0. 
- for rank in range(world_size): - if len(reduced[rank]) == 0: - continue - df = reduced[rank][step] - value = list(df[df['param_name'] == param]['mean']) - if value == []: - if step == 0: - is_tp_duplicate = True - continue - reduced_mean += value[0] - - # sum unreduced - unreduced_mean = 0. - for rank in range(world_size): - df = unreduced[rank][step] - value = list(df[df['param_name'] == param]['mean']) - if value == []: - continue - unreduced_mean += list(df[df['param_name'] == param]['mean'])[0] - - unreduced_mean /= dp_size - if is_tp_duplicate and (not sequence_parallel or 'embedding' in param): - unreduced_mean /= tp_size - try: - assert_equal(unreduced_mean, reduced_mean) - except AssertionError as e: - errors.append([param, step, e, is_tp_duplicate]) - if errors: - print(errors) - else: - print(f'grad mean is in consist between unreduced grad and reduced grad monitord.') - - - -def assert_equal(a, b): - if b == 0 or a == 0: - return - if b == 0: - rel_diff = a - elif a == 0: - rel_diff = b - else: - rel_diff = abs(a/b-1) - assert rel_diff<0.01, f'{a}, {b}, {rel_diff}' - - -def valid_total_norm(total_norm, reduced, duplicate_embedding): - steps = len(total_norm) - world_size = len(reduced) - errors = [] - for step in range(steps): - calculated_norm = 0. 
- for rank in range(world_size): - if len(reduced[rank]) == 0: - if step == 0: - print(f'rank {rank} is duplicated in dp group') - continue - for index, row in reduced[rank][step].iterrows(): - if duplicate_embedding and 'word_embedding' in row['param_name']: - continue - calculated_norm += row['norm']**2 - try: - assert_equal(calculated_norm**0.5, total_norm[step]) - except AssertionError as e: - errors.append([step, e]) - if errors: - print('total norm errors: ', errors) - else: - print('grad norm in consist between training log and reduced gradients monitored') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--monitor_output', '-m', type=str, required=True, help='path prefix to the output of monitor e.g. kj600_output/Aug12_07-16') - parser.add_argument('--logfile', '-l', type=str, required=True, help='path to the training log file') - parser.add_argument('--tp_size', '-t', type=int, required=True, help='tp parallel size') - parser.add_argument('--dp_size', '-d', type=int, required=True, help='dp parallel size') - parser.add_argument('--pp_size', '-p', type=int, required=True, help='pp parallel size') - parser.add_argument('--untie_embeddings_and_output_weights', '-u', action="store_true", default=False, help='whether untie_embeddings_and_output_weights in pp parallel') - parser.add_argument('--sequence_parallel', '-s', action="store_true", default=False, help='whether sequence parallel is enabled. 
Add -s to store true') - - args = parser.parse_args() - - assert args.tp_size > 0, 'if tp not enabled, set tp_size = 1' - assert args.dp_size > 0, 'if tp not enabled, set dp_size = 1' - assert args.pp_size > 0, 'if tp not enabled, set pp_size = 1' - - total_norm = parse_logfile(args.logfile) - reduced, unreduced = parse_monitor_output(args.monitor_output) - - duplicate_embedding = not args.untie_embeddings_and_output_weights and args.pp_size > 1 - - valid_total_norm(total_norm, reduced, duplicate_embedding) - valid_reduce(reduced, unreduced, args.tp_size, args.dp_size, args.sequence_parallel) \ No newline at end of file -- Gitee From 9d6f5c935c274de263e2332ae0d10da0bf2d9934 Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Thu, 22 Aug 2024 15:50:40 +0800 Subject: [PATCH 047/333] add_overall_metrics --- .../module_visualization/graph/prof_node.py | 69 ++++++++++++++----- .../graph_build/prof_graph_builder.py | 33 +++++++-- .../prof_parse/prof_data_pre_process.py | 12 +++- profiler/prof_common/constant.py | 11 +++ profiler/prof_common/kernel_bean.py | 4 ++ profiler/prof_common/trace_event_bean.py | 11 ++- 6 files changed, 114 insertions(+), 26 deletions(-) diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index 3588a8b81b1..df77d325df8 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -24,6 +24,9 @@ class ProfNode(BaseNode): self._kernel_total_list = [] self._communication_total_list = [] self._precision_index = 1 + self._computing_time = 0 + self._uncovered_comm_time = 0 + self._free_time = 0 @property def node_id(self): @@ -37,11 +40,19 @@ class ProfNode(BaseNode): @property def total_kernels(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return [kernel for node in self.child_nodes for kernel in node.total_kernels] return self._kernel_total_list + @property + def total_communications(self): + if 
self.node_type == Constant.VIRTUAL_TYPE: + return [comm for node in self.child_nodes for comm in node.total_communications] + return self._communication_total_list + @property def host_total_dur(self): - if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: + if self.node_type == Constant.VIRTUAL_TYPE: return sum((node.host_total_dur for node in self.child_nodes)) return self._event.dur @@ -53,9 +64,7 @@ class ProfNode(BaseNode): @property def device_total_dur(self): - if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: - return sum((node.device_total_dur for node in self.child_nodes)) - return sum((kernel.dur for kernel in self._kernel_total_list)) + return sum((kernel.dur for kernel in self.total_kernels)) @property def device_self_dur(self): @@ -82,24 +91,27 @@ class ProfNode(BaseNode): @property def communication_data(self) -> list: - if self.node_type == Constant.VIRTUAL_TYPE: - return [comm for node in self.child_nodes for comm in node.communication_data] - return [[comm.name, comm.dur] for comm in self._communication_total_list] + return [[comm.name, comm.dur] for comm in self.total_communications] @property def overall_data(self): - return {"Computing Time(ms)": 1, "Uncovered Communication Time(ms)": 1, "Free Time(ms)": 1} + return {"Computing Time(us)": round(self._computing_time, 3), + "Uncovered Communication Time(us)": round(self._uncovered_comm_time, 3), + "Free Time(us)": round(self._free_time, 3)} @property def data(self): - return {"Input Data": self.input_data, - "precision_index": self.precision_index, - "Host Self Duration(us)": round(self.host_self_dur, 2), - "Host Total Duration(us)": round(self.host_total_dur, 2), - "Device Self Duration(us)": round(self.device_self_dur, 2), - "Device Total Duration(us)": round(self.device_total_dur, 2), - "kernels": self.kernel_data, - "communications": self.communication_data} + data = { + "Overall Metrics": self.overall_data} if self.node_type != Constant.OPERATOR_TYPE else {} + 
data.update({"Input Data": self.input_data, + "precision_index": self.precision_index, + "Host Self Duration(us)": round(self.host_self_dur, 3), + "Host Total Duration(us)": round(self.host_total_dur, 3), + "Device Self Duration(us)": round(self.device_self_dur, 3), + "Device Total Duration(us)": round(self.device_total_dur, 3), + "kernels": self.kernel_data, + "Communications": self.communication_data}) + return data @property def info(self): @@ -141,3 +153,28 @@ class ProfNode(BaseNode): diff_dur = max_dur - min_dur for node in self.child_nodes: node.precision_index = 1 - (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 + + def update_overall_metrics(self, overlap_analysis_event): + if not self.total_kernels and not self.total_communications: + return + kernel_start = min((kernel.start_time for kernel in self.total_kernels)) if self.total_kernels else float("inf") + kernel_end = max((kernel.end_time for kernel in self.total_kernels)) if self.total_kernels else float("-inf") + comm_start = min((comm.start_time for comm in self.total_communications)) \ + if self.total_communications else float("inf") + comm_end = max((comm.end_time for comm in self.total_communications)) \ + if self.total_communications else float("-inf") + device_start = min(kernel_start, comm_start) + device_end = max(kernel_end, comm_end) + for event in overlap_analysis_event: + if event.start_time >= device_end: + continue + if event.end_time <= device_start: + continue + duration_us = float( + min(device_end, event.end_time) - max(device_start, event.start_time)) + if event.name == Constant.COMPUTING_EVENT: + self._computing_time += duration_us + elif event.name == Constant.FREE_EVENT: + self._free_time += duration_us + elif event.name == Constant.UNCOVERED_COMMUNICATION_EVENT: + self._uncovered_comm_time += duration_us diff --git a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index 
331e0cb050c..9606193acd8 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from decimal import Decimal + from profiler.module_visualization.graph.prof_node import ProfNode from profiler.module_visualization.graph_build.fwd_module_node import FwdModuleNode from profiler.prof_common.tree_builder import TreeBuilder @@ -29,8 +31,9 @@ class ProfGraphBuilder: def _create_event_bean_from_ops(cls, op_list: list, name: str) -> TraceEventBean: min_start = min((op.start_time for op in iter(op_list))) max_end = max((op.end_time for op in iter(op_list))) - # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-1 +2处理 - event = TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) + # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-0.0001 +0.0001处理 + event = TraceEventBean( + {"ts": min_start - Decimal("0.0001"), "dur": float(max_end - min_start + Decimal("0.0001")), "name": name}) event.event_type = Constant.MODULE_TYPE return event @@ -73,6 +76,15 @@ class ProfGraphBuilder: root_node.update_child_nodes(merged_nodes[0]) root_node.update_child_nodes(node) merged_nodes = [] + if len(merged_nodes) >= 2: + virtual_node = ProfNode(TraceEventBean({}, f"Operators_Between_Modules_{order_id}"), root_node) + root_node.update_child_nodes(virtual_node) + for op_node in merged_nodes: + op_node.parent_node = virtual_node + virtual_node.update_child_nodes(op_node) + virtual_nodes.append(virtual_node) + elif len(merged_nodes) == 1: + root_node.update_child_nodes(merged_nodes[0]) return virtual_nodes def build_graph(self): @@ -96,6 +108,8 @@ class ProfGraphBuilder: all_nodes.extend(virtual_nodes) for node in all_nodes: node.update_child_precision_index() + if node.node_type != 
Constant.OPERATOR_TYPE: + node.update_overall_metrics(self._prof_data.get(Constant.OVERLAP_ANALYSIS_EVENT, [])) return all_nodes def find_bwd_module(self) -> list: @@ -121,20 +135,29 @@ class ProfGraphBuilder: if op.tid == bwd_tid: bwd_op_list.append(op) pre_status = Constant.BACKWARD + continue elif pre_status == Constant.BACKWARD: bwd_module_list.append(self._create_event_bean_from_ops(bwd_op_list, "nn.Module: BACKWARD")) + bwd_module_list.extend(self._match_fwd_module(module_list, fwdbwd_flow, bwd_op_list)) bwd_op_list.clear() pre_status = Constant.FWD_OR_OPT + if bwd_op_list: + bwd_module_list.append(self._create_event_bean_from_ops(bwd_op_list, "nn.Module: BACKWARD")) + bwd_module_list.extend(self._match_fwd_module(module_list, fwdbwd_flow, bwd_op_list)) + bwd_op_list.clear() + return bwd_module_list + def _match_fwd_module(self, module_list, fwdbwd_flow, bwd_op_list): # 通过连线匹配正向module,构建出反向的整体module关系 - root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({}))[0] - fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, cpu_op_list) + bwd_module_list = [] + all_nodes = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({})) + root_node = all_nodes[0] + fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, bwd_op_list) for start_time, end_events in fwdbwd_flow_dict.items(): matched_node = root_node.binary_search(start_time) while matched_node != Constant.INVALID_RETURN: matched_node.update_bwd_op(end_events) matched_node = matched_node.binary_search(start_time) - all_nodes = root_node.find_all_child_nodes() for module_node in all_nodes: if module_node.bwd_op_list: bwd_module_list.append( diff --git a/profiler/module_visualization/prof_parse/prof_data_pre_process.py b/profiler/module_visualization/prof_parse/prof_data_pre_process.py index c16daaecd71..2b5291ea3e4 100644 --- a/profiler/module_visualization/prof_parse/prof_data_pre_process.py +++ b/profiler/module_visualization/prof_parse/prof_data_pre_process.py @@ 
-28,8 +28,10 @@ class ProfDataPreProcess: self._kernel_details_path = "" self._kernel_pid = None self._hccl_pid = None + self._overlap_analysis_pid = None self._result_data = {Constant.CPU_OP_EVENT: [], Constant.MODULE_EVENT: [], Constant.KERNEL_EVENT: [], - Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: []} + Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: [], + Constant.OVERLAP_ANALYSIS_EVENT: []} @staticmethod def _check_trace_data(trace_data): @@ -72,7 +74,7 @@ class ProfDataPreProcess: self._check_trace_data(trace_data) iter_trace_data = [TraceEventBean(data) for data in trace_data] for event in iter_trace_data: - if self._kernel_pid is not None and self._hccl_pid is not None: + if self._kernel_pid is not None and self._hccl_pid is not None and self._overlap_analysis_pid is not None: break if not event.is_meta(): continue @@ -80,6 +82,8 @@ class ProfDataPreProcess: self._kernel_pid = event.pid elif event.is_hccl_process(): self._hccl_pid = event.pid + elif event.is_overlap_analysis_process(): + self._overlap_analysis_pid = event.pid if self._kernel_pid is None: msg = "There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." raise RuntimeError(msg) @@ -108,6 +112,8 @@ class ProfDataPreProcess: self._result_data[Constant.KERNEL_EVENT].append(event) elif event.is_hccl_event(self._hccl_pid): self._result_data[Constant.HCCL_EVENT].append(event) + elif event.is_overlap_analysis_event(self._overlap_analysis_pid): + self._result_data[Constant.OVERLAP_ANALYSIS_EVENT].append(event) def _parse_kernel_details(self): if not self._kernel_details_path: @@ -124,6 +130,6 @@ class ProfDataPreProcess: if not self._result_data.get(Constant.CPU_OP_EVENT): msg = "This data does not have any aten operator, please make sure to enable the CPU switch." 
raise RuntimeError(msg) - if not self._result_data.get(Constant.MODULE_EVENT): + if not [event for event in self._result_data.get(Constant.MODULE_EVENT) if event.is_nn_module()]: msg = "This data does not collect any modules, please make sure to enable the with_stack or with_modules." raise RuntimeError(msg) diff --git a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 90ec6d006e5..b0c8877f569 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -24,6 +24,7 @@ class Constant(object): TORCH_TO_NPU_FLOW = "torch_to_device" KERNEL_EVENT = "kernel_event" HCCL_EVENT = "hccl_event" + OVERLAP_ANALYSIS_EVENT = "overlap_event" FWD_BWD_FLOW = "fwd_to_bwd" NPU_ROOT_ID = "NPU" @@ -35,3 +36,13 @@ class Constant(object): MODULE_TYPE = 0 OPERATOR_TYPE = 1 VIRTUAL_TYPE = 9 + + # trace bar + NPU_BAR = "Ascend Hardware" + HCCL_BAR = "HCCL" + OVERLAP_BAR = "Overlap Analysis" + + # overlap_analysis event + COMPUTING_EVENT = "Computing" + FREE_EVENT = "Free" + UNCOVERED_COMMUNICATION_EVENT = "Communication(Not Overlapped)" diff --git a/profiler/prof_common/kernel_bean.py b/profiler/prof_common/kernel_bean.py index cbfa10c0a92..4d60a69080f 100644 --- a/profiler/prof_common/kernel_bean.py +++ b/profiler/prof_common/kernel_bean.py @@ -30,6 +30,10 @@ class KernelBean: def start_time(self): return convert_to_decimal(self._ts) + @property + def end_time(self): + return self.start_time + convert_to_decimal(self.dur) + @property def is_computing_op(self): return self._core_type != "HCCL" diff --git a/profiler/prof_common/trace_event_bean.py b/profiler/prof_common/trace_event_bean.py index 0aee79907ba..f1ba62e69b9 100644 --- a/profiler/prof_common/trace_event_bean.py +++ b/profiler/prof_common/trace_event_bean.py @@ -14,6 +14,7 @@ # limitations under the License. 
from decimal import Decimal +from profiler.prof_common.constant import Constant from profiler.prof_common.utils import convert_to_decimal from profiler.prof_common.analyze_dict import AnalyzeDict @@ -84,8 +85,14 @@ class TraceEventBean(AnalyzeDict): def is_hccl_event(self, hccl_pid): return self.ph == "X" and self.pid == hccl_pid and self.name.startswith("hcom_") + def is_overlap_analysis_event(self, overlap_analysis_pid): + return self.ph == "X" and self.pid == overlap_analysis_pid + def is_npu_process(self): - return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "Ascend Hardware" + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == Constant.NPU_BAR def is_hccl_process(self): - return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "HCCL" + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == Constant.HCCL_BAR + + def is_overlap_analysis_process(self): + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == Constant.OVERLAP_BAR -- Gitee From 7b579615949c1f99792f07bb19311152b76953b0 Mon Sep 17 00:00:00 2001 From: litian_drinksnow <1063185601@qq.com> Date: Tue, 27 Aug 2024 18:52:12 +0800 Subject: [PATCH 048/333] merge kj600 --- .../kj600/kj600/anomaly_analyse.py | 248 ++++++++++++++ .../accuracy_tools/kj600/kj600/file_check.py | 324 ++++++++++++++++++ .../kj600/kj600/unittest/test_monitor.py | 145 ++++++++ 3 files changed, 717 insertions(+) create mode 100644 debug/accuracy_tools/kj600/kj600/anomaly_analyse.py create mode 100644 debug/accuracy_tools/kj600/kj600/file_check.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py new file mode 100644 index 00000000000..f6069db6fb3 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py @@ -0,0 
+1,248 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import ast +import fcntl +import heapq +import json +import os +from pathlib import Path +import sys + +from kj600.utils import print_info_log, print_warn_log +from kj600.anomaly_detect import GradAnomalyData +from kj600.file_check import ( + change_mode, + check_link, + FileCheckConst, + check_path_before_create, + FileChecker, + FileOpen, +) + +ANOMALY_JSON = "anomaly.json" +ANALYSE_JSON = "anomaly_analyse.json" + +class AnomalyDataWriter: + """ + 异常数据写入类,负责将异常数据写入到JSON文件中。 + """ + + def __init__(self, dump_path, rank) -> None: + self.dump_path = dump_path + self.dump_rank_dir = os.path.join(self.dump_path, f"rank{rank}") + self.json_path = os.path.join(self.dump_rank_dir, ANOMALY_JSON) + + @staticmethod + def get_anomaly_dict(anomalies): + """将GradAnomalyData列表转换为json""" + anomalies_json = {} + for anomaly in anomalies: + anomalies_json.update({anomaly.get_key(): anomaly.to_dict()}) + return anomalies_json + + @staticmethod + def update_data_in_single_json(json_path, anomalies_data): + with FileOpen(json_path, "w+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + json.dump(anomalies_data, f, indent=1) + fcntl.flock(f, fcntl.LOCK_UN) + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def init_detected_json(self): + """初始化落盘文件""" + 
check_path_before_create(self.dump_path) + if not os.path.exists(self.dump_path): + Path(self.dump_path).mkdir( + mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True + ) + file_check = FileChecker(self.dump_path, FileCheckConst.DIR) + file_check.common_check() + + if not os.path.exists(self.dump_rank_dir): + Path(self.dump_rank_dir).mkdir( + FileCheckConst.DATA_DIR_AUTHORITY, parents=True, exist_ok=True + ) + + if os.path.exists(self.json_path): + file_check = FileChecker( + self.json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {self.json_path}.") + os.remove(self.json_path) + Path(self.json_path).touch() + change_mode(self.json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def write_detected_json(self, anomalies): + """ + 落盘异常数据 + Args: + anomalies: GradAnomalyData对象列表 + """ + anomalies_json = self.get_anomaly_dict(anomalies) + print_info_log(f"{ANOMALY_JSON} is at {self.dump_rank_dir}.") + if Path(self.json_path).exists() and os.path.getsize(self.json_path) > 0: + with FileOpen(self.json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_to_write = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + else: + data_to_write = {} + data_to_write.update(anomalies_json) + self.update_data_in_single_json(self.json_path, data_to_write) + + +class AnomalyDataLoader: + def __init__(self, data_path) -> None: + self.data_path = data_path + + @staticmethod + def create_instances_from_dict(anomalies_dict: dict): + instances = [] + for values in anomalies_dict.values(): + try: + instances.append(GradAnomalyData(**values)) + except KeyError as e: + print_warn_log(f"Missing key in anomaly data: {e}") + except ValueError as e: + print_warn_log( + f"Value error when creating a GradAnomalyData instance: {e}" + ) + return instances + + def get_anomalies_from_jsons(self): + """遍历文件夹,从rankK/anomaly.json中读取异常数据 + return: anomalies: GradAnomalyData对象列表 + """ + anomalies = [] + 
check_link(self.data_path) + for rank_dir in os.listdir(self.data_path): + rank_path = os.path.join(self.data_path, rank_dir) + if not os.path.isdir(rank_path): + continue + json_path = os.path.join(rank_path, ANOMALY_JSON) + if not os.path.exists(json_path): + continue + with FileOpen(json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_anomalies = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + instances = self.create_instances_from_dict(data_anomalies) + anomalies.extend(instances) + return anomalies + + +class AnomalyAnalyse: + def __init__(self) -> None: + self.sorted_anomalies = [] + + def get_range_top_K(self, topk, step_list, anomalies): + """ + 获取前topk个step_list范围内的异常。 + """ + if not step_list: + filtered_anomalies = anomalies + else: + filtered_anomalies = [ + anomaly for anomaly in anomalies if anomaly.step in step_list + ] + if topk >= len(filtered_anomalies): + self.sorted_anomalies = sorted(filtered_anomalies) + else: + self.sorted_anomalies = list(heapq.nsmallest(topk, filtered_anomalies)) + return self.sorted_anomalies + + def rewrite_sorted_anomalies(self, output_path): + """ + 将排序后的异常数据重新落盘 + """ + file_check = FileChecker( + output_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + + sorted_data = AnomalyDataWriter.get_anomaly_dict(self.sorted_anomalies) + print_info_log(f"{ANALYSE_JSON} is at {output_path}.") + json_path = os.path.join(output_path, ANALYSE_JSON) + if os.path.exists(json_path): + file_check = FileChecker( + json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {json_path}.") + os.remove(json_path) + Path(json_path).touch() + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + AnomalyDataWriter.update_data_in_single_json(json_path, sorted_data) + + +def _get_parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--data_path", dest="data_path_dir", default="./", 
type=str, + help=" The anomaly detect result dictionary: generate from kj600 tool.", + required=True, + ) + parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, + help=" The analyse task result out path.", + required=False, + ) + parser.add_argument("-k", "--topk", dest="top_k_number", default=8, type=int, + help=" Top K number of earliest anomalies.", + required=False, + ) + parser.add_argument("-s", "--step", dest="step_list", default=[], type=str, + help=" Analyse which steps.", + required=False, + ) + return parser.parse_args(sys.argv[1:]) + +def _get_step_and_stop(args): + try: + step_list = ast.literal_eval(args.step_list) + if not isinstance(step_list, list): + raise ValueError(f"{args.step_list} is not a list") + except (ValueError, SyntaxError, RecursionError) as e: + raise Exception( + f"The step list must be a resolvable list type" + ) from e + if args.top_k_number <= 0: + raise Exception("The top k number must be greater than 0.") + return step_list, args.top_k_number + +def _anomaly_analyse(): + args = _get_parse_args() + step_list, top_k_number = _get_step_and_stop(args) + loader = AnomalyDataLoader(args.data_path_dir) + anomalies = loader.get_anomalies_from_jsons() + analyser = AnomalyAnalyse() + top_anomalies = analyser.get_range_top_K( + top_k_number, step_list, anomalies + ) + analyser.rewrite_sorted_anomalies( + args.out_path if args.out_path else args.data_path_dir + ) + + print_info_log(f"Top {top_k_number} anomalies are listed as follows:") + for index, anomaly in enumerate(top_anomalies): + print_info_log(f"{index}: {anomaly.message}") + + +if __name__ == "__main__": + _anomaly_analyse() + print_info_log("Analyse task completed.") diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py new file mode 100644 index 00000000000..21f9e351a2f --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/file_check.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 
-*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +import os +import re + +from kj600.utils import print_info_log + + +class CodedException(Exception): + def __init__(self, code, error_info=""): + super().__init__() + self.code = code + self.error_info = self.err_strs.get(code) + error_info + + def __str__(self): + return self.error_info + + +class FileCheckException(CodedException): + INVALID_FILE_ERROR = 0 + FILE_PERMISSION_ERROR = 1 + SOFT_LINK_ERROR = 2 + ILLEGAL_PATH_ERROR = 3 + ILLEGAL_PARAM_ERROR = 4 + FILE_TOO_LARGE_ERROR = 5 + + err_strs = { + SOFT_LINK_ERROR: "[kj600] 检测到软链接: ", + FILE_PERMISSION_ERROR: "[kj600] 文件权限错误: ", + INVALID_FILE_ERROR: "[kj600] 无效文件: ", + ILLEGAL_PATH_ERROR: "[kj600] 非法文件路径: ", + ILLEGAL_PARAM_ERROR: "[kj600] 非法打开方式: ", + FILE_TOO_LARGE_ERROR: "[kj600] 文件过大: ", + } + + +class FileCheckConst: + """ + Class for file check const + """ + + READ_ABLE = "read" + WRITE_ABLE = "write" + READ_WRITE_ABLE = "read and write" + DIRECTORY_LENGTH = 4096 + FILE_NAME_LENGTH = 255 + FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" + FILE_PATTERN = r"^[a-zA-Z0-9_./-]+$" + JSON_SUFFIX = ".json" + MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 + DIR = "dir" + FILE = "file" + DATA_DIR_AUTHORITY = 0o750 + DATA_FILE_AUTHORITY = 0o640 + FILE_SIZE_DICT = { + JSON_SUFFIX: MAX_JSON_SIZE, + } + + +class FileChecker: + """ + The class for check file. 
+ + Attributes: + file_path: The file or dictionary path to be verified. + path_type: file or dictionary + ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability + file_type(str): The correct file type for file + """ + + def __init__( + self, file_path, path_type, ability=None, file_type=None, is_script=True + ): + self.file_path = file_path + self.path_type = self._check_path_type(path_type) + self.ability = ability + self.file_type = file_type + self.is_script = is_script + + @staticmethod + def _check_path_type(path_type): + if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: + print_info_log( + f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." + ) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + return path_type + + def common_check(self): + """ + 功能:用户校验基本文件权限:软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符 + 注意:文件后缀的合法性,非通用操作,可使用其他独立接口实现 + """ + check_path_exists(self.file_path) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + check_path_type(self.file_path, self.path_type) + self.check_path_ability() + if self.is_script: + check_path_owner_consistent(self.file_path) + check_path_pattern_vaild(self.file_path) + check_common_file_size(self.file_path) + check_file_suffix(self.file_path, self.file_type) + return self.file_path + + def check_path_ability(self): + if self.ability == FileCheckConst.WRITE_ABLE: + check_path_writability(self.file_path) + if self.ability == FileCheckConst.READ_ABLE: + check_path_readability(self.file_path) + if self.ability == FileCheckConst.READ_WRITE_ABLE: + check_path_readability(self.file_path) + check_path_writability(self.file_path) + + +class FileOpen: + """ + The class for open file by a safe way. + + Attributes: + file_path: The file or dictionary path to be opened. 
+ mode(str): The file open mode + """ + + SUPPORT_READ_MODE = ["r", "rb"] + SUPPORT_WRITE_MODE = ["w", "wb", "a", "ab"] + SUPPORT_READ_WRITE_MODE = ["r+", "rb+", "w+", "wb+", "a+", "ab+"] + + def __init__(self, file_path, mode, encoding="utf-8"): + self.file_path = file_path + self.mode = mode + self.encoding = encoding + self._handle = None + + def __enter__(self): + self.check_file_path() + binary_mode = "b" + if binary_mode not in self.mode: + self._handle = open(self.file_path, self.mode, encoding=self.encoding) + else: + self._handle = open(self.file_path, self.mode) + return self._handle + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._handle: + self._handle.close() + + def check_file_path(self): + support_mode = ( + self.SUPPORT_READ_MODE + + self.SUPPORT_WRITE_MODE + + self.SUPPORT_READ_WRITE_MODE + ) + if self.mode not in support_mode: + print_info_log("File open not support %s mode" % self.mode) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + self.check_ability_and_owner() + check_path_pattern_vaild(self.file_path) + if os.path.exists(self.file_path): + check_common_file_size(self.file_path) + + def check_ability_and_owner(self): + if self.mode in self.SUPPORT_READ_MODE: + check_path_exists(self.file_path) + check_path_readability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_WRITE_MODE and os.path.exists(self.file_path): + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_READ_WRITE_MODE and os.path.exists(self.file_path): + check_path_readability(self.file_path) + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + + +def check_link(path): + abs_path = os.path.abspath(path) + if os.path.islink(abs_path): + print_info_log("The file path {} is a soft 
link.".format(path)) + raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) + + +def check_path_length(path, name_length=None): + file_max_name_length = ( + name_length if name_length else FileCheckConst.FILE_NAME_LENGTH + ) + if ( + len(path) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(path)) > file_max_name_length + ): + print_info_log("The file path length exceeds limit.") + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_exists(path): + if not os.path.exists(path): + print_info_log("The file path %s does not exist." % path) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_readability(path): + if not os.access(path, os.R_OK): + print_info_log("The file path %s is not readable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_writability(path): + if not os.access(path, os.W_OK): + print_info_log("The file path %s is not writable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_executable(path): + if not os.access(path, os.X_OK): + print_info_log("The file path %s is not executable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_other_user_writable(path): + st = os.stat(path) + if st.st_mode & 0o002: + print_info_log( + "The file path %s may be insecure because other users have write permissions. " + % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_owner_consistent(path): + file_owner = os.stat(path).st_uid + if file_owner != os.getuid(): + print_info_log( + "The file path %s may be insecure because is does not belong to you." % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_pattern_vaild(path): + if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): + print_info_log("The file path %s contains special characters." 
% (path)) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_file_size(file_path, max_size): + file_size = os.path.getsize(file_path) + if file_size >= max_size: + print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") + raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) + + +def check_common_file_size(file_path): + if os.path.isfile(file_path): + for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): + if file_path.endswith(suffix): + check_file_size(file_path, max_size) + break + + +def check_file_suffix(file_path, file_suffix): + if file_suffix: + if not file_path.endswith(file_suffix): + print_info_log(f"The {file_path} should be a {file_suffix} file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_type(file_path, file_type): + if file_type == FileCheckConst.FILE: + if not os.path.isfile(file_path): + print_info_log(f"The {file_path} should be a file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + if file_type == FileCheckConst.DIR: + if not os.path.isdir(file_path): + print_info_log(f"The {file_path} should be a dictionary!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_before_create(path): + if path_len_exceeds_limit(path): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, "The file path length exceeds limit." + ) + + if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, + "The file path {} contains special characters.".format(path), + ) + + +def change_mode(path, mode): + if not os.path.exists(path) or os.path.islink(path): + return + try: + os.chmod(path, mode) + except PermissionError as ex: + raise FileCheckException( + FileCheckException.FILE_PERMISSION_ERROR, + "Failed to change {} authority. 
{}".format(path, str(ex)), + ) from ex + + +def path_len_exceeds_limit(file_path): + return ( + len(os.path.realpath(file_path)) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(file_path)) > FileCheckConst.FILE_NAME_LENGTH + ) diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py new file mode 100644 index 00000000000..ddea3244f5c --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py @@ -0,0 +1,145 @@ +import sys +import os +import re +import argparse +import pandas as pd +from glob import glob +from collections import defaultdict + + +def parse_logfile(logfile): + grad_norm = [] + step = [] + with open(logfile) as f: + for line in f.readlines(): + if 'consumed samples' in line: + grad_norm.append(float(re.findall('(?<=grad norm\: )[\d\.]*', line)[0])) + # step = int(re.findall('(?<=iteration)[ \d]*', line)[0]) + return grad_norm + + +def parse_monitor_output(output_dir): + reduced = {} + unreduced = {} + for dir in glob(output_dir+'*'): + rank = int(re.findall('(?<=rank)[\d]*', dir)[0]) + unreduced[rank] = [] + reduced[rank] = [] + for file in os.listdir(dir): + # step = int(re.search("(?<=reduced\_)[\d]*", file)[0]) + # if step != 0: + # continue + df = pd.read_csv(os.path.join(dir, file)) + if '_unreduced_' in file: + unreduced[rank].append(df) + pass + elif '_reduced_' in file: + reduced[rank].append(df) + else: + print(f'unexpected file {file} in {dir}') + return reduced, unreduced + +def valid_reduce(reduced, unreduced, tp_size, dp_size, sequence_parallel): + steps = len(reduced[0]) + world_size = len(reduced) + errors = [] + for index, row in unreduced[0][0].iterrows(): + param = row['param_name'] + is_tp_duplicate = False + for step in range(2): + # sum reduced + reduced_mean = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + continue + df = reduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + if step == 0: + is_tp_duplicate = True + continue + reduced_mean += value[0] + + # sum unreduced + unreduced_mean = 0. + for rank in range(world_size): + df = unreduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + continue + unreduced_mean += list(df[df['param_name'] == param]['mean'])[0] + + unreduced_mean /= dp_size + if is_tp_duplicate and (not sequence_parallel or 'embedding' in param): + unreduced_mean /= tp_size + try: + assert_equal(unreduced_mean, reduced_mean) + except AssertionError as e: + errors.append([param, step, e, is_tp_duplicate]) + if errors: + print(errors) + else: + print(f'grad mean is in consist between unreduced grad and reduced grad monitord.') + + + +def assert_equal(a, b): + if b == 0 or a == 0: + return + if b == 0: + rel_diff = a + elif a == 0: + rel_diff = b + else: + rel_diff = abs(a/b-1) + assert rel_diff<0.01, f'{a}, {b}, {rel_diff}' + + +def valid_total_norm(total_norm, reduced, duplicate_embedding): + steps = len(total_norm) + world_size = len(reduced) + errors = [] + for step in range(steps): + calculated_norm = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + if step == 0: + print(f'rank {rank} is duplicated in dp group') + continue + for index, row in reduced[rank][step].iterrows(): + if duplicate_embedding and 'word_embedding' in row['param_name']: + continue + calculated_norm += row['norm']**2 + try: + assert_equal(calculated_norm**0.5, total_norm[step]) + except AssertionError as e: + errors.append([step, e]) + if errors: + print('total norm errors: ', errors) + else: + print('grad norm in consist between training log and reduced gradients monitored') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--monitor_output', '-m', type=str, required=True, help='path prefix to the output of monitor e.g. kj600_output/Aug12_07-16') + parser.add_argument('--logfile', '-l', type=str, required=True, help='path to the training log file') + parser.add_argument('--tp_size', '-t', type=int, required=True, help='tp parallel size') + parser.add_argument('--dp_size', '-d', type=int, required=True, help='dp parallel size') + parser.add_argument('--pp_size', '-p', type=int, required=True, help='pp parallel size') + parser.add_argument('--untie_embeddings_and_output_weights', '-u', action="store_true", default=False, help='whether untie_embeddings_and_output_weights in pp parallel') + parser.add_argument('--sequence_parallel', '-s', action="store_true", default=False, help='whether sequence parallel is enabled. 
Add -s to store true') + + args = parser.parse_args() + + assert args.tp_size > 0, 'if tp not enabled, set tp_size = 1' + assert args.dp_size > 0, 'if tp not enabled, set dp_size = 1' + assert args.pp_size > 0, 'if tp not enabled, set pp_size = 1' + + total_norm = parse_logfile(args.logfile) + reduced, unreduced = parse_monitor_output(args.monitor_output) + + duplicate_embedding = not args.untie_embeddings_and_output_weights and args.pp_size > 1 + + valid_total_norm(total_norm, reduced, duplicate_embedding) + valid_reduce(reduced, unreduced, args.tp_size, args.dp_size, args.sequence_parallel) \ No newline at end of file -- Gitee From 82c6f7155c96748daf92d06a0ba115f805cc0fe7 Mon Sep 17 00:00:00 2001 From: litian_drinksnow <1063185601@qq.com> Date: Wed, 28 Aug 2024 15:01:14 +0800 Subject: [PATCH 049/333] fix reviews --- .../accuracy_tools/kj600/kj600/file_check.py | 51 ++++++++----------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py index 21f9e351a2f..80f456a6287 100644 --- a/debug/accuracy_tools/kj600/kj600/file_check.py +++ b/debug/accuracy_tools/kj600/kj600/file_check.py @@ -17,7 +17,7 @@ import os import re -from kj600.utils import print_info_log +from kj600.utils import print_error_log class CodedException(Exception): @@ -94,7 +94,7 @@ class FileChecker: @staticmethod def _check_path_type(path_type): if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: - print_info_log( + print_error_log( f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." 
) raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) @@ -167,7 +167,7 @@ class FileOpen: + self.SUPPORT_READ_WRITE_MODE ) if self.mode not in support_mode: - print_info_log("File open not support %s mode" % self.mode) + print_error_log(f"File open not support {self.mode} mode") raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) check_link(self.file_path) self.file_path = os.path.realpath(self.file_path) @@ -194,52 +194,45 @@ class FileOpen: def check_link(path): abs_path = os.path.abspath(path) if os.path.islink(abs_path): - print_info_log("The file path {} is a soft link.".format(path)) + print_error_log(f"The file path {path} is a soft link.") raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) -def check_path_length(path, name_length=None): - file_max_name_length = ( - name_length if name_length else FileCheckConst.FILE_NAME_LENGTH - ) - if ( - len(path) > FileCheckConst.DIRECTORY_LENGTH - or len(os.path.basename(path)) > file_max_name_length - ): - print_info_log("The file path length exceeds limit.") +def check_path_length(path): + if path_len_exceeds_limit(path): + print_error_log("The file path length exceeds limit.") raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) def check_path_exists(path): if not os.path.exists(path): - print_info_log("The file path %s does not exist." % path) + print_error_log(f"The file path {path} does not exist.") raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) def check_path_readability(path): if not os.access(path, os.R_OK): - print_info_log("The file path %s is not readable." % path) + print_error_log(f"The file path {path} is not readable.") raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_path_writability(path): if not os.access(path, os.W_OK): - print_info_log("The file path %s is not writable." 
% path) + print_error_log(f"The file path {path} is not writable.") raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_path_executable(path): if not os.access(path, os.X_OK): - print_info_log("The file path %s is not executable." % path) + print_error_log(f"The file path {path} is not executable.") raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_other_user_writable(path): st = os.stat(path) if st.st_mode & 0o002: - print_info_log( - "The file path %s may be insecure because other users have write permissions. " - % path + print_error_log( + f"The file path {path} may be insecure because other users have write permissions. " ) raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) @@ -247,22 +240,22 @@ def check_other_user_writable(path): def check_path_owner_consistent(path): file_owner = os.stat(path).st_uid if file_owner != os.getuid(): - print_info_log( - "The file path %s may be insecure because is does not belong to you." % path + print_error_log( + f"The file path {path} may be insecure because is does not belong to you." ) raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_path_pattern_vaild(path): if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): - print_info_log("The file path %s contains special characters." 
% (path)) + print_error_log(f"The file path {path} contains special characters.") raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) def check_file_size(file_path, max_size): file_size = os.path.getsize(file_path) if file_size >= max_size: - print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") + print_error_log(f"The size of file path {file_path} exceeds {max_size} bytes.") raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) @@ -277,18 +270,18 @@ def check_common_file_size(file_path): def check_file_suffix(file_path, file_suffix): if file_suffix: if not file_path.endswith(file_suffix): - print_info_log(f"The {file_path} should be a {file_suffix} file!") + print_error_log(f"The {file_path} should be a {file_suffix} file!") raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) def check_path_type(file_path, file_type): if file_type == FileCheckConst.FILE: if not os.path.isfile(file_path): - print_info_log(f"The {file_path} should be a file!") + print_error_log(f"The {file_path} should be a file!") raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) if file_type == FileCheckConst.DIR: if not os.path.isdir(file_path): - print_info_log(f"The {file_path} should be a dictionary!") + print_error_log(f"The {file_path} should be a dictionary!") raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) @@ -301,7 +294,7 @@ def check_path_before_create(path): if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): raise FileCheckException( FileCheckException.ILLEGAL_PATH_ERROR, - "The file path {} contains special characters.".format(path), + f"The file path {path} contains special characters." ) @@ -313,7 +306,7 @@ def change_mode(path, mode): except PermissionError as ex: raise FileCheckException( FileCheckException.FILE_PERMISSION_ERROR, - "Failed to change {} authority. {}".format(path, str(ex)), + f"Failed to change {path} authority. 
{str(ex)}", ) from ex -- Gitee From 241df4110ae49fe4a123c3002b96912edbf6925a Mon Sep 17 00:00:00 2001 From: heweidong7 <511650494@qq.com> Date: Sat, 31 Aug 2024 11:12:38 +0800 Subject: [PATCH 050/333] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=99=A8update=E5=92=8Cratio=E5=90=91=E9=87=8F=E7=9A=84?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/workspace.xml | 67 ++++++++++--------- .../kj600/kj600/optimizer_collect.py | 14 ++-- 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index a364b7d06a1..c4837490977 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -4,7 +4,9 @@

-

Affinity API Issues

+

Affinity API Issues

+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} The analysis results of following affinity APIs are based on runtime env - cann-{{ cann_version }} + cann-{{ cann_version }} and - torch-{{ torch_version }} + {{profiling_type}}-{{ profiling_type }}
{% if empty_stacks %} Suggestion: These APIs have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to - Ascend PyTorch Profiler to set - 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. + Ascend Profiler to set + 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. {% endif %} {% for api_name, stacks in result.items() %} diff --git a/profiler/advisor/display/html/templates/ai_core_frequency.html b/profiler/advisor/display/html/templates/ai_core_frequency.html index d0451420373..405460ac961 100644 --- a/profiler/advisor/display/html/templates/ai_core_frequency.html +++ b/profiler/advisor/display/html/templates/ai_core_frequency.html @@ -1,7 +1,10 @@ {% if data|length > 0 %}
-

AI CORE Frequency Issues

+

AI CORE Frequency Issues

+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} Issue: {{ desc }}
Suggestion: {{ suggestion }} diff --git a/profiler/advisor/display/html/templates/byte_alignment.html b/profiler/advisor/display/html/templates/byte_alignment.html new file mode 100644 index 00000000000..5677dd5c1f8 --- /dev/null +++ b/profiler/advisor/display/html/templates/byte_alignment.html @@ -0,0 +1,43 @@ + +
+

Byte Alignment Analysis

+
+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} + {{ desc }} + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ {% if datas|safe %} + The details of top {{ num }} abnormal communication + operators are as follows: +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in datas %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ {% endif %} +
+
diff --git a/profiler/advisor/display/html/templates/communication_retransmission_analysis.html b/profiler/advisor/display/html/templates/communication_retransmission_analysis.html new file mode 100644 index 00000000000..75754fde724 --- /dev/null +++ b/profiler/advisor/display/html/templates/communication_retransmission_analysis.html @@ -0,0 +1,40 @@ +
+

Communication Retransmission Analysis

+
+ {{ desc }} + + + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+

+ {{ desc }} + + + {% for header in headers %} + + {% endfor %} + + + {% for row in data %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
diff --git a/profiler/advisor/display/html/templates/comparison.html b/profiler/advisor/display/html/templates/comparison.html new file mode 100644 index 00000000000..b81802d6b05 --- /dev/null +++ b/profiler/advisor/display/html/templates/comparison.html @@ -0,0 +1,25 @@ +{% if rows|length > 0 %} +
+

{{ sheet_name }}

+
+ Issue: {{ desc }} +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in rows %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
+{% endif %} \ No newline at end of file diff --git a/profiler/advisor/display/html/templates/contention.html b/profiler/advisor/display/html/templates/contention.html new file mode 100644 index 00000000000..3d7fb89c5e0 --- /dev/null +++ b/profiler/advisor/display/html/templates/contention.html @@ -0,0 +1,41 @@ +
+

Bandwidth Contention Analysis

+
+ {{ desc }} + + + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+

+ The following table lists the {{topk}} operators with the + most severe performance deterioration. + + + {% for header in headers %} + + {% endfor %} + + + {% for row in data %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
diff --git a/profiler/advisor/display/html/templates/environment_variable.html b/profiler/advisor/display/html/templates/environment_variable.html new file mode 100644 index 00000000000..ab950963939 --- /dev/null +++ b/profiler/advisor/display/html/templates/environment_variable.html @@ -0,0 +1,21 @@ +
+

Environment Variable Issues

+
+ + + {% for header in result.get("headers") %} + + {% endfor %} + + + {% for row in result.get("data") %} + + {% for value in row %} + + {% endfor %} + + {% endfor %} + +
{{ header }}
{{ value|safe }}
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/gc.html b/profiler/advisor/display/html/templates/gc.html new file mode 100644 index 00000000000..236c0acaec3 --- /dev/null +++ b/profiler/advisor/display/html/templates/gc.html @@ -0,0 +1,42 @@ + +
+

{{ title }}

+
+ {% if rank is not none %} + Analysis of rank {{ rank|safe }}. + {% endif %} + {{ desc }} + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ {% if datas|safe %} + The details of top {{ num }} garbage collection events are as follows: +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in datas %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ {% endif %} +
+
diff --git a/profiler/advisor/display/html/templates/main.html b/profiler/advisor/display/html/templates/main.html index 3727125b419..25db9caed36 100644 --- a/profiler/advisor/display/html/templates/main.html +++ b/profiler/advisor/display/html/templates/main.html @@ -1,6 +1,7 @@ + \"))\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import pandas as pd\n", + "pd.set_option(\"display.max_rows\", 100)\n", + "pd.set_option(\"display.width\", 1000)\n", + "\n", + "import cluster_display\n", + "\n", + "slow_link_sum_df = pd.read_csv(\"slow_link_sum.csv\")\n", + "slow_link_ops_df = pd.read_csv(\"slow_link_ops.csv\", index_col=\"opTypeRelatedRanksDataSize\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster_display.display_transmittime_bar(slow_link_sum_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### 集群异常的opType_relatedRanks_dataSize分析\n", + "\n", + "统计集群异常的opType_relatedRanks_dataSize,时间单位为微秒(us)\n", + "\n", + "包含以下统计项:\n", + "- Count:算子数量\n", + "- Mean:平均耗时\n", + "- Std:标准差\n", + "- Min:最小值\n", + "- Q1:四分之一分位数\n", + "- Median:中位数\n", + "- Q3:四分之三分位数\n", + "- Max:最大值\n", + "- Sum:总耗时\n", + "- MinRank:耗时最少算子所在的Rank\n", + "- MaxRank:耗时最长算子所在的Rank" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(slow_link_ops_df)\n", + "fig_slow_link_ops = cluster_display.display_duration_boxplots(None, slow_link_ops_df, x_title=\"opTypeRelatedRanksDataSize\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + 
"nbformat": 4, + "nbformat_minor": 4 +} -- Gitee From d5d7c8d001c9a4d95dac0749dcbf25e9ff81497d Mon Sep 17 00:00:00 2001 From: i-robot Date: Fri, 28 Feb 2025 06:07:41 +0000 Subject: [PATCH 195/333] =?UTF-8?q?!3059=20=E3=80=90feature=E3=80=91?= =?UTF-8?q?=E7=9C=9F=E5=AE=9E=E6=95=B0=E6=8D=AE=E6=AF=94=E5=AF=B9=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E6=AC=A7=E5=BC=8F=E8=B7=9D=E7=A6=BB=20Merge=20pull=20?= =?UTF-8?q?request=20!3059=20from=20yinglinwei/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 10 ++- .../msprobe/core/compare/acc_compare.py | 24 +++--- .../msprobe/core/compare/highlight.py | 2 + .../core/compare/multiprocessing_compute.py | 21 +++-- .../msprobe/core/compare/npy_compare.py | 42 ++++++--- .../msprobe/core/compare/utils.py | 8 +- .../docs/10.accuracy_compare_PyTorch.md | 20 +++-- .../msprobe/mindspore/compare/ms_compare.py | 3 +- .../mindspore/compare/ms_graph_compare.py | 11 +-- .../test/core_ut/compare/test_acc_compare.py | 38 +++++---- .../compare/test_acc_compare_npy_compare.py | 85 ++++++++++++++----- .../core_ut/compare/test_acc_compare_utils.py | 44 +++++----- .../core_ut/compare/test_cmp_highlight.py | 30 +++---- .../test_cmp_multiprocessing_compute.py | 20 +++-- .../compare/test_ms_graph_compare.py | 2 +- .../msprobe/visualization/utils.py | 7 +- 16 files changed, 229 insertions(+), 138 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index b49b4fffd5e..27dc231c75c 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -256,6 +256,7 @@ class CompareConst: MEAN_DIFF = "Mean diff" NORM_DIFF = "L2norm diff" COSINE = "Cosine" + EUC_DIST = "EucDist" MAX_ABS_ERR = "MaxAbsErr" MAX_RELATIVE_ERR = "MaxRelativeErr" MIN_RELATIVE_ERR = "MinRelativeErr" @@ -330,8 +331,8 @@ class CompareConst: ULP_ERR_STATUS = "ulp_err_status" 
COMPARE_RESULT_HEADER = [ - NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, COSINE, MAX_ABS_ERR, MAX_RELATIVE_ERR, - ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO, + NPU_NAME, BENCH_NAME, NPU_DTYPE, BENCH_DTYPE, NPU_SHAPE, BENCH_SHAPE, COSINE, EUC_DIST, + MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO, NPU_MAX, NPU_MIN, NPU_MEAN, NPU_NORM, BENCH_MAX, BENCH_MIN, BENCH_MEAN, BENCH_NORM, ACCURACY, ERROR_MESSAGE ] @@ -357,7 +358,8 @@ class CompareConst: Const.MD5: MD5_COMPARE_RESULT_HEADER } - ALL_COMPARE_INDEX = [COSINE, MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO, FIVE_THOUSANDTHS_ERR_RATIO] + ALL_COMPARE_INDEX = [COSINE, EUC_DIST, MAX_ABS_ERR, MAX_RELATIVE_ERR, ONE_THOUSANDTH_ERR_RATIO, + FIVE_THOUSANDTHS_ERR_RATIO] SUMMARY_COMPARE_INDEX = [MAX_DIFF, MIN_DIFF, MEAN_DIFF, NORM_DIFF, MAX_RELATIVE_ERR, MIN_RELATIVE_ERR, MEAN_RELATIVE_ERR, NORM_RELATIVE_ERR] @@ -467,7 +469,7 @@ class CompareConst: BENCH_MEAN: None, BENCH_NORM: None, ACCURACY: '', ERROR_MESSAGE: '' } MS_GRAPH_NPY = { - COSINE: None, MAX_ABS_ERR: None, MAX_RELATIVE_ERR: None, ONE_THOUSANDTH_ERR_RATIO: None, + COSINE: None, EUC_DIST: None, MAX_ABS_ERR: None, MAX_RELATIVE_ERR: None, ONE_THOUSANDTH_ERR_RATIO: None, FIVE_THOUSANDTHS_ERR_RATIO: None } MS_GRAPH_STATISTIC = { diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index 55229d72657..f0ac97a0293 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -311,9 +311,9 @@ class Comparator: ] if self.dump_mode == Const.SUMMARY: - result_item = base_result_item + [" "] * 8 + result_item = base_result_item + [" "] * 8 # 8个统计量数据情况的比对指标 else: - result_item = base_result_item + [" "] * 5 + result_item = base_result_item + [" "] * 6 # 6个真实数据情况的比对指标 npu_summary_data = npu_ops_all.get(ms_op_name).get("summary") 
result_item.extend(npu_summary_data) @@ -456,11 +456,13 @@ class Comparator: def compare_ops(self, idx, dump_path_dict, result_df, lock, input_param): cos_result = [] + euc_dist_result = [] max_err_result = [] max_relative_err_result = [] - err_mess = [] one_thousand_err_ratio_result = [] five_thousand_err_ratio_result = [] + err_mess = [] + is_print_compare_log = input_param.get("is_print_compare_log") bench_data = load_json(input_param.get("bench_json_path")).get('data') for i in range(len(result_df)): @@ -469,8 +471,8 @@ class Comparator: if is_print_compare_log: logger.info("start compare: {}".format(npu_op_name)) - cos_sim, max_abs_err, max_relative_err, one_thousand_err_ratio, five_thousand_err_ratio, err_msg = \ - self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param, bench_data) + cos_sim, euc_dist, max_abs_err, max_relative_err, one_thousand_err_ratio, five_thousand_err_ratio, err_msg \ + = self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param, bench_data) if is_print_compare_log: logger.info( @@ -479,26 +481,28 @@ class Comparator: "five_thousand_err_ratio {}".format(npu_op_name, cos_sim, max_abs_err, max_relative_err, err_msg, one_thousand_err_ratio, five_thousand_err_ratio)) cos_result.append(cos_sim) + euc_dist_result.append(euc_dist) max_err_result.append(max_abs_err) max_relative_err_result.append(max_relative_err) - err_mess.append(err_msg) one_thousand_err_ratio_result.append(one_thousand_err_ratio) five_thousand_err_ratio_result.append(five_thousand_err_ratio) + err_mess.append(err_msg) cr = ComparisonResult( cos_result=cos_result, + euc_dist_result=euc_dist_result, max_err_result=max_err_result, max_relative_err_result=max_relative_err_result, - err_msgs=err_mess, one_thousand_err_ratio_result=one_thousand_err_ratio_result, - five_thousand_err_ratio_result=five_thousand_err_ratio_result + five_thousand_err_ratio_result=five_thousand_err_ratio_result, + err_msgs=err_mess ) return _save_cmp_result(idx, cr, 
result_df, lock) - def do_multi_process(self, input_parma, result_df): + def do_multi_process(self, input_param, result_df): try: - result_df = _handle_multi_process(self.compare_ops, input_parma, result_df, + result_df = _handle_multi_process(self.compare_ops, input_param, result_df, multiprocessing.Manager().RLock()) return result_df except ValueError as e: diff --git a/debug/accuracy_tools/msprobe/core/compare/highlight.py b/debug/accuracy_tools/msprobe/core/compare/highlight.py index cf3e1c4c03e..1983313249f 100644 --- a/debug/accuracy_tools/msprobe/core/compare/highlight.py +++ b/debug/accuracy_tools/msprobe/core/compare/highlight.py @@ -146,11 +146,13 @@ class HighlightRules: } # 用于比较输入和输出的规则 + # 真实数据检查规则 compare_rules = { "check_order_magnitude": CheckOrderMagnitude(), "check_one_thousand_error": CheckOneThousandErrorRatio(), "check_cosine_similarity": CheckCosineSimilarity() } + # 统计量数据检查规则 summary_compare_rules = { "check_order_magnitude": CheckOrderMagnitude(), "check_max_relative_diff": CheckMaxRelativeDiff(), diff --git a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py index c2c1461e452..f79671827c1 100644 --- a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,8 +15,11 @@ import multiprocessing from dataclasses import dataclass +from functools import partial + import pandas as pd from tqdm import tqdm + from msprobe.core.common.log import logger from msprobe.core.common.utils import CompareException from msprobe.core.common.const import CompareConst @@ -44,7 +47,7 @@ def _handle_multi_process(func, input_parma, result_df, lock): progress_bar = tqdm(total=len(result_df), desc="API/Module Item Compare Process", unit="row", ncols=100) - def update_progress(size, progress_lock): + def update_progress(size, progress_lock, extra_param=None): with progress_lock: progress_bar.update(size) @@ -54,8 +57,10 @@ def _handle_multi_process(func, input_parma, result_df, lock): result = pool.apply_async(func, args=(idx, op_name_mapping_dict, df_chunk, lock, input_parma), error_callback=err_call, - callback=update_progress(chunk_size, lock)) + callback=partial(update_progress, chunk_size, lock) + ) results.append(result) + final_results = [r.get() for r in results] pool.close() pool.join() @@ -110,11 +115,12 @@ def read_dump_data(result_df): @dataclass class ComparisonResult: cos_result: list + euc_dist_result: list max_err_result: list max_relative_err_result: list - err_msgs: list one_thousand_err_ratio_result: list five_thousand_err_ratio_result: list + err_msgs: list def _save_cmp_result(offset, result: ComparisonResult, result_df, lock): @@ -135,15 +141,16 @@ def _save_cmp_result(offset, result: ComparisonResult, result_df, lock): for i, _ in enumerate(result.cos_result): process_index = i + offset result_df.loc[process_index, CompareConst.COSINE] = result.cos_result[i] + result_df.loc[process_index, CompareConst.EUC_DIST] = result.euc_dist_result[i] result_df.loc[process_index, CompareConst.MAX_ABS_ERR] = result.max_err_result[i] result_df.loc[process_index, CompareConst.MAX_RELATIVE_ERR] = result.max_relative_err_result[i] - result_df.loc[process_index, 
CompareConst.ERROR_MESSAGE] = result.err_msgs[i] - result_df.loc[process_index, CompareConst.ACCURACY] = ( - check_accuracy(result.cos_result[i], result.max_err_result[i])) result_df.loc[process_index, CompareConst.ONE_THOUSANDTH_ERR_RATIO] = ( result.one_thousand_err_ratio_result)[i] result_df.loc[process_index, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = ( result.five_thousand_err_ratio_result)[i] + result_df.loc[process_index, CompareConst.ACCURACY] = ( + check_accuracy(result.cos_result[i], result.max_err_result[i])) + result_df.loc[process_index, CompareConst.ERROR_MESSAGE] = result.err_msgs[i] return result_df except ValueError as e: logger.error('result dataframe is not found.') diff --git a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py index c551985780c..4103d361fec 100644 --- a/debug/accuracy_tools/msprobe/core/compare/npy_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/npy_compare.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -70,7 +70,7 @@ def get_error_flag_and_msg(n_value, b_value, error_flag=False, error_file=None): error_flag = True return CompareConst.NONE, CompareConst.NONE, error_flag, err_msg if not n_value.shape: # 判断数据是否为0维张量 - err_msg = (f"This is type of 0-d tensor, can not calculate '{CompareConst.COSINE}', " + err_msg = (f"This is type of 0-d tensor, can not calculate '{CompareConst.COSINE}', '{CompareConst.EUC_DIST}', " f"'{CompareConst.ONE_THOUSANDTH_ERR_RATIO}' and '{CompareConst.FIVE_THOUSANDTHS_ERR_RATIO}'. 
") error_flag = False # 0-d tensor 最大绝对误差、最大相对误差仍然支持计算,因此error_flag设置为False,不做统一处理 return n_value, b_value, error_flag, err_msg @@ -168,8 +168,9 @@ def statistics_data_check(result_dict): class TensorComparisonBasic(abc.ABC): """NPU和bench中npy数据的比较模板""" + @abc.abstractmethod - def apply(self, n_value, b_value, relative_err): + def apply(self, n_value, b_value, relative_err, err_msg): raise NotImplementedError @@ -190,6 +191,7 @@ def get_relative_err(n_value, b_value): class GetCosineSimilarity(TensorComparisonBasic): """计算cosine相似度""" + @staticmethod def correct_data(result): if result == CompareConst.NAN: @@ -198,9 +200,9 @@ class GetCosineSimilarity(TensorComparisonBasic): return round(float(result), 6) return result - def apply(self, n_value, b_value, relative_err): - if not n_value.shape: - return CompareConst.UNSUPPORTED, "" + def apply(self, n_value, b_value, relative_err, err_msg): + if "This is type of 0-d tensor" in err_msg: + return CompareConst.UNSUPPORTED, err_msg with np.errstate(divide="ignore", invalid="ignore"): if len(n_value) == 1: @@ -224,9 +226,22 @@ class GetCosineSimilarity(TensorComparisonBasic): return result, "" +class GetEuclideanDistance(TensorComparisonBasic): + """计算欧式距离""" + + def apply(self, n_value, b_value, relative_err, err_msg): + if "This is type of 0-d tensor" in err_msg: + return CompareConst.UNSUPPORTED, err_msg + + distance = np.linalg.norm(n_value - b_value, ord=2) + + return distance, "" + + class GetMaxAbsErr(TensorComparisonBasic): """计算最大绝对误差""" - def apply(self, n_value, b_value, relative_err): + + def apply(self, n_value, b_value, relative_err, err_msg): temp_res = n_value - b_value max_value = np.max(np.abs(temp_res)) if np.isnan(max_value): @@ -237,7 +252,8 @@ class GetMaxAbsErr(TensorComparisonBasic): class GetMaxRelativeErr(TensorComparisonBasic): """计算最大相对误差""" - def apply(self, n_value, b_value, relative_err): + + def apply(self, n_value, b_value, relative_err, err_msg): max_relative_err = 
np.max(np.abs(relative_err)) if np.isnan(max_relative_err): msg = "Cannot compare by MaxRelativeError, the data contains nan/inf/-inf in dump data." @@ -247,12 +263,13 @@ class GetMaxRelativeErr(TensorComparisonBasic): class GetErrRatio(TensorComparisonBasic): """计算相对误差小于指定阈值(千分之一、千分之五)的比例""" + def __init__(self, threshold): self.threshold = threshold - def apply(self, n_value, b_value, relative_err): - if not n_value.shape: - return CompareConst.UNSUPPORTED, "" + def apply(self, n_value, b_value, relative_err, err_msg): + if "This is type of 0-d tensor" in err_msg: + return CompareConst.UNSUPPORTED, err_msg if not np.size(relative_err): return CompareConst.NAN, "" @@ -264,6 +281,7 @@ class GetErrRatio(TensorComparisonBasic): class CompareOps: compare_ops = { "cosine_similarity": GetCosineSimilarity(), + "euclidean_distance": GetEuclideanDistance(), "max_abs_error": GetMaxAbsErr(), "max_relative_error": GetMaxRelativeErr(), "one_thousand_err_ratio": GetErrRatio(CompareConst.THOUSAND_RATIO_THRESHOLD), @@ -295,7 +313,7 @@ def compare_ops_apply(n_value, b_value, error_flag, err_msg): n_value, b_value = reshape_value(n_value, b_value) for op in CompareOps.compare_ops.values(): - result, msg = op.apply(n_value, b_value, relative_err) + result, msg = op.apply(n_value, b_value, relative_err, err_msg) result_list.append(result) err_msg += msg return result_list, err_msg diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index a2edf57e5bb..72b75ab254e 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -285,9 +285,9 @@ def result_item_init(n_info, b_info, dump_mode): md5_compare_result = CompareConst.PASS if n_info.struct[2] == b_info.struct[2] else CompareConst.DIFF result_item.extend([n_info.struct[2], b_info.struct[2], md5_compare_result]) elif dump_mode == Const.SUMMARY: - result_item.extend([" "] * 8) + result_item.extend([" "] * 8) 
# 8个统计量数据情况的比对指标 else: - result_item.extend([" "] * 5) + result_item.extend([" "] * 6) # 6个真实数据情况的比对指标 else: err_msg = "index out of bounds error will occur in result_item_init, please check!\n" \ f"npu_info_struct is {n_info.struct}\n" \ @@ -453,9 +453,9 @@ def get_un_match_accuracy(result, n_dict, dump_mode): result.append(result_item) continue if dump_mode == Const.SUMMARY: - result_item.extend([CompareConst.N_A] * 8) + result_item.extend([CompareConst.N_A] * 8) # 8个统计量数据情况的比对指标 if dump_mode == Const.ALL: - result_item.extend([CompareConst.N_A] * 5) + result_item.extend([CompareConst.N_A] * 6) # 6个真实数据情况的比对指标 npu_summary_data = safe_get_value(summary_reorder, index, "summary_reorder") bench_summary_data = [CompareConst.N_A] * 4 diff --git a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md index b4525d738d8..a5f83d8dfcb 100644 --- a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md @@ -257,11 +257,11 @@ PyTorch 精度比对是以 CPU 或 GPU 的计算结果为标杆,通过计算 统计量有 4 种:最大值(max)、最小值(min)、平均值(mean)和 L2-范数(L2 norm)。 -|dump 数据模式|Cosine (tensor 余弦相似度)|MaxAbsErr (tensor 最大绝对误差)|MaxRelativeErr (tensor 最大相对误差)|One Thousandth Err Ratio (tensor 相对误差小于千分之一的比例)|Five Thousandth Err Ratio (tensor 相对误差小于千分之五的比例)|NPU 和 bench 的统计量绝对误差 (max, min, mean, L2 norm) diff| NPU 和 bench 的统计量相对误差 (max, min, mean, L2 norm) RelativeErr |NPU 和 bench 的统计量 (max, min, mean, L2 norm)|NPU MD5 (NPU 数据 CRC-32 值)|BENCH MD5 (bench 数据 CRC-32 值)|Result (比对结果)|Accuracy Reached or Not (计算精度是否达标)|Err_message (错误信息提示)|NPU_Stack_Info (堆栈信息)|Data_Name (NPU 真实数据名)| -|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -|真实数据模式|√|√|√|√|√|||√||||√|√|√|√| -|统计数据模式||||||√|√|√|||√||√|√|| -|MD5 模式|||||||||√|√|√|||√|| +|dump 数据模式|Cosine (tensor 余弦相似度)|EucDist (tensor 欧式距离)|MaxAbsErr (tensor 最大绝对误差)|MaxRelativeErr (tensor 
最大相对误差)|One Thousandth Err Ratio (tensor 相对误差小于千分之一的比例)|Five Thousandth Err Ratio (tensor 相对误差小于千分之五的比例)|NPU 和 bench 的统计量绝对误差 (max, min, mean, L2 norm) diff| NPU 和 bench 的统计量相对误差 (max, min, mean, L2 norm) RelativeErr |NPU 和 bench 的统计量 (max, min, mean, L2 norm)|NPU MD5 (NPU 数据 CRC-32 值)|BENCH MD5 (bench 数据 CRC-32 值)|Result (比对结果)|Accuracy Reached or Not (计算精度是否达标)|Err_message (错误信息提示)|NPU_Stack_Info (堆栈信息)|Data_Name (NPU 真实数据名)| +|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +|真实数据模式|√|√|√|√|√|√|||√||||√|√|√|√| +|统计数据模式|||||||√|√|√|||√||√|√|| +|MD5 模式||||||||||√|√|√|||√|| 上表中NPU_Stack_Info字段需要配置-s参数生成。 @@ -320,7 +320,7 @@ MD5 模式: 5. "This is empty data, can not compare.":读取到的数据为空(真实数据模式); 6. "Shape of NPU and bench Tensor do not match. Skipped.":NPU 和 Bench 的数据结构不一致(真实数据模式); 7. "The Position of inf or nan in NPU and bench Tensor do not match.":NPU 和 Bench 的数据有 nan/inf(真实数据模式); -8. "This is type of 0-d tensor, can not calculate 'Cosine', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'.":NPU 为0维张量(真实数据模式); +8. "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'.":NPU 为0维张量(真实数据模式); 9. "Dtype of NPU and bench Tensor do not match.":NPU 和 Bench 数据的数据类型不同(真实数据模式); 10. "":除以上情况的其余情况(真实数据模式、统计数据模式)。 @@ -330,13 +330,15 @@ MD5 模式: 1. Cosine:通过计算两个向量的余弦值来判断其相似度,数值越接近于 1 说明计算出的两个张量越相似,实际可接受阈值为大于 0.99。在计算中可能会存在 nan,主要由于可能会出现其中一个向量为 0。 -2. MaxAbsErr:当最大绝对误差越接近 0 表示其计算的误差越小,实际可接受阈值为小于 0.001。 +2. EucDist:通过计算两个向量的欧式距离来判断其相似度,定义为多维空间中两个点之间的绝对距离。数值越接近0,张量越相似,数值越大,差异越大。 -3. MaxRelativeErr:当最大相对误差越接近 0 表示其计算的误差越小。 +3. MaxAbsErr:当最大绝对误差越接近 0 表示其计算的误差越小,实际可接受阈值为小于 0.001。 + +4. MaxRelativeErr:当最大相对误差越接近 0 表示其计算的误差越小。 当 dump 数据中存在 0 或 Nan 时,比对结果中最大相对误差则出现 inf 或 Nan 的情况,属于正常现象。 -4. 
One Thousandth Err Ratio(相对误差小于千分之一的元素比例)、Five Thousandths Err Ratio(相对误差小于千分之五的元素比例)精度指标:是指 NPU 的 Tensor 中的元素逐个与对应的标杆数据对比,相对误差小于千分之一、千分之五的比例占总元素个数的比例。该数据仅作为精度下降趋势的参考,并不参与计算精度是否通过的判定。 +5. One Thousandth Err Ratio(相对误差小于千分之一的元素比例)、Five Thousandths Err Ratio(相对误差小于千分之五的元素比例)精度指标:是指 NPU 的 Tensor 中的元素逐个与对应的标杆数据对比,相对误差小于千分之一、千分之五的比例占总元素个数的比例。该数据仅作为精度下降趋势的参考,并不参与计算精度是否通过的判定。 ## 4 多卡比对结果提取汇总通信算子数据 diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 8509a7f38ad..de507e87665 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -125,7 +125,8 @@ class MSComparator(Comparator): result_df.loc[warning_flag, CompareConst.RESULT] = CompareConst.WARNING result_df.loc[warning_flag, CompareConst.ERROR_MESSAGE] = 'Need double check api accuracy.' else: - fill_cols = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + fill_cols = [CompareConst.COSINE, CompareConst.EUC_DIST, + CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, CompareConst.ERROR_MESSAGE] result_df.loc[~condition_no_bench, fill_cols] = '' diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py index 701988ba483..153f4fd6552 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_graph_compare.py @@ -195,11 +195,12 @@ class GraphMSComparator: if not error_flag: result_list, err_msg = compare_ops_apply(n_value, b_value, False, "") result_dict[CompareConst.COSINE] = result_list[0] - result_dict[CompareConst.MAX_ABS_ERR] = result_list[1] - result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[2] - result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = 
result_list[3] - result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[4] - result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[1]) + result_dict[CompareConst.EUC_DIST] = result_list[1] + result_dict[CompareConst.MAX_ABS_ERR] = result_list[2] + result_dict[CompareConst.MAX_RELATIVE_ERR] = result_list[3] + result_dict[CompareConst.ONE_THOUSANDTH_ERR_RATIO] = result_list[4] + result_dict[CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] = result_list[5] + result_dict[CompareConst.ACCURACY] = check_accuracy(result_list[0], result_list[2]) result_dict[CompareConst.ERROR_MESSAGE] = err_msg return pd.Series(result_dict) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py index b4566fcfe6f..c882e331f55 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py @@ -191,17 +191,21 @@ summary_line_3 = ['Functional_batch_norm_0_forward.output.2', 'Functional_batch_ 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0, 0, 0, 0, 2, 0, 1, 1, 1, 1, 1, 1, 'Warning', ''] line_input = ['Functional.batch.norm.0.forward.input.0', 'Functional.batch.norm.0.forward.input.0', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 1, 1, 0.95, 1, 1, 1, 1, 1, 1.01, 1, 1, 1, + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 1, 0.5, 1, 1, 0.95, 1, + 1, 1, 1, 1, 1.01, 1, 1, 1, 'Yes', ''] line_1 = ['Functional.batch.norm.0.forward.output.0', 'Functional.batch.norm.0.forward.output.0', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1, 1, 0.59, 1, 'nan', 0, 1, 1, 19, 1, 1, 1, - 'Warning', ''] + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 0.5, 1, 1, 0.59, 1, + 'nan', 0, 1, 1, 19, 1, 1, 1, + 'Yes', ''] line_2 = ['Functional.batch.norm.0.forward.output.1', 
'Functional.batch.norm.0.forward.output.1', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 1, 1, 0.8, 1, 0, 0.12, 0, 1, 1, 0.1, 1, 1, 1, - 'Warning', ''] + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.9, 0.5, 1, 1, 0.8, 1, + 0, 0.12, 0, 1, 1, 0.1, 1, 1, + 'Yes', ''] line_3 = ['Functional.batch.norm.0.forward.output.2', 'Functional.batch.norm.0.forward.output.2', 'torch.float16', - 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 1.1e+10, 1, 0.85, 1, 9, 0.12, 0, 1, 1, 0.1, 1, - 1, 1, 'Warning', ''] + 'torch.float32', [256, 256, 14, 14], [256, 256, 14, 14], 0.8, 0.5, 1.1e+10, 1, 0.85, 1, + 9, 0.12, 0, 1, 1, 0.1, 1, 1, + 'Yes', ''] op_data = { 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.float32', 'shape': [16, 1, 3, 3], @@ -363,7 +367,7 @@ class TestUtilsMethods(unittest.TestCase): 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', 'File']] result_all = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', + 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', 'File', '-1']] columns_md5_stack_mode_true = CompareConst.MD5_COMPARE_RESULT_HEADER + ['NPU_Stack_Info'] result_table_md5_true = pd.DataFrame(result_md5, columns=columns_md5_stack_mode_true, dtype=object) @@ -403,10 +407,10 @@ class TestUtilsMethods(unittest.TestCase): 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '']] result_all_test = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', + 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '', '-1']] result_all = [['Functional.linear.0.forward.input.0', 
'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', + 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']] columns_md5_stack_mode_true = CompareConst.MD5_COMPARE_RESULT_HEADER result_table_md5_true = pd.DataFrame(result_md5, columns=columns_md5_stack_mode_true, dtype='object') @@ -632,10 +636,10 @@ class TestUtilsMethods(unittest.TestCase): def test_do_multi_process(self): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']] o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', - 'torch.float32', 'torch.float32', [2, 2], [2, 2], 'unsupported', 'unsupported', 'unsupported', - 'unsupported', 'unsupported', + 'torch.float32', 'torch.float32', [2, 2], [2, 2], + 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 1, 1, 1, 1, 1, 1, 1, 1, 'None', 'No bench data matched.', '-1']] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) @@ -669,7 +673,7 @@ class TestUtilsMethods(unittest.TestCase): result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {}) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 'No bench data matched.']) + 'unsupported', 'No bench data matched.']) def test_compare_by_op_2(self): npu_op_name = 'Functional.linear.0.forward.input.0' @@ -691,7 +695,7 @@ class TestUtilsMethods(unittest.TestCase): {'Functional.linear.0.forward': {'input_args': [ {'data_name': 'Functional.linear.0.forward.input.0.pt'}]}}) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 
'unsupported', - f'Dump file: {pt_path} not found.']) + 'unsupported', f'Dump file: {pt_path} not found.']) pt_name = 'Functional.linear.0.forward.input.0.pt' pt_path = os.path.join(base_dir, pt_name) @@ -699,13 +703,13 @@ class TestUtilsMethods(unittest.TestCase): input_param = {'npu_dump_data_dir': base_dir, 'bench_dump_data_dir': base_dir} result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {}) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 'Bench does not have data file.']) + 'unsupported', 'Bench does not have data file.']) generate_pt(base_dir) result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {'Functional.linear.0.forward': {'input_args': [ {'data_name': 'Functional.linear.0.forward.input.0.pt'}]}}) - self.assertEqual(result, [1.0, 0.0, 0.0, 1.0, 1.0, '']) + self.assertEqual(result, [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, '']) def test_get_bench_data_name_input(self): bench_op_name = "Functional.linear.0.forward.input.0" diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py index aec6cdc5117..da315b657c8 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_npy_compare.py @@ -20,7 +20,7 @@ from unittest.mock import patch from msprobe.core.common.const import CompareConst from msprobe.core.compare.npy_compare import handle_inf_nan, reshape_value, get_error_flag_and_msg, \ npy_data_check, statistics_data_check, get_relative_err, GetCosineSimilarity, GetMaxAbsErr, GetMaxRelativeErr, \ - GetErrRatio, error_value_process, compare_ops_apply + GetErrRatio, error_value_process, compare_ops_apply, GetEuclideanDistance op_name = 'Functional.conv2d.0.backward.input.0' @@ -113,7 +113,7 @@ 
class TestUtilsMethods(unittest.TestCase): n_value, b_value, error_flag, err_msg = get_error_flag_and_msg(n_value, b_value, error_flag=error_flag) self.assertFalse(error_flag) - self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', " + self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', " "'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. ") def test_get_error_flag_and_msg_shape_unmatch(self): @@ -239,15 +239,17 @@ class TestUtilsMethods(unittest.TestCase): b_value_1 = np.array(1) relative_err = get_relative_err(n_value_1, b_value_1) n_value_1, b_value_1 = reshape_value(n_value_1, b_value_1) - result, err_msg = op.apply(n_value_1, b_value_1, relative_err) + err_msg = "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. " + result, err_msg = op.apply(n_value_1, b_value_1, relative_err, err_msg) self.assertEqual(result, CompareConst.UNSUPPORTED) - self.assertEqual(err_msg, "") + self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. 
") n_value_2 = np.array([1, 2]) b_value_2 = np.array([1, 2]) relative_err = get_relative_err(n_value_2, b_value_2) n_value_2, b_value_2 = reshape_value(n_value_2, b_value_2) - result, err_msg = op.apply(n_value_2, b_value_2, relative_err) + err_msg = "" + result, err_msg = op.apply(n_value_2, b_value_2, relative_err, err_msg) self.assertEqual(result, 1.0) self.assertEqual(err_msg, "") @@ -255,7 +257,8 @@ class TestUtilsMethods(unittest.TestCase): b_value_3 = np.array([0, 0]) relative_err = get_relative_err(n_value_3, b_value_3) n_value_3, b_value_3 = reshape_value(n_value_3, b_value_3) - result, err_msg = op.apply(n_value_3, b_value_3, relative_err) + err_msg = "" + result, err_msg = op.apply(n_value_3, b_value_3, relative_err, err_msg) self.assertEqual(result, 1.0) self.assertEqual(err_msg, "") @@ -263,7 +266,8 @@ class TestUtilsMethods(unittest.TestCase): b_value_4 = np.array([1, 2]) relative_err = get_relative_err(n_value_4, b_value_4) n_value_4, b_value_4 = reshape_value(n_value_4, b_value_4) - result, err_msg = op.apply(n_value_4, b_value_4, relative_err) + err_msg = "" + result, err_msg = op.apply(n_value_4, b_value_4, relative_err, err_msg) self.assertEqual(result, CompareConst.NAN) self.assertEqual(err_msg, 'Cannot compare by Cosine Similarity, All the data is Zero in npu dump data.') @@ -271,7 +275,8 @@ class TestUtilsMethods(unittest.TestCase): b_value_5 = np.array([0, 0]) relative_err = get_relative_err(n_value_5, b_value_5) n_value_5, b_value_5 = reshape_value(n_value_5, b_value_5) - result, err_msg = op.apply(n_value_5, b_value_5, relative_err) + err_msg = "" + result, err_msg = op.apply(n_value_5, b_value_5, relative_err, err_msg) self.assertEqual(result, CompareConst.NAN) self.assertEqual(err_msg, 'Cannot compare by Cosine Similarity, All the data is Zero in Bench dump data.') @@ -282,7 +287,9 @@ class TestUtilsMethods(unittest.TestCase): b_value_1 = np.array([1]) relative_err = get_relative_err(n_value_1, b_value_1) n_value_1, b_value_1 = 
reshape_value(n_value_1, b_value_1) - result, err_msg = op.apply(n_value_1, b_value_1, relative_err) + err_msg = "" + + result, err_msg = op.apply(n_value_1, b_value_1, relative_err, err_msg) self.assertEqual(result, CompareConst.UNSUPPORTED) self.assertEqual(err_msg, "This is a 1-d tensor of length 1.") @@ -294,8 +301,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([1, 1]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, CompareConst.NAN) self.assertEqual(err_msg, "Cannot compare by Cosine Similarity, the dump data has NaN.") @@ -319,8 +327,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([0, 0]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, 2.0) self.assertEqual(err_msg, "") @@ -333,8 +342,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([1, 1]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, CompareConst.NAN) self.assertEqual(err_msg, "Cannot compare by MaxAbsError, the data contains nan/inf/-inf in dump data.") @@ -347,8 +357,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([1, 1]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) 
self.assertEqual(result, 1.0) self.assertEqual(err_msg, "") @@ -361,8 +372,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([1, 1]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, CompareConst.NAN) self.assertEqual(err_msg, "Cannot compare by MaxRelativeError, the data contains nan/inf/-inf in dump data.") @@ -375,8 +387,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([1, 1]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, 0.5) self.assertEqual(err_msg, "") @@ -387,11 +400,12 @@ class TestUtilsMethods(unittest.TestCase): n_value = np.array(1) # 标量 b_value = np.array(1) relative_err = np.array(0) + err_msg = "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. " - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, CompareConst.UNSUPPORTED) - self.assertEqual(err_msg, "") + self.assertEqual(err_msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. 
") def test_GetThousandErrRatio_not_size(self): op = GetErrRatio(CompareConst.THOUSAND_RATIO_THRESHOLD) @@ -399,8 +413,9 @@ class TestUtilsMethods(unittest.TestCase): n_value = np.array([1, 2]) b_value = np.array([1, 2]) relative_err = np.array([]) # 空数组 + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, CompareConst.NAN) self.assertEqual(err_msg, "") @@ -412,8 +427,9 @@ class TestUtilsMethods(unittest.TestCase): b_value = np.array([1, 1]) relative_err = get_relative_err(n_value, b_value) n_value, b_value = reshape_value(n_value, b_value) + err_msg = "" - result, err_msg = op.apply(n_value, b_value, relative_err) + result, err_msg = op.apply(n_value, b_value, relative_err, err_msg) self.assertEqual(result, 0.5) self.assertEqual(err_msg, "") @@ -471,5 +487,34 @@ class TestUtilsMethods(unittest.TestCase): error_flag = False err_msg = '' a, b = compare_ops_apply(n_value, b_value, error_flag, err_msg) - self.assertEqual(a, [1.0, 0.0, 0.0, 1.0, 1.0]) + self.assertEqual(a, [1.0, 0.0, 0.0, 0.0, 1.0, 1.0]) self.assertEqual(b, '') + + +class TestGetEuclideanDistance(unittest.TestCase): + + def setUp(self): + self.euc_distance = GetEuclideanDistance() + + def test_euclidean_distance_normal(self): + # 测试计算两个张量之间的欧式距离 + n_value = np.array([1, 2, 3]) + b_value = np.array([4, 5, 6]) + relative_err = None + err_msg = "" + + result, msg = self.euc_distance.apply(n_value, b_value, relative_err, err_msg) + expected_distance = np.linalg.norm(n_value - b_value) + self.assertEqual(result, expected_distance) + self.assertEqual(msg, '') + + def test_euclidean_distance_0d_tensor(self): + # 测试计算两个张量之间的欧式距离 + n_value = np.array(1) + b_value = np.array(1) + relative_err = None + err_msg = "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. 
" + + result, msg = self.euc_distance.apply(n_value, b_value, relative_err, err_msg) + self.assertEqual(result, CompareConst.UNSUPPORTED) + self.assertEqual(msg, "This is type of 0-d tensor, can not calculate 'Cosine', 'EucDist', 'One Thousandth Err Ratio' and 'Five Thousandths Err Ratio'. ") diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py index ab8703dcd35..2e9a4657266 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py @@ -221,28 +221,34 @@ o_result_unmatch_2 = [ 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None'] ] o_result_unmatch_3 = [ - ['Functional.conv2d.0.forward.input.0', 'N/A', 'torch.float32', 'N/A', [1, 1, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', - 'N/A', 'N/A', 3.029174327850342, -2.926689624786377, -0.06619918346405029, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + ['Functional.conv2d.0.forward.input.0', 'N/A', 'torch.float32', 'N/A', [1, 1, 28, 28], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 3.029174327850342, -2.926689624786377, -0.06619918346405029, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.forward.input.1', 'N/A', 'torch.float32', 'N/A', [16, 1, 5, 5], 'N/A', 'N/A', 'N/A', 'N/A', - 'N/A', 'N/A', 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + ['Functional.conv2d.0.forward.input.1', 'N/A', 'torch.float32', 'N/A', [16, 1, 5, 5], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.forward.input.2', 'N/A', 'torch.float32', 'N/A', [16], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 'N/A', 0.19734230637550354, 
-0.18177609145641327, 0.007903944700956345, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + ['Functional.conv2d.0.forward.input.2', 'N/A', 'torch.float32', 'N/A', [16], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.forward.parameters.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', - 'N/A', 'N/A', - 'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.forward.parameters.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', - 'N/A', - 'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.forward.output.0', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', - 'N/A', 'N/A', 2.1166646480560303, -2.190781354904175, -0.003579073818400502, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + ['Functional.conv2d.0.forward.parameters.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], + ['Functional.conv2d.0.forward.parameters.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], + ['Functional.conv2d.0.forward.output.0', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 2.1166646480560303, -2.190781354904175, -0.003579073818400502, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.parameters_grad.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', - 'N/A', 
'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], - ['Functional.conv2d.0.parameters_grad.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', - 'N/A', 'N/A', 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'] + ['Functional.conv2d.0.parameters_grad.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], + ['Functional.conv2d.0.parameters_grad.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', + 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'] ] # test_merge_tensor @@ -558,7 +564,7 @@ class TestUtilsMethods(unittest.TestCase): dump_mode = Const.ALL result_item = result_item_init(n_info, b_info, dump_mode) self.assertEqual(result_item, ['Tensor.add.0.forward.input.0', 'Tensor.add.0.forward.input.0', - 'torch.float32', 'torch.float32', [96], [96], ' ', ' ', ' ', ' ', ' ']) + 'torch.float32', 'torch.float32', [96], [96], ' ', ' ', ' ', ' ', ' ', ' ']) dump_mode = Const.SUMMARY result_item = result_item_init(n_info, b_info, dump_mode) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py index f561a3e05ec..3261bce5d6d 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_highlight.py @@ -26,7 +26,7 @@ def generate_result_xlsx(base_dir): data_path = os.path.join(base_dir, 'target_result.xlsx') data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] + '', '', 
'', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] ] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) @@ -101,8 +101,8 @@ class TestUtilsMethods(unittest.TestCase): self.assertEqual(result, None) def test_CheckOneThousandErrorRatio_str(self): - api_in = [1, 1, 1, 1, 1, 1, 1, 1, 1, "unsupported"] - api_out = [1, 1, 1, 1, 1, 1, 1, 1, 1, "unsupported"] + api_in = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, "unsupported"] + api_out = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, "unsupported"] info = (api_in, api_out, 1) color_columns = () dump_mode = Const.ALL @@ -113,8 +113,8 @@ class TestUtilsMethods(unittest.TestCase): @patch("msprobe.core.compare.highlight.add_highlight_row_info") def test_CheckOneThousandErrorRatio_red(self, mock_add_highlight_row_info): - api_in = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - api_out = [1, 1, 1, 1, 1, 1, 1, 1, 1, 0.5] + api_in = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, 1] + api_out = [1, 1, 1, 1, 1, 1, 0.9, 0.5, 1, 1, 0.5] info = (api_in, api_out, 1) ColorColumns = namedtuple('ColorColumns', ['red', 'yellow']) color_columns = ColorColumns(red=[], yellow=[]) @@ -315,7 +315,7 @@ class TestUtilsMethods(unittest.TestCase): columns = CompareConst.COMPARE_RESULT_HEADER data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', ''] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', ''] ] result_df = pd.DataFrame(data, columns=columns) @@ -329,7 +329,7 @@ class TestUtilsMethods(unittest.TestCase): def test_highlight_rows_xlsx_red(self): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] ] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = 
pd.DataFrame(data, columns=columns) @@ -342,7 +342,7 @@ class TestUtilsMethods(unittest.TestCase): def test_highlight_rows_xlsx_yellow(self): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] ] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) @@ -356,7 +356,7 @@ class TestUtilsMethods(unittest.TestCase): def test_highlight_rows_xlsx_malicious_columns(self, mock_save_book): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] ] columns = CompareConst.COMPARE_RESULT_HEADER + ['=Data_name'] result_df = pd.DataFrame(data, columns=columns) @@ -378,10 +378,10 @@ class TestUtilsMethods(unittest.TestCase): def test_highlight_rows_xlsx_malicious_type(self, mock_save_book): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', '=torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'], + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'], ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', '=torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] ] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) @@ -416,10 +416,10 @@ class TestUtilsMethods(unittest.TestCase): def test_update_highlight_err_msg(self): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 
'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'], + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'], ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1'] ] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) @@ -433,10 +433,10 @@ class TestUtilsMethods(unittest.TestCase): t_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'a\nb', '-1'], + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'a\nb', '-1'], ['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'd', '-1'] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', 'd', '-1'] ] target_result_df = pd.DataFrame(t_data, columns=columns) self.assertTrue(result_df.equals(target_result_df)) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py index 9c2dea835fe..3fa16b0d9d4 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py @@ -16,12 +16,12 @@ from test_acc_compare import generate_dump_json data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']] o_data = 
[['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', + 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 1, 1, 1, 1, 1, 1, 1, 1, 'None', 'No bench data matched.', '-1']] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] @@ -34,9 +34,9 @@ class TestUtilsMethods(unittest.TestCase): def setUp(self): self.result_df = pd.DataFrame(columns=[ - CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, - CompareConst.ERROR_MESSAGE, CompareConst.ACCURACY, - CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO + CompareConst.COSINE, CompareConst.EUC_DIST, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO, + CompareConst.ACCURACY, CompareConst.ERROR_MESSAGE ]) os.makedirs(base_dir, mode=0o750, exist_ok=True) self.lock = threading.Lock() @@ -72,9 +72,10 @@ class TestUtilsMethods(unittest.TestCase): cos_result=[0.99, 0.98], max_err_result=[0.01, 0.02], max_relative_err_result=[0.001, 0.002], - err_msgs=['', 'Error in comparison'], + euc_dist_result=[0.5, 0.49], one_thousand_err_ratio_result=[0.1, 0.2], - five_thousand_err_ratio_result=[0.05, 0.1] + five_thousand_err_ratio_result=[0.05, 0.1], + err_msgs=['', 'Error in comparison'] ) offset = 0 updated_df = _save_cmp_result(offset, comparison_result, self.result_df, self.lock) @@ -88,9 +89,10 @@ class TestUtilsMethods(unittest.TestCase): cos_result=[0.99], max_err_result=[], max_relative_err_result=[0.001], - err_msgs=[''], + euc_dist_result=[0.5], one_thousand_err_ratio_result=[0.1], - five_thousand_err_ratio_result=[0.05] + five_thousand_err_ratio_result=[0.05], + err_msgs=[''] ) with self.assertRaises(CompareException) as context: _save_cmp_result(0, comparison_result, self.result_df, 
self.lock) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py index e3fd9348efe..c2e7c9368c3 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_graph_compare.py @@ -78,7 +78,7 @@ class TestMsGraphCompare(unittest.TestCase): result_correct = ( f"[['{npu_file_path}', '{bench_file_path}', dtype('float16'), dtype('float16'), (10, 10), (10, 10), " - f"44.0, 44.0, 44.0, inf, 44.0, 44.0, 44.0, inf, 'Yes', '', 1.0, 0.0, 0.0, 1.0, 1.0]]") + f"44.0, 44.0, 44.0, inf, 44.0, 44.0, 44.0, inf, 'Yes', '', 1.0, 0.0, 0.0, 0.0, 1.0, 1.0]]") self.assertNotEqual(len(files), 0) self.assertEqual(result, result_correct) diff --git a/debug/accuracy_tools/msprobe/visualization/utils.py b/debug/accuracy_tools/msprobe/visualization/utils.py index 623bcd11c45..f6e8258bb67 100644 --- a/debug/accuracy_tools/msprobe/visualization/utils.py +++ b/debug/accuracy_tools/msprobe/visualization/utils.py @@ -182,11 +182,8 @@ class GraphConst: STR_MAX_LEN = 50 SMALL_VALUE = 1e-3 MD5_INDEX_LIST = [CompareConst.RESULT] - REAL_DATA_INDEX_LIST = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, - CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] - SUMMARY_INDEX_LIST = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, - CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + REAL_DATA_INDEX_LIST = CompareConst.ALL_COMPARE_INDEX + SUMMARY_INDEX_LIST = CompareConst.SUMMARY_COMPARE_INDEX VALUE_INDEX_LIST = [Const.MAX, Const.MIN, Const.MEAN, Const.NORM] APIS_BETWEEN_MODULES = 'Apis_Between_Modules' NULL = 'null' -- Gitee From 6e8fd5928267736af035332ebf37ffe5768dc730 Mon Sep 17 00:00:00 2001 From: i-robot Date: 
Tue, 4 Mar 2025 01:45:19 +0000 Subject: [PATCH 196/333] =?UTF-8?q?!3103=20=E3=80=90feature=E3=80=91?= =?UTF-8?q?=E3=80=90=E9=9C=80=E6=B1=82=E3=80=91[msprobe]10.accuracy=5Fcomp?= =?UTF-8?q?are=5FPyTorch.md=E6=AF=94=E5=AF=B9=E7=BB=93=E6=9E=9C=E6=88=AA?= =?UTF-8?q?=E5=9B=BE=E6=9B=B4=E6=96=B0=20Merge=20pull=20request=20!3103=20?= =?UTF-8?q?from=20=E8=94=A1=E4=BC=9F=E4=BC=9F/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/docs/img/compare_result.png | Bin 77942 -> 62634 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/img/compare_result.png b/debug/accuracy_tools/msprobe/docs/img/compare_result.png index 07cdb51707fe43d07723ed976275d99f55b50571..b6d7ec6dfcbc44b4b7056e1297a481f495ceb86e 100644 GIT binary patch literal 62634 zcmd43dstHG`akSUGu3o*S7W86%+6_QQpYhf#{;mNOxm3sM~&2!DG#VvmSQT1t6kGc zQ?|Rw(o&|VG*L)VOAM$f#WF-RKrNv%MLD8uRG@V)rGyLS4=r5*fg ze|*}3gs-v>9){kwvkOFKAKo8_OTd42AOV|v2)<*73;Ntw^*CG)M=LKf_LnRzc!0-Po`(Wy!4F7ES>bC{^L)SCOo~sdo>G6sF z-v&~v6Ce`pxuT?*7!=<2tTnHi?vMmOOQG^AP3*t!37X}+D^RL$RmHiu`& zj{#a&eA0wprqe z%}ABL6nH+fe_KghHfBP;ynIf1-bVB^N_~|%exnDO%5_2;_~#(vvspC*K>3jq>(CQe zqt)(vfPUY>a(0~J_9CF;q_sI8WeCgo7$|vZl&^={4%mWZf;aO(_wl+LJZy#oP*Om^ ze+*1dUWbzN!F^|;y5lF&Go8RAV;)e}WZN=M>Q8lSwM^>#=F`qs{<^mQ8O%_Hm z?Qenq9LrYZGXd{GCDf>vG!QKf*3yY7e0qAofl6njyrp9~%b?jeAG&FPu34?a{*GCjLIQ^WP zF=Old)jJG5*CWwJUklWV)#xdCioP@bufXWTd>VTWD<6se7#j80_ORJfh>YU&T@mBu zLkGZpi)R?gOfz-nWQZ?xTD<2zu~iss%iHflmCnr68#26!{?SZpC~oO%0Ec3tE250@yMx@Os}p;fcAQfa?U1muX;PTcq!qbZDJ@_M=-#>U|tdu{6{k$GME#Llj4)(RetajfC1h3bF?#rzS zKfW8oVRv9U<9pK1MTaQG)%!f#D zhS?1Z9CBILkmwKp4?Gl_4~t}L-eyq-G-K@%mgas^+|0~FYvFeuiJs!+MGkKNPx|XL z@z1p!{uls#4Yfbf0=+N~D03|eAyK)2J}PjyJ45J#2mVFOH3shko$p=@UCq|xt z`#$*dTHuYt%nvS+Mmnuam%^o;uc5agqMDg@;~=Gy@ypw3n&ioR@8u1go}pOcyFwoO z4Lv+c?1EP8n|Zzk`iDTwt6c&IOXQ3Bh6@mLdmS>f+_^#EzE&#DnBfDh>0dM$KL(&BZ~#hI7m{NB z;cXF`f(@of-Iq_I)GIBLYr(6`GM#`p0>8M>nA-*0O+Ui#s$B|GL+P`3KFw{9G^Wd&?vuznbRo 
zpkWy1UzlN>rn}u5ZzzA;gv&5$vi_Vb?qq%5tx26pUAv{t_Uch2+Yt2cq5z-B~-Ec`bqErTi* z&S=zBo_gi`JH`ILx%zsR{@otD2ncrhPl0C9pObDcL8Q+4bJE^VU`?1yyurFa7yE^? z7Cf-d@;+>{dw;w zA!)|6x~)$cLr?42n4};tFo^`P|lK2mvNqC3Vzh zw#l-B;J4m35w_LLsFgkG=MB+@KeRi>HLAF&ch;&$T5R=e$JPk|b=SQ{EvjeCrmP|p z%`{R2AjkY9=Vk?2nYi<<0(Ws@mE8Y~k#Igi~*uu`?w1)&u-#BuGjLAReX6rlRP z5Jb3;)4)-FD2W_Ql9|D?CKOvg;i(MRu_D|@5f!uk;ywisFK%UMcTMDKu`WC9W%No-Pi`I)q%2-^8u$Qsy2V~F= zF0to=LcH0AN3_D8OQI01l-k_mNGt+i2T%^%p8f!i^QX8a?D4i7Pt<0)6lEN(j>N>mKUtkNf;KUwon(D{ABQyK4j}gYRW(vngf(L z(?j;CZ*3F|^2aF&Ll)O6T~ViWg8fzLteuRyuHGy`c8l9KBEF5Fl~tf} zs>ilM3D@>Mic|y7?xddDj`rxd{OSpKE-6Opj@Ds6nfhs`rp_JvBh%veR{|rYrVqHD zT&P@3UTHOdD}R#W#_nG2k~SkF26pSd-kEmRXCX)2zPmoznrw;)g`Byx`+4?f%YN)C08)0O8+~;%b zUT!-Db{r+uS4)2wE3Cz0z{lOg6Ef}Na254RQHnYO=n$=n0rpU?Di3N@`o?VAYJ2h{ z-3{K+aW3v@ksCR@ph`#Aa%}A;;)wU&5O`CDjpQcfwm)|futZO2{afga_{lY;A(HQQY0(qgz~aSYZuttAYq5?OfE``Ylauc9ZFs~$ z3uRXvH5^y9EHnl~n5geHryCXdam_#8{AaubZU-tQ?_KX$XgHK3vk=bsK2Pj%`;cw#t_o@;Y=vx;7f zn7*Vym*-U~(BJ+}P2-K>3%zXS$J#W%Y%WUId7HR8>N&68+@@r@IO9^Ty(m_xwCjBH z9Z}}Ex^0&e;yF~m0~lPDSb{qsNeQW#rI%m0l?dDbu!e7@40UkYa>nbOK=3!jn{H-r z0^s~O!t&S!8Tz!V3_E2B(@qe{DFM||f7y~ZDr1E~+9yqwT>$CexvpBser|$@UjHT6^Evf7 z2G1Qa{_9O)SFOw|n)|ugOE+R~cX$(kNWHVXfLSHJv%F6E_vT-eckZoNFT6t3A@h?C z#ffD%6t?l2W?gYe)?J|Ji>aaYPxnErt)%}PTH8gsnM~p zgCQo;Pg#22nF`4QuFbvj0NGd+e-yx`Pg@o0f>9@FACk(l35Ms9DUb;FF?}>dZ^&)6 zW+nh7uban2L5NI=S~;2nu&GGmR1t{RFq*kiAtm9pu|r3bF&>qv%xA;(i~)7=rfZ%j zKopsV*1Ryx_i;7TZud8;oQO5ZFqzkLnR0Bt-AmB|>%}qbfsp!WO44%u>JhFkXl5VW zrkl`@2M(;{cp#p}X0fxHbj|CfvVd!n#4+t;CMx@9W^K7T6|d_vN(h!OCGwigfFd1m z3LXo?GR#%TXtUt#|w;& za5!SRKPG3-kc0jrA>&O{RuH@SGSa_=0+7u+h=OVV6Mb@qo0`Z_nJ*cP*Sx}rpA*!| zD+m{>xv^V6VcUk6HjdVDOZ)q@SXF>-M6G7@#oF0L-?f|`tLR(4DUN|~#F>!ieOF)) zi&?}CQ>TuOAtb|s`b0KbhC~^NzT#0vMJd? zw40#E<|34JtcsA3daLh5YRvPDHzN0F)4`dWfl$C7+BeRws9nh~7>*&@P)Pz;Asnw! 
zr4sT!+7#z=QSRXFh_kkMmol2L+7CZl-GZt&1T2MPY9=?sGD5lR($ciZNbcG`7HGuT z*JrM#zY);(GP!Q1^R<*#N&QVhAJ`j*iX#fg7(ExZtIniI=-fFdJ*8D3;rA;le45c; zJV$+s$p93CU%y@)7+YAw_}5UvB)gYpizV6F1sm(`u7#17G-Yew#Cy)x-GHxe0K!$~ z%k<2AvMLvsIIW7;M%a=%*WHAR#n_}n4Qdb2sjSi(L2lO}j;m!NlsUXQyXjO)MG}WD zRdJ+e4)n(zB%Kn7q)(OsnI*fS_c*6goX6CXGs^l)VuHoVMdqfe7M=Jqr5 zQFW>6>YNy2|HZXYJ0>$j$&1Pbf^W8O!hT9VfU{_lJZmH1-PYu38JTI-s1Zv)^`Ub5nXK1 zn^HpU2e=XvdvR+6uitdeeQk8Kq_)V~Oc~He9%pY=1>p>rbbF&I#Y0I*S(p!BpHxm_ zpGUHqFqljjMtYyp%BS-}iu$%tXWg%MLqY}2WKwR+<>jmh{jg>xi_ ztdd%D5y&2Y_>}Cn-zc$iraf^VVJO;m-Qw0vX=-pskNOlDSL;=uS8}nHYC4m2s4&9H z%*@@#?lFE~7yE!*X$fYy&|JLADuXgZ?I~f-xTh1Usy$aTBS|;OZS{sEch@I{%G}+V zHF&El{J_SL$xK(RtDmFDsq`jut6ymBr;G!Uxf@GfWuQ`m(*r{>@IBC5o>ZEi;7e(% zk6P2+xarB+scYE|9j~c%Mf5gRwAkr!$8_r0PQ73B0ihG>swz=MTQ-zj)E2&ye`{mH z=h%fDij9s-s*t!IoZc5#y_IkSdWcfG4ix#64CTPP>ofS0-ZM8@0%6>mZkBXMJ=t9f zjrH8fgV2N`T(0^EkAzcN;Zc;K#K|=ybtN^M@==h`HImW*+(4Zh{*u!MNDyVq`6J}U zJhxEf+ZKli*~w2SDBQWWrX||$IFW0x=2Ofd7YCDJd>B|B=ZpM=pk_i1;ZaiTkBmY> z)o{jiV1Y6FQ&r0Jj>pr&y~|*Q35Gyh49>?hN>BvPTV}fvZ%T46Ah&JEt*}*SX$^C0 zJJP*L8%tVIagGQ9Ac=zJp5YedTeiHFHj_QVV5!>eL!rv^m4w)#^%v-6ND1Ri)gA>Q zbu+dHbdoeigs~q=4C?;MC~9}`^qR;X9{VP*8tT#35laMlr+)G)9?q%9pMk4I5zoCz zm~>yex{Q{e$#e3~4H4Nvvt0{`&G)6@+%d6rw)1p2_?v?GqTJ19*ewe_Uhr$ zKPMf#Fk?x-(m6P3(0nX=0Q>c+A-{zWJkZp}eZIyA;-Ut4Aazh-`U{%juNraZbTkJBs{AW_+YE`VU!>^BH`ZTqM;xbkDti>J_{{P{@j0SYCew45t>_S>wk5P$T>YX~ z#2~3On9!}-v=|6TR25dCd@+@tZo?hv^@h25oT`vYa+fHRRXh~ItSybn(5`y9&0=lR zN{J!?(v_8&iHX+f$FJmAbqDy`0hXj#>Og*@Inh!l;B#i?d%)7ej|N**3{%vEJmVWv6n7Sw_kgCJu|&LqjA+c$NN+ub zvnRzWWDN(gLX#b+R&SQcF~%yqvKw<8z|n7%r7aAt?PU0s)BTDM1@*^GP2SKb7L-ec ze2JpMoiP<6N0WZ622gjv_eFT0qf>+;v?=Z-s8j|30o5SxOiGK4c z$&eN?`Ysnse=e0EUsrL^uNg?!w3P^R+uiksSrbw8iElF#pl-g5l8hj0$hjHWsJFJx zZ17g87OOThXtHSXFk~J8d??)_fs$E;Va1*vR>LXXS~Te>NB(9|l>{}5?K;IcP|FRq z`DnMJxp!Rnw(ZxnG~)$2r-#C~I*8CowxTLqnW+(Pji8h>H2T!vGuaV)2BZ*Q~KtcIf0wAB*ux3}ou5GY_A|F}%bQS74Zr|DTYtpiwWQWjQ4L6bE zT-P3|OELY(nkQ!oxX!AZyzSSMF>pruh24TPV%fKQ0{W5N_>dXKQ(m#SmV2_a-&N>a 
zL4NGp@T7v>`8PRlX5F0`k>bX!F0a?)iAubn)TO*xm;Ta(nhrdE$Tl#j>QPt}7E+c! z&^bJ+%6Rtv0a1z!(LXe&#&BaB5-Vq=CXmfa2fe&R8d^x+$}3BYCp>fQ_Mr#)g{JWp z4eSx|nTA7og$m;aL)mK;{uK4>F!{{Ja=(B(afOai7hAP~Pr_=c)hcl;6kTOIJYqh!yJ8|>qUtgh3YlulVzrG^)ljH(+p;We3rM`c>9urV7n z7SStl)r)Q~Bi3wvDn%0jr8Qu?_U<$;DBfH!MS5D4Y2Q!lw(b+23*9;Sw!vY^L}6`R zW?m0dy;!_MV3oK{;u2M8a4$zNNeF7R`B;5i6wVR58k#6a!u3f{85n;0M+26U3~?@F zm;q74p6V#lgy$|Ws(~B3vmhaRmui8t*8dr;k`vYr)xwtNZ#qTtB;@jK4 z7nV1H5i{KN814W=pu54ih*FiDFBaW%9f%FEE?||iRFKROoznDutu|1>PV_k+s$hbG z?pmPXke_2De{JX(@JQyW5RVS4ri(m$qHl=_dA%C4sBjoPDx8HVV7jCcg?vN{w|W!d zh0?g7UsmB3C?dr6u;5+Bk{|e`M#;~(fe*_4rg*P~M^48UoU66!e~xDKFgge1vVti= zJ1`F?Tb1Wzm__+kcL2FVXL$S?@_s;cN$2%gXXiW;5>8vAf01r63}#&=3O3MSf6pSl zZS{bO_)jBkQodQ8>6WNRs%-89CX~vV3339MU}tv>xmm;TVNA~%X$=ug+hlz?2 zQsZ)hnjV8?$j`wEFA75pmIDm)U{P-d$2M;KQDv|HK@0S8bo_Jhxq$>ao`o`_?C*w( zYcAI{o$ll=m>$8Cb-+m^BJC)t7PIGRouxD8Jhvo?v&hm%rphT}uoyvU9U8`os( z1Mi!E7WfF++Q&28`hF>1FKfpgi~t(w8g-#gq7498rpOq9$EIq*h7Gb&LWFI%dN`mg ziS3g&`bxGyiFtI6+mBJ9bQlq=R%KxXOC&~r8_NWl9!zK8W*q5`ZE-bNF6C!!6cTWj z-iKVrXWTPr0}W^Q;;fsDiNh9DES=t`fZ9yS*&WHG3*mBQZb1#zRAQ_}g)eW-bRnnU z^p&bit#gbGURv7I<-#<>_V2P4@6)zaE9fC;?t;!Yk(YtxdG7q-r>W)*RQ6+n*Txd< zgyG8Q0aIK^go=x*tJ(CVuw6HdtrcVPT`E#{$MEz%)loKhenR8+3m21JnVu(+{9p1K0bomHAbL-Y~6)lKWp+ZZ4{e~42==X7Ucgi?Q?8gzXs_I!W{jIl+Mz7$LD zc5XT@+NvcStp0O;T0G9<0U=bjeO7zbc)-JPH@J%kH7?6v&%Mm;x%dMjdWSwCsIKPp zT4HQhW-f+Wt(8&thSQ8tQwKn*W62LAa+g5U zN^K$y8F_FSnspDYh%AyYujJQ!Rp1sQ-4mm~k>i9Okn_LO7QTMco*Vk2cZVdf5Dj15 zuu184Eg`9PxcVkbI27K$;cF~v52f|OSLz5t$)MI9d#WaFrof#N(pJBYc|g;@MA#!Y zo^M&^tLyd&8H^y$tt}*+OLB~&HCNv{g!jA9wBYHjbGm}`^$QUNBl0*0SAC%rQ${OO zhIYo{;Q`Q0mrEq5y7l8{@iC}O*Y0a&>u>VP7*Rh_ibX4nmx+SB3Qz`1wV1~o%y;)J zSm}>0S(zMF$=W};UGGN->_%4i49*=>XQ1LD0=rrFzFJm1lyGD#_T0#H3)3WMpJg}7 zZ$E$Jk&ydICf*i0@tdCoQCf?HwfL)>V0J4W<{e4md;TTnY>R;FO9nvWX)FX(`r~oT zA+W-te1^#-kvxz9_*PQ>-=IezlP_Tc`{si`mThOZ=ab)_S*D#mkxWEnZn2BAs~@hbndx`!QmCkU*$_Uwv3jeY 
zL)1kP7E&*-hWF2P)iO>qcOPM3DA(ymcLTfA2fk)K;Eg+xYYqBzU2DkiG|6|{%OX5K5RPP}f2&>&x8d)>vX^SADHzz}GF3|^!%E~hJp;fQefT`H&ldSHh*Akb~{qs5cWfO8tFuT1< z37eJnjcEQ26;c^@gZ~|0HinUtbw8V&nx-)=r^G4NGF7EtHIBsFM)HxJaoKMq)?pbf zb*%uk1+2z(QgbX{@pEHlHVtPMM-25CQVFU(9%!}%hd-ZOaTnn%vbb7lgpivN;5h@b zW5lu;H(c-i z_6S9)+`M4T>YfYhVzuP-@aHG`K!0xKbBx?pe}J?7@_xK^!2p*K9|SqPu<>K_Y9%fM zrr?`}Vw9o3hL>d9?u}O;r5`yqmmP8!0Nc!=@SB*;jW_ups~mGWD-x#tfS=y!5qDzm z@a_|D!n0eQuo?-a_|8HK1QBqse30#-`!?;~^SHF5NQ1_+csVZiQHyDO*ZGwG5J4li zTS4rBhK{=h*Ju+7bFd776e6DDMGV&|K%kNd;d%OskiHgMDtifEF3jLlZnPkr{mG^t zEmTfYg?Jyr1NampesQ6SyX4b<9P z#w0=`!&?2q^^6!ZC~#+jBhABtYgVzzJ6d)G57>QEF* zFX1uot()_5K`^YKk3E|6DS2#r%d7zqrCUBUlJir>ocd;;n>po(jtcgAT?KI=D1ACE zs2#1V6dIedtBlcvE02ZD6x&luu_|MeaFpOObgpX*z{${GPZ`R}xhn3?8m|oiPuv*L zAQxwf#qc?Z`=yifrI%~%)}+~6v3mzGympJTXw=n;?4vAZ8FwN7#Z$|V zQ6+sJg7?-EoIQ>%C+jOj;|Fawtk%KL2<~`DlyGyIA59=OJ$t^7YG&S>TY!d>NTT^- zs-*KC!v*HOY5?$Hn#uTrt@^3Cp@IP67(lib?pwK)L9_K26r0!Pe;HX@tZ-E^_LB)W zRLL1`EKghGY#=RIFR#E83n4kRMx3ik>`*ekZ8)v0=pqISd_stqn}$q=#%Yx(+^ibW zA$xqy{pZIg8#D8$8f?iSu&B>k_=DtNhkvwcy~QU|wft{KZM~!@_;5s+by4qNPziqPctPd<8JDM0|O%Uj5s z2OltQU>G<1F({w3qq~B?3n`kYRQY&wy`nK*&xz1{-PVHs2M}Y;311oZ1&>qM>OPv6 zIdGq-n!DpXci9+MXUf^7()~>yecT5-j5$HioP!^3hZUfB&IFGQO*+|$L+ssL|11jN z`2^`|`!ev*1i8oi0qpLEqdH$Hs|?M2(P`8Yps+lKFC3dg9d zXY8is!rTGv*rnRj#IP6}6$V4;Vf)EeoHVMMKtwcUS|pHa3p+Tdp`&9kWBsyc29l7sQGjcEOj zqelh3>A=DAw2mmdwqiY*TS<c~lo0*y3VWG}JDy3`4EOQIkizF-c>=ci}diYnA2AmfK;vf@=k z&X!x>5CEKu4BItk8`os5VsmMNIfrKxFfwf(NA<$H*Ibw-HIERhv)&lZ7Z+Qy1Z(#J zcjwS-9lFM}_R-BRg&8+`;QdI#{lfahfpd*;Jecq^s*>1XbF)n5FjY8f{ehyPtno5S zV~{5ErQjQ;7Nv&jcZPNkW$PA~9Ruz$R(Drxfb~g`W>22mfY$GFu}TsuB9C&b)x%-V zu>~fE1gkR%y7|64B(@PN%E+N~X8HI7crc0U5C!<+^9BI9Y(Qv@dJ4-jOLE8VlpKPG zWLb;8hzgNMGASM-1pg3TDyel8q~@^lO6jEFE;BBzfjgch=0@T{xg1uoP$j#$UD@=E-XB#7F+Kbmtue}^5nIsifivjY1$lC9R68hUmcf2BZmbI>D zIr@`|jT<)DuRj<+Hnhuft6e7-`kh;swUE_BM?MGa@oh;rLne6wX2Mu?EPNVK^t~1) zy!b;A`l>1z7aiDxUC}IA&yKs!VIi1WSG*QC?aqtKA*9Md;-@qvvUuEt4~MigAMBf` 
zr_->jO60c@$PE-_zOKu(I9HuixS%#V3>OZbn}+pqb&gw!SSz+lC-d^hG}-8RdtjSY zk`1pZYG)Fqlp6(zVdc+@s8@C}E|sVC?YBjcPRleUR`N5I1>vH=>28ZD+b9WbwngPJ zlG~SDhz+QXR9)YBM*y#@L2cr;QP~QoP+M)1y2^G2$+8!0Bel5=Ym#=0)8cm-0u$A+ zTJix`@>$w7;@K{1pLFy^QQ>*ck!}&IFlY1kM1tcoM5NmU~X^{|L!M#%|dZSF%*0zE76lr|iAG zs^dSj!b?krf|Dwc64Sapt?**Hs_IZsRCrjcfAzg>8r)Xa+9!@@Z_!6gGul+}4abPx z*A=X42!~R9NtsV)%;;a?Wufp=vW}77lpdcJcij^_SIjSlWZn?Haaq-Nl8Zu{%)7tlGi zWy-cQdxr4wjgk9qO;sDi3(_}wt}a*^V_Q($)4vYV!jjeNu#PW)3rkzi(U02KOCw)+ zZ@^rDWcm?@C>ne^klImT!quokLq(m_lBinrF34ZBnx>6ELZvw?D9H}}(ZnDxvN(mF zfG48AR<@_Iqb9%2PqF~Cqo-m_)u1lPp3*mQ6 z*+=)yD3V*`Ptsj?slt8TV-*M#8D&Cc5>{QclE#y2@=;M2C9Q!FretO7>Uu&*rL0^u zgZBF&Ex6*`ox{^<+nonw_h*d4?~h~p`?lpnS#QQQYX0EkwSScvOs1$j(DGJa``_U8 z?d|TXj3!fe7tG-K{DYTEdMSUGnSHP3%GgBizo@#2ufSicp7~w%KtMtEFrMf3cFx)C zLs1H(<|5O8@kBdYUSt|m2o?kDtyRbed9zWYgXNarG}L3NI*EXM=WC5dRhvmXqBCU; zI$Qc@ygs)*k(Qj#=?g@SO{%MYghk(7)fZF%{-$ppT~)HAz`u4LLk@;(1`1v8J)(@A<9xTLKi~wC z@*r+%p?vMvroD#n8Lvk=iVG)R87GIF0v@;(2~sj8##K($+WEFW19x{FM% z;o1SqjPjSW{@^L0*45>_z#du{v0C+|T1=T4xDE{2wy}#_8#%vBj|Tkhq;%~IBrs@Q zk+T!__r~G&jMkPgqecSKg5Xa_`8F2b@7x;#vQFf3`a5Q>ibbdy}#GIFnz4L1y z#HrIDm(JZ^F5h%p3SU-~w={F@ADlWmus{hX&>>OL*l`)n|q= zJvP4_0GiLdP5zfneOHY*D}=kkvlsDYM9!jepR&qs?LH0pizuKF2(?zM8sNG@mq@{5 z`Dw`6BM8QZApwRBN!<<#FVjhv#eBWDFtvxne?UAg*&| z`R3NJ0I3$t|39lsp|xJu6m{Qg_s*U^lQdR?=x(^_PgY_!Zwv{#u6WVc3?UJ^SnhOr zXhn*1M9dwX6TDz6ylG;abY=KE#c8p{cpD!5dH)r1U!CKA4CF!axXCN7cm>HP(3)$U zYk6!{3`dt~sg9qP?=J+LT{T(ZO!BlX9ta*8_O*t+b#@4D5iW+JxDdJJNh|+tAx}1kG)ne2GqUQYx?gpfm?ZVMw|b3*2VKb zl$*xj8y>Sj_gQd%_(QQ$>1Lz4#7flt<|NBbe~MQNvX$qS)&e?_r;Dn;ho2zYS& zHKX`vtwB>>oks?UKhOcyHYO&|}j7bm3 z7ou%hPaFk4iotrIM7UfP*ht2{0jrdEUP_)drjmw300irpg?}z=L#Ef zWxfPhL>gxgY9{X3KSxd`-k$TIU5ZMV7W#m8_ncaMt?(ySljn?Y(as^uJZDq{xl&)L z1DVK%vRY(W3TGVjzg%prwz7{iC6u9dBZ@Ao^j6tYt-^3kc_uKzVpWhePV=wb; zw#7D@u_!T~Q<~F2Y1MrzN5`@`bcwN2o3CT`u7mVJp*9m)j>J+mQK>)C!4WIjfZ=C3 z;O*@k2jJY3Ssb}|e3vz5HetrQCD_OHQ8|&FDd$mUL*M{ciK40m2AI(JyjCX4!dvz6 ziMzKDpN<1R`dCrvB6V@uxnk39w0M|2@vv+Ue=M=Vvt_aM3sGQU0z440miV>tzw`-J 
zu6|BI$qk=R$i2I8?U|O{x_$GwGf^9lp*W;OdmvHGE!KbN7`0MF@srI(2_%XghhZzA zTS2GHs|TYW(l;#_Wzf`km|z;(3G)xx%BcMtzM)9!AvCV0@H@V5J=byCX~ZSAnL~&F z^3(!lg$$QFGOUTElPGz2A3lv0jFu@76$*${9R@&%!6h6%YymWfjG?&O^oDf~O`n<8Sb zZzbh&*Oe?D3a(4tRG_GnxYlO}H-U2p&g~Ea*PC1+ME6rumV`r%ddQ>g-L&O~BDdHc zH^IcIi&ER%*LGu#ldCMub!BovBO*WINXT`SfAx%OBiw7vlfte*fUXU#$tSLkk%U4h zjjEgZnc+Fb!;*abzqx&?uWakdjQBSX(L?0b^;D}55BXSHo&pm};gpwx7OLdy7BOX! z^sP|&Xe7hl#ac*UiYPUk3fu-vk-4Af{t}wJqX%%jlvYxc1-WyK;*qkj7NO5^a3ws8mK3JzK2u$8 zy^y-8ocQ_L1BDesPyeaqzdqIJFtmr2LWBywni<8AL+Xk6`T69i00>7{mtQUEu0bbH zNP{HsmN^>*yarBwGTF`BpE<0^^Y#pke6du)DpCet-|7>wlk9_OT!#_%ue%c^^S+h? z0mPu0fY53OTzx=dLo5oy?JX?#>?qt=8#Zt|`-XWH+(iKsDUST8(>GJ=*1IIf#4azG zz!`Gkf-@->c2oBEP>Sc+^T)AN6RIDWoG>WMD1 zO$v~<3B^p{`yY;D6B>EQLhGx^H^>A_n?!<6?yvk`!MrdSOa}nUkxsEC36T{xJ`eul z*Fd?Y4`*8X`*GJRw`-Bz;YGQ?c7X)yTqu)W{yJ^ps&jg`Cfqucz%iPdEC&7i95EK*w9NEcXBudc8 zY0LRY^X7nx5A0Swv?taveet@?wxwx}!;xvxOB= znQ+No{1p+3wYIw zB1zYYt9Rk1U+_kIsm)Fh_zBaPI<{_^GgezXes*K6FaySnBM+yz`-^i$^^Mg3VI|UU zpJgrNAe2G{8hccC$X1@0nTS>EWdFn4O@e>e`jKS=ylQI<$>tnwjodO6GE6~|@q(Jz zIuS)^WG|cff`lU!OdyC2x@JH3t?3`Rvtb6c1p3k}|7QH($>?%n+4AXoGH>+g?=!Bx z3>mY(zvr6yF>iFQ`iN~sD4!~KQkk+?uQD0N(t6%M$y>`_$AzYqZ}x!*pa#P2%myE^ z{(-DJ^fIJorD3V6Eficqej{7naX-~LiUC>UAQ9@{KRw`Qaj}dxVFsvz&|TWB&otOi zwMoyo@?$pQ2s0i$_moklL=XR!`}s?*o!v_xNE9E44Ql#c)L(D`CBSxp!h-#c6_SR| z`xH#lz+4vCT~k>T!&MC^qAw}VBJFtf!(jAz9Ny;woovHSSW+iPvhBsw?~;m;jlcbY z^MHz0u4iswRF+4d|GTi*KYrSv$$JV^4e0KjS`7FnT4~Bm&YE_K1`a8YK+!(i&WfoW zAA%5sHFk>``v!TErYR1!z{l$v;osnw31Ord#54)<>{{FUmh!$~iP{u1L^!ERDnjDcL589Yj zS(IVQxV1+I@t@~L-G1|2;pOIh?8NNv1Q&MelK`TIb<#(bSTlB2;|XaL15_({3MIMq znt<32L2Lrc!S$-`COGI0}LMmsyINPY#xEMp9hAB)2U}qqb{2hNu9V+irO}Ib! 
z?6*G?&3db9_-S?o-=TMdo|ZztU($?;$oRPFskOSy?RLUATD0#U9vR%~;d!a0<_I73 z4;?QU)qVx8QeYt2=8Q7*)U_eYq3{M|0iAmq$+!Zuvm58z6NOc=fupTYIHM<@yq8$W zf=L#A(Cicec`94$``7udetwzK7zKPu$gcOxbbc?(l!$`&(!U>LTt-G=GU}yNlRF3a z7|fLg++WZa$CIUjoQ?xgY2ntED;}mY9*0L**Vo_wy#3d--}(hzm?}U+|GA`t6y$~z ztt9&`dY7RqZ-dSMv?o*{`}P&1c6u}zX0?z`Hf`qqK%Kdrp_pCFo*e}#;P zDS_RjA{TQ;Az8_+8KVrv!yC9t$Vg{xZbNA!#nWWzsL{8Xvir>}Fc%mdfy>nakoN8( zqVD5|25Z-eA%H$z+utKecJI(sxeTQgZKy`o(Z%ifT1f?GcU)uzO1lo7@lz-~KeI9T zCXTI}&UEUS3amyomr>6tLu+V08)8(Am5iRfCGt@C6Va-KaPol|$O{<7fRoDG>I2Ej zQ2OvXDglDPFBD0hMHUUKOpp$KuK0@git@+uB+mFQVnmtOzcOZ1y^X8*5_U3UyLWJU z=MVTVC{!cMcPt{gZCj_~xz0Bk8!`Hu?DhN=kNr*vOLL)uqz&jBj;?;#ul>&pAg9u0 zUG>Je&*ck(u~U9!3}R30g~W4ju58{Iwuv%qyd*|@jU7q2R?0N`qC?!SNBP^dMkohh-|>qGopK63M>Dx$S}XDVhoH;#ES0diNdMZXkkrVHj00*1AxK^U3~H%b0~ z;28ho&!=d#+D3~U0I9DMB1?vhvfLLBt-ogWvN-1aq1t0BAI^b5HS+s^hSvC=wD11_ z@?oh|Zr0Zij~+YqKF0cCnr@0`O7euE$Ie52h8Eqs?*OYwIF(}>|0ey8voZH3Gc|m)G z%Amu(8T-4w2UD|QVl7X4ox74a_6bBZ&<1yCgxD*80&7QlPVIPr`p2J3h1fdfV{)Y3 zu{ekiw@l6bFNCAnGH;djJ_X6MBQ73b5V(r}ABsmaU&K0AXRprOlodZP!#n^WH z-W&Hpo)=6S_Fn%Iu}mv=4A+b_x3R$TU&WhWrJYqnGqSneqi{7hT(U|O4$RpA1c#D? 
zt;9tJ?rFr1VL`j4`c__o%dbMhLyGB&(d4 zr4<@_yi}M7hQ?zvr$T*}4l`!FF=3~E!YrAJ!pj%gj*&9X zPrH#$mJjZJ^}_atFO9slC{0wXGG=VBfs<7}MZ?yWF5Pe3pE9$AwP3GgQ;mAI`$4ZR6p0 zjJl@8gSJ)8TMmdLR+c-Lz__-#WKNhk%;V(lJ;Qs#U)*K~#k~9=$3k+S{#(-tMyA6X z=K_YRS@J1cn0Y7ifqrQ}fsVj_+_}-KpSv_0oejC-=e8pVGSq74$E1F$0JAb5N9wYB zd;&4m>YAr9={-1b_V4wz|74Qtb7!ouX#q8!MsJ39^n$e!!4TyVXg$oZIOm{>Q9tr2 zCllS71aFoX&C(r2i;-?buFmjpUL0g>5)y|8WUGO7&!4*A31iErZ^shCnF4|@zNm6J zmrguc>T<6baf=y5Z&$3X6E5Fdj0F^0mcE<*GeH~lU<#u9tMcTr_&Do@=Di#=RvM%kPcUM~p)X0-7fO}r3 zC?%nL>EPyv_U)K3i)YcMFESxl5*CC9=0OpU#d-|RZf%jz`zJ#jMC@BoR`Nl#0ogJOBA>{T6Zy*KEYxY^egvI=1f0QNs(+Tj(2JI_D!3C+6?1AHV)&_P&FxnQFDD z1$1=ux4_zACKU)6+z|_oh==;9YF>zEMduRBYkvNEz^Lfmi92v#O;^8iO4LXOk+VH% zy|LReAKhG{1HxRky86s*bk?0|FZ(bD)2pG1B#mD2p5+gmd zd0>BeMW|QKfH&Z0oBb_~mWApm5T&yJu%;HQ3aqv_*8@A5FOvp`W`B=%(R^$aJtFu{ z9Js9rJnc{1o@w24fH6+5)LIWv3oWk(9n?&F^8?+9w0blTQEg?_JT^HX6rA{z1Hz*A zQ(~g_kVsuDjbS1B;`K9{q6CKT(NN|T&0;2AO=$-$6^@KpQ7Qerlp5~~Fd+rNNkv+X zo#@Y;D!02;*b63f9$6Qii1M%lL%basz`Dfi=fBltcgJY!?Xf_uaOp84^viS5_R7+W z`t0O8j(;0>w6aQ?If0SLD)+C|g!b{2wl$C{bbC1%e@*HU4^^;N%RV6p6~sh+DHzFw z1;+U@?m97ZHwc@}s3b~VQvB6v1bz9$O|S1L>_Zbzg!bzl1H+fXc6#dDTm}WO+UnoZ z5u27UDbKiBGTPm(l&`Kqpp!=c{!?So$hhfo$kpk${9&)y5KUw_+3=04DH^P9nBSR& zuQfy`RxqUf&$xGuhGx3p^IUt=z?H)gMjv<71kO!PVChRz?G!aC$4wvfO^&{7piBxK zRnCo%>tM6Z#$!SClH#E%n3AR>tc~j{*74i~h!Y-z^bzb4kxID}67>dd(|G zjqYomHvT%<;uzm5d-=i86cy(6@r2iI=3RM6O`#tv=}}rXx#QhzAo#VpRfRy%dJ|qV9sMsXIRrm*R&4+W ztEStGa(@}{{~|B-`j)@(!BAV)A0$jTA%;9#S?fo2m^J*-VrA6Hd$fD#e8;q1)LOM| zZr-qm5+kFZX{+#%XAXZwonrMZlHxwiygapE7YDRs0k01+j_i<(cIt- z(8CE4`!2M&ZP0o~xR8EQw>{l(I`L#hx7WYbI;uy9LhfPO53Y<+L5t9%Ng+W99e*+J2&p zdU-di%kKlM)2na60nOSX(fT$D%xG3m{F#FrN4{%z|AagZdi85>_W);q(g*dRQ|uYd zymfiW$R&7)r6?BXqo1W~Q0pGXIzosOY6U1puGxBV4Z)8}Cm;7((P#r=FwtOZ9Do)jLE;uC&Z%8^Oy1EbUDTa83~!OK#_? 
zPtPR&C|J8BMn+RJb4-r6 z)VBGIAUw&-#m5sM_vP^MbJ zeFPsqy%ssZaOn*DVYjJPYT5}7b)|J>4^J;%l1lyTf%uN#?o<2pGCYpW(=V-%*4_tG zCzj?6njXUaT4WtnIH3^hFvU5=g6;ZOq(4}XZAN~(cVd%5bE=W6NHn>;LnJ{T>M-<1 zBe)UCK8^yvq3E{laX&;?EIqI~s;uEib^l|6sKU1>u~iTo!ANNI%+2wa{3B zFCt~$r*FS~(~c@w%BKhm2)ond9bu3iq2F&cq}dTxt)Gr#r^@wSrq$cZ8Ir)(ryG9> zFbV&zrs};b)7#h)-@+Tf6Ur#V^Z1Ft=1^*8iZ?BE;f4kuCxTyCiVfq#P8XSAn$nJu z>NCd1P2@S!m-BE)@qQdbd4l;St(Y;$B#KX4tT9s{Y1bA<$Q1 z^d*n032$g`8?*TaaQ>0TAHPyoKAqq_U$+oKJCz)FKBE9ztVa4XsTZ z1D5ExWciRVC^H1jq!2zQHu>5sSEWt$e-vk@weI?>=_e(iW#|}ZrpWh0mib}wTd_W5 z%T(@lD#4bb052zAF1%ftv589bV&2YPx@552g`SIW$ht^YcWvF(a&Ur8Pci2?zwF#{7AA!ajoy$@ zp5oq;w*MbKjHrYR6bC;|9i*&AV7;Tt`?)puXdWszI_L|1`lF@|j`yM@0iuMCG^a!t z$$gjajmg~7@hQX5_;n11fsoPgl74@DOoTi@PL9p@%E z1He5yxO8{E&+!;cC@&@Z)Ii|!!O>-fwF#!BQSX9vCuD@bDRjnyB{@~xthvBiCJH4k z8nA6*Yqo*4YuYthO8(xa3xVj|FL}0taUn26VVn%>&AtG291ov?FQt^Ac}>e4hq|C$ zrU%gobw?;pv@{Tam$ooQx-UU?`Rx3rc^FOiEcJmj^nv`v%lDEC0YjlQWWJ>IlRP*FLpE;7NKzQ5mSQ-?HX z_kB8*geC7W2NuG9yJJgOfho`6yTqXk+NE5{)=L#RF(rxHH~(-eVWtc{A*I9aIXMIm z^rx-nE3Vn+xQw+o`xpl%y;wuZrg)7zi1funBWUan;l z86*x@7I(EmT&v}c=fr!49e`4)*zN-sRD-?0VJi^pJyg>>Oba?4QUgrXi+SSxnllQ} zALWYiz?w9Cw~a1AMVlpxGetpMLQoXwP1o%GAxjO~iUiZD?_SFGh$TAtXJ($i0m4F6 zWbc)I9nW$qsBvk)Id&{G_!2QfX`5e=RbGF}4_F{%F!f?1l+H4BRL|SZX8SPK3D}L0 zt0^E{nHaBZ8$7~Ebn;<{{ouB5QQmkG@JS971ez++hdycCYhMsgytUqZi&L+Gk>`Se z`1r77;ZB*J4#(b#97NE9`|ES14uwJBNED<@*I|UMDfNV0ONX`!CcR&dyp?%PGf!lu z%h)m7a7*$GCz5X^6oX@l%HFmCYCEf#)9OhJs2x=K3eBd;azX_>~+ zeECa}8-8N|Lnb?#<%zJv=hL;$L{q&+F<&H}L24tt%{aq0p~h%8?32DZ3SCEzhY!K^ zr#ydU=n27+qr#tX zNLFU>aXnVcUG)q5=V&+r0|^Rck;;7Fjy=s`7*dwFLat=nk*ICxymI9(Ts-8_bs8;B z1HCg{#jhR8J%P}#=H}Z5f^AC5g5ZAS&PX`os6Mj-Dnfn*lzTgPY8k)+QB~rpxaO_H zP;8x?kqbElM}3a~tD4egOgEa=BX$E?LWQKv55YDYRSupH56iBrkAj&p^tAk2-y8NM zjjLq&PMYr{s`p}^og8Jx&mMJLe^0)Yi2%16;C>upngtI zZURc6xvx9jGXZ#jUO*`hDN~#8J@+wDz(cx;hx8bfn>gRd3q+1>AB^Osq}g_wr-C-Yl#3E}&E zS4Y*JDk?4SPm8e!E}4b-j*T8(thNWr=H%=ORJe;@%C5nH;8agkxS<8q zSEmPf@Z8hF0iQm_bWzFz=R#d!*IrLCMj1bofBO5gCFbei(7ysWr%r!Z57xv98PT3p 
z0XF?1VM|SuU}*rgufAfXZ~c}Fi3IL1teL^|+?Hw~eEP#BGBF3eVGVvRHa=5|Koy3F zT(d*e*ganXi?@2?{dGzeJnE&~g{Er2pxA&RWbkw+qw_J}psMN z`PjMDvP=7@F&(u6(mL7liwSe_Z_7&0Q1SU-`kz~5s2zX**@D2XcHGp-e_6^ftyhe^ zWcz}pC3{M z3~i3y)Z(_SeE}v9zb=L7&fumr{c>+8!#g}VuxoyfSV|0VFBnxWu)4NUr@ge-bmBqS zI!?jWQ1YwBtHTX0rDy-{@yj!;n!=FV#MxL5ILTOjM`)vzo^T;TZA%y zKmFRyt35Ys9eT_JT;CkzXE#WwG5leC4z|s5$JZ_+b5l%8>Jclei{m-~5Qj&S5T%i&9WYtHfVYSPPxp)_8FhygldWURucxf) z;5HXN5+&E0Kr<#EVCgR6L7{1~!+)lIw$;=@68o`-Hh0-QBFT%z1+Y7f+owyJAlF%q z#At{V@$*vCDrF6i+pJ1?m(LE?8g-KCKzhe52x*XKoo%3E@#wxGweY*sE)I1=J2%ce zC^-uKA|CkCybOr_8s?I&W~~C^pI^ZrXIJb~y)FNA$VJ-$FEU+ONZw6-1~lu6-Q?zc zqiw#^jhIq|ebA|DAI1E;K%C9?2Jf#j%v}5-#D*g3*L;pj2)>;sWGr7&SmNw_{>3$B1P)fYT@=V)b`Y0#i=(tK}&*s?t*bAo&y((@P(PGu9btFyCUpNmnWHa-F*k9pRn zhFUOn(edtv8Y+F+uK5ZU~m%NSR|V#3Xnp(Aw2l zoKUW^0XYk0o~!V;=*h6{xiPFnrdm4j^Xg{UwZxPtg=;D0fI!jo7F-^2z>M4MjIV|7 zz!}TB`I&#`SL8T`{t8N>4T}DSf$YLkg#f>9TahW-363Y8MBPM~YC|q*(1lo&_6$21 zI$2zo=bJEGxlMQ$qa2q?^`yda2Z7cldv0G@zQXoued<%I;7Zb(8{*gwk70G=N+uio zb$K!tQmj>9oD0^`t~ zao2Nmv4vD_?G7?@8BOGtkvWG+H`VO7#h$ydi0iS-^p;tH&qa*KVqEWrle5%nY9K6c$-9t; zZcjKx?{B$Z3yj6zIR9Kdceq}=fTYRISU0oqJvLi6tEg2l13k|t?Z|~B32_FivtRB1%WAhK}_lPUfd9Yu3Q9Cs> z9zNDavlX?Ogr{urKgv00a#&+gZ>#x`owtzW#c&@|xrLHaE6jI#?NgHh4=dhu`mTXZ z2Rq#gNwLo7Odv!M&0VZGm7xsYtjexDkl}IJsnig45+$KtHjOiuCw@}FKBw=5tG!J0 zN8SFU>hU|K6Y56`(OaFYugx7A{rgU>Xq#%ZMvbUI5GWbPc0+zmV16b4k`%zCIpnTP z{iLH83I|x2TigQNR2;;a(dm$8cU-)+hJB&IGaK#4@P0Zd&i%a`%jM8sN{b7j$!!#D z^1u@%L5*J{EM_bj=iAB|j9vNPquKYF;HU|VO=ap5jD+za0AU7E(F_3Dc~6PlTx(8L zFL{iey|sWN`*A@3dHoskF6z>{>=Nu68gIb1gGjJdYnRr^&CyIl^P|T!57KpwfuM2? 
zH#HiH+VS3NAdIu3Z11BKY?w-K*Qc1VW~-`pUOqY>2y+zo?i5tX);#^H2%E0Le=G+T zwc3sBlGJ1acGE%gw%r%yqYV+3VtR3jni|e<;+}!xIjxC}7dzC7c;FOV!qfW=P2Uai zOlT%t8p%(nYM4?tmfCE{im&F%872H?$%AJMZv%#Ej`0BHT!A#5cH@q^GAX`mCDHU6 zW5WDS4derFm3F5EDz&(-#mFNcn6trAb1iG3}nQVmHRraOr?dV*^hO-{xQ)-M6+I^LM>UwigWdv*!*^fU^tdQyx z^;!Pfqiu6=Y`Gq*UvKufpWx1iwUIKoL|0#xF!x|-FS5O>m(iReHnJ zf!YQ>?p*D045RV4A zM{Kv|A7j#n2LR51^!OGNSiSdlC&Jad>OLJXDa>==@v9x`T1w!`<}tTKV9+bnxsF7k zBPF!_*aIedPQGFbEG@H6UY<}L-E?clF!UDign5}XUA~goqxyv$QbfgNa|iL!mPAve zf{Oi`1I~(952K?V=-PFJ$_>w}wkgoENzhfXbMlOw{vp)J5w|Jj3JA zY|4i`qANEYErJ8^X%;gXvq_UDGEWD7VSSdDYD1q)Vvxa|&&(^ z^P+$Mqgz#ujbFoy2W~ywF1XnnSCx1DKi3Zhlw94kFo*mJsB5dYdpoZya$JS?>HqRV zMj7ksakMQCQty8^2SViEJhor>liJQ3Fv^EgS~wuqU?Y8;>>~>ZDX!ZF)M_)(!K=f5 ztWXSCaGF{A7&DGsl(;65@N1n%gg%FNMc z=^U7Py869RUawO;yiGr3#F-EVS+T}>EpU-Bp$is7AwXU3$@f)EK_8AOS?GEcL-JDF z1wzBDI<|)2ar)bt!}x)cOP*B}Z>+c+omx@-HhNvnfeQ*J=tqq>F9s@!a+w2mSY<|! zP=k_o7dkQ34lEfXPS@eXgKd~w=5hy7Op~{T{B?d{-w`g3!m#thp$?WyP4jr7_WTNJ zQLXm7YzxaAbkEB#7*-X6&3wY+$V+wiq*Lwo`f_5gWc%C1XmZWb%sYqu01TsC?iyWO_d$_=tM1sl;~0SK#h($;zS;eDvLdp zcWWljp#{mWlY%N5SpJKf9=Lj>eiFa}#6g$*kaM+uio2u!BH9^9_GtPDO1l zeP21Y@M@Wl((0szcgQmpNlj${#)0S?@)lON&*Zcxliv`z6M^i*TZaH~2&Jp}PR%sg zKJN)r+lv3h)OKgA>PjKXCmtLECq>%;b(s8X2m32^iF)*4hxoLXc}uPyM1886RsGvhIyH15`dq*5oY{+bOW7)^5jbY*kFP(zUi3HM`S z*Q{%z;t-@2oyfjt7~szMJ)JqQ^335Ca2>#%TeH zY4D=Xg`Ov*1!EHqHYJ!D{;Nu;f2Z7v@ueX`;bgzw`yL9fZL?7+Re2kOd z@(1#C`6q7Fn8NMnMYf`?2M~O(%Jq#XX3>y)$??#v15xq59m)Q)ZY>^elNMAdi$6~F z?zBsK&scjuIyMUnDBI`%ltOD__JsVn(?YuYPlVv49^uy`|DissBkWG|;?1L##r18! 
zcM{ z;P-&Htq_r+*De}?tjVVUCr}4)bobn!oqo$W4^R@00G-+6v^xN=O{yG|0*W=7xUx*3 zd_h_GQi#u0pRAQ7vfRk;qCCLz4~WE}KPeLDjvay%r@fgu8S2vwk*o0baD1yC3mBFg ze_}{Tg)$7hr~%G#?|bpQG!{NKRm>i~mG8-QTLaTYF76 zZ%)AaUnBE?553^ z8jFtX0C)?6#VPWD$6@480~4#$SfuCPogAhufh0Ayd_e`~HZ$GGODK71USp@;ISN1_ zbdX@u+CE^F7&>gaI^kSndcw$AT&}%;wbxJ<@|Jt{OIsnh0>?m+mo%a2&#pR5j90Ke zmL)rwd;#ws&US+Ay=p=)9}%vwcLAjnz&Xfo*eKW|QGZ@@o+*5}m;qL`2Ahs%w!`o{k37%-~f>2pu05dbS|au|^qx~D|mLAb#br4~20ok$-P_wD8h z-8CM9b$11VZl9O8y%eE>D_Tv|SDM`toO$#xS==@T#PoyYy~RShNtITI|3 zd^P`1^92lTfh^57(bu>*3~@)!?gOIJgGUB)-5&Jb3$KeVa_o$6V z9m>Sy934)%HA{($FSI?7Beim5P*OCcLw2joy!%|5qD+HOR7#QI@d8uE9m?9 z+3OdUm!G+$j<(LAp#jXD^Vr#=>1 zP}WvllNqyyUwAe%t8+u}+v@4KM%f8Jeyc_k&C;|1{&||}n3T9~SOHr2>dtU-)fL`= z&q);X!1;{J7ukb=L~UoEkW(RxDYzW~jKM%@=p*+fnvMxj+H}P{B#I@gyM4gRkYTWn3>GA3e?A7V%O~0V`^PSpMDvty6*8jE&Bje%|$b5mJWIR8XNoh68Vh;@c$rY|GU$y=CYS3>EsCY17%hLzni2>OYx$>{L zr8_RPqYBmapKUd~;|d{=uywQVNEFA&qo`9HvV|dOs?@!ne7Z;5Dp#ncu7I$mh+D|2 z1OGq<_>7FGNnBGZECD2EkW-n@elJ^yqpk;5pm@dn{W zb&P3)+s|3D>aw#6myxOPUXSq|b=_#sZK8I%)+jpmhhQ>PG~UugFFiI!f0X5ANISM# z6bC$H3s5zmrpb~|7IiN58(zOWm+@eg1xOa+T~k+q>`Pf}fjMkC*1a6K6+r?`<$o=K z<`-qN%C~Q;DMutcrEenu$n=6L9fFNlEZ%dJT8W>EMlE&rsApI%_kx z-lfn(r!Se8&cnq)cB-KC`)YiuK{+R0)+T3{)GM)`aOQ`3vEi{G_Vhc)Lef7{hmWh# zV{lFbTln|#eJVI)bUbe~7BteUSlm|Y<_99Ss5i==F`tF5(FRpH`3PwdEk;0<{R^}i zUx~Onufr5ZWemEkbK)xwdeD~EnV0{T&&=(K(#n$JO-`t2#_|InQha&vseV0h7O>_X zFNFwtg8Ku|yD9>4HJ4jL)xtsLDlWF#oH zUr5|<%uB)p_%cc^!-jH~09hAbeBmx=TjEKks}~bUg(%89_Cia*O|w+}xRV_g521!! 
z6t-Pt7<(VK)%os}<#BzdKs_fa4dRe+)5@zfv?xwgMp;8w+Kt}c{{zdiFNBEjWIs`; zCL7qNJRilC;u0bE4GAWpiYjN*15tRNTJt;QX&Us{xZ4RQrMDsYFUTHNafRYeFQbJ0 zK1L+2RwsU-WIUA`qNoyQ%M%8PU}!-X-j=RStbwZvJwXboTV3nv;W^*_V6Q-$&+dD; z;JZZ!L;}wx!++Is|9P)_#Qb?}vlaCylbo{4Vu2xjyJk8uAm*vnf+1|M*~{iIr+%UKB=r-1YsEL z)m&@FcIb!&wEUL-YL*9u(KaDYisg1ScDqHj?RP3*Ltov7v^$>Yr?dj26fluAJ*k?O zg_*!&n59$P?!AyGeUGr%N*E)}65th6x1_q95;qXWgzs~N-=hsH-ONKN)iNY!C?#v$hg-v*!SkQofwA3|od)z50JLm$0cbOm!y&NU z5C&3Wi41jPH!Flaos~(C#`%|O?-yvP8UalD#WW}XOCh6zrm$5DyZxxCbi+bs+k$0~ zE197a=iFo4L=IvqsQqR9(@){{&SFX=v{7mN3~)$A#_T3-N*!bdY)o ze29`kH4xoYuPUx_GPy;@oDQ|X)5Q5m?sYWb4z^E8 zUh24(pCdOUFZ8AdsM9s$khz|^8)LQdh0je0^HEMAjxy`&%XG4 zZNvEGm-XNCN9=jd0kOLBmPN=kFXfdVr{~07is#&0^0r2>Q0$SvtilxIhZ>eV$0tX` zK9yhVj>DKVDgaFTlq=bKb7Y}$%0XM<=3i~*Kj6k?v>C7(Nqd0lB$wj}j!u{IBQ&=AVfc~3S;W<_<^>zSt|H&=Lb0v8|vgAkI5M1gI zl-Fl>-9Fhg?xh*Sk)#Rygw++y_jl22=b&m4!{@38To>R=Em|s2k2S7_4=4i+TQ|pn z-{BrU+gk4zIoL>$_~8iE2*1`7U|b^b<6I0n*}n3d;9V`hAkGmI$*W3oJGv3&!tRa* zmXIJDTx_w)ZYO~il2_(C*V$EcY`>2%v&KfC!s2R|7KNSvv^@dGX}>a<+AnpvmL(-K z`+gV2DZh3MO^`Dxu(#JMQY{tRu2Luw@cU_;SDfZd_a3ShbQ>&w7Gjc)qaP>TIx!@D zci)9Q5ul<_+fSCWDSHI#5&-fYsDcSl{<%U>qyb>0O%1yZzYEAUx<{kswH@%r%OL(H zObC@DI2YCSWq0j>WGXRCGa43NOC*<)+U(#cJjo_mHajiHV`MR4%R^V9ZdbC1#PUnh0k3SyWCisCYuRbXM>S$07mr}HX5zGm_}f5cbk?I_t2A9H_z%L zfe5{t`ZiRWroB5$&Ja6jpm6hhxM0)$I5lP-O_K5wgxgQmXIGpa1oOxPAA&PLpo=)JXPQ(* zw8DJ0pOKjE85#NLfm_L_Wz6y$!%+U3!-`-vBc5M%^;q$zrR6mdUvTFqXKJhW1cn{C zLkCuwoZnbwTuYdHhn zwW0GHaq@8Odi=UI;$E!`cb-$#Yy@vV&z4Q#tq^5eA%=EkE*WIa%|qRIx&~9T1dmrA zTFC@QfWska7PHGfsOivNQHKeuJ^q68tO^L^ualF?NLq0#=6^5ef91fcB3bzhcVzFILy6)h_yK& z-s4T9)^*?HE5FCvFMfR+9GSy-5IJh7QgpI|@g(WKTX*09 z>>e-tVfBMJvwDLuw?p3nlmC-cQe~u!lM4;{2evm(C>ulmo(-I zxK&OdkOQ{vB3$Yxy&bzj^N--z6%$)g0D~+YeO|@(43O4`87WEd%$MhG>zs}Q!D47C zIOsj_nP1NC?2SDM!kO~$v)C8nW=G`7=ebw+UBqJ^#+eOQUFmWn4(m*4dTidLg_p8(cpoj%yG|5qH8Tuw) zGWE;{q#bRATsMO7QUOYE8AEN{*_~gjwLC?Vh&GX}^Xp``j&y>9NDzP5v03^&TZj4N zUsE>u3=B5v{KX4#+)%nSr4xiQiyAv2aKY{(JdR$H$Ezn{myJI_AUsy^5?G;Pf+t(Y&mz^KQi5ip77Nz`Ac1}eMQ`XLRJd8?y 
zfyCZZt%+Eu&htl27JaRoprK0^81#QImD+Y`Y}KA{&4m$66en~Ju#+_1NwLkDmc6D* zGxjkY9&h5etO$pbuU2}{;ZCsCj2=_1|BQ~<>pDssG@Qg=$;GQrZ;%oqGNP1QEAf=Vr8GzW^)+sx5N{6tTQi~78X zYx4R7{FMpmtu*yNOUkhf2+pi*Ij#z{uocRV|7o^@9RPRhY8Q)U^`-ynkBujD0yZbm zAEYl^@*p0^_wZ|5zbV06DBJ;(WR`I*FL9%4sCey<^ovMMee(hdk&;g0=rM5zA6-%su3XkmKTB=ErtxL!_BOilA9n7;4W|l zofCfCg5=rP?ETO&4WkFaHv#VLr+!I1h2OQYGE-xBH}J5)N1cthwYUUv5gVT11AIN} zhQ-qZ$)G~rLGwu9fco#jvYJFo`sK2MT?L;q9z^wTE%Xd5@1L?1xHpy+8hm*M9Eh0p zxYvw@9gfBV=eY7006!8x|N+(}Wi2ZJ1#C@{UNGvEN^||7C^P@cRl86AjVa z7L-2C67(fh9s`-J4z6KdMcm6TB}zMg^-L4gMnPBDv<76P~{)yjv_2~%!GA_{H?7hp_OzxG*N>>lJ|BleR;LX|Y8>3$E zZ7zes=tG;8$eE6S7{xXB_afvkC&5pCJHP9i?-@PK>hm`!5LZ5#KwmEh!@_}iz>~Au z9YO6IO^TjRsXmPsU`Vu3}akdDiAznHE&E&*vo_kN3f@-e(ZK283U8J0-6P*6V4 z{}(k!o;TNvr@=$r^ZvnPq)dRWuU1F^3j2SE%JkpRBGJ;Drcq{Gb%Z~B_^ySG^%sjA z8p?2qqU+T1TZDDLQBzfC27S2z%yF~ zmXignV_vZ*Q$T9wnB*|v#ZJUT-qZ@aLE>wS&G-pJ81YBVeG@jmw!{HsYYFE;nFH_s zU1V{-6RC#gV=;hVuwBS*pK0-JKZ|qsoVzQP>di`cn-3z^D}j60Ldt%DlC*%cF*@ZX zcZ!+x4m$J=WSd6QO!PRc#akY^ggq4?fr~M8*khVeypHyfGAf+H_h+xN)_pX^{8uY0 z#uH$w(=4vaE)j&3xf4APYp5YJO4VRG@HT8*EAzRpJ{{>1F|5Zawm?n*n6#WAA8=~$ z!A^jg-{x{bRHgRY&FsmWUQ&#}73_#al+-}xLtUsBQ$ET0SsTSsEsfQhM>Fw11 zI|*VyyF$JBNCyenSH$dzYjFY4KP6Mf78kw{4w=`(?}}~yN03n#`DstP%TrLAQ7qZW zzRe5F@l6pCrsYf+0bkuu0X;!y^pH25IC^=WgQ05PGPn9L`&3IBNwbWZsUY~*g3Q2{ z7<$*S-LVu9k$ge=1{R?AD%rO!I!04z;1rw~y9#8uU^-^)omeO@WTC z_S+OCL?kwA&No|GUADH!s^@zDX<4;R)zxCgZTnZqr%pIYjgeYYmzY0!<_zRD8`?cV6zS`4&Ofm-0KWoU zyFsuLZRU*tnBHlje(`7K|F*w;E#t}I16Bv)EC{DpmxFmXrU83!^O#zlO&Q(k z_QlQK(d`K8(IZcsNxCp{&1q_><}2`;U*e;#Nha^0R7$ui*&2}Z{^d8Ed{cvd+K=2Q zb0mm8Lf@rHl)7@-t<~^xus6KCXLvfiL%y^v#4MV4B1+v8^qofiQDW3DO+CKcAjot0 z@Y)Fn^g-W4&_@2tllP>UscaOZR2kd-(5jSEfJ*s!W0U@zgx1<=-8nh~oPhx@Hs8#! 
zr*S2p_+I7bMv;4tp=BUuj%oXsBs6f-=7HI2#c)|XRtIs0E*m;%g6>W!~6tbmNTC`K03TX73}M2fhxWp zu3gl=tS~ovGxVlQGMFM0y?a1NFDz9PPqq)B(pBUhuADrj&~VR^*YSTH+WH$c^J+Y^qa>nmg0b@l(^97)cx~O;i zNry@$t{xP}T{C;A0s(fqAG=xYp#13t;u)-8_Bt-MP+xA{o1nf(Z(V)`eR&H9?GkJi zREX7b6*%`<(lQD{{!Cg1<*!h;jd~9J<^sSGs_|991Zsl(-Z39lMOB!7~d#i31E4d>NoS-)js6fivLD_H8D-NR-&GyNR$snv3 zM-Fgc>3B+Iaozr6{%1VuCe6$TZr{ev_^&Jiu6HxmzPZ|%V;cSE+(?dvG7Jr(W3|Kf`p5gmEkRO$#g%)g?c4ZbpA@LZap8*+; zqvkV76vVmQk3y-&rTv%Ujh7N$e5kPoWt&%bKsU5t`~{?clFN-5kNefY=*X$~)dr3) z8}*e0Zv&bxnc@l8K>`TcB<0PJ)g|*l&QFyi>0DKu+Zv1QsTY8QUK0f0%ebTIk17}7 zQxOHbx4Z_Cv^iA@($oPNT6nfhTkLtb4gWn_tbOmZb(%+a<&DcPs(twEmX1P3@(`)) z=+EWltc;hpy^euY40DT~kZ)7p)PR?bHpd$(6piVFrqdvIL&eGmxw5>F#b6J6= zwJ#vQ4+>Ia1_(NU=s;vD26Iecm3ehjJnZEX*z{s4@c)7-ET^@9YcK`OxtFDcenxX8 z%3yTbF5@f!51nWXSRU!ld{aZ0AIQ3GoJ`12-h_TiR9matzL-qyO6i>T<0N~TcDqx1 zjrR77bNS?mol}g~qXJ%Gq#fG+6h@lb{89g-6ImV^dh`R?-t?he4*{&%N3FLSZ+~fK zQYx(1eemXyxg0#)2WI8AarsosK6Mby-md)8(Am7)%BrmQH|D11Yey>*!(Z5&QWp49 z2CCkA(=1{oe-%XWzUf~8MB9^*G^D2Qnu; zT!4u?kG4x3QLKmLmjT}A%EtifLG>&nfov@F`9%}Xnw3wvyx4d!Fk?h#Zb9eT3oWSF zwjX|*!e!ZT-_C?hbn^Aj|Ij1kjs8db1cyuPnbLLRx3WHrjxO_mc~*z!WLX0+BIfvD z;hCcLDe_-C4at+Ibg~isaRCie9C`eaJvudc*~D?x)mT9+c>bj zm)2HaRt9L8;+r&V8>mpU{r{)EFOO^DY}dB6imgjrDm$b~Egr>UM0S!?MdDi*q>6wG z0fR&Zr3lIrlBuF1vZU0?D#Q{6WM2f?BO(E1iL3z?BA^5a5FjKW+vIzKuh!b0bKduN zmTx=xBfn&r8D=KWJab?7ecjg;R(0~AvA*c|3A||g3GM}N+k)({dT!t`_7piPvkR#3 zxt#e`CuY2vbvTFdCEBLv)(05?10$zmL@QK+IafCjpwSlW*&NmxdU|%fc;agK&CdS^Hd7; z6nS;KegIA^Ix7Qy14(-!JNNczp5@Y@ zS$~z)&IhmW-l=X))3#Ior!4SsSzv2dwHmzjqui{WqLp@U^nI)HQQv=}Rax~MSH=KN zl&V57@1a>HTkRoj+521z1ZjSO`Y8DK0{`xF3GKn7fDc_a*%(itpRUJ&BP-D(6lsLx zLin}KLX3Zv!Fz|P_Q2`gaqB+X8%X~e2R3~Q3_Cy#{^sz5l(`K5KOmZ$Tq5tP&=||b z?R4S^*Qd7=HAiF*X}z*fZN)cLyrH~Aw(uz7zu#K(U-j<<0Ak)e3i%bg%DnBL^LzLw zx`~VY$y40;>Upi?H~-`H@c*aR!(0&be@o)tOD?tjXgceX4*JD$_%UliKnW@c?cxj1 z2cO!K!hz$+-Rbmcdv4?L=Y%ult3#e77U(BlpH}2Wtss!v2=nH3hpD*c5vhMRocwF9 zdG{h0*AAz_nBNoiXOu>Uyx$a#V$0w$mMJEaRhS0Nc(NEz6#i?$bSER=_?8gozr%6z 
zudzJnjitMi%aPZELjgALQ(<#+6*gb0lCk)GQ8L!S^$`94rG8u>jk!rFp(#%Wy2PFc zg3XcQAC>R=c$cW+kl$agm~$c^mXq$*zb~BkaPM4Rod9YA-!lM_{C59Tpb@oxN9|-% zmAnA+ys_R<_BH51o&z(g?OyrKZ7?X22#C`2Nr)GcukH=Cz(*CKkGLjK(e(AQKd15^ za(#v3FycqWyin|qUlKV^AIg*Xybe4d)_ZFd<;O*Q@t`ScEH@Y}nmYU`U!c z1%xB}^~9h3X#R)ieYm`=jbWVoQC$eo5aNMD!Ib?@KmcHE9qd+(=VKi1CQ#1Kv_sNgxYb_S!Lj?ADlg0Yj_XMK|iw&Gv^ zk9^_>E_UFU&Pp?I>GS~(?|^{X16Wg|Nh`uJU(KMbDROvoYc(JX$T->4^)V^^GB2M_z>Hrc zf1*HDZ6^)G8_N%Of?llojH{Qh#n^dA!Y2%Yhj>-Jf?)g3O#3RkYKZt$f5$&S7$DgE z0^yD37o9&@l`4%jjs7V6vk)4ZBuy5*pq!^-aHgG{i%m3#4!VOfWj@URjrEJfWDDbT zCO#od{>x9wPv`x_p5)eEAg$>6-qFkW(443s&iHC)uxq>3e2DVfd4pl;?xK{!3yR!; zbRYLLLl}gOi87zPxviZCff5F-Mo?5oA4EI1zg_eP@8gZ~Aa!N+|U`LGK)*rV+>%v^0Av`^_%9t-XJ;U zhV?({X4#xRD_GK38&%2R4a)`6vBS8P%~!C>fAg)Lt`a0+rS_2%p>nbP3o!q9b3rxE zz}9DILrRU#cW5V-r}lU=@4w2ED$!){oqu-Or$>e;|Gwq6QFNg4e7Yjwzn+dzGz-6{ zBj`a4iR{~mQ&kQ^{<)H2FMXLl?>7%GFCT;V%RXeOkfeiNCaJ|_xM7L97`>#I^sArx z$EfE39rp0sslRC(`OAuF*gCb!7O3+4AUPmO7o0$i7nERj_!U(V0XO2y_MYPUUcie* z&6_t?OXa?z?QFGya)l1r*GZsn#G&`v{fxBh|Kf1=@}G&7-j|JxN3*7O{T|GeK(RN3 z&*048D-!M4;36*PHN`3hF)L6M(8aeH{C~vA@}Hm^{1b-U2k{4p!2g=(^`C5{T{FA~ zgA<(h`D^#?);&ET?_4r(hI_UQu`3-qk}_~4D;tdAFL= zRHjk+<~>R1zD4j_!91v?R%!R6?T4nBANy6kx2d#y+dub5uPR_A{Cof0n*T`uTr-=^ zo~9`!)T?p8W}6EMd=cVz?RAeQRdkKtUuT!hnODyLqZs+iFXsq~v4C7T69E)YwDqa= z(}6ZymoR!7kvw`;$kAeZ)j7Z44@P5dgOqo= z=LZAm^lSSl`Mnsd>2$gse2s2F!Y8AZ>2$U6?TdUdShj!G zL#W7+aqFwg%RzL-hbRM%wtwT)9! z#p3V43V>Qsd*N#kU}4qu2$g_Z5w)w{_v`E7rwK~{wc^&veNf~S5TG_)t5+Yh{+rAw z)E+>s7@MEenc-IEN|QU;yz zXq0}j_ryt{8#k?T(2Nh|QktUCnh}In6q6p?tOv|DT_|@*YnS>>z67GhY6M>+D`sFv zVHq8GYc{G3Jl&V2fDZP)?FR!sqai!pKxj3doI?Xf-6a7f6yT%T)7K+S0LB<3B9Hj5 z{Lw6r$q-S4#e`02ZZ(AoBC0<)e7BE-%F4IkVtdGgx>Ly4Y!QpV8Rs#8uG?ZS$8u%! 
z21sQqgz2)=BkYg(8LKk*c3XNBTF|&t&t0lNYq?xl!4nXcx4$~ zAl;+#_vRe9fRGgI+97v7s6+a$8H@qus7de7*n+bSK-cXxU}H`jc16+UsIJ$1joozf zg+m4zJ_&;--;F2L;Gg^TO~UY@{NAnzuNhRvZ}8_Hu>Uq{Ib5avHo$5hL7H~tHGRSO z`V}4wioRMJpk}Tqyt#pm$+ii^%uNAN}nbEjH{~4z$QU8pe_@)eET)=cS;>e zdOi%FF(%J>cH&({X(o)S|6IVf@!k{ z(uhIySfm7mPp6^~?N>@y(LrTr_GHt6kw9hMDn3gu({-)r8kK2t^IPcjF}R}~5IDdu z!w0UPqb7sLES=nTbNEj|ge9dowXE)L6IYzeeHdHKx#_o{WJZyDYh(Uhe!!cf7dMd^ z8@Wc&v+((Jp-J>)7-|41daz`Obx8n-9o8}pFuelb@i04KzdQ1fs1Pwn_|$GZ9T*bH z_SMI;GtMb!t$L>(Xu!O0bCa2Pm{+sCOKK*x-A zYPIPSEzP7Hv_MTjM~&%uz0^ChN*|04kV48Ef+8;Y!ny)_!D(OtYJnKFyR*}>Q*&jV zo!c#Jz5Hahw%FFHB@}e?I_xv;S`!g5Isix>nl>V?MPDaT3vl~9O!G0Ln1xh@Wpseh z#cmXb7s=yZ0t@o5plXhFDOX-yTVT`@3dCwryPT*@=iHc)c&aH`Kl+5Peh1w-a$9-u zR_3{=J@p~kBZ41|A_TYf>y9sJ>!hP%y*#Xj2zzN^1eaTC!u9HepL6x23XU4IqmW)w z8{NhMn+#lmW~jgLWaJy|glLI#JGb9iox2Tvy-xE^7LH)V{*L@L>-c7UZvEk#p8m&M zZojxetqDA;)0MfEzRGW5Z%W7#=HT=ELwB{3<4uM|uL+y=*;k7klX^Fs+@)TQiLK$5p=a$e3kQAAIA^s*?rYMr zMW0Z(-$K^PPcQL0nvlWGh9Y^F`fN?NR9VMH1=Mdz?KVOT$~A|a2+o5OubyMhkilRx zOM-QRz%PHm5zWO;jF-QUmnq1W`)$;2E^_#yH9 zF}6&*bSX(j5Q;KPpCkPB|!?Tr@Jd4~5dK{&ve6jMYR>1^DlPK|9APQbI!m~A{ z17}bt4o+4(oq*Xb)_-1s2%$bXT@;jnY3x)?`IDL(rVBI-uGgY;sQKM4ienx;6HmDS z70*~+b%0UZ1t*;K5XRlM`?CG6U-AZ^IwUlJo@o%g6zhHREK^}?e{SBWOgn?`0G%3+ z%#o00AT>y{2Oj@H=p-rnOkbkeu_VI1e0Jw~vQ^XTK~wFr>fHpTQIz7vIIT1Mp{Yj| zsibAan|@t~$e0|TJSj0oNUhRQulF~|qI$rXP+`qO$pT>?Ax z2DLnF1XtVj;Ai10%S+NRiMnVEd;C6SYnS^v@!ED`&tb?IO7-QP`kVCnIlFsOqW?ga zl+In@NY;(;XYQdGR%c>^^&L9v3wF`m*U292=;RDrMxSY|V>C%kZy@Wa9Xrrg=7m;{={?dko{Ku zsG#`hv8mf3UVW=A37c>%%L5QqQ4^mdo3Az*x?3Irp3qg5uE0}%cHLw{*;-HT4J;HM zTr3`2W`rb#51^Q#+CzA~63kKi)h|9#;Nme)`xFKa3bc%29fQ?6QXNsCu-@IFOkoGy z$F#>}RELd3u7~bmnJ39<1`cFIJ27bQ1Sf-WTCoS0Z+^?Z<2IfSvxfLfTR>xDhcfw^ zl+zB}_>gK7Mtkisgi#`dFX73}l)h>D*O zmiRn24XQJGd`wjTAZ)y$oT4loQpL6AThN&+Ml7OjQ4VJ!-4E~lbyf2S71al_m3DUq zEmqdq7NgR#`cW%SIcu=cH%qnxLl$_GM_I2l=Bn2tKl{6zmZA+3p~WU;{GTL6`ts0= zZ^XJz0B|&p1s8?z{qDP_;V)(~=rRtSauaCDSA8n3O&nXXu#q4&DiS`BK2S4Z`yirI 
z7zdz3*kAT3P5eh$qX7%}BgLgGlbpdk3$;quR@0DrAwfv5w(>t0eH7V3%Q4U)y0vs! z;SNX|<;38yEa~*usa`*EiO+iQ4t&xNK#blemd-OUp5FaQ1aIVTuZml`@FN-Sx}*BH zbO_y=L&v=}Vj1U^9#v~e+5OjTdEquw^_o5&!3Pc6e09T*r#{!Ly|2}EULm@k;onhM zdTqUNW6u4%zX30_;ynJDMqLwI{hhYjqFc!?P|;fPaZ!UoUxpoyTmQgBUFdWwAth?3 zY2VJR<}JuN?|xRT$TY~{`h)@2v!2H{rimUMzIm1q_=m7sVw26d(u5TLSxZk^3w_*9y`JYYl}DQfB> z!b#>U(h{P%dZJPa5F~?sZ5$1d=grXF71ZT#3Z>wI!dT_P=RKCK5;)zmKTpQx;_sfw zml^WB7CYp`2+!P|HrE02E0Eckbc-^fr8Lucz`BV2h4|{YhVqfr8`^0}+CqW?H=OyK zzZrC;&IvwSBi`D^>P7M8v{K52s(d^DTgMEV>ZA_(T}*5Nc44m1;vTpl*dmO&ZK=@O zO}uucWTf5--rUQav}iLCRZ_ZTOGLMOl;McME{<}#%Qb_r)FwCI!YqvKz#BGj-ee=r z3mD)w4Y9iTgXBoI0WdV_sGr)!-=Bl1L17JBkirS>!^5pWYU-r3QoRTVLHZ>OZJ)r^ zj=5DmL6ad1vq3<=|AXLvxXEX*6*XkMjS&d%ONH$ohDZdIOnBlMu;S#!VP!QT9@5nr z!P)ogs>D5u2d$+>j#1wq(GHGdGZ~t8DS`2rMtz}6b@$u=nbHT^?dsw3`8{ZK^>V_Xzx%oUwE)F=3iCO_&p55xrmyx~`*MbBF zC7;oNQjF^MDtm+pE%D7n)dR<}yqbG0u?S`X??)UPn@E=dmz&-MA<+!4ySM@aun!u8 z?-~fp#pO{uX~&RR(0OO&9%7SpV|02smU?wOuSksG+1ns0@O{NRnw2` zYN7>6*}bF7p7KokTJ4F*xK2S!F+2*@NJRNnwZj_KPLn#s!064rp+aJHHuVp1hd)_g z-`ql#G=7I{6hLVDGJm&}7LjoxLgp@08!YI%xlQ@t zA($k+;b-QdhE~5$2Vl^W``p#C$wA}$fE2gz=g0gCbSbM+F_Pv*K}dat21LSq5@r8!|Nzn zx+R!c##YjFBop*BXEzqgM=~dsH1mH65eNo%FZc2o)Q@;F9l4(N=8$4%871WNtq7(g znvfk6MM}kFmDw0Q7&U$xl`*h3=Y?#$G?SScgw9q=)A3HZ^4Rupz{EP|fB>UJc5Sd* zd251Uo0YKW<%(w-pUJ*Mq~2QL3@}G^>}{n{z~wC2;1khdv*`TNY>gSWzn%7w?A||r zFrMhH9Y*nnFYt4``X4> z*M{6l25p$t3{Szj=}!hL^-sl2Y|F7`L=pDJvKr3)=&Dm|p@ZMWNk(BQ=TnPq-E?!= z%($<*8kp!YZA}MEV~$i%22r8#)=-lW=P*KyCqj}M7%2$86O?>W@a^BeZHe#*W+T6j z*oG(!iZS|D$2=8I?apY~m7BA!Jr^S~z8 zj`)c}UlwtRw|Y4}0)TGc1DzBmn)20%vUpRQf?c{@x2{*rv6iuGFMP0yYUQaFAE6~4 zZ@6A!{(7!of8-B&n7>GDzE9HrDNDJ?cN68+Pw$WB_v1RPLQfwZ#G1>CWm(C)@X$+A z!HK0u&OIhh)iPRb#`mVr2YKV%I(__|NTE0`p3M`oOrxtG_Fh5DFfV9ye#JypZ@89w zo;Wt)<+5Jb3T5CuMf}=qZDYM_X6d4)+7Ml2Cuc#D32(71TJ>wU6`WPCZLp$N6I5|GN`^)xqjkV8@aO0b6HGYXQ-ao&l-00iT5dgNj0}dJL;Li>q z2qhOf@x>GCCNtVLFRXXaV8jbge8VU=%P5GC{Qk%;W#>!1%#e=M_+^fLyTt(HzlUGS>?aX#GH1BOmExZ>}qB#T3vTya3Q!A;bo3K ziW%dArSXlD~-#Qtu<8#F}ZcakoA9@IgsT 
zG{|UDzD^cKNx3p{nFFR>;|E6V9;IGYCY2X)ygc6~ja67o8KO$Mg@)RA9hnC2Mdh(V zxZ>4I3&;^EMvOLkD5Al$uf7i_WrSi^dl8gdGU0e^1`d^B0N(7=$}xJL(+`f~A;o5- zpSA44D3XUmtxgWrd+OtN9~Hy~tM-XuqYz>Q(-hnG=q)B2L4LORPRk02LNZxkkgc9~lqg zT2`a1X>!T#4qii;1sFnLnwh$NRpJzugJ2)1yYQWIMH;m*K)LH6#De+zUfVNc`NzGL zEh~i#u23(B<^OdiHq|6m9s_IAg$dENLL5-py&*Fn%{z>3Uop0nyUKK!ouMp}kU|AT z3LPP2mQz8xKe3ORiWX#h|^# zk9M_K*U0>S&C0VsiV!8N!{~&tYWPc1=RlRV{7?kodi)VRT?Jf^eOuG9%s@c-)Q*WB z-1X(EPG)@W)%sm`+>hDh9bb_~v=#WQX&mpOZaumx#i?Ks(Bzgp*nC5_EFQ~yt1;Jx_16EeeWcT{?J3s1l%$-a*G#{r&st0$JeFov%Pd*JKmL>?k| z^5FAdhP~l0BJ$+Dm-wgdAlF!RyGG@cAFQsVl}z-Hx=#Dqdhj9!K+$DT8zdxU)v5Y) z)YlbCYlY);*L=$okx#IQc)9bDpEB5x(&7z2MMj8GuGEY!><`F#gJg#{wth1GigLAj zx!lnh_fYyEzDP*q`2Dd#8pl={zq=aUrhwJN4 z{-qro?li4I{o>z4IxZzM500LM>=b;7M5`0BB1t!v)J%ZV*g-zlIiOMDD^wiF0a-$m zrdjKi8y_c6^0l51^6Nve18Dw9x$JoH)$d7G0oCw;$*d?}5uVg~_EF1oYZQ%n4ZA%= zyF{!&->9ZL^CKXl}zG4bi zcwzJjQya`T74dNQfj3ety}-!)dQ?N#5GiK}e_cBz+x}KP+cfj2k_ZSWYaNfKoe#!; z0p+v3vo8rB&Mc5NHrQm`Z+Y1mHhZIHlpc4aK`wAZ+I=b8m!Fp|yV1M*nCEos{7UsD zGg>4!X{&!%7&=H$V}|Q7QIe4pu-I;m&psW!s`tV@`zTso37uBZ-zbv=)`lnN9bTp;{&WZ2f?-9xxtdZ|WNm>fIfs zjU&@^svpGYql}-{>D$S&y0DowA19~h!g0=1q)t6D@yp2rCHRzRl8QR0KQo{ zL-gw}k34CV)&FF|{r#miB{8v6y|?cnK(e|jV2{H;Nz zUT4!Na)gG=V;1UfSSpJM8QyZE;!CtADuxn3sZ3Pne9n~kHBp(FzV>$pZ8CIX)ZZK= z?D|sY;$Kw3=+`5*Vx8`gQsY}RM;EnF3o?E?doaxIeap5LvE%|e!O?csn@O- zGqGLRc8vj6TlSPkiS$ynr)lQY@D(fi^~Tcy!n<0lE(W=ry-R&f^gkaN&ir{FWcOv< z8@F4_cg+Y2uAfw{DW##*O*`oERc@vUXWeu~ypOAf`UIQP4k322KDLK{T@}?6c$u1a zP7!!P_u_e!nYO^~th&yr_^@*K!J89=$h%G@A-h*)y*`t-aAgpzY5*vNH-tjIn!y}qBXf!=E`*+gSz9( z(cYkR{5xEG`)STKDNes+@tSXI%5x#t0c+Jheg}e}Ql|iPC=+t;kb8yxmQI^aS`}Nb zvqG?=rAih(IzGbU2@kvGX)KH2e24|tJnrr0aqEwC3%KlnS53t!mSR_Lm)cm}DHr*q z&sKX<`^Zm#;^S4o4Xpbi6mg-I)A?x?8no+g zFGZSX7p+H8Kb_F^gyl~|)@>#uO1hYRsa8Fus8lBu?uX{yi*KBQ0~C82XO;+$|Ck_T1fRMIZE0J)a^@ z9~7c!BMwVO^9B_4_s8tlZ1|COQYnuR*D@wk$ZDOp#>m@W`}f@-4!_4pRYGljUxh&* zsy-n^=JWbY4bdkuYb|s+&{%ZhQJ}agKqQYWl39~rL*FaLXPmxy9zlcGo&9WXP=gaT z;jzZxh-oxk(J#-2dq99U8^)^%UX;c1_n47JUgBwuQ{R{*qY_#EmnGI#%JmM4>s4GL 
zSg#5Bh7_+{)WRC2?d{-PgG%+Y!}6tjaCUjD^`$ATAeYy!MVIWs#WUyv>dMZ%ZW3k9 zYnG~?35cNHII$njat>6maKRJouS$K%(v3W!1j;~7|>ksa_@*J&YJdWy%-T~`5V zI%B6JjOik*5*2z1)c1`mBEmOr<aEJ<6(Htefp=b?&m(;VCu45@} zY?R7&|5614{ui41pmjZuTI^7-*|GMu{RD#NvqpeJf7M-k4T{{SjLa18Qy9wHFA9C6 z4!6+Q!7;cc1(0km2U?941zukN-~gmhh}Ve{IN_*l&l#)5W&vpd3awAjWtXs8q#WgS zFM+Ms{en>#wIv6ftf!Fb=0T1Gn=z*A+yB8$tlb#StIn@k&cMn03KW+}8PD+RJ96AG=tty>tvdFxPv_8J?dagOUAj> zGBWj8ZzzERXT}-0DULL`D5`E?5if`uE^h{!89&cD7UJ>_sN!|5xYv9?J$uTkDOz;$U`9@W+E`)W)z0UM{2uz%S=}G;<7GblOTBqV?wxDpTzO2r zB*f+YQ>Y~Sb>o}%80^5)Zumap03=`5l0x&n01;Wn;8}4kksXcrTOsmRwuZ98wi77` z8%b%RrRSee7{Dnziy=*bMF%*YS}s@v0dYuL8JdY5LbRh3u=ehaQk)zeaay?#k?MYNy`L(OmYQ0|9p*-k+v8~n7$r_NifV-HYl3q1ZjG^i!z4I z9rc$>JW}#u1F^Y)I&!3jw&s;oKI?$K#=S&Bh&jhfy*MT|{gS#EzQn1%Pw=*i;!9T1 z#x_e&d?WqzGIvos+8J98V!?mEijvOqhR3A8U$cuOU%e1xg};hLxPtgdgp_4JmTx*J zV0G08o0VvEBU|d_y#1a0Vd54ZqLFAtzrmW(>XtVlBOjpV^Kb`&&Z;O(0A_MnJySyD z5zsm>7LOsg1o(W$Xuym~f-nqBpsi1c(k0%MAq4Y9{6(QbrX=rE(R$m36B4v&#*kmu zTKpShH+>;E31~&qF)rcZs^*g$ynjjN3+kPU=e!f>zE~9pr zVxN(;_sNst*Cu*eQH=RmZ->r-G0*(C-bH;Lm4Ozqos+Ygd4o39xeos!XmKDWhZRcJ zbQoLzZc2r|82(M{Ab-T)IiAB;rMz#@#k54UJfRorB>JXf%|NQrv_r$($4qIIt#ol# zd{2Y}?Q=&Fjd}hhl*ny4bf-XjMFBAlZtLa@f70U6OfA!_omti?*UMMK*D@!&Ylrei z=~ty~rI@08nCLFtm@FAYmt|r>Y|tY`Ei8_9?MDmiQxc@R2@qY)0%wXHI=-acB%8I5 z;n4j3D%#?Sq_O8jk~RoETi}WfrLl{S(sG(x#=EaE3$;}qK8f$H%6g5#9{G|II3sM` zu&EAVwhU#=TRONUq&Wjazk!$3p3`y|&+t5?+@+KUn#KzEpX$cH-l4`R!s@T#p(4U( zDg=X_avz7yEi?jU>!B#pT0QPI(1EZ%U3*7r_o7GvHy#<~IGD1lK4`!B+D%XIF*O~J z-jS365$8I8=6>9dl*pY+=R};pY5UQ&0}W&smz@8MTkqVlABZ?-^fgrov*NX!$LU;9 zz#QC$cteP+oFjT0F65A2w=!;cQ!LusMn4j z3@*OVv7Zx>eL+>X`ktSi*A{y<<)Rkh? 
z&CYIMQ;GE}oHYn^08u;(Vy*^d=$N^V0OFvvkZxlW{ZNW2u|5=q(~ckz^zqi$qnK+8 zLlEu!rZlCU>!LNHG=XR3l_KcC={(6>j{E!gc}i;*a-iTqawjEQsSwjr&iygCo(sqy zzGcAib}R2pzV%AA$NiZ_`6pWK+%NKuef*YdJJ)f5Rf@3|l}3$)m9P_)1o%ULrz)eQ rKCe~N;s??F*+~8Si`P81lw&_5t8H=nMoCk368vrFKHJ>wZkPTS0oMJr literal 77942 zcmc$`30#_2zBf+VnP$4?wrNbkOv>QcuVH6Th?ZIe`M6%iHbNR&iO3?eF`l9_hW z8U-dvHEv)NYFt3565Mb}Cqd!{H42DpA^}tof<#bpc^|WMzwiCe{lE9Vem;JlhjR|+ zoab`4?`8b^>ffIF@XNoY^%YbL+5&$6H`w16Pr9qWDOfjc zZS0IM*xImet*_~L6x7`pQk7rCSuU66*Co6DOSgqVv!usg<&^fU%XR`i{o&8$?>@jY zfODZGoWcr1fdwnXd0%zmWfDs&AM4YIBZVe4pl6Q_yWfD)|z3 zgHP^DVl*#fj`9TcUQAI;CavYXa*QqVBX8#}Ctjlt_BJ%W+LW21`a z1py@@{@Ba-f)voelrm&7lcJCqbRAuCDI9)Ao%VL*_6bf1rI2O$<0~n{8eT@Mj3Gu0rg=F4X*~_ zI^&?c)>@bkVYik)J??9m0EIkK=~sy*i}6CaUspx7SZY{AVVBI%^VKNbacZ247OQ~7u}a0O zN&!tCQd3$0bIYp6uQRN4*eYh0ULD|4iQ%N27dKarUz1m;nqy0PES+6ud?Rzl{9sa^ z6aEwkVnR6YaKC?7UyyV3jN(4!EfHzc$18DFIAERi&FksM^;Nqq$fV^F{$%fH7sopj zCdiI6idpL$-GALO@7n4d!CIujId+u%I(HbiQ@#Vl*xZ~Qn4137L)(Nu=|MX&!0F}v zc{~bJp=zr0#HwZVLelt7*DhD1*eqU|i?bCl>hC%E#zj0Tp!GtLuOW)D;DvDMk)R{5 z71{DdeB4n2^y+L|>+ygV%r$23K`|y0AsJ-5?#7tXHr&V%XT{Y}JPSW$j=kp4;~AWt zWjSQWqsI*?@F(HQs(nR@*Ak826ZwMK*v^mzeI5&UOn1EV!X>KnW!zkaOHqu`xx!vi^6i}MrCi?9gSb!!1(c4OFBBXM>&4Kv zJ0moFGSHy6Ub$V7gT@RWhZbQ;LCpf{<9>cn#rB50$yH!QM8;2&8j4S#G1y}H?J`Y{ z*FwL2AwiH#E2d}A92#qVhfJ}fC4?BOamvj@N6nZOjI>$(eTwX3JP(YsW9!uv%<%4z-SfIgW1mgAO7wEfmO-QXki%Wgf`f^Dj#S7tG@1;OxeTy z6?-_eRTEV0w?=wz(#KH35XT)OOI-un$I*(3AMdoXJX* z>mJ1|xv9;v5l5X3TXiVmi>eMy&+AEg$ZnM~#5WAt-FjZ;5E8I9KdVe3Gxu2gEtL#0 z*N?TrSh3&ezQ(i0)}I%pz+jB&bUjwERCP;LQbFPj--!@ zw>YD(Yv$U-RT^3>t;GwWXi8B?9uI(DsS$=jE9&)_CcXB{Aw{X04D>EYN(cK&Gu4B! 
z-CM;QZ8Xh$E>ekKQu}!GWE=Arxw1s8Gzl~A1zXB#VQR%yW!R1k?kw4HtDK=N?%x>7 z;8YK{N%0-bhAI(fNuO&We9>t1ZYB~>23NE1Mdx%&mJ6VjhDdws)hxnnVzwH~U?_|q z+6opM{OdHY4=CYEscPJcYb6+NDH0k54z0K?dzS{7otiSBSZr$h3w!sk|hHtdQXRO-p%6i$*sO8J4)>v z_nMQ!ziPZ#e{T{Q^I$LfxGfSo%Gx-S=)Ua#nhYjii)M&=uPy2$lW{f@XR)rzIG3L}X;rRranAfWHjc+B$m+7~ zd-{4HU`g($~#yZWv={wo>G;%cz|JR*frBp+fqAV=o=T#Be?~|M4yw{pK9-0 zuNBjYRqYsO6ldCH-sdn!S zgVmJAg`GUCiXr17_iLY}$Cu1#BN_hp{j(%56B)V^I$n|3aW(+-)=&f*m0w;^OjiD4S7!Cl#g5Rc}lQ zO1FQ84N6|5Fnxu!%TuU>XD+VN_^$X%goHjr)6(UXT04CBB<{hAaqTN@%_2dDS&Bx{ zKim(S&>d}rsY&+4YHW0Nk?^p@uo={WyC2T-!{vur<6qX!ccf%b1vyL=O%rNLk&@%n zsf{IW=4@-fSMv@W#<(Y8l3RDRz)=rx=u*opid0OCq7Xd^qo< ziJ1T$Igs9~nUC15=*xK11eIq=?&ZO@?1wC7@a9M|UMIGbI$Qe<1GAfcEwbDSO#a4v zbB+Q6XSZV|7YlW(aSaz(Di)M3?jCOIp9Lyt_Uk%VJw)l)=PDZ(Q>|>WHX{lDUEuL}|u_lole*OU=~=Xl4=vl|FS z;?%^;AuvmcSO7NS29KE{hp6Ht*lzPR5o9nm`SEl6Aq(VmY@zng({z}dGkr)wMETFf zVP6}lUiOC#jFc5BLJ1gPwUT0jXnlY#3 z(%vg&x(B&mT^yb~T=1%vT!~b}L#}b;f3CQw7U55pM~2(^m9){4ef`eSk9W6}!>1Q~ z?S~Y)$na2t=c-=`@nD#_xvUW1B`*cP=@Osv)kV?K{ag%kuy}S)NqYs0gdDu7JAN=? zC?oS|{K0{~q^qycF+4lzWtjh|Bn~8`Bg>Zy^XFzBtsLm>Y*&`C@3ob&e#k=(9xq7F zD`6?h%h;J|U$H_-*jc>(%V7r;zE;J&xQr=E!JOA{HG{oJiHwp*$t6xT3RYFICT59s zD0?)p0N6r#LaO{pmm{l!794xzn&vWlQBbj5{`g6@lY{8VnAd2@`8`>zmHy+*M0&=X z74dzpg}R%9W_&t6fl-K6c?>L<+N+QGMiuH}?Er}Q!Jz9h6>X#i4HbF&^>06(qRA^I z)O5ZK_PCjc^JSnoqq(g9Cef>?JTmW38v2?9qbw`Lu&c&?`lFzSNy_!K$#^G(X?!pP zVLGIV0V{KgPdFBWS$RVPy&s8PGsOAV9u8CV%~e*nIW~E)vcI?SeLUxa>dCLztA#7d z)uN`uiB5Erm-sld<){D8~*+^(_^Jdynl9%Jtpuzk>H1T&((L!ZMz*QK& zONM#sG2wv7S?H;JK%hG;G_Rd_m&fTH2Z5vyRIM`ws)*bjKd=%Y(EUTO%{vtQw~N*p z%9fPZv$>|&Bl-GYCqEt$AD}Y`zSU*3!R;q4fS|9S^M2aobvKh}?|)uk?7m7&r}*nx zr9YO<0={`iv+~Ecsts|QCb27}^@E;Uz7wq%M&7oz=lT#beNw?$DLg*{RYE`cC&wa! 
zpk7%v(PuiF&*RQ4;73f6G*Xh}Wt0vsN2=#m4v3>O5)k2{^Ms-3?S!_L;{lqEjE}>d z$(b-8UTpd|&5EfET3jPsNhoDunik`R8ugKVZ#xt9!yBEQNnPjr6P>{j?#W{TS)-|q z3#3pm#BRc#zse5I%=_wT&>`b)inx1eLsde)YrenU&O<-=u5fY61KT!$q*m|88DqdV zP!fl!ngT@C4aJbWJ4tF-&Pu?ed$&KNN?)Eh(sS!CCo{S5@Gllf9huXJWj%u|iI$G2 z2lE-_?93N&EU?6y|rI*mpycxjM%&@{!JNLI{_fzQnHxV$@um8X30Ir{eT_f0g!-P=aE#w+dx7DzBDMtr@EhUnc(z zt$*8h?cYhPGI8wCIyP-|NvZeMx2&n>Oc4Z8aPZ|)Q$j&X31hy6V!8E$6Gvryg{|)}VD9LJE(SWzB8Qp@iW+t{-w&80%su&avpvYB*Uz^rx9v z3mLauv5-5}mEn_YR#l{COO+MaMoCrSDq?i0auNDk&edVuH%k=@{osYcgk1>&dDah`s4mk_LrbYUc-PI%bh&p+zwr<2H*}T{C7t5V%Ic{J_mFDi$JnnuHrTZF zzpsAo&fj{o-7~e&BXveQCFq>Mtlrbw2_{vpZ6zqzjXTbOg-hf&^+QTKl!% z=}C*BQfv{r1}E)@lvEumh-tw__tn=;qTf$ucv{bOSe{C3w6)vLEq4&BS5JBP7poBR zjXm_&PygL8vxgUJG#Iip{iN9yCHYX$R`~MMrzw^oG4yWs7mV{W`0D>f=J9cq5T?j{ zZzg0>Kv*{K)YhQ$3iUe%TnmngQWVL0zMGk~R>y*``+ zu!7;@W_(m%`7jA9$CvnL=9R!4q_fDu`PhI{9$-I+aS+1AP+yLZn)$bxjTBd47VojX z@7)tt)53BpBHtj4VeDP`!x&CdcrwF{jGUaHxsmhFMfCKrSIyob>CyYKXT>|;YIW;D zNH$;6Wwn|?vE(IVl{+)v%4acbw-N>#Cv~WWzZp+kNcG!1%Qc6sXck$XBUi*892g!u zARg=;J+Mti`3fA2G0OCVT%c4Lz`2}cu5{O7V;Lt7GDR3K9TpQR_CE#iumpZngVJG} z6teG<^nPUL=gU=0D@=$6W=+Kx6q~Kchku1yFn@f~7BI6mK3lQ`WYG2N;!exP@FiO%qzehiAm(F)bTw zbvevoWah3RdWud2u5c6)IijEDv9Nt(4$~gm2F)TQm-$A^3^GhwupI0X9IPzh&+BKGo7~wG=V$)HtFxWo9$%3z>)18xg8!z(PpE&vrIInl==y@O$^zS2vouHD zy19YEXQoeDIBhs`L(VvmyKbHh!BS?r{~6?J zV=u6bzqb%tq^avs#3?>3yK}lco#WFTwkF!kd;l|}Tc#W8)Y2^6vw44{1m{&`Pwo}u zxdyQUy9&O@UVQ%R-hyh0Af#Tz#R&tNV%TmM2Xn6U;juld zJCLZv_8E33tMvNrRb$webA3LX^btHGZE-lKCB0WVw>sW)VbNFEidZ6BZ1V2rF_h$m zygzO+_D;C3joKhLDB^-8RmctUV3oq1ObM9TT;DNxa+B_p-RmrdPz zYY$6k(j3+5s^Sc?+nSqv16PoQOC1?|xn#V%W3#PR7`#j0zS*Ft)Q$6-!}q(>5Xs~d zBV{9aI&SMgWot*zxqb+W43D-th|Xls7cep#v8&?;1EQ3knplc<5Cz62lW*Eqf5)ra z|7VP}XJs5>?$pOgmjox$t~Mh|{mwbx)taw1c@nF-ie`Azt(K{UL58+9e;U5))6cd0 zxnFJ2HeOS1piQ|0iQ?*yZRwQF&i>jEN=RS5PxsB8t$!qcQq~_AW)&_xoAbbFPY|vE z=S>V(v`a=mL2eF%g`w7h&oxoNOVV|*u~ty5upgKY`C>pa(u7E`KBrXHtMMppCtyLN 
zz->FpRkP*{tuL1|QW7;Rki6VdFljDnwvn|Y3BVEwnGr#RGZDvle_yE>ha$vfxdAcoa+$QQGqt+hsq%gZ+sP5OCcL4S=%T^zE?Y;=4HfG2W6BA=#Bsji`S0s> zr1C~g^H`v=ealY(?LaG3uFk<<@Ja0UlC{j4OT0@%JWddV-mKd<0d!+-kWHrMIalxH z`3bdF0X(ZqxJgUfQg&4?tR9rE$w+=OViFC-GYn)_nJ#Y3{yuI`f3<#meaC&Ak?j={ ze?-(-rZSBo`2J9sq{t)#CsYN#qTp+AYXU3WuDY=%DRI!0qh~7}n66sU%7Rn5tV-Fo zZG|eDG`Lmh)_kbRen3WIwU=}%yRb2^dj-eu+*$A z4^6D_onNY^LKHk$aX6U)4|kGg45m6q0mvwGv^>M32H?>yT?R{_9|p<8umGJ25V8JUxn=WwwDa$lhOAP*cVAwIhP zZTH|WEDZ6rdbed-p7+*?Py!mYH;>2FCb!V~X7SFr>ibSs@`GwOzRkKll8KGUgGFE$ zdcJ@B4YR6qpKIe?l|+o$s%xwE4{o(*^oyFXM*xxm*$7vgF*C;#l1;;;1Sf@)jxCGZPEY_ld%EZ7 z5*E0G4QiZ`ybD_0iK6DRxS>E4xUNu;2MZ|=ynHy#)Z&byAAtC*~0IAeo zQstJSb3#&DXz>pXkgB2Krm@pbM7<0p-Yz^$3^k`<45z^Gj+mB|0E2f0wOuxrHc4W( z45m&-XMB5=_zhD_RNz~t&*x_{hC7vImFl_JwA6sBNz-20$%2?4zLP-W23(?&qWDea z!&So$th}#B=12j#(?d(yD!jg)Hc8jy78H{v;j{RofO((x6(&>_HtTCArULf3XC@W+ z(#mjKUs9;{rk$uMk|HAT*ZUz5N(rogI46KR8(ZqA=vy04Nt&*MsYv4qc79>1aB*;$ z%|p{3S&(w=?0j4p+}yjZ`b>e13S&I|qu~d-u3BIDY|ATu!q~I5S&gP_Eb~08-uAYAJPUjA_l=%2wh5t4tmSr0hb{S zE*{u-qph6cY!2aOT4O)S100swh(Ol5g^2-!7J#*|D;-+kl|M|YFQ&_Z33Np{M%y;> zpuV;73T6p0u>@fJsys6nkq^HV2EKZMaC48YaWKO}``FHHVd1*A7hh+E-B53Ia%XdR zEjQRx*ZHOxp5Z~$gQFfb*Pb>waVt&#Wq)GWwuv0w+eM8ARv9PIX^HzLv$0#8nWIiz zISS1-%X5QskM4IPr=Av(DV#s8zBa`8ly4A~){ z|0G!I@yM#pRPP%yX=WYdfchk#dD%LJ5;lkXyBD#6iF#@X1GqLkH1uty6fgVmu3Vgmr9++SSTz>Sg#5|bGVi504k;8ur5qlYeYdo!1K5;}<6 z!|5qLP75rdimFb@_&w(qdm!(s2W3@W-LvM*Q+7`$zD|BA#g}tpDv8|MZ*RzmA=L6S zo5^uq;`fAtF(s~7iqB#QA%yg7_7O2MF+U=^VP^Mwl20njCnNn;^FDU@Rxvd$q%!#I zlpgF$rsb@UB;X?eP?xrqy{2!iISKL5Ge|7FrIVBJG^1|npJAd1@!wC3UuV~ zAa7bBNqntR)x=mn^Agm@c6=%+$ZRt?lBPmFTn>J%<6Y-<{d95$pBK&1*pGA^{&749jAbSwZ)l>vC zAIo@y$7PX{y@dev@mjn#+(TQbj$VlFBYnXoOf7F3G=<9{0ICwQbeq7htd73v-%OlJ z(=$A@5VA!$9dt&T(nB8HU9dG~zZ$%1-)w~nEm{>48nH=Lyp-s@F?-~g&x-zio{7V2 zI$p|ZilmiO>mwifFr@k?`d9pRhT%8u;^wn7-clG)c@fwdAPV-@q0k+} z=HA}HFSIpISP}tTDZr&T&ybUg7@lN(l7%k*fY1qc*VkP=WSf*_L z76cGJ&UvD1V^(-74I%f821fiBvXBTY~if6k@Cbx2-BI2d?N7hCkx 
zyfEo2aJjh((X`Rol{CZ6&vQ=Ca_(ASRxN55_t?7EHVuQ-PxIHd6@J)a7aK+2pgcM?5q~5|F|z34sf|BN;6j*-4c%VK53{6E@Wi4PUlL6 zxP2)1mWZ#d7iE2_=?)|Ln(=V*v|(J>87gmvkR~TQr4|piLxTEf@Um~=;)nXqApbFj z^~5>}chWK@T_6&@d3nV`i9wgc*OhR)=;L6|(*mXWHus~!LQ+bL#Qj^F$xgYEa?r35 z>yE<@!D-@aMn&qY2t2Qij$~l{ryHBmgTr`X0GkL+e?YjYfb-3r71eLs3K~bVNQl^UGD1Y2;Gb|d}*G>7hGEGf((?CDa{M26nwfdH=+98 zp!szP`S1{*z?I}*4NtxHXo$N$TbzhAy6ILnQV6u~iAlEK<-(AR*?ihk)))l8QQI)u zXH>)^oOuBqmu@XZS0PZ^1MzN#{LhD+1=-)`)8+GNFLt9iD9gUii6|EpOVGL0okGUi zxRU2G=W-HztL7Y;Gg+`{N$ij`l~lqNZ^5522}%0wQQ zki4jeB44c5L>YBchbgjQ&uHePF(u+qdUmKcA&5wN$9v|laZIdqBU^sc(!I27?YFT? zrFV!+ErbYVCtJ7mo^Y8em_*gi_6vgG(s_Y7pwj#mx#xMFcgD_ag6)}4EqOScm z9C+wM)t|K^bfrR|MM#VjW|(cWsvZ4La}chg*JI z$-2gKahqwarPpfRqRB_wQ&iB+`m4>KUC zpW<>xbm_w+USbeXu`q4Z(?xYS+&$i=&)>1nY^9Hd4A)L8>^Flf7hSFP(-=)>2+O(( z*tsB0z?S$@km}M&*VJd%KZKoG!M$T+gt~qn@^|^Lu683lqP}K|4An`DGyM=Dnb=4W z2${HE$<>cAZ%jvpD(<(B&zzTWGfJ~fHknPnd223&KlW0*yJSd?uHUwCUsmM&oy$ka z=f?*CHFBIMCsKsEV_;Sp+aQS3IyYd^N|gJ6y(ne23ck4~3-@nuV2>(P@m`Vsg~kND zY#z}7GqDtS?O{n!CIgqv;5z8J7l(?h!zWt9hYb*<C z72^k6JOM|=>OHW}nD5&{#lhT-Q_fa+vMoUn@NLl(tDl!k?|WkNgAAj4(OCua$9kfV zCO5KdsO9JNb6}4w`MNxAp^8gj)Ymkxvj|yK!^R(mYD-}=HZC`Ayvk2oR;X>y;Q7_5 ziHZ3VT9=3P3$Au>+p^~m$gl`rU6gnYMQ_?R(Bm5Ugl;~>X!I!Q=wxLLPo*m1yW`!a z6|!XmN1e*xlIuBUT3pPS{cL+yqr86FC?PfA(0!6`2^PdT;zB`Uj3zF8(p)&(m8EFD z!Y>Wd9kVL%%(SVXZwbUn!1N<19HKMj^B&oC>FO7<*)@znru%f5x;yc5M$8>q2lJ8= z@>)^;MTQL)HNdhFgExnV35P0ME8^=Ttof_C%Ju#Zmid5?*5r)R6-zoh6t@&6k_o~m z7og*l6+)fymZf@1SkH_wbI^>yD3rTqYiL ztD6!Sfq))aIYmw>fjL@Z_1Ia6JkPsY3!6F+f@z1O1EO!B(CRE(&VyQlMEuwwGKKe^ zw>4VV^2uXRtOcxLfE!IFug2=b_?-)hyBWcJ+OrF=aE{|%=umBtv zpa@y*;BsFG+7{G6<^pK#)^@EgcZceY_gBZW=GTYg*j2JNq--h z%-8QK%x}hvkD1zNf`}ZNEqG0OUb$i0CSQ2+H-9$DboqU}R@Fp_o|INFCyFJ<^l_Aq z$UM;!rYKa~mw`>O!qo%}tYWdoe60s_jn11ljY&E3hNaaefcbA;nS;JT!_k&ysg2K7 zJKDnF(%X1V`cl!JTNCGg(=fh@c|$XJoU0X=iBbDq#G_te;!tqP!Gb@zU?2gnXy0Su zsaVzUfxx2jebS6SGW-T+`q;5*dapl|8pfIgU=MVlFjL}1ap%1=llErG@K13% zg-Za0%iwL{vP%YLi8^&;kVx*POpF~ 
zxMB@loD_se^^d&c;D|wX<)$?v@LmIO!24=63>O^jD%Q%5`V{GEMInF9z&< zua-K9UHm~uVSk#`4GWMH06DV#*$<#7+W+}D4Uz!}m>3W6o{sReHxi|D>%*Naup3}4 zx~j_nH?qgIH9jl_kQ+>OvTB+eFrm9;pQ39+(r6;UsI1a(GCUuX9lBe?fc9EWZEuT7 z4u36PTv?>>gHR5pm+&33`FQ7|j~aW!LRVR<|HCB3H)cKR`hUBKl88h6OU*Ox@@Uu%Qn-)EwG5U?u#$L0VC@P-+>X_19+7m z8b1Ac_k*WEZO{EwX+LfMFzsjR`{`#NraAny-1jt}E)Xd3ua5#09#wfJ?S{Vp@vE(AYZ^Cr+!uN+ftc#(O zpxsPvx#%Ag8@Hcd{yj*)rUsamJabcZlSz|NEE}ZshI-SRD&7a&W&Rxch!1)NbVfUy z-}&1Fk<=4MY87ktShL~Yrd@3B?K(f$YH-`S(7Ss&!La$-2S1^7Kp^h<0@MCxpY&r} z;#YoC2>p*0(q!~aq`GW4*I~qo{yy8}&U)lvL7JW`ILKV~2mIdyor^-j(^ed!FB0(b z2`M^(1{RtBxh%J7@efn$=Tur%qdKyL7z%#abrcwwgF8SuXM1{9-X!}80xiY8UFqgV zo;SC>-&FR{Mcc!$$Lh;Ab9&tz&34L3pa5E#{VNsF_$45FW}Wn~N#v_`e1|bXA>c(l z_#fVr-V|N}YU*i-3OF@%0-X%z=OQvDQ=M_)gp%G)SHe*{KL=|8^pbyY{Nn<6?Nia>B*S3w7^9C-rC2g`|q`N<3Hm)OLzqnD}q&bK< zF$jp0buz&Gd1#fjmdUO^vC#fo?&libPG%r!sLHb4=A(%4*0x#$R&h0htAs>gt#77+ zK$$CmLcNxf+nd_mRmhlPc}xs&RgUuu&@d@(b=#)dIvZD!=Bgla7a$RVLibeCpq^Q$ z_T`hWly=1s9hBvWdaH-_mdcKx>O2KNrokeDz9P;r^sCF-hXSWXOoKL#ak)b>GGr)> zbMgCmed592PORrY!zMP6MO6pl0UiqM5yTTNZKl{ls8a)VJ3!xGet>Z9`seQHN&VAj zTZ34oV-4K%vT=t^Z}xeA5a`>i$}QCZL)mjv zbK`x$6K33L^Us%eKHeS3&Emg{uRi${GLv|O8TNk&LVFf;y^FCnJ0PiCpS$t)Hq1{Z zD+MCypQMLiy<$yP$TaJg<&&Q+;e`sZ`krRT79W967;Qi%g6qfTAz z;UBC1cmM`Z{kH#dDWzfb(-Oj^xKNa~U+#<4M@9Z7%bqE_&5_SHSW&L6+_>rGO%N#O z9D4mD!m?&Qy<>cxIswo;%`0i%d(25RF$O&(zrTLSrx&3;ji3G?L0Z9^S4<<2c`c>q zNWNlK6E6jqQ&6>61^DD2k``wKNs@uUDULUu46p_0F8b?xn z_L?(a%nlG^nqaRe`fobnx@)&_IbN!`szooVlbGDQ5+D-C5;hxGUWOU6KG;^M46EZ0 z*4bSNgCqWvN+-H#^MeG>-2~1mvtTj5L==p!WKsl|ibx%2g8+(i%(W1goJ8}V8QFfO zHZ5+%%(G9mo9)~)0h1a z-!L=hv0v&RD=$q|7HV&Ira4~}VXap0^=^zS=u8P(|Ix^Q0kCS4XagBTUX(x=F*7w2 z#7_l6bWFshsdzvs6164bikk6@!m^9ti!4AZ zi|Ug(NsD~x!hH(d$;be1{$9eH*o=crD%PXDLPhe`;P8@>B`1;(Us%~4t}-*j|8 zf&ldabUAyyiCKRym8KN|;JR2RXgT1h=q>`6`+*&#y?*@3;n-4K z%Bs+G$P&%6*-y>W=W5p}f>etbO+&l%P13>Ipfka>kNg~Kw%+gkVE@k1=G~9p<0Fp| z)12v{yQS{Jj|d8QL8@FS_qU#%u-E_{ORE5MEZuY1C(!8U8}EOb8tc7f>)G1SEnq3@ zyC_~rk@-grV9`OO4Vc-9$-{qyd+i9LZ^ToV$=V7^RFUzw9>nkS8i`vL1 
z&jXtN><63}_(1HM_iy?PxcA2M{>r_$6wHc}lgRgd{HWLSO*MN#*SFh)_A6m4nq88u zx4or&A6ZTm>rii~zoQ7y$_%|BuH+lwA6M=tO5Tn(KXmTh>_33+)S(}Ic0BEon`G*` z7xIy)B7Zq(!r)pkvoU?e&Dudo50lfgEaX}@>)JK))2CnH8J|=HKpGV%#Zo@v=sy|u z%l$woxCTvs;}P7{ofcJKGpogl5r6)^3?VxAayRGsrFiXz$1AdIoaE28S9+giRO}ytC zT76%WVGVpcm6T}N_0fv>yqFp%ajxJ0T z(W2#Ff~TVcPAxtcCU12D%*^Gr>+n!%{5FKGKV_=k|vES@QA|y zxvYCDvzfmNv!1W3X^dQ52d{A&9=Y*Rn)w1KAm$widlX4pdrk|ClJx$zc94FRwX4y= z+C9|T-O{G<5Muy2!2cY(4Ukzm621dHZ7}{AVH+JL&-{uP=R&j9w<_F9{F|L{04G`B z(Or$J`Q(d|eg`DYAKo{g`DCofl;yRAF2glmb5&cZA*v70CUU?-a0he2pfBL$K4~*0 z$RDIB+x%;R2#=eB%<99m_(sgo3GVGOD~1MW=&EVDw)rY{ptljc$o6vQ*4U!|ujALz z94g-Bm!@VyZjDl?~F1xsq2%rYvK#8tJxGCv8B<9wur+)?gSz2)NAp6 z1L(zgly*MFI*R;E29b3=4n1@#VJJ`}3O~QP*N{940?mEA#-=6SeK!4+Wyg&PM*d;% z#gHsqSNvzn_H`%=a7ZQXOnvt2**>0dY6gz@5yU0kWbPVm`vGq~S%;vPOS5{%PBQ?c z_#tn?Q};X2_1YsnEC1ZNUaj2u0=uZyBXyPUCb(uo%A3S|DDGdahvs@dupU+O47*A=x)dL&`^m!l=+=BLZhWnUKCTP| zIK8F_8T<%8u9iKWoj=);N>bRK2HknRz!W$ntE}yrPc;Xg`Y+P0zA6H%Ev1DEL?}@u zd&~#L6UTYT@V#i51+iYhirtB0*^2V_D(r%$!(?cFpbZ^mJdp0bNlna0O zadQWq5wDtdZaurv0)*adz<|&j9U5`yv;WfBBT8}V>de-L5siS^RFoA=p9k~O59?1; z+!D~$9~y3Z6%B*{B|JWmK;SSM%aM-%d>O8uxuRGSy@rE>Ixq|wnMMKd;+z7OHCsMNcT_EN3=B`u{ zGrzvK(m~`K(4ag35U8gEEvsXWk{?&+CnvBxd!4a*`}v-CL7=dWE3WTh9{bg(ut0q8 z!^jfam@;eYzRh5@b8c5K$U)5o*Z4C*zdSa0#ZpdnRKCT!1asTwk6K!E5Ja`UCi%p!EM^#KlID` z!BGQ;q&Z%WH^%-W&hgJ{p9fwN_B{K^fVr7+{GTuy^wemE;b#n_@zOQ{3G=f0oIdcc z5|FAApFcVw3Ewn_4w|k41Q_B_b2nh{H?b8vxo_r9r$TkOqau7d#EzQLX;g48utLGe zJiC!@n(*-dT$Y=7Z|$#W$cQlSlhl5#bQlY#o}h`Z$W|^g{tuj_bN+GJf7O$u-7`$? 
zRL?oDj*Tj*3YRT!BzTrUIMMQ38JzmV;k8f33UH57Xcvh8FQMLlPE~&0iz_z&@(3gC zxwy58Y5n+W;`Z4eO32I=_A@2K6mZtbq=#pGOwBSos!^5w?I}7a;}HRO*XYwdS2+*( zc357^%Az-<`3Pz0->}V2!8|oN2;;xym+hB|$xq2E@yXgiqY!+Hkv))%+Y#0Sqsb+Ou#2rA~mHT2(4Nz{WdT((fn2bpHTEFNOyglxzAaW_$0LbO$-) z4)m961PK7H#mF1}&2jNJdXoK{fzz<$r_7YRcE8QJRM3N%IH{GYwDS`I*K5%j#Fs8u*u@h=T@+1~R4v;qM3d2m^OZYe?IJE;ShASMtRnAdqLzSmo8$ zJwwau0D5BIRFzez=YR;g;$Heq%u#W|VEJ%@ln%hR!%$j1Am{?2fXCNAfc9plC4a@n zrGW9o5%2@(dr^GM|G{rzLr>P^@Z?W9!hYTo>2Z|&llbrCgXz}*9Uq`hyWj5I zO6TkVeJZVgs2_zdz$#_9fV1Q!pVx=FInjMP1p4=$sF9D*R_^-BV(ZMx#rJ-| zWWWtnLWiwH%(ZT8+KgMP%L?>X$3N|Vxg0*30KYA0{*W3ou(fHVY;~+DUAJzy9*Hq^ z{@@@_mtENI>XX0gH7mED2i@@iaNLt5(DmepX@`H7w*O&T>O;J@Edrx&kRD>5?Qfm2 zKI9D%cL1;V7dK2hU4JJ`fFId<=Oc@#Lb(3Uj58W z`FOyrKjJ?I-U;_Ktr`{}mM84d z#Fzb)*0h{CCJ-5EkJT`Q`-@#Y>@cq4eYpwKMZ!tH~wXJ`S#0I!W-#y~4`PhT~ zrf#(GFwH@I%!E8@g8<<*|6^G+2-}geGV&`6i6+0DaZ++lofGv$Uk{2OV0q|@Gqxj_ zi#jqIauS~c-G6~XF(q44AZw=^@WzQe9dPc|ue^y(w|YC@4+v!8jR*@$opTg$kPXG} zv7WP)$w)D(egcBV0e^yY8)&-f0cJGlZ0#`~#MmDYBZ9XpA^AZkfIksDsZ96ZCNhhU z`2sx@6ObePWR<+i0x#XIxSj6-J|&>}*R~(VOw+-y0SC&FVa$OFrnh6O=C^pu@5*E{ zM`F;g<`S1%!_y6M8OFVj0(!kWQa?swOEHSq9v$%JZK$^?1@%?97RiXRtJnbqat2NX zEb1<2HdUmjCttuUr7WW}zcv8xbz$Y_6pq0$|z37nn&)(T-~AZU$5s2 z_hG5uTKyJ04KvfGK>;m##|5T#TbgU*>tW(>Z2bd=>24k8%N2ZMZOu8b8pPc!|19}0XOSA zxxe-Ni}53~<0wf9MQCx(6FvZMV!WP42Q1QPeZRM&&kXuf?RB=Ef=+7f_EoGTr4e_tWXGm9nl{G>mPE;V2M%!;CZVfi6&b~ zl5<0e!OH?qxTzHf4Ve5!(WOJ(aw_TGOOrZDMW>~K@OYHW75~3(cj$VJDwxh`zW3Br|>ZHxgEXF;g8K){je+rre43QqzzV3i`Xw33okd__A5V-oPp*=Yn;jg8$5(-y z@L^+$YbWZ-Fm(Jf`HyET$=4%Zu1((mWV83A3i@0v&gfe0VgBkX1)qb%z0CZTPsqRT z`kz4YG{vo1$?G<`OuDw{x^}X;Q2Q0kxS{lG#>WF2Qa{-Jr@f8`ZrbI4u&?WWX;8=hd_+?px4a{@*=xAJ=OHgvl!AQn($CvA zX3_TlFXsx6{BXdvYW)K6L?qUm0O+;2B-O7F_&&!ANPfc?pHlUFFKTG6;js#F7&sNA(;-xD4LZg90j}%O?@2E>2=kk^{9|AK zYy6(dKJOTx^2yc3eZf8I?eY*$|8-^o3lQm!5y7sZbMlq^}6TWB#nC*--`ppBCSZRW9CSx;%6HzfoF>#>9AtD>sO{ zaV4P&MVt#X7IlU77`GeMZ3aqElwA(T^B)?#6jef;n2Jw~e-54`+;7UWxg8WwUCqX4 
zhRHC7Fl(bqemowIhEGWOZ`a+7NG|aZSJXeq7UYudxv{j3K&hBZ;vn$tNG;mb4W6uaMFj!9#JOekE*9CNi{b}@!LFk2E2+v9A_ZS$Qo*Ai>a*k;dcvyzy z@dVaBh<`Njs92Uq+!`nhhk_D9?0-i_pfKd~B6?HB`euH@O#LV2zHS(yabwJwC*(5e zgiJtUQ28Vsw-A3FFTOQ?m^)@z-e?_GmlEwA#IOIg_F-K9R`&JQ221TTH$NV~YJk^P z!uANAdP9a~q~jv#z9$=_lJV4 z-*hMyHFxhAkgdZ)-!gii)W^}ER)zmZse>D1MKHDs$8P(wRkt^0CE%~mwpAtk8<=i; zmoS6KAQgT&GoJsW>#Ks1e|KmM7R-+^Gg`vg=gfyw)?caIvLhImdS(f>{<}{N?J@R( z@{YS?-jn}>&KJ1Cr&b7{>ry>actOy&QIE&9Po~4k+GF{T?*F5CcyP5@1T3~Fr{M5z zNyS8T-=K(5_esB@0%axrF!iOpOLY0?1|4S}(yCizH1%#2?+Koa50X%n+fz^J2fsci zmcUaB$ZxGb^|T&;Is`uw-!~i`NIa!0^RU!`MrDjHTV~@><6)l0Try~jbbGB;x96H4 z`NPky^~`;%WH94b9Mph(L)rHnibqOb`l|q~xsan54z>dvW6th5-@chiop`NqBI8xv zpU!5Jn~al=A)!-eD6NACUe$rleEs($f0~FNe#A!&JXRW#l#O54{QWojdjE6;{Ljn7 zPZJsdJ=FVC$1j`LPjp{ZKrk>H=~J=WI=G<{lvY_K-?aq4O;i}3Wx|v+J=0^tp#v&> z;*gJJj~nX2l6iI>BH4Lhu;WYmX5y}a-HpKD1LT7lKQ*rJMmso+U}iT!7rwTK!A}OD z_ZcfkB6wcz(5&Bj20p1YfAF{rI`8t>JMycaJt>Pe{@U=4Y+e4}?;S~>wlgo}U@QFO z^*G!M-slf*Tb%syx!1`npKSW%08DnL>#fVArWC)6%z;^7z^i=xF}`D?Rs>#^Pgqr} z7c$oFt+?Ku53;>o;_q9COpE57a010JWwChtnNVI^kD4?D@m&U0B^S3=tv! 
z5dq@=Pj!KduKzJ69r~U z-S_13{r`RnZ2vidHIwnN|AGQA)IOK(>_J);tJy4YH)gaLhejUv6*nyOLH7p+LjIH^0p8zCYRVUK_(Val;Am9Ct^Vsr^KP zPrO{+WDL_5K0RV}&+j_XaDhL#+TB@%UR-&4GP@<{cX`U#AvweMi8BM+`Y%Z4{&~Ih z-!O#y7#-(6i+lN-g_eH!ZEc(0hZ{>W$N=LVpcYCjd9QSZub;uC65dFrm$3*>;O|lI z4fGiXZeh+K?3gs3&9*&%qG^l6$Gu*b>Y1}|KA%wLy#tf-Em*VoU7Fd7*yB!#nrRTl zVvmNw(R_#{SukC<2}(f(0>Zdm+@nzRh31I2Ogs_Wn|S)A-9YmP=%u6A(h_V3rcOy7 zoDSM8jn48Fo0Iz02YMFb;;)o|)QzhCawlM8E|1_$S807$Y5iZ5;(b?X{o77pa0mac()zB_dfEf=U8VJ1rS)B< z<-hh_PwTs$)>Agu?|NFmMe$vw^${%TKbaotrLU~delbx4 zd(rfJAi&MW-sDjBH6hozz!mQU4h<{7Q}8v>7a)=B`?qQLWdE;+vaK$vsp~5ynCr)z z6+2#7Ti3n18G>$)bI<<^@(P;(3zdXhW*cNb;)A#dLF4dXrly^hxmC`tYS(pMX(Zqp zDLh@<{P5I-P)(rGeYYpx5y^h%rrrUNkV&2dsY=is$ zrW6)%q_l?k25-DK-yJ2_g%HCf(?o`N<$Bb>In6(@1FHYKa@xhXpNt1KPVuBXQW@KuNrI**J10bM3d92bHlg$vnE4k zdu?n;GeslGa#N>YY3&`%oLmS zj`ToO#dKi6+8jNoEhjl8-~NXRpqk2#!qV9&I55GJXM& zAU4K7GC)QZ;1Za-j1B8)D|O^Dm#)vYa(xg*EbIm!|Id;J3`~eMWLvx;*O1({9+{S$ zOfvL$F3uOpKgsX0tn2>v{l^B}r8LV<98Y^oA29jNcsO2e$LvppwHj-E&@e2>H7!5E zT&u*;mDYE281AXG%dJm5}X)Pmz9>6ua^x*jzH&Fg#ClqmH>_LXLGz9EH){h{1#U&N&9k z3a`_oi=-n;AIWWfl_=Mx2rBu9o3@Tqt|92B+s_|(0?S_}&W$Nt)5)W*=TPL+8S?GS z_6=r!3@ZOx;N4xd%Nj0oeUjU^Kj`XQF+OBJ|BY>YquCIrnrpf-@w&{%`8E*b5a{|m z+*#8e!Egb3T$B%+2FVbfU5zcUTrmdws4Q*;V5Hyy@W~JqmM&^OE=L@Qmv%3Dv>uVK ziq!hZN4)@~C+JX{4E4!!2pjBuN zk;Wsfy4EC%W2-2sN~@7l?jmE1`*ro77%-goeX>qP77>M`?!dh|k?xr|^O@Rjh{Rnh z>*fW2GRTW(3>!Z~PTFqet<=qT1kI1n)mv#)(`kA+YbiWid9`=;Y5;H3xw{JO^`1jw zWV-t~s9^v+on%z;XAiJCSZfZy6>hC&Q`cEg=W9fFNO`}dr&42J)cYgE} zwoeJT1jWg#Y5mh}Abd`mldNL3gx zJ(NL708v}2)k(8BF$z2`Kcke_EW9eF=--JGg{N^^mA(x9XQa^fFr}4Hz1>e**|-$| z^9>pY}acfx9Rf<{RsA`7Prz&yXe0P*!D;KF5zdO&2OP+&Is&J&1bOn;$NNg30Poey2 zHV{RlNXSq)=1XEFw7hi?C|ukqS??A7U4D=~R!b?>mBbkX-Zlz(n6cIVQvou^Xl?9x ze9LhfXX_Xon4$U62;OTw&3ty6p;q}xx#MC&2Fnp@e{13ru=>FHUQuzQajusnXmn}y zXc2@aMnUCu{p{v;7Ve!k4xDNj1Vqm!D5YEW+LrZ)APpufP2IX%`ujKGg*(# zmV_HbH{Gqd*CRz-ciLJ#3-%vWrKbXOdyIafZ~Qo%@m3#@go+ibREBXo8Ba2ty9hu) ziu#30%@XF;n9_v`qt8r-ApEZmPy=A~RqNBil#VQ~e)O5l-PV}8cInb~nV<}6I4(-O 
zp38_(dDO68R0Jhjf9+=N>@^WBetnI4>x|^?&ntBc%(@4~dc#tkA={`xs_Uth;*OGG zAD(HCF7LpGJ8xsQu_pkyhK!1)+bXfZ{w{GO~8Z-VLRJAlB z4n1YOMn1_T{o?nH8*u;l)sB)pB*_#Ez+w-p*2`BZ9|>~U*Gfaj8uvadSXnr?3Aa5X zclA|VOe!H#X-(HPx}eF__CCy~P)Uf{Q_6~^J<7c_5FhM?3J}^tj$J?Q zpps6!jxIxkHs45Wpf=Nwt6r;b=DXX(Rnxsb_*|?AUpz~G{|kF`6#Zf5Fhv>wOggX4yh#yl0?jxzmmcXmBw{P!s`? z31iEd^w~97vyXm!f+IuFD)MqY@y5N9vBn|8T6MG5(EiKHt@1ihxXES2x#6GJhwNjE znghz|J}@~*`}i}~lIqDSi;6|zxoibU0#MbxeOW1>*WooL_JdO(hfj>X)+-QBjtZ^V zTkaMI%4{wo#V!OshBK@m^`si)3yb05();=?BWhPcxWcu6cL{d=zUV&<{fi^tkm&nU z_?*7s7*MON&UTc~LcGNsm^B$9S&YVM$*|K<_R0Ny($pFq2!c)zBT}a-WhzeAMQBiu z9%Ar~^J2C4$csV5+Oi_9T(J~G6!z|lG415((!6}*CZHBPPn6WDjTvrjOLSG-JO#4i zcqAprkV6Dhi?ewNwH=%2pFzDg;Lol{{`cZODdWZPtYY16;JheUv8uU2K?BN*p1jANtaAbgXr4M0(MS% z8Yu?lA8wTDr(z}G-s$7&9WNET+4?0M;u|4=mz;v~%WUjz4+~JH5SLGCJW7SV?5Vav z!a=ajS_+JoAOenrZ%$I3o=_mF!R?71JTQpaHDET$2<}#(*l`Xv&!%nT1(O<@sT$6F zj1}dk1$!u-Xflq?!`F=`YCPapKwOT-xM?{Rb2CWVDn-Q{8mDJ!2{fmz<}g&q@8wR7 zt-Z1IJ~2vLf_{yDRa0WYut>fcI6vPCq$#b09(SQ;vpS!R{zzh9c(t}?+7gWg#U}2&sdwODsZzvokJu46kY!Xu6Op)MOD}#t| z`MVfo`nZ_O=Y|o>d*gUZ2RK{1fk4i+F*%ct;*9s5(uMQP(ZfwnyP1uwdID3wk(5qd zCmSaSR)=$c?YOkUWUIo{n1P~jc&5vS(9J#_{_RC_!B2`8cf1iY01``R`Xa9lVCZ6u zC7(6-qlx)9H-ij3s}1P&?O$#GSj=7{2Mn%{-QK@=*OvQ3kU#qPar>X=d;m~>EHT*O z<9=YTT7QtnTThn!x8vI1x@qhS`35T#NbbKTMgfwy|5{-CS5Nj^&hRAB-S-=ODDZv2 zHg^6PdBr52I9Bo)7_sRgByXt!usA0Pob0|h zUhCJCKl=B0+0@H?;iR`d&it99()tlg*9nzZ;A^*>*FAI$m84cD{+4yo1~7wPdc0oL z-y__8Ms18k&)7rVUN0Fw;OESRRFHev)(R63aKSbndf>Lu-h8peAnEFJH1QBv6+ZAu zu3K7(L@Vv3&t9@F+13{)BRfXI(_>r#U3=^7O(bydf6w8qtD=nDV(VRG?Ox6)=`R@J z&zQtet)+lUY#PxIihpmrSTTGGd#7UZFuQX2?+^)e)_ldWbNR?mjSsJgzqfb_w01tO zT-+UGH{^XTUH?n8!gqoZN_hGj2=$XAJ5=c^-~$e6+aZ?E00+Z!!+0xF2nI2UE#fP{x^VQgU5fw*ok)r-z* z4^P$(ipKRpz4l@}wFObd0;)%GTu}VpnoqSUL7k(0fJ3UQ1zrrNIn*KlIJy#K73Zb* zsM#WWfZ2TRNSC+;&Ax`cHM7x!;2e8Cx9D$lP$`oOu5kL;npmkxh+CM zUktE6WnAL3yA9St30QSqG}DBj{e_uK_G+2g?aLjU$Giv_1h)}2_*Li5PmMe8MMjb~PyB4-DTG|jS7|Zpov=0Ghx^xw 
zY&TI?WT&*8;+P!@6Re4l+VW1ByK50g9KZF?;Ud|0`w1TBrP%2rFWAFG4`c3-De)ZhJoxp1h-{=E8fn3n4!LorK{M@p8;Nmx za@`)qI$KdxCEKM7W=PIKgvi-U_xI+|w$MMua6xR+qnE4?f52Q``DB&WwfKJUACGOX ztb2eL{XFfscqvS)0C<8B>CY-Auw(@!MVl6`?YL~`8#9x&3p`sz-&A)QeB+(tOdR>x zzmV(vxc~ccaP}(kuW8OVQR`|%EKe8`vhf!ESq3cwf{ob-}y_I z`?QOIciHF~pZ9)`D0>{l86j^(CSY)kVkuSE*2dtswXSE!jxRTL^9Ce_rjZHn(fyo& zz{qD)mO7J`ATTXDp=8oD?!yCB-8X`pxAn=}A&KhLSE2goRG_dv%N^;>*rQJ%@zMER zYeUWj@ri246bNRjDGl+r=P1pEq?-bMN;oPGBX;v2b38%UWsni6vFbuuv77TKPhN7| zQ)&>WpiI4mMq^ z;|%d#8FPt=c}@HO()zOvkCPqDo*bqI;B_U49Rhj8Nh&+E{g4+3HUgOU^1-$qUZ(%< zs7#PPT-Ms$>-uz<^5sE7=JD-5thVx!H`$=44~XxluT*v)1*xIC=sUEhynPBQ6(>K; zxR&~=c%(LkbvVCT3Ma4iJP-R099sKRhfy`9tN?|hx4+KD-`yQLIE~;DsqL{7T;*#N z<3FgO(hHbx0(`)|6V!Gq8yF|jUk63b1!xaSIUrS0WDr0q^-0kc_cSLw_HRYL)$afN zm+eNNRh92qVpM8i!-Grgl@kD8pSexkJl=MESndak)zw|np{eGw07pE;5HJe!mX^%> zuzZioA@voPq|h}s#ld0ybN|m5_d%X9gndyf`C5!|sLjmVu49qJ;?RZ8bB*6|LjO4@ zG(0cN@Qh#o?knd(IMI%Ls)GY%xeKhyyFaV_g@66A2BzPGdVW)fGIS+>6nXAHMM71{ ziUkXHO5RtrgxX}LGHyADUfI2oPK0es8t@TF)w-FA*q^j3y+m&TY7?hxo$6)TbaU;} z&C$n|-qGtJmfvZQIx{VYzn}p;${j^L00}0~oIC{+)PuzIKL-qYj|=&9j=p-WrzUXz zcYl9|4ZmhA@?wQf4@_D%X;oM3Rg@crSQLf?A+pe!am7jmfHR{EY59A_BRQI){}8BB z|Ca-v>_J)yx7U=(8nbqqSO27Sep@m;U)u{|Ic?xSIaL5{3-%~CKUh~6i3Ue2JN!v% zK6)!A3E`LS88>$wbI4k&4-#Chq@%=X!5gM^07<$gLqNhY)LsdT*sdU4(Kbd~>0ff2 z{@*5j+SihQ&Ghy9QjwQs3Yl@kji@dOfI_%}s|D@hR_+6;gvZV$3yISpT5#W#i?Zwe zP>7V1CUV~Je;roKp2mApjKS=xL1I|Kf62LTC!K^Bhlnd?wJI5E{J(_qya^ILM<~ty zO+IE8$G`hY5tpCqxw?katNQeMS}s0xAG~(&`NI8O1{@g%f9FeNoISBEs4|~Hp42Ya zv^T4mkEDlT!!J3UkHmw@%JRZ5H_EmS!q{4FHSF`%8_w4;p-rXviQKmWgcRy598lv! 
zrS2mkahHn0KLN$>^P;b@Ho@s1CM**WyT6|Avr)DA8}O?|Nn_Tiki&VYo{5=}fhg`> z;rr$Y?%}>xP!7M?VfZo+?ez^-Tu5Ha#;OMDd_&_(_tnA557%xXhBan2PaIJ|U`79ha3A{{0-IePuLP{1k(${R)Vt5XIJuzU z-#tBDa(5z!zf{>OH373zd}zz!FMe*gt_HAjXBKI)f@eOJm!BZ+UTEkRt=9BS4pTiD zX8|d;dyO&}k~yFtq~sx9mG=!lvU~5#|6ljsR}2>l=+E4Lg#5b!2Wq^?CXIJ`@1`K3w`5{QOIx zx&!l*d?xYxoR2X-q1#J{2!HB5UxD$@;c^n2<7<4& z)j@vWH@`}DwCjA>X6xMWo%xRTC-ICUpO|aE^3*>eQRs*7XlKSfE;=^C(;x`!q4i>z z(N~QE1ci-`7k!TJ2PEg$xBmQq$SACpZV zMq8v@6Y~rg3@IIo;j6~x9Qp-vVxjxW#E!(Uu0J)m1>f)Z^|M_l8gPP0LH)HSVAS<% zGKSu?&K_SW@;Fccv|ipV{z~8Q)PZ}PFe&WYvw7~l&K$0y+&8_WvGv<<_Y{=908pN9vYI>? z-Syd^K7>i&(*ZE8O9{%fvu{~ZHkesb3671Y>E9iMu=vOXu84eU{FL`rb?zL=nS6aV zww{pF`8s%ih_8%P{@tJS_F`oe$UFpFtJD0eKso@7+|!7390R1zZdlK+mc$pu5NDhQ zg53hC22G0g#D>s-w=n56Py$lkCIK^hTZq5fBty6Wy+OEayiI#Ll+`{stFL}P>ORKD z1*Eun&)Iy>;aS>xJ6y^0)EC^#LT-Rh-&1isFE9>t#x!)xi()kZGzqQ_etYO8V}1;? z(d_AlonQPzOOFRR#kUphMaQQ&ZD>FOVK@NWASzN5SN)$+}Z+0dPe7)elNP){c=p_4c?$ z$XZCO=2TZCXc_dEn9oQ4CU>oJ7Q>MDFzqOBhjWoU>8y#ldaSf-xd8#!4mHkZ_t4Pc z+!3^8eA1b#jgV|r&NZFmi3Bc1d#tMaG@|GZ=$KIgSIBtTo_}IdJ7=qr1cGqSLUwN$%jlJQiw1-^>I7_Tb!LIKN30dUoPhLh?rK76eNp z2XE!5`7Xgz;CERIdDMN?ebK8FFPd5OYKsJH00+8iwezpmUKgTDXU!hYdQ|bCDd2Hi z+-W-mPMm<21& z{(sdmlPv@0Aj*%8g}1Z_QoVFA?Lf4=0ALsdUPGB-<9%J7i<4U(PXEQS;~#$H$wjC2 zndY_yWKrbDZC5EEi5#?o3eXbCWz$lo^A0(0yVdfzn9FHVD1x`!0>f;#d{Nrl4f7~_ zWp^||T*1|tS88^5!#onTvw0=#6G04ON{qYDHtYRAn9>oWgiL1kJ%mck1MNQ zqns?*lc_@|h^s)a?Cu?HPv?AFi&E?vvgw!9nUT}nw$dqplzqZQuUpAqMIB=Q<%|0D zWZzSlBxb%Ijomqi{hPF8^>oWs$&kHl?W`6fLwRxR)Ekr{bp?`y&2E&nO`S|^9Il@} zc{5r*GrGmPV6WJ|uRr_7w{*t*?csV@ewz=!s519rBuLY?u$X_t#h_ z(zp9~Gc>2N5mlE#VUwPM9CX9mzck2ElLFS;*yCZ^?ts|-;>i@h?K8VSe3F+!Va{OW z%&zCfD*~_mNVPHoB(4V%5@M!o`K zA``lsJp4JA7x)9WI!=(gG2u&(`(<8`gU|HMP;Tphqj{b@U5U)QQ1wi(4pc&gbuW8(zbx=~)U1VqV$W8N$^s|sMXp;`T@d2d<=OLuj{ zlh@p|wLIO-%E&cE84*gMpoMXA1RHBlkGE?N(oh{pI`nvQ9$#)h3H?MYie^7e{Var+ z31?Moa%g+83mN)jBmM9Ntu!=$FzJ>ho#r#HY3Px@|0G_##PD{PyX3wu1)bQ0vgQVI z+Pu{EfAF>M&RtJ@@R1)x?q(k}&AuuPhS=PVMj8>`s6*82PE-78CEZoM2$?xrvBE=_ 
z^|reOgkd`B(vD()#!gaI6O}BTZ9gQgoE>FXT<*q0Ee*8|n3}4?42=_ilvm$;+;@MG z>vh{nb6I_r_Fc|G_GYw86ip<^DxS)Tl3^j(xjMA{;Dcz%5N)UtE$sy{b>Mt)ja$XM zDpm}!BK7$8i4TYb@J(q6TJg%0Ge^ijn(~G!bTs?Zf!F17XfjAfif?1(Y8-cpXgAoKExw^&CI3~ zeJGyzqp{hm?}i*!{DndLR9@(h2tb$wX7$M@yLlESwaJo7gi+ z;M8g&lBFCByd74B^eFA_>Xt1aG(iQ*$cCGrmD^N}R_ADmiWW7jqUM$u==2MZvWCDQ@#%zTd!YugJ)|YGL-o1rNV^}Bwl)W zKM!M>C3wUHT&SEOw0b9Jzy@aHDz+UcMoly?nM z(N_1Mf4aj**GBOTOFKsTey+GuN3&WZ?#{PMhUc8A*sFU$@2c*7%$3G%iSe1MWkZe4 zIGAT*`woh1{x$XzO{dNmBzA4%$=l|(se`-#=G=vs8yXASjALSGRqyQ~#*L53Kuz1c zibF3&guAqs8;A>H560X}6ozvCMt-=<)`|U7>PVVa^PV;xCgSE5hwJ*C212jtI-_H{ z`pbdbM-t-oL=eDC&6Ykt!axZ&7&)aZQ!Hc9T{%55z`eY*o!+EC4Q%mHHFi$DYnwLEI`7j}XweBNduc3f7Jclaq$ zp}f)obBa!X$5uG^qn31yKKJFmqCA6#Sgq3Mp1RRPhhwLD1uUl`LZwXjsr+H-oYo{` z0E0!4ePPKI0uQQJ%Jd;R7Qe|_b{x1n9qU5x5K{Kv0O)Eg*pbKumSpun)D5~|b^TsXLzbM#g5 zDq>DtCIo?zm|d>&&cx8$p{{u|qYq~Dy~drJ`FaSkDFtUmn^cmM%Jpr)VWPZR&@?KY zKjr3)-$h@2Ep^GJkTyF#7G6^d+uez|uW*sc6L;sk77FK}lcO8LYcyR$8r312MPOlY z^M>NMtsa+*nC+OoiR6@5w~AwIT^P>?{QgdO z_D{KV5A+*uTgaz2{|P3>>_T4SKFk{<09Cpml&ygZ5bHvVE{13rja=YQ$7RrJmUGK@ zj`78?cC$Xv#N&q%J0teuTbKLCVY1OWq#fdQ`@DI z&|KBo71;veQDx3xy(f}cBscji|18K~Z{dOg`%ya&&Dhv0d3d`bjO zHCXeu*;Uv2_i`t$3z!k(9@Ri~O=4K#(UQ(WlMk`7J7(CHjY|Ey(yqF`RqO;eM+ML5 z6=-!mw~|WOt?}V4(Ss7-k>5(j7I+7$nj|oN`}<~2T(WM2U&6%tMdCLLBtN{DXkip? 
zCPBJ3Av9;Wt56%^l6=zHl0)P0U<@WV0rlue=;EiZTLOtuaqtf4iM|Qv4Y~E|!yl~u z_+4L7{*D${>e5jqsx^--A0_t$Y0A&hMAAH(w)X9s*#6^m3Fn<)THI)d|APjK>e2GM zcup&wz&A$pH@q0GTZRs#&bV=J*Hp^Rqc|5K3%cBO$^1jXCko!ZV_fP-wiv0#V`^Gp ztbTjH16LdRSZ~eoSs^Y&A*?lXo1~Lc@ihL|4A+iNk!w2=8T}JgAnI|_edp>wsLj^b z7gXtMeV6>5iF;EC&SQ}cCt}1jP7pNK(3YRA$;UYCfoVM6!t4!!xe)|O z{ioCRsd$AGP^B$mZ=^PEG+W=W$Cimp6fe~`kJA>AN3%7IKqcI`Cv0JYbc_R1z)~55 zCtKaK@~`#khT(m)y!i-4ei+gx^01t~fRL$g3&28$B%Ot{xQ`(#h9TY$RP++!0AHxr7Y!mT5>EkCs`M93PW50bllB*fy-Lg@JXQr2{o2Ldz zZ{i~RqcTr5vN#Cc>CM+>(amx0!u^<2QrS4HIk>@sXiR<;=|-S%u)brjkIUVmsaZGD zQL}Kus+)KDPNdvBGWs?#x}WqYA<%I0`y;Z2gcQP2KMTz%V7)xfz>zO3hYM}zaETg~ zGT5jOM=~u5i3_>O9vR{Q;e0k-XZV`!kT&Jd|?NEBsMSM2W@ zS(r_zMpEgWwu>>l&6kc;x{iRmXtOEyz&3SKF;(1Ip?SENljxKl`p(n)gtid)Y~VZ=h7V9Q+BkN6!i3m)$5pv;h>xDAL)6=UI`}wbQ3!Viy$&(I)j5f6F2NSBwQ= zW(Z>-oHdfxf6L^t?+!7=6h7$|V`34UrBkdEX?`h_Ior3+1oIa6WNup?2)>gL+j*Ef zaHi|^maq@Ky@9i~R--#*!>~;K9@N356UDIuC2@x;$F8?GR{^z=wP*XlRI!Ab)So8N zKZxzl9^@xs6jlDdl|Gq>%D$hha~Du2>MJy3u{Ez~mr|N|JLyH96$AE1 z6aH*JT?)betdF9O)+j1AUfU4xMts%*JVt~lUOVf_`HFjJD%Z-mfl65i`_>yQVpu?! 
z>9;k57{6QtxQ=4=xNtOp`uvNZsXE=Cmfxcd#=YvQ#V75d%gfTh8xUeGwc{pUQ+;#&) zvYFJI$Yv0I`Q^v?R`cABbuaCu)s-bPHFSN&Y;>=BX6EzFLEa2p@`zTsn2nI4&RLcNy<{8ZhoHt5mhe16Q{lyFpM1SPiE@TUD2qojYTKh?$I+{+WF(4zHm z4YvD4P#}4UlTkf_Xl%Yzqn9lrnk}ic;GksgVAL$WJ0vKh)K=&#G&^T@w-;*%vJ&}I z#f>$tEp5cr$lrT%hoEB-F7(zZWc~Z%p5{#GZ63=#ac|__Fm2-!h#&_GB>R=2f#I^o zt=2=MuwqjaJX6#bgALu+6MM5$hW@L-5Qo#mYrra49IMH6R3C%RN6TmL@~6I7Je!)* zg6;&VIME8?n|mW+#e+LD>MLsM-I>Z(LF+5X>`)aBYGt9LEq+5Zqa+C5V_$3EukqQp zn|g@a%5t;S*FKm@Kw~qK8zcHDoIKpbtSX`y>;a+_3B`4_hW+!lN2;`+v_)Bdl7c)r zBZkMw&f&fxi#c7tvh1^%7Ti(IjFbDIh;eX;CCggQi0;GKr1#6~vV|b5rv0yTdkmiSxAbLnB7gWYx?SC$ zFHC*-Yra!m6*untyNfatW%{L)vO2S`J*0LV+{k8dQ@*=y8%#|?>88Vsz7Wt%Vmw^6 z&~ua&2*$XL(Za-QyW}~xTh<-Ye)1B!r0c`=yYF@VO!dKAk>uai?X9mRF=6sMW%!|P`Z4axF2?!z-g@^wy};X%3V-ZoqNPz{sYN5_uFx)+fIbaU{V z?0U!I6cqvjcJZsV+p{-U*X<#>@hA6@WzPxMK%e>nw3axO~ohWHhF z^8$PehrCNn-uw!#qmm0e+2U)l#gr0oZ#Z5unSB(QceHVeF5ymwn-w{JQ`rY9{b^;p zM8=~ou-hM)*^7gDevFa;df(Ksy(wYFz{_IngSV7Rod;73;Kr5ygeH97^{%_9^%rQ} z9Yt-{^wNp=?$e<{g2UYs6@DAfnzC0LNb1tRqC2~KvTG;rW+JuB>^@SbXgIFq-i{hb z{anMon^moAU?O1Fn6 z@f6Q=xOBmO?}MZ_o6`CRrs`7yvJ=#K{nJ>#jArkx;)PKggj_R=sL$kDq?-~`DxFq0 zou?2d46U6lWteShU~)kFI>Is=+bFSfC|_4aRqJPlf$27@yN;8Tw-KMHcH?Z5d$U&? 
zrf5?11Q*(0w=@~-xEpGTDTp=MZW!gl$^0$ZY9`ba)6moxi7t!j2M zrb91qFvK-{g9d7w>m}}^cDck7e>B!Nn@21AnNT`*S)#mu6rRG#KgQ@~ZbkD@$v8{R z-EQlNfGy{)Dlli{`+9C2_UQvXj3Cu712e+J8+P!igzm*M(5wLR>jZZWGR77=fG^T@ z^WAKw%sCss{z3h%{*xU6&7~4s+0aQX$=RBppgW*Iti)mI%AC;MMY@w?@V@SZ3Y7S@ zE|W>o8SX7`r4~%KO|VyYVs<3raR=0DaIy9;X)k!eD-)E)cZ9FEl)_OUe9MsUsT$kG zwsrm96IO3K2uh_`#?ie~2Ut8;Ao>rr?TYPeCe>v zMmWKpy>!$>0khF})C_fN=dGUd>nwaqfg-voH1De3WYW9+7x3wLI|ctN2=tyFF=E zQL`_&R%1IUit*Ei?#_X0aXK|0t62383tnB=IR`fH4B0Cm`xSz*1eXb~bzdT*HcIs3cI`24*ZUI35t8$SRxlp~)#Rbj=N|dy z?Z<^>0_*-J z(V3wpSo1<)YV5`aeySl+Aw3O7R5LM3d4zTew8_|vjO*{$II@n3amm_~jTuQtX_~qR zWd9cOSqScjeZ)t;4xCRh1@6IfC}8Q)=WbG1X_2+T(k%$h-A##;l}*1SZmo>qC>J=( z7n_9C!aV_9sh$fA4vAiOy?Iedq&U(^mwKZ5CZze%Fwft4CJv-J>&VwBe1xe#TQ<`x z5(m-vg5l1a?}lR|vRmXO!wm~xG`Vg#1;UTuTT zdxx+%soL7D>mOVtrqkfA1LQYa*7W*nL1+mtf1$G+>}!#7Hl~~sj9WJBjyv1$df;sS zpSmoay*;S*=;9*p5D<%5dQ+_?W#;j>>@e;wSZW{~|CZy_{#k~igW8?HZ>njkeI;al zHcaiaooy{KG~ODJFk-uA%y%sL2d0+&p9*K*{`*y8x@CYo}&2ro?y-lJ8PiR=kS91aAG;Oqh+7$48a&vZa@~Dt^2$HuG z9)JWeG0Ur_gm|seKbUC;PM2xv`%D*zoHM5hK;#1x;coKdyGy2mj$e^77hm*9l%|x5 z50Cqla3XR<*23j1@Wo#_gA7h#hL1>$LUy0b$#3^AE;+f-ZZHpw`VFz`v|{z-9mm;O zaF*N@w|$3LB|`q4T>lw(moaL>BqC)R)iLxeXl;p@luBWA=t_w6arZ558nH092$*#y z>K_nLWO$ah3xR(JKlH`54Mp<{dnd=4yIfU$o*^o*^zRn(Jn%JoH) zS>G8PmZ%kcdB{yRl0M>a1{eD9V9l7W!pTDun}vYX!0Q{XRpJ?~iuSpI$aYE1>A$#e)V+zF zaeQjlBTm}|7%{sXaQGCQlDQ`yi(bQq+BtIS2D*XXB3p_{#cI{yJa<2)M3izDcD^iY zw|I)S)n}{EP7!l*1VfUqP;ux%R7jJob>PAbc;>1`4*OIXt)VqdvOZ%6yVcAaq>ft^_uSVUv@;QR|_sk?)Boq_q%x~ulWr&&2^8(jNr;Ff=Ovyw60zu?Tj`e~*+&_}FrW|r>%SDg9uR(I)snZ$Q*)a~B2 zbOT(R@HMtOj8^POl_rn1?{N;bE@t*O&zhO{>u#2JEV)O(?fi`bb@2&gfvQUD3Z0`s zb&kFJv=P;DB-f^4!G&SCfQomy*%l6=5_kpwL6N(vF9wG=gei?M?8R z4n3UP?h@H5@DLRx?>=?)$evXWQ{R~eq~A*qR)1$OuylyRe~uG3b<)mCRNu^QZn!`F zIbckpgg-C|nf6y!zPCnL?l_JbK+kt$3W+0N!0%$UTX*8^sG{4V7{7?nM=vnZCj`}A zC*iEr5ZY1U{)y7c63t8?a@(Z~9wl5HQdXXp5MyXaZp&(nyNyHJIWfFmi@kVGb<`&? 
zy<1#7(;)}UE1WW^za(i$X;L<0?;cn9mKS|+V0PmWBgXjPOI{_L&^qQBF(lBQNhFG1s_Ra=ah#DJ61;qrxwjY6 zck*iRsGITK{SDB2LAKqoB{B=$@E072@j#~!dV_A@yE*?B*kffyAJdMWFI&WG9nQ9%GU4Ib}+D-ReTwRC8 z&zt_{6}eAiX>Ti4tf@Ln9bkQq7?U-tI+(}uLqSO)9AF(`CZn2l;asi9b_qH;ymV6z zo?_ZxW-q?9^$%vKDglr%o}Xw%$dw-M*ep%0N5r+3+O(QPcfWx+^2Q-zafgzd*%)*C zys%JxQxa{IT?E-L<-TCGGAb`N$I3}|Mw~4z*5>mC9uB0yCn@vQqLS1V zo-2g*k15@KJV-T0YaV=9s!h5O7F_Zj@NE}2nLfSNWB>fm2S$e8#V*wAvLI}K#$WDd zRS1Fg4{@wrF~2r3w|FW^cO+OK1xad@VG4ALVX9>WM=%^Q3LBBhy%cdTQVBChY5PDEGv85n&!zw9m9KV!X2f#(3eCYkH%FvMS^ZWGfrX| zQC*RIfqMy++PMIF^NW95Hs9SZ-wrqFHZB#Wd2uC44HY9+MEf=0SQGhE1T)%{lH6cq zLo33%5<}3iGl*r9(YHB2&Z{ZJ>m!t!H+JkHZTid$SueI7)_sj9L?}oaXI3`eYrKLz z5BI_AT4%Jxxa0A?Lp>buk*lm3|2${3Copqj`b*8d-x1!6xEv)M;WYG}M)V)DKMK+} z^#*Kqt)5TkASQ0lxhA4Bm1)p;|ML^Ss!x^1&9^25eS)Ux@E@r3*`k@{hEn)+o>e!< z0_mVwm`r{F%Zs7S%%&u#$z72X61MK9wBJc%cBc$0NX<_Cw_;iKJ8FO~xhAmfM$e|P zgU=zJ+rCZJos~(4nU00CH%%U2iALX6DyNBtD^@lG|K-$yXqFNy!|zHfg#v=K3N2f^?_!gup{k zmYq9@CCf<4dz_q8K}i>9)fAP=L*9B`R*qO5yztfsB-NV2YaqXjO9u%nMYikLF3ExT z_63IpPu;@k?~PpXdJ`eki%ZtEp`@YEEM(0L)AXh7+9xpaAbFL}m=N5mjHpZDxF$hL zRH+k^gt$kxSq`O@H*#bUNY#3d>7>SG294|T-_)e+0@|vZ_O~Wb69Y>As`gUZ@T}FhT6yfa65e`9IhY zGb)H6mxQu%q=L7-P>Bt0ruC?<5yI7ZcdU}b!cb$|<}%qqH;MZaE%_=0V!0*t(mGrj zysvKTC=~z>@ z%+kJ-cpg&@8yq>swz!+L+FUfu$#O-tSTFZLBvH%}*yNZ*>@oKW{HF!fcLJiyq*;+8T@`(r@Y zL8MAXX0a}63Ogz{*nuKw7=yHxOY2se5>ki=O3@C*_r?7RutRuvl#IeUSG#V7$@cF8 z3^O!Z4xXmfjsbimDwOSW8Nc2=m$uOm-IksniGCFj-~(MBXu1%-*)^mhcLHcyyz*_b zw20(KSG^Hj=2QlFe@+)lv!0d+qmx}KsG}>bbBq@%p-`_MfU})l-E;-xbmw3lI269y zm{xD`;kND;EiQugcWCxpmvI)=#L83_EpA|2qs_b~H);?;cEm+GJ>FCkH&H)lL5)te zbcjMN85v2a23BmV`$JNCXte20lgfj5?bY1)o%8c*a4%w2r@q3Nb*&Gs0B~ zg%!`+6*3mob0kU3HXnJds;FWwP54fp)N}c-2|{<$Zq~RWfA^VgQMj`-`E?$8Bl(Za zw*;+dck}M;BIX?VoGgyL_>R+h6x^(jOO<{f`MBQ-AiN-O@@2Lk-ngrC9JH@Uuc1t@H)oaYg%S9A){8I8{%9 z=GojyuhGF>XK-<8T`S`FmPoQ_{7P$Dm%122H92Y+j;auSn69W>I9s7!4)@b;IA{fp z%Lg&j{zeABb+k#eemFw{0rtno8}Xs7&JTOtWhu^b1+DFs%xl1y-lbRn^~rww|4PX< z>F);|zWdmY`MfUb%HT@i_4~EMa5u^LAOct~f7+hYEdA*L 
zA6~n;3i;DbV5BZxxchqt=LedhYql#M!@qc44kuus-3oOk|7{esiv%R=Vr@!x@R75^ zhLz7$-L~S(Q13%Sxwa3A5ylI}PbDk_0lCx(*DUqQ3vpfMoy&Xide(xgC2m1+n*LC6 z&uktq(KY?+;6K&O1jKDE?YwKd&2Hw7h0Mr5`O$r@5YOC_>Rq8_kNy_%OwL2gMZs!P zMEgtH(x6Wij{R4TH}McDYa$ek0B#wW+kVB`z6E0IsiAIWP1p9%cPVJGWo$x|WnM9z z^CP!9{b`f&VK=Nu)+G>yc#T%^J{uYH|7M|ZVWGBlEL`_)r(!XmJn*(x)7-zApdjtc zRrl08c+bYGQ&-xIdp}b>vS|{kt)yvFG5lDGqw% zo^-g2Tt>gHd!TB)D#j1&P)cRw)LS?Uvc{M+Id4r|{I2Yl@_j{BxR}i^-p6)y0ksuH zwUcnVT(nS*=!O?*<fbaNVX3ofv%yDF@!P>J7O!npr~Ic5oTx1^$KNs8gp zI$yepdV4(Za`TGb;K9oZH|UTb`POe8Y%;y|JzSTT6Og`{Du2oOY|v0e5Bn0z zX&gi~FGm$g8lK%2PS#INTc1Tkjb1*w_s!-V5-`pt9wq6zSxQxyYcCNXEUI4XK&KmZpRK$D zlA|EF_(+0~GZvW2sgdPhYe`CAG`raqvWze$cyokRTd~vaASf%r@jKsS`wYViGY2+K+8} zTH9=Y?J4O1WgfM#QZd335+JB78yxr$vXI+F7+B}|x|_o(nMV5M2^|`?wX;mN4!iPk zyzXHp`{s=cLLL594ql#)&5GQUl(?gFRdhFvJ%ek_dr1l%)qVfZG@RPcYrY%m;X(BY zpkcR5**`j5-ds;!=)p>L^b73)E{O<1NS8-g8~{OmcWKQgQnA}g zNQJ{)Ked3{Y}@{;Y`ycnum}t8_2uCi*T-ELY0wz^lk;x*0q^~wK9J_#nl0zfY!Ust zB1=nTBInnSzMF_8|KBJsjZ| zQFuBx#JyGbaK!)0U^y&daOpZpL!=Z*Pu4kZLghn^GS5?Dv1M!l+OUahy;5}3N4WGIv5wM-siZPj`lxqx3Xwz-Fr;V`p%irBxGrO!wya2v}N%X|d+t5Xogh zTcJCGa%}Toml7MY`&g?uYE;(!AmvjC?ok}9r0-5LFGYTRq#6CEEjKFry{^I;>U(bz z{LrS8!Z^D!3Z;TAvVF(>tbJ{?fT7-*xnfpNuW4yS`A*1av{Ko0mvz|Bzyc0CQ!48N zf5P@u>E|WnN)O4y)tu@^sp%AeOCAw9(qFZ#*6Hm57Y+^)_XK4>M`fy3Li_PhYOCpG zdjUS)nyA=ux8k;*_t1M9mUUm4d+L;Uv^`?qld|=4=RV;s?iFi7rk+icWmrvmpd<|0tdH~decG>~YLwpd0qW<4z1 z%Yf~;V=odfCP)ftelER^ZUwOh-~dZ?kuC;q1bj8kP7^V&)!Km@#&7f(rxaWv1A_~E z1IgP{-_i9r2X8F0Yn<#Buro}-pmat46bA#!Ue@?_fE9<+n3&SH#eNcTVG`P~eT#2~ z-2-O8*K032nJ~8e2Ps!?_js2FYKh#@#ldF*p2IjW_rae&u4FZE&*(u$1;?p~(Gr^= z29?c9crXh9JfGT0yRIo(5x*@}r~^f0-5JYf%|7`L3r9-5mGq`apyt*#u49jx0GPrH zt!nMI`uhgQu1I_6E7;0qHS2}{i&folica;0;1?nzvhguu0mh0Cw7=2Y**1%HJx zR>#zI%6xMs&7@7)u3&D1R*K1`Fy%I;wM|Cj8l#9_ZT;~Rb^N65qHK*mMhmqZTuN)~ zp|Re(P1WpR+~bx4n=bA!ul6u;27w>`7Z-r+W@+wqZH5+7nv|Y!+#MHORdjME=*)eQUZ+w)73^JLPX;`1|1Jh0)eyYVQc*6hRLxnIfP+DL zs6puubiV!&Lh1%sm8oe!dK20YEgMxKM~>u+lIQt7JW7i~QF3__C^N)@%iFT`o)7Ez 
zwr@kcEIF^D+$1;I1sc}4j2sQdL5@JYX^%fVnV&MpErvzI#FLQ@eM%^rsdcrgiz43=WYi~T# zAQt<$;VJsPwLs~JZl|d0dAqjiX-A$_uS?zltCZ{0f}Mn^fUKf@&MJKi zVC&#tj}H4%@>fcCu-h{Yam3e)PB$@@7ChPL7c7@iZWzVevwsbeeqG92x$*NX^?>-! z|K%#$=I*tiitVegR$zubs)5hY;V$D&x*ATz2~b5lTE?sK4L_E!7N=}c*!~Y83v9qL zhHI&_LnUsog)M)!f6>M)C8P4Y0+}f<(bdde#FN z6Sk!qo7pmh7c7?vN*l&nAC;>Bwk?sJv)d@1Y}YR?{P25+yY5fB6|SeO+!yw<`?jsz zhXMEHe3d@Rx7t1f0>39mVSRTh3B)4Z{lxqFudItg?~fCi^%u!sZMVGNowiy~CBp(` z!=o){XYj)p9a?_xmj7LRfMwI@lF%%;Jdtbs+9@5gySOQ`J7oOvBR^CH8ou7Ny!?X0 zx3gU914qtdl*BJ!DVZ>jgrOVsuxZ;>*k;2`dgwg zM}6p@1;xF}`ye9r?eu~3vLCh>5Boj*ySrwZ+fq=6I%i6YvRYnOZ62}c^DBL~yse!& zm2}zL;qGPK@}}>NetSqQ>ZVGweBkm#KrDCZ^gtvY?FANw;S4?lbDA7?tPULe^M^Hob8C*a-&j*tYx|dPN>xZOpepK-F^2T?aIUVAkANi4oc336+4a}NHO#?-%GqPSvN84 zf^`@Pkv7&qt3!%+`Jqaz9)D~FJA&wO8s&lP_>y(*nLLNM{>goorH(S z**==tc1GcU^{+8{bd+G)h?{twS~C$)tuJ-!^1> zL-p5fp&L;LKr>7@KSukg)`{m0N>>U?Sk1i5a?s*lP{FHp_c=}xSN0ydh*FmL00an{ z{Vm?uwrgq1%3gj)a`MhZ(DI4RZ3rle37oV#qMAVrin7Gge2LIG*%+Wa!P@%K?=Jvwa^ir|8!i58sNY$4oGDxYRUL^)!2qw3>MiJR*SP3sf zC0AEWnBChj?*gAs6Lvkg4cev~<$WNCE!ybUY0#=Y=`T6_`4l;L;Q}mNdT1mac2mNP zqyQkNUp0+^aV?x2JN&Y}%t@H8|E|KBLf}d@$x7eZcROv9JdDd-N^!6}RNJYHV$^K* z*>2%~IpO28M=94FS>9?A64aFlSFX1bCGVRXAnIMYmrBLj0@J|!4%waRM4T$W22|+( z{<_$$4wpIIEdLx~?0=t9+ZJ1Ry4fxYV5SMQ9ft0HqJMU2*LhzbEi; zCMmi$?3E9b+I3H*x#}*?_kTu=b7}kl85@cnD3kR#8B?;ZJnJs(Vb0ahY#o@xuR1t1 zJjR4;n#>ay#AgwRB^mkbiA*E|=t{Ji;v{~U*GH@mO^H9PA@=L}52r>OS9&oZ7ZDAa z|E_E+?0%@gIr^lASq9$GJ*3A4xG;`0bw2#n)Z$4fSIcZkN+Dt@3T8)m!9Qqj`Q|@t zmD=A=VEJ=jc|Thb!*2`E62h~Q7heK06LVSEtFbZP#Id+$ zLq*yvwtZet+CfjQp>z1SSJi_e{JWieL}d((nFpKCnysIRlCE;N&Rx*LcEe#%>nY|Q z+jknd5&#>|vNJyu&a#a6#)w;eYW$?r&)M%YZVK^1m&(E5kRs!T@!g`KBaLz4Wj%cO z690|K&XbGwuvuDiwJ2vxqhG=Fu0j6?tH%rs>9;F(SXb78(KQTCLSe77HmwjO)&8}M za@waq|G z!EWOZlQf|jURXXJG)zrzF10Vq%(*Dog;Lv`VG*uU68^$7M zk(2`^mnQ`O{)6ePr2U|=>_^85osO&&3;Rwcb-GZXJ|W``EM(#yC8hRtt~aFWwB=5V zXUT05T4ixx8g5$SaJ>U>`Fi@;zGt>LeW*B})R5=wir*~6NIbVoSji6ur`z{Xy5qSY ztAg4jZzLH~d@@KI=<2SZcHq^ 
zU46O-fYGSaGRKkv(5N13Nx~L1=W4i)=L6P*?rHv|bqpWZP2&7vVFs^%V~SxA@3S4Z z9XM6Z;oD+S(4C+|TQHv2=+!aF(ItmZh8Gs24HaPxqMQWaQr;v6(QIFKV@HMMT@{YN zYV0c;h^z*Wa4+cP_m=N9{VoLEt4~Wx$r!{ttycwqHFntoFvD%H22yky2NKlwfCI*CtR>d^WN6nHN-lw3Z^zgb-R|uYFq1%$ z39W+1RU4_Io2_`iKy=klJvH>$OF@QBSTfab`zia5IHTy5MIr$I+Xi$TrDSnNur=xz{qJA@py@i5v=IEF!@ztZ z`Rk&`#l|AtHY_VvaU;qYm`s2ymW@Wh;;#^5VaWMHu307tjQAGZ7==*R{`pni?2i~X z{~}^APJ`c<9x6R|~`1>_6DRI9bLf0*_j~v)Cdt#pi82rc0yU}|-8}3=NGIAht zRI@3LE*QugKh^2Vs2~>=A!w+1cA)D7>jrl~!H{3d3&8|jjknj3u0OEY3EC4>_UOj| zM5o%-ZJeXP{X+=%L&0IemLZPK-+AVj+y)dw?ymm^lnm8n!|F|m&o0v;{o0Vv?)iFD zmDui46t@H3yS)|&n$wgB|L*uW@#N){gNCHm)h9mfG1S`|v(eS2OSqmlL>7|9-3Rm9 z`68+43d_LU1>~>cpimaLsVKQ!CugpshxjNK(!wo)I`{wbU$dW9O1)dlW-GJ)ao?Cq zZJc0+(IN|rj~15fOZ16>f;k24cBHk-@SD~ zlwyLL1JciqRP|)&;QfieFbjH=B(9v4v%*6_kg~Iqz$w*XFTQt*5?1Ufs;s}6*)t4h z{U&ou5Cec$nFTk`|KCKe|1pNPkF9UNGh<4!!_)r;xC*9X|1O_T*VU{5(!6=!t)j!D zWuQzW0@*As-Q@c~v6~pxiFyWHUH)dJq6Y~PfI$U19%D3vk*ge0hg1CyP1G?rpe}eu z0Ckwh5Q{sP&(0bxqNemYI84234=#GJ8U{Y}ucSv9-{f4uIXBMtICU8(fFb~B6g)s( zZG&!!4_V~Rxc_)bwSPD7@}_kL-2%bNU71lZ`gleGfT5qlUa?ZGAAgXgj|EQ6Yf<~F z1hZw_4t}O`j153x(sw4@s`${^FF@P{)!oM-PtTwIiNe~)O)1NkgITZ2Z()wTYNN<* zIXO5SB6QitI?pTIk`0~vO15&YEHG@Z-<#sc0>AC>bTKa<%zQ$=pyU2{p?bH|^d&F# zQ3K)cKwd;c%GDjt_GbhygQfuvyyB%V+^i?x1@Ssbz{Y!cS;ywc)-RC-KZQob?SHMasY`9H$UyR9g}_tGuG zz(ZT8-p0hPdnWPTBK;l&c4O~%IC1cUonsbBCEwJL3nE$U7!?110)y5xyf^f6BgH6K z_lt9xKX@aJ73X@sNDKuTG?u?D6)wx1>n4iBXpZ!ZemQIB7F|cw*^x48_f6u_ z6_m*~5I2yt1FIa~pWs@5czE?fldBs*XfdMkYT!;7enV6^i5HEROIjU$>Cf5KSeWdG z-nd=`_sP>_CRKGzHYVOv2htZ}BzpyCz?cIA7;})Wc{x{sXLOVTHh@}HWo}?RvJ#Hs#yK|aM3y+ z`a+NO|3Sn5WoUTa(mcAAM~$?br+|5%vErSQII=5svB15&O-l7i^&4$$V^P9r5L6e2 zAD@Nvpnzh0f6O$^1Hwgn;njtCD>uYMg4RrS ziN1g=e!Xet_YQk!xYl!+hh?$pOD7Z{RMy_|#x;;)#q_1DoJ|nI{9v^7uU_J8tbH<% zBj{=Tv|2s>F%(-dGLCBBL(n!%^d?n7B={IFy*I;a{x=TJXMy%``42_-g?n=k-9GM{ z1J3Ba>!5uDj+L~{lGw+z3Mzp3T3eX=DU%ojPO{4@j4|MS>+;CdFwUtY z#&uu}J%S1&-vv0`~lE&B*uJ$mA^O?Chc41wBoXg%#a(fE5Bz 
zS0~GtO_81rYguW*%b_u;JM1;bLj9%YT;f5w|3xGS3!0s>a#B_bv291RG6WxdPh4W3)NA+Qki3mQ9DCR>(_(__ zq>l=-r&}|rVTJJ%O_RH0r^`*K!rLIWl>pOrR%*Sfu1#KHHJ6v~m)lc|T(JjHl;T{N z2n1Jw+>vp9!?Y5de0z2GzvUpWfOGdCj4?_$Pi@D$>M3H{Z(s7b6rT_! zXJexja!)Cj%`OC}%LSaog5^I2p|S+le}QmAVgNc4yL|&f>9q1O9*D}cdGwvU$ao2T zGFqz%Hf^v^ZYk!Ii-$Mqn!r$kLdWw&WXD{&;}+d)_HNr*%UDqM0k4t~uosKTRX>Gg z7tgN<4AqWSS8TnYFuCv)*^->mzbo)EVKj5!6` z-c8GYdWbjv{xg{{egkJ|=@z6~V!S@ky^ri2#e zPudi_1||o&XldV*u$95_TY|@K#X2~=uko=Jct@m?KkxS56ja=IXNH^+FKj+*vF2vCo zr7GKnoZ67wtlJ+)V+ezyom=fe86e8<>r!%E-}@Wh_OiYA6VUC7!YIBox=24p{gPi# zwyhvrRr7-isa1-*Lswml|p-8gHvU3s?6rPk;0p@kf*`z0;4~!D*qaQL# zn(8XkS})d-R*Cy zzIEWq>L*%AP(NPjAof&=$4e#ECwPzU9E3Zg<0K3pbB0l}NWkWu(ORWZ@QA(lV1 z=0LiK)2!YOF;k$Dy0}6KNTE7HLPLwEs|!HM@7cSCJKl)JrT)B@Dq;GbK~5AJ7>;1v z7i*H(?R9&e9iR@trRx;ND-1Vf5 z@?%$(l3?KLN%RSX70m95Pe;44p<0lXd(UF8bsqW)}WknS|3msGZNLt)dkAAOd!%YKO2(H(~0rSAnavknxnIP;r zHQGJnS9QbJeCo?ox7hldA0o0ZJZkIPVT^qX;#(KT*DP=Usk-UldS6xIMm9M(_&(j0 zcc6V{+m`8YXwhug@pLg197DO@E&oUT0l0lVe(V!(`@x@V&rT@3{=(>6Hy#hGT6yI4 z$WfA2FSc1=4iPDCzrK55kj^e;B-nbc2IoUM7XSl0utQZbL+9) zl1l;6g&=yVs?ItU8k1AaAkIydhgTD0)8VJ< z?j*QRY6jq0jg)fL6e)b$rTW=?$y_(ER0U~O;jgxVX04%hgi{k+J432Z4#xjcH$p(h zX~faV_4OR}Q@{9l2m4G!tC$bB#%y(Jxl_`B`mTE0jA$>TijK z#xZvS>!xFo{*pF<@d)}qvJ&noEkeGj)l4l?pdf<59!nUV;zhldx-=W_)Wq~Y@ddsBgu~l2OLGtZ6trhn^jvYK#=mtfLZg&ZARlxr@&SIa~wTT4OQVC`+pQB>39)*3`7)u}K+ujYSWVa=A^ zJwnTo);dzT3?;~L;HI&tthlRqsSbOTJJtYi-wr=*X@BqHOBt{T=0y;I*!VUSbT1Qq zbl)_aa+Gwx&rOHjbD7zb`L5%pA^8Av?`Thu;H_a5#oh0+y0%T_DG}EPe0Urfwtx#N zNN>@cHzO0=vL=-t&|@&2R=Z|z`-{?7Qu#LPJZ$OMgViO)8vSj{H>H;^7>P-VowdAD zBk^)uEp-6$Zl}CM^1gfzpMFl&QI2103JPR(B)4Hs!~l&b<1;$?h6o=$mnpnyW;gql zTvqcyj0CoEXNI?8IqB%t8YTj~)}tJ5hXSYVvrw^ZYPMjLS|+6(P48_l$O)E}fH zl5cES`oz87@GFn}zy2ByLq(jo(;Yfm{2Z%Q4Bhf<8r0tVEVq?=Kqd*O$D|^*Pc2T- zUU#!=iinPM=}QZ_!xgH<(EMu%^E6D*)fZ67d+aF(1OR zDLDKKkzSF8KD72t%S_(Y;Cq(qE*6k;0fk7l#Y|Nlu;8*gPipvNRvsUX85d+rwN79##xs3rc(!1{SlaUsyS_Az6PqWB!N5E#HTOKQ$ui|ixfv2;dasCk z2TTrVN&y4-Y!80O&<$DpDgJyXD_?me|FxedPtbFVKYxy_cyl-I 
z9eeP^uF@OvDM(q%R;Wpx4`-4~HQn(kC2*P$2j(zLbAU`532Kh!7za|7d8T$t%JW;H}NuOsQ{MV->@B>*?$Fv zg!4q~c6I=);2QW=@f7ovOjy%C7fZ86z3jp!6iYPC33L$X#vQa^)&#l@?Hnpljz+>e z?`dw)3Uw)z+I3MMnDW>OXG?SqejOYXzMG+NZBIcPh|0D0=>`R9F6oMV3iGUHM2zq6 zI2mIYXt`EC{&`1bVk|*!E|iFkzjonVvABb4=Z0*Sf9D4CLK+BjjRP7#HL$Z~A4?7| z^kqY9uK&W#wz=qZ$WM8dTm>>@$TsuIwS}@?Ao~1=!x}wH64CaMMq-hE*#$dtTmB>& zV<%9S3rpvLVw&XfU1De#u(R3tugw0O5Vm=*M{#Np=E z{zojUdGHg7_$p>^J&^w-Th~V!cqNyv~zqW(rPEOXOme-x=?E4HeW_ip$-*?Y~^Y zWMB^mWXtFB-jPA~fF-aZA*;JWO^aMTYC7UV1?}!Mh$p{IRo`+dt;n9Y&M6$3_3Dg_ zW2|HE;)+GU)SR6KE%VvF__FjGRcueNWFtV1#k>G3*twi?B@w=Sl2m5fXAZMxK`KaK zOyY0&r<)Q9`4othzaAEky`oBlpmiry2z=hDN6K(D?2?IHCi$5%T+23;(C}NxSwz~m zr`$28At@}CdULhj(q2S6uc&6+%xuMZWrez^lREEtO?HTuXfAQC?{+eBiM*m274{lw8ZqKO7XSPw+4cTYsKpfF^sWXj$U3I(~5 zrBCHXt~Xom{VvU0S!f*nNJBx4P1EKKx@jq0lO98l>ynry{OvN1r=+SU*qA?$&{of@ z4LrKR?}5Azhas1Z;Kk;esUzLHmYs4AYCN_;;?t!XGsl+6C6H^;xa{KNcqZqDQKqj9 zn$NG9>8+{VIg-`P&Ye9G+Sq2ySNV!f>+c$-Os^lSnTnETeP$y)_)$2Dlix>yj)Ly@CDDU-jvNI9`PL{Z@`!lS5=3^4gH{83Kr}8R1@i9 z!%m7kNaUO$!zKrCHfwav7I(im5a$4*??dAb9t*K>)qEMqRRU2RcDqzJp;KpI`(a^GU@tb2xc#22#P=J?U}Crnddt9C7a{hI_2%iUW$ z`e-UuK=AzLEdh@dSB-@rtucW+n}Su$E6Psd-8O`tB*_pnL5Y^YsQoE=uF2Ui(6YLy zw$vu$U7hGfh-M3gnQBxfU$z)2CrQo_Sk)J?x|-O|)e(^Wa_oI~vpP@&CdarG_k+ce z<6){8L8E*(x}liEs2xK$X7X9ae+!Ih$9mO#t7>6EuobfVzwKOWa>l`vX|HQLnLwbJ zGn2~&p_T{n(6Z1uml1ej(`yvfr9ZASP#QSPz4`#q9fmy=gKQNEVGmsm6A|~?n zfl;@Ce^!0V=xU?+lYLZW`2kxB7C) zYm;tI@7`nCk7u7}^-|ql{PtN9maMrFkYkY+o10h~pU5elXPCO94L2RRNz~3vbyG*| zB07}OC_EOIe>J8XL<45{A_$p+xJHdT;ZTtn1k7TK=2CX|=Sco)D4avL?qXXT`?D!Q z4WPf2h{o1MlHhTp-Ti0uuJO0i$4NZ)XSYG(VRwI)J;*r=wo^n%+a@To%uAfM1C_gf zy!?<_qD)S^x%f+R_VlocUTiLm)42mNg7mVKxGL??M7Vk4uCXJb6NzfqW z0z__gRkYMAltkt^a;dm6*&SWr6Dr@e(YJDlGgu~t_ZPHwy-`!qy)RCUiC_Z=KY6@q zH9auKxCOUa;9Z4NR5IpMv56mWXi~WGPaTvZE*;%lO?Rf0*XMlZi)ALqV$Dl4;Vy*Y zfIf`%EJK2BCZwu-8CFm>?cLCOG%L6UsY#)TyCs`&m*di|f}HgjMt(3dPSvFxuMW%; z$?W_LlN~H;V#yG}g1}xXK~kt~(tC=}`hA#~D4nRtINvybJOtf+SXF|$#4dg6^do1H zo@nw%)!owNU4yn}1TICNK`JHeDASks5E=PB$7vlc1+!v;HLo9eIv{3SiBcD$r*pD# 
zHwmb;1Tl-l3c*%{F%{OGlc&e{spinuv;>_moSD+R)RP9IKsTZcY-qiceA+I-&$p2G zkFcE_^1mPpQfw3T>EB$Hoag@y*#uIB9&_kFk6jY%e(3~_{O9|AO_2Q#m+x6p+0?(*i_s?`i~g zTfpAn-~R9SUy2oNKv@F8jx>Q-XjT(jy0*KiBdS8MwM2r2xOb+2@t;3^HkwV{=!=4}eFxB$|J`0h(oaSntIXy(w?kIbKfS&IodNO~*r*o}*4g|7fAKiH@ zv{9Sb__cxVh(o`E0Wq;>BXC`F)kQrZ`cqL2bP!#|VSLANd!%ScDlUfMEF(4~J2<4H;bvukN%vNMo?Y1Bk`Htj5xb zjmd4pt+BN&x*nwtI!eV-dzz%>s4vB$STl<0VF2tuI|R=^3u+@Pk&4lBgq@MMw(P)$ znvR1xd-eBy?9&Pi(ODB*NQo#X3e*Hv5*rOi3jJeN5*=BWEOXhPVu08dR071{2$#Xg zhfqFvmfGgnZI{L*obWEc&Q0SDbuxZ-KcWdlHi_mN>RM{)mdHx!5VOPlR6XElzq>MB z0b!q-&{a&$_mNa~OeTkK8XBJ&)~ChgKerBG_=Z;wF`;Ii&6|D42m-7P55Zlnm0d)6 zm(mU9xZL-w{Bt_vm=m#yr@!#TXhC4h!63=1h9@T1zo-644@P5A)UMvQ#8~Y^58C^+@Xx8{z#83pq)-`85ExiZf z@4Oyo_t~aDz~@9Q_+PBO@D%M|-k$)xwl0@JmeU{*%0OtbwunCwb~5uuQn3LT`G-`q~(gUVdsNQ5!s9#?oNWm9Pf`6!nw9(c&oLIJw&*+dYX(hUE zr4iZl(my1P+7_>%X`?89@yY~*2a1B{%sF6$m}Y@zr0K{;Itq;HsfwcSwcBi?7`e62 z8g&qJa%u^rJk=DTQ3qAcxlyMpdMuLr87=`Kp0&&G6NTQd!fCpLeRG^-tA}TB|I87Z zJDE7v7Mi7k)X(OQ<{n7OFpl_n9W*Up0i#oEhdhjajPs0}(C&j?Qkt$#dLBaH?@Y{U zhUz`DsT1T5MMg;v?oVt3G-koD5m%|@?4sxMYg_mMZLF}a&HT|8f1Iq1?T73(exqWx zA5ZjIfQBZGGtm=MY?(EocuvuBE#ikr-_FQ7ajk5%H2Xg@Ho>KN$)4KM*l?2nF;iph zNeTgTAe=iF!38GQ#os;>0$f_y&VkH~;M9kKRsQxQrz0mvc!xbtUF+c2XN2Zx6=r6F&2y;gT8AqOTA(tf z%6llYf{y`@>cQnUN+wT-fRL#wVYTcm%P@>7#qUv}^`$~HnC}>jx^gf&u2;m1IT<_t z`sfXe*A`Q1hDWFg$4Ns{3G+^Ioiq2b-N{Pf<>T<`^eg=xJ(ko!30}zW^3{)PG@*I>JeIW!u2R;CkK*s|6v*jm! z$|e;)9qIY`S9OtvS~2RvUabiO6AcTW@O3A-Ju5U(h#b+ax%p_=<8RUb)Ue0q8u@Zp zoHZ+;DzqqRc5ikzPn%B-@w@zhz)L}&X${Sw)?)`-uE}Ui4WSD>HYZsdSq)*=cmM#O zt)yomgwgUmzo95-K^#^0c@KhhLTGNQt~)jjDG3OaLxP!+UFame_}z<_52WrPtj zi*L9XYfei@$&e9&3+fPwgOHlYB5fu9+n}6I+5lGPd^7|uaf_J zwA+keFP0qk3pf|bi>I;%bndFO;3_tDi=bnJ-w8IaCeCywFfs0wZ-}K{E1wg!RX||%iue^qF9RV8LN~nOHz9)9mkCyP)$uQ_gw&auptkF*a8z;RQ+u4I1 zNmNPlmz@E@1~gWwM_;Z{NE?ZC>*`Y7OrS<>F19*14VHBidS`E)%qeJI@J+??&QWn! 
zYPC*X{q@bx_F0c~Q%b%YEy_-NYlC(yq%HHNyKpLLJIpjlZpIIwuCUPsng!Baan_5> zAN1Dd`dRDHMT6zTiMdO1*5O%Lo$@-QM^`i$+*3=NeZ{tH{!*T3y6`GtDa*y0Pa2$E z-Od&-*)uo=4M0b<@N$UY`a25C*Lmswwu>EA#k_SKsHp#pN{YEMSMMZ&PIG`;{zHEk zTtBP*cum45XJ+C+uSQ}$M9WDp6(n=Kt9mb?Kok$|W(7wb9K;H!^yYd`Ce>*{)#z9t z7{Rh*w7^(2ud83LjUgUmw9UB?j~6$j=t2G_rMUm@=MZ^)M^fhwt#5OXmc;}zb(W5)*guN$~k~WPJRcucRQgAqs zOseqY%^>kGr5qC7U+9x)fg(O(7}+=a3kfBGCJw@5z5E)f>%2yP3xOWUWB9R&vCgf! zDFo|ZOc;5;_J^1qkJAoWapJJdO3b zR=k)2gA4DrsySy^$bCj9)?8+gN2t~(RJWz>{%lfjToM-FGV@BuoY*kjwDV8B&l&qT z+Cz|0q^PbQj*1CQWYwwkma(kKspX(+poBLGdp(|`k=G`b(mc9=_ON*~MXAFHG|0%gIz zz~KLHU7fFEcg~kxT^pLo2Rse#^rNufkjupk!TlGLc5~6|8>g)ez1e@Ry1S%CBzaY# z_SMg!m3=qLfftpp0AAwg=v%I9{pqO8V&L^5T877fw>Flp3l%I*1@;8gj>(=42Cf3$ zw;H&1e2r}B^PfI3`s+gPp4aMs*@dc4bk^b7U9TgocTEFFVDXWEeO`S#-shaRdcXG2 zW0Sg&$*1fZ_(;8bV*Rb1_U}&4ns*j>u)^-hK8f8z#Ygvs{Mfru{_M3&zqiyM!FyP! zRnd&BdF#Gzc)nZo7F}+3bk@dpv!m&zt#>nQF3BwZArahp`bCWxu)qXff2bVx)9tb3 zS>s*nfxXfxy{?;o11ERdpPl}6?%5qJwR73PR_u1;`A3gb=BA(d6!Piy-;J)j4MBG+ z&R7S!Ok!&3mwz{|YR3k&bwIQ_POWX>T^fMQj;&fsS5{3Zu-wvuQU20 z|A-ncz5yB&4#`{>xX-I6@+ipO3Rxouor?M~XAjV|w_kIo9snX~@L zi-SG;H<(uUhB-8GNSbS!tLgh=!agI)@r(iuPaqRR8b%|39@~cgO!f9q%3XF$TV0_;}Vy n^m9Xj^Rc4HE~!uk9t%~^STnQD)z4*}Q$iB}sO24` -- Gitee From e88a021b9baa90a627d2e20b4138a6e8f6077227 Mon Sep 17 00:00:00 2001 From: i-robot Date: Tue, 4 Mar 2025 01:46:23 +0000 Subject: [PATCH 197/333] =?UTF-8?q?!3073=20=E3=80=90=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E3=80=91=E6=AF=94=E5=AF=B9=E5=86=85=E9=83=A8framework=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E4=BC=98=E5=8C=96=20Merge=20pull=20request=20!3073=20?= =?UTF-8?q?from=20yinglinwei/master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/utils.py | 4 ++ .../msprobe/mindspore/compare/ms_compare.py | 15 +++--- .../test_dump_file/dump_no_pt_no_ms.json | 3 ++ .../test_dump_file/ms_dump_no_framework.json | 4 ++ .../test_dump_file/pt_dump_no_framework.json | 4 ++ .../msprobe/test/core_ut/common/test_utils.py | 46 
++++++++++++++++++- .../dump_file/mindspore_data/dump.json | 1 + .../compare/dump_file/pytorch_data/dump.json | 1 + .../mindspore_ut/compare/test_ms_compare.py | 29 ++++++------ .../layer_mapping/mindspore/dump.json | 1 + .../resources/layer_mapping/pytorch/dump.json | 1 + 11 files changed, 85 insertions(+), 24 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/dump_no_pt_no_ms.json create mode 100644 debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/ms_dump_no_framework.json create mode 100644 debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/pt_dump_no_framework.json diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index c06b5b64927..7ec0490168f 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -247,6 +247,10 @@ def md5_find(data): def detect_framework_by_dump_json(file_path): + json_data = load_json(file_path) + framework = json_data.get("framework", None) + if framework in [Const.PT_FRAMEWORK, Const.MS_FRAMEWORK]: + return framework pattern_ms = r'"type":\s*"mindspore' pattern_pt = r'"type":\s*"torch' with FileOpen(file_path, 'r') as file: diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 8509a7f38ad..e0915f8179b 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -22,10 +22,10 @@ import pandas as pd from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.exceptions import FileCheckException -from msprobe.core.common.file_utils import FileOpen, create_directory, load_json, load_npy, load_yaml +from msprobe.core.common.file_utils import create_directory, load_json, load_npy, load_yaml from msprobe.core.common.log import logger from 
msprobe.core.common.utils import CompareException, check_compare_param, check_configuration_param, \ - check_op_str_pattern_valid, get_dump_mode, set_dump_path + check_op_str_pattern_valid, get_dump_mode, set_dump_path, detect_framework_by_dump_json from msprobe.core.compare.acc_compare import Comparator, ModeConfig from msprobe.core.compare.check import dtype_mapping from msprobe.core.compare.layer_mapping import generate_data_mapping_by_layer_mapping @@ -382,12 +382,11 @@ class MSComparator(Comparator): def check_cross_framework(bench_json_path): - pattern = r'"data_name":\s*"[^"]+\.pt"' - with FileOpen(bench_json_path, 'r') as file: - for line in file: - if re.search(pattern, line): - return True - return False + framework = detect_framework_by_dump_json(bench_json_path) + if framework == Const.PT_FRAMEWORK: + return True + else: + return False def ms_compare(input_param, output_path, **kwargs): diff --git a/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/dump_no_pt_no_ms.json b/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/dump_no_pt_no_ms.json new file mode 100644 index 00000000000..63a062d8ffa --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/dump_no_pt_no_ms.json @@ -0,0 +1,3 @@ +{ + "task": "tensor" +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/ms_dump_no_framework.json b/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/ms_dump_no_framework.json new file mode 100644 index 00000000000..b223c74b231 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/ms_dump_no_framework.json @@ -0,0 +1,4 @@ +{ + "task": "tensor", + "type": "mindspore.float16" +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/pt_dump_no_framework.json b/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/pt_dump_no_framework.json new file mode 100644 
index 00000000000..2444ae1fd40 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/core_ut/common/test_dump_file/pt_dump_no_framework.json @@ -0,0 +1,4 @@ +{ + "task": "tensor", + "type": "torch.float16" +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py index 3472ca9018e..61766ed27c0 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -# Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,11 +18,13 @@ import json import os import tempfile from datetime import datetime, timezone +import unittest from unittest import TestCase from unittest.mock import MagicMock, mock_open, patch import OpenSSL import numpy as np +from pathlib import Path from msprobe.core.common.const import Const from msprobe.core.common.file_utils import ( @@ -53,7 +55,8 @@ from msprobe.core.common.utils import (CompareException, recursion_depth_decorator, MsprobeBaseException, check_str_param, - is_json_file) + is_json_file, + detect_framework_by_dump_json) class TestUtils(TestCase): @@ -488,3 +491,42 @@ class TestCheckCrtValid(TestCase): with self.assertRaises(RuntimeError) as context: check_crt_valid(self.cert_file_path) self.assertIn('The SSL certificate is invalid', str(context.exception)) + + +class TestDetectFrameworkByDumpJson(unittest.TestCase): + + @patch('msprobe.core.common.utils.load_json') + def test_valid_pytorch_framework(self, mock_load_json): + mock_load_json.return_value = {"framework": Const.PT_FRAMEWORK} + + result = 
detect_framework_by_dump_json("dummy_path") + + self.assertEqual(result, Const.PT_FRAMEWORK) + + @patch('msprobe.core.common.utils.load_json') + def test_valid_mindspore_framework(self, mock_load_json): + mock_load_json.return_value = {"framework": Const.MS_FRAMEWORK} + + result = detect_framework_by_dump_json("dummy_path") + + self.assertEqual(result, Const.MS_FRAMEWORK) + + def test_detect_framework_in_file(self): + self.current_dir = Path(__file__).parent + file_path = self.current_dir / "test_dump_file/pt_dump_no_framework.json" + result = detect_framework_by_dump_json(file_path) + self.assertEqual(result, Const.PT_FRAMEWORK) + + self.current_dir = Path(__file__).parent + file_path = self.current_dir / "test_dump_file/ms_dump_no_framework.json" + result = detect_framework_by_dump_json(file_path) + self.assertEqual(result, Const.MS_FRAMEWORK) + + @patch("msprobe.core.common.utils.logger") + def test_detect_framework_exception(self, mock_logger): + self.current_dir = Path(__file__).parent + file_path = self.current_dir / "test_dump_file/dump_no_pt_no_ms.json" + with self.assertRaises(CompareException) as context: + result = detect_framework_by_dump_json(file_path) + self.assertEqual(context.exception.code, CompareException.INVALID_PARAM_ERROR) + mock_logger.error.assert_called_once_with(f"{file_path} must be based on the MindSpore or PyTorch framework.") diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/mindspore_data/dump.json b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/mindspore_data/dump.json index 5b954f6d644..48800c0455c 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/mindspore_data/dump.json +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/mindspore_data/dump.json @@ -1,6 +1,7 @@ { "task": "statistics", "level": "mix", + "framework": "mindspore", "dump_data_dir": null, "data": { "Tensor.__add__.0.forward": { diff --git 
a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/pytorch_data/dump.json b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/pytorch_data/dump.json index 150cbd43b16..b2704185ff1 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/pytorch_data/dump.json +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/dump_file/pytorch_data/dump.json @@ -1,6 +1,7 @@ { "task": "statistics", "level": "mix", + "framework": "pytorch", "dump_data_dir": null, "data": { "Tensor.__add__.0.forward": { diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py index b5cbff9784a..035fe0c53a4 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py @@ -5,6 +5,7 @@ import random import shutil import tempfile import unittest +from unittest.mock import patch import numpy as np import torch @@ -350,21 +351,21 @@ class TestUtilsMethods(unittest.TestCase): finally: shutil.rmtree(data_path) - def test_check_cross_framework(self): - ms_data = { - "data_name": "Cell.model.language_model.encoder.layers.5.input_norm.FusedRMSNorm.forward.0.input.0.npy", - } - pt_data = { - "data_name": "Module.module.module.language_model.encoder.layers.0.input_norm.RMSNorm.forward.0.input.0.pt", - } + @patch('msprobe.mindspore.compare.ms_compare.detect_framework_by_dump_json') + def test_check_cross_framework_valid_pytorch(self, mock_detect_framework): + mock_detect_framework.return_value = Const.PT_FRAMEWORK + + result = check_cross_framework("dummy_path") + + self.assertTrue(result) + + @patch('msprobe.mindspore.compare.ms_compare.detect_framework_by_dump_json') + def test_check_cross_framework_invalid_framework(self, mock_detect_framework): + mock_detect_framework.return_value = Const.MS_FRAMEWORK + + result = 
check_cross_framework("dummy_path") - def check_data(data): - with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', encoding='utf-8', delete=True) as temp_file: - json.dump(data, temp_file, ensure_ascii=False, indent=4) - temp_file.flush() - return check_cross_framework(temp_file.name) - self.assertFalse(check_data(ms_data)) - self.assertTrue(check_data(pt_data)) + self.assertFalse(result) def test_comapre_process(self): data_path = tempfile.mkdtemp(prefix='dump_data', dir='/tmp') diff --git a/debug/accuracy_tools/msprobe/test/resources/layer_mapping/mindspore/dump.json b/debug/accuracy_tools/msprobe/test/resources/layer_mapping/mindspore/dump.json index b55f9e0699f..153d84e7d11 100644 --- a/debug/accuracy_tools/msprobe/test/resources/layer_mapping/mindspore/dump.json +++ b/debug/accuracy_tools/msprobe/test/resources/layer_mapping/mindspore/dump.json @@ -1,6 +1,7 @@ { "task": "statistics", "level": "mix", + "framework": "mindspore", "dump_data_dir": null, "data": { "Cell.network_with_loss.module.language_model.embedding.word_embeddings.VocabParallelEmbedding.forward.0": { diff --git a/debug/accuracy_tools/msprobe/test/resources/layer_mapping/pytorch/dump.json b/debug/accuracy_tools/msprobe/test/resources/layer_mapping/pytorch/dump.json index d7dd1c0c38e..02239176a9d 100644 --- a/debug/accuracy_tools/msprobe/test/resources/layer_mapping/pytorch/dump.json +++ b/debug/accuracy_tools/msprobe/test/resources/layer_mapping/pytorch/dump.json @@ -1,6 +1,7 @@ { "task": "statistics", "level": "mix", + "framework": "pytorch", "dump_data_dir": null, "data": { "Module.module.module.language_model.embedding.word_embeddings.VocabParallelEmbedding.forward.0": { -- Gitee From f40ed9cfe095a3291704c7327437e6d54891329b Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Wed, 22 Jan 2025 21:04:56 +0800 Subject: [PATCH 198/333] =?UTF-8?q?=E9=9D=99=E6=80=81=E5=9B=BEcell?= =?UTF-8?q?=E7=BA=A7dump?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 5 + .../msprobe/docs/02.config_introduction.md | 20 +- .../msprobe/docs/06.data_dump_MindSpore.md | 62 ++- .../msprobe/mindspore/common/const.py | 1 + .../mindspore/debugger/precision_debugger.py | 6 +- .../mindspore/dump/cell_dump_process.py | 450 ++++++++++++++++++ .../mindspore/dump/dump_tool_factory.py | 17 +- .../mindspore/dump/graph_mode_cell_dump.py | 69 +++ .../msprobe/mindspore/task_handler_factory.py | 7 +- .../debugger/test_graph_cell_dump.py | 309 ++++++++++++ 10 files changed, 922 insertions(+), 24 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py create mode 100644 debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index b49b4fffd5e..6824fc8b42b 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -206,12 +206,14 @@ class Const: TORCH_FLOAT32 = "torch.float32" TORCH_BFLOAT16 = "torch.bfloat16" + TYPE = 'type' DTYPE = 'dtype' SHAPE = 'shape' MAX = 'Max' MIN = 'Min' MEAN = 'Mean' NORM = 'Norm' + DATA_NAME = 'data_name' CODE_STACK = 'Code Stack' OP_NAME = 'Op Name' @@ -224,6 +226,9 @@ class Const: SCOPE_SEPARATOR = "/" REPLACEMENT_CHARACTER = "_" + FORWARD_PATTERN = SEP + FORWARD + SEP + BACKWARD_PATTERN = SEP + BACKWARD + SEP + OPTIMIZER = "optimizer" CLIP_GRAD = "clip_grad" END_PREFIX = "end_" diff --git a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md index f134bd45362..5b2e6d50278 100644 --- a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md +++ b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md @@ -10,19 +10,19 @@ ### 1.1 通用配置 -| 参数 | 解释 | 是否必选 
| -| ----------------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- | -| task | dump 的任务类型,str 类型。可选参数:
"statistics":仅采集统计信息,默认值;
"tensor":采集统计信息和完全复刻整网的真实数据;
"run_ut":精度预检,仅 PyTorch 场景支持,采集数据时勿选;
"overflow_check":溢出检测;
"free_benchmark":无标杆比对;
"grad_probe":梯度监控;
"structure":仅采集模型结构以及调用栈信息,不采集具体数据。
根据 task 参数取值的不同,可以配置不同场景参数,详见:
[1.2 task 配置为 statistics](#12-task-配置为-statistics),
[1.3 task 配置为 tensor](#13-task-配置为-tensor),
[1.4 task 配置为 run_ut](#14-task-配置为-run_ut),
[1.5 task 配置为 overflow_check](#15-task-配置为-overflow_check),
[1.6 task 配置为 free_benchmark](#16-task-配置为-free_benchmark),
[1.7 task 配置为 grad_probe](#17-task-配置为-grad_probe)。
**配置示例**:"task": "tensor"。 | 否 | -| dump_path | 设置 dump 数据目录路径,str 类型。
**配置示例**:"dump_path": "./dump_path"。 | 是 | -| rank | 指定对某张卡上的数据进行采集,list[Union[int, str]] 类型,默认未配置(表示采集所有卡的数据),应配置元素为 ≥0 的整数或类似"4-6"的字符串,且须配置实际可用的 Rank ID。
PyTorch 场景: Rank ID 从 0 开始计数,最大取值为所有节点可用卡总数-1,若所配置的值大于实际训练所运行的卡的 Rank ID,则 dump 数据为空,比如当前环境 Rank ID 为 0 到 7,实际训练运行 0 到 3 卡,此时若配置 Rank ID 为 4 或不存在的 10 等其他值,dump 数据为空。
MindSpore 场景:所有节点的 Rank ID 均从 0 开始计数,最大取值为每个节点可用卡总数-1,config.json 配置一次 rank 参数对所有节点同时生效。
注意,单卡训练时,rank必须为[],即空列表,不能指定rank。
**配置示例**:"rank": [1, "4-6"]。 | 否 | -| step | 指定采集某个 step 的数据,list[Union[int, str]] 类型。默认未配置,表示采集所有 step 数据。采集特定 step 时,须指定为训练脚本中存在的 step,可逐个配置,也可以指定范围。
**配置示例**:"step": [0, 1 , 2, "4-6"]。 | 否 | -| level | dump 级别,str 类型,根据不同级别采集不同数据。可选参数:
"L0":dump 模块级精度数据,仅 PyTorch 与 MindSpore 动态图场景支持,使用背景详见 [1.1.1 模块级精度数据 dump 说明](#111-模块级精度数据-dump-说明);
"L1":dump API 级精度数据,默认值,仅 PyTorch 与 MindSpore 动态图场景支持;
"L2":dump kernel 级精度数据,PyTorch场景详细介绍见 [PyTorch 场景的 kernel dump 说明](./04.kernel_dump_PyTorch.md);MindSpore场景详细介绍见 [MindSpore 场景的 kernel dump 说明](./28.kernel_dump_MindSpore.md);
"mix":dump module 模块级和 API 级精度数据,即"L0"+"L1",仅 PyTorch 与 MindSpore 动态图场景支持。
"debug":单点保存功能,细节详见[单点保存工具 README](./28.debugger_save_instruction.md)
**配置示例**:"level": "L1"。 | 否 | -| enable_dataloader | 自动控制开关,bool 类型,仅 PyTorch 场景支持。可选参数 true(开启)或 false(关闭),默认为 false。配置为 true 后自动识别 step 参数指定的迭代,并在该迭代执行完成后退出训练,此时 start、stop 和 step 函数可不配置,开启该开关要求训练脚本是通过 torch.utils.data.dataloader 方式加载数据。仅支持 PyTorch 单卡训练使用,分布式训练场景下存在数据 dump 不全问题。 **这个特性下个版本将被废弃** | 否 | +| 参数 | 解释 | 是否必选 | +| ----------------- |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- | +| task | dump 的任务类型,str 类型。可选参数:
"statistics":仅采集统计信息,默认值;
"tensor":采集统计信息和完全复刻整网的真实数据;
"run_ut":精度预检,仅 PyTorch 场景支持,采集数据时勿选;
"overflow_check":溢出检测;
"free_benchmark":无标杆比对;
"grad_probe":梯度监控。
根据 task 参数取值的不同,可以配置不同场景参数,详见:
[1.2 task 配置为 statistics](#12-task-配置为-statistics),
[1.3 task 配置为 tensor](#13-task-配置为-tensor),
[1.4 task 配置为 run_ut](#14-task-配置为-run_ut),
[1.5 task 配置为 overflow_check](#15-task-配置为-overflow_check),
[1.6 task 配置为 free_benchmark](#16-task-配置为-free_benchmark),
[1.7 task 配置为 grad_probe](#17-task-配置为-grad_probe)。
**配置示例**:"task": "tensor"。 | 否 | +| dump_path | 设置 dump 数据目录路径,str 类型。
**配置示例**:"dump_path": "./dump_path"。 | 是 | +| rank | 指定对某张卡上的数据进行采集,list[Union[int, str]] 类型,默认未配置(表示采集所有卡的数据),应配置元素为 ≥0 的整数或类似"4-6"的字符串,且须配置实际可用的 Rank ID。
PyTorch 场景: Rank ID 从 0 开始计数,最大取值为所有节点可用卡总数-1,若所配置的值大于实际训练所运行的卡的 Rank ID,则 dump 数据为空,比如当前环境 Rank ID 为 0 到 7,实际训练运行 0 到 3 卡,此时若配置 Rank ID 为 4 或不存在的 10 等其他值,dump 数据为空。
MindSpore 场景:所有节点的 Rank ID 均从 0 开始计数,最大取值为每个节点可用卡总数-1,config.json 配置一次 rank 参数对所有节点同时生效。静态图 L0 级别 dump 暂不支持指定rank。
注意,单卡训练时,rank必须为[],即空列表,不能指定rank。
**配置示例**:"rank": [1, "4-6"]。 | 否 | +| step | 指定采集某个 step 的数据,list[Union[int, str]] 类型。默认未配置,表示采集所有 step 数据。采集特定 step 时,须指定为训练脚本中存在的 step,可逐个配置,也可以指定范围。
**配置示例**:"step": [0, 1 , 2, "4-6"]。 | 否 | +| level | dump 级别,str 类型,根据不同级别采集不同数据。可选参数:
"L0":dump 模块级精度数据,PyTorch 与 MindSpore 均支持,使用背景详见 [1.1.1 模块级精度数据 dump 说明](#111-模块级精度数据-dump-说明);
"L1":dump API 级精度数据,默认值,仅 PyTorch 与 MindSpore 动态图场景支持;
"L2":dump kernel 级精度数据,PyTorch场景详细介绍见 [PyTorch 场景的 kernel dump 说明](./04.kernel_dump_PyTorch.md);
"mix":dump module 模块级和 API 级精度数据,即"L0"+"L1",仅 PyTorch 与 MindSpore 动态图场景支持。
**配置示例**:"level": "L1"。 | 否 | +| enable_dataloader | 自动控制开关,bool 类型,仅 PyTorch 场景支持。可选参数 true(开启)或 false(关闭),默认为 false。配置为 true 后自动识别 step 参数指定的迭代,并在该迭代执行完成后退出训练,此时 start、stop 和 step 函数可不配置,开启该开关要求训练脚本是通过 torch.utils.data.dataloader 方式加载数据。仅支持 PyTorch 单卡训练使用,分布式训练场景下存在数据 dump 不全问题。 **这个特性下个版本将被废弃** | 否 | | async_dump | 异步 dump 开关,bool 类型。可选参数 true(开启)或 false(关闭),默认为 false。配置为 true 后开启异步 dump,即采集的精度数据会在当前 step 训练结束后统一落盘,训练过程中工具不触发同步操作。由于使用该模式有**显存溢出**的风险,当 task 配置为 tensor 时,即真实数据的异步dump模式,必须配置 [list](#13-task-配置为-tensor) 参数,指定需要 dump 的 tensor 。该模式暂不支持复数类型 tensor
的统计量计算。 | 否 | #### 1.1.1 模块级精度数据 dump 说明 -仅 PyTorch 与 MindSpore 动态图场景支持。 +PyTorch 与 MindSpore 均支持。 大模型场景下,通常不是简单的利用自动迁移能力实现从 GPU 到 NPU 的训练脚本迁移,而是会对 NPU 网络进行一系列针对性的适配,因此,常常会造成迁移后的 NPU 模型存在部分子结构不能与 GPU 原始模型完全对应。模型结构不一致导致 API 调用类型及数量不一致,若直接按照 API 粒度进行精度数据 dump 和比对,则无法完全比对所有的 API。 @@ -46,7 +46,7 @@ MindSpore 静态图场景配置 kernel_name,可以是算子的名称列表,也可以指定算子类型("level": "L2"时不支持),还可以配置算子名称的正则表达式(当字符串符合“name-regex(xxx)”格式时,后台则会将其作为正则表达式。
配置示例:list: ["name-regex(Default/.+)"]
可匹配算子名称以“Default/”开头的所有算子。 data_modedump 数据过滤,str 类型。否 PyTorch 与 MindSpore 动态图场景:支持"all"、"forward"、"backward"、"input"和"output",除"all"外,其余参数可以自由组合。默认为["all"],即保存所有 dump 的数据。
配置示例:"data_mode": ["backward"] (仅保存反向数据)或 "data_mode": ["forward", "input"](仅保存前向的输入数据)。 - MindSpore 静态图场景:仅支持"all"、"input"和"output"参数,且各参数只能单独配置,不支持自由组合。
配置示例:"data_mode": ["all"]。 + MindSpore 静态图场景:L0 级别 dump 仅支持"all"、"forward"和"backward"参数;L2 级别 dump 仅支持"all"、"input"和"output"参数。且各参数只能单独配置,不支持自由组合。
配置示例:"data_mode": ["all"]。 summary_mode控制 dump 文件输出的模式,str 类型,仅 PyTorch 与 MindSpore 动态图场景支持,可选参数:
md5:dump 输出包含 CRC-32 值以及 API 统计信息的 dump.json 文件,用于验证数据的完整性;
statistics:dump 仅输出包含 API 统计信息的 dump.json 文件,默认值。
配置示例:"summary_mode": "md5"。否MindSpore静态图jit_level=O2场景L2级dump,支持上述配置的同时额外支持配置统计项列表,可选统计项为max、min、mean、l2norm,可从中任意选取组合搭配。其中mean、l2norm的结果为float数据格式。
配置示例:"summary_mode": ["max", "min"]。 diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index f7507facd2a..0ee33b44a8c 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -30,8 +30,10 @@ dump 的"tensor"模式采集数据量大小,可以参考[数据量基线](data ## 5. 场景介绍 -### 5.1 静态图场景 -在静态图场景下,msprobe 仅支持 **L2 Level** 的数据采集。 +### 5.1 静态图场景 +在静态图场景下,msprobe 支持 **L0 Level** 和 **L2 Level** 的数据采集。 +- **L0 Level(Cell 级)** :采集 `Cell` 对象的数据,适用于需要分析特定网络模块的情况。 + - **L2 Level(Kernel 级)** :采集底层算子的输入输出数据,适用于深入分析算子级别的精度问题。 采集方式请参见[示例代码 > 静态图场景](#71-静态图场景)。详细介绍请参见[《config.json 配置文件介绍》](./02.config_introduction.md#11-通用配置)中的“level 参数”和[《config.json 配置示例》](./03.config_examples.md#2-mindspore-静态图场景) 中的“MindSpore 静态图场景”。 @@ -110,7 +112,7 @@ stop() **功能说明**:结束一个 step 的数据采集,完成所有数据落盘并更新 dump 参数。在一个 step 结束的位置添加,且必须在 **stop** 函数之后的位置调用。 该函数需要配合 **start** 和 **stop** 函数使用,尽量添加在反向计算代码之后,否则可能会导致反向数据丢失。 -**仅未使用 Model 高阶 API 的动态图场景支持。** +**仅未使用 Model 高阶 API 的动态图和静态图场景支持。** **原型**: @@ -152,7 +154,7 @@ save(variable, name, save_backward=True) ### 6.2 msprobe.mindspore.common.utils.MsprobeStep -**功能说明**:MindSpore Callback类,自动在每个step开始时调用start()接口,在每个step结束时调用stop()、step()接口。实现使用 Model 高阶 API 的动态图场景下 L0、L1、mix 级别的精度数据采集控制,控制粒度为单个 **Step** ,而 PrecisionDebugger.start, PrecisionDebugger.stop 接口的控制粒度任意训练代码段。 +**功能说明**:MindSpore Callback类,自动在每个step开始时调用start()接口,在每个step结束时调用stop()、step()接口。实现使用 Model 高阶 API 的动态图场景下 L0、L1、mix 级别,和静态图场景下 L0级别的精度数据采集控制,控制粒度为单个 **Step** ,而 PrecisionDebugger.start, PrecisionDebugger.stop 接口的控制粒度任意训练代码段。 **原型**: @@ -188,6 +190,54 @@ seed_all(seed=1234, mode=False, rm_dropout=True) ### 7.1 静态图场景 +#### 7.1.1 L0 级别 + +##### 7.1.1.1 未使用 Model 高阶 API + + +```python +import mindspore as ms +ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend") + +from msprobe.mindspore import PrecisionDebugger +debugger = 
PrecisionDebugger(config_path="./config.json") + +# 模型、损失函数的定义以及初始化等操作 +# ... +model = Network() +# 数据集迭代的地方往往是模型开始训练的地方 +for data, label in data_loader: + debugger.start(model) # 进行 L0 级别下Cell 对象的数据采集时调用 + # 如下是模型每个 step 执行的逻辑 + grad_net = ms.grad(model)(data) + # ... + debugger.step() # 更新迭代数 +``` + +##### 7.1.1.2 使用 Model 高阶 API + + +```python +import mindspore as ms +from mindspore.train import Model +ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend") + +from msprobe.mindspore import PrecisionDebugger +from msprobe.mindspore.common.utils import MsprobeStep +debugger = PrecisionDebugger(config_path="./config.json") + +# 模型、损失函数的定义以及初始化等操作 +# ... + +model = Network() +# 进行 L0 级别下 Cell 对象的数据采集时调用 +debugger.start(model) +trainer = Model(model, loss_fn=loss_fn, optimizer=optimizer, metrics={'accuracy'}) +trainer.train(1, train_dataset, callbacks=[MsprobeStep(debugger)]) +``` + +#### 7.1.2 L2 级别 + ```python import mindspore as ms ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend") @@ -301,7 +351,9 @@ trainer.train(1, train_dataset) ### 8.1 静态图场景 -训练结束后,数据将保存在 `dump_path` 指定的目录下。 +训练结束后,数据将保存在 `dump_path` 指定的目录下。
+L0 级别 dump 的目录结构与动态图场景下目录结构一致。
+L2 级别 dump 的目录结构如下所示: 若jit_level=O2,且使用mindstudio-probe发布包或源码编包时添加了`--include-mod=adump`选项,目录结构示例如下: ``` diff --git a/debug/accuracy_tools/msprobe/mindspore/common/const.py b/debug/accuracy_tools/msprobe/mindspore/common/const.py index 067e783842f..b41dc5ce012 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/const.py +++ b/debug/accuracy_tools/msprobe/mindspore/common/const.py @@ -61,6 +61,7 @@ class Const: DROPOUT_API_NAME_PREFIX = "dropout" GRAPH_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.INPUT, CoreConst.OUTPUT] + GRAPH_CELL_DUMP_DATA_MODE_LIST = [CoreConst.ALL, CoreConst.FORWARD, CoreConst.BACKWARD] HOOK_MS_PREFIX_DICT = { OPS_DATA_PREFIX: OPS_PREFIX, diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 7694d71dd98..a7082d3e569 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -34,6 +34,7 @@ from msprobe.mindspore.ms_config import parse_json_config from msprobe.mindspore.runtime import Runtime from msprobe.mindspore.service import Service from msprobe.mindspore.task_handler_factory import TaskHandlerFactory +from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump try: from msprobe.lib import _msprobe_c @@ -164,7 +165,7 @@ class PrecisionDebugger: else: if not instance.first_start: api_register.api_set_ori_func() - handler = TaskHandlerFactory.create(instance.config) + handler = TaskHandlerFactory.create(instance.config, model) handler.handle() instance.first_start = True @@ -199,6 +200,9 @@ class PrecisionDebugger: _msprobe_c._PrecisionDebugger().step() if instance.task in PrecisionDebugger.task_not_need_service: return + if instance.config.execution_mode != MsConst.PYNATIVE_MODE and instance.config.level == MsConst.CELL: + GraphModeCellDump.step() + return if instance.service: instance.service.step() 
HOOKCell.cell_count = defaultdict(int) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py new file mode 100644 index 00000000000..a21c4590b80 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py @@ -0,0 +1,450 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import time +import re +import json +import atexit +from multiprocessing import Pool + +import numpy as np +import mindspore as ms +from mindspore import nn, ops + +from msprobe.mindspore.common.log import logger +from msprobe.core.common.const import Const as CoreConst +from msprobe.core.common.file_utils import load_npy, save_json, remove_path +from msprobe.core.common.const import FileCheckConst + + +CONSTRUCT_FILE_NAME = "construct.json" +DEFAULT_RANK_DIR = "rank0" +KEY_LAYERS = "layers" +construct = {} +cell_list = [] +KEY_SIDE_EFFECT = "side_effect_io" +td = ops.TensorDump() +td_in = ops.TensorDump("in") +td.add_prim_attr(KEY_SIDE_EFFECT, False) +td_in.add_prim_attr(KEY_SIDE_EFFECT, False) +np_ms_dtype_dict = { + "bool": ms.bool_, + "int8": ms.int8, + "byte": ms.byte, + "int16": ms.int16, + "short": ms.short, + "int32": ms.int32, + "intc": ms.intc, + "int64": ms.int64, + "intp": ms.intp, + "uint8": ms.uint8, + "ubyte": ms.ubyte, + "uint16": ms.uint16, + "ushort": ms.ushort, + "uint32": 
ms.uint32, + "uintc": ms.uintc, + "uint64": ms.uint64, + "uintp": ms.uintp, + "float16": ms.float16, + "half": ms.half, + "float32": ms.float32, + "single": ms.single, + "float64": ms.float64, + "double": ms.double, + "bfloat16": ms.bfloat16, + "complex64": ms.complex64, + "complex128": ms.complex128 +} + + +def generate_file_path(dump_path, cell_prefix, suffix, io_type, index): + step_path = os.path.join(dump_path, "{step}") + rank_path = os.path.join(step_path, "{rank}") + data_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) + file_name = CoreConst.SEP.join([cell_prefix, suffix, io_type, str(index)]) + return os.path.join(data_path, file_name) + + +def partial_func(func, dump_path, cell_prefix, index, io_type): + def newfunc(*args, **kwargs): + return func(dump_path, cell_prefix, index, io_type, *args, **kwargs) + return newfunc + + +def clip_gradient(dump_path, cell_prefix, index, io_type, dx): + if io_type == CoreConst.OUTPUT: + temp = td(generate_file_path(dump_path, cell_prefix, CoreConst.BACKWARD, io_type, index), dx) + dx = ops.depend(dx, temp) + if io_type == CoreConst.INPUT: + temp = td_in(generate_file_path(dump_path, cell_prefix, CoreConst.BACKWARD, io_type, index), dx) + dx = ops.depend(dx, temp) + return dx + + +def cell_construct_wrapper(func, self): + def new_construct(self, *args, **kwargs): + new_args = [] + out_list = [] + + index = 0 + item = None + # The inputs of the cell. + for index, item in enumerate(args): + if self.data_mode == "backward" or self.data_mode == "all": + if ops.is_tensor(item): + item = self.output_clips[index](item) + if self.data_mode == "forward" or self.data_mode == "all": + if ops.is_tensor(item): + temp = td_in(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.INPUT, index), item) + item = ops.depend(item, temp) + new_args.append(item) + + out = func(*new_args, **kwargs) + + # The outputs of the cell. 
+ if isinstance(out, tuple): + for index, item in enumerate(out): + if self.data_mode == "backward" or self.data_mode == "all": + if ops.is_tensor(item): + item = self.input_clips[index](item) + if self.data_mode == "forward" or self.data_mode == "all": + if ops.is_tensor(item): + temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), item) + item = ops.depend(item, temp) + out_list.append(item) + else: + out_list.append(item) + out_list = tuple(out_list) + return out_list + else: + if self.data_mode == "backward" or self.data_mode == "all": + out = self.input_clips[0](out) + if self.data_mode == "forward" or self.data_mode == "all": + if ops.is_tensor(out): + temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), out) + out = ops.depend(out, temp) + return out + + return new_construct.__get__(self, type(self)) + + +# 获取目录下所有文件名并根据TensorDump落盘自增id从小到大排序 +def sort_filenames(path): + filenames = os.listdir(path) + id_pattern = re.compile(rf'{CoreConst.REPLACEMENT_CHARACTER}(\d+){CoreConst.NUMPY_SUFFIX}$') + filenames.sort(key=lambda x: int(id_pattern.findall(x)[0])) + return filenames + + +# 删除重复dump的文件:自定义文件名相同,并且数据相同 +def del_same_file(path, filenames): + result_list = [] + seen_prefixes = {} + for current_filename in filenames: + parts = current_filename.rsplit(CoreConst.REPLACEMENT_CHARACTER, 1) + prefix = parts[0] + if prefix not in seen_prefixes: + result_list.append(current_filename) + seen_prefixes[prefix] = current_filename + else: + current_file_path = os.path.join(path, current_filename) + current_file = load_npy(current_file_path) + prev_filename = seen_prefixes[prefix] + prev_file_path = os.path.join(path, prev_filename) + prev_file = load_npy(prev_file_path) + if np.array_equal(current_file, prev_file): + remove_path(current_file_path) + logger.warning(f"{current_file_path} is deleted!") + else: + result_list.append(current_filename) + return 
result_list + + +def rename_filename(path): + filenames = sort_filenames(path) + filenames = del_same_file(path, filenames) + + filename_dict = {} + for filename in filenames: + name_field = filename.rsplit(CoreConst.REPLACEMENT_CHARACTER, 1)[0] + + if name_field in filename_dict: + filename_dict[name_field] += 1 + else: + filename_dict[name_field] = 0 + + cell_index = filename_dict[name_field] + + # 修改文件名,增加重复调用Cell的序号 + if CoreConst.FORWARD_PATTERN in filename: + #Format: Cell.{cell_name}.{class_name}.{forward/backward}.{number}.{input/output}.{index}_{dtype}_{id}.npy + newFileName = filename.replace(CoreConst.FORWARD_PATTERN, CoreConst.FORWARD_PATTERN + str(cell_index) + CoreConst.SEP) + if CoreConst.BACKWARD_PATTERN in filename: + newFileName = filename.replace(CoreConst.BACKWARD_PATTERN, CoreConst.BACKWARD_PATTERN + str(cell_index) + CoreConst.SEP) + os.rename(os.path.join(path, filename), os.path.join(path, newFileName)) + logger.info(f"==========The rename_filename phase is Finished!==========") + + +# Extract the field between the first "." and the third to last ".", i.e. {cell_name} +def get_cell_name(str): + parts = str.split(CoreConst.SEP) + if len(parts) < 4: + return None + start_index = 1 + end_index = len(parts) - 3 + return CoreConst.SEP.join(parts[start_index:end_index]) + + +# Extract the field between the last "." and the second to last ".", i.e. 
{data_mode} +def get_data_mode(str): + last_dot_index = str.rfind(CoreConst.SEP) + second_last_dot_index = str.rfind(CoreConst.SEP, 0, last_dot_index) + data_mode = str[second_last_dot_index + 1:last_dot_index] + return data_mode + + +# 判断二者之间是否存在父子关系 +def check_relation(cell_name, parent_cell_name): + layers_pattern = rf"{CoreConst.SEP}{KEY_LAYERS}{CoreConst.SEP}\d+$" + last_dot_index = cell_name.rfind(CoreConst.SEP) + if last_dot_index != -1: + # 如果cell_name最后一个'.'之前的字段等于parent_cell_name,则判定存在父子关系 + sub_cell_name = cell_name[:last_dot_index] + if sub_cell_name == parent_cell_name: + return True + elif re.search(layers_pattern, cell_name): + # 如果cell_name以".layers.{layer_id}"结尾,且去掉该字段后等于parent_cell_name,则判定存在父子关系 + sub_cell_name = re.sub(layers_pattern, '', cell_name) + if sub_cell_name == parent_cell_name: + return True + return False + + +def get_construct(cell_list_input): + for cell in cell_list_input: + cell_name = get_cell_name(cell) + cell_data_mode = get_data_mode(cell) + found_flag = False + for parent_cell in cell_list_input: + parent_cell_name = get_cell_name(parent_cell) + parent_data_mode = get_data_mode(parent_cell) + has_relation = check_relation(cell_name, parent_cell_name) + if has_relation and parent_data_mode == cell_data_mode: + construct.update({cell: parent_cell}) + found_flag = True + break + if not found_flag: + construct.update({cell: None}) + + +def generate_construct(path): + global construct + filenames = sort_filenames(path) + + # 提取文件名中Cell.{cell_name}.{class_name}.{data_mode}.{重复调用此cell的序号}字段,并存入cell_list + for filename in filenames: + point_position = 3 + mid_field = filename.rsplit(CoreConst.SEP, point_position)[0] + if CoreConst.INPUT in filename: + if mid_field in cell_list: + cell_list.remove(mid_field) + cell_list.append(mid_field) + else: + if mid_field not in cell_list: + index = filenames.index(filename) + output_field = mid_field + CoreConst.OUTPUT + find_flag = False + for filename_other in cell_list[index + 1:]: + if
output_field in filename_other: + find_flag = True + if find_flag is False: + cell_list.append(mid_field) + + get_construct(cell_list) + + # 生成JSON文件 + rank_dir = os.path.dirname(path) + json_path = os.path.join(rank_dir, CONSTRUCT_FILE_NAME) + save_json(json_path, construct, indent=1) + + # 清空'construct'继续处理下一个路径下的数据 + construct = {} + logger.info(f"Construct data saved to {json_path}") + + +def process_file(file_path): + try: + # 读取.npy文件内容 + npy_content = load_npy(file_path) + logger.info(f"Loaded {file_path}: shape is {npy_content.shape}, dtype is {npy_content.dtype}") + + # 文件名举例:Cell.network._backbone.loss.CrossEntropyLoss.forward.0.input.0_float32_165.npy + parts = os.path.basename(file_path).split(CoreConst.SEP) + data_dtype = "" + # 获取0_float32_165或者0_in_float32_165中的float32 + data_dtype_list = parts[-2].split('_') + if len(data_dtype_list) > 1: + data_dtype = data_dtype_list[-2] + # op_name是Cell.network._backbone.loss.CrossEntropyLoss.forward.0 + op_name = CoreConst.SEP.join(parts[:-3]) + ms_dtype = np_ms_dtype_dict.get(data_dtype) + if ms_dtype is None: + logger.warning(f"Get dtype None from file {file_path}") + tensor_json = { + CoreConst.TYPE: 'mindspore.Tensor', + CoreConst.DTYPE: str(ms_dtype), + CoreConst.SHAPE: list(npy_content.shape), + CoreConst.MAX: npy_content.max().item(), + CoreConst.MIN: npy_content.min().item(), + CoreConst.MEAN: npy_content.mean().item(), + CoreConst.NORM: np.linalg.norm(npy_content).item(), + CoreConst.DATA_NAME: os.path.basename(file_path) + } + + # 根据文件名的最后一个部分(输入或输出)确定是添加到input_args还是output + if parts[-3] == CoreConst.INPUT: + return op_name, CoreConst.INPUT_ARGS, tensor_json + elif parts[-3] == CoreConst.OUTPUT: + return op_name, CoreConst.OUTPUT, tensor_json + else: + return None, None, None + + except Exception as e: + logger.error(f"Error reading {file_path}: {e}") + return None, None, None + + +def custom_sort(item, key_to_index): + key = item[0] + return key_to_index.get(key, float('inf')) + + +def 
generate_dump_info(path): + if not os.path.exists(path): + logger.error("The provided path does not exist.") + return + + dump_data = {"task": "tensor", "level": "L0", "dump_data_dir": path, "data": {}} + + with Pool(processes=10) as pool: + file_paths = [] + for root, _, files in os.walk(path): + for file in files: + if file.endswith(FileCheckConst.NUMPY_SUFFIX): + file_paths.append((os.path.join(root, file),)) + file_paths.sort() + results = pool.starmap(process_file, file_paths) + + # 收集结果 + for op_name, key, tensor_json in results: + if op_name: + if op_name not in dump_data.get(CoreConst.DATA, {}): + dump_data.get(CoreConst.DATA, {})[op_name] = {CoreConst.INPUT_ARGS: [], + CoreConst.INPUT_KWARGS: {}, + CoreConst.OUTPUT: []} + if key not in dump_data.get(CoreConst.DATA, {}).get(op_name, {}): + dump_data.get(CoreConst.DATA, {}).get(op_name, {})[key] = [] + dump_data.get(CoreConst.DATA, {}).get(op_name, {}).get(key, []).append(tensor_json) + + # 根据cell_list排序 + data_dict = dump_data.get(CoreConst.DATA, {}) + key_to_index = {key: index for index, key in enumerate(cell_list)} + sorted_data_dict = dict(sorted(data_dict.items(), key=lambda item: custom_sort(item, key_to_index))) + dump_data[CoreConst.DATA] = sorted_data_dict + + # 将数据写入dump.json + json_path = os.path.join(os.path.dirname(path), 'dump.json') + save_json(json_path, dump_data, indent=1) + + logger.info(f"Dump data saved to {json_path}") + + +def generate_stack_info(path): + if not os.path.exists(path): + logger.error("The provided path does not exist.") + return + + stack_data = {} + file_paths = [] + # 传入的path为工具生成的./dump_tensor_data,内容为npy文件 + for root, _, files in os.walk(path): + for file in files: + if file.endswith(FileCheckConst.NUMPY_SUFFIX): + file_paths.append(os.path.join(root, file)) + file_paths.sort() + for file_path in file_paths: + # 文件名举例:Cell.network._backbone.loss.CrossEntropyLoss.forward.0.input.0_float32_165.npy + parts = os.path.basename(file_path).split(CoreConst.SEP) + # 
op_name是Cell.network._backbone.loss.CrossEntropyLoss.forward.0 + op_name = CoreConst.SEP.join(parts[:-3]) + stack_data.update({op_name: []}) + + # 将数据写入stack.json + json_path = os.path.join(os.path.dirname(path), 'stack.json') + save_json(json_path, stack_data, indent=1) + + logger.info(f"Stack data saved to {json_path}") + + +def process(dump_path): + logger.info(f"==========Start processing data that has already been stored on the disk!==========") + rank_id = os.environ.get('RANK_ID') + rank_dir = DEFAULT_RANK_DIR + if rank_id is not None: + rank_dir = CoreConst.RANK + str(rank_id) + + step_dir_list = os.listdir(dump_path) + for step_dir in step_dir_list: + step_path = os.path.join(dump_path, step_dir) + rank_path = os.path.join(step_path, rank_dir) + npy_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) + rename_filename(npy_path) + generate_construct(npy_path) + generate_dump_info(npy_path) + generate_stack_info(npy_path) + + +def start(net=None, dump_path="./", data_mode=CoreConst.ALL): + if net is None: + return + + black_list = ["grad_reducer", ""] + for name, cell in net.cells_and_names(): + class_name = cell.__class__.__name__ + # 跳过黑名单cell + if name in black_list: + logger.info(f"Cell {name}.{class_name} is skipped!") + continue + # 跳过框架内部的cell + if class_name.startswith(CoreConst.REPLACEMENT_CHARACTER): + logger.info(f"Cell {name}.{class_name} is skipped!") + continue + else: + #Format: Cell.{cell_name}.{class_name} + cell.cell_prefix = CoreConst.SEP.join([CoreConst.CELL, name, cell.__class__.__name__]) + + cell.construct = cell_construct_wrapper(cell.construct, cell) + logger.info(f"Cell {name}: construct function is wrapped!") + cell.dump_path = dump_path + cell.data_mode = data_mode + cell.input_clips = [] + cell.output_clips = [] + # It is assumed that each cell has a maximum of 50 outputs and 50 inputs. 
+ for i in range(50): + cell.input_clips.append(ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, CoreConst.INPUT))) + cell.output_clips.append(ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, CoreConst.OUTPUT))) + + logger.info(f"==========The cell_dump_process_start phase is Finished!==========") + atexit.register(process, dump_path=dump_path) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py index 0ca63b4a84a..c0933d20aaa 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py @@ -17,13 +17,14 @@ from msprobe.mindspore.common.const import Const from msprobe.mindspore.debugger.debugger_config import DebuggerConfig from msprobe.mindspore.dump.kernel_graph_dump import KernelGraphDump from msprobe.mindspore.dump.kernel_kbyk_dump import KernelKbykDump +from msprobe.mindspore.dump.graph_mode_cell_dump import GraphModeCellDump class DumpToolFactory: tools = { Const.CELL: { - Const.GRAPH_KBYK_MODE: None, - Const.GRAPH_GE_MODE: None, + Const.GRAPH_KBYK_MODE: GraphModeCellDump, + Const.GRAPH_GE_MODE: GraphModeCellDump, Const.PYNATIVE_MODE: None }, Const.API: { @@ -39,9 +40,13 @@ class DumpToolFactory: } @staticmethod - def create(config: DebuggerConfig): - if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_DATA_MODE_LIST: - raise Exception("data_mode must be one of all, input, output.") + def create(config: DebuggerConfig, model): + if config.level == Const.CELL: + if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_CELL_DUMP_DATA_MODE_LIST: + raise Exception("data_mode must be one of all, forward, backward.") + else: + if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_DATA_MODE_LIST: + raise Exception("data_mode must be one of all, input, output.") tool 
= DumpToolFactory.tools.get(config.level) if not tool: raise Exception("Valid level is needed.") @@ -49,4 +54,4 @@ class DumpToolFactory: if not tool: raise Exception(f"Data dump is not supported in {config.execution_mode} mode " f"when dump level is {config.level}.") - return tool(config) + return tool(config, model) if tool == GraphModeCellDump else tool(config) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py new file mode 100644 index 00000000000..e32866868f4 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py @@ -0,0 +1,69 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +from msprobe.mindspore.common.log import logger +from msprobe.mindspore.debugger.debugger_config import DebuggerConfig +import mindspore as ms +from mindspore._c_expression import _tensordump_set_step +from mindspore.ops.primitive import _run_op +from mindspore import hal, ops +import msprobe.mindspore.dump.cell_dump_process as cellDumper +from msprobe.mindspore.common.const import Const + + +class GraphModeCellDump: + def __init__(self, config: DebuggerConfig, model): + self.net = model + self.white_list = [] + self.black_list = [] + self.dump_path = config.dump_path if config.dump_path else "./" + self.rank = config.rank + self.step = config.step + self.scope = config.scope + self.list = config.list + self.data_mode = config.data_mode + self.file_format = config.file_format + self.check_config() + self.set_step() + + @staticmethod + def step(): + hal.synchronize() + temp_tensor = ms.Tensor([1], dtype=ms.float32) + step_flag = "" + _run_op(ops.TensorDump(), "TensorDump", (step_flag, temp_tensor)) + ops.tensordump(step_flag, temp_tensor) + + def check_config(self): + if self.rank != []: + raise Exception("In graph mode, cell dump does not currently support specifying rank.") + if self.scope != []: + raise Exception("In graph mode, cell dump does not currently support specifying scope.") + if self.list != []: + raise Exception("In graph mode, cell dump does not currently support specifying list.") + if len(self.data_mode) != 1 or self.data_mode[0] not in Const.GRAPH_CELL_DUMP_DATA_MODE_LIST: + raise Exception("In graph mode and cell dump, data_mode must be one of all, forword, backword.") + if self.file_format != []: + logger.warning("In graph mode, cell dump does not currently support specifying file_format. 
The file will be stored in npy format.") + if not self.net: + raise Exception("The model is empty and cell dump is not enabled.") + return True + + def set_step(self): + _tensordump_set_step(self.step) + + def handle(self): + os.environ['MS_JIT_MODULES'] = 'msprobe' + cellDumper.start(net=self.net, dump_path=self.dump_path, data_mode=self.data_mode[0]) diff --git a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py index a9cb5e6dd40..5cfbbaeb4a4 100644 --- a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py @@ -29,11 +29,14 @@ class TaskHandlerFactory: } @staticmethod - def create(config: DebuggerConfig): + def create(config: DebuggerConfig, model): task = TaskHandlerFactory.tasks.get(config.task) if not task: raise Exception("Valid task is needed.") - handler = task.create(config) + if task == DumpToolFactory: + handler = task.create(config, model) + else: + handler = task.create(config) if not handler: raise Exception("Can not find task handler") return handler diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py new file mode 100644 index 00000000000..b111e644378 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py @@ -0,0 +1,309 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +import unittest +from unittest.mock import MagicMock, patch + +import mindspore as ms +from mindspore import ops + +from msprobe.core.common.const import Const as CoreConst +from msprobe.mindspore.dump.cell_dump_process import generate_file_path +from msprobe.mindspore.dump.cell_dump_process import partial_func, clip_gradient +from msprobe.mindspore.dump.cell_dump_process import cell_construct_wrapper +from msprobe.mindspore.dump.cell_dump_process import rename_filename, sort_filenames, del_same_file +from msprobe.mindspore.dump.cell_dump_process import check_relation + + +class TestGenerateFilePath(unittest.TestCase): + def setUp(self): + self.dump_path = "/path" + self.cell_prefix = "Cell.network._backbone.LlamaForCausalLM" + self.suffix = "forward" + self.io_type = "input" + self.index = 0 + + def test_generate_file_path(self): + expected_path = os.path.join( + self.dump_path, + "{step}", + "{rank}", + CoreConst.DUMP_TENSOR_DATA, + CoreConst.SEP.join([self.cell_prefix, self.suffix, self.io_type, str(self.index)]) + ) + result = generate_file_path(self.dump_path, self.cell_prefix, self.suffix, self.io_type, self.index) + self.assertEqual(result, expected_path) + + +class TestPartialFunc(unittest.TestCase): + + @patch('msprobe.mindspore.dump.cell_dump_process.CoreConst') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.ops.depend') + def 
test_clip_gradient_output(self, mock_depend, mock_generate_file_path, mock_td_in, mock_td, mock_CoreConst): + mock_CoreConst.OUTPUT = "output" + mock_CoreConst.BACKWARD = "backward" + mock_generate_file_path.return_value = "mock_path" + mock_td.return_value = "temp_tensor" + mock_depend.return_value = "dependent_tensor" + + result = clip_gradient("dump_path", "cell_prefix", 0, "output", "dx") + + mock_generate_file_path.assert_called_with("dump_path", "cell_prefix", "backward", "output", 0) + mock_td.assert_called_with("mock_path", "dx") + mock_depend.assert_called_with("dx", "temp_tensor") + self.assertEqual(result, "dependent_tensor") + + @patch('msprobe.mindspore.dump.cell_dump_process.CoreConst') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.ops.depend') + def test_clip_gradient_input(self, mock_depend, mock_generate_file_path, mock_td_in, mock_td, mock_CoreConst): + mock_CoreConst.INPUT = "input" + mock_CoreConst.BACKWARD = "backward" + mock_generate_file_path.return_value = "mock_path" + mock_td_in.return_value = "temp_tensor" + mock_depend.return_value = "dependent_tensor" + + result = clip_gradient("dump_path", "cell_prefix", 0, "input", "dx") + + mock_generate_file_path.assert_called_with("dump_path", "cell_prefix", "backward", "input", 0) + mock_td_in.assert_called_with("mock_path", "dx") + mock_depend.assert_called_with("dx", "temp_tensor") + self.assertEqual(result, "dependent_tensor") + + def test_partial_func(self): + def mock_func(dump_path, cell_prefix, index, io_type, *args, **kwargs): + return dump_path, cell_prefix, index, io_type, args, kwargs + + new_func = partial_func(mock_func, "dump_path", "cell_prefix", 0, "io_type") + result = new_func("arg1", "arg2", kwarg1="value1") + + self.assertEqual(result, ("dump_path", "cell_prefix", 0, "io_type", ("arg1", 
"arg2"), {'kwarg1': 'value1'})) + + +class TestCellWrapperProcess(unittest.TestCase): + + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + def test_cell_construct_wrapper(self, mock_td_in, mock_td, mock_generate_file_path): + # Mock the generate_file_path function + mock_generate_file_path.return_value = "mock_path" + + # Mock the TensorDump operations + mock_td.return_value = MagicMock() + mock_td_in.return_value = MagicMock() + + # Create a mock cell with necessary attributes + mock_cell = MagicMock() + mock_cell.data_mode = "all" + mock_cell.dump_path = "mock_dump_path" + mock_cell.cell_prefix = "mock_cell_prefix" + mock_cell.input_clips = [MagicMock() for _ in range(50)] + mock_cell.output_clips = [MagicMock() for _ in range(50)] + + # Define a mock function to wrap + def mock_func(*args, **kwargs): + return args + + # Wrap the mock function using cell_construct_wrapper + wrapped_func = cell_construct_wrapper(mock_func, mock_cell) + + # Create mock inputs + mock_input = ms.Tensor([1, 2, 3]) + mock_args = (mock_input,) + + # Call the wrapped function + result = wrapped_func(mock_cell, *mock_args) + + # Check if the result is as expected + self.assertEqual(result, mock_args) + + # Verify that the TensorDump operations were called + mock_td_in.assert_called() + mock_td.assert_called() + + @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') + @patch('msprobe.mindspore.dump.cell_dump_process.td') + @patch('msprobe.mindspore.dump.cell_dump_process.td_in') + def test_cell_construct_wrapper_with_tuple_output(self, mock_td_in, mock_td, mock_generate_file_path): + # Mock the generate_file_path function + mock_generate_file_path.return_value = "mock_path" + + # Mock the TensorDump operations + mock_td.return_value = MagicMock() + mock_td_in.return_value = MagicMock() + + # Create a mock cell with necessary attributes + 
mock_cell = MagicMock() + mock_cell.data_mode = "all" + mock_cell.dump_path = "mock_dump_path" + mock_cell.cell_prefix = "mock_cell_prefix" + mock_cell.input_clips = [MagicMock() for _ in range(50)] + mock_cell.output_clips = [MagicMock() for _ in range(50)] + + # Define a mock function to wrap + def mock_func(*args, **kwargs): + return (args[0], args[0]) + + # Wrap the mock function using cell_construct_wrapper + wrapped_func = cell_construct_wrapper(mock_func, mock_cell) + + # Create mock inputs + mock_input = ms.Tensor([1, 2, 3]) + mock_args = (mock_input,) + + # Call the wrapped function + result = wrapped_func(mock_cell, *mock_args) + + # Check if the result is as expected + self.assertEqual(result, (mock_input, mock_input)) + + # Verify that the TensorDump operations were called + mock_td_in.assert_called() + mock_td.assert_called() + + +class TestSortFilenames(unittest.TestCase): + + @patch('os.listdir') + def test_sort_filenames(self, mock_listdir): + # Mock the list of filenames returned by os.listdir + mock_listdir.return_value = [ + 'Cell.network._backbone.model.LlamaModel.backward.0.input.0_float16_177.npy', + 'Cell.network._backbone.model.LlamaModel.forward.0.input.0_in_int32_1.npy', + 'Cell.network._backbone.model.LlamaModel.forward.0.output.10_float16_165.npy', + 'Cell.network._backbone.model.norm_out.LlamaRMSNorm.backward.0.input.0_float16_178.npy' + ] + + # Mock the CoreConst values + CoreConst.REPLACEMENT_CHARACTER = '_' + CoreConst.NUMPY_SUFFIX = '.npy' + + # Expected sorted filenames + expected_sorted_filenames = [ + 'Cell.network._backbone.model.LlamaModel.forward.0.input.0_in_int32_1.npy', + 'Cell.network._backbone.model.LlamaModel.forward.0.output.10_float16_165.npy', + 'Cell.network._backbone.model.LlamaModel.backward.0.input.0_float16_177.npy', + 'Cell.network._backbone.model.norm_out.LlamaRMSNorm.backward.0.input.0_float16_178.npy' + ] + + # Call the function + sorted_filenames = sort_filenames('/mock/path') + + # Assert the filenames are 
sorted correctly + self.assertEqual(sorted_filenames, expected_sorted_filenames) + + +class TestRenameFilename(unittest.TestCase): + + @patch('msprobe.mindspore.dump.cell_dump_process.sort_filenames') + @patch('msprobe.mindspore.dump.cell_dump_process.del_same_file') + @patch('msprobe.mindspore.dump.cell_dump_process.os.rename') + def test_rename_filename(self, mock_rename, mock_del_same_file, mock_sort_filenames): + # Mock the constants + CoreConst.REPLACEMENT_CHARACTER = '_' + CoreConst.FORWARD_PATTERN = '.forward.' + CoreConst.BACKWARD_PATTERN = '.backward.' + CoreConst.SEP = '.' + + # Mock the filenames + mock_sort_filenames.return_value = [ + "Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_101.npy", + "Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_102.npy", + "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.0_float32_103.npy", + "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.1_bool_104.npy", + "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.output.1_bool_105.npy", + "Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_111.npy", + "Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_112.npy", + ] + mock_del_same_file.return_value = [mock_sort_filenames.return_value] + + # Call the function + rename_filename('/mock/path') + + # Check if os.rename was called with the correct arguments + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_101.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.0.input_0_int32_101.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_102.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.0.output_0_float32_102.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.0_float32_103.npy', + 
'/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.input_0_float32_103.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.1_bool_104.npy', + '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.input_1_bool_104.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.output.1_bool_105.npy', + '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.output_1_bool_105.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_111.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.1.input_0_int32_111.npy' + ) + mock_rename.assert_any_call( + '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_112.npy', + '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.1.output_0_float32_112.npy' + ) + + # Mock the filenames + mock_sort_filenames.return_value = [] + mock_del_same_file.return_value = [] + + # Call the function + rename_filename('/mock/path') + + # Check if os.rename was not called + mock_rename.assert_not_called() + + +class TestCheckRelation(unittest.TestCase): + + def setUp(self): + CoreConst.SEP = '.' 
+ global KEY_LAYERS + KEY_LAYERS = "layers" + + def test_direct_parent_child_relation(self): + self.assertTrue(check_relation("network._backbone", "network")) + self.assertTrue(check_relation("network._backbone.model", "network._backbone")) + + def test_no_relation(self): + self.assertFalse(check_relation("network._backbone", "network.loss")) + self.assertFalse(check_relation("network._backbone.model", "network.loss")) + + def test_layer_pattern_relation(self): + self.assertTrue(check_relation("network.model.layers.0", "network.model")) + self.assertTrue(check_relation("network._backbone.model.layers.1", "network._backbone.model")) + + def test_no_layer_pattern_relation(self): + self.assertFalse(check_relation("network.model.layers.0", "network.loss")) + self.assertFalse(check_relation("network._backbone.model.layers.1", "network._backbone.model.layers")) + + def test_edge_cases(self): + self.assertFalse(check_relation("", "network")) + self.assertFalse(check_relation("network.layer1", "")) + self.assertFalse(check_relation("", "")) -- Gitee From 3ac2ba51aefc8800092cb5f4b064f5a31389ba73 Mon Sep 17 00:00:00 2001 From: fuchao <1501312275@qq.com> Date: Mon, 17 Feb 2025 18:18:33 +0800 Subject: [PATCH 199/333] =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=9D=99=E6=80=81?= =?UTF-8?q?=E5=9B=BEcell=E7=BA=A7dump=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mindspore/dump/cell_dump_process.py | 50 +++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py index a21c4590b80..a9121e14354 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py @@ -133,7 +133,7 @@ def cell_construct_wrapper(func, self): out = self.input_clips[0](out) if self.data_mode == 
"forward" or self.data_mode == "all": if ops.is_tensor(out): - temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), out) + temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, 0), out) out = ops.depend(out, temp) return out @@ -302,6 +302,21 @@ def process_file(file_path): ms_dtype = np_ms_dtype_dict.get(data_dtype) if ms_dtype is None: logger.warning(f"Get dtype None from file {file_path}") + + #修改落盘文件名字,去掉TensorDump自带的数据类型和自增id字段 + data_file_name = os.path.basename(file_path) + data_file_dir = os.path.dirname(file_path) + parts = data_file_name.split(CoreConst.SEP) + if len(parts) >= 2: + param_index = parts[-2].split(CoreConst.REPLACEMENT_CHARACTER)[0] + pre_parts = CoreConst.SEP.join(parts[:-2]) + new_file_name = pre_parts + CoreConst.SEP + param_index + CoreConst.NUMPY_SUFFIX + os.rename(os.path.join(data_file_dir, data_file_name), os.path.join(data_file_dir, new_file_name)) + logger.info(f"{data_file_name} is renamed to {new_file_name}") + else: + logger.warning(f"Failed to rename {data_file_name}.") + new_file_name = data_file_name + tensor_json = { CoreConst.TYPE: 'mindspore.Tensor', CoreConst.DTYPE: str(ms_dtype), @@ -310,7 +325,7 @@ def process_file(file_path): CoreConst.MIN: npy_content.min().item(), CoreConst.MEAN: npy_content.mean().item(), CoreConst.NORM: np.linalg.norm(npy_content).item(), - CoreConst.DATA_NAME: os.path.basename(file_path) + CoreConst.DATA_NAME: new_file_name } # 根据文件名的最后一个部分(输入或输出)确定是添加到input_args还是output @@ -398,8 +413,28 @@ def generate_stack_info(path): logger.info(f"Stack data saved to {json_path}") +def is_download_finished(directory, interval=3): + """ + 判断指定目录在一段时间后是否有数据被下载完成 + :param directory: 指定目录的路径 + :param interval: 检查的时间间隔(秒),默认为 3 秒 + :return: 如有数据被下载完成返回 True,否则返回 False + """ + # 检查目录是否存在 + if not os.path.exists(directory): + logger.warning(f"The specified directory {directory} does not exist.") + return False + 
initial_modification_time = os.path.getmtime(directory) + time.sleep(interval) + current_modification_time = os.path.getmtime(directory) + # 比较初始和当前修改时间 + if current_modification_time > initial_modification_time: + return False + else: + return True + + def process(dump_path): - logger.info(f"==========Start processing data that has already been stored on the disk!==========") rank_id = os.environ.get('RANK_ID') rank_dir = DEFAULT_RANK_DIR if rank_id is not None: @@ -410,10 +445,19 @@ def process(dump_path): step_path = os.path.join(dump_path, step_dir) rank_path = os.path.join(step_path, rank_dir) npy_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) + while True: + is_finished = is_download_finished(npy_path) + if not is_finished: + logger.info(f"There is data being downloaded in the specified directory, continue checking...") + else: + logger.info(f"There is no data being downloaded in the specified directory, Stop checking.") + break + logger.info(f"==========Start processing data that has already been stored on the disk!==========") rename_filename(npy_path) generate_construct(npy_path) generate_dump_info(npy_path) generate_stack_info(npy_path) + logger.info(f"==========JSON file generation completed!==========") def start(net=None, dump_path="./", data_mode=CoreConst.ALL): -- Gitee From 2c7d47c533041669ba090aae8be5d20d38428f2b Mon Sep 17 00:00:00 2001 From: wangnan39 Date: Wed, 26 Feb 2025 17:31:45 +0800 Subject: [PATCH 200/333] bf16/int4 support dump npy for mindspore O2 --- .../ccsrc/core/AclDumpDataProcessor.cpp | 20 +++-- .../msprobe/ccsrc/core/AclTensor.cpp | 74 ++++++++++++++++--- .../msprobe/ccsrc/core/AclTensor.hpp | 4 +- 3 files changed, 80 insertions(+), 18 deletions(-) diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp index 72178d6486a..3374aa0be31 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp +++ 
b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp @@ -77,6 +77,11 @@ static const std::map {DebuggerSummaryOption::MD5, {kStatsHeaderMD5, kStatsHeaderMD5}}, }; +const static std::map kDtypeTransMap = { + {AclDtype::DT_BF16, AclDtype::DT_FLOAT}, + {AclDtype::DT_INT4, AclDtype::DT_INT8}, +}; + class AclTensorStats { public: AclTensorStats() = default; @@ -605,7 +610,7 @@ static std::string GenDataPath(const std::string& path) { inline std::string GetTensorInfoSuffix(AclTensorInfo& tensor) { return "." + tensor.inout + "." + std::to_string(tensor.slot) + - "." + DataUtils::GetFormatString(tensor.hostFmt) + "." + DataUtils::GetDTypeString(tensor.dtype); + "." + DataUtils::GetFormatString(tensor.hostFmt) + "." + DataUtils::GetDTypeString(tensor.oriDtype); } static DebuggerErrno DumpOneAclTensorFmtBin(AclTensorInfo& tensor) @@ -642,10 +647,13 @@ static DebuggerErrno DumpOneAclTensorFmtNpy(AclTensorInfo& tensor) return DebuggerErrno::OK; } - if (tensor.dtype == AclDtype::DT_BF16) { - ret = AclTensor::TransDtype(tensor, AclDtype::DT_FLOAT); + auto it = kDtypeTransMap.find(tensor.dtype); + if (it != kDtypeTransMap.end()) { + AclDtype dstDtype = it->second; + ret = AclTensor::TransDtype(tensor, dstDtype); if (ret != DebuggerErrno::OK) { - LOG_ERROR(ret, tensor + ": Failed to transform dtype from bf16 to fp32."); + LOG_ERROR(ret, tensor + ": Failed to transform dtype from " + DataUtils::GetDTypeString(it->first) + " to " + + DataUtils::GetDTypeString(it->second)+ "."); return ret; } } @@ -738,7 +746,9 @@ static DebuggerErrno DumpOneAclTensor(AclTensorInfo& tensor, std::vector( @@ -763,34 +767,80 @@ static void TransBf16ToFp32(const uint8_t* input, size_t num, uint8_t* output, s } } -DebuggerErrno TransDtype(AclTensorInfo& tensor, AclDtype to) +static void TransInt4ToInt8(const uint8_t* input, size_t elemNums, uint8_t* output, size_t bufferSize) { + if (bufferSize < elemNums * sizeof(int8_t)) { + LOG_ERROR(DebuggerErrno::ERROR_BUFFER_OVERFLOW, "Insufficient 
space for converting data from int4 to int8."); + return; + } + const int8_t *srcData = reinterpret_cast(input); + int8_t *dstData = reinterpret_cast(output); + size_t inputLength = elemNums / 2; + int maxValue = 7; + int minValue = -8; + int signBitShift = 3; + int signBitMask = 0x08; + for (size_t i = 0; i < inputLength; ++i) { + int8_t s = *srcData; + int8_t t = s & 0xf; + // keep the sign bit not change + int8_t signBit = (t & signBitMask) >> signBitShift; + if (signBit == 1) { + t = t | 0xf0; + } else { + t = t & 0x0f; + } + if (t < minValue || t > maxValue) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_VALUE, "Invalid int4 value."); + } + *dstData = t; + ++dstData; + + int highByteShift = 4; + t = s >> highByteShift; + signBit = (t & signBitMask) >> signBitShift; + if (signBit == 1) { + t = t | 0xf0; + } else { + t = t & 0x0f; + } + if (t < minValue || t > maxValue) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_VALUE, "Invalid int4 value."); + } + *dstData = t; + ++dstData; + ++srcData; + } + return; +} - const static std::set> kSupportedDtypeTrans = { - {AclDtype::DT_BF16, AclDtype::DT_FLOAT}, - }; +DebuggerErrno TransDtype(AclTensorInfo& tensor, AclDtype to) +{ if (tensor.dtype == to) { return DebuggerErrno::OK; } - if (kSupportedDtypeTrans.find({tensor.dtype, to}) == kSupportedDtypeTrans.end()) { - return DebuggerErrno::ERROR_UNKNOWN_TRANS; - } - + tensor.oriDtype = tensor.dtype; std::vector buffer; AssertConsis(tensor); size_t bufferSize = EleNumOfTensor(tensor) * SizeOfAclDType(to); - buffer.reserve(bufferSize); + buffer.resize(bufferSize); const uint8_t* input = tensor.transBuf.empty() ? 
tensor.aclData : tensor.transBuf.data(); uint8_t* output = buffer.data(); - /* 目前仅支持bf16->fp32,若有通用转换需求再用更泛化的方式重写 */ if (tensor.dtype == AclDtype::DT_BF16 && to == AclDtype::DT_FLOAT) { TransBf16ToFp32(input, EleNumOfTensor(tensor), output, bufferSize); + } else if (tensor.dtype == AclDtype::DT_INT4 && to == AclDtype::DT_INT8) { + TransInt4ToInt8(input, EleNumOfTensor(tensor), output, bufferSize); + } else { + LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_TRANS, tensor + ": Trans " + DataUtils::GetDTypeString(tensor.dtype) + + " to " + DataUtils::GetDTypeString(to) + " is not supported."); + return DebuggerErrno::ERROR_UNKNOWN_TRANS; } tensor.transBuf = std::move(buffer); + tensor.dtype = to; return DebuggerErrno::OK; } diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp index 8b5ba5b06d9..f2ac429a7f1 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp @@ -40,6 +40,7 @@ struct AclTensorInfo { std::string dumpPath; const uint8_t* aclData; AclDtype dtype; + AclDtype oriDtype; AclFormat deviceFmt; AclFormat hostFmt; AclShape deviceShape; @@ -52,7 +53,7 @@ struct AclTensorInfo { std::vector transBuf; std::string ToString() const { - return "AclTensor(path=" + dumpPath + ",dtype=" + std::to_string(dtype) + ",inout=" + inout + ")"; + return "AclTensor(path=" + dumpPath + ",dtype=" + DataUtils::GetDTypeString(dtype) + ",inout=" + inout + ")"; } }; @@ -71,6 +72,7 @@ AclTensorInfo ParseAttrsFromDumpData(const std::string &dumpPath, const uint8_t* const std::string& io, uint32_t slot); DebuggerErrno TransFormatD2H(AclTensorInfo& tensor); DebuggerErrno TransDtype(AclTensorInfo& tensor, AclDtype to); +bool IsDtypeSupportTrans(AclDtype dtype); } } -- Gitee From 3ac803dc9671bf86ffd2617a9d86cf4aa3efc71c Mon Sep 17 00:00:00 2001 From: wangnan39 Date: Wed, 26 Feb 2025 14:56:35 +0800 Subject: [PATCH 201/333] support overflow set max num --- 
.../msprobe/ccsrc/base/DebuggerConfig.hpp | 2 +- .../msprobe/ccsrc/core/AclDumper.cpp | 30 +++++++++++++++++++ .../msprobe/ccsrc/core/AclDumper.hpp | 6 ++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp index 15ea9e6fda4..d56191443f8 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp +++ b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp @@ -199,7 +199,7 @@ public: OverflowCheckCfg() = default; ~OverflowCheckCfg() = default; - uint32_t overflowNums{1}; + int32_t overflowNums{1}; DebuggerOpCheckLevel checkMode{DebuggerOpCheckLevel::CHECK_LEVEL_ALL}; private: diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp index 80769d7fc5f..ec6e59dafd7 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp @@ -151,6 +151,29 @@ bool AclDumper::IsCfgEnableAclDumper() ELE_IN_VECTOR(tasks, DebuggerTaskType::TASK_OVERFLOW_CHECK)); } +bool AclDumper::IsOverflowCompleted() +{ + if (overflowNums != -1 && realOverflowNums > overflowNums) { + return true; + } + return false; +} + +void AclDumper::CountOverflowNumbers(const acldumpChunk* chunk) +{ + if (IsOverflowCompleted() || !isOverflowDump || !chunk->isLastChunk) { + return; + } + const std::string fileName = chunk->fileName; + auto separator = fileName.rfind("/"); + auto fileBaseName = fileName.substr(separator + 1); + if (fileBaseName.rfind("Opdebug.Node_OpDebug.") == 0) { + // count according to the first file: Node_OpDebug + realOverflowNums++; + } + return; +} + std::string AclDumper::GetDumpPath(uint32_t curStep) const { if (!initialized || foreDumpPath.empty()) { @@ -357,6 +380,11 @@ DebuggerErrno AclDumper::Initialize() void AclDumper::OnAclDumpCallBack(const acldumpChunk* chunk, int32_t len) { DEBUG_FUNC_TRACE(); + 
CountOverflowNumbers(chunk); + if (IsOverflowCompleted()) { + return; + } + std::string dumpPath = FileUtils::GetAbsPath(chunk->fileName); auto it = dataProcessors.find(dumpPath); if (it == dataProcessors.end()) { @@ -424,6 +452,8 @@ void AclDumper::SetDump(uint32_t rank, uint32_t curStep, ExtArgs& args) ret = AclDumpGenStatJson(statisticsCfg, rank, curStep, kernels); } else if (overflowCheckCfg != nullptr) { ret = AclDumpGenOverflowJson(overflowCheckCfg, rank, curStep); + overflowNums = overflowCheckCfg->overflowNums; + isOverflowDump = true; } if (ret != DebuggerErrno::OK) { diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp index dcfad5fafca..6985df65e16 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp @@ -58,11 +58,17 @@ private: uint32_t curStep, const char** kernels); DebuggerErrno AclDumpGenOverflowJson(std::shared_ptr overflowCfg, uint32_t rank, uint32_t curStep); + void CountOverflowNumbers(const acldumpChunk* chunk); + bool IsOverflowCompleted(); + bool initialized{false}; bool aclDumpHasSet{false}; std::string foreDumpPath; std::vector hostAnalysisOpt; std::map> dataProcessors; + bool isOverflowDump{false}; + int32_t overflowNums{1}; + int32_t realOverflowNums{0}; }; void KernelInitDump(); -- Gitee From 00f7203c8104cedbac9e05551a999855b5086150 Mon Sep 17 00:00:00 2001 From: DavidFFFan Date: Thu, 27 Feb 2025 10:35:35 +0000 Subject: [PATCH 202/333] Update debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp --- debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp index ec6e59dafd7..805a6a7a0a2 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp @@ -153,10 
+153,7 @@ bool AclDumper::IsCfgEnableAclDumper() bool AclDumper::IsOverflowCompleted() { - if (overflowNums != -1 && realOverflowNums > overflowNums) { - return true; - } - return false; + return overflowNums != -1 && realOverflowNums > overflowNums; } void AclDumper::CountOverflowNumbers(const acldumpChunk* chunk) -- Gitee From b5481d314759ad186c2ca4cede475fb6a16f616a Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 4 Mar 2025 16:08:16 +0800 Subject: [PATCH 203/333] fix to bug --- .../msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 905687c1bfc..c7a48844ee8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -65,6 +65,7 @@ DETAILS_FILE_NAME = "accuracy_checking_details_" + current_time + ".csv" not_backward_list = ['repeat_interleave'] unsupported_backward_list = ['masked_select'] +unsupported_api_list = ["to"] tqdm_params = { @@ -218,6 +219,7 @@ def blacklist_and_whitelist_filter(api_name, black_list, white_list): If api is both in black_list and black_list, black_list first. 
return: False for exec api, True for not exec """ + black_list.extend(unsupported_api_list) if black_list and api_name in black_list: return True if white_list and api_name not in white_list: -- Gitee From 6b63d5bc91ee6ec7fd939878ee61277920c2da02 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 4 Mar 2025 16:51:53 +0800 Subject: [PATCH 204/333] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dp2pop=E5=9C=A8tensor?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F=E4=B8=8B=E8=A7=A3=E6=9E=90=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/pytorch_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index b7b42dfa3b9..66523da9c55 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -228,7 +228,7 @@ class PytorchDataProcessor(BaseDataProcessor): if isinstance(element, dist.ProcessGroup): return self._analyze_process_group(element) if isinstance(element, dist.P2POp): - return self._analyze_p2pop(element) + return self._analyze_p2pop(element, Const.SEP.join([str(suffix) for suffix in suffix_stack])) if isinstance(element, dist.ReduceOp): return self._analyze_reduce_op(element) converted_numpy, numpy_type = self._convert_numpy_to_builtin(element) @@ -247,10 +247,10 @@ class PytorchDataProcessor(BaseDataProcessor): module_input_output.update_output_with_args_and_kwargs() return super().analyze_forward_output(name, module, module_input_output) - def _analyze_p2pop(self, arg): + def _analyze_p2pop(self, arg, suffix): p2pop_info = {"class_type": "torch.distributed.P2POp"} try: - tensor_info = self._analyze_tensor(arg.tensor, []) + tensor_info = 
self._analyze_tensor(arg.tensor, suffix) p2pop_info.update({"tensor": tensor_info}) p2pop_info.update({"op": arg.op.__name__}) p2pop_info.update({"peer": arg.peer}) -- Gitee From 66d3f4af73c705b59bcdf908f9798cc32826f362 Mon Sep 17 00:00:00 2001 From: z30043230 Date: Tue, 4 Mar 2025 17:52:40 +0800 Subject: [PATCH 205/333] add cluster_time_compare_summary --- .../msprof_analyze/cluster_analyse/README.md | 4 +- .../cluster_time_compare_summary/__init__.py | 0 .../cluster_time_compare_summary.py | 115 +++++++++++++++ .../cluster_time_summary.py | 17 ++- .../msprof_analyze/prof_common/constant.py | 4 + .../test_cluster_time_compare_summary.py | 136 ++++++++++++++++++ 6 files changed, 268 insertions(+), 8 deletions(-) create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/__init__.py create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/cluster_time_compare_summary.py create mode 100644 profiler/msprof_analyze/test/ut/cluster_analyse/recipes/test_cluster_time_compare_summary.py diff --git a/profiler/msprof_analyze/cluster_analyse/README.md b/profiler/msprof_analyze/cluster_analyse/README.md index b129885363b..1c3761a2202 100644 --- a/profiler/msprof_analyze/cluster_analyse/README.md +++ b/profiler/msprof_analyze/cluster_analyse/README.md @@ -79,7 +79,9 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( | hccl_sum | 集合通信算子耗时分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/HcclSum目录下输出交付件stats.ipynb。 | 否 | | mstx_sum | 集群场景mstx打点信息汇总分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/MstxSum目录下输出交付件stats.ipynb。 | 否 | | slow_link | 
集群慢链路异常分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/SlowLink目录下输出交付件stats.ipynb。 | 否 | - + | cluster_time_summary | 集群场景性能数据分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db和analysis.db文件。--export_type为db时,输出交付件cluster_analysis.db,db里面有ClusterTimeSummary,不支持导出notebook。 | 否 | + | cluster_time_compare_summary | 集群场景性能数据对比分析,使用前集群数据必须先分析cluster_time_summary,需要配合--bp参数使用。输入性能数据需要基于cluster_analysis_output下的cluster_analysis.db文件。--export_type为db时,输出交付件cluster_analysis.db,db文件中有对比结果的表ClusterTimeCompareSummary,不支持导出notebook。 | 否 | + --parallel_mode参数示例如下: ```bash diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/__init__.py b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/cluster_time_compare_summary.py b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/cluster_time_compare_summary.py new file mode 100644 index 00000000000..71a5fbee9d4 --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_compare_summary/cluster_time_compare_summary.py @@ -0,0 +1,115 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from msprof_analyze.cluster_analyse.recipes.base_recipe_analysis import BaseRecipeAnalysis +from msprof_analyze.prof_common.constant import Constant +from msprof_analyze.prof_common.database_service import DatabaseService +from msprof_analyze.prof_common.db_manager import DBManager +from msprof_analyze.prof_common.logger import get_logger +from msprof_analyze.prof_common.path_manager import PathManager + +logger = get_logger() + + +class ClusterTimeCompareSummary(BaseRecipeAnalysis): + BP = "bp" # 被对比的路径参数 + TABLE_CLUSTER_TIME_COMPARE_SUMMARY = "ClusterTimeCompareSummary" + CLUSTER_TIME_SUMMARY_CSV = "cluster_time_summary.csv" + CLUSTER_TIME_SUMMARY_COLUMNS = [ + "rank", + "step", + "computation", + "communicationNotOverlapComputation", + "communicationOverlapComputation", + "communication", + "free", + "communicationWaitStageTime", + "communicationTransmitStageTime", + "memory", + "memoryNotOverlapComputationCommunication", + "taskLaunchDelayAvgTime" + ] + + def __init__(self, params): + super().__init__(params) + self.db_path = os.path.join(self._collection_dir, Constant.CLUSTER_ANALYSIS_OUTPUT, + Constant.DB_CLUSTER_COMMUNICATION_ANALYZER) + self.base_db_path = os.path.join(self._extra_args.get(self.BP, ""), Constant.CLUSTER_ANALYSIS_OUTPUT, + Constant.DB_CLUSTER_COMMUNICATION_ANALYZER) + self.compare_result = None + + @property + def base_dir(self): + return os.path.basename(os.path.dirname(__file__)) + + @classmethod + def add_parser_argument(cls, parser): + BaseRecipeAnalysis.add_parser_argument(parser) + parser.add_argument('--bp', type=PathManager.expanduser_for_argumentparser, default="", + help="base profiling data path") + + def run(self, context=None): + logger.info("ClusterTimeCompareSummary starts running.") + if not self.check_params_is_valid(): + return + self.get_compare_data() + self.save_db() + + def check_params_is_valid(self) -> bool: + base_path = self._extra_args.get(self.BP, "") + if not base_path: + logger.error("Must 
specify the --bp parameter.") + return False + if self._export_type == Constant.NOTEBOOK: + logger.error("For cluster_time_compare_summary, the export_type parameter only supports db.") + return False + try: + PathManager.check_input_directory_path(base_path) # 校验目录 + except RuntimeError: + logger.error(f"{base_path} is not valid.") + return False + if not DBManager.check_tables_in_db(self.db_path, Constant.TABLE_CLUSTER_TIME_SUMMARY): + logger.error(f"{Constant.TABLE_CLUSTER_TIME_SUMMARY} in {self.db_path} does not exist.") + return False + if not DBManager.check_tables_in_db(self.base_db_path, Constant.TABLE_CLUSTER_TIME_SUMMARY): + logger.error(f"{Constant.TABLE_CLUSTER_TIME_SUMMARY} in {self.base_db_path} does not exist.") + return False + return True + + + def get_compare_data(self): + database_service_for_db = DatabaseService(self.db_path) + database_service_for_db.add_table_for_query(Constant.TABLE_CLUSTER_TIME_SUMMARY, + self.CLUSTER_TIME_SUMMARY_COLUMNS) + cluster_time_summary_df_dict = database_service_for_db.query_data() + cluster_time_summary_df = cluster_time_summary_df_dict.get(Constant.TABLE_CLUSTER_TIME_SUMMARY) + database_service_for_base_db = DatabaseService(self.base_db_path) + database_service_for_base_db.add_table_for_query(Constant.TABLE_CLUSTER_TIME_SUMMARY, + self.CLUSTER_TIME_SUMMARY_COLUMNS) + base_cluster_time_summary_df_dict = database_service_for_base_db.query_data() + base_cluster_time_summary_df = base_cluster_time_summary_df_dict.get(Constant.TABLE_CLUSTER_TIME_SUMMARY) + self.compare_result = ( + cluster_time_summary_df.set_index(["rank", "step"]) + .subtract(base_cluster_time_summary_df.set_index(["rank", "step"])) + .dropna() + .reset_index() + .rename(columns=lambda x: f"{x}Diff" if x not in ["rank", "step"] else x) + ) + + def save_db(self): + self.dump_data(self.compare_result, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, + self.TABLE_CLUSTER_TIME_COMPARE_SUMMARY, index=False) \ No newline at end of file diff --git 
a/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_summary/cluster_time_summary.py b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_summary/cluster_time_summary.py index 627edaa32f4..a574850ec6d 100644 --- a/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_summary/cluster_time_summary.py +++ b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_time_summary/cluster_time_summary.py @@ -14,7 +14,6 @@ # limitations under the License. import os -from collections import namedtuple import pandas as pd from msprof_analyze.cluster_analyse.common_func.context import ConcurrentContext @@ -78,11 +77,11 @@ class ClusterTimeSummary(BaseRecipeAnalysis): merged_df = pd.merge(merged_df, df, on=['rank', 'step'], how='outer') # 根据 step 和 rank 列对合并后的 DataFrame 进行排序 merged_df = merged_df.sort_values(by=['rank', 'step']) - merged_df["free"] = merged_df["free"] - merged_df["memoryNotOverlapComputeCommunication"] + merged_df["free"] = merged_df["free"] - merged_df["memoryNotOverlapComputationCommunication"] merged_df = merged_df.rename(columns={ 'computing': 'computation', - 'overlapped': 'communicationOverlappingComputation', - 'communication_not_overlapped': 'communicationNotOverlappingComputation'}) + 'overlapped': 'communicationOverlapComputation', + 'communication_not_overlapped': 'communicationNotOverlapComputation'}) return merged_df.sort_values(by=['rank', 'step']) @classmethod @@ -157,7 +156,7 @@ class ClusterTimeSummary(BaseRecipeAnalysis): return None memory_df = ClusterTimeSummary.calculate_memory_time(df) memory_not_overlap_df = (df.groupby(["step"]).apply(ClusterTimeSummary.get_memory_not_overlap). 
- reset_index(name="memoryNotOverlapComputeCommunication")) + reset_index(name="memoryNotOverlapComputationCommunication")) dispatch_df = ClusterTimeSummary.calculate_dispatch_time(df) result_df = pd.merge(memory_df, memory_not_overlap_df, on='step', how='inner') result_df = pd.merge(result_df, dispatch_df, on='step', how='inner') @@ -177,7 +176,11 @@ class ClusterTimeSummary(BaseRecipeAnalysis): self.mapper_func(context) context.wait_all_futures() self.stats_data = self.aggregate_stats(context) - self.save_db() + if self._export_type == Constant.DB: + self.save_db() + else: + logger.warning("cluster_time_summary only supports export db.") def save_db(self): - self.dump_data(self.stats_data, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, "ClusterTimeSummary", index=False) + self.dump_data(self.stats_data, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, + Constant.TABLE_CLUSTER_TIME_SUMMARY, index=False) diff --git a/profiler/msprof_analyze/prof_common/constant.py b/profiler/msprof_analyze/prof_common/constant.py index 15a42f131d8..8aa499eef45 100644 --- a/profiler/msprof_analyze/prof_common/constant.py +++ b/profiler/msprof_analyze/prof_common/constant.py @@ -114,6 +114,9 @@ class Constant(object): DB = "db" INVALID = "invalid" + # export_type + NOTEBOOK = "notebook" + # db name DB_COMMUNICATION_ANALYZER = "analysis.db" DB_CLUSTER_COMMUNICATION_ANALYZER = "cluster_analysis.db" @@ -126,6 +129,7 @@ class Constant(object): TABLE_HOST_INFO = "HostInfo" TABLE_RANK_DEVICE_MAP = "RankDeviceMap" TABLE_CLUSTER_BASE_INFO = "ClusterBaseInfo" + TABLE_CLUSTER_TIME_SUMMARY = "ClusterTimeSummary" # data config key CONFIG = "config" diff --git a/profiler/msprof_analyze/test/ut/cluster_analyse/recipes/test_cluster_time_compare_summary.py b/profiler/msprof_analyze/test/ut/cluster_analyse/recipes/test_cluster_time_compare_summary.py new file mode 100644 index 00000000000..9cc3dd81808 --- /dev/null +++ 
b/profiler/msprof_analyze/test/ut/cluster_analyse/recipes/test_cluster_time_compare_summary.py @@ -0,0 +1,136 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import unittest +from unittest import mock +import pandas as pd + +from msprof_analyze.cluster_analyse.recipes.cluster_time_compare_summary.cluster_time_compare_summary import \ + ClusterTimeCompareSummary +from msprof_analyze.prof_common.constant import Constant + +NAMESPACE = "msprof_analyze.prof_common" + + +class TestClusterTimeCompareSummary(unittest.TestCase): + PARAMS = { + Constant.COLLECTION_PATH: "/data", + Constant.DATA_MAP: {}, + Constant.DATA_TYPE: Constant.DB, + Constant.CLUSTER_ANALYSIS_OUTPUT_PATH: "./test_cluster_time_compare_summary", + Constant.RECIPE_NAME: "ClusterTimeCompareSummary", + Constant.RECIPE_CLASS: ClusterTimeCompareSummary, + Constant.PARALLEL_MODE: Constant.CONCURRENT_MODE, + Constant.EXPORT_TYPE: Constant.DB, + ClusterTimeCompareSummary.RANK_LIST: Constant.ALL, + } + + def test_check_params_is_valid_should_return_false_when_bp_param_does_not_exist(self): + params = {} + params.update(self.PARAMS) + self.assertFalse(ClusterTimeCompareSummary(params).check_params_is_valid()) + + def test_check_params_is_valid_should_return_false_when_export_type_is_notebook(self): + params = {Constant.EXTRA_ARGS: ["--bp", "/data2"]} + params.update(self.PARAMS) + params[Constant.EXPORT_TYPE] = Constant.NOTEBOOK + 
self.assertFalse(ClusterTimeCompareSummary(params).check_params_is_valid()) + + def test_check_params_is_valid_should_return_false_when_base_path_is_invalid(self): + params = {Constant.EXTRA_ARGS: ["--bp", "/data2"]} + params.update(self.PARAMS) + with mock.patch(NAMESPACE + ".path_manager.PathManager.check_input_file_path", side_effect=RuntimeError): + self.assertFalse(ClusterTimeCompareSummary(params).check_params_is_valid()) + + def test_check_params_is_valid_should_return_false_when_table_cluster_time_summary_does_not_exist(self): + params = {} + params.update(self.PARAMS) + with mock.patch(NAMESPACE + ".db_manager.DBManager.check_tables_in_db", return_value=False): + self.assertFalse(ClusterTimeCompareSummary(params).check_params_is_valid()) + + def test_check_params_is_valid_should_return_false_when_base_table_cluster_time_summary_does_not_exist(self): + params = {Constant.EXTRA_ARGS: ["--bp", "/data2"]} + params.update(self.PARAMS) + with mock.patch(NAMESPACE + ".path_manager.PathManager.check_input_file_path"), \ + mock.patch(NAMESPACE + ".db_manager.DBManager.check_tables_in_db", side_effect=[True, False]): + self.assertFalse(ClusterTimeCompareSummary(params).check_params_is_valid()) + + def test_run_when_all_parameters_are_normal(self): + params = {Constant.EXTRA_ARGS: ["--bp", "/data2"]} + params.update(self.PARAMS) + params[Constant.EXPORT_TYPE] = "" + base_cluster_time_summary_df_dict = { + Constant.TABLE_CLUSTER_TIME_SUMMARY: pd.DataFrame( + { + "rank": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6], + "step": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + "computation": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "communicationNotOverlapComputation": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "communicationOverlapComputation": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "communication": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "free": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "communicationWaitStageTime": [0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "communicationTransmitStageTime": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "memory": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "memoryNotOverlapComputationCommunication": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + "taskLaunchDelayAvgTime": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + } + ) + } + cluster_time_summary_df_dict = { + Constant.TABLE_CLUSTER_TIME_SUMMARY: pd.DataFrame( + { + "rank": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7], + "step": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + "computation": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "communicationNotOverlapComputation": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "communicationOverlapComputation": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "communication": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "free": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "communicationWaitStageTime": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "communicationTransmitStageTime": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "memory": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "memoryNotOverlapComputationCommunication": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + "taskLaunchDelayAvgTime": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + } + ) + } + expected_result = pd.DataFrame({ + "rank": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6], + "step": [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + "computationDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "communicationNotOverlapComputationDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0], + "communicationOverlapComputationDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0], + "communicationDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + 
"freeDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "communicationWaitStageTimeDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "communicationTransmitStageTimeDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0], + "memoryDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "memoryNotOverlapComputationCommunicationDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0], + "taskLaunchDelayAvgTimeDiff": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + }) + with mock.patch(NAMESPACE + ".path_manager.PathManager.check_input_file_path"), \ + mock.patch(NAMESPACE + ".db_manager.DBManager.check_tables_in_db", side_effect=[True, True]), \ + mock.patch(NAMESPACE + ".database_service.DatabaseService.query_data", + side_effect=[cluster_time_summary_df_dict, base_cluster_time_summary_df_dict]): + cluster_time_compare_summary = ClusterTimeCompareSummary(params) + cluster_time_compare_summary.run() + self.assertTrue(cluster_time_compare_summary.compare_result.equals(expected_result)) + -- Gitee From 267ed885a05a3e685882d98e15d744d38a575272 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Fri, 28 Feb 2025 17:51:15 +0800 Subject: [PATCH 206/333] compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve compare bench_data_name get improve --- .../msprobe/core/compare/acc_compare.py | 88 +++++-------------- .../core/compare/multiprocessing_compute.py | 10 +-- .../msprobe/core/compare/utils.py | 14 +-- .../docs/10.accuracy_compare_PyTorch.md | 10 +-- .../msprobe/mindspore/compare/ms_compare.py | 8 ++ 
.../test/core_ut/compare/test_acc_compare.py | 42 +++------ .../core_ut/compare/test_acc_compare_utils.py | 16 ++-- .../test_cmp_multiprocessing_compute.py | 8 +- .../mindspore_ut/compare/test_ms_compare.py | 27 +++++- 9 files changed, 100 insertions(+), 123 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index f0ac97a0293..f2aa8c479ec 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -329,7 +329,9 @@ class Comparator: else: result_item.append(CompareConst.NONE) if self.dump_mode == Const.ALL: - result_item.append(npu_ops_all.get(ms_op_name).get("data_name", None)) + ms_data_name = npu_ops_all.get(ms_op_name).get("data_name", None) + pt_data_name = bench_ops_all.get(bench_op_name).get("data_name", None) + result_item.append([ms_data_name, pt_data_name]) result.append(result_item) elif ms_op_name not in npu_ops_all: logger.warning(f'Can not find npu op name : `{ms_op_name}` in npu dump json file.') @@ -349,47 +351,48 @@ class Comparator: result_df = self.make_result_table(result) return result_df - def compare_by_op(self, npu_op_name, bench_op_name, op_name_mapping_dict, input_param, bench_data): + def compare_by_op(self, npu_op_name, bench_op_name, op_name_mapping_dict, input_param): """ :param npu_op_name: excel中的NPU_Name,例如:MintFunctional.conv2d.0.forward.input.3.0 :param bench_op_name: excel中的Bench_Name,例如:Functional.conv2d.0.forward.input.3.0 :param op_name_mapping_dict: op_name和npy或pt文件的映射关系 :param input_param: npu_json_path/bench_json_path/stack_json_path等参数 - :param bench_data: bench的dump数据中"data"字段 :return: result_list,包含余弦相似度、最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率和错误信息 - 用于读取excel中的NPU_Name和Bench_Name,根据映射关系找到npy或pt文件,然后读取文件中的数据进行比较,计算余弦相似度、 + 用于读取excel中的NPU_Name和Bench_Name,根据映射关系找到npy或pt文件,然后读取文件中的数据进行比较,计算余弦相似度、欧式距离 最大绝对误差、最大相对误差、千分之一误差率、千分之五误差率并生成错误信息 """ - npu_bench_name_list = 
op_name_mapping_dict[npu_op_name] - data_name = safe_get_value(npu_bench_name_list, 1, "npu_bench_name_list") error_file, relative_err, error_flag = None, None, False - bench_data_name = get_bench_data_name(bench_op_name, bench_data) - if data_name == '-1' or data_name == -1: # 没有真实数据路径 - n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE - error_flag = True - elif not bench_data_name: + + data_name_pair = op_name_mapping_dict.get(npu_op_name) + npu_data_name = data_name_pair[0] + bench_data_name = data_name_pair[1] + + if str(npu_data_name) == '-1': # 没有npu真实数据 + n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True + elif str(bench_data_name) == '-1': # 没有bench真实数据 n_value, b_value, error_flag = CompareConst.READ_NONE, CompareConst.READ_NONE, True error_file = 'no_bench_data' else: + npu_dir = input_param.get("npu_dump_data_dir") + bench_dir = input_param.get("bench_dump_data_dir") try: - read_npy_data = getattr(self, "read_npy_data") frame_name = getattr(self, "frame_name") + read_npy_data = getattr(self, "read_npy_data") if frame_name == "MSComparator": - n_value = read_npy_data(input_param.get("npu_dump_data_dir"), npu_op_name + Const.NUMPY_SUFFIX) + n_value = read_npy_data(npu_dir, npu_data_name) if self.cross_frame: - b_value = read_npy_data(input_param.get("bench_dump_data_dir"), bench_data_name, - load_pt_file=True) + b_value = read_npy_data(bench_dir, bench_data_name, load_pt_file=True) else: - b_value = read_npy_data(input_param.get("bench_dump_data_dir"), bench_data_name) + b_value = read_npy_data(bench_dir, bench_data_name) else: - n_value = read_npy_data(input_param.get("npu_dump_data_dir"), npu_op_name + Const.PT_SUFFIX) - b_value = read_npy_data(input_param.get("bench_dump_data_dir"), bench_data_name) + n_value = read_npy_data(npu_dir, npu_data_name) + b_value = read_npy_data(bench_dir, bench_data_name) except IOError as error: error_file = error.filename n_value, b_value = CompareConst.READ_NONE, 
CompareConst.READ_NONE error_flag = True except (FileCheckException, CompareException): - error_file = data_name + error_file = npu_data_name n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True @@ -464,7 +467,7 @@ class Comparator: err_mess = [] is_print_compare_log = input_param.get("is_print_compare_log") - bench_data = load_json(input_param.get("bench_json_path")).get('data') + for i in range(len(result_df)): npu_op_name = result_df.iloc[i, 0] bench_op_name = result_df.iloc[i, 1] @@ -472,7 +475,7 @@ class Comparator: logger.info("start compare: {}".format(npu_op_name)) cos_sim, euc_dist, max_abs_err, max_relative_err, one_thousand_err_ratio, five_thousand_err_ratio, err_msg \ - = self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param, bench_data) + = self.compare_by_op(npu_op_name, bench_op_name, dump_path_dict, input_param) if is_print_compare_log: logger.info( @@ -508,46 +511,3 @@ class Comparator: except ValueError as e: logger.error('result dataframe is not found.') raise CompareException(CompareException.INVALID_DATA_ERROR) from e - - -def get_bench_data_name(bench_op_name, bench_data): - bench_name_list = re.split(r'\.(input|output|kwargs|parameters|parameters_grad)\.', bench_op_name) - if len(bench_name_list) > 1 and bench_name_list[1] == Const.PARAMS_GRAD: - bench_data_bundle = bench_data.get(bench_name_list[0] + Const.SEP + bench_name_list[1], {}) - else: - bench_data_bundle = bench_data.get(bench_name_list[0], {}) - if not bench_data_bundle or len(bench_name_list) < 3: - return None - layers = bench_name_list[2].split(Const.SEP) - - def _get(key, container): - if isinstance(container, dict): - return container.get(key) - if isinstance(container, list): - try: - return container[int(key)] - except (ValueError, IndexError): - return None - return None - - def get_by_layer(container, params_grad=False): - data = container - # dump.json中parameters_grad的结构为key:[{}], 
如果存在key,有且只有一个列表元素,而op_name中只命名到了key,因此加'0' - if params_grad: - layers.append('0') - for layer in layers: - data = _get(layer, data) - return _get(CompareConst.DATA_NAME.lower(), data) - - if Const.INPUT == bench_name_list[1]: - return get_by_layer(bench_data_bundle.get(Const.INPUT, bench_data_bundle.get(Const.INPUT_ARGS))) - elif Const.KWARGS == bench_name_list[1]: - return get_by_layer(bench_data_bundle.get(Const.INPUT_KWARGS)) - elif Const.OUTPUT == bench_name_list[1]: - return get_by_layer(bench_data_bundle.get(Const.OUTPUT)) - elif Const.PARAMS == bench_name_list[1]: - return get_by_layer(bench_data_bundle.get(Const.PARAMS)) - elif Const.PARAMS_GRAD == bench_name_list[1]: - return get_by_layer(bench_data_bundle, params_grad=True) - else: - return None diff --git a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py index f79671827c1..71b0f29d64f 100644 --- a/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/core/compare/multiprocessing_compute.py @@ -25,7 +25,7 @@ from msprobe.core.common.utils import CompareException from msprobe.core.common.const import CompareConst -def _handle_multi_process(func, input_parma, result_df, lock): +def _handle_multi_process(func, input_param, result_df, lock): process_num = max(int((multiprocessing.cpu_count() + 1) // 4), 1) op_name_mapping_dict = read_dump_data(result_df) @@ -55,7 +55,7 @@ def _handle_multi_process(func, input_parma, result_df, lock): idx = df_chunk_size * process_idx chunk_size = len(df_chunk) result = pool.apply_async(func, - args=(idx, op_name_mapping_dict, df_chunk, lock, input_parma), + args=(idx, op_name_mapping_dict, df_chunk, lock, input_param), error_callback=err_call, callback=partial(update_progress, chunk_size, lock) ) @@ -97,12 +97,12 @@ def _ms_graph_handle_multi_process(func, result_df, mode): def read_dump_data(result_df): try: npu_dump_name_list = 
result_df.iloc[0:, 0].tolist() - npu_dump_tensor_list = result_df.iloc[0:, -1].tolist() + dump_tensor_pair_list = result_df.iloc[0:, -1].tolist() op_name_mapping_dict = {} for index, _ in enumerate(npu_dump_name_list): npu_dump_name = npu_dump_name_list[index] - npu_dump_tensor = npu_dump_tensor_list[index] - op_name_mapping_dict[npu_dump_name] = [npu_dump_tensor, npu_dump_tensor] + dump_tensor_pair = dump_tensor_pair_list[index] + op_name_mapping_dict[npu_dump_name] = dump_tensor_pair return op_name_mapping_dict except ValueError as e: logger.error('result dataframe is not found.') diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 72b75ab254e..8656daf7cae 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -321,8 +321,8 @@ def get_accuracy(result, n_dict, b_dict, dump_mode): has_stack = npu_stack_info and bench_stack_info if dump_mode == Const.ALL: - npu_data_name = n_dict.get("data_name", None) - bench_data_name = b_dict.get("data_name", None) + npu_data_name_list = n_dict.get("data_name", None) + bench_data_name_list = b_dict.get("data_name", None) for index in range(min_len): n_name = safe_get_value(n_dict, n_start + index, "n_dict", key="op_name") @@ -353,7 +353,9 @@ def get_accuracy(result, n_dict, b_dict, dump_mode): result_item.append(err_msg) result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) if dump_mode == Const.ALL: - result_item.append(safe_get_value(npu_data_name, n_start + index, "npu_data_name")) + npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") + bench_data_name = safe_get_value(bench_data_name_list, n_start + index, "bench_data_name_list") + result_item.append([npu_data_name, bench_data_name]) result.append(result_item) @@ -388,7 +390,9 @@ def get_accuracy(result, n_dict, b_dict, dump_mode): result_item.append(err_msg) 
result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) if dump_mode == Const.ALL: - result_item.append(safe_get_value(npu_data_name, n_start + index, "npu_data_name")) + npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") + bench_data_name = safe_get_value(bench_data_name_list, n_start + index, "bench_data_name_list") + result_item.append([npu_data_name, bench_data_name]) result.append(result_item) @@ -467,7 +471,7 @@ def get_un_match_accuracy(result, n_dict, dump_mode): result_item.append(err_msg) append_stack_info(result_item, npu_stack_info, index) if dump_mode == Const.ALL and result_item[1] == CompareConst.N_A: - result_item.extend(["-1"]) + result_item.extend([["-1", "-1"]]) result.append(result_item) diff --git a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md index a5f83d8dfcb..6f886215b0a 100644 --- a/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/10.accuracy_compare_PyTorch.md @@ -257,11 +257,11 @@ PyTorch 精度比对是以 CPU 或 GPU 的计算结果为标杆,通过计算 统计量有 4 种:最大值(max)、最小值(min)、平均值(mean)和 L2-范数(L2 norm)。 -|dump 数据模式|Cosine (tensor 余弦相似度)|EucDist (tensor 欧式距离)|MaxAbsErr (tensor 最大绝对误差)|MaxRelativeErr (tensor 最大相对误差)|One Thousandth Err Ratio (tensor 相对误差小于千分之一的比例)|Five Thousandth Err Ratio (tensor 相对误差小于千分之五的比例)|NPU 和 bench 的统计量绝对误差 (max, min, mean, L2 norm) diff| NPU 和 bench 的统计量相对误差 (max, min, mean, L2 norm) RelativeErr |NPU 和 bench 的统计量 (max, min, mean, L2 norm)|NPU MD5 (NPU 数据 CRC-32 值)|BENCH MD5 (bench 数据 CRC-32 值)|Result (比对结果)|Accuracy Reached or Not (计算精度是否达标)|Err_message (错误信息提示)|NPU_Stack_Info (堆栈信息)|Data_Name (NPU 真实数据名)| -|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -|真实数据模式|√|√|√|√|√|√|||√||||√|√|√|√| -|统计数据模式|||||||√|√|√|||√||√|√|| -|MD5 模式||||||||||√|√|√|||√|| +|dump 数据模式|Cosine (tensor 
余弦相似度)|EucDist (tensor 欧式距离)|MaxAbsErr (tensor 最大绝对误差)|MaxRelativeErr (tensor 最大相对误差)|One Thousandth Err Ratio (tensor 相对误差小于千分之一的比例)|Five Thousandth Err Ratio (tensor 相对误差小于千分之五的比例)|NPU 和 bench 的统计量绝对误差 (max, min, mean, L2 norm) diff| NPU 和 bench 的统计量相对误差 (max, min, mean, L2 norm) RelativeErr |NPU 和 bench 的统计量 (max, min, mean, L2 norm)|NPU MD5 (NPU 数据 CRC-32 值)|BENCH MD5 (bench 数据 CRC-32 值)|Result (比对结果)|Accuracy Reached or Not (计算精度是否达标)|Err_message (错误信息提示)|NPU_Stack_Info (堆栈信息)| Data_Name ([NPU真实数据名,Bench真实数据名]) | +|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---------------------------------:| +|真实数据模式|√|√|√|√|√|√|||√||||√|√|√| √ | +|统计数据模式|||||||√|√|√|||√||√|√| | +|MD5 模式||||||||||√|√|√|||√| | 上表中NPU_Stack_Info字段需要配置-s参数生成。 diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 9f1523c03aa..c3767abf871 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -78,6 +78,11 @@ class MSComparator(Comparator): raise TypeError(f"The type of parameter `data_mapping` must be dict, str or None, but got " f"{type(self.data_mapping)}") + @staticmethod + def process_data_name(result): + result['data_name_x'] = result.apply(lambda row: [row['data_name_x'], row['data_name_y']], axis=1) + return result + def calc_accuracy(self, result_df, header): condition_no_bench = result_df[CompareConst.BENCH_NAME] == CompareConst.N_A result_df[condition_no_bench] = result_df[condition_no_bench].fillna(CompareConst.N_A) @@ -140,6 +145,8 @@ class MSComparator(Comparator): header.append(CompareConst.STACK) if self.dump_mode == Const.ALL: header.append(CompareConst.DATA_NAME) + result = self.process_data_name(result) + result.rename(columns={'op_name_x': CompareConst.NPU_NAME, 'op_name_y': CompareConst.BENCH_NAME, 'dtype_x': CompareConst.NPU_DTYPE, @@ -170,6 +177,7 
@@ class MSComparator(Comparator): result[npu_summary] = result['summary_x'].apply(set_summary).tolist() result[bench_summary] = result['summary_y'].apply(set_summary).tolist() + result_df = pd.DataFrame(columns=header) for h in header: if h in result.columns: diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py index c882e331f55..1b2f6bb2fde 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py @@ -11,7 +11,7 @@ import torch from msprobe.core.common.const import CompareConst, Const from msprobe.core.common.utils import CompareException -from msprobe.core.compare.acc_compare import Comparator, ModeConfig, get_bench_data_name +from msprobe.core.compare.acc_compare import Comparator, ModeConfig from msprobe.core.compare.highlight import find_error_rows, find_compare_result_error_rows, ApiBatch from msprobe.core.compare.utils import get_accuracy from msprobe.pytorch.compare.pt_compare import PTComparator @@ -636,11 +636,11 @@ class TestUtilsMethods(unittest.TestCase): def test_do_multi_process(self): data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], - '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', '-1']] + '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, 'Yes', '', ['-1', '-1']]] o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 1, 1, 1, 1, 1, 1, 1, 1, 'None', 'No bench data matched.', '-1']] + 1, 1, 1, 1, 1, 1, 1, 1, 'None', 'No bench data matched.', ['-1', '-1']]] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) o_result = 
pd.DataFrame(o_data, columns=columns) @@ -670,7 +670,7 @@ class TestUtilsMethods(unittest.TestCase): mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) pt_comparator = PTComparator(mode_config) - result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {}) + result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'No bench data matched.']) @@ -688,43 +688,23 @@ class TestUtilsMethods(unittest.TestCase): pt_comparator = PTComparator(mode_config) pt_name = '-1' - pt_path = os.path.join(base_dir, pt_name) - op_name_mapping_dict = {'Functional.linear.0.forward.input.0': [pt_path, pt_path]} + op_name_mapping_dict = {'Functional.linear.0.forward.input.0': [pt_name, pt_name]} input_param = {'npu_dump_data_dir': base_dir, 'bench_dump_data_dir': base_dir} - result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, - {'Functional.linear.0.forward': {'input_args': [ - {'data_name': 'Functional.linear.0.forward.input.0.pt'}]}}) + result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 'unsupported', f'Dump file: {pt_path} not found.']) + 'unsupported', 'No bench data matched.']) pt_name = 'Functional.linear.0.forward.input.0.pt' - pt_path = os.path.join(base_dir, pt_name) - op_name_mapping_dict = {'Functional.linear.0.forward.input.0': [pt_path, pt_path]} + op_name_mapping_dict = {'Functional.linear.0.forward.input.0': [pt_name, pt_name]} input_param = {'npu_dump_data_dir': base_dir, 'bench_dump_data_dir': base_dir} - result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, {}) + result = 
pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, ['unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', - 'unsupported', 'Bench does not have data file.']) + 'unsupported', 'Dump file: Functional.linear.0.forward.input.0.pt not found.']) generate_pt(base_dir) - result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param, - {'Functional.linear.0.forward': {'input_args': [ - {'data_name': 'Functional.linear.0.forward.input.0.pt'}]}}) + result = pt_comparator.compare_by_op(npu_op_name, bench_op_name, op_name_mapping_dict, input_param) self.assertEqual(result, [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, '']) - def test_get_bench_data_name_input(self): - bench_op_name = "Functional.linear.0.forward.input.0" - bench_data = {"Functional.linear.0.forward": {"input_args": [{"data_name": "Functional.linear.0.forward.input.0.pt"}], "input_kwargs": {}, "output": []}} - result = get_bench_data_name(bench_op_name, bench_data) - - self.assertEqual(result, "Functional.linear.0.forward.input.0.pt") - - def test_get_bench_data_name_output(self): - bench_op_name = "Functional.linear.0.forward.output.0" - bench_data = {"Functional.linear.0.forward": {"input_args": [], "input_kwargs": {}, "output": [{"data_name": "Functional.linear.0.forward.output.0.pt"}]}} - result = get_bench_data_name(bench_op_name, bench_data) - - self.assertEqual(result, "Functional.linear.0.forward.output.0.pt") - class TestComparator(unittest.TestCase): def setUp(self): diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py index 2e9a4657266..bf23f4de1da 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare_utils.py @@ -224,31 +224,31 @@ o_result_unmatch_3 = [ 
['Functional.conv2d.0.forward.input.0', 'N/A', 'torch.float32', 'N/A', [1, 1, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 3.029174327850342, -2.926689624786377, -0.06619918346405029, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 'No bench data matched.', 'None', '-1'], + 'No bench data matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.forward.input.1', 'N/A', 'torch.float32', 'N/A', [16, 1, 5, 5], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 0.19919930398464203, -0.19974489510059357, 0.006269412115216255, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 'No bench data matched.', 'None', '-1'], + 'No bench data matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.forward.input.2', 'N/A', 'torch.float32', 'N/A', [16], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 0.19734230637550354, -0.18177609145641327, 0.007903944700956345, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 'No bench data matched.', 'None', '-1'], + 'No bench data matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.forward.parameters.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.forward.parameters.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.forward.output.0', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 2.1166646480560303, -2.190781354904175, -0.003579073818400502, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 'No bench data matched.', 'None', '-1'], + 'No bench data 
matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.parameters_grad.weight', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'], + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', ['-1', '-1']], ['Functional.conv2d.0.parameters_grad.bias', 'N/A', 'torch.float32', 'N/A', [1, 16, 28, 28], 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', - 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', '-1'] + 1.0, 1.0, 1.0, 1.0, 'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'No bench data matched.', 'None', ['-1', '-1']] ] # test_merge_tensor diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py index 3fa16b0d9d4..49f084ce07c 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_cmp_multiprocessing_compute.py @@ -18,12 +18,12 @@ data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.inp 'torch.float32', 'torch.float32', [2, 2], [2, 2], '', '', '', '', '', '', 1, 1, 1, 1, 1, 1, 1, 1, - 'Yes', '', '-1']] + 'Yes', '', ['-1', '-1']]] o_data = [['Functional.linear.0.forward.input.0', 'Functional.linear.0.forward.input.0', 'torch.float32', 'torch.float32', [2, 2], [2, 2], 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 'unsupported', 1, 1, 1, 1, 1, 1, 1, 1, - 'None', 'No bench data matched.', '-1']] + 'None', 'No bench data matched.', ['-1', '-1']]] columns = CompareConst.COMPARE_RESULT_HEADER + ['Data_name'] result_df = pd.DataFrame(data, columns=columns) o_result = pd.DataFrame(o_data, columns=columns) @@ -54,9 +54,9 @@ class TestUtilsMethods(unittest.TestCase): func = 
Comparator(mode_config).compare_ops generate_dump_json(base_dir) - input_parma = {'bench_json_path': os.path.join(base_dir, 'dump.json')} + input_param = {'bench_json_path': os.path.join(base_dir, 'dump.json')} lock = multiprocessing.Manager().RLock() - result = _handle_multi_process(func, input_parma, result_df, lock) + result = _handle_multi_process(func, input_param, result_df, lock) self.assertTrue(result.equals(o_result)) def test_read_dump_data(self): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py index 035fe0c53a4..6f737789400 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/compare/test_ms_compare.py @@ -8,6 +8,7 @@ import unittest from unittest.mock import patch import numpy as np +import pandas as pd import torch import yaml @@ -534,4 +535,28 @@ class TestUtilsMethods(unittest.TestCase): api_list = ["Mint"] with self.assertRaises(CompareException): - ms_comparator.get_api_name(api_list) \ No newline at end of file + ms_comparator.get_api_name(api_list) + + def test_process_data_name(self): + stack_mode = True + auto_analyze = True + fuzzy_match = False + dump_mode = Const.ALL + + mode_config = ModeConfig(stack_mode, auto_analyze, fuzzy_match, dump_mode) + mapping_config = MappingConfig() + ms_comparator = MSComparator(mode_config, mapping_config) + + data = pd.DataFrame({ + 'data_name_x': ['A', 'B', 'C'], + 'data_name_y': ['X', 'Y', 'Z'] + }) + + result = ms_comparator.process_data_name(data.copy()) + + expected = pd.DataFrame({ + 'data_name_x': [['A', 'X'], ['B', 'Y'], ['C', 'Z']], + 'data_name_y': ['X', 'Y', 'Z'] + }) + + pd.testing.assert_frame_equal(result, expected) -- Gitee From 9bd9e4df473f710833345cc4e600dd055345f062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E6=96=87=E6=B3=A2?= <1010589661@qq.com> Date: Tue, 4 Mar 2025 20:11:07 +0800 
Subject: [PATCH 207/333] =?UTF-8?q?=E3=80=90=E5=BC=80=E5=8F=91=E8=87=AA?= =?UTF-8?q?=E6=8F=90=E3=80=91=E3=80=90=E7=BD=91=E7=BB=9C=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E3=80=91=E8=BD=BB=E9=87=8F=E5=8C=96=E5=BF=AB=E6=85=A2=E5=8D=A1?= =?UTF-8?q?=E5=88=86=E6=9E=90=E8=83=BD=E5=8A=9B=E7=AE=80=E6=98=93=E5=8F=AF?= =?UTF-8?q?=E8=A7=86=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../recipes/cluster_display.py | 91 ++++++++++--------- .../recipes/slow_link/stats.ipynb | 10 +- 2 files changed, 52 insertions(+), 49 deletions(-) diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/cluster_display.py b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_display.py index 81d68901ea9..fbf89bc4909 100644 --- a/profiler/msprof_analyze/cluster_analyse/recipes/cluster_display.py +++ b/profiler/msprof_analyze/cluster_analyse/recipes/cluster_display.py @@ -242,69 +242,72 @@ def display_stats_optional_combobox(options, display_func, args, description="Op display_func(options[0], args) -def get_ratio_data(lst, num_intervals): +def compute_quantile_intervals(lst, num_intervals): lst.sort(reverse=False) if len(lst) > num_intervals: - result = [min(lst)] + [lst[int(math.ceil((i + 1) * len(lst) / num_intervals)) - 1] for i in range(num_intervals - 1)] + [max(lst)] + min_value = min(lst) + max_value = max(lst) + interval_size = len(lst) / num_intervals + result = [min_value] + for i in range(1, num_intervals): + index = int(math.ceil(i * interval_size)) - 1 + result.append(lst[index]) + result.append(max_value) else: result = lst return result[::-1] -def display_transmittime_bar(slowlinksum_df, ratio_set = 0.05, opType='hcom_allGather_', - relatedRanks=5, dataSize=1024): - slowlinksum_df_filtered = slowlinksum_df[(slowlinksum_df['opType']==opType)&(slowlinksum_df['relatedRanks']==relatedRanks)&(slowlinksum_df['dataSize']==dataSize)] - slowlinksum_df_filtered['relatedRanks'] = slowlinksum_df_filtered['relatedRanks'].apply(str) - 
slowlinksum_df_filtered['dataSize'] = slowlinksum_df_filtered['dataSize'].apply(str) - slowlinksum_df_filtered['opType_relatedRanks_dataSize'] = slowlinksum_df_filtered['opType']+slowlinksum_df_filtered['relatedRanks']+'_'+slowlinksum_df_filtered['dataSize'] - slowlinksum_df_filtered['transmitTime_Zscore'] = slowlinksum_df_filtered['transmitTime'].apply(lambda x: (x - slowlinksum_df_filtered['transmitTime'].mean())/slowlinksum_df_filtered['transmitTime'].std()) - num_intervals = int(1 / ratio_set) - data_dict = slowlinksum_df_filtered.groupby('opType_relatedRanks_dataSize')['transmitTime'].apply(list).to_dict() - - data_dict = {k: get_ratio_data(v, num_intervals) for k, v in data_dict.items()} +def calculate_zscore(x, mean, std): + if std != 0: + zscore = (x - mean) / std + elif x > mean: + zscore = 100 + else: + zscore = -100 + return zscore - max_length = max(len(lst) for lst in data_dict.values()) - # 使用列表推导式和切片操作一次性填充 +def process_data(df, group_cols, value_col, num_intervals): + grouped = df.groupby(group_cols)[value_col].apply(list).to_dict() + data = {k: compute_quantile_intervals(v, num_intervals) for k, v in grouped.items()} + max_len = max(len(v) for v in data.values()) data_dict = { - key: value + [None] * (max_length - len(value)) - for key, value in data_dict.items() + k: v + [np.nan] * (max_len - len(v)) + for k, v in data.items() } - # 使用sorted()函数和lambda表达式对字典的键进行排序,reverse=True表示降序排列 sorted_items = sorted(data_dict.items(), key=lambda item: item[0], reverse=True) - # 将排序后的列表转换为字典 data_dict = dict(sorted_items) - data_dealed = pd.DataFrame(data_dict) + return data_dealed - data_dealed.T.plot(kind='bar') - plt.gca().legend_.remove() - plt.show() - - data_dict_zscore = slowlinksum_df_filtered.groupby('opType_relatedRanks_dataSize')['transmitTime_Zscore'].apply(list).to_dict() - - data_dict_zscore = {k: get_ratio_data(v, num_intervals) for k, v in data_dict_zscore.items()} - max_length_zscore = max(len(lst) for lst in data_dict_zscore.values()) - - # 
使用列表推导式和切片操作一次性填充 - data_dict_zscore = { - key: value + [None] * (max_length_zscore - len(value)) - for key, value in data_dict_zscore.items() - } - - # 使用sorted()函数和lambda表达式对字典的键进行排序,reverse=True表示降序排列 - sorted_items_zscore = sorted(data_dict_zscore.items(), key=lambda item: item[0], reverse=True) +def plot_data(df, title, ylabel): + ax = df.plot(kind='bar', figsize=(12, 6)) + ax.set_title(title, fontsize=14) + ax.set_xlabel('opTypeRelatedRanksDataSize', fontsize=12) + ax.set_ylabel(ylabel, fontsize=12) + ax.legend(title='Percentiles', bbox_to_anchor=(1.05, 1)) + plt.tight_layout() + plt.show() - # 将排序后的列表转换为字典 - data_dict_zscore = dict(sorted_items_zscore) - data_dealed_zscore = pd.DataFrame(data_dict_zscore) +def display_transmittime_bar(slowlinkops_df, ratio_set=0.05, optype='hcom_allGather_', + relatedranks=5, datasize=1024): + slowlinkops_df_f = slowlinkops_df[(slowlinkops_df['opType'] == optype) & + (slowlinkops_df['relatedRanks'] == relatedranks) & (slowlinkops_df['dataSize'] == datasize)] + slowlinkops_df_f['relatedRanks'] = slowlinkops_df_f['relatedRanks'].apply(str) + slowlinkops_df_f['dataSize'] = slowlinkops_df_f['dataSize'].apply(str) + slowlinkops_df_f['opTypeRelatedRanksDataSize'] = slowlinkops_df_f['opType'] + \ + slowlinkops_df_f['relatedRanks'] + '_' + slowlinkops_df_f['dataSize'] + slowlinkops_df_f['transmitTime_Zscore'] = slowlinkops_df_f['transmitTime'].apply( + lambda x: calculate_zscore(x, slowlinkops_df_f['transmitTime'].mean(), slowlinkops_df_f['transmitTime'].std())) + num_intervals = int(1 / ratio_set) - data_dealed_zscore.T.plot(kind='bar') + data_tt = process_data(slowlinkops_df_f, 'opTypeRelatedRanksDataSize', 'transmitTime', num_intervals) + data_ttzscore = process_data(slowlinkops_df_f, 'opTypeRelatedRanksDataSize', 'transmitTime_Zscore', num_intervals) - plt.gca().legend_.remove() - plt.show() - plt.clf() \ No newline at end of file + plot_data(data_tt, 'Transmit Time Distribution', 'Time (ns)') + plot_data(data_ttzscore, 'Z-Score 
of Transmit Time Distribution', 'Z-Score') \ No newline at end of file diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/slow_link/stats.ipynb b/profiler/msprof_analyze/cluster_analyse/recipes/slow_link/stats.ipynb index 9672f62e557..30edbc24537 100644 --- a/profiler/msprof_analyze/cluster_analyse/recipes/slow_link/stats.ipynb +++ b/profiler/msprof_analyze/cluster_analyse/recipes/slow_link/stats.ipynb @@ -39,8 +39,8 @@ "\n", "import cluster_display\n", "\n", - "slow_link_sum_df = pd.read_csv(\"slow_link_sum.csv\")\n", - "slow_link_ops_df = pd.read_csv(\"slow_link_ops.csv\", index_col=\"opTypeRelatedRanksDataSize\")" + "slow_link_ops_df = pd.read_csv(\"slow_link_ops.csv\")\n", + "slow_link_sum_df = pd.read_csv(\"slow_link_sum.csv\", index_col=\"opTypeRelatedRanksDataSize\")" ] }, { @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "cluster_display.display_transmittime_bar(slow_link_sum_df)" + "cluster_display.display_transmittime_bar(slow_link_ops_df, 0.05, 'hcom_allGather_', 5, 1024)" ] }, { @@ -82,8 +82,8 @@ "metadata": {}, "outputs": [], "source": [ - "display(slow_link_ops_df)\n", - "fig_slow_link_ops = cluster_display.display_duration_boxplots(None, slow_link_ops_df, x_title=\"opTypeRelatedRanksDataSize\")" + "display(slow_link_sum_df)\n", + "fig_slow_link_ops = cluster_display.display_duration_boxplots(None, slow_link_sum_df, x_title=\"opTypeRelatedRanksDataSize\")" ] } ], -- Gitee From 19e3b1f36ad3efda3e127a0049997f81fe916b6f Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 4 Mar 2025 19:48:05 +0800 Subject: [PATCH 208/333] compare dtype match check bugfix compare dtype match check bugfix --- .../msprobe/core/common/const.py | 11 ++++------- .../msprobe/core/compare/check.py | 8 ++------ .../msprobe/mindspore/compare/ms_compare.py | 18 +++++++++--------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 
27dc231c75c..ce72b22d63b 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -364,13 +364,10 @@ class CompareConst: MAX_RELATIVE_ERR, MIN_RELATIVE_ERR, MEAN_RELATIVE_ERR, NORM_RELATIVE_ERR] # dtype match - MS_TYPE = [ - [Const.FLOAT16, Const.FLOAT32], [Const.FLOAT32, Const.FLOAT16], - [Const.FLOAT16, Const.BFLOAT16], [Const.BFLOAT16, Const.FLOAT16] - ] - TORCH_TYPE = [ - [Const.TORCH_FLOAT16, Const.TORCH_FLOAT32], [Const.TORCH_FLOAT32, Const.TORCH_FLOAT16], - [Const.TORCH_FLOAT16, Const.TORCH_BFLOAT16], [Const.TORCH_BFLOAT16, Const.TORCH_FLOAT16] + + DTYPE_MATCH_GROUPS = [ + {Const.FLOAT16, Const.FLOAT32, Const.BFLOAT16}, + {Const.TORCH_FLOAT16, Const.TORCH_FLOAT32, Const.TORCH_BFLOAT16} ] # read_op diff --git a/debug/accuracy_tools/msprobe/core/compare/check.py b/debug/accuracy_tools/msprobe/core/compare/check.py index 653823e20b2..9429d7ffa1a 100644 --- a/debug/accuracy_tools/msprobe/core/compare/check.py +++ b/debug/accuracy_tools/msprobe/core/compare/check.py @@ -82,12 +82,8 @@ def check_type_shape_match(npu_struct, bench_struct): f'should both be 2, please check!') raise CompareException(CompareException.INDEX_OUT_OF_BOUNDS_ERROR) from error shape_match = npu_shape == bench_shape - type_match = npu_type == bench_type - if not type_match: - if ([npu_type, bench_type] in CompareConst.MS_TYPE) or ([npu_type, bench_type] in CompareConst.TORCH_TYPE): - type_match = True - else: - type_match = False + type_match = ((npu_type == bench_type) or + any(npu_type in group and bench_type in group for group in CompareConst.DTYPE_MATCH_GROUPS)) struct_match = shape_match and type_match if not struct_match: return False diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 9f1523c03aa..afd88db7b5b 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ 
b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -270,15 +270,15 @@ class MSComparator(Comparator): bench_dtype = match_result['dtype_y'] if self.cross_frame: npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) - return ((npu_dtype == bench_dtype) | - ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.FLOAT32)) | - ((npu_dtype == Const.FLOAT32) & (bench_dtype == Const.FLOAT16)) | - ((npu_dtype == Const.FLOAT16) & (bench_dtype == Const.BFLOAT16)) | - ((npu_dtype == Const.BFLOAT16) & (bench_dtype == Const.FLOAT16)) | - ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_FLOAT32)) | - ((npu_dtype == Const.TORCH_FLOAT32) & (bench_dtype == Const.TORCH_FLOAT16)) | - ((npu_dtype == Const.TORCH_FLOAT16) & (bench_dtype == Const.TORCH_BFLOAT16)) | - ((npu_dtype == Const.TORCH_BFLOAT16) & (bench_dtype == Const.TORCH_FLOAT16))) + + equal_condition = npu_dtype == bench_dtype + match_condition = ( + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[0]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[0])) | + (npu_dtype.isin(CompareConst.DTYPE_MATCH_GROUPS[1]) & bench_dtype.isin( + CompareConst.DTYPE_MATCH_GROUPS[1])) + ) + return equal_condition | match_condition match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A return self.make_result_df(match_result) -- Gitee From ec7f0e0a391d96e09eb02bb0d56671fbf0031e8d Mon Sep 17 00:00:00 2001 From: shawn_zhu1 Date: Wed, 5 Mar 2025 09:39:57 +0800 Subject: [PATCH 209/333] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/nan_analyse/analyze_dump_graph.py | 39 +++++++++++-------- .../nan_analyse/analyze_pp_partition.py | 5 +++ .../nan_analyse/pre_process_dump_data.py | 2 +- .../msprobe/pytorch/nan_analyse/utils.py | 15 ------- 4 files changed, 28 insertions(+), 33 deletions(-) diff --git 
a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_dump_graph.py b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_dump_graph.py index 171bcba8cad..9a5f8020537 100644 --- a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_dump_graph.py +++ b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_dump_graph.py @@ -29,32 +29,37 @@ class NodeType(Enum): COLLECTIVE = "collective" +class EdgeType(Enum): + SEQUENTIAL = "sequential" + COMMUNICATION = "communication" + + @dataclass class Node: - id: str # Unique identifier (rank_id:api_name) + node_id: str # (rank_id:api_name) rank: int api_info: APIInfo node_type: NodeType def __hash__(self): - return hash(self.id) + return hash(self.node_id) def __eq__(self, other): - return isinstance(other, Node) and self.id == other.id + return isinstance(other, Node) and self.node_id == other.node_id def __str__(self): - return self.id + return self.node_id class Edge: - def __init__(self, src: Node, dst: Node, edge_type: str = "sequential"): + def __init__(self, src: Node, dst: Node, edge_type: EdgeType = EdgeType.SEQUENTIAL): self.src = src self.dst = dst - self.edge_type = edge_type # "sequential" or "communication" + self.edge_type = edge_type self.edge_id = self.__generate_edge_name() def __generate_edge_name(self): - return f'{self.src.id}_{self.dst.id}' + return f'{self.src.node_id}_{self.dst.node_id}' class DistributedComputeGraph: @@ -67,12 +72,12 @@ class DistributedComputeGraph: self.in_degrees: Dict[Node, int] = defaultdict(int) def add_node(self, node: Node): - self.nodes[node.id] = node + self.nodes[node.node_id] = node if not self.rank_to_nodes.get(node.rank): self.rank_to_nodes[node.rank] = [] self.rank_to_nodes[node.rank].append(node) - def add_edge(self, src: Node, dst: Node, edge_type: str = "sequential"): + def add_edge(self, src: Node, dst: Node, edge_type: EdgeType = EdgeType.SEQUENTIAL): edge = Edge(src, dst, edge_type) # 边去重 if self.edges.get(edge.edge_id): @@ -141,7 +146,7 @@ class 
GraphBuilder: # Step 2: Connect sequential operations within each rank for _, nodes in rank_nodes.items(): for i in range(len(nodes) - 1): - graph.add_edge(nodes[i], nodes[i + 1], "sequential") + graph.add_edge(nodes[i], nodes[i + 1], EdgeType.SEQUENTIAL) # Step 3: Connect communication operations between ranks GraphBuilder._connect_p2p_operations(graph, rank_nodes) @@ -166,7 +171,7 @@ class GraphBuilder: for candi_node in graph.get_nodes_by_rank(rank): if GraphBuilder._match_comm_ops(node, candi_node): - graph.add_edge(node, candi_node, "communication") + graph.add_edge(node, candi_node, EdgeType.COMMUNICATION) break @staticmethod @@ -186,8 +191,8 @@ class GraphBuilder: for j, node_j in enumerate(group): if i >= j: continue - graph.add_edge(node_i, node_j, "communication") - graph.add_edge(node_j, node_i, "communication") # Bidirectional for collectives + graph.add_edge(node_i, node_j, EdgeType.COMMUNICATION) + graph.add_edge(node_j, node_i, EdgeType.COMMUNICATION) # Bidirectional for collectives @staticmethod def _match_comm_ops(no1: Node, no2: Node) -> bool: @@ -270,7 +275,7 @@ class GraphTraversal: """ 获取节点的详细信息,用于调试和打印 """ - return (f"Node(id={node.id}, rank={node.rank}, call_index={node.api_info.call_index}, " + return (f"Node(id={node.node_id}, rank={node.rank}, call_index={node.api_info.call_index}, " f"type={node.node_type.value})") @staticmethod @@ -278,11 +283,11 @@ class GraphTraversal: """ 打印每一层的节点信息 """ - logger.info("\n层次遍历结果:") + logger.info("Level visit results:") for i, level in enumerate(levels): - logger.info(f"\n层级 {i}:") + logger.info(f"level {i}:") for node in level: - logger.info(f" {GraphTraversal.get_node_info(node)}") + logger.info(f"node: {GraphTraversal.get_node_info(node)}") @staticmethod def print_cycles_info(cycles: Set[Tuple[Node, Node]]): diff --git a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py index ac2b812922b..a85b2dfaf69 
100644 --- a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py +++ b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py @@ -21,6 +21,8 @@ from msprobe.pytorch.nan_analyse.pre_process_dump_data import process_on_all_ran from msprobe.pytorch.nan_analyse.utils import singleton +MAX_RECURSIVE_DEPTH = 100 + def __is_send_op(op_name: str) -> bool: if op_name.startswith('Distributed.') and 'send.' in op_name: return True @@ -115,6 +117,9 @@ class PPAnalyzer: visited = set() def dfs(rank_id: int, stage: int): + if stage >= MAX_RECURSIVE_DEPTH: + raise ValueError("Recursive depth exceeds the limit") + if rank_id in visited: return visited.add(rank_id) diff --git a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/pre_process_dump_data.py b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/pre_process_dump_data.py index 305e01a8282..73815f8b922 100644 --- a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/pre_process_dump_data.py +++ b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/pre_process_dump_data.py @@ -19,7 +19,7 @@ from collections import OrderedDict from msprobe.core.common.const import Const from msprobe.core.common.log import logger -from msprobe.pytorch.nan_analyse.utils import load_json +from msprobe.core.common.file_utils import load_json from msprobe.pytorch.nan_analyse.api_info import APIInfo, AnomalyDetector diff --git a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/utils.py b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/utils.py index 3b068a35362..2eb54dc488c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/utils.py @@ -13,12 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import hashlib -import json from typing import Any -import fcntl - -from msprobe.core.common.file_check import FileOpen CHECK_FIELDS = ['Max', 'Min', 'Mean'] OVERFLOW_VALUES = ['inf', '-inf', 'nan'] @@ -52,14 +48,3 @@ def generate_hash(input_string): sha256_hash = hashlib.sha256() sha256_hash.update(input_string.encode('utf-8')) return sha256_hash.hexdigest() - - -def load_json(json_path): - try: - with FileOpen(json_path, "r") as f: - fcntl.flock(f, fcntl.LOCK_SH) - data = json.load(f) - fcntl.flock(f, fcntl.LOCK_UN) - except Exception as e: - raise RuntimeError(f"Load json file {json_path} failed.") from e - return data -- Gitee From 1e24cb472102902458a501f0d93385fc261b10a8 Mon Sep 17 00:00:00 2001 From: shawn_zhu1 Date: Wed, 5 Mar 2025 09:58:34 +0800 Subject: [PATCH 210/333] =?UTF-8?q?codecheck=20=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/pytorch/nan_analyse/analyze_pp_partition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py index a85b2dfaf69..59e6952ce6a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py +++ b/debug/accuracy_tools/msprobe/pytorch/nan_analyse/analyze_pp_partition.py @@ -23,6 +23,7 @@ from msprobe.pytorch.nan_analyse.utils import singleton MAX_RECURSIVE_DEPTH = 100 + def __is_send_op(op_name: str) -> bool: if op_name.startswith('Distributed.') and 'send.' 
in op_name: return True -- Gitee From 15aadcb56364e7953513546226aae30b2fdbd1c8 Mon Sep 17 00:00:00 2001 From: gitee Date: Wed, 5 Mar 2025 10:05:07 +0800 Subject: [PATCH 211/333] add hccl deterministic --- .../api_accuracy_checker/run_ut/run_distributed_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 24c8303cd52..2c6793d801b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -21,7 +21,6 @@ import time import argparse from collections import namedtuple -import tqdm import torch import torch_npu import torch.distributed as dist @@ -43,6 +42,7 @@ from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_ from msprobe.pytorch.api_accuracy_checker.common.config import CheckerConfig +os.environ['HCCL_DETERMINISTIC'] = str(True) current_time = time.strftime("%Y%m%d%H%M%S") RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" RESULT_CSV_HEADER = [['API_NAME', 'RANK', 'COMPARE_RESULT', 'MESSAGE']] -- Gitee From 7e6cd45cf5bc26111563d419fa290c1fafd2fb7c Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Tue, 4 Mar 2025 19:11:35 +0800 Subject: [PATCH 212/333] =?UTF-8?q?=E4=BF=AE=E5=A4=8DMindspore=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=AF=BC=E8=87=B4=E7=9A=84=5Ftensordump=5Fset=5Fstep?= =?UTF-8?q?=E5=AF=BC=E5=85=A5=E5=A4=B1=E8=B4=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mindspore/debugger/debugger_config.py | 4 + .../mindspore/dump/cell_dump_process.py | 5 +- .../mindspore/dump/dump_tool_factory.py | 2 +- .../mindspore/dump/graph_mode_cell_dump.py | 15 +- 
.../msprobe/mindspore/task_handler_factory.py | 2 +- .../debugger/test_graph_cell_dump.py | 309 ------------------ .../debugger/test_ms_precision_debugger.py | 7 + .../mindspore_ut/test_dump_tool_factory.py | 2 +- 8 files changed, 31 insertions(+), 315 deletions(-) delete mode 100644 debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py index 92155b4ec4e..558df954326 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py @@ -42,6 +42,10 @@ class DebuggerConfig: self.framework = Const.MS_FRAMEWORK self.summary_mode = task_config.summary_mode self.async_dump = common_config.async_dump if common_config.async_dump else False + if hasattr(task_config, 'td_config_path'): + self.td_config_path = "" if not task_config.td_config_path else task_config.td_config_path + else: + self.td_config_path = "" self.check() create_directory(self.dump_path) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py index a9121e14354..2e349b05e41 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py @@ -36,7 +36,10 @@ construct = {} cell_list = [] KEY_SIDE_EFFECT = "side_effect_io" td = ops.TensorDump() -td_in = ops.TensorDump("in") +if (ms.__version__ >= "2.5.0"): + td_in = ops.TensorDump("in") +else: + td_in = ops.TensorDump() td.add_prim_attr(KEY_SIDE_EFFECT, False) td_in.add_prim_attr(KEY_SIDE_EFFECT, False) np_ms_dtype_dict = { diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py index c0933d20aaa..66fa892599f 100644 --- 
a/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/dump_tool_factory.py @@ -40,7 +40,7 @@ class DumpToolFactory: } @staticmethod - def create(config: DebuggerConfig, model): + def create(config: DebuggerConfig, model=None): if config.level == Const.CELL: if len(config.data_mode) != 1 or config.data_mode[0] not in Const.GRAPH_CELL_DUMP_DATA_MODE_LIST: raise Exception("data_mode must be one of all, forward, backward.") diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py index e32866868f4..e35340360a3 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py @@ -16,12 +16,17 @@ import os from msprobe.mindspore.common.log import logger from msprobe.mindspore.debugger.debugger_config import DebuggerConfig import mindspore as ms -from mindspore._c_expression import _tensordump_set_step from mindspore.ops.primitive import _run_op from mindspore import hal, ops import msprobe.mindspore.dump.cell_dump_process as cellDumper from msprobe.mindspore.common.const import Const +tensordump_flag = True +try: + from mindspore._c_expression import _tensordump_set_step +except ImportError: + tensordump_flag = False + class GraphModeCellDump: def __init__(self, config: DebuggerConfig, model): @@ -62,7 +67,13 @@ class GraphModeCellDump: return True def set_step(self): - _tensordump_set_step(self.step) + if tensordump_flag: + _tensordump_set_step(self.step) + else: + raise Exception( + "Importing _tensordump_set_step failed, " + "please use the latest version package of MindSpore." 
+ ) def handle(self): os.environ['MS_JIT_MODULES'] = 'msprobe' diff --git a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py index 5cfbbaeb4a4..10b74ea22b0 100644 --- a/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py +++ b/debug/accuracy_tools/msprobe/mindspore/task_handler_factory.py @@ -29,7 +29,7 @@ class TaskHandlerFactory: } @staticmethod - def create(config: DebuggerConfig, model): + def create(config: DebuggerConfig, model=None): task = TaskHandlerFactory.tasks.get(config.task) if not task: raise Exception("Valid task is needed.") diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py deleted file mode 100644 index b111e644378..00000000000 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_graph_cell_dump.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import re -import unittest -from unittest.mock import MagicMock, patch - -import mindspore as ms -from mindspore import ops - -from msprobe.core.common.const import Const as CoreConst -from msprobe.mindspore.dump.cell_dump_process import generate_file_path -from msprobe.mindspore.dump.cell_dump_process import partial_func, clip_gradient -from msprobe.mindspore.dump.cell_dump_process import cell_construct_wrapper -from msprobe.mindspore.dump.cell_dump_process import rename_filename, sort_filenames, del_same_file -from msprobe.mindspore.dump.cell_dump_process import check_relation - - -class TestGenerateFilePath(unittest.TestCase): - def setUp(self): - self.dump_path = "/path" - self.cell_prefix = "Cell.network._backbone.LlamaForCausalLM" - self.suffix = "forward" - self.io_type = "input" - self.index = 0 - - def test_generate_file_path(self): - expected_path = os.path.join( - self.dump_path, - "{step}", - "{rank}", - CoreConst.DUMP_TENSOR_DATA, - CoreConst.SEP.join([self.cell_prefix, self.suffix, self.io_type, str(self.index)]) - ) - result = generate_file_path(self.dump_path, self.cell_prefix, self.suffix, self.io_type, self.index) - self.assertEqual(result, expected_path) - - -class TestPartialFunc(unittest.TestCase): - - @patch('msprobe.mindspore.dump.cell_dump_process.CoreConst') - @patch('msprobe.mindspore.dump.cell_dump_process.td') - @patch('msprobe.mindspore.dump.cell_dump_process.td_in') - @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') - @patch('msprobe.mindspore.dump.cell_dump_process.ops.depend') - def test_clip_gradient_output(self, mock_depend, mock_generate_file_path, mock_td_in, mock_td, mock_CoreConst): - mock_CoreConst.OUTPUT = "output" - mock_CoreConst.BACKWARD = "backward" - mock_generate_file_path.return_value = "mock_path" - mock_td.return_value = "temp_tensor" - mock_depend.return_value = "dependent_tensor" - - result = clip_gradient("dump_path", "cell_prefix", 0, "output", "dx") - - 
mock_generate_file_path.assert_called_with("dump_path", "cell_prefix", "backward", "output", 0) - mock_td.assert_called_with("mock_path", "dx") - mock_depend.assert_called_with("dx", "temp_tensor") - self.assertEqual(result, "dependent_tensor") - - @patch('msprobe.mindspore.dump.cell_dump_process.CoreConst') - @patch('msprobe.mindspore.dump.cell_dump_process.td') - @patch('msprobe.mindspore.dump.cell_dump_process.td_in') - @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') - @patch('msprobe.mindspore.dump.cell_dump_process.ops.depend') - def test_clip_gradient_input(self, mock_depend, mock_generate_file_path, mock_td_in, mock_td, mock_CoreConst): - mock_CoreConst.INPUT = "input" - mock_CoreConst.BACKWARD = "backward" - mock_generate_file_path.return_value = "mock_path" - mock_td_in.return_value = "temp_tensor" - mock_depend.return_value = "dependent_tensor" - - result = clip_gradient("dump_path", "cell_prefix", 0, "input", "dx") - - mock_generate_file_path.assert_called_with("dump_path", "cell_prefix", "backward", "input", 0) - mock_td_in.assert_called_with("mock_path", "dx") - mock_depend.assert_called_with("dx", "temp_tensor") - self.assertEqual(result, "dependent_tensor") - - def test_partial_func(self): - def mock_func(dump_path, cell_prefix, index, io_type, *args, **kwargs): - return dump_path, cell_prefix, index, io_type, args, kwargs - - new_func = partial_func(mock_func, "dump_path", "cell_prefix", 0, "io_type") - result = new_func("arg1", "arg2", kwarg1="value1") - - self.assertEqual(result, ("dump_path", "cell_prefix", 0, "io_type", ("arg1", "arg2"), {'kwarg1': 'value1'})) - - -class TestCellWrapperProcess(unittest.TestCase): - - @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') - @patch('msprobe.mindspore.dump.cell_dump_process.td') - @patch('msprobe.mindspore.dump.cell_dump_process.td_in') - def test_cell_construct_wrapper(self, mock_td_in, mock_td, mock_generate_file_path): - # Mock the generate_file_path function 
- mock_generate_file_path.return_value = "mock_path" - - # Mock the TensorDump operations - mock_td.return_value = MagicMock() - mock_td_in.return_value = MagicMock() - - # Create a mock cell with necessary attributes - mock_cell = MagicMock() - mock_cell.data_mode = "all" - mock_cell.dump_path = "mock_dump_path" - mock_cell.cell_prefix = "mock_cell_prefix" - mock_cell.input_clips = [MagicMock() for _ in range(50)] - mock_cell.output_clips = [MagicMock() for _ in range(50)] - - # Define a mock function to wrap - def mock_func(*args, **kwargs): - return args - - # Wrap the mock function using cell_construct_wrapper - wrapped_func = cell_construct_wrapper(mock_func, mock_cell) - - # Create mock inputs - mock_input = ms.Tensor([1, 2, 3]) - mock_args = (mock_input,) - - # Call the wrapped function - result = wrapped_func(mock_cell, *mock_args) - - # Check if the result is as expected - self.assertEqual(result, mock_args) - - # Verify that the TensorDump operations were called - mock_td_in.assert_called() - mock_td.assert_called() - - @patch('msprobe.mindspore.dump.cell_dump_process.generate_file_path') - @patch('msprobe.mindspore.dump.cell_dump_process.td') - @patch('msprobe.mindspore.dump.cell_dump_process.td_in') - def test_cell_construct_wrapper_with_tuple_output(self, mock_td_in, mock_td, mock_generate_file_path): - # Mock the generate_file_path function - mock_generate_file_path.return_value = "mock_path" - - # Mock the TensorDump operations - mock_td.return_value = MagicMock() - mock_td_in.return_value = MagicMock() - - # Create a mock cell with necessary attributes - mock_cell = MagicMock() - mock_cell.data_mode = "all" - mock_cell.dump_path = "mock_dump_path" - mock_cell.cell_prefix = "mock_cell_prefix" - mock_cell.input_clips = [MagicMock() for _ in range(50)] - mock_cell.output_clips = [MagicMock() for _ in range(50)] - - # Define a mock function to wrap - def mock_func(*args, **kwargs): - return (args[0], args[0]) - - # Wrap the mock function using 
cell_construct_wrapper - wrapped_func = cell_construct_wrapper(mock_func, mock_cell) - - # Create mock inputs - mock_input = ms.Tensor([1, 2, 3]) - mock_args = (mock_input,) - - # Call the wrapped function - result = wrapped_func(mock_cell, *mock_args) - - # Check if the result is as expected - self.assertEqual(result, (mock_input, mock_input)) - - # Verify that the TensorDump operations were called - mock_td_in.assert_called() - mock_td.assert_called() - - -class TestSortFilenames(unittest.TestCase): - - @patch('os.listdir') - def test_sort_filenames(self, mock_listdir): - # Mock the list of filenames returned by os.listdir - mock_listdir.return_value = [ - 'Cell.network._backbone.model.LlamaModel.backward.0.input.0_float16_177.npy', - 'Cell.network._backbone.model.LlamaModel.forward.0.input.0_in_int32_1.npy', - 'Cell.network._backbone.model.LlamaModel.forward.0.output.10_float16_165.npy', - 'Cell.network._backbone.model.norm_out.LlamaRMSNorm.backward.0.input.0_float16_178.npy' - ] - - # Mock the CoreConst values - CoreConst.REPLACEMENT_CHARACTER = '_' - CoreConst.NUMPY_SUFFIX = '.npy' - - # Expected sorted filenames - expected_sorted_filenames = [ - 'Cell.network._backbone.model.LlamaModel.forward.0.input.0_in_int32_1.npy', - 'Cell.network._backbone.model.LlamaModel.forward.0.output.10_float16_165.npy', - 'Cell.network._backbone.model.LlamaModel.backward.0.input.0_float16_177.npy', - 'Cell.network._backbone.model.norm_out.LlamaRMSNorm.backward.0.input.0_float16_178.npy' - ] - - # Call the function - sorted_filenames = sort_filenames('/mock/path') - - # Assert the filenames are sorted correctly - self.assertEqual(sorted_filenames, expected_sorted_filenames) - - -class TestRenameFilename(unittest.TestCase): - - @patch('msprobe.mindspore.dump.cell_dump_process.sort_filenames') - @patch('msprobe.mindspore.dump.cell_dump_process.del_same_file') - @patch('msprobe.mindspore.dump.cell_dump_process.os.rename') - def test_rename_filename(self, mock_rename, 
mock_del_same_file, mock_sort_filenames): - # Mock the constants - CoreConst.REPLACEMENT_CHARACTER = '_' - CoreConst.FORWARD_PATTERN = '.forward.' - CoreConst.BACKWARD_PATTERN = '.backward.' - CoreConst.SEP = '.' - - # Mock the filenames - mock_sort_filenames.return_value = [ - "Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_101.npy", - "Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_102.npy", - "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.0_float32_103.npy", - "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.1_bool_104.npy", - "Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.output.1_bool_105.npy", - "Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_111.npy", - "Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_112.npy", - ] - mock_del_same_file.return_value = [mock_sort_filenames.return_value] - - # Call the function - rename_filename('/mock/path') - - # Check if os.rename was called with the correct arguments - mock_rename.assert_any_call( - '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_101.npy', - '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.0.input_0_int32_101.npy' - ) - mock_rename.assert_any_call( - '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_102.npy', - '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.0.output_0_float32_102.npy' - ) - mock_rename.assert_any_call( - '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.0_float32_103.npy', - '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.input_0_float32_103.npy' - ) - mock_rename.assert_any_call( - '/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.input.1_bool_104.npy', - '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.input_1_bool_104.npy' - ) - mock_rename.assert_any_call( - 
'/mock/path/Cell.loss_scaling_manager.DynamicLossScaleUpdateCell.backward.output.1_bool_105.npy', - '/mock/path/Cell_loss_scaling_manager_DynamicLossScaleUpdateCell.backward.0.output_1_bool_105.npy' - ) - mock_rename.assert_any_call( - '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.input.0_int32_111.npy', - '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.1.input_0_int32_111.npy' - ) - mock_rename.assert_any_call( - '/mock/path/Cell.learning_rate.CosineWithWarmUpLR.forward.output.0_float32_112.npy', - '/mock/path/Cell_learning_rate_CosineWithWarmUpLR.forward.1.output_0_float32_112.npy' - ) - - # Mock the filenames - mock_sort_filenames.return_value = [] - mock_del_same_file.return_value = [] - - # Call the function - rename_filename('/mock/path') - - # Check if os.rename was not called - mock_rename.assert_not_called() - - -class TestCheckRelation(unittest.TestCase): - - def setUp(self): - CoreConst.SEP = '.' - global KEY_LAYERS - KEY_LAYERS = "layers" - - def test_direct_parent_child_relation(self): - self.assertTrue(check_relation("network._backbone", "network")) - self.assertTrue(check_relation("network._backbone.model", "network._backbone")) - - def test_no_relation(self): - self.assertFalse(check_relation("network._backbone", "network.loss")) - self.assertFalse(check_relation("network._backbone.model", "network.loss")) - - def test_layer_pattern_relation(self): - self.assertTrue(check_relation("network.model.layers.0", "network.model")) - self.assertTrue(check_relation("network._backbone.model.layers.1", "network._backbone.model")) - - def test_no_layer_pattern_relation(self): - self.assertFalse(check_relation("network.model.layers.0", "network.loss")) - self.assertFalse(check_relation("network._backbone.model.layers.1", "network._backbone.model.layers")) - - def test_edge_cases(self): - self.assertFalse(check_relation("", "network")) - self.assertFalse(check_relation("network.layer1", "")) - self.assertFalse(check_relation("", "")) diff 
--git a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py index 066ff537ce6..00b7419e078 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py @@ -94,10 +94,17 @@ class TestPrecisionDebugger(unittest.TestCase): self.assertTrue(Handler.called) def test_stop_step(self): + class MockConfig: + def __init__(self): + self.execution_mode = None + self.level = None + class MockPrecisionDebugger: def __init__(self): self.task = Const.TENSOR self.service = None + self.config = MockConfig() + PrecisionDebugger._instance = None with self.assertRaises(Exception) as context: PrecisionDebugger.stop() diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py index 8f5d207c419..9abd7a56853 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_dump_tool_factory.py @@ -63,7 +63,7 @@ class TestDumpToolFactory(TestCase): config.level = Const.CELL with self.assertRaises(Exception) as context: DumpToolFactory.create(config) - self.assertEqual(str(context.exception), "Data dump is not supported in graph_ge mode when dump level is cell.") + self.assertEqual(str(context.exception), "The model is empty and cell dump is not enabled.") config.execution_mode = Const.GRAPH_KBYK_MODE config.level = Const.KERNEL -- Gitee From 2e20dd7ac458d9af0ba85d0e55cf59cb91827f2a Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Wed, 5 Mar 2025 10:29:10 +0800 Subject: [PATCH 213/333] add MsprobeInitStep class --- .../msprobe/docs/06.data_dump_MindSpore.md | 19 +++++---- .../msprobe/mindspore/__init__.py | 4 +- .../msprobe/mindspore/common/utils.py | 40 
++++++++++++------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index ba76836cb5d..19109d0dae9 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -148,9 +148,7 @@ save(variable, name, save_backward=True) | name | 指定的名称 | str | 是 | | save_backward | 是否保存反向数据 | boolean | 否 | - - -### 6.2 msprobe.mindspore.common.utils.MsprobeStep +### 6.2 msprobe.mindspore.MsprobeStep **功能说明**:MindSpore Callback类,自动在每个step开始时调用start()接口,在每个step结束时调用stop()、step()接口。实现使用 Model 高阶 API 的动态图场景下 L0、L1、mix 级别的精度数据采集控制,控制粒度为单个 **Step** ,而 PrecisionDebugger.start, PrecisionDebugger.stop 接口的控制粒度任意训练代码段。 @@ -164,7 +162,17 @@ MsprobeStep(debugger) 1. debugger:PrecisionDebugger对象。 -### 6.3 msprobe.mindspore.seed_all +### 6.3 msprobe.mindspore.MsprobeInitStep + +**功能说明**:MindSpore Callback 类,自动获取并设置初始 step 值。仅适用于静态图 O0/O1 模式的断点续训场景。 + +**原型**: + +```Python +MsprobeInitStep() +``` + +### 6.4 msprobe.mindspore.seed_all **功能说明**:用于固定网络中的随机性和开启确定性计算。 @@ -181,9 +189,6 @@ seed_all(seed=1234, mode=False, rm_dropout=True) 3. rm_dropout:控制dropout失效的开关。可配置 True 或 False,默认值:True,非必选。参数示例:rm_dropout=True。该参数设置为 True 后,将会使mindspore.ops.Dropout,mindspore.ops.Dropout2D,mindspore.ops.Dropout3D,mindspore.mint.nn.Dropout和mindspore.mint.nn.functional.dropout失效,以避免因随机dropout造成的网络随机性。建议在采集mindspore数据前开启。注意:通过rm_dropout控制dropout失效或生效需要在初始化Dropout实例前调用才能生效。 - - - ## 7. 示例代码 ### 7.1 静态图场景 diff --git a/debug/accuracy_tools/msprobe/mindspore/__init__.py b/debug/accuracy_tools/msprobe/mindspore/__init__.py index 089c29eb098..a519d861f57 100644 --- a/debug/accuracy_tools/msprobe/mindspore/__init__.py +++ b/debug/accuracy_tools/msprobe/mindspore/__init__.py @@ -24,5 +24,5 @@ except ImportError: logger.info("Module _msprobe_c has not been installed. 
L2-Dump may not work normally.") from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger -from msprobe.mindspore.common.utils import seed_all -from msprobe.mindspore.monitor.module_hook import TrainerMon \ No newline at end of file +from msprobe.mindspore.common.utils import seed_all, MsprobeStep, MsprobeInitStep +from msprobe.mindspore.monitor.module_hook import TrainerMon diff --git a/debug/accuracy_tools/msprobe/mindspore/common/utils.py b/debug/accuracy_tools/msprobe/mindspore/common/utils.py index ded3faaa22b..825b1cc6d32 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/utils.py +++ b/debug/accuracy_tools/msprobe/mindspore/common/utils.py @@ -28,6 +28,30 @@ from msprobe.core.common.const import Const from msprobe.core.common.utils import CompareException, check_seed_all +class MsprobeStep(ms.train.Callback): + def __init__(self, debugger): + super(MsprobeStep, self).__init__() + self.debugger = debugger + + def on_train_step_begin(self, run_context): + self.debugger.start() + + def on_train_step_end(self, run_context): + self.debugger.stop() + self.debugger.step() + + +class MsprobeInitStep(ms.train.Callback): + def on_train_begin(self, run_context): + try: + from ms._c_expression import _set_init_iter + except ImportError: + logger.warning('MsprobeInitStep does not work on this version of MindSpore.') + return + cb_params = run_context.original_args() + _set_init_iter(cb_params.cur_step_num) + + def get_rank_if_initialized(): if ms.communication.GlobalComm.INITED: return ms.communication.get_rank() @@ -93,20 +117,6 @@ def seed_all(seed=1234, mode=False, rm_dropout=True): remove_dropout() -class MsprobeStep(ms.train.Callback): - - def __init__(self, debugger): - super(MsprobeStep, self).__init__() - self.debugger = debugger - - def on_train_step_begin(self, run_context): - self.debugger.start() - - def on_train_step_end(self, run_context): - self.debugger.stop() - self.debugger.step() - - class Dropout(ops.Dropout): def 
__init__(self, keep_prob=0.5, seed0=0, seed1=1): super().__init__(1., seed0, seed1) @@ -196,4 +206,4 @@ def check_save_param(variable, name, save_backward): logger.warning("PrecisionDebugger.save_backward name not valid, " "should be bool. " "Skip current save process.") - raise ValueError \ No newline at end of file + raise ValueError -- Gitee From 13ffabe650cf605b595af37895b6122671961635 Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 6 Mar 2025 09:44:52 +0800 Subject: [PATCH 214/333] fix review --- .../pytorch/api_accuracy_checker/run_ut/data_generate.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py index 9fd67c3d0e1..f5ee9282ebd 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py @@ -30,8 +30,7 @@ from msprobe.core.common.const import Const, FileCheckConst, CompareConst, Distr from msprobe.pytorch.hook_module.wrap_distributed import get_distributed_ops -distribute_api_set = get_distributed_ops() -distribute_api_list = list(distribute_api_set) +distribute_api_list = list(get_distributed_ops()) TORCH_TYPE = ["torch.device", "torch.dtype"] -- Gitee From e084d780f9f61880bcafa8424f921fb144d7e3b7 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Thu, 6 Mar 2025 14:45:27 +0800 Subject: [PATCH 215/333] fix --- debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md | 2 +- debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md | 2 +- debug/accuracy_tools/msprobe/mindspore/common/utils.py | 4 ++-- debug/accuracy_tools/msprobe/pytorch/common/utils.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md b/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md index 
9a99ab1c2eb..e45be7736b9 100644 --- a/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md @@ -183,7 +183,7 @@ save(variable, name, save_backward=True) **参数说明**: | 参数名称 | 参数含义 | 支持数据类型 | 是否必选| | ---------- | ------------------| ------------------- | ------------------- | -| variable | 需要保存的变量 |dict, list, torch.tensor, int, float, str | 是 | +| variable | 需要保存的变量 |dict, list, tuple, torch.tensor, int, float, str | 是 | | name | 指定的名称 | str | 是 | | save_backward | 是否保存反向数据 | boolean | 否 | diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index 19109d0dae9..27eb496747e 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -144,7 +144,7 @@ save(variable, name, save_backward=True) **参数说明**: | 参数名称 | 参数含义 | 支持数据类型 | 是否必选| | ---------- | ------------------| ------------------- | ------------------- | -| variable | 需要保存的变量 |dict, list, torch.tensor, int, float, str | 是 | +| variable | 需要保存的变量 |dict, list, tuple, torch.tensor, int, float, str | 是 | | name | 指定的名称 | str | 是 | | save_backward | 是否保存反向数据 | boolean | 否 | diff --git a/debug/accuracy_tools/msprobe/mindspore/common/utils.py b/debug/accuracy_tools/msprobe/mindspore/common/utils.py index 825b1cc6d32..6da6db0f830 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/utils.py +++ b/debug/accuracy_tools/msprobe/mindspore/common/utils.py @@ -192,9 +192,9 @@ def set_register_backward_hook_functions(): def check_save_param(variable, name, save_backward): # try catch this api to skip invalid call - if not isinstance(variable, (list, dict, ms.Tensor, int, float, str)): + if not isinstance(variable, (list, dict, tuple, ms.Tensor, int, float, str)): logger.warning("PrecisionDebugger.save variable type not valid, " - "should be one of list, dict, ms.Tensor, int, float or string. 
" + "should be one of list, dict, tuple, ms.Tensor, int, float or string. " "Skip current save process.") raise ValueError if not isinstance(name, str): diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index 16067f6d2be..4e82bee4a04 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -449,9 +449,9 @@ def is_recomputation(): def check_save_param(variable, name, save_backward): # try catch this api to skip invalid call - if not isinstance(variable, (list, dict, torch.Tensor, int, float, str)): + if not isinstance(variable, (list, dict, tuple, torch.Tensor, int, float, str)): logger.warning("PrecisionDebugger.save variable type not valid, " - "should be one of list, dict, torch.Tensor, int, float or string. " + "should be one of list, dict, tuple, torch.Tensor, int, float or string. " "Skip current save process.") raise ValueError if not isinstance(name, str): -- Gitee From 8e9372640dfe120ed5a1cadcc3471fe13895f2ad Mon Sep 17 00:00:00 2001 From: l30036321 Date: Wed, 5 Mar 2025 17:28:05 +0800 Subject: [PATCH 216/333] add set_init_step --- debug/accuracy_tools/msprobe/core/common/utils.py | 9 +++++++++ .../msprobe/docs/05.data_dump_PyTorch.md | 15 +++++++++++++++ .../msprobe/docs/06.data_dump_MindSpore.md | 13 +++++++++++++ .../mindspore/debugger/precision_debugger.py | 10 +++++++++- debug/accuracy_tools/msprobe/mindspore/service.py | 7 +++++-- .../pytorch/debugger/precision_debugger.py | 10 +++++++++- debug/accuracy_tools/msprobe/pytorch/service.py | 7 +++++-- .../msprobe/test/mindspore_ut/test_ms_service.py | 5 ++--- .../test/mindspore_ut/test_primitive_dump.py | 15 ++++++++------- .../msprobe/test/pytorch_ut/test_service.py | 2 +- 10 files changed, 76 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index 
7ec0490168f..38fcddfaead 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -428,6 +428,15 @@ def get_real_step_or_rank(step_or_rank_input, obj): return real_step_or_rank +def check_init_step(step): + if not is_int(step): + raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, + f"{step} must be an integer") + if not step >= 0: + raise MsprobeException(MsprobeException.INVALID_PARAM_ERROR, + f"{step} must be greater than or equal to 0") + + def check_seed_all(seed, mode, rm_dropout): if is_int(seed): if seed < 0 or seed > Const.MAX_SEED_VALUE: diff --git a/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md b/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md index 9a99ab1c2eb..965f20d4dde 100644 --- a/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/05.data_dump_PyTorch.md @@ -187,6 +187,21 @@ save(variable, name, save_backward=True) | name | 指定的名称 | str | 是 | | save_backward | 是否保存反向数据 | boolean | 否 | +### 1.10 set_init_step + +**功能说明**:设置起始step数,step数默认从0开始计数,使用该接口后step从指定值开始计数。该函数需在 **start** 函数调用前使用,建议写在训练迭代的循环开始前。 + +**原型**: + +```Python +debugger.set_init_step(step) +``` + +**参数说明**: + +1.step: 指定的起始step数。 + + ## 2 示例代码 ### 2.1 快速上手 diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index ba76836cb5d..cabbf4a928c 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -148,6 +148,19 @@ save(variable, name, save_backward=True) | name | 指定的名称 | str | 是 | | save_backward | 是否保存反向数据 | boolean | 否 | +#### 6.1.6 set_init_step + +**功能说明**:设置起始step数,step数默认从0开始计数,使用该接口后step从指定值开始计数。该函数需在 **start** 函数调用前使用,建议写在训练迭代的循环开始前。 + +**原型**: + +```Python +set_init_step(step) +``` + +**参数说明**: + +1.step: 指定的起始step数。 ### 6.2 msprobe.mindspore.common.utils.MsprobeStep diff 
--git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 7694d71dd98..3aa03f8f709 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -22,7 +22,7 @@ from mindspore._c_expression import MSContext from msprobe.core.common.const import Const, FileCheckConst, MsgConst from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common.file_utils import FileChecker -from msprobe.core.common.utils import get_real_step_or_rank +from msprobe.core.common.utils import get_real_step_or_rank, check_init_step from msprobe.mindspore.cell_processor import CellProcessor from msprobe.mindspore.common.const import Const as MsConst from msprobe.mindspore.common.utils import set_register_backward_hook_functions, check_save_param @@ -233,6 +233,14 @@ class PrecisionDebugger: instance.service = Service(instance.config) instance.service.save(variable, name, save_backward) + @classmethod + def set_init_step(cls, step): + instance = cls._instance + if not instance: + raise Exception(MsgConst.NOT_CREATED_INSTANCE) + check_init_step(step) + instance.service.init_step = step + @classmethod def _need_service(cls): instance = cls._instance diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 5afbd046be4..7e0f8ed24e9 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -63,6 +63,8 @@ class Service: self.inner_switch = False self.primitive_switch = False self.current_iter = 0 + self.loop = 0 + self.init_step = 0 self.first_start = True self.current_rank = None self.dump_iter_dir = None @@ -276,11 +278,12 @@ class Service: if self.config.task == Const.TENSOR: self.data_collector.data_processor.dump_async_data() 
self.data_collector.write_json() - self.current_iter += 1 - self.data_collector.update_iter(self.current_iter) + self.loop += 1 self.reset_status() def start(self, model=None): + self.current_iter = self.loop + self.init_step + self.data_collector.update_iter(self.current_iter) if self.config.level == Const.LEVEL_DEBUG: return self.start_call = True diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py index 5bb1d3a14e8..a19702ff864 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -19,7 +19,7 @@ import torch from msprobe.core.common.const import Const, FileCheckConst, MsgConst from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common.file_utils import FileChecker -from msprobe.core.common.utils import get_real_step_or_rank +from msprobe.core.common.utils import get_real_step_or_rank, check_init_step from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import check_save_param from msprobe.pytorch.debugger.debugger_config import DebuggerConfig @@ -171,6 +171,14 @@ class PrecisionDebugger: except ValueError: return instance.service.save(variable, name, save_backward) + + @classmethod + def set_init_step(cls, step): + instance = cls._instance + if not instance: + raise Exception(MsgConst.NOT_CREATED_INSTANCE) + check_init_step(step) + instance.service.init_step = step def module_dump(module, dump_name): diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index fd81a7f1cf0..a8d191515c0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -50,6 +50,8 @@ class Service: self.switch = False self.inner_switch = False self.current_iter = 0 + self.loop = 0 + self.init_step = 0 self.first_start = True 
self.current_rank = None self.dump_iter_dir = None @@ -246,6 +248,8 @@ class Service: return HookFn(pre_forward_hook_fn, forward_hook_fn, backward_hook_fn, forward_hook_torch_version_below_2_fn) def start(self, model): + self.current_iter = self.loop + self.init_step + self.data_collector.update_iter(self.current_iter) if self.config.level == Const.LEVEL_DEBUG: return if self.need_stop_service(): @@ -304,8 +308,7 @@ class Service: if self.config.task == Const.TENSOR: self.data_collector.data_processor.dump_async_data() self.data_collector.write_json() - self.current_iter += 1 - self.data_collector.update_iter(self.current_iter) + self.loop += 1 self.reset_status() def need_stop_service(self): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py index 912830ea1ab..baeb247649f 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py @@ -218,10 +218,9 @@ class TestService(unittest.TestCase): HOOKCell.cell_count = {"test_api": 1} JitDump.jit_count = {"test_api": 1} self.service.primitive_hook_service.primitive_counters = {"test_api": 1} - self.service.current_iter = 0 + self.service.loop = 0 self.service.step() - self.assertEqual(self.service.current_iter, 1) - self.service.data_collector.update_iter.assert_called_once_with(1) + self.assertEqual(self.service.loop, 1) self.service.data_collector.reset_status.assert_called_once() self.assertEqual(JitDump.jit_count, defaultdict(int)) self.assertEqual((self.service.primitive_hook_service.primitive_counters), {}) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_primitive_dump.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_primitive_dump.py index 3cafd49f2c1..79deeee08e1 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_primitive_dump.py +++ 
b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_primitive_dump.py @@ -84,9 +84,9 @@ class TestService(unittest.TestCase): self.assertEqual(self.service.primitive_hook_service.primitive_counters[primitive_name], 1) def test_step_updates_iteration(self): - initial_iter = self.service.current_iter + initial_iter = self.service.loop self.service.step() - self.assertEqual(self.service.current_iter, initial_iter + 1) + self.assertEqual(self.service.loop, initial_iter + 1) @patch.object(HOOKCell, 'cell_count', new_callable=lambda: defaultdict(int)) def test_step_resets_counters(self, _): @@ -96,12 +96,13 @@ class TestService(unittest.TestCase): self.assertEqual(self.service.primitive_hook_service.primitive_counters, {}) self.assertEqual(HOOKCell.cell_count, defaultdict(int)) - def test_step_calls_update_iter(self): - # 检查是否在调用 step 时调用了 update_iter + def test_start_calls_update_iter(self): + # 检查是否在调用 start 时调用了 update_iter with patch.object(self.service.data_collector, 'update_iter') as mock_update_iter: - initial_iter = self.service.current_iter - self.service.step() - mock_update_iter.assert_called_once_with(initial_iter + 1) + initial_iter = self.service.loop + init_step = self.service.init_step + self.service.start() + mock_update_iter.assert_called_once_with(initial_iter + init_step) class TestPrimitiveHookService(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py index 6687f311105..d443433dcc8 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py @@ -67,7 +67,7 @@ class TestService(unittest.TestCase): def test_step_success(self): self.service.step() - self.assertEqual(self.service.current_iter, 1) + self.assertEqual(self.service.loop, 1) def test_step_fail(self): self.service.should_stop_service = True -- Gitee From 59067a0c0c0b1d73b5ad81137d9aa2cd6864ee89 Mon Sep 17 
00:00:00 2001 From: Linwei-Ying Date: Thu, 6 Mar 2025 17:25:27 +0800 Subject: [PATCH 217/333] visualization fuzzy match bugifx --- debug/accuracy_tools/msprobe/core/compare/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 8656daf7cae..e93ff775e78 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -354,7 +354,7 @@ def get_accuracy(result, n_dict, b_dict, dump_mode): result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) if dump_mode == Const.ALL: npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") - bench_data_name = safe_get_value(bench_data_name_list, n_start + index, "bench_data_name_list") + bench_data_name = safe_get_value(bench_data_name_list, b_start + index, "bench_data_name_list") result_item.append([npu_data_name, bench_data_name]) result.append(result_item) @@ -373,7 +373,7 @@ def get_accuracy(result, n_dict, b_dict, dump_mode): continue result_item = [ n_name, CompareConst.NAN, n_struct[0], CompareConst.NAN, n_struct[1], CompareConst.NAN, - " ", " ", " ", " ", " " + " ", " ", " ", " ", " ", " " ] summary_data = n_dict.get(CompareConst.SUMMARY)[n_start + index] result_item.extend(summary_data) @@ -391,8 +391,7 @@ def get_accuracy(result, n_dict, b_dict, dump_mode): result_item = stack_column_process(result_item, has_stack, index, key, npu_stack_info) if dump_mode == Const.ALL: npu_data_name = safe_get_value(npu_data_name_list, n_start + index, "npu_data_name_list") - bench_data_name = safe_get_value(bench_data_name_list, n_start + index, "bench_data_name_list") - result_item.append([npu_data_name, bench_data_name]) + result_item.append([npu_data_name, "-1"]) result.append(result_item) -- Gitee From 187677a215b64d02292667a4130da599df1dc25a Mon Sep 17 00:00:00 2001 From: 
Linwei-Ying Date: Thu, 6 Mar 2025 19:08:51 +0800 Subject: [PATCH 218/333] visualization fuzzy match bugfix --- .../msprobe/test/core_ut/compare/test_acc_compare.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py index 1b2f6bb2fde..94244be326e 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/compare/test_acc_compare.py @@ -159,16 +159,16 @@ aten_result = [ -10.640625, -0.008758544921875, 5.397906303405762, -5.796811580657959, 2.5283952709287405e-10, 'Warning', 'Need double check api accuracy.', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward.output.1', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', + ' ', ' ', ' ', ' ', ' ', ' ', 0.30550330877304077, -0.24485322833061218, -0.010361209511756897, 'Nan', 'Nan', 'Nan', 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward.output.2', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', + ' ', ' ', ' ', ' ', ' ', ' ', 623.9192504882812, 432.96826171875, 520.2276611328125, 'Nan', 'Nan', 'Nan', 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward.output.3', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', + ' ', ' ', ' ', ' ', ' ', ' ', 2.4797861576080322, -3.055997371673584, -0.04795549064874649, 'Nan', 'Nan', 'Nan', 'Yes', '', 'None'], ['Aten__native_batch_norm_legit_functional.default_0_forward.output.4', 'Nan', 'torch.float32', 'Nan', [256], 'Nan', - ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 
42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', + ' ', ' ', ' ', ' ', ' ', ' ', 61.7945556640625, 42.59713363647461, 52.03831481933594, 'Nan', 'Nan', 'Nan', 'Yes', '', 'None']] highlight_dict = {'red_rows': [], 'yellow_rows': []} -- Gitee From 1cfb95f718ff8db390a079c1196ddd39a1bd5e81 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 7 Mar 2025 11:24:55 +0800 Subject: [PATCH 219/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../compare/test_multi_mapping.py | 14 +++--- .../visualization/compare/graph_comparator.py | 45 +++++++++++++------ .../visualization/compare/multi_mapping.py | 34 ++++++++++---- 3 files changed, 65 insertions(+), 28 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py index af51ba5daff..f9ea98cd383 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py @@ -84,23 +84,23 @@ class TestMultiMapping(unittest.TestCase): graph.add_node(NodeOp.module, 'Module.layer3.Linear.forward.0', graph.root) result = MultiMapping.merge_nodes(['Module.layer1.Linear.forward.0', 'Module.layer3.Linear.forward.0'], graph) - self.assertTrue(isinstance(result, BaseNode)) - self.assertEqual(result.subnodes, [graph.get_node('Module.layer1.Linear.forward.0'), + self.assertTrue(isinstance(result.multi_node, BaseNode)) + self.assertEqual(result.multi_node.subnodes, [graph.get_node('Module.layer1.Linear.forward.0'), graph.get_node('Module.layer2.Linear.forward.0'), graph.get_node('Module.layer3.Linear.forward.0')]) - self.assertEqual(result.upnode, graph.get_node('graph')) - 
self.assertEqual(result.id, GraphConst.MERGE_NODES + '.forward.0') + self.assertEqual(result.multi_node.upnode, graph.get_node('graph')) + self.assertEqual(result.multi_node.id, GraphConst.MERGE_NODES + '.forward.0') result = MultiMapping.merge_nodes(['Module.layer1.Linear.forward.0'], graph) - self.assertEqual(result, graph.get_node('Module.layer1.Linear.forward.0')) + self.assertEqual(result.multi_node, graph.get_node('Module.layer1.Linear.forward.0')) result = MultiMapping.merge_nodes(['Module.layer5.Linear.forward.0', 'Module.layer6.Linear.forward.0'], graph) - self.assertIsNone(result) + self.assertIsNone(result.multi_node) result = MultiMapping.merge_nodes(['Module.layer3.Linear.forward.0', 'Module.layer1.Linear.forward.0'], graph) - self.assertIsNone(result) + self.assertIsNone(result.multi_node) def test_split_mapping_str(self): result = MultiMapping._split_mapping_str('a, b,c, d') diff --git a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py index 8002d71e174..d54cff197a7 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2025, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,6 +24,7 @@ from msprobe.core.common.const import Const, CompareConst from msprobe.core.common.log import logger from msprobe.core.common.file_utils import load_yaml from msprobe.visualization.compare.multi_mapping import MultiMapping +from msprobe.core.compare.utils import get_name_and_state class GraphComparator: @@ -202,9 +203,11 @@ class GraphComparator: continue if not MultiMapping.validate_ids_in_graph(node_b_ids, self.graph_b, GraphConst.JSON_BENCH_KEY): continue - node_n = MultiMapping.merge_nodes(node_n_ids, self.graph_n) + merged_items_n = MultiMapping.merge_nodes(node_n_ids, self.graph_n) + merged_items_b = MultiMapping.merge_nodes(node_b_ids, self.graph_b) + node_n = merged_items_n.multi_node node_n_data = self.data_n_dict - node_b = MultiMapping.merge_nodes(node_b_ids, self.graph_b) + node_b = merged_items_b.multi_node node_b_data = self.data_b_dict if node_n.op == NodeOp.multi_collection: @@ -224,9 +227,10 @@ class GraphComparator: # 准备真实数据比对接口需要的参数 full_param_name_n = compare_result[0] full_param_name_b = compare_result[1] - data_path_n = self._get_data_path(node_n, full_param_name_n) - data_path_b = self._get_data_path(node_b, full_param_name_b) - op_name_mapping_dict = {full_param_name_n: [data_path_n, data_path_b]} + + data_name_n = self._get_dump_data_name(merged_items_n, full_param_name_n) + data_name_b = self._get_dump_data_name(merged_items_b, full_param_name_b) + op_name_mapping_dict = {full_param_name_n: [data_name_n, data_name_b]} real_compare_result = run_real_data_single([full_param_name_n, full_param_name_b], op_name_mapping_dict, self.dump_path_param, @@ -247,12 +251,27 @@ class GraphComparator: self.add_compare_result_to_node(node_n, compare_result_list) @staticmethod - def _get_data_path(node, full_param_name): + def _get_dump_data_name(merged_items, full_param_name): """ - 根据节点参数名称,从节点中获取此参数的真实数据路径 - node: 节点 - full_param_name: 
参数名称,例如Module.layer.Linear.forward.0.input.0 + 根据节点参数名称,从融合节点信息中获取此参数的真实数据名称 + Args: + merged_items: 融合节点信息 + full_param_name: 参数名称,例如Module.layer.Linear.forward.0.input.0 + + Returns: 真实数据名称,例如Module.layer.Linear.forward.0.input.0.pt + """ - return node.output_data.get(full_param_name, {}).get("data_name") \ - if Const.SEP + Const.OUTPUT + Const.SEP in full_param_name \ - else node.input_data.get(full_param_name, {}).get("data_name") + try: + _, state = get_name_and_state(full_param_name) + except Exception: + return "-1" + node = merged_items.multi_node + # 如果是融合节点,那么其真实数据的存盘data_name需要从融合节点的首节点和尾节点中获取 + if node.op == NodeOp.multi_collection: + return merged_items.end_node.output_data.get(full_param_name, {}).get("data_name", "-1") \ + if Const.OUTPUT == state in full_param_name \ + else merged_items.start_node.input_data.get(full_param_name, {}).get("data_name", "-1") + else: + return node.output_data.get(full_param_name, {}).get("data_name", "-1") \ + if Const.OUTPUT == state in full_param_name \ + else node.input_data.get(full_param_name, {}).get("data_name", "-1") diff --git a/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py b/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py index 2fe18eccb56..585853682a0 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py @@ -13,13 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from dataclasses import dataclass + from msprobe.core.common.const import Const from msprobe.core.common.log import logger from msprobe.visualization.utils import GraphConst -from msprobe.visualization.graph.graph import NodeOp +from msprobe.visualization.graph.graph import NodeOp, BaseNode from msprobe.core.compare.utils import get_name_and_state +@dataclass +class MergedItems: + multi_node: BaseNode = None + start_node: BaseNode = None + end_node: BaseNode = None + + class MultiMapping: @staticmethod @@ -67,15 +76,24 @@ class MultiMapping: @staticmethod def merge_nodes(node_ids, graph): - if not node_ids: - return None + """ + 根据传入的节点名称列表,将列表中的节点合并为一个节点,并取列表中的首节点输入数据作为融合节点的输入,尾节点的输出数据作为融合节点的输出 + Args: + node_ids: 节点名称列表 + graph: 图 + + Returns: 融合节点,首节点,尾节点 + + """ + if not node_ids or not isinstance(node_ids, (list, tuple)): + return MergedItems() if len(node_ids) == 1: - return graph.get_node(node_ids[0]) + return MergedItems(graph.get_node(node_ids[0])) # 根据映射文件中配置的首尾节点id,得到首尾节点id之间的所有节点id列表 node0 = graph.get_node(node_ids[0]) node1 = graph.get_node(node_ids[-1]) if not node0 or not node1: - return None + return MergedItems() current_node_list = node0.upnode.subnodes start_index = end_index = 0 @@ -88,7 +106,7 @@ class MultiMapping: if start_index > end_index: logger.warning(f'{node_ids[0]} and {node_ids[-1]} are in the wrong order, {node_ids[0]} should come first, ' f'and the mapping is not effective.') - return None + return MergedItems() current_node_list = current_node_list[start_index:end_index + 1] @@ -106,7 +124,7 @@ class MultiMapping: multi_node.upnode.subnodes[start_index:end_index + 1] = [multi_node] - # 给节点添加输入输出数据, parameters信息不添加, 因为多对多节点之间的shape会不一致导致无法比对 + # 给节点添加输入输出数据, parameters信息不添加, 因为多对多节点之间的parameters的shape会不一致导致无法比对 input_data = {} output_data = {} for key, value in node0.input_data.items(): @@ -117,7 +135,7 @@ class MultiMapping: multi_node.input_data = input_data multi_node.output_data = output_data - return multi_node + return 
MergedItems(multi_node, node0, node1) @staticmethod def _split_mapping_str(x: str): -- Gitee From efdbb091d59053b9b9e6967387210dd6e909b8ff Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 7 Mar 2025 11:26:26 +0800 Subject: [PATCH 220/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/visualization/builder/msprobe_adapter.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py index aba680f1da1..f42771ae522 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py @@ -78,18 +78,15 @@ def run_real_data_single(op_names, op_name_mapping_dict, input_param, framework, return [] mode_config = ModeConfig(stack_mode=False, auto_analyze=True, fuzzy_match=False, dump_mode=Const.ALL) set_dump_path(input_param) - bench_data = load_json(input_param.get("bench_json_path")).get('data') if framework == Const.PT_FRAMEWORK: from msprobe.pytorch.compare.pt_compare import PTComparator - return PTComparator(mode_config).compare_by_op(op_names[0], op_names[1], op_name_mapping_dict, input_param, - bench_data) + return PTComparator(mode_config).compare_by_op(op_names[0], op_names[1], op_name_mapping_dict, input_param) else: from msprobe.mindspore.compare.ms_compare import MSComparator, MappingConfig ms_comparator = MSComparator(mode_config, MappingConfig()) ms_comparator.cross_frame = is_cross_frame - return ms_comparator.compare_by_op(op_names[0], op_names[1], op_name_mapping_dict, input_param, - bench_data) + return ms_comparator.compare_by_op(op_names[0], op_names[1], 
op_name_mapping_dict, input_param) def get_input_output(node_data, node_id): -- Gitee From 86ed6b47d7884462da7660e4361681d9bde65fb5 Mon Sep 17 00:00:00 2001 From: fuchao <1501312275@qq.com> Date: Fri, 7 Mar 2025 17:03:01 +0800 Subject: [PATCH 221/333] =?UTF-8?q?=E9=9D=99=E6=80=81=E5=9B=BECell?= =?UTF-8?q?=E7=BA=A7Dump=E5=8A=A8=E6=80=81=E9=85=8D=E7=BD=AEcell=E7=9A=84T?= =?UTF-8?q?ensorDump=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/docs/06.data_dump_MindSpore.md | 2 + .../mindspore/dump/cell_dump_process.py | 127 +++++++++++++++--- .../mindspore/dump/graph_mode_cell_dump.py | 8 +- .../msprobe/mindspore/ms_config.py | 1 + 4 files changed, 118 insertions(+), 20 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index a79738950fa..d7ded20c572 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -212,6 +212,8 @@ seed_all(seed=1234, mode=False, rm_dropout=True) #### 7.1.1 L0 级别 +**说明**: 静态图 L0 级别的Dump功能是基于mindspore.ops.TensorDump算子实现。在Ascend平台上的Graph模式下,可以通过设置环境变量 [MS_DUMP_SLICE_SIZE 和 MS_DUMP_WAIT_TIME](https://www.mindspore.cn/docs/zh-CN/r2.5.0/api_python/env_var_list.html) 解决在输出大Tesnor或输出Tensor比较密集场景下算子执行失败的问题。 + ##### 7.1.1.1 未使用 Model 高阶 API diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py index 2e349b05e41..a77f3d4fe37 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/cell_dump_process.py @@ -25,7 +25,7 @@ from mindspore import nn, ops from msprobe.mindspore.common.log import logger from msprobe.core.common.const import Const as CoreConst -from msprobe.core.common.file_utils import load_npy, save_json, remove_path +from 
msprobe.core.common.file_utils import load_npy, save_json, remove_path, load_yaml from msprobe.core.common.const import FileCheckConst @@ -35,6 +35,11 @@ KEY_LAYERS = "layers" construct = {} cell_list = [] KEY_SIDE_EFFECT = "side_effect_io" +KEY_TOPLAYER = "TopLayer" +KEY_FORWARD = CoreConst.FORWARD +KEY_BACKWARD = CoreConst.BACKWARD +KEY_INPUT = CoreConst.INPUT +KEY_OUTPUT = CoreConst.OUTPUT td = ops.TensorDump() if (ms.__version__ >= "2.5.0"): td_in = ops.TensorDump("in") @@ -72,7 +77,7 @@ np_ms_dtype_dict = { } -def generate_file_path(dump_path, cell_prefix, suffix, io_type, index): +def gen_file_path(dump_path, cell_prefix, suffix, io_type, index): step_path = os.path.join(dump_path, "{step}") rank_path = os.path.join(step_path, "{rank}") data_path = os.path.join(rank_path, CoreConst.DUMP_TENSOR_DATA) @@ -87,15 +92,19 @@ def partial_func(func, dump_path, cell_prefix, index, io_type): def clip_gradient(dump_path, cell_prefix, index, io_type, dx): - if io_type == CoreConst.OUTPUT: - temp = td(generate_file_path(dump_path, cell_prefix, CoreConst.BACKWARD, io_type, index), dx) + if io_type == KEY_OUTPUT: + temp = td(gen_file_path(dump_path, cell_prefix, KEY_BACKWARD, io_type, index), dx) dx = ops.depend(dx, temp) - if io_type == CoreConst.INPUT: - temp = td_in(generate_file_path(dump_path, cell_prefix, CoreConst.BACKWARD, io_type, index), dx) + if io_type == KEY_INPUT: + temp = td_in(gen_file_path(dump_path, cell_prefix, KEY_BACKWARD, io_type, index), dx) dx = ops.depend(dx, temp) return dx +def need_tensordump_in(cell_obj, attr): + return hasattr(cell_obj, attr) and getattr(cell_obj, attr) == "in" + + def cell_construct_wrapper(func, self): def new_construct(self, *args, **kwargs): new_args = [] @@ -110,7 +119,16 @@ def cell_construct_wrapper(func, self): item = self.output_clips[index](item) if self.data_mode == "forward" or self.data_mode == "all": if ops.is_tensor(item): - temp = td_in(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, 
CoreConst.INPUT, index), item) + if need_tensordump_in(self, 'input_dump_mode'): + temp = td_in( + gen_file_path(self.dump_path, self.cell_prefix, KEY_FORWARD, KEY_INPUT, index), + item + ) + else: + temp = td( + gen_file_path(self.dump_path, self.cell_prefix, KEY_FORWARD, KEY_INPUT, index), + item + ) item = ops.depend(item, temp) new_args.append(item) @@ -124,7 +142,16 @@ def cell_construct_wrapper(func, self): item = self.input_clips[index](item) if self.data_mode == "forward" or self.data_mode == "all": if ops.is_tensor(item): - temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, index), item) + if need_tensordump_in(self, 'output_dump_mode'): + temp = td_in( + gen_file_path(self.dump_path, self.cell_prefix, KEY_FORWARD, KEY_OUTPUT, index), + item + ) + else: + temp = td( + gen_file_path(self.dump_path, self.cell_prefix, KEY_FORWARD, KEY_OUTPUT, index), + item + ) item = ops.depend(item, temp) out_list.append(item) else: @@ -136,7 +163,16 @@ def cell_construct_wrapper(func, self): out = self.input_clips[0](out) if self.data_mode == "forward" or self.data_mode == "all": if ops.is_tensor(out): - temp = td(generate_file_path(self.dump_path, self.cell_prefix, CoreConst.FORWARD, CoreConst.OUTPUT, 0), out) + if need_tensordump_in(self, 'output_dump_mode'): + temp = td_in( + gen_file_path(self.dump_path, self.cell_prefix, KEY_FORWARD, KEY_OUTPUT, 0), + out + ) + else: + temp = td( + gen_file_path(self.dump_path, self.cell_prefix, KEY_FORWARD, KEY_OUTPUT, 0), + out + ) out = ops.depend(out, temp) return out @@ -260,14 +296,14 @@ def generate_construct(path): for filename in filenames: point_position = 3 mid_field = filename.rsplit(CoreConst.SEP, point_position)[0] - if CoreConst.INPUT in filename: + if KEY_INPUT in filename: if mid_field in cell_list: cell_list.remove(mid_field) cell_list.append(mid_field) else: if mid_field not in cell_list: index = filenames.index(filename) - output_field = mid_field + CoreConst.OUTPUT 
+ output_field = mid_field + KEY_OUTPUT find_flag = False for filename_other in cell_list[index + 1:]: if output_field in filename_other: @@ -332,10 +368,10 @@ def process_file(file_path): } # 根据文件名的最后一个部分(输入或输出)确定是添加到input_args还是output - if parts[-3] == CoreConst.INPUT: + if parts[-3] == KEY_INPUT: return op_name, CoreConst.INPUT_ARGS, tensor_json - elif parts[-3] == CoreConst.OUTPUT: - return op_name, CoreConst.OUTPUT, tensor_json + elif parts[-3] == KEY_OUTPUT: + return op_name, KEY_OUTPUT, tensor_json else: return None, None, None @@ -371,7 +407,7 @@ def generate_dump_info(path): if op_name not in dump_data.get(CoreConst.DATA, {}): dump_data.get(CoreConst.DATA, {})[op_name] = {CoreConst.INPUT_ARGS: [], CoreConst.INPUT_KWARGS: {}, - CoreConst.OUTPUT: []} + KEY_OUTPUT: []} if key not in dump_data.get(CoreConst.DATA, {}).get(op_name, {}): dump_data.get(CoreConst.DATA, {}).get(op_name, {})[key] = [] dump_data.get(CoreConst.DATA, {}).get(op_name, {}).get(key, []).append(tensor_json) @@ -463,10 +499,47 @@ def process(dump_path): logger.info(f"==========JSON file generation completed!==========") -def start(net=None, dump_path="./", data_mode=CoreConst.ALL): +def get_yaml_keys(yaml_data): + keys = [] + for key, _ in yaml_data.items(): + keys.append(key) + return keys + + +def get_tensordump_mode(input_str): + left_index = input_str.find('(') + right_index = input_str.find(')') + + # 提取括号内的字符串 + if left_index != -1 and right_index != -1: + inner_str = input_str[left_index + 1:right_index] + # 分割字符串得到元素列表 + elements = inner_str.split(',') + if len(elements) >= 2: + # 去除元素前后的空格 + first_element = elements[0].strip() + second_element = elements[1].strip() + return first_element, second_element + return None, None + + +def set_tensordump_mode(cell, input_str): + first_str, second_str = get_tensordump_mode(input_str) + if first_str and second_str: + cell.input_dump_mode = first_str + cell.output_dump_mode = second_str + + +def start(net=None, dump_path="./", 
data_mode=CoreConst.ALL, td_config_path=''): if net is None: return + if td_config_path == "": + yaml_data = {} + else: + yaml_data = load_yaml(td_config_path) + first_layer_key = get_yaml_keys(yaml_data) + black_list = ["grad_reducer", ""] for name, cell in net.cells_and_names(): class_name = cell.__class__.__name__ @@ -481,7 +554,19 @@ def start(net=None, dump_path="./", data_mode=CoreConst.ALL): else: #Format: Cell.{cell_name}.{class_name} cell.cell_prefix = CoreConst.SEP.join([CoreConst.CELL, name, cell.__class__.__name__]) - + + # 根据yaml配置文件设置cell的TensorDump模式 + if class_name in first_layer_key: + layer_data = yaml_data.get(class_name) + if layer_data: + for child_name, child_cell in cell.cells_and_names(): + if child_name in layer_data: + set_tensordump_mode(child_cell, layer_data[child_name]) + top_layer_data = yaml_data.get(KEY_TOPLAYER) + if top_layer_data and name in top_layer_data: + set_tensordump_mode(cell, top_layer_data[name]) + + # 替换construct函数 cell.construct = cell_construct_wrapper(cell.construct, cell) logger.info(f"Cell {name}: construct function is wrapped!") cell.dump_path = dump_path @@ -490,8 +575,12 @@ def start(net=None, dump_path="./", data_mode=CoreConst.ALL): cell.output_clips = [] # It is assumed that each cell has a maximum of 50 outputs and 50 inputs. 
for i in range(50): - cell.input_clips.append(ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, CoreConst.INPUT))) - cell.output_clips.append(ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, CoreConst.OUTPUT))) + cell.input_clips.append( + ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, KEY_INPUT)) + ) + cell.output_clips.append( + ops.InsertGradientOf(partial_func(clip_gradient, cell.dump_path, cell.cell_prefix, i, KEY_OUTPUT)) + ) logger.info(f"==========The cell_dump_process_start phase is Finished!==========") atexit.register(process, dump_path=dump_path) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py index e35340360a3..52e2d57af22 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/graph_mode_cell_dump.py @@ -40,6 +40,7 @@ class GraphModeCellDump: self.list = config.list self.data_mode = config.data_mode self.file_format = config.file_format + self.td_config_path = config.td_config_path self.check_config() self.set_step() @@ -77,4 +78,9 @@ class GraphModeCellDump: def handle(self): os.environ['MS_JIT_MODULES'] = 'msprobe' - cellDumper.start(net=self.net, dump_path=self.dump_path, data_mode=self.data_mode[0]) + cellDumper.start( + net=self.net, + dump_path=self.dump_path, + data_mode=self.data_mode[0], + td_config_path=self.td_config_path + ) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index f20ed804c5b..ff7fc28e76e 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -28,6 +28,7 @@ class TensorConfig(BaseConfig): super().__init__(json_config) self.check_mode = None self.file_format = 
json_config.get("file_format") + self.td_config_path = json_config.get("td_config_path") self.check_config() self._check_config() -- Gitee From 6d4f7a7ea607ea696f1a03fb6ec0a8dbedc1dc76 Mon Sep 17 00:00:00 2001 From: qianzhengxin Date: Fri, 7 Mar 2025 17:54:37 +0800 Subject: [PATCH 222/333] fix --- .../mindspore/debugger/precision_debugger.py | 17 ++++++++++++----- .../debugger/test_ms_precision_debugger.py | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index fd6c5ab692b..b3f8f5d77b6 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -85,7 +85,7 @@ class PrecisionDebugger: common_config.dump_path = dump_path if dump_path else common_config.dump_path self.config = DebuggerConfig(common_config, task_config) - if _msprobe_c: + if self._need_msprobe_c() and _msprobe_c: _msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path) self.config.execution_mode = self._get_execution_mode() @@ -152,7 +152,7 @@ class PrecisionDebugger: instance = cls._instance if not instance: raise Exception(MsgConst.NOT_CREATED_INSTANCE) - if _msprobe_c: + if cls._need_msprobe_c() and _msprobe_c: _msprobe_c._PrecisionDebugger().start() if instance.task in PrecisionDebugger.task_not_need_service: return @@ -181,7 +181,7 @@ class PrecisionDebugger: instance = cls._instance if not instance: raise Exception(MsgConst.NOT_CREATED_INSTANCE) - if _msprobe_c: + if cls._need_msprobe_c() and _msprobe_c: _msprobe_c._PrecisionDebugger().stop() if instance.task == Const.GRAD_PROBE: instance.gm.stop() @@ -196,7 +196,7 @@ class PrecisionDebugger: instance = cls._instance if not instance: raise Exception(MsgConst.NOT_CREATED_INSTANCE) - if _msprobe_c: + if cls._need_msprobe_c() and _msprobe_c: 
_msprobe_c._PrecisionDebugger().step() if instance.task in PrecisionDebugger.task_not_need_service: return @@ -253,4 +253,11 @@ class PrecisionDebugger: if instance.config.execution_mode != MsConst.PYNATIVE_MODE: return False else: - return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config) \ No newline at end of file + return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config) + + @classmethod + def _need_msprobe_c(cls): + instance = cls._instance + if not instance: + raise Exception(MsgConst.NOT_CREATED_INSTANCE) + return instance.config.level_ori == Const.LEVEL_L2 \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py index 00b7419e078..790a02b4048 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/debugger/test_ms_precision_debugger.py @@ -98,6 +98,7 @@ class TestPrecisionDebugger(unittest.TestCase): def __init__(self): self.execution_mode = None self.level = None + self.level_ori = Const.LEVEL_L1 class MockPrecisionDebugger: def __init__(self): -- Gitee From 50087ddaa19943527d41147bee80469661b0f708 Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Wed, 5 Mar 2025 14:48:41 +0800 Subject: [PATCH 223/333] reconstruct api_wrap code --- .../msprobe/core/common/const.py | 86 +++++++ .../msprobe/core/data_dump/api_registry.py | 175 +++++++++++++++ .../data_processor/mindspore_processor.py | 49 ++-- .../mindspore/debugger/precision_debugger.py | 6 +- .../mindspore/dump/hook_cell/api_register.py | 125 +++++++++++ .../mindspore/dump/hook_cell/api_registry.py | 207 ----------------- .../mindspore/dump/hook_cell/hook_cell.py | 19 +- .../mindspore/dump/hook_cell/wrap_api.py | 212 ------------------ 
.../msprobe/mindspore/dump/jit_dump.py | 13 +- .../free_benchmark/api_pynative_self_check.py | 16 +- .../msprobe/mindspore/service.py | 9 +- .../run_ut/run_ut_utils.py | 49 ++-- .../msprobe/pytorch/common/utils.py | 4 +- .../pytorch/dump/module_dump/module_dump.py | 7 +- .../pytorch/hook_module/api_register.py | 128 +++++++++++ .../pytorch/hook_module/api_registry.py | 166 -------------- .../pytorch/hook_module/hook_module.py | 25 +-- .../pytorch/hook_module/support_wrap_ops.yaml | 6 +- .../pytorch/hook_module/wrap_distributed.py | 79 ------- .../pytorch/hook_module/wrap_functional.py | 66 ------ .../pytorch/hook_module/wrap_npu_custom.py | 85 ------- .../pytorch/hook_module/wrap_tensor.py | 69 ------ .../msprobe/pytorch/hook_module/wrap_torch.py | 84 ------- .../msprobe/pytorch/hook_module/wrap_vf.py | 60 ----- .../accuracy_tools/msprobe/pytorch/service.py | 10 +- .../core_ut/data_dump/test_api_registry.py | 73 ++++++ .../test_ms_api_pynative_self_check.py | 26 ++- .../test/mindspore_ut/test_ms_service.py | 11 +- .../run_ut/test_run_ut_utils.py | 21 +- .../test/pytorch_ut/common/test_pt_utils.py | 19 +- .../test/pytorch_ut/dump/test_module_dump.py | 12 +- .../hook_module/test_api_registry.py | 130 ----------- .../hook_module/test_wrap_distributed.py | 41 ---- .../hook_module/test_wrap_functional.py | 73 ------ .../hook_module/test_wrap_npu_custom.py | 43 ---- .../hook_module/test_wrap_tensor.py | 40 ---- .../pytorch_ut/hook_module/test_wrap_torch.py | 48 ---- .../pytorch_ut/hook_module/test_wrap_vf.py | 11 - .../test/pytorch_ut/monitor/demo_model.py | 18 ++ .../test/pytorch_ut/monitor/test_csv2tb.py | 18 ++ .../pytorch_ut/monitor/test_module_hook.py | 20 +- .../msprobe/test/pytorch_ut/test_service.py | 20 +- 42 files changed, 834 insertions(+), 1545 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/core/data_dump/api_registry.py create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py delete mode 100644 
debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py delete mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_api.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py create mode 100644 debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_api_registry.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_npu_custom.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py delete mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 27dc231c75c..a184a6f72e9 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -230,6 +230,92 @@ class Const: TENSOR_STAT_LEN = 2 + SUPPORT_API_FILE_NAME = "support_wrap_ops.yaml" + + PT_API_TYPE_FUNCTIONAL = 
"functional" + PT_API_TYPE_TENSOR = "tensor" + PT_API_TYPE_TORCH = "torch" + PT_API_TYPE_VF = "_VF" + PT_API_TYPE_NPU = "torch_npu" + PT_API_TYPE_ATEN = "aten" + PT_API_TYPE_DIST = "distributed" + PT_API_TYPE_NPU_DIST = "npu_distributed" + + MS_API_TYPE_OPS = "ops" + MS_API_TYPE_TENSOR = "tensor" + MS_API_TYPE_STUB_TENSOR = "stubtensor" + MS_API_TYPE_MINT = "mint.ops" + MS_API_TYPE_MINT_FUNC = "mint.nn.functional" + MS_API_TYPE_COM = "communication.comm_func" + + FUNCTIONAL_API_TYPE_PREFIX = "Functional" + TENSOR_API_TYPE_PREFIX = "Tensor" + DIST_API_TYPE_PREFIX = "Distributed" + + TORCH_API_TYPE_PREFIX = "Torch" + NPU_API_TYPE_PREFIX = "NPU" + ATEN_API_TYPE_PREFIX = "Aten" + VF_API_TYPE_PREFIX = "VF" + + MINT_API_TYPE_PREFIX = "Mint" + MINT_FUNC_API_TYPE_PREFIX = "MintFunctional" + + SUPPORT_API_DICT_KEY_MAP = { + PT_FRAMEWORK: { + PT_API_TYPE_FUNCTIONAL: PT_API_TYPE_FUNCTIONAL, + PT_API_TYPE_TENSOR: PT_API_TYPE_TENSOR, + PT_API_TYPE_TORCH: PT_API_TYPE_TORCH, + PT_API_TYPE_VF: PT_API_TYPE_VF, + PT_API_TYPE_NPU: PT_API_TYPE_NPU, + PT_API_TYPE_ATEN: PT_API_TYPE_ATEN, + PT_API_TYPE_DIST: PT_API_TYPE_DIST, + PT_API_TYPE_NPU_DIST: PT_API_TYPE_NPU_DIST + }, + MS_FRAMEWORK: { + MS_API_TYPE_OPS: MS_API_TYPE_OPS, + MS_API_TYPE_TENSOR: MS_API_TYPE_TENSOR, + MS_API_TYPE_STUB_TENSOR: MS_API_TYPE_TENSOR, + MS_API_TYPE_MINT: MS_API_TYPE_MINT, + MS_API_TYPE_MINT_FUNC: MS_API_TYPE_MINT_FUNC, + MS_API_TYPE_COM: MS_API_TYPE_COM + }, + MT_FRAMEWORK: { + PT_API_TYPE_FUNCTIONAL: PT_API_TYPE_FUNCTIONAL, + PT_API_TYPE_TENSOR: PT_API_TYPE_TENSOR, + PT_API_TYPE_TORCH: PT_API_TYPE_TORCH, + PT_API_TYPE_NPU: PT_API_TYPE_NPU, + PT_API_TYPE_DIST: PT_API_TYPE_DIST + } + } + + API_DATA_PREFIX = { + PT_FRAMEWORK: { + PT_API_TYPE_FUNCTIONAL: FUNCTIONAL_API_TYPE_PREFIX, + PT_API_TYPE_TENSOR: TENSOR_API_TYPE_PREFIX, + PT_API_TYPE_TORCH: TORCH_API_TYPE_PREFIX, + PT_API_TYPE_VF: VF_API_TYPE_PREFIX, + PT_API_TYPE_NPU: NPU_API_TYPE_PREFIX, + PT_API_TYPE_ATEN: ATEN_API_TYPE_PREFIX, + PT_API_TYPE_DIST: 
DIST_API_TYPE_PREFIX, + PT_API_TYPE_NPU_DIST: DIST_API_TYPE_PREFIX + }, + MS_FRAMEWORK: { + MS_API_TYPE_OPS: FUNCTIONAL_API_TYPE_PREFIX, + MS_API_TYPE_TENSOR: TENSOR_API_TYPE_PREFIX, + MS_API_TYPE_STUB_TENSOR: TENSOR_API_TYPE_PREFIX, + MS_API_TYPE_MINT: MINT_API_TYPE_PREFIX, + MS_API_TYPE_MINT_FUNC: MINT_FUNC_API_TYPE_PREFIX, + MS_API_TYPE_COM: DIST_API_TYPE_PREFIX + }, + MT_FRAMEWORK: { + PT_API_TYPE_FUNCTIONAL: FUNCTIONAL_API_TYPE_PREFIX, + PT_API_TYPE_TENSOR: TENSOR_API_TYPE_PREFIX, + PT_API_TYPE_TORCH: TORCH_API_TYPE_PREFIX, + PT_API_TYPE_NPU: NPU_API_TYPE_PREFIX, + PT_API_TYPE_DIST: DIST_API_TYPE_PREFIX + } + } + class CompareConst: """ diff --git a/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py b/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py new file mode 100644 index 00000000000..27106044e1e --- /dev/null +++ b/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py @@ -0,0 +1,175 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Dict, Any, Optional, Callable, Union, List, Tuple + +from msprobe.core.common.const import Const +from msprobe.core.common.file_utils import load_yaml + + +def _get_attr(module, attr_name): + if Const.SEP in attr_name: + sub_module_name, sub_attr = attr_name.rsplit(Const.SEP, 1) + sub_module = getattr(module, sub_module_name, None) + attr = getattr(sub_module, sub_attr, None) + else: + attr = getattr(module, attr_name, None) + return attr + + +class ApiWrapper: + def __init__( + self, api_types: Dict[str, Dict[str, Any]], + api_list_paths: Union[str, List[str], Tuple[str]] + ): + self.api_types = api_types + if not isinstance(api_list_paths, (list, tuple)): + api_list_paths = [api_list_paths] * len(self.api_types) + elif len(api_list_paths) != len(self.api_types): + raise RuntimeError("The number of api_list_paths must be equal to the number of frameworks in 'api_types', " + "when api_list_paths is a list or tuple.") + self.api_list_paths = api_list_paths + self.api_names = self._get_api_names() + self.wrapped_api_functions = dict() + + def wrap_api( + self, api_templates, hook_build_func: Optional[Callable] + ): + api_types_num = sum([len(v) for v in self.api_types.values()]) + if not isinstance(api_templates, (list, tuple)): + api_templates = [api_templates] * api_types_num + elif len(api_templates) != api_types_num: + raise RuntimeError("The number of api_templates must be equal to the number of api_types, " + "when api_templates is a list or tuple.") + + self.wrapped_api_functions.clear() + index = 0 + for framework, api_types in self.api_types.items(): + wrapped_functions_in_framework = dict() + for api_type, api_modules in api_types.items(): + wrapped_functions = dict() + name_prefix = Const.API_DATA_PREFIX.get(framework, {}).get(api_type, "API") + api_template = api_templates[index] + index += 1 + for api_name in self.api_names.get(framework, {}).get(api_type, []): + ori_api = _get_attr(api_modules[0], api_name) + if callable(ori_api): 
+ def wrap_api_func(api_name, api_func, prefix, hook_build_func, api_template): + def api_function(*args, **kwargs): + return api_template(api_name, api_func, prefix, hook_build_func)(*args, **kwargs) + return api_function + wrapped_functions[api_name] = wrap_api_func(api_name, ori_api, name_prefix, + hook_build_func, api_template) + wrapped_functions_in_framework[api_type] = wrapped_functions + self.wrapped_api_functions[framework] = wrapped_functions_in_framework + return self.wrapped_api_functions + + def _get_api_names(self): + api_names = dict() + + for index, framework in enumerate(self.api_types.keys()): + api_list = load_yaml(self.api_list_paths[index]) + valid_names = dict() + for api_type, api_modules in self.api_types.get(framework, {}).items(): + api_from_file = api_list.get(Const.SUPPORT_API_DICT_KEY_MAP.get(framework, {}).get(api_type), []) + names = set() + for api_name in api_from_file: + target_attr = api_name + target_module = api_modules[0] + if Const.SEP in api_name: + sub_module_name, target_attr = api_name.rsplit(Const.SEP, 1) + target_module = getattr(api_modules[0], sub_module_name) + if target_attr in dir(target_module): + names.add(api_name) + valid_names[api_type] = names + api_names[framework] = valid_names + + return api_names + + +class ApiRegistry: + """ + Base class for api registry. 
+ """ + + def __init__(self, api_types, inner_used_api, supported_api_list_path, api_templates): + self.ori_api_attr = dict() + self.wrapped_api_attr = dict() + self.inner_used_ori_attr = dict() + self.inner_used_wrapped_attr = dict() + self.api_types = api_types + self.inner_used_api = inner_used_api + self.supported_api_list_path = supported_api_list_path + self.api_templates = api_templates + + @staticmethod + def store_ori_attr(ori_api_group, api_list, api_ori_attr): + for api in api_list: + api_ori_attr[api] = _get_attr(ori_api_group, api) + + @staticmethod + def set_api_attr(api_group, attr_dict): + for api, api_attr in attr_dict.items(): + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(api_group, sub_module_name, None) + if sub_module is not None: + setattr(sub_module, sub_op, api_attr) + else: + setattr(api_group, api, api_attr) + + def register_all_api(self): + for framework, api_types in self.api_types.items(): + for api_type, api_modules in api_types.items(): + api_type_with_framework = framework + Const.SEP + api_type + for module in api_modules[1]: + self.set_api_attr(module, self.wrapped_api_attr.get(api_type_with_framework, {})) + + def register_inner_used_api(self): + for api_type in self.inner_used_api.keys(): + self.set_api_attr(self.inner_used_api.get(api_type)[0], self.inner_used_wrapped_attr.get(api_type, {})) + + def restore_all_api(self): + for framework, api_types in self.api_types.items(): + for api_type, api_modules in api_types.items(): + api_type_with_framework = framework + Const.SEP + api_type + for module in api_modules[1]: + self.set_api_attr(module, self.ori_api_attr.get(api_type_with_framework, {})) + + def restore_inner_used_api(self): + for api_type in self.inner_used_api.keys(): + self.set_api_attr(self.inner_used_api.get(api_type)[0], self.inner_used_ori_attr.get(api_type, {})) + + def initialize_hook(self, hook_build_func): + api_wrapper = ApiWrapper(self.api_types, 
self.supported_api_list_path) + wrapped_api_functions = api_wrapper.wrap_api(self.api_templates, hook_build_func) + + for framework, api_types in self.api_types.items(): + for api_type, api_modules in api_types.items(): + ori_attr = dict() + self.store_ori_attr(api_modules[0], api_wrapper.api_names.get(framework).get(api_type), ori_attr) + api_type_with_framework = framework + Const.SEP + api_type + self.ori_api_attr[api_type_with_framework] = ori_attr + self.wrapped_api_attr[api_type_with_framework] = wrapped_api_functions.get(framework).get(api_type) + + for inner_used_api_type, inner_used_api_list in self.inner_used_api.items(): + ori_attr = dict() + wrapped_attr = dict() + for api_name in inner_used_api_list[1:]: + if self.ori_api_attr.get(inner_used_api_type, {}).get(api_name): + ori_attr[api_name] = self.ori_api_attr.get(inner_used_api_type).get(api_name) + wrapped_attr[api_name] = self.wrapped_api_attr.get(inner_used_api_type).get(api_name) + self.inner_used_ori_attr[inner_used_api_type] = ori_attr + self.inner_used_wrapped_attr[inner_used_api_type] = wrapped_attr diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 8c4542a1917..782deec51c9 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -26,7 +26,7 @@ from msprobe.core.data_dump.data_processor.base import (BaseDataProcessor, Tenso from msprobe.core.common.file_utils import path_len_exceeds_limit, save_npy from msprobe.mindspore.common.utils import convert_bf16_to_fp32, save_tensor_as_npy from msprobe.mindspore.common.log import logger -from msprobe.mindspore.dump.hook_cell.api_registry import api_register +from msprobe.mindspore.dump.hook_cell.api_register import get_api_register has_adump = True try: @@ -44,6 +44,7 @@ class 
MindsporeDataProcessor(BaseDataProcessor): "dtype": self.analyze_dtype_in_kwargs } self._async_dump_cache = {} + self.api_register = get_api_register() @staticmethod def get_md5_for_tensor(x): @@ -74,46 +75,29 @@ class MindsporeDataProcessor(BaseDataProcessor): else: if not ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) - api_register.norm_inner_op_set_ori_func() - get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max) - get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min) - get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean) - if hasattr(mint, "norm"): - get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm) - else: - get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm) - tensor_stat.max = get_max_value(data).item() - tensor_stat.min = get_min_value(data).item() - tensor_stat.mean = get_mean_value(data).item() + get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm + tensor_stat.max = mint.max(data).item() + tensor_stat.min = mint.min(data).item() + tensor_stat.mean = mint.mean(data).item() tensor_stat.norm = get_norm_value(data).item() - api_register.norm_inner_op_set_hook_func() return tensor_stat @staticmethod def get_stat_info_async(data): tensor_stat = TensorStatInfo() - stack_method = api_register.functional_ori_attr.get("stack", ms.ops.stack) if data.dtype == ms.complex64 or data.dtype == ms.complex128: logger.warning("Async dump do not support complex data!") return tensor_stat elif data.dtype == ms.bool_: - tensor_stat.stack_tensor_stat = (["Max", "Min"], stack_method([data.any(), data.all()])) + tensor_stat.stack_tensor_stat = (["Max", "Min"], ops.stack([data.any(), data.all()])) elif not data.shape: - tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method([data, data, data, data])) + tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack([data, data, data, data])) else: if not 
ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) - api_register.norm_inner_op_set_ori_func() - get_max_value = api_register.mint_ops_ori_attr.get("max", mint.max) - get_min_value = api_register.mint_ops_ori_attr.get("min", mint.min) - get_mean_value = api_register.mint_ops_ori_attr.get("mean", mint.mean) - if hasattr(mint, "norm"): - get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm) - else: - get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm) - tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], stack_method( - [get_max_value(data), get_min_value(data), get_mean_value(data), get_norm_value(data)])) - api_register.norm_inner_op_set_hook_func() + get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm + tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack( + [mint.max(data), mint.min(data), mint.mean(data), get_norm_value(data)])) return tensor_stat @staticmethod @@ -125,14 +109,17 @@ class MindsporeDataProcessor(BaseDataProcessor): return super().get_special_types() + cls.mindspore_special_type def get_stat_info(self, data): + self.api_register.restore_inner_used_api() tensor_stat = TensorStatInfo() if data.numel() == 0: - return tensor_stat + stat_info = tensor_stat else: if self.config.async_dump: - return MindsporeDataProcessor.get_stat_info_async(data) + stat_info = MindsporeDataProcessor.get_stat_info_async(data) else: - return MindsporeDataProcessor.get_stat_info_sync(data) + stat_info = MindsporeDataProcessor.get_stat_info_sync(data) + self.api_register.register_inner_used_api() + return stat_info def analyze_single_element(self, element, suffix_stack): if suffix_stack and suffix_stack[-1] in self.mindspore_object_key: @@ -191,7 +178,7 @@ class TensorDataProcessor(MindsporeDataProcessor): else: save_tensor_as_npy(tensor, file_path) return single_arg - + def _analyze_numpy(self, ndarray, suffix): dump_data_name, file_path = 
self.get_save_file_path(suffix) save_npy(ndarray, file_path) diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 7694d71dd98..61b360824b4 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -27,7 +27,7 @@ from msprobe.mindspore.cell_processor import CellProcessor from msprobe.mindspore.common.const import Const as MsConst from msprobe.mindspore.common.utils import set_register_backward_hook_functions, check_save_param from msprobe.mindspore.debugger.debugger_config import DebuggerConfig -from msprobe.mindspore.dump.hook_cell.api_registry import api_register +from msprobe.mindspore.dump.hook_cell.api_register import get_api_register from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell from msprobe.mindspore.grad_probe.grad_monitor import GradientMonitor from msprobe.mindspore.ms_config import parse_json_config @@ -163,7 +163,7 @@ class PrecisionDebugger: instance.service.start(model) else: if not instance.first_start: - api_register.api_set_ori_func() + get_api_register().restore_all_api() handler = TaskHandlerFactory.create(instance.config) handler.handle() @@ -241,4 +241,4 @@ class PrecisionDebugger: if instance.config.execution_mode != MsConst.PYNATIVE_MODE: return False else: - return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config) \ No newline at end of file + return instance.config.task != Const.FREE_BENCHMARK and not instance._is_graph_dump(instance.config) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py new file mode 100644 index 00000000000..6563ed70086 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py @@ -0,0 +1,125 @@ +# Copyright (c) 
2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from mindspore import Tensor, ops, mint +from mindspore.mint.nn import functional +from mindspore.common._stub_tensor import StubTensor +from mindspore.communication import comm_func + +from msprobe.core.common.file_utils import load_yaml +from msprobe.core.common.utils import Const +from msprobe.core.data_dump.api_registry import ApiRegistry +from msprobe.mindspore.common.const import Const as MsConst +from msprobe.mindspore.common.utils import is_mindtorch +from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell + +if not is_mindtorch(): + _api_types = { + Const.MS_FRAMEWORK: { + Const.MS_API_TYPE_OPS: (ops, (ops,)), + Const.MS_API_TYPE_TENSOR: (Tensor, (Tensor,)), + Const.MS_API_TYPE_STUB_TENSOR: (StubTensor, (StubTensor,)), + Const.MS_API_TYPE_MINT: (mint, (mint,)), + Const.MS_API_TYPE_MINT_FUNC: (functional, (functional,)), + Const.MS_API_TYPE_COM: (comm_func, (comm_func,)) + } + } +else: + import torch + import torch_npu + _api_types = { + Const.MT_FRAMEWORK: { + Const.PT_API_TYPE_FUNCTIONAL: (torch.nn.functional, (torch.nn.functional,)), + Const.PT_API_TYPE_TENSOR: (torch.Tensor, (torch.Tensor,)), + Const.PT_API_TYPE_TORCH: (torch, (torch,)), + Const.PT_API_TYPE_NPU: (torch_npu, (torch_npu,)), + Const.PT_API_TYPE_DIST: (torch.distributed, (torch.distributed, torch.distributed.distributed_c10d)) + } + } + + +_inner_used_api = { 
+ Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_OPS: ( + ops, "norm", "square", "sqrt", "is_complex", "stack", "is_floating_point" + ), + Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_TENSOR: ( + Tensor, "to", "numel" + ), + Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_MINT: ( + mint, "max", "min", "mean", "norm" + ) +} + +_supported_api_list_path = (os.path.join(os.path.dirname(os.path.realpath(__file__)), MsConst.SUPPORTED_API_LIST_FILE),) + + +class ApiTemplate(HOOKCell): + def __init__(self, api_name, api_func, prefix, hook_build_func): + self.api_name = api_name + self.api_func = api_func + self.prefix_api_name = prefix + Const.SEP + str(api_name.split(Const.SEP)[-1]) + Const.SEP + super().__init__(hook_build_func) + + @staticmethod + def async_to_sync(output): + # Fake handle, used to return after the CommHandle executes the wait method + fake_handle = type("FakeHandle", (), {"wait": lambda self: None})() + if isinstance(output, tuple) and len(output) == 2 and hasattr(output[1], "wait"): + output[1].wait() + output = (output[0], fake_handle) + elif hasattr(output, "wait"): + output.wait() + output = fake_handle + return output + + def construct(self, *args, **kwargs): + if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX): + return args[0] if args else kwargs.get(Const.INPUT) + + output = self.api_func(*args, **kwargs) + + if self.prefix_api_name.startswith(MsConst.DISTRIBUTED_DATA_PREFIX): + if kwargs.get("async_op") or self.api_name in ["isend", "irecv"]: + output = self.async_to_sync(output) + return output + + def forward(self, *args, **kwargs): + if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX): + return args[0] if args else kwargs.get(Const.INPUT) + return self.api_func(*args, **kwargs) + + +api_register = None + + +def get_api_register(): + global api_register + + def stub_method(method): + def wrapped_method(*args, **kwargs): + return method(*args, **kwargs) + return wrapped_method + + if api_register is None: + if 
not is_mindtorch(): + for attr_name in dir(StubTensor): + attr = getattr(StubTensor, attr_name) + api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, []) + if attr_name in api_names and callable(attr): + setattr(StubTensor, attr_name, stub_method(attr)) + api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate) + return api_register diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py deleted file mode 100644 index 7aee1deccd9..00000000000 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from mindspore import Tensor, ops, mint -from mindspore.mint.nn import functional -from mindspore.common._stub_tensor import StubTensor -from mindspore.communication import comm_func - -from msprobe.mindspore.dump.hook_cell.wrap_api import (HOOKTensor, HOOKStubTensor, HOOKFunctionalOP, - HOOKMintOP, HOOKMintNNFunctionalOP, HOOKDistributedOP, - HOOKTorchOP, HOOKTorchTensor, HOOKTorchFunctionalOP, - HOOKTorchDistributedOP, HOOKTorchNpuOP, - get_wrap_api_list, get_wrap_torch_api_list, setup_hooks) -from msprobe.core.common.utils import Const -from msprobe.mindspore.common.utils import is_mindtorch - -if is_mindtorch(): - import torch - import torch_npu - - -def stub_method(method): - def wrapped_method(*args, **kwargs): - return method(*args, **kwargs) - return wrapped_method - - -class ApiRegistry: - def __init__(self): - self.tensor_ori_attr = {} - self.stub_tensor_ori_attr = {} - self.functional_ori_attr = {} - self.mint_ops_ori_attr = {} - self.mint_func_ops_ori_attr = {} - self.distributed_ori_attr = {} - self.norm_inner_ops_ori_attr = {} - - self.torch_ori_attr = {} - self.torch_tensor_ori_attr = {} - self.torch_functional_ori_attr = {} - self.torch_distributed_ori_attr = {} - self.torch_npu_ori_attr = {} - - self.tensor_hook_attr = {} - self.stub_tensor_hook_attr = {} - self.functional_hook_attr = {} - self.mint_ops_hook_attr = {} - self.mint_func_ops_hook_attr = {} - self.distibuted_hook_attr = {} - self.norm_inner_ops_hook_attr = {} - - self.torch_hook_attr = {} - self.torch_tensor_hook_attr = {} - self.torch_functional_hook_attr = {} - self.torch_distributed_hook_attr = {} - self.torch_npu_hook_attr = {} - - self.norm_inner_ops = ["norm", "square", "sqrt", "is_complex"] - - @staticmethod - def store_ori_attr(ori_api_group, api_list, api_ori_attr): - for api in api_list: - if Const.SEP in api: - sub_module_name, sub_op = api.rsplit(Const.SEP, 1) - sub_module = getattr(ori_api_group, sub_module_name) - ori_api_func = getattr(sub_module, sub_op) - else: - 
ori_api_func = getattr(ori_api_group, api) - if ori_api_group == StubTensor: - api_ori_attr[api] = stub_method(ori_api_func) - continue - api_ori_attr[api] = ori_api_func - - @staticmethod - def set_api_attr(api_group, attr_dict): - for api, api_attr in attr_dict.items(): - if Const.SEP in api: - sub_module_name, sub_op = api.rsplit(Const.SEP, 1) - sub_module = getattr(api_group, sub_module_name, None) - if sub_module is not None: - setattr(sub_module, sub_op, api_attr) - else: - setattr(api_group, api, api_attr) - - def norm_inner_op_set_hook_func(self): - self.set_api_attr(ops, self.norm_inner_ops_hook_attr) - - def norm_inner_op_set_ori_func(self): - self.set_api_attr(ops, self.norm_inner_ops_ori_attr) - - def api_set_hook_func(self): - if is_mindtorch(): - self.set_api_attr(torch, self.torch_hook_attr) - self.set_api_attr(torch.Tensor, self.torch_tensor_hook_attr) - self.set_api_attr(torch.nn.functional, self.torch_functional_hook_attr) - self.set_api_attr(torch.distributed, self.torch_distributed_hook_attr) - self.set_api_attr(torch.distributed.distributed_c10d, self.torch_distributed_hook_attr) - self.set_api_attr(torch_npu, self.torch_npu_hook_attr) - else: - self.set_api_attr(Tensor, self.tensor_hook_attr) - self.set_api_attr(StubTensor, self.stub_tensor_hook_attr) - self.set_api_attr(ops, self.functional_hook_attr) - self.set_api_attr(mint, self.mint_ops_hook_attr) - self.set_api_attr(functional, self.mint_func_ops_hook_attr) - self.set_api_attr(comm_func, self.distibuted_hook_attr) - - def api_set_ori_func(self): - if is_mindtorch(): - self.set_api_attr(torch, self.torch_ori_attr) - self.set_api_attr(torch.Tensor, self.torch_tensor_ori_attr) - self.set_api_attr(torch.nn.functional, self.torch_functional_ori_attr) - self.set_api_attr(torch.distributed, self.torch_distributed_ori_attr) - self.set_api_attr(torch.distributed.distributed_c10d, self.torch_distributed_ori_attr) - self.set_api_attr(torch_npu, self.torch_npu_ori_attr) - else: - 
self.set_api_attr(Tensor, self.tensor_ori_attr) - self.set_api_attr(StubTensor, self.stub_tensor_ori_attr) - self.set_api_attr(ops, self.functional_ori_attr) - self.set_api_attr(mint, self.mint_ops_ori_attr) - self.set_api_attr(functional, self.mint_func_ops_ori_attr) - self.set_api_attr(comm_func, self.distributed_ori_attr) - - def initialize_hook(self, hook): - setup_hooks(hook) - if is_mindtorch(): - wrap_torch_api_name = get_wrap_torch_api_list() - self.store_ori_attr(torch, - wrap_torch_api_name.torch_api_names, self.torch_ori_attr) - self.store_ori_attr(torch.Tensor, - wrap_torch_api_name.tensor_api_names, self.torch_tensor_ori_attr) - self.store_ori_attr(torch.nn.functional, - wrap_torch_api_name.functional_api_names, self.torch_functional_ori_attr) - self.store_ori_attr(torch.distributed, - wrap_torch_api_name.distributed_api_names, self.torch_distributed_ori_attr) - self.store_ori_attr(torch_npu, - wrap_torch_api_name.npu_api_names, self.torch_npu_ori_attr) - for attr_name in dir(HOOKTorchOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.torch_hook_attr[api_name] = getattr(HOOKTorchOP, attr_name) - for attr_name in dir(HOOKTorchTensor): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.torch_tensor_hook_attr[api_name] = getattr(HOOKTorchTensor, attr_name) - for attr_name in dir(HOOKTorchFunctionalOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.torch_functional_hook_attr[api_name] = getattr(HOOKTorchFunctionalOP, attr_name) - for attr_name in dir(HOOKTorchDistributedOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.torch_distributed_hook_attr[api_name] = getattr(HOOKTorchDistributedOP, attr_name) - for attr_name in dir(HOOKTorchNpuOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = 
attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.torch_npu_hook_attr[api_name] = getattr(HOOKTorchNpuOP, attr_name) - return - - wrap_api_name = get_wrap_api_list() - self.store_ori_attr(Tensor, wrap_api_name.tensor_api_names, self.tensor_ori_attr) - self.store_ori_attr(StubTensor, wrap_api_name.stub_tensor_api_names, self.stub_tensor_ori_attr) - self.store_ori_attr(ops, wrap_api_name.ops_api_names, self.functional_ori_attr) - self.store_ori_attr(mint, wrap_api_name.mint_api_names, self.mint_ops_ori_attr) - self.store_ori_attr(functional, wrap_api_name.mint_nn_func_api_names, self.mint_func_ops_ori_attr) - self.store_ori_attr(comm_func, wrap_api_name.distributed_api_names, self.distributed_ori_attr) - self.store_ori_attr(ops, self.norm_inner_ops, self.norm_inner_ops_ori_attr) - for attr_name in dir(HOOKTensor): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.tensor_hook_attr[api_name] = getattr(HOOKTensor, attr_name) - for attr_name in dir(HOOKStubTensor): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.stub_tensor_hook_attr[api_name] = getattr(HOOKStubTensor, attr_name) - for attr_name in dir(HOOKFunctionalOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.functional_hook_attr[api_name] = getattr(HOOKFunctionalOP, attr_name) - if api_name in self.norm_inner_ops: - self.norm_inner_ops_hook_attr[api_name] = getattr(HOOKFunctionalOP, attr_name) - for attr_name in dir(HOOKMintOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.mint_ops_hook_attr[api_name] = getattr(HOOKMintOP, attr_name) - for attr_name in dir(HOOKMintNNFunctionalOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.mint_func_ops_hook_attr[api_name] = getattr(HOOKMintNNFunctionalOP, attr_name) - 
for attr_name in dir(HOOKDistributedOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - api_name = attr_name[Const.ATTR_NAME_PREFIX_LEN:] - self.distibuted_hook_attr[api_name] = getattr(HOOKDistributedOP, attr_name) - - -api_register = ApiRegistry() diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py index b68a7d995a5..7007992ca45 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py @@ -28,23 +28,22 @@ def get_cell_count(name): return HOOKCell.cell_count[name] -def __init__(self, build_hook) -> None: +def __init__(self, hook_build_func) -> None: super(HOOKCell, self).__init__() self.changed_status = False self.input_kwargs = {} - self.prefix = "" if not HOOKCell.g_stop_hook: HOOKCell.g_stop_hook = True self.changed_status = True - if hasattr(self, "prefix_api_name"): - self.prefix = self.prefix_api_name - self.forward_data_collected = False - forward_pre_hook, forward_hook, backward_hook, backward_pre_hook = build_hook(self.prefix) - self.register_forward_pre_hook(forward_pre_hook) - self.register_forward_hook(forward_hook) - register_backward_hook_functions["full"](self, backward_hook) - register_backward_hook_functions["pre"](self, backward_pre_hook) + + prefix = self.prefix_api_name if hasattr(self, "prefix_api_name") else "" + if callable(hook_build_func): + forward_pre_hook, forward_hook, backward_hook, backward_pre_hook = hook_build_func(prefix) + self.register_forward_pre_hook(forward_pre_hook) + self.register_forward_hook(forward_hook) + register_backward_hook_functions["full"](self, backward_hook) + register_backward_hook_functions["pre"](self, backward_pre_hook) # 重载call,加全局标志。 diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_api.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_api.py deleted file mode 100644 index 
0e97929ecd7..00000000000 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/wrap_api.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from mindspore import Tensor, mint, ops -from mindspore.common._stub_tensor import StubTensor -from mindspore.communication import comm_func -from mindspore.mint.nn import functional - -from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_yaml -from msprobe.mindspore.common.const import Const as MsConst -from msprobe.mindspore.common.utils import is_mindtorch -from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell - -if is_mindtorch(): - import torch - import torch_npu - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, MsConst.SUPPORTED_API_LIST_FILE) -torch_yaml_path = os.path.join(cur_path, "../../../pytorch/hook_module", MsConst.SUPPORTED_API_LIST_FILE) - - -class HOOKTensor(object): - pass - - -class HOOKStubTensor(object): - pass - - -class HOOKFunctionalOP(object): - pass - - -class HOOKMintOP(object): - pass - - -class HOOKMintNNFunctionalOP(object): - pass - - -class HOOKDistributedOP(object): - pass - - -class HOOKTorchOP(object): - pass - - -class HOOKTorchTensor(object): - pass - - -class HOOKTorchFunctionalOP(object): - pass - - -class HOOKTorchDistributedOP(object): - pass - - -class 
HOOKTorchNpuOP(object): - pass - - -class ApiTemplate(HOOKCell): - def __init__(self, api_name, api_dict, prefix, hook): - self.api_name = api_name - self.api_func = api_dict[api_name] - self.prefix_api_name = prefix + str(api_name.split(Const.SEP)[-1]) + Const.SEP - super().__init__(hook) - - @staticmethod - def async_to_sync(output): - # Fake handle, used to return after the CommHandle executes the wait method - fake_handle = type("FakeHandle", (), {"wait": lambda self: None})() - if isinstance(output, tuple) and len(output) == 2 and hasattr(output[1], "wait"): - output[1].wait() - output = (output[0], fake_handle) - elif hasattr(output, "wait"): - output.wait() - output = fake_handle - return output - - def construct(self, *args, **kwargs): - if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX): - return args[0] if args else kwargs.get(Const.INPUT) - - output = self.api_func(*args, **kwargs) - - if self.prefix_api_name.startswith(MsConst.DISTRIBUTED_DATA_PREFIX): - if kwargs.get("async_op") or self.api_name in ["isend", "irecv"]: - output = self.async_to_sync(output) - return output - - def forward(self, *args, **kwargs): - if self.api_name.startswith(MsConst.DROPOUT_API_NAME_PREFIX): - return args[0] if args else kwargs.get(Const.INPUT) - return self.api_func(*args, **kwargs) - - -class WrapApiName: - def __init__(self, tensor_api_names, stub_tensor_api_names, ops_api_names, mint_api_names, mint_nn_func_api_names, - distributed_api_names): - self.tensor_api_names = tensor_api_names - self.stub_tensor_api_names = stub_tensor_api_names - self.ops_api_names = ops_api_names - self.mint_api_names = mint_api_names - self.mint_nn_func_api_names = mint_nn_func_api_names - self.distributed_api_names = distributed_api_names - - -class WrapTorchApiName: - def __init__(self, torch_api_names, tensor_api_names, functional_api_names, distributed_api_names, npu_api_names): - self.torch_api_names = torch_api_names - self.tensor_api_names = tensor_api_names - 
self.functional_api_names = functional_api_names - self.distributed_api_names = distributed_api_names - self.npu_api_names = npu_api_names - - -def get_wrap_api_list(): - api_list = load_yaml(yaml_path) - tensor_api = api_list.get(MsConst.SUPPORTED_TENSOR_LIST_KEY) - ops_api = api_list.get(MsConst.SUPPORTED_OPS_LIST_KEY) - mint_api = api_list.get(MsConst.SUPPORTED_MINT_LIST_KEY) - mint_nn_func_api = api_list.get(MsConst.SUPPORTED__MINT_NN_FUNC_LIST_KEY) - distributed_api = api_list.get(MsConst.SUPPORTED_COMM_LIST_KEY) - wrap_api_name = WrapApiName(set(tensor_api) & set(dir(Tensor)), - set(tensor_api) & set(dir(StubTensor)), - set(ops_api) & set(dir(ops)), - set(mint_api) & set(dir(mint)), - set(mint_nn_func_api) & set(dir(functional)), - set(distributed_api) & set(dir(comm_func))) - return wrap_api_name - - -def get_wrap_torch_api_list(): - api_list = load_yaml(torch_yaml_path) - torch_api = api_list.get("torch") - tensor_api = api_list.get("tensor") - functional_api = api_list.get("functional") - distributed_api = api_list.get("distributed") - npu_api = api_list.get("torch_npu") - wrap_api_name = WrapTorchApiName(set(torch_api) & set(dir(torch)), - set(tensor_api) & set(dir(torch.Tensor)), - set(functional_api) & set(dir(torch.nn.functional)), - set(distributed_api) & set(dir(torch.distributed)), - set(npu_api) & set(dir(torch_npu))) - return wrap_api_name - - -def wrap_api_func(api_name, api_dict, prefix, hook): - def api_function(*args, **kwargs): - return ApiTemplate(api_name, api_dict, prefix, hook)(*args, **kwargs) - return api_function - - -def wrap_api_func_and_bind(api_list, api_dict, prefix, hook, hook_class): - for api_name in api_list: - if callable(api_dict[api_name]): - setattr(hook_class, Const.ATTR_NAME_PREFIX + api_name, wrap_api_func(api_name, api_dict, prefix, hook)) - - -def setup_hooks(hook): - if is_mindtorch(): - torch_wrap_api_name = get_wrap_torch_api_list() - wrap_api_func_and_bind(torch_wrap_api_name.torch_api_names, - {f: getattr(torch, 
f) for f in dir(torch)}, - MsConst.TORCH_DATA_PREFIX, hook, HOOKTorchOP) - wrap_api_func_and_bind(torch_wrap_api_name.tensor_api_names, - {f: getattr(torch.Tensor, f) for f in dir(torch.Tensor)}, - MsConst.TENSOR_DATA_PREFIX, hook, HOOKTorchTensor) - wrap_api_func_and_bind(torch_wrap_api_name.functional_api_names, - {f: getattr(torch.nn.functional, f) for f in dir(torch.nn.functional)}, - MsConst.OPS_DATA_PREFIX, hook, HOOKTorchFunctionalOP) - wrap_api_func_and_bind(torch_wrap_api_name.distributed_api_names, - {f: getattr(torch.distributed, f) for f in dir(torch.distributed)}, - MsConst.DISTRIBUTED_DATA_PREFIX, hook, HOOKTorchDistributedOP) - wrap_api_func_and_bind(torch_wrap_api_name.npu_api_names, {f: getattr(torch_npu, f) for f in dir(torch_npu)}, - MsConst.TORCH_NPU_DATA_PREFIX, hook, HOOKTorchNpuOP) - return - - wrap_api_name = get_wrap_api_list() - wrap_api_func_and_bind(wrap_api_name.tensor_api_names, {f: getattr(Tensor, f) for f in dir(Tensor)}, - MsConst.TENSOR_DATA_PREFIX, hook, HOOKTensor) - wrap_api_func_and_bind(wrap_api_name.stub_tensor_api_names, {f: getattr(StubTensor, f) for f in dir(StubTensor)}, - MsConst.STUB_TENSOR_DATA_PREFIX, hook, HOOKStubTensor) - wrap_api_func_and_bind(wrap_api_name.ops_api_names, {f: getattr(ops, f) for f in dir(ops)}, - MsConst.OPS_DATA_PREFIX, hook, HOOKFunctionalOP) - wrap_api_func_and_bind(wrap_api_name.mint_api_names, {f: getattr(mint, f) for f in dir(mint)}, - MsConst.MINT_DATA_PREFIX, hook, HOOKMintOP) - wrap_api_func_and_bind(wrap_api_name.mint_nn_func_api_names, {f: getattr(functional, f) for f in dir(functional)}, - MsConst.MINT_NN_FUNC_DATA_PREFIX, hook, HOOKMintNNFunctionalOP) - wrap_api_func_and_bind(wrap_api_name.distributed_api_names, {f: getattr(comm_func, f) for f in dir(comm_func)}, - MsConst.DISTRIBUTED_DATA_PREFIX, hook, HOOKDistributedOP) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py index 0a32200639a..a9a543a8fac 100644 
--- a/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py @@ -25,7 +25,10 @@ except ImportError: from msprobe.core.common.log import logger from msprobe.core.common.const import Const from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs -from msprobe.mindspore.dump.hook_cell.api_registry import api_register +from msprobe.mindspore.dump.hook_cell.api_register import get_api_register + + +_api_register = get_api_register() def dump_jit(name, in_feat, out_feat, is_forward): @@ -69,7 +72,7 @@ class JitDump(_MindsporeFunctionExecutor): def __call__(self, *args, **kwargs): if JitDump.jit_dump_switch: - api_register.api_set_ori_func() + _api_register.restore_all_api() out = super().__call__(*args, **kwargs) if JitDump.jit_dump_switch and len(args) > 0: if self.name and self.name != "construct": @@ -80,7 +83,7 @@ class JitDump(_MindsporeFunctionExecutor): elif len(args) == 0: logger.warning(f"The jit function {self.name} has no input arguments, nothing will be dumped.") if JitDump.jit_dump_switch: - api_register.api_set_hook_func() + _api_register.register_all_api() return out @classmethod @@ -101,9 +104,9 @@ class JitDump(_MindsporeFunctionExecutor): def grad(self, obj, grad, weights, grad_position, *args, **kwargs): if JitDump.jit_dump_switch and JitDump.jit_enable: - api_register.api_set_ori_func() + _api_register.restore_all_api() output = self._executor.grad(grad, obj, weights, grad_position, *args, *(kwargs.values())) if JitDump.jit_dump_switch and JitDump.jit_enable: dump_jit(obj, args, None, False) - api_register.api_set_hook_func() + _api_register.register_all_api() return output diff --git a/debug/accuracy_tools/msprobe/mindspore/free_benchmark/api_pynative_self_check.py b/debug/accuracy_tools/msprobe/mindspore/free_benchmark/api_pynative_self_check.py index 57b7de4fa56..da4821b3ac4 100644 --- 
a/debug/accuracy_tools/msprobe/mindspore/free_benchmark/api_pynative_self_check.py +++ b/debug/accuracy_tools/msprobe/mindspore/free_benchmark/api_pynative_self_check.py @@ -19,6 +19,7 @@ import os import traceback import mindspore as ms + from msprobe.core.common.const import Const from msprobe.core.common.exceptions import DistributedNotInitializedError from msprobe.core.common.file_utils import check_path_length, load_yaml @@ -27,7 +28,7 @@ from msprobe.mindspore.common.const import FreeBenchmarkConst from msprobe.mindspore.common.log import logger from msprobe.mindspore.common.utils import get_rank_if_initialized from msprobe.mindspore.debugger.debugger_config import DebuggerConfig -from msprobe.mindspore.dump.hook_cell.api_registry import api_register +from msprobe.mindspore.dump.hook_cell.api_register import get_api_register from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell from msprobe.mindspore.free_benchmark.common.config import Config from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams @@ -37,6 +38,9 @@ from msprobe.mindspore.free_benchmark.perturbation.perturbation_factory import P from msprobe.mindspore.runtime import Runtime +_api_register = get_api_register() + + class ApiPyNativeSelfCheck: def __init__(self, config: DebuggerConfig): Config.is_enable = True @@ -60,8 +64,8 @@ class ApiPyNativeSelfCheck: self.store_original_func() def handle(self): - api_register.initialize_hook(self.build_hook) - api_register.api_set_hook_func() + _api_register.initialize_hook(self.build_hook) + _api_register.register_all_api() def build_hook(self, api_name): def pre_hook(cell, input_data): @@ -166,13 +170,13 @@ def check_self(api_name_with_id, output, ori_func, *args, **kwargs): return ret logger.info(f"[{api_name_with_id}] is {Config.handler_type}ing.") - api_register.api_set_ori_func() + _api_register.restore_all_api() try: perturbation = PerturbationFactory.create(api_name_with_id) params.fuzzed_result = 
perturbation.handle(params) if params.fuzzed_result is False: - api_register.api_set_hook_func() + _api_register.register_all_api() return ret if Config.stage == Const.BACKWARD: params.original_result = Tools.get_grad(params.original_func, *params.args, **params.kwargs) @@ -183,7 +187,7 @@ def check_self(api_name_with_id, output, ori_func, *args, **kwargs): logger.error(f"[{api_name_with_id}] Error: {str(e)}") logger.error(f"[{api_name_with_id}] Error detail: {traceback.format_exc()}") - api_register.api_set_hook_func() + _api_register.register_all_api() return ret diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 5afbd046be4..2027c3cd7aa 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -41,7 +41,7 @@ from msprobe.mindspore.cell_processor import CellProcessor from msprobe.mindspore.common.log import logger from msprobe.mindspore.common.utils import (get_rank_if_initialized, clean_input_kwargs, is_mindtorch, register_backward_hook_functions) -from msprobe.mindspore.dump.hook_cell.api_registry import api_register +from msprobe.mindspore.dump.hook_cell.api_register import get_api_register from msprobe.mindspore.dump.hook_cell.primitive_hooks import PrimitiveHookService from msprobe.mindspore.dump.jit_dump import JitDump from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell @@ -71,6 +71,7 @@ class Service: self.params_grad_info = {} self.hook_handle_dict = {} # 提前注册,确保注册尽可能多的API hook + self.api_register = get_api_register() self.register_api_hook() self.init_for_debug_level() @@ -321,7 +322,7 @@ class Service: PIJitCaptureContext.__exit__ = self.empty self.first_start = False - api_register.api_set_hook_func() + self.api_register.register_all_api() self.switch = True self.primitive_switch = True logger.info(f"Dump switch is turned on at step {self.current_iter}. 
") @@ -410,8 +411,8 @@ class Service: def register_api_hook(self): if self.config.level in [Const.LEVEL_MIX, Const.LEVEL_L1, Const.LEVEL_L2]: logger.info(f"The api {self.config.task} hook function is successfully mounted to the model.") - api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) - api_register.api_set_hook_func() + self.api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + self.api_register.register_all_api() def get_cells_and_names(self): cells_and_names_with_index = {} diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py index dc0174212e3..1245283c19e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py @@ -1,9 +1,7 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # @@ -18,8 +16,8 @@ import os from collections import namedtuple import re -import torch +import torch try: import torch_npu except ImportError: @@ -33,11 +31,9 @@ from msprobe.core.common.const import FileCheckConst, Const, CompareConst from msprobe.core.common.file_utils import FileChecker from msprobe.core.common.log import logger from msprobe.core.common.utils import CompareException +from msprobe.pytorch.hook_module.api_register import ApiTemplate, get_api_register from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate -from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate -from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate -from msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate -from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate + hf_32_standard_api = ["conv1d", "conv2d"] not_detach_set = {'resize_', 'resize_as_', 'set_', 'transpose_', 't_', 'squeeze_', 'unsqueeze_'} @@ -108,17 +104,30 @@ def exec_api(exec_params): kwargs = exec_params.kwargs is_autocast = exec_params.is_autocast autocast_dtype = exec_params.autocast_dtype - - if api_type == "Functional": - torch_api = FunctionalOPTemplate(api_name, str, False) - if api_type == "Tensor": - torch_api = TensorOPTemplate(api_name, str, False) - if api_type == "Torch": - torch_api = TorchOPTemplate(api_name, str, False) - if api_type == "Aten": + out = None + + prefix_map = Const.API_DATA_PREFIX.get(Const.PT_FRAMEWORK, {}) + if not prefix_map or api_type not in prefix_map.values() or \ + api_type not in ( + Const.FUNCTIONAL_API_TYPE_PREFIX, + Const.TENSOR_API_TYPE_PREFIX, + Const.TORCH_API_TYPE_PREFIX, + Const.ATEN_API_TYPE_PREFIX, + Const.NPU_API_TYPE_PREFIX + ): + return out + + if api_type == Const.ATEN_API_TYPE_PREFIX: torch_api = AtenOPTemplate(api_name, None, False) - if api_type == "NPU": - torch_api = NpuOPTemplate(api_name, None, False, device) + else: + api_register = 
get_api_register() + api_register.initialize_hook(None) + api_func_type = list(prefix_map.keys())[list(prefix_map.values()).index(api_type)] + api_func = api_register.ori_api_attr.get(Const.PT_FRAMEWORK + Const.SEP + api_func_type, {}).get(api_name) + if api_func is None: + return out + + torch_api = ApiTemplate(api_name, api_func, api_type, None, need_hook=False, device=device) if is_autocast: with autocast(dtype=autocast_dtype): out = torch_api.forward(*args, **kwargs) @@ -225,7 +234,7 @@ def generate_cpu_params(input_args, input_kwargs, need_backward, api_name): origin_dtype = need_raise_dtypes.pop() raise_dtype = PRECISION_MAPPING.get(origin_dtype, torch.float32) autocast_dtype = origin_dtype - + elif len(need_raise_dtypes) >= 2: raise_dtype = torch.float32 need_raise_dtypes.discard(torch.float32) diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index 16067f6d2be..70bb2106d34 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -57,7 +57,7 @@ def parameter_adapter(func): @wraps(func) def inner(self, *args, **kwargs): - if self.op_name_ == "__getitem__" and len(args) > 1 and isinstance(args[1], torch.Tensor): + if self.api_name == "__getitem__" and len(args) > 1 and isinstance(args[1], torch.Tensor): input_tensor = args[0] indices = args[1] if indices.dtype == torch.uint8: @@ -77,7 +77,7 @@ def parameter_adapter(func): else: res = [input_tensor[tensor_index] for tensor_index in indices] return getattr(torch._C._VariableFunctionsClass, "stack")(res, 0) - if self.op_name_ == "__eq__" and len(args) > 1 and args[1] is None: + if self.api_name == "__eq__" and len(args) > 1 and args[1] is None: return False return func(self, *args, **kwargs) diff --git a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_dump.py b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_dump.py index 4700de6f1f9..cc78962f401 
100644 --- a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_dump.py +++ b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_dump.py @@ -17,7 +17,7 @@ import torch from msprobe.core.common.const import Const from msprobe.core.data_dump.scope import BaseScope from msprobe.pytorch.common.log import logger -from msprobe.pytorch.hook_module.api_registry import api_register +from msprobe.pytorch.hook_module.api_register import get_api_register torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0' @@ -26,13 +26,14 @@ class ModuleDumper: def __init__(self, service): self.service = service self.hook_handle_list = [] + self.api_register = get_api_register() def start_module_dump(self, module, dump_name): - api_register.api_originality() + self.api_register.restore_all_api() self.register_hook(module, dump_name) def stop_module_dump(self): - api_register.api_modularity() + self.api_register.register_all_api() for hook_handle in self.hook_handle_list: if isinstance(hook_handle, torch.utils.hooks.RemovableHandle): hook_handle.remove() diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py new file mode 100644 index 00000000000..4154646c0f4 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import os + +import torch +import torch.distributed as dist + +from msprobe.core.common.const import Const +from msprobe.core.data_dump.api_registry import ApiRegistry +from msprobe.pytorch.common.utils import ( + torch_without_guard_version, is_gpu, torch_device_guard, parameter_adapter +) +from msprobe.pytorch.function_factory import npu_custom_functions +from msprobe.pytorch.hook_module.hook_module import HOOKModule + + +torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' + +_api_types = { + Const.PT_FRAMEWORK: { + Const.PT_API_TYPE_FUNCTIONAL: (torch.nn.functional, (torch.nn.functional,)), + Const.PT_API_TYPE_TENSOR: (torch.Tensor, (torch.Tensor,)), + Const.PT_API_TYPE_TORCH: (torch, (torch,)), + Const.PT_API_TYPE_VF: (torch._C._VariableFunctionsClass, (torch._VF,)), + Const.PT_API_TYPE_DIST: (dist, (dist, dist.distributed_c10d)) + } +} +if not is_gpu: + import torch_npu + if torch_without_guard_version: + _api_types.get(Const.PT_FRAMEWORK).update( + { + Const.PT_API_TYPE_NPU: (torch.ops.npu, (torch_npu, torch.ops.npu)) + } + ) + else: + _api_types.get(Const.PT_FRAMEWORK).update( + {Const.PT_API_TYPE_NPU: (torch_npu._C._VariableFunctionsClass, (torch_npu,))} + ) + _api_types.get(Const.PT_FRAMEWORK).update( + { + Const.PT_API_TYPE_NPU_DIST: (torch_npu.distributed, (torch_npu.distributed, + torch_npu.distributed.distributed_c10d)) + } + ) + +_inner_used_api = {} +_supported_api_list_path = (os.path.join(os.path.dirname(os.path.realpath(__file__)), Const.SUPPORT_API_FILE_NAME),) +_cuda_func_mapping = {"npu_fusion_attention": "gpu_fusion_attention"} + + +@parameter_adapter +def tensor_module_forward(module, *args, **kwargs): + return module.api_func(*args, **kwargs) + + +def dist_module_forward(module, *args, **kwargs): + handle = module.api_func(*args, **kwargs) + if kwargs.get("async_op") or module.api_name in ["isend", 
"irecv"]: + if handle and hasattr(handle, 'wait'): + handle.wait() + if module.api_name == "batch_isend_irecv": + if isinstance(handle, list): + for req in handle: + req.wait() + return handle + + +def npu_module_forward(module, *args, **kwargs): + if not module.need_hook: + if module.api_name not in npu_custom_functions: + raise Exception(f'There is not bench function {module.api_name}') + if module.device == Const.CUDA_LOWERCASE: + module.api_name = _cuda_func_mapping.get(module.api_name, module.api_name) + if module.device in [Const.CUDA_LOWERCASE, Const.CPU_LOWERCASE]: + return npu_custom_functions[module.api_name](*args, **kwargs) + return module.api_func(*args, **kwargs) + + +forward_methods = { + "Tensor": tensor_module_forward, + "Distributed": dist_module_forward, + "NPU": npu_module_forward +} + + +class ApiTemplate(HOOKModule): + def __init__(self, api_name, api_func, prefix, hook_build_func, need_hook=True, device=Const.CPU_LOWERCASE): + self.api_name = api_name + self.api_func = api_func + self.prefix = prefix + self.prefix_api_name = prefix + Const.SEP + str(api_name.split(Const.SEP)[-1]) + Const.SEP + self.need_hook = need_hook + self.device = device + if self.need_hook: + super().__init__(hook_build_func) + if prefix == Const.DIST_API_TYPE_PREFIX: + self.op_is_distributed = True + + @torch_device_guard + def forward(self, *args, **kwargs): + exec_func = forward_methods.get(self.prefix) + exec_func = functools.partial(exec_func, self) if exec_func else self.api_func + return exec_func(*args, **kwargs) + + +api_register = None + + +def get_api_register(): + global api_register + if api_register is None: + api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate) + return api_register diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py deleted file mode 100644 index 1aad89bd6e8..00000000000 --- 
a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_registry.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.distributed as dist - -from msprobe.pytorch.hook_module import wrap_torch, wrap_functional, wrap_tensor, wrap_vf, wrap_distributed, wrap_aten -from msprobe.pytorch.hook_module.wrap_aten import get_aten_ops -from msprobe.pytorch.hook_module.wrap_distributed import get_distributed_ops -from msprobe.pytorch.hook_module.wrap_functional import get_functional_ops -from msprobe.pytorch.hook_module.wrap_tensor import get_tensor_ops -from msprobe.pytorch.hook_module.wrap_torch import get_torch_ops -from msprobe.pytorch.hook_module.wrap_vf import get_vf_ops -from msprobe.pytorch.common.utils import torch_without_guard_version, npu_distributed_api, is_gpu -from msprobe.core.common.const import Const - -torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' - -if not is_gpu: - import torch_npu - from . 
import wrap_npu_custom - from .wrap_npu_custom import get_npu_ops - - -class ApiRegistry: - def __init__(self): - self.tensor_ori_attr = {} - self.torch_ori_attr = {} - self.functional_ori_attr = {} - self.distributed_ori_attr = {} - self.npu_distributed_ori_attr = {} - self.vf_ori_attr = {} - self.aten_ori_attr = {} - self.torch_npu_ori_attr = {} - - self.tensor_hook_attr = {} - self.torch_hook_attr = {} - self.functional_hook_attr = {} - self.distributed_hook_attr = {} - self.npu_distributed_hook_attr = {} - self.vf_hook_attr = {} - self.aten_hook_attr = {} - self.torch_npu_hook_attr = {} - - @staticmethod - def store_ori_attr(ori_api_group, api_list, api_ori_attr): - for api in api_list: - if '.' in api: - sub_module_name, sub_op = api.rsplit('.', 1) - sub_module = getattr(ori_api_group, sub_module_name) - api_ori_attr[api] = getattr(sub_module, sub_op) - else: - api_ori_attr[api] = getattr(ori_api_group, api) - - @staticmethod - def set_api_attr(api_group, attr_dict): - for api, api_attr in attr_dict.items(): - if '.' 
in api: - sub_module_name, sub_op = api.rsplit('.', 1) - sub_module = getattr(api_group, sub_module_name, None) - if sub_module is not None: - setattr(sub_module, sub_op, api_attr) - else: - setattr(api_group, api, api_attr) - - def api_modularity(self): - self.set_api_attr(torch.Tensor, self.tensor_hook_attr) - self.set_api_attr(torch, self.torch_hook_attr) - self.set_api_attr(torch.nn.functional, self.functional_hook_attr) - self.set_api_attr(dist, self.distributed_hook_attr) - self.set_api_attr(dist.distributed_c10d, self.distributed_hook_attr) - if not is_gpu and not torch_without_guard_version: - self.set_api_attr(torch_npu.distributed, self.npu_distributed_hook_attr) - self.set_api_attr(torch_npu.distributed.distributed_c10d, self.npu_distributed_hook_attr) - if torch_version_above_2: - self.set_api_attr(torch.ops.aten, self.aten_hook_attr) - self.set_api_attr(torch._VF, self.vf_hook_attr) - if not is_gpu: - self.set_api_attr(torch_npu, self.torch_npu_hook_attr) - - def api_originality(self): - self.set_api_attr(torch.Tensor, self.tensor_ori_attr) - self.set_api_attr(torch, self.torch_ori_attr) - self.set_api_attr(torch.nn.functional, self.functional_ori_attr) - self.set_api_attr(dist, self.distributed_ori_attr) - self.set_api_attr(dist.distributed_c10d, self.distributed_ori_attr) - if not is_gpu and not torch_without_guard_version: - self.set_api_attr(torch_npu.distributed, self.npu_distributed_ori_attr) - self.set_api_attr(torch_npu.distributed.distributed_c10d, self.npu_distributed_ori_attr) - if torch_version_above_2: - self.set_api_attr(torch.ops.aten, self.aten_ori_attr) - self.set_api_attr(torch._VF, self.vf_ori_attr) - if not is_gpu: - self.set_api_attr(torch_npu, self.torch_npu_ori_attr) - - def initialize_hook(self, hook, online_run_ut=False): - """ - initialize_hook - Args: - hook (_type_): initialize_hook - online_run_ut (bool): default False, whether online run_ut or not. - If online_run_ut is True, the hook will not wrap the aten ops. 
- """ - self.store_ori_attr(torch.Tensor, get_tensor_ops(), self.tensor_ori_attr) - wrap_tensor.wrap_tensor_ops_and_bind(hook) - for attr_name in dir(wrap_tensor.HOOKTensor): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.tensor_hook_attr[attr_name[5:]] = getattr(wrap_tensor.HOOKTensor, attr_name) - - self.store_ori_attr(torch, get_torch_ops(), self.torch_ori_attr) - wrap_torch.wrap_torch_ops_and_bind(hook) - for attr_name in dir(wrap_torch.HOOKTorchOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.torch_hook_attr[attr_name[5:]] = getattr(wrap_torch.HOOKTorchOP, attr_name) - - self.store_ori_attr(torch.nn.functional, get_functional_ops(), self.functional_ori_attr) - wrap_functional.wrap_functional_ops_and_bind(hook) - for attr_name in dir(wrap_functional.HOOKFunctionalOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.functional_hook_attr[attr_name[5:]] = getattr(wrap_functional.HOOKFunctionalOP, attr_name) - - self.store_ori_attr(dist, get_distributed_ops(), self.distributed_ori_attr) - wrap_distributed.wrap_distributed_ops_and_bind(hook) - if not is_gpu and not torch_without_guard_version: - self.store_ori_attr(torch_npu.distributed, npu_distributed_api, self.npu_distributed_ori_attr) - for attr_name in dir(wrap_distributed.HOOKDistributedOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.distributed_hook_attr[attr_name[5:]] = getattr(wrap_distributed.HOOKDistributedOP, attr_name) - if not is_gpu and not torch_without_guard_version and attr_name[5:] in npu_distributed_api: - self.npu_distributed_hook_attr[attr_name[5:]] = getattr(wrap_distributed.HOOKDistributedOP, - attr_name) - - if torch_version_above_2 and not online_run_ut: - self.store_ori_attr(torch.ops.aten, get_aten_ops(), self.aten_ori_attr) - wrap_aten.wrap_aten_ops_and_bind(hook) - for attr_name in dir(wrap_aten.HOOKAtenOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.aten_hook_attr[attr_name[5:]] = getattr(wrap_aten.HOOKAtenOP, attr_name) 
- - self.store_ori_attr(torch._VF, get_vf_ops(), self.vf_ori_attr) - wrap_vf.wrap_vf_ops_and_bind(hook) - for attr_name in dir(wrap_vf.HOOKVfOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.vf_hook_attr[attr_name[5:]] = getattr(wrap_vf.HOOKVfOP, attr_name) - - if not is_gpu: - self.store_ori_attr(torch_npu, get_npu_ops(), self.torch_npu_ori_attr) - wrap_npu_custom.wrap_npu_ops_and_bind(hook) - for attr_name in dir(wrap_npu_custom.HOOKNpuOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.torch_npu_hook_attr[attr_name[5:]] = getattr(wrap_npu_custom.HOOKNpuOP, attr_name) - - -api_register = ApiRegistry() diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py index b59d4be82f2..71dbfa8aeb1 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py @@ -28,28 +28,27 @@ class HOOKModule(nn.Module): module_count = defaultdict(int) inner_stop_hook = {} - def __init__(self, build_hook) -> None: + def __init__(self, hook_build_func) -> None: super(HOOKModule, self).__init__() self.has_overflow = False - self.prefix = "" self.current_thread = threading.current_thread().ident if self.current_thread not in HOOKModule.inner_stop_hook: HOOKModule.inner_stop_hook[self.current_thread] = False self.stop_hook = HOOKModule.inner_stop_hook.get(self.current_thread, False) if not self.stop_hook: - if hasattr(self, "prefix_op_name_"): - self.prefix = self.prefix_op_name_ - self.forward_data_collected = False - forward_pre_hook, forward_hook, backward_hook, _ = build_hook(self.prefix) - if torch_version_above_or_equal_2: - self.register_forward_pre_hook(forward_pre_hook, with_kwargs=True) - self.register_forward_hook(forward_hook, with_kwargs=True) - else: - self.register_forward_pre_hook(forward_pre_hook) - self.register_forward_hook(forward_hook) - self.register_backward_hook(backward_hook) 
+ + prefix = self.prefix_api_name if hasattr(self, "prefix_api_name") else "" + if callable(hook_build_func): + forward_pre_hook, forward_hook, backward_hook, _ = hook_build_func(prefix) + if torch_version_above_or_equal_2: + self.register_forward_pre_hook(forward_pre_hook, with_kwargs=True) + self.register_forward_hook(forward_hook, with_kwargs=True) + else: + self.register_forward_pre_hook(forward_pre_hook) + self.register_forward_hook(forward_hook) + self.register_backward_hook(backward_hook) def __call__(self, *args, **kwargs): changed = False diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml index 4bc22f51ceb..43d750e7d6a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml @@ -1912,4 +1912,8 @@ distributed: - all_to_all - all_gather_into_tensor - reduce_scatter_tensor - - batch_isend_irecv \ No newline at end of file + - batch_isend_irecv + +npu_distributed: + - isend + - irecv \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py deleted file mode 100644 index 1cd11842c31..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_distributed.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from functools import wraps -import torch.distributed as dist - -from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.common.utils import torch_device_guard -from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_yaml - - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") - - -distributed_func = {} -for f in dir(dist): - distributed_func[f] = getattr(dist, f) - - -def get_distributed_ops(): - _all_distributed_ops = dir(dist) - yaml_data = load_yaml(yaml_path) - wrap_distributed_ops = yaml_data.get('distributed') - return set(wrap_distributed_ops) & set(_all_distributed_ops) - - -class HOOKDistributedOP(object): - pass - - -class DistributedOPTemplate(HOOKModule): - def __init__(self, op_name, build_hook): - self.op_name_ = op_name - self.prefix_op_name_ = "Distributed" + Const.SEP + str(op_name) + Const.SEP - super().__init__(build_hook) - if not self.stop_hook: - self.op_is_distributed = True - - @torch_device_guard - def forward(self, *args, **kwargs): - handle = distributed_func.get(self.op_name_)(*args, **kwargs) - if kwargs.get("async_op") or self.op_name_ in ["isend", "irecv"]: - if handle and hasattr(handle, 'wait'): - handle.wait() - if self.op_name_ == "batch_isend_irecv": - if isinstance(handle, list): - for req in handle: - req.wait() - return handle - - -def wrap_distributed_op(op_name, hook): - @wraps(DistributedOPTemplate) - def distributed_op_template(*args, **kwargs): - return DistributedOPTemplate(op_name, hook)(*args, **kwargs) - - distributed_op_template.__name__ = op_name - return distributed_op_template - - -def wrap_distributed_ops_and_bind(hook): - _distributed_ops = get_distributed_ops() - for op_name in _distributed_ops: - setattr(HOOKDistributedOP, "wrap_" + str(op_name), 
wrap_distributed_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py deleted file mode 100644 index 6164169476d..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_functional.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import torch - -from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.common.utils import torch_device_guard -from msprobe.core.common.const import Const -from msprobe.pytorch.common.log import logger -from msprobe.core.common.file_utils import load_yaml - - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") - - -def get_functional_ops(): - yaml_data = load_yaml(yaml_path) - wrap_functional_ops = yaml_data.get('functional') - _all_functional_ops = dir(torch.nn.functional) - return set(wrap_functional_ops) & set(_all_functional_ops) - - -TorchFunctions = {func: getattr(torch.nn.functional, func) for func in get_functional_ops()} - - -class HOOKFunctionalOP(object): - pass - - -class FunctionalOPTemplate(HOOKModule): - def __init__(self, op_name, hook, need_hook=True): - self.op_name_ = op_name - self.prefix_op_name_ = "Functional" + Const.SEP + str(op_name) + Const.SEP - if need_hook: - 
super().__init__(hook) - - @torch_device_guard - def forward(self, *args, **kwargs): - return TorchFunctions[str(self.op_name_)](*args, **kwargs) - - -def wrap_functional_op(op_name, hook): - def functional_op_template(*args, **kwargs): - return FunctionalOPTemplate(op_name, hook)(*args, **kwargs) - - return functional_op_template - - -def wrap_functional_ops_and_bind(hook): - _functional_ops = get_functional_ops() - for op_name in _functional_ops: - setattr(HOOKFunctionalOP, "wrap_" + op_name, wrap_functional_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py deleted file mode 100644 index 1c0afc59f50..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import torch - -from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.common.utils import torch_device_guard, torch_without_guard_version -from msprobe.core.common.const import Const -from msprobe.core.common.log import logger -from msprobe.core.common.file_utils import load_yaml -from msprobe.pytorch.function_factory import npu_custom_functions - -try: - import torch_npu -except ImportError: - logger.info("Failing to import torch_npu.") - - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") -cuda_func_mapping = {"npu_fusion_attention" : "gpu_fusion_attention"} - - -def get_npu_ops(): - if torch_without_guard_version: - _npu_ops = dir(torch.ops.npu) - else: - _npu_ops = dir(torch_npu._C._VariableFunctionsClass) - yaml_data = load_yaml(yaml_path) - wrap_npu_ops = yaml_data.get('torch_npu') - return set(wrap_npu_ops) & set(_npu_ops) - - -class HOOKNpuOP(object): - pass - - -class NpuOPTemplate(HOOKModule): - - def __init__(self, op_name, hook, need_hook=True, device=Const.CPU_LOWERCASE): - self.op_name_ = op_name - self.prefix_op_name_ = "NPU" + Const.SEP + str(op_name) + Const.SEP - self.need_hook = need_hook - self.device = device - if need_hook: - super().__init__(hook) - - @torch_device_guard - def forward(self, *args, **kwargs): - if not self.need_hook: - if self.op_name_ not in npu_custom_functions: - raise Exception(f'There is not bench function {self.op_name_}') - if self.device == Const.CUDA_LOWERCASE: - self.op_name_ = cuda_func_mapping.get(self.op_name_, self.op_name_) - if self.device in [Const.CUDA_LOWERCASE, Const.CPU_LOWERCASE]: - return npu_custom_functions[self.op_name_](*args, **kwargs) - if torch_without_guard_version: - return getattr(torch.ops.npu, str(self.op_name_))(*args, **kwargs) - else: - return getattr(torch_npu._C._VariableFunctionsClass, str(self.op_name_))(*args, **kwargs) - - -def wrap_npu_op(op_name, hook): - def 
npu_op_template(*args, **kwargs): - return NpuOPTemplate(op_name, hook)(*args, **kwargs) - return npu_op_template - - -def wrap_npu_ops_and_bind(hook): - _npu_ops = get_npu_ops() - for op_name in _npu_ops: - setattr(HOOKNpuOP, "wrap_" + str(op_name), wrap_npu_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py deleted file mode 100644 index f93c09a1241..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_tensor.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import torch - -from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.common.utils import torch_device_guard, parameter_adapter -from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_yaml - - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") - - -def get_tensor_ops(): - _tensor_ops = dir(torch.Tensor) - yaml_data = load_yaml(yaml_path) - wrap_tensor_ops = yaml_data.get('tensor') - return set(wrap_tensor_ops) & set(_tensor_ops) - - -TensorOps = {op: getattr(torch.Tensor, op) for op in get_tensor_ops()} - - -class HOOKTensor(object): - pass - - -class TensorOPTemplate(HOOKModule): - - def __init__(self, op_name, hook, need_hook=True): - self.op_name_ = op_name - self.prefix_op_name_ = "Tensor" + Const.SEP + str(op_name) + Const.SEP - if need_hook: - super().__init__(hook) - - @torch_device_guard - @parameter_adapter - def forward(self, *args, **kwargs): - return TensorOps[str(self.op_name_)](*args, **kwargs) - - -def wrap_tensor_op(op_name, hook): - - def tensor_op_template(*args, **kwargs): - return TensorOPTemplate(op_name, hook)(*args, **kwargs) - - return tensor_op_template - - -def wrap_tensor_ops_and_bind(hook): - _tensor_ops = get_tensor_ops() - for op_name in _tensor_ops: - setattr(HOOKTensor, "wrap_" + str(op_name), wrap_tensor_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py deleted file mode 100644 index fc9d61c206b..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_torch.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import torch - -from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.common.utils import torch_device_guard -from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_yaml - - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") - - -def get_torch_ops(): - _torch_ops = [] - yaml_data = load_yaml(yaml_path) - wrap_torch_ops = yaml_data.get('torch') - for operation in wrap_torch_ops: - if '.' in operation: - operation_sub_module_name, operation_sub_op = operation.rsplit('.', 1) - operation_sub_module = getattr(torch, operation_sub_module_name) - if operation_sub_op in dir(operation_sub_module): - _torch_ops.append(operation) - else: - if hasattr(torch, operation): - _torch_ops.append(operation) - return set(_torch_ops) - - -TorchOps = {} -for op in get_torch_ops(): - if '.' 
in op: - sub_module_name, sub_op = op.rsplit('.', 1) - sub_module = getattr(torch, sub_module_name) - TorchOps[op] = getattr(sub_module, sub_op) - else: - TorchOps[op] = getattr(torch, op) - - - -class HOOKTorchOP(object): - pass - - -class TorchOPTemplate(HOOKModule): - - def __init__(self, op_name, hook, need_hook=True): - self.op_name_ = op_name - self.prefix_op_name_ = "Torch" + Const.SEP + str(op_name) + Const.SEP - if need_hook: - super().__init__(hook) - - @torch_device_guard - def forward(self, *args, **kwargs): - return TorchOps[str(self.op_name_)](*args, **kwargs) - - -def wrap_torch_op(op_name, hook): - - def torch_op_template(*args, **kwargs): - return TorchOPTemplate(op_name, hook)(*args, **kwargs) - - return torch_op_template - - -def wrap_torch_ops_and_bind(hook): - _torch_ops = get_torch_ops() - for op_name in _torch_ops: - setattr(HOOKTorchOP, "wrap_" + op_name, wrap_torch_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py deleted file mode 100644 index 05ee3bc9225..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_vf.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import torch - -from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_yaml -from msprobe.pytorch.hook_module.hook_module import HOOKModule -from msprobe.pytorch.common.utils import torch_device_guard - - -cur_path = os.path.dirname(os.path.realpath(__file__)) -yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") - - -def get_vf_ops(): - yaml_data = load_yaml(yaml_path) - wrap_vf_ops = yaml_data.get('_VF') - return wrap_vf_ops - - -class HOOKVfOP(object): - pass - - -class VfOPTemplate(HOOKModule): - def __init__(self, op_name, hook): - self.op_name_ = op_name - self.prefix_op_name_ = "VF" + Const.SEP + str(op_name) + Const.SEP - super().__init__(hook) - - @torch_device_guard - def forward(self, *args, **kwargs): - return getattr(torch._C._VariableFunctionsClass, str(self.op_name_))(*args, **kwargs) - - -def wrap_vf_op(op_name, hook): - def vf_op_template(*args, **kwargs): - return VfOPTemplate(op_name, hook)(*args, **kwargs) - - return vf_op_template - - -def wrap_vf_ops_and_bind(hook): - _vf_ops = get_vf_ops() - for op_name in _vf_ops: - setattr(HOOKVfOP, "wrap_" + op_name, wrap_vf_op(op_name, hook)) diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index fd81a7f1cf0..11eef489de2 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -30,7 +30,7 @@ from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import get_rank_if_initialized, is_recomputation from msprobe.pytorch.dump.kernel_dump.kernel_config import create_kernel_config_json from msprobe.pytorch.dump.module_dump.module_processer import ModuleProcesser -from msprobe.pytorch.hook_module.api_registry import api_register +from msprobe.pytorch.hook_module.api_register import get_api_register from msprobe.pytorch.hook_module.hook_module import HOOKModule from 
msprobe.pytorch.hook_module.register_optimizer_hook import register_optimizer_hook @@ -58,6 +58,7 @@ class Service: self.params_grad_info = {} self.hook_handle_dict = {} # 提前注册,确保注册尽可能多的API hook + self.api_register = get_api_register() self.register_api_hook() self.init_for_debug_level() @@ -370,11 +371,10 @@ class Service: def register_api_hook(self): if self.config.level in [Const.LEVEL_MIX, Const.LEVEL_L1, Const.LEVEL_L2]: logger.info_on_rank_0(f"The api {self.config.task} hook function is successfully mounted to the model.") - api_register.initialize_hook( - functools.partial(self.build_hook, BaseScope.Module_Type_API), - self.config.online_run_ut + self.api_register.initialize_hook( + functools.partial(self.build_hook, BaseScope.Module_Type_API) ) - api_register.api_modularity() + self.api_register.register_all_api() def register_module_hook(self): if self.config.level in [Const.LEVEL_L0, Const.LEVEL_MIX]: diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_api_registry.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_api_registry.py new file mode 100644 index 00000000000..c67c5d8ee9e --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/test_api_registry.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from unittest import TestCase +from unittest.mock import patch + +import torch + +from msprobe.core.common.const import Const +from msprobe.core.data_dump.api_registry import _get_attr, ApiWrapper + + +class TestFunctions(TestCase): + def test__get_attr(self): + module = torch + + attr_name = 'linalg.norm' + target_value = torch.linalg.norm + actual_value = _get_attr(module, attr_name) + self.assertEqual(target_value, actual_value) + + attr_name = 'norm' + target_value = torch.norm + actual_value = _get_attr(module, attr_name) + self.assertEqual(target_value, actual_value) + + +class TestApiWrapper(TestCase): + api_types = { + Const.PT_FRAMEWORK: { + Const.PT_API_TYPE_TORCH: (torch, torch), + } + } + supported_api_list_path = (Const.SUPPORT_API_FILE_NAME,) + yaml_value = {'torch': ['linalg.norm', 'norm']} + api_names = {Const.PT_FRAMEWORK: {'torch': {'linalg.norm', 'norm'}}} + + def test___init__(self): + with patch('msprobe.core.data_dump.api_registry.load_yaml', return_value=self.yaml_value): + api_wrapper = ApiWrapper(self.api_types, self.supported_api_list_path) + self.assertEqual(api_wrapper.api_types, self.api_types) + self.assertEqual(api_wrapper.api_list_paths, self.supported_api_list_path) + self.assertEqual(api_wrapper.api_names, self.api_names) + self.assertEqual(api_wrapper.wrapped_api_functions, {}) + + api_wrapper = ApiWrapper(self.api_types, Const.SUPPORT_API_FILE_NAME) + self.assertEqual(api_wrapper.api_list_paths, list(self.supported_api_list_path)) + + with self.assertRaises(Exception) as context: + api_wrapper = ApiWrapper(self.api_types, (Const.SUPPORT_API_FILE_NAME, Const.SUPPORT_API_FILE_NAME)) + self.assertEqual(str(context.exception), + "The number of api_list_paths must be equal to the number of frameworks in 'api_types', " + "when api_list_paths is a list or tuple.") + + def test__get_api_names(self): + target_value = self.api_names + with patch('msprobe.core.data_dump.api_registry.load_yaml', return_value=self.yaml_value): + 
api_wrapper = ApiWrapper(self.api_types, self.supported_api_list_path) + actual_value = api_wrapper._get_api_names() + self.assertEqual(target_value, actual_value) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/free_benchmark/test_ms_api_pynative_self_check.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/free_benchmark/test_ms_api_pynative_self_check.py index e589dd4d587..4872527e4c2 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/free_benchmark/test_ms_api_pynative_self_check.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/free_benchmark/test_ms_api_pynative_self_check.py @@ -23,12 +23,18 @@ from mindspore import Tensor, mint, ops from msprobe.core.common.const import Const from msprobe.mindspore.common.const import FreeBenchmarkConst from msprobe.mindspore.common.log import logger -from msprobe.mindspore.dump.hook_cell.api_registry import api_register -from msprobe.mindspore.free_benchmark.api_pynative_self_check import (ApiPyNativeSelfCheck, check_all_tensor, - check_self, data_pre_deal, - deal_fuzzed_and_original_result, - get_module, get_supported_ops, - get_target_arg_index, need_wrapper_func) +from msprobe.mindspore.free_benchmark.api_pynative_self_check import ( + ApiPyNativeSelfCheck, + check_all_tensor, + check_self, + data_pre_deal, + deal_fuzzed_and_original_result, + get_module, + get_supported_ops, + get_target_arg_index, + need_wrapper_func, + _api_register +) from msprobe.mindspore.free_benchmark.common.config import Config from msprobe.mindspore.free_benchmark.common.handler_params import HandlerParams from msprobe.mindspore.free_benchmark.common.utils import Tools @@ -83,8 +89,8 @@ class TestApiPyNativeSelfCheck(TestCase): self.assertEqual(self_checker.ori_func, target_ori_func) def test_handle(self): - with patch.object(api_register, "initialize_hook") as mock_init_hook, \ - patch.object(api_register, "api_set_hook_func") as mock_set_hook: + with patch.object(_api_register, "initialize_hook") as 
mock_init_hook, \ + patch.object(_api_register, "register_all_api") as mock_set_hook: self.checker.handle() mock_init_hook.assert_called_with(self.checker.build_hook) mock_set_hook.assert_called_once() @@ -156,8 +162,8 @@ class TestApiPyNativeSelfCheck(TestCase): mock_warning.reset_mock() Config.stage = Const.FORWARD with patch.object(logger, "info") as mock_info, \ - patch.object(api_register, "api_set_ori_func") as mock_set_ori, \ - patch.object(api_register, "api_set_hook_func") as mock_set_hook, \ + patch.object(_api_register, "restore_all_api") as mock_set_ori, \ + patch.object(_api_register, "register_all_api") as mock_set_hook, \ patch("msprobe.mindspore.free_benchmark.api_pynative_self_check.deal_fuzzed_and_original_result", return_value="ret"): args = (1.0, 1.0) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py index 912830ea1ab..c14635dc64e 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_service.py @@ -21,12 +21,13 @@ from unittest.mock import MagicMock, patch from mindspore import nn, ops from msprobe.core.common.exceptions import MsprobeException -from msprobe.core.common.utils import Const, DumpPathAggregation +from msprobe.core.common.utils import Const +from msprobe.core.data_dump.api_registry import ApiRegistry from msprobe.core.data_dump.scope import BaseScope from msprobe.mindspore.cell_processor import CellProcessor from msprobe.mindspore.common.log import logger from msprobe.mindspore.common.utils import register_backward_hook_functions -from msprobe.mindspore.dump.hook_cell.api_registry import ApiRegistry, api_register +from msprobe.mindspore.dump.hook_cell.api_register import get_api_register from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell from msprobe.mindspore.dump.jit_dump import JitDump from msprobe.mindspore.service import Service @@ 
-49,7 +50,7 @@ class TestService(unittest.TestCase): self.service.primitive_hook_service = MagicMock() def tearDown(self) -> None: - api_register.api_set_ori_func() + get_api_register().restore_all_api() def test_init(self): self.assertEqual(self.service.config.level, "L0") @@ -197,7 +198,7 @@ class TestService(unittest.TestCase): @patch.object(Service, 'need_end_service', return_value=False) @patch.object(JitDump, 'set_config') @patch.object(JitDump, 'set_data_collector') - @patch.object(ApiRegistry, 'api_set_hook_func') + @patch.object(ApiRegistry, 'register_all_api') def test_start_with_jit_dump_enabled(self, mock_api_set_hook_func, mock_set_data_collector, mock_set_config, mock_need_end_service, mock_register_cell_hook, mock_register_primitive_hook): @@ -269,7 +270,7 @@ class TestService(unittest.TestCase): primitive_combined_name) @patch.object(ApiRegistry, 'initialize_hook') - @patch.object(ApiRegistry, 'api_set_hook_func') + @patch.object(ApiRegistry, 'register_all_api') @patch("msprobe.mindspore.service.logger.info") def test_register_hook_new_with_level_mix(self, mock_logger, mock_api_set_hook_func, mock_initialize_hook): self.service.config.level = Const.LEVEL_MIX diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut_utils.py index 0cf30461aec..751d3f6affd 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut_utils.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_run_ut_utils.py @@ -1,13 +1,28 @@ -# coding=utf-8 +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest -from unittest.mock import patch, MagicMock + import torch + from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import * from msprobe.core.common.file_utils import create_directory, write_csv class TestRunUtUtils(unittest.TestCase): - + def setUp(self): save_path = "temp_save_path" create_directory(save_path) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py index cdc922cc98d..42035932e56 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os import io import unittest @@ -19,7 +34,7 @@ class TestParameterAdapter(unittest.TestCase): def setUp(self): self.func_mock = MagicMock() self.decorated_func = parameter_adapter(self.func_mock) - self.op_name_ = "__getitem__" + self.api_name = "__getitem__" def test_handle_masked_select_bfloat16(self): input_tensor = torch.tensor([1.0, 2.0], dtype=torch.bfloat16) @@ -45,7 +60,7 @@ class TestParameterAdapter(unittest.TestCase): self.assertTrue(torch.equal(result, torch.tensor([20.0, 30.0]))) def test_op_name_eq_with_none(self): - self.op_name_ = "__eq__" + self.api_name = "__eq__" args = (torch.tensor([1]), None) result = self.decorated_func(self, *args) self.assertFalse(result) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/dump/test_module_dump.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/dump/test_module_dump.py index 63d6abc3a24..5aaf0820a78 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/dump/test_module_dump.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/dump/test_module_dump.py @@ -18,8 +18,10 @@ from unittest.mock import patch, MagicMock import torch import torch.nn as nn + +from msprobe.core.data_dump.api_registry import ApiRegistry from msprobe.pytorch import PrecisionDebugger -from msprobe.pytorch.hook_module.api_registry import api_register +from msprobe.pytorch.hook_module.api_register import get_api_register from msprobe.pytorch.service import torch_version_above_or_equal_2 @@ -27,12 +29,12 @@ class TestModuleDumper(unittest.TestCase): @classmethod def setUpClass(cls): PrecisionDebugger._instance = None - api_register.api_originality() + get_api_register().restore_all_api() @classmethod def tearDownClass(cls): PrecisionDebugger._instance = None - api_register.api_originality() + get_api_register().restore_all_api() def setUp(self): self.module = nn.Linear(8, 4) @@ -41,7 +43,7 @@ class TestModuleDumper(unittest.TestCase): def test_stop_module_dump(self): self.module_dumper.hook_handle_list.extend([1, 2, 3]) - 
with patch('msprobe.pytorch.dump.module_dump.module_dump.api_register') as mock_api_register: + with patch.object(ApiRegistry, 'register_all_api') as mock_api_register: mock_handle1 = MagicMock(spec=torch.utils.hooks.RemovableHandle) mock_handle2 = MagicMock(spec=torch.utils.hooks.RemovableHandle) self.module_dumper.hook_handle_list.extend([mock_handle1, mock_handle2]) @@ -50,7 +52,7 @@ class TestModuleDumper(unittest.TestCase): mock_handle1.remove.assert_called_once() mock_handle2.remove.assert_called_once() self.assertEqual(self.module_dumper.hook_handle_list, []) - mock_api_register.api_modularity.assert_called_once() + mock_api_register.assert_called_once() def test_register_hook(self): self.module_dumper.register_hook(self.module, "TestModule") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py deleted file mode 100644 index 837ad23df76..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_api_registry.py +++ /dev/null @@ -1,130 +0,0 @@ -import unittest -from msprobe.pytorch.hook_module.api_registry import ApiRegistry, torch_version_above_2, is_gpu - - -class TestApiRegistry(unittest.TestCase): - - def test_store_ori_attr(self): - class A(): - a1 = 1 - class B(): - a = A() - b1 = 1 - b2 = 2 - - api_list = ["a.a1", "b1", "b2"] - expect_output = {"a.a1":1, "b1":1, "b2":2} - actual_output = dict() - ApiRegistry.store_ori_attr(B, api_list, actual_output) - self.assertEqual(actual_output, expect_output) - - - def test_set_api_attr(self): - class A(): - a1 = 1 - class B(): - a = A().__class__ - b1 = 1 - - attr_dict = {"a.a2":2, "b2":2, "b3":3} - ApiRegistry.set_api_attr(B, attr_dict) - - for k, v in attr_dict.items(): - if '.' 
in k: - sub_module_name, sub_op = k.rsplit('.', 1) - sub_module = getattr(B, sub_module_name, None) - - self.assertEqual(getattr(sub_module, sub_op), v) - else: - self.assertEqual(getattr(B, k), v) - - def test_api_modularity(self): - - import torch - import torch.distributed as dist - #import torch_npu #门禁没有安装torch_npu - from msprobe.pytorch.hook_module.api_registry import torch_without_guard_version, npu_distributed_api, is_gpu, torch_version_above_2 - - - - reg = ApiRegistry() - attr_dict = {"b2":2, "b3":3} - reg.tensor_hook_attr = attr_dict - reg.torch_hook_attr = attr_dict - reg.functional_hook_attr = attr_dict - reg.distributed_hook_attr = attr_dict - reg.npu_distributed_hook_attr = attr_dict - reg.aten_hook_attr = attr_dict - reg.vf_hook_attr = attr_dict - reg.torch_npu_hook_attr = attr_dict - - reg.api_modularity() - self.assertEqual(torch.Tensor.b2, 2) - - self.assertEqual(torch.b2, 2) - self.assertEqual(torch.nn.functional.b2, 2) - self.assertEqual(dist.b2, 2) - self.assertEqual(dist.distributed_c10d.b2, 2) - #if not is_gpu and not torch_without_guard_version: - #self.assertEqual(torch_npu.distributed.b2, 2) - #self.assertEqual(torch_npu.distributed.distributed_c10d.b2, 2) - if torch_version_above_2: - self.assertEqual(torch.ops.aten.b2, 2) - self.assertEqual(torch._VF.b2, 2) - #if not is_gpu: - #self.assertEqual(torch_npu.b2, 2) - - - def test_api_originality(self): - import torch - import torch.distributed as dist - #import torch_npu #门禁没有安装torch_npu - from msprobe.pytorch.hook_module.api_registry import torch_without_guard_version, npu_distributed_api, is_gpu, torch_version_above_2 - - - - reg = ApiRegistry() - attr_dict = {"b2":2, "b3":3} - reg.tensor_hook_attr = attr_dict - reg.torch_hook_attr = attr_dict - reg.functional_hook_attr = attr_dict - reg.distributed_hook_attr = attr_dict - reg.npu_distributed_hook_attr = attr_dict - reg.aten_hook_attr = attr_dict - reg.vf_hook_attr = attr_dict - reg.torch_npu_hook_attr = attr_dict - - 
reg.api_originality() - self.assertEqual(torch.Tensor.b2, 2) - - self.assertEqual(torch.b2, 2) - self.assertEqual(torch.nn.functional.b2, 2) - self.assertEqual(dist.b2, 2) - self.assertEqual(dist.distributed_c10d.b2, 2) - #if not is_gpu and not torch_without_guard_version: - #self.assertEqual(torch_npu.distributed.b2, 2) - #self.assertEqual(torch_npu.distributed.distributed_c10d.b2, 2) - if torch_version_above_2: - self.assertEqual(torch.ops.aten.b2, 2) - self.assertEqual(torch._VF.b2, 2) - #if not is_gpu: - #self.assertEqual(torch_npu.b2, 2) - - def test_initialize_hook(self): - def hook_test(): - pass - - reg = ApiRegistry() - reg.initialize_hook(hook_test) - empty_list = [] - self.assertFalse(empty_list==reg.tensor_hook_attr) - self.assertFalse(empty_list==reg.torch_hook_attr) - self.assertFalse(empty_list==reg.functional_hook_attr) - self.assertFalse(empty_list==reg.distributed_hook_attr) - self.assertFalse(empty_list==reg.npu_distributed_hook_attr) - if torch_version_above_2: - #print(True) - self.assertFalse(empty_list==reg.aten_hook_attr) - if not is_gpu: - #print(True) - self.assertFalse(empty_list==reg.torch_npu_hook_attr) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py deleted file mode 100644 index 246feb56bec..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_distributed.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest -import torch.distributed as dist -from msprobe.pytorch.hook_module.wrap_distributed import * - -class TestWrapDistributed(unittest.TestCase): - def hook(name, prefix): - def forward_pre_hook(nope, input, kwargs): - return input, kwargs - - def forward_hook(nope, input, kwargs, result): - return 2 - - def backward_hook(): - pass - - def forward_hook_torch_version_below_2(): - pass - - return forward_pre_hook, forward_hook, backward_hook, 
forward_hook_torch_version_below_2 - - def test_get_distributed_ops(self): - ops = get_distributed_ops() - self.assertIsInstance(ops, set) - - def test_DistributedOPTemplate(self): - self.setUp() - op_name = 'all_reduce' - if op_name in get_distributed_ops(): - op = DistributedOPTemplate(op_name, self.hook) - self.assertEqual(op.op_name_, op_name) - - def test_wrap_distributed_op(self): - op_name = 'all_reduce' - if op_name in get_distributed_ops(): - wrapped_op = wrap_distributed_op(op_name, self.hook) - self.assertTrue(callable(wrapped_op)) - - def test_wrap_distributed_ops_and_bind(self): - wrap_distributed_ops_and_bind(self.hook) - for op_name in get_distributed_ops(): - self.assertTrue(hasattr(HOOKDistributedOP, "wrap_" + str(op_name))) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py deleted file mode 100644 index 282551e3cef..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_functional.py +++ /dev/null @@ -1,73 +0,0 @@ -import unittest -import torch -import torch.nn.functional as F -from msprobe.pytorch.hook_module.wrap_functional import get_functional_ops, \ - wrap_functional_ops_and_bind, HOOKFunctionalOP -from msprobe.pytorch.common.utils import remove_dropout - - -class TestDropoutFunctions(unittest.TestCase): - - def setUp(self): - self.input_tensor = torch.ones(10, 10) - remove_dropout() - - def test_function_dropout_no_dropout(self): - output = F.dropout(self.input_tensor, p = 0., training = True) - self.assertTrue(torch.equal(self.input_tensor, output)) - - def test_function_dropout_train_vs_eval(self): - output_train = F.dropout(self.input_tensor, p = 0., training = True) - output_eval = F.dropout(self.input_tensor, p = 0., training = False) - self.assertTrue(torch.equal(output_train, output_eval)) - - def test_function_dropout_invalid_probability(self): - with 
self.assertRaises(ValueError): - F.dropout(self.input_tensor, p = -0.1) - with self.assertRaises(ValueError): - F.dropout(self.input_tensor, p = 1.1) - - def test_function_dropout2d_no_dropout(self): - output = F.dropout2d(self.input_tensor, p = 0., training = True) - self.assertTrue(torch.equal(self.input_tensor, output)) - - def test_function_dropout2d_train_vs_eval(self): - output_train = F.dropout2d(self.input_tensor, p = 0., training = True) - output_eval = F.dropout2d(self.input_tensor, p = 0., training = False) - self.assertTrue(torch.equal(output_train, output_eval)) - - def test_function_dropout2d_invalid_probability(self): - with self.assertRaises(ValueError): - F.dropout2d(self.input_tensor, p = -0.1) - with self.assertRaises(ValueError): - F.dropout2d(self.input_tensor, p = 1.1) - - def test_function_dropout3d_no_dropout(self): - input_tensor_3d = self.input_tensor.unsqueeze(0) - output = F.dropout3d(input_tensor_3d, p = 0., training = True) - self.assertTrue(torch.equal(input_tensor_3d, output)) - - def test_function_dropout3d_train_vs_eval(self): - input_tensor_3d = self.input_tensor.unsqueeze(0) - output_train = F.dropout3d(input_tensor_3d, p = 0., training = True) - output_eval = F.dropout3d(input_tensor_3d, p = 0., training = False) - self.assertTrue(torch.equal(output_train, output_eval)) - - def test_function_dropout3d_invalid_probability(self): - input_tensor_3d = self.input_tensor.unsqueeze(0) - with self.assertRaises(ValueError): - F.dropout3d(input_tensor_3d, p = -0.1) - with self.assertRaises(ValueError): - F.dropout3d(input_tensor_3d, p = 1.1) - - -class TestWrapFunctional(unittest.TestCase): - - def test_get_functional_ops(self): - expected_ops = {'relu', 'sigmoid', 'softmax'} - actual_ops = get_functional_ops() - self.assertTrue(expected_ops.issubset(actual_ops)) - - def test_wrap_functional_ops_and_bind(self): - wrap_functional_ops_and_bind(None) - self.assertTrue(hasattr(HOOKFunctionalOP, 'wrap_relu')) diff --git 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_npu_custom.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_npu_custom.py deleted file mode 100644 index 573d6d000f3..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_npu_custom.py +++ /dev/null @@ -1,43 +0,0 @@ -import unittest -from unittest.mock import MagicMock, patch - -from msprobe.core.common.const import Const -from msprobe.core.common.log import logger -from msprobe.pytorch.function_factory import npu_custom_functions -from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate - -try: - import torch_npu -except ImportError: - logger.info("Failing to import torch_npu.") - - -class TestNpuOPTemplate(unittest.TestCase): - - def setUp(self): - self.mock_hook = MagicMock(return_value=(MagicMock(), MagicMock(), MagicMock(), None)) - self.template = NpuOPTemplate("sum", self.mock_hook) - - def test_init(self): - self.assertEqual(self.template.op_name_, "sum") - self.assertEqual(self.template.prefix_op_name_, f"NPU{Const.SEP}sum{Const.SEP}") - self.assertTrue(self.template.need_hook) - self.assertEqual(self.template.device, Const.CPU_LOWERCASE) - - @patch('torch.ops.npu.sum') - def test_forward_without_hook(self, mock_npu_sum): - self.template.need_hook = False - npu_custom_functions["sum"] = MagicMock(return_value="output_from_custom") - - result = self.template.forward(1, 2, key='value') - self.assertEqual(result, "output_from_custom") - mock_npu_sum.assert_not_called() - - @patch('torch.ops.npu.sum') - def test_forward_with_hook(self, mock_npu_sum): - self.template.need_hook = True - mock_npu_sum.return_value = "output_from_npu" - - result = self.template.forward(1, 2, key='value') - self.assertEqual(result, "output_from_npu") - mock_npu_sum.assert_called_once_with(1, 2, key='value') diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py 
b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py deleted file mode 100644 index 6868c5bda7a..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_tensor.py +++ /dev/null @@ -1,40 +0,0 @@ -import unittest -import torch -from msprobe.pytorch.hook_module.wrap_tensor import get_tensor_ops, HOOKTensor, TensorOPTemplate, wrap_tensor_op, wrap_tensor_ops_and_bind - -class TestWrapTensor(unittest.TestCase): - - def hook(name, prefix): - def forward_pre_hook(nope, input, kwargs): - return input, kwargs - - def forward_hook(nope, input, kwargs, result): - return 2 - - def backward_hook(): - pass - - def forward_hook_torch_version_below_2(): - pass - - return forward_pre_hook, forward_hook, backward_hook, forward_hook_torch_version_below_2 - - def test_get_tensor_ops(self): - result = get_tensor_ops() - self.assertIsInstance(result, set) - - def test_HOOKTensor(self): - hook_tensor = HOOKTensor() - self.assertIsInstance(hook_tensor, HOOKTensor) - - def test_TensorOPTemplate(self): - tensor_op_template = TensorOPTemplate('add', self.hook) - self.assertTrue(tensor_op_template.op_name_, 'add') - - def test_wrap_tensor_op(self): - wrapped_op = wrap_tensor_op('add', self.hook) - self.assertTrue(callable(wrapped_op)) - - def test_wrap_tensor_ops_and_bind(self): - wrap_tensor_ops_and_bind(self.hook) - self.assertTrue(hasattr(HOOKTensor, 'wrap_add')) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py deleted file mode 100644 index e0e4d000c0b..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_torch.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest -import torch -from msprobe.pytorch.hook_module.wrap_torch import * - -class TestWrapTorch(unittest.TestCase): - - def hook(name, prefix): - def forward_pre_hook(nope, input, kwargs): - return input, 
kwargs - - def forward_hook(nope, input, kwargs, result): - return 2 - - def backward_hook(): - pass - - def forward_hook_torch_version_below_2(): - pass - - return forward_pre_hook, forward_hook, backward_hook, forward_hook_torch_version_below_2 - - def setUp(self): - - self.op_name = 'add' - self.torch_op = wrap_torch_op(self.op_name, self.hook) - - def test_get_torch_ops(self): - self.setUp() - ops = get_torch_ops() - self.assertIsInstance(ops, set) - self.assertIn(self.op_name, ops) - - def test_TorchOPTemplate(self): - self.setUp() - template = TorchOPTemplate(self.op_name, self.hook) - self.assertEqual(template.op_name_, self.op_name) - self.assertEqual(template.prefix_op_name_, "Torch." + str(self.op_name) + ".") - - def test_forward(self): - self.setUp() - template = TorchOPTemplate(self.op_name, self.hook) - result = template.forward(torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])) - torch.testing.assert_close(result, torch.tensor([5, 7, 9])) - - def test_wrap_torch_ops_and_bind(self): - self.setUp() - wrap_torch_ops_and_bind(self.hook) - self.assertTrue(hasattr(HOOKTorchOP, "wrap_" + self.op_name)) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py deleted file mode 100644 index 98efb4bc5b8..00000000000 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/hook_module/test_wrap_vf.py +++ /dev/null @@ -1,11 +0,0 @@ -import unittest -import torch -from msprobe.pytorch.hook_module import wrap_vf - -class TestWrapVF(unittest.TestCase): - def setUp(self): - self.hook = lambda x: x - - def test_get_vf_ops(self): - ops = wrap_vf.get_vf_ops() - self.assertIsInstance(ops, list) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/demo_model.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/demo_model.py index f5de4194402..820b1f7476d 100644 --- 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/demo_model.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/demo_model.py @@ -1,7 +1,25 @@ +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch import torch.nn.functional as F from msprobe.pytorch import TrainerMon from msprobe.pytorch.common import seed_all +from msprobe.pytorch.hook_module.api_register import get_api_register + +get_api_register().restore_all_api() device = torch.device('cpu') dtype_float32 = torch.float32 diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py index f2bc82ffafc..4178e2ef8fb 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_csv2tb.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil import random @@ -11,6 +26,9 @@ from tensorboard.backend.event_processing.event_accumulator import EventAccumula from msprobe.pytorch import TrainerMon from msprobe.core.common.const import MonitorConst from msprobe.pytorch.monitor.csv2tb import parse_step_fn, csv2tensorboard_by_step +from msprobe.pytorch.hook_module.api_register import get_api_register + +get_api_register().restore_all_api() base_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_module_hook.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_module_hook.py index eefacb73c8e..66d016f9487 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_module_hook.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/monitor/test_module_hook.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os.path import shutil import unittest @@ -8,10 +23,13 @@ import torch from msprobe.core.common.const import MonitorConst, Const from torch import distributed as dist +from msprobe.pytorch import TrainerMon +from msprobe.pytorch.hook_module.api_register import get_api_register from msprobe.pytorch.monitor.module_hook import CommunicationContext, GradContext, ModuleHookContext, \ param_is_not_tensor_parallel_duplicate, param_is_data_parallel_duplicate from msprobe.test.pytorch_ut.monitor.demo_model import monitor_demo -from msprobe.pytorch import TrainerMon + +get_api_register().restore_all_api() base_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py index 6687f311105..a814ce6213e 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_service.py @@ -1,7 +1,23 @@ +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import unittest from unittest.mock import patch, mock_open, MagicMock from msprobe.core.common.utils import Const +from msprobe.core.data_dump.api_registry import ApiRegistry from msprobe.pytorch.debugger.debugger_config import DebuggerConfig from msprobe.pytorch.pt_config import parse_json_config from msprobe.pytorch.service import Service @@ -87,8 +103,8 @@ class TestService(unittest.TestCase): self.service.build_hook = MagicMock() self.config.level = "L1" with patch("msprobe.pytorch.service.logger.info_on_rank_0") as mock_logger, \ - patch("msprobe.pytorch.service.api_register.initialize_hook") as mock_init_hook, \ - patch("msprobe.pytorch.service.api_register.api_modularity") as mock_api_modularity: + patch.object(ApiRegistry, "initialize_hook") as mock_init_hook, \ + patch.object(ApiRegistry, 'register_all_api') as mock_api_modularity: self.service.register_api_hook() self.assertEqual(mock_logger.call_count, 1) mock_init_hook.assert_called_once() -- Gitee From d25c46e560dfa5c07073a5afbfa3866ff5ef3c59 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 10 Mar 2025 09:22:01 +0800 Subject: [PATCH 224/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../visualization/compare/graph_comparator.py | 150 ++++++++---------- .../visualization/compare/multi_mapping.py | 26 +++ .../msprobe/visualization/graph_service.py | 2 +- 3 files changed, 90 insertions(+), 88 deletions(-) diff --git a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py index d54cff197a7..8f2e0e0fee0 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py @@ -48,6 +48,69 @@ 
def multi_compare(self, multi_yaml_path):
    """
    Compare node groups of the two graphs according to a many-to-many mapping file.

    Each mapping entry pairs a group of node ids of graph_n with a group of node ids of
    graph_b. Both groups are merged through MultiMapping.merge_nodes and the resulting
    nodes are compared with each other; the comparison result is written onto the
    graph_n side node.

    Args:
        multi_yaml_path: path of the YAML mapping file
    """
    multi_mapping = MultiMapping.validate_yaml(load_yaml(multi_yaml_path))
    if not multi_mapping:
        logger.warning(
            f'The multi mapping file {multi_yaml_path} content is incorrect, and the mapping is not effective.')
        return
    if self.ma.compare_mode == GraphConst.REAL_DATA_COMPARE:
        # Positions of the real-data metrics inside the real-data result header
        id_list = [CompareConst.COMPARE_RESULT_HEADER.index(x) for x in CompareConst.ALL_COMPARE_INDEX]
    for node_n_ids, node_b_ids in multi_mapping.items():
        # Skip mapping entries whose ids are rejected by the graph validation
        if not MultiMapping.validate_ids_in_graph(node_n_ids, self.graph_n):
            continue
        if not MultiMapping.validate_ids_in_graph(node_b_ids, self.graph_b, GraphConst.JSON_BENCH_KEY):
            continue
        merged_items_n = MultiMapping.merge_nodes(node_n_ids, self.graph_n)
        merged_items_b = MultiMapping.merge_nodes(node_b_ids, self.graph_b)
        node_n = merged_items_n.multi_node
        node_n_data = self.data_n_dict
        node_b = merged_items_b.multi_node
        node_b_data = self.data_b_dict

        # For a multi_collection node the dump data is rebuilt from the mapped ids
        if node_n.op == NodeOp.multi_collection:
            node_n_data = MultiMapping.get_merged_nodes_data(node_n_ids, self.data_n_dict, node_n.id)
        if node_b.op == NodeOp.multi_collection:
            node_b_data = MultiMapping.get_merged_nodes_data(node_b_ids, self.data_b_dict, node_b.id)

        node = self._compare_node_with_mapping(node_n, {node_n.id: node_b.id})
        if not node:
            continue
        compare_result_list = compare_node([node_n.id, node_b.id],
                                           [node_n_data, node_b_data],
                                           self.stack_json_data, self.ma.compare_mode)
        if not compare_result_list:
            continue
        # In real-data mode compare_result_list carries no precision metrics yet; they are
        # obtained from the real-data comparison interface
        if self.ma.compare_mode == GraphConst.REAL_DATA_COMPARE:
            for compare_result in compare_result_list:
                # Prepare the arguments required by the real-data comparison interface
                full_param_name_n = compare_result[0]
                full_param_name_b = compare_result[1]

                data_name_n = MultiMapping.get_dump_data_name(merged_items_n, full_param_name_n)
                data_name_b = MultiMapping.get_dump_data_name(merged_items_b, full_param_name_b)
                op_name_mapping_dict = {full_param_name_n: [data_name_n, data_name_b]}

                real_compare_result = run_real_data_single([full_param_name_n, full_param_name_b],
                                                           op_name_mapping_dict, self.dump_path_param,
                                                           self.framework, self.is_cross_frame)
                # Not enough metrics returned to fill every header position: leave the row as is
                if len(real_compare_result) < len(id_list):
                    continue
                for i, index in enumerate(id_list):
                    # Write each real-data metric into its position of the result row
                    compare_result[index] = real_compare_result[i]
            compare_dict = {}
            for item in compare_result_list:
                if not isinstance(item, (list, tuple)) or not item:
                    continue
                # Key every row by its parameter name after replace_param_name with node_n.id
                compare_dict[MultiMapping.replace_param_name(item[0], node_n.id)] = item
            precision_index, _ = self.ma.parse_result(node_n, [compare_dict])
            node_n.data[GraphConst.JSON_INDEX_KEY] = precision_index
        else:
            self.add_compare_result_to_node(node_n, compare_result_list)
merged_items_n.multi_node - node_n_data = self.data_n_dict - node_b = merged_items_b.multi_node - node_b_data = self.data_b_dict - - if node_n.op == NodeOp.multi_collection: - node_n_data = MultiMapping.get_merged_nodes_data(node_n_ids, self.data_n_dict, node_n.id) - if node_b.op == NodeOp.multi_collection: - node_b_data = MultiMapping.get_merged_nodes_data(node_b_ids, self.data_b_dict, node_b.id) - - self._compare_node_with_mapping(node_n, {node_n.id: node_b.id}) - compare_result_list = compare_node([node_n.id, node_b.id], - [node_n_data, node_b_data], - self.stack_json_data, self.ma.compare_mode) - if not compare_result_list: - continue - # 真实数据模式,compare_result_list里没有精度指标,需要调用真实数据的比对接口得到指标 - if self.ma.compare_mode == GraphConst.REAL_DATA_COMPARE: - for compare_result in compare_result_list: - # 准备真实数据比对接口需要的参数 - full_param_name_n = compare_result[0] - full_param_name_b = compare_result[1] - - data_name_n = self._get_dump_data_name(merged_items_n, full_param_name_n) - data_name_b = self._get_dump_data_name(merged_items_b, full_param_name_b) - op_name_mapping_dict = {full_param_name_n: [data_name_n, data_name_b]} - - real_compare_result = run_real_data_single([full_param_name_n, full_param_name_b], - op_name_mapping_dict, self.dump_path_param, - self.framework, self.is_cross_frame) - if len(real_compare_result) < len(id_list): - continue - for i, index in enumerate(id_list): - # 根据索引,将真实数据指标插入表头相应位置 - compare_result[index] = real_compare_result[i] - compare_dict = {} - for item in compare_result_list: - if not isinstance(item, (list, tuple)) or not item: - continue - compare_dict[MultiMapping.replace_param_name(item[0], node_n.id)] = item - precision_index, _ = self.ma.parse_result(node_n, [compare_dict]) - node_n.data[GraphConst.JSON_INDEX_KEY] = precision_index - else: - self.add_compare_result_to_node(node_n, compare_result_list) - - @staticmethod - def _get_dump_data_name(merged_items, full_param_name): - """ - 根据节点参数名称,从融合节点信息中获取此参数的真实数据名称 - Args: - 
@staticmethod
def get_dump_data_name(merged_items, full_param_name):
    """
    Look up the dump data name of a parameter in the merged node information.

    Args:
        merged_items: merged node information (multi_node, start_node, end_node)
        full_param_name: parameter name, e.g. Module.layer.Linear.forward.0.input.0

    Returns: the dump data name, e.g. Module.layer.Linear.forward.0.input.0.pt,
        or "-1" when the name cannot be parsed or no data name is recorded

    """
    try:
        _, state = get_name_and_state(full_param_name)
    except Exception:
        return "-1"

    multi_node = merged_items.multi_node
    is_collection = multi_node.op == NodeOp.multi_collection
    # Same test as the chained comparison `Const.OUTPUT == state in full_param_name`
    is_output = Const.OUTPUT == state and state in full_param_name

    # For a multi_collection node, outputs are recorded on the end node and inputs on the
    # start node; otherwise both are recorded on the node itself
    if is_output:
        holder = merged_items.end_node if is_collection else multi_node
        param_table = holder.output_data
    else:
        holder = merged_items.start_node if is_collection else multi_node
        param_table = holder.input_data
    return param_table.get(full_param_name, {}).get("data_name", "-1")
a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py index 8f2e0e0fee0..4fc506cb5c1 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py @@ -49,6 +49,11 @@ class GraphComparator: self._postcompare() def multi_compare(self, multi_yaml_path): + """ + 多对多节点比对,需建立数量n与数量m节点之间的映射关系 + Args: + multi_yaml_path: 映射文件路径 + """ multi_mapping = MultiMapping.validate_yaml(load_yaml(multi_yaml_path)) if not multi_mapping: logger.warning( -- Gitee From bddaefadda9c43692c8e9cc46ec1097e6484f3a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=94=E7=82=B3=E7=BF=94?= <1120200577@qq.com> Date: Thu, 6 Mar 2025 10:53:48 +0800 Subject: [PATCH 226/333] update slow rank pp stage --- .../msprof_analyze/cluster_analyse/README.md | 3 +- .../pytorch_data_preprocessor.py | 8 +- .../recipes/slow_rank_pp_stage/__init__.py | 14 + .../slow_rank_pp_stage/slow_rank_pp_stage.py | 295 ++++++++++++++++++ 4 files changed, 317 insertions(+), 3 deletions(-) create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/__init__.py create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/slow_rank_pp_stage.py diff --git a/profiler/msprof_analyze/cluster_analyse/README.md b/profiler/msprof_analyze/cluster_analyse/README.md index 1c3761a2202..dc5c801e537 100644 --- a/profiler/msprof_analyze/cluster_analyse/README.md +++ b/profiler/msprof_analyze/cluster_analyse/README.md @@ -81,7 +81,8 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( | slow_link | 集群慢链路异常分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/SlowLink目录下输出交付件stats.ipynb。 | 否 | | cluster_time_summary | 
集群场景性能数据分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db和analysis.db文件。--export_type为db时,输出交付件cluster_analysis.db,db里面有ClusterTimeSummary,不支持导出notebook。 | 否 | | cluster_time_compare_summary | 集群场景性能数据对比分析,使用前集群数据必须先分析cluster_time_summary,需要配合--bp参数使用。输入性能数据需要基于cluster_analysis_output下的cluster_analysis.db文件。--export_type为db时,输出交付件cluster_analysis.db,db文件中有对比结果的表ClusterTimeCompareSummary,不支持导出notebook。 | 否 | - + | slow_rank_pp_stage | 集群场景性能数据pp stage通信对比分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。输入性能数据中MetaData表如果没有包含训练任务的并行策略,则需要通过--tp --pp --dp手动传入,数据类型为正整数。--export_type为db时,输出交付件cluster_analysis.db,db文件中有分析结果PPAnalysisResult和P2PAnalysisResult,不支持导出notebook。 | 否 | + --parallel_mode参数示例如下: ```bash diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py index d2706059062..09a46bc7183 100644 --- a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py +++ b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/pytorch_data_preprocessor.py @@ -40,8 +40,12 @@ class PytorchDataPreprocessor(DataPreprocessor): if file_name.startswith(self.PROFILER_INFO_HEAD) and file_name.endswith(self.PROFILER_INFO_EXTENSION): file_path = os.path.join(dir_name, file_name) config = FileManager.read_json_file(file_path) - self.data_type.add(config.get(Constant.CONFIG, {}).get(Constant.EXPER_CONFIG, {}). - get(Constant.EXPER_EXPORT_TYPE, Constant.TEXT)) + export_type = (config.get(Constant.CONFIG, {}).get(Constant.EXPER_CONFIG, {}). 
+ get(Constant.EXPER_EXPORT_TYPE, Constant.TEXT)) + if isinstance(export_type, list): + self.data_type.add(Constant.DB if Constant.DB in export_type else Constant.TEXT) + else: + self.data_type.add(export_type) rank_id_map[rank_id].append(dir_name) try: diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/__init__.py b/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/__init__.py new file mode 100644 index 00000000000..a355e5a7f08 --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/slow_rank_pp_stage.py b/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/slow_rank_pp_stage.py new file mode 100644 index 00000000000..fd5bdc05dc0 --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank_pp_stage/slow_rank_pp_stage.py @@ -0,0 +1,295 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class SlowRankPPStageAnalysis(BaseRecipeAnalysis):
    """
    Recipe that analyses communication time per pipeline-parallel (pp) stage.

    For every step the communication ops are handed to SlowRankPPStageStepAnalysis;
    the per-step results are concatenated and the vote results are written to the
    cluster analysis database.
    """
    TP_SIZE = "tensor_model_parallel_size"
    PP_SIZE = "pipeline_model_parallel_size"
    DP_SIZE = "data_parallel_size"

    def __init__(self, params):
        super().__init__(params)
        logger.info("SlowRank PPstage analysis init.")

        self.p2p_analysis_result = None
        self.pp_analysis_result = None
        self.p2p_vote_result = None
        self.pp_vote_result = None

        # None when the parallel strategy could not be determined; run() then does nothing
        self.distributed_args = self.load_distributed_args()

    @property
    def base_dir(self):
        """Name of the directory that contains this recipe module."""
        return os.path.basename(os.path.dirname(__file__))

    @classmethod
    def add_parser_argument(cls, parser):
        """Register the optional --tp / --pp / --dp integer arguments on the parser."""
        parser.add_argument("--tp", type=int, help=cls.TP_SIZE, default=None)
        parser.add_argument("--pp", type=int, help=cls.PP_SIZE, default=None)
        parser.add_argument("--dp", type=int, help=cls.DP_SIZE, default=None)

    def reducer_func(self, mapper_res):
        """
        Concatenate the per-rank mapper results, ignoring ranks that returned None.

        Returns: the concatenated DataFrame, or None when every mapper result is None
        """
        mapper_res = [df for df in mapper_res if df is not None]
        if not mapper_res:
            logger.error("Mapper data is None.")
            return None
        return pd.concat(mapper_res)

    def run(self, context):
        """
        Run the per-step analysis and export the result.

        The "step" column of every result holds the position of the step group in the
        iteration order of groupby("step"), starting from 0.
        """
        if self.distributed_args is None:
            return
        mapper_res = self.mapper_func(context)
        comm_ops_df = self.reducer_func(mapper_res)
        if comm_ops_df is None:
            return

        pp_stage_rank_map = self.map_rank_pp_stage()

        step_results = []
        for step_id, (_, df_one_step) in enumerate(comm_ops_df.groupby("step")):
            # (p2p_analysis, p2p_vote, pp_analysis, pp_vote) of this step
            results = SlowRankPPStageStepAnalysis(df_one_step).analysis(pp_stage_rank_map)
            for result in results:
                result["step"] = step_id
            step_results.append(results)

        # pd.concat raises ValueError on an empty list, so stop here when there is no step group
        if not step_results:
            logger.error("No communication ops found for any step, SlowRank PPstage analysis skipped.")
            return

        p2p_analysis_list, p2p_vote_list, pp_analysis_list, pp_vote_list = zip(*step_results)
        self.p2p_analysis_result = pd.concat(p2p_analysis_list)
        self.p2p_vote_result = pd.concat(p2p_vote_list)
        self.pp_analysis_result = pd.concat(pp_analysis_list)
        self.pp_vote_result = pd.concat(pp_vote_list)

        if self._export_type == Constant.DB:
            self.save_db()
        else:
            logger.error("SlowRank PPstage is not supported for notebook export type.")

    def save_db(self):
        """Dump the p2p and pp vote results into the cluster analysis database."""
        # NOTE(review): the vote results are stored under the *AnalysisResult table names,
        # while p2p_analysis_result / pp_analysis_result are not dumped - confirm this is intended.
        self.dump_data(self.p2p_vote_result, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, "P2PAnalysisResult")
        self.dump_data(self.pp_vote_result, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, "PPAnalysisResult")

    def map_rank_pp_stage(self):
        """
        Build the rank -> pp stage index mapping.

        Ranks are numbered consecutively from 0 and every stage owns tp_size * dp_size
        consecutive ranks.
        """
        tp_size = self.distributed_args.get(self.TP_SIZE, 1)
        pp_size = self.distributed_args.get(self.PP_SIZE, 1)
        dp_size = self.distributed_args.get(self.DP_SIZE, 1)

        ranks_per_stage = tp_size * dp_size
        return {rank: rank // ranks_per_stage for rank in range(pp_size * ranks_per_stage)}

    def load_distributed_args(self):
        """
        Determine the tp / pp / dp sizes of the training job.

        The --tp / --pp / --dp arguments are used when all three are given; otherwise the
        "distributed_args" entry of the META_DATA table in the first rank's
        ascend_pytorch_profiler_{rank_id}.db is used.

        Returns: dict keyed by TP_SIZE / PP_SIZE / DP_SIZE, or None when the sizes are
            missing or invalid
        """
        tp_size = self._extra_args.get("tp", None)
        pp_size = self._extra_args.get("pp", None)
        dp_size = self._extra_args.get("dp", None)

        if tp_size and pp_size and dp_size:
            if tp_size <= 0 or pp_size <= 0 or dp_size <= 0:
                logger.error("Invalid distributed_args, tp pp dp < 0.")
                return None
            return {
                self.TP_SIZE: tp_size,
                self.DP_SIZE: dp_size,
                self.PP_SIZE: pp_size,
            }

        if not self._data_map:
            logger.error("No profiling data found, distributed args can not be loaded.")
            return None
        rank_id = list(self._data_map.keys())[0]
        profiler_db_path = self._data_map[rank_id]
        db_path = os.path.join(profiler_db_path, Constant.SINGLE_OUTPUT, f"ascend_pytorch_profiler_{rank_id}.db")
        if not os.path.exists(db_path):
            logger.error(f"Db_file: {db_path} not exist.")
            return None

        try:
            service = DatabaseService(db_path)
            service.add_table_for_query("META_DATA", ["name", "value"])
            df = service.query_data().get("META_DATA", None)
            distributed_args = df.loc[df["name"] == "distributed_args", "value"]
            if distributed_args.empty:
                logger.error("Distributed args not in profiling files, please input manually.")
                # Without this return the empty Series would answer every .get() below with
                # its default and the analysis would silently run with tp = pp = dp = 1
                return None
            distributed_args = json.loads(distributed_args.values[0])
        except Exception as err:
            logger.error(err)
            logger.error("Distributed args not in profiling files, please input manually.")
            return None

        if not isinstance(distributed_args, dict):
            logger.error("Invalid distributed_args in profiling files, please input manually.")
            return None
        tp_size = distributed_args.get(self.TP_SIZE, 1)
        pp_size = distributed_args.get(self.PP_SIZE, 1)
        dp_size = distributed_args.get(self.DP_SIZE, 1)
        if not isinstance(tp_size, int) or not isinstance(pp_size, int) or not isinstance(dp_size, int):
            logger.error("Invalid distributed_args in profiling files, please input manually.")
            return None
        if tp_size <= 0 or pp_size <= 0 or dp_size <= 0:
            logger.error("Invalid distributed_args in profiling files, please input manually.")
            return None
        return {
            self.TP_SIZE: tp_size,
            self.PP_SIZE: pp_size,
            self.DP_SIZE: dp_size,
        }

    def _mapper_func(self, data_map, analysis_class):
        """Read the communication time export of one rank's profiler database."""
        profiler_db_path = data_map.get(Constant.PROFILER_DB_PATH)
        df = CommunicationTimeExport(profiler_db_path, analysis_class).read_export_db()
        return df
class SlowRankPPStageStepAnalysis:
    """
    Analysis of the communication ops of a single step.

    comm_ops is a DataFrame with at least the columns "opName", "rank" and
    "communication_time". Ops are addressed by their position in the DataFrame.
    """

    _ANALYSIS_COLUMNS = ["ppIdx", "opName", "minTime", "maxTime", "meanTime", "count", "perpetratorRank"]
    _VOTE_COLUMNS = ["rankId", "minimumTimes"]

    def __init__(self, comm_ops):
        self.comm_ops = comm_ops
        self.exclude_ranks = []

    @staticmethod
    def _shorten_op_name(op_name):
        """
        Reduce an op name of the form OPTYPE__GROUPHASH_IDX_1 to OPTYPE_IDX.

        Returns: the shortened name, or None when op_name does not have that form
        """
        name_parts = op_name.split("__")
        if len(name_parts) != 2:
            return None
        suffix_parts = name_parts[1].split("_")
        if len(suffix_parts) != 3:
            return None
        return "_".join([name_parts[0], suffix_parts[1]])

    def grouping_pp_stage_ops(self, pp_stage_rank_map):
        """
        Group op positions by pp stage and shortened op name.

        Ops whose shortened name contains "send" or "receive" go to the p2p group, all
        others to the pp group. Ops of excluded ranks and ops with an unexpected name
        format are skipped.

        Args:
            pp_stage_rank_map: dict mapping rank -> pp stage index

        Returns: (p2p_op_group, pp_op_group), each {stage: {short op name: [positions]}}

        Raises:
            KeyError: a rank that is not excluded is missing from pp_stage_rank_map
        """
        p2p_op_group = defaultdict(lambda: defaultdict(list))
        pp_op_group = defaultdict(lambda: defaultdict(list))

        excluded = set(self.exclude_ranks)
        op_name_arr = self.comm_ops["opName"].values
        rank_id_arr = self.comm_ops["rank"].values
        for idx, (rank, op_name) in enumerate(zip(rank_id_arr, op_name_arr)):
            # Check the exclusion first so that an excluded rank never needs a stage entry
            if rank in excluded:
                continue
            op_name_short = self._shorten_op_name(op_name)
            if op_name_short is None:
                continue
            pp_stage_idx = pp_stage_rank_map[rank]
            if "send" in op_name_short or "receive" in op_name_short:
                p2p_op_group[pp_stage_idx][op_name_short].append(idx)
            else:
                pp_op_group[pp_stage_idx][op_name_short].append(idx)

        return p2p_op_group, pp_op_group

    def analysis_pp_stage(self, vote_group):
        """
        Compute the time statistics of every (stage, op name) group and the rank votes.

        The rank that owns the op with the minimum communication time of a group is
        recorded as "perpetratorRank" and receives one vote.

        Args:
            vote_group: {stage: {short op name: [positions]}} from grouping_pp_stage_ops

        Returns: (analysis_result, vote_result); vote_result is indexed by "rankId"
        """
        communication_time_arr = self.comm_ops["communication_time"].values
        rank_id_arr = self.comm_ops["rank"].values

        rank_vote = defaultdict(int)
        analysis_rows = []
        for pp_idx, ops_same_group in vote_group.items():
            for op_name, ops in ops_same_group.items():
                communication_time_list = [communication_time_arr[op_idx] for op_idx in ops]
                min_time = min(communication_time_list)
                # First op of the group that reaches the minimum time
                min_op_rank = rank_id_arr[ops[communication_time_list.index(min_time)]]
                rank_vote[min_op_rank] += 1
                analysis_rows.append([
                    pp_idx, op_name,
                    min_time,
                    max(communication_time_list),
                    sum(communication_time_list) // len(communication_time_list),
                    len(ops),
                    min_op_rank
                ])

        # Build both frames in one go instead of enlarging them row by row
        analysis_result = pd.DataFrame(analysis_rows, columns=self._ANALYSIS_COLUMNS)
        vote_result = pd.DataFrame(list(rank_vote.items()), columns=self._VOTE_COLUMNS)
        vote_result.set_index(["rankId"], inplace=True)

        return analysis_result, vote_result

    def analysis(self, pp_stage_rank_map):
        """
        Run the analysis of this step.

        Returns: (p2p_analysis_result, p2p_vote_result, pp_analysis_result, pp_vote_result)
        """
        self.select_exclude_ranks()
        p2p_op_group, pp_op_group = self.grouping_pp_stage_ops(pp_stage_rank_map)
        p2p_analysis_result, p2p_vote_result = self.analysis_pp_stage(p2p_op_group)
        pp_analysis_result, pp_vote_result = self.analysis_pp_stage(pp_op_group)
        return p2p_analysis_result, p2p_vote_result, pp_analysis_result, pp_vote_result

    def select_exclude_ranks(self):
        """Exclude every rank whose op names do not all occur the same number of times."""
        for rank, ops_of_rank in self.comm_ops.groupby("rank"):
            ops_num = ops_of_rank.groupby("opName").size()
            # Guard against duplicates when analysis() is called more than once
            if ops_num.nunique() > 1 and rank not in self.exclude_ranks:
                self.exclude_ranks.append(rank)
def mock_reduce(api_name, input_args, input_kwargs):
    """
    Benchmark result of a reduce over the first positional tensor of every rank.

    Args:
        api_name: name of the distributed api
        input_args: list with the positional arguments of every rank
        input_kwargs: list with the keyword arguments of every rank

    Returns: the reduced tensor; None when no rank supplies a tensor or when the reduce
        op is not one of the DistributedCheckConst.RedOpType_* values

    Raises:
        ValueError: input_args or input_kwargs is empty
    """
    check_object_type(input_args, list)
    check_object_type(input_kwargs, list)
    if len(input_args) < 1 or len(input_kwargs) < 1:
        raise ValueError("input_args and input_kwargs should have at least 1 element")

    reduce_op = get_distributed_args(api_name, input_args[0], input_kwargs[0], DistributedCheckConst.OP)
    tensors = [arg[0] for arg in input_args if len(arg) > 0]
    if not tensors:
        return None

    if reduce_op == DistributedCheckConst.RedOpType_SUM:
        return torch.stack(tensors).sum(dim=0)
    if reduce_op == DistributedCheckConst.RedOpType_PRODUCT:
        return torch.stack(tensors).prod(dim=0)
    if reduce_op == DistributedCheckConst.RedOpType_MIN:
        return torch.stack(tensors).min(dim=0).values
    if reduce_op == DistributedCheckConst.RedOpType_MAX:
        return torch.stack(tensors).max(dim=0).values

    bitwise_ops = (DistributedCheckConst.RedOpType_BAND,
                   DistributedCheckConst.RedOpType_BOR,
                   DistributedCheckConst.RedOpType_BXOR)
    if reduce_op not in bitwise_ops:
        # Unsupported reduce op: keep returning None as before
        return None

    # Fold in place into a copy so that the first rank's input tensor is left untouched
    reduce_tensor = tensors[0].clone()
    for t in tensors[1:]:
        if reduce_op == DistributedCheckConst.RedOpType_BAND:
            reduce_tensor &= t
        elif reduce_op == DistributedCheckConst.RedOpType_BOR:
            reduce_tensor |= t
        else:
            reduce_tensor ^= t
    return reduce_tensor


def mock_scatter(api_name, input_args, input_kwargs):
    """
    Benchmark result of scatter: the scatter_list argument of the source rank.

    The source rank is located in input_args by its offset from the smallest rank of
    the group.

    Raises:
        ValueError: input_args or input_kwargs is empty, the group has no ranks, or
            input_args has no entry for the source rank
    """
    check_object_type(input_args, list)
    check_object_type(input_kwargs, list)
    if len(input_args) < 1 or len(input_kwargs) < 1:
        raise ValueError("input_args and input_kwargs should have at least 1 element")

    src = get_distributed_args(api_name, input_args[0], input_kwargs[0], DistributedCheckConst.SRC)
    group = get_distributed_args(api_name, input_args[0], input_kwargs[0], DistributedCheckConst.GROUP)
    group_ranks = group.get(DistributedCheckConst.GROUP_RANKS, [])
    if not group_ranks:
        raise ValueError("group_ranks should not be empty")
    real_src = src - min(group_ranks)
    if len(input_args) <= real_src:
        raise ValueError("input_args should have at least {} element".format(real_src + 1))
    scatter_list = get_distributed_args(api_name, input_args[real_src], input_kwargs[real_src],
                                        DistributedCheckConst.SCATTER_LIST)
    return scatter_list


def mock_all_gather(api_name, input_args, input_kwargs):
    """Benchmark result of all_gather: the second positional argument of every rank, in rank order."""
    check_object_type(input_args, list)
    check_object_type(input_kwargs, list)
    # Ranks that supply fewer than two positional arguments are skipped
    return [data[1] for data in input_args if len(data) > 1]


def mock_all_to_all(api_name, input_args, input_kwargs):
    """
    Benchmark result of all_to_all.

    The second positional argument of every rank is taken as that rank's input tensor
    list; output rank r receives element r of every rank's list.

    Returns: list (one entry per rank) of lists of tensors

    Raises:
        ValueError: an input tensor list has fewer entries than there are ranks
    """
    check_object_type(input_args, list)
    check_object_type(input_kwargs, list)
    input_tensor_list = [data[1] for data in input_args if len(data) >= 2]
    world_size = len(input_tensor_list)
    output_tensor_list = []
    for rank in range(world_size):
        output_chunk = []
        for data in input_tensor_list:
            if len(data) <= rank:
                raise ValueError("input_tensor_list should have at least {} element".format(rank + 1))
            output_chunk.append(data[rank])
        output_tensor_list.append(output_chunk)
    return output_tensor_list


def mock_all_to_all_single(api_name, input_args, input_kwargs):
    """
    Benchmark result of all_to_all_single.

    The second positional argument of every rank is taken as that rank's input tensor.
    The tensors are stacked and dimensions 0 and 1 are swapped, so entry r of the
    result holds element r of every rank's input.

    Returns: list of tensors, [] when no rank supplies an input tensor
    """
    check_object_type(input_args, list)
    check_object_type(input_kwargs, list)
    input_tensor_list = [data[1] for data in input_args if len(data) >= 2]
    if not input_tensor_list:
        return []
    input_tensor = torch.stack(input_tensor_list)
    # transpose(0, 1) equals t() for a 2-D stack and also works when the per-rank tensors
    # have more than one dimension, where t() raises
    output_tensor = input_tensor.transpose(0, 1)
    output_tensor_list = [tensor.clone() for tensor in output_tensor]
    return output_tensor_list
ValueError("device_out should not be empty") + if len(bench_out) <= rank: + raise ValueError("bench_out should have at least rank+1 outputs") + compare_result = torch.equal(device_out[0].cpu(), bench_out[rank]) + + return CompareConst.PASS if compare_result else CompareConst.ERROR + + +def compare_all_gather(device_out, bench_out, rank): + if len(device_out) < 1: + raise ValueError("device_out should not be empty") + device_out_cpu = [tensor.cpu() for tensor in device_out[0]] + compare_result = all(torch.equal(a, b) for a, b in zip(device_out_cpu, bench_out)) + + return CompareConst.PASS if compare_result else CompareConst.ERROR + + +def compare_all_to_all(device_out, bench_out, rank): + if len(device_out) < 1: + raise ValueError("device_out should not be empty") + device_out_cpu = [tensor.cpu() for tensor in device_out[0]] + compare_result = all(torch.equal(a, b) for a, b in zip(device_out_cpu, bench_out[rank])) + + return CompareConst.PASS if compare_result else CompareConst.ERROR + + +def compare_all_to_all_single(device_out, bench_out, rank): + if len(device_out) < 1: + raise ValueError("device_out should not be empty") + compare_result = torch.equal(device_out[0].cpu(), bench_out[rank]) + + return CompareConst.PASS if compare_result else CompareConst.ERROR diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py index 9502ab3530d..86d552d81ad 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py @@ -18,9 +18,11 @@ from typing import Callable from msprobe.pytorch.api_accuracy_checker.run_ut.distributed_bench_function import \ - mock_broadcast + mock_broadcast, mock_reduce, mock_scatter, mock_all_gather, mock_all_to_all, \ + mock_all_to_all_single from 
msprobe.pytorch.api_accuracy_checker.run_ut.distributed_compare_function import \ - compare_broadcast + compare_broadcast, compare_all_reduce, compare_scatter, \ + compare_all_gather, compare_all_to_all, compare_all_to_all_single from msprobe.core.common.const import DistributedCheckConst @@ -28,6 +30,9 @@ class DistributedFunctionRegistry: def __init__(self): self.compare_functions = {} self.bench_functions = {} + self.support_api_list = [DistributedCheckConst.BROADCAST, DistributedCheckConst.ALL_REDUCE, + DistributedCheckConst.SCATTER, DistributedCheckConst.ALL_GATHER, + DistributedCheckConst.ALL_TO_ALL, DistributedCheckConst.ALL_TO_ALL_SINGLE] def register_compare_function(self, api_name: str, function: Callable): self.compare_functions[api_name] = function @@ -49,4 +54,13 @@ class DistributedFunctionRegistry: distributed_func_registry = DistributedFunctionRegistry() distributed_func_registry.register_bench_function(DistributedCheckConst.BROADCAST, mock_broadcast) distributed_func_registry.register_compare_function(DistributedCheckConst.BROADCAST, compare_broadcast) - +distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_REDUCE, mock_reduce) +distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_REDUCE, compare_all_reduce) +distributed_func_registry.register_bench_function(DistributedCheckConst.SCATTER, mock_scatter) +distributed_func_registry.register_compare_function(DistributedCheckConst.SCATTER, compare_scatter) +distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_GATHER, mock_all_gather) +distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_GATHER, compare_all_gather) +distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_TO_ALL, mock_all_to_all) +distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_TO_ALL, compare_all_to_all) +distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_TO_ALL_SINGLE, 
mock_all_to_all_single) +distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_TO_ALL_SINGLE, compare_all_to_all_single) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 2c6793d801b..80975130899 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -48,6 +48,9 @@ RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" RESULT_CSV_HEADER = [['API_NAME', 'RANK', 'COMPARE_RESULT', 'MESSAGE']] DistributedCheckParams = namedtuple("DistributedCheckParams", ["api_full_name", "all_args", "all_kwargs", "group_ranks", "result_file_path", "checker_config"]) +special_rank_api_list = [DistributedCheckConst.SCATTER, + DistributedCheckConst.ALL_TO_ALL, + DistributedCheckConst.ALL_TO_ALL_SINGLE] def cleanup(): @@ -127,6 +130,15 @@ def run_distributed_check(forward_contents, real_data_paths, result_file_path, c for api_full_name, api_info_dict in forward_content.items(): _, api_name = extract_basic_api_segments(api_full_name) + + if api_name not in distributed_func_registry.support_api_list: + message = "The api {} doesn't support distributed check.".format(api_full_name) + logger.warning(message) + result_rows = [] + df_row = list([api_full_name, rank, CompareConst.SKIP, message]) + result_rows.append(df_row) + write_csv(result_rows, result_file_path) + continue if api_info_dict.get('used'): continue @@ -200,9 +212,10 @@ def run_hccl(rank, distributed_config): device_args, _ = generate_device_params(rank_args, rank_kwargs, False, api_name) logger.info("Start to check distributed api {} in rank {}.".format(api_full_name, local_rank)) distributed_func.get(api_name)(*device_args) - + if api_name in special_rank_api_list: + local_rank = rank 
compare_function = distributed_func_registry.get_compare_function(api_name) - status = compare_function(device_args, benchmark_result) + status = compare_function(device_args, benchmark_result, local_rank) message = '' result_rows = [] df_row = list([api_full_name, local_rank, status, message]) -- Gitee From 5c59468ee80b49ea8190118d37e02b0af1d976e9 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 16:54:47 +0800 Subject: [PATCH 228/333] =?UTF-8?q?=E6=80=A7=E8=83=BD=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/mindspore_processor.py | 38 +++++++---- .../msprobe/core/data_dump/json_writer.py | 66 ++++++++++++++++++- 2 files changed, 91 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 8c4542a1917..3f2101db61c 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -44,6 +44,8 @@ class MindsporeDataProcessor(BaseDataProcessor): "dtype": self.analyze_dtype_in_kwargs } self._async_dump_cache = {} + self.stat_stack_list = [] + self.config.use_delayed_cpu_transfer = True @staticmethod def get_md5_for_tensor(x): @@ -60,11 +62,11 @@ class MindsporeDataProcessor(BaseDataProcessor): def get_stat_info_sync(data): tensor_stat = TensorStatInfo() if data.dtype == ms.bool_: - data_np = data.asnumpy() - tensor_stat.max = np.max(data_np).item() - tensor_stat.min = np.min(data_np).item() + # 如果是 bool 类型,可以直接用算子在 NPU 上运算 + tensor_stat.max = ops.ReduceMax()(data) + tensor_stat.min = ops.ReduceMin()(data) elif not data.shape: - tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() + tensor_stat.max = tensor_stat.min = tensor_stat.mean = 
tensor_stat.norm = data elif data.dtype == ms.complex64 or data.dtype == ms.complex128: data_abs = np.abs(data.asnumpy()) tensor_stat.max = np.max(data_abs).item() @@ -72,6 +74,7 @@ class MindsporeDataProcessor(BaseDataProcessor): tensor_stat.mean = np.mean(data_abs).item() tensor_stat.norm = np.linalg.norm(data_abs).item() else: + # 对于其它数据,确保为浮点类型(避免立即搬运到 CPU) if not ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) api_register.norm_inner_op_set_ori_func() @@ -82,10 +85,10 @@ class MindsporeDataProcessor(BaseDataProcessor): get_norm_value = api_register.mint_ops_ori_attr.get("norm", mint.norm) else: get_norm_value = api_register.functional_ori_attr.get("norm", ops.norm) - tensor_stat.max = get_max_value(data).item() - tensor_stat.min = get_min_value(data).item() - tensor_stat.mean = get_mean_value(data).item() - tensor_stat.norm = get_norm_value(data).item() + tensor_stat.max = get_max_value(data) + tensor_stat.min = get_min_value(data) + tensor_stat.mean = get_mean_value(data) + tensor_stat.norm = get_norm_value(data) api_register.norm_inner_op_set_hook_func() return tensor_stat @@ -159,13 +162,24 @@ class MindsporeDataProcessor(BaseDataProcessor): 'shape': tensor.shape } - if tensor_stat.stack_tensor_stat is None: + # 如果配置了延迟 CPU 搬运,则将统计值存入全局 buffer,并返回占位索引 + if self.config.use_delayed_cpu_transfer: + stat_values = [ + tensor_stat.max, + tensor_stat.min, + tensor_stat.mean, + tensor_stat.norm + ] + + placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) + + tensor_json.update({"tensor_stat_index": placeholder_index}) + else: + # 原有逻辑,立即转换成 CPU 数值(调用 transfer_type 会执行 .item()) tensor_json.update({'Max': self.transfer_type(tensor_stat.max)}) tensor_json.update({'Min': self.transfer_type(tensor_stat.min)}) tensor_json.update({'Mean': self.transfer_type(tensor_stat.mean)}) tensor_json.update({'Norm': self.transfer_type(tensor_stat.norm)}) - else: - tensor_json.update({'tensor_stat': tensor_stat.stack_tensor_stat}) 
if self.config.summary_mode == Const.MD5 and not self.config.async_dump: tensor_md5 = self.get_md5_for_tensor(tensor) tensor_json.update({Const.MD5: tensor_md5}) @@ -191,7 +205,7 @@ class TensorDataProcessor(MindsporeDataProcessor): else: save_tensor_as_npy(tensor, file_path) return single_arg - + def _analyze_numpy(self, ndarray, suffix): dump_data_name, file_path = self.get_save_file_path(suffix) save_npy(ndarray, file_path) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index b1e26d16f97..2b1b5f1bc6b 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -22,6 +22,8 @@ from msprobe.core.common.const import Const, FileCheckConst from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException +from msprobe.core.data_dump.data_processor.mindspore_processor import MindsporeDataProcessor +import mindspore as ms class DataWriter: @@ -38,6 +40,12 @@ class DataWriter: self.cache_stack = {} self.cache_construct = {} self.cache_debug = {} + self.MindsporeDataProcessor = MindsporeDataProcessor + + # 预分配统计值 buffer,注意这里以 float32 为例,且数据仍在 NPU 上 + self.stat_buffer = ms.Tensor(np.zeros((4, self.flush_size), dtype=np.float32)) + self.stat_buffer_index = 0 + self.stat_stack_list = [] @staticmethod def write_data_to_csv(result: list, result_header: tuple, file_path: str): @@ -49,7 +57,7 @@ class DataWriter: spawn_writer = csv.writer(csv_file) if not is_exists: spawn_writer.writerow(result_header) - spawn_writer.writerows([result,]) + spawn_writer.writerows([result, ]) is_new_file = not is_exists if is_new_file: change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) @@ -126,7 +134,37 @@ class DataWriter: def write_debug_info_json(self, file_path): save_json(file_path, self.cache_debug, 
indent=1) + def append_stat_to_buffer(self, stat_vector): + """ + 直接使用 Python list 存储 stat_vector, + 将 stat_vector 存入 self.stat_stack_list 的方式 + """ + # stat_vector 是一个已经经过 ensure_nonzero_rank 包裹的 tensor 列表,形如 [max, min, mean, norm] + self.stat_stack_list.append(stat_vector) + return len(self.stat_stack_list) - 1 + + def flush_stat_stack(self): + """ + 在 flush 阶段,将所有存储的统计值从设备搬到 CPU, + 这里返回一个列表,每个元素是 [Max, Min, Mean, Norm] 的数值列表 + """ + if not self.stat_stack_list: + return [] + result = [ + [ + x.asnumpy().tolist() if hasattr(x, "asnumpy") else x + for x in stat_values + ] + for stat_values in self.stat_stack_list + ] + self.stat_stack_list = [] + return result + def write_json(self): + # 在写 JSON 前,统一获取统计值 + stat_result = self.flush_stat_stack() + # 遍历 cache_data,将占位符替换为最终统计值 + self._replace_stat_placeholders(self.cache_data, stat_result) if self.cache_data: self.write_data_json(self.dump_file_path) if self.cache_stack: @@ -136,6 +174,32 @@ class DataWriter: if self.cache_debug: self.write_debug_info_json(self.debug_file_path) + def _replace_stat_placeholders(self, data, stat_result): + """ + 递归搜索 data 中所有包含 'tensor_stat_index' 键的字典, + 用 stat_result 中对应下标的统计值替换,并删除占位键。 + 同时打印调试信息,帮助检查哪些占位索引越界了。 + """ + if isinstance(data, dict): + for key, value in list(data.items()): + if key == "tensor_stat_index" and isinstance(value, int): + idx = value + # 打印当前占位索引和统计列表长度 + print(f"DEBUG: Found tensor_stat_index = {idx}, stat_result length = {len(stat_result)}") + if idx < len(stat_result): + stat_values = stat_result[idx] + print(f"DEBUG: Replacing index {idx} with values: {stat_values}") + data["Max"], data["Min"], data["Mean"], data["Norm"] = stat_values + else: + print(f"ERROR: Index out of range! 
idx = {idx}, but stat_result length = {len(stat_result)}") + data["Max"], data["Min"], data["Mean"], data["Norm"] = None, None, None, None + del data["tensor_stat_index"] + else: + self._replace_stat_placeholders(value, stat_result) + elif isinstance(data, list): + for item in data: + self._replace_stat_placeholders(item, stat_result) + def fill_stack_tensor_data(self): self.process_stat_data_recursive(self.cache_data) -- Gitee From 858b7a842d74ff16e70370c9d53b9438384040a1 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:10:02 +0800 Subject: [PATCH 229/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 2b1b5f1bc6b..ffc366699e1 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -41,10 +41,6 @@ class DataWriter: self.cache_construct = {} self.cache_debug = {} self.MindsporeDataProcessor = MindsporeDataProcessor - - # 预分配统计值 buffer,注意这里以 float32 为例,且数据仍在 NPU 上 - self.stat_buffer = ms.Tensor(np.zeros((4, self.flush_size), dtype=np.float32)) - self.stat_buffer_index = 0 self.stat_stack_list = [] @staticmethod @@ -57,7 +53,7 @@ class DataWriter: spawn_writer = csv.writer(csv_file) if not is_exists: spawn_writer.writerow(result_header) - spawn_writer.writerows([result, ]) + spawn_writer.writerows([result,]) is_new_file = not is_exists if is_new_file: change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) -- Gitee From cb2ea7de9bf323e0bb163fd55e0e9a666235c73b Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:15:07 +0800 Subject: [PATCH 230/333] Update mindspore_processor.py --- .../core/data_dump/data_processor/mindspore_processor.py | 8 ++------ 1 file changed, 2 
insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 3f2101db61c..2df4d930346 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -163,7 +163,7 @@ class MindsporeDataProcessor(BaseDataProcessor): } # 如果配置了延迟 CPU 搬运,则将统计值存入全局 buffer,并返回占位索引 - if self.config.use_delayed_cpu_transfer: + if tensor_atat.stack_tensor_stat is None: stat_values = [ tensor_stat.max, tensor_stat.min, @@ -175,11 +175,7 @@ class MindsporeDataProcessor(BaseDataProcessor): tensor_json.update({"tensor_stat_index": placeholder_index}) else: - # 原有逻辑,立即转换成 CPU 数值(调用 transfer_type 会执行 .item()) - tensor_json.update({'Max': self.transfer_type(tensor_stat.max)}) - tensor_json.update({'Min': self.transfer_type(tensor_stat.min)}) - tensor_json.update({'Mean': self.transfer_type(tensor_stat.mean)}) - tensor_json.update({'Norm': self.transfer_type(tensor_stat.norm)}) + tensor_json.update({'tensor_stat': tensor_stat.stack_tensor_stat}) if self.config.summary_mode == Const.MD5 and not self.config.async_dump: tensor_md5 = self.get_md5_for_tensor(tensor) tensor_json.update({Const.MD5: tensor_md5}) -- Gitee From ec0c368f49e32996a86030a37a7742d6879f6261 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:15:52 +0800 Subject: [PATCH 231/333] Update mindspore_processor.py --- .../msprobe/core/data_dump/data_processor/mindspore_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 2df4d930346..f174b213d13 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ 
b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -45,7 +45,6 @@ class MindsporeDataProcessor(BaseDataProcessor): } self._async_dump_cache = {} self.stat_stack_list = [] - self.config.use_delayed_cpu_transfer = True @staticmethod def get_md5_for_tensor(x): -- Gitee From 1af8faceee458321a793a1d3d779f64ec81796ea Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:16:13 +0800 Subject: [PATCH 232/333] Update mindspore_processor.py --- .../msprobe/core/data_dump/data_processor/mindspore_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index f174b213d13..2df4d930346 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -45,6 +45,7 @@ class MindsporeDataProcessor(BaseDataProcessor): } self._async_dump_cache = {} self.stat_stack_list = [] + self.config.use_delayed_cpu_transfer = True @staticmethod def get_md5_for_tensor(x): -- Gitee From f0a7c4466782cabffb27ed7b2a763f92491daf6e Mon Sep 17 00:00:00 2001 From: qianggee Date: Mon, 10 Mar 2025 09:18:47 +0000 Subject: [PATCH 233/333] fix bug in deepspeed bf16 opt mv monitor --- .../pytorch/monitor/optimizer_collect.py | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py index 602514836d2..b7eb4da9c66 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py @@ -206,8 +206,59 @@ class MegatronChainedMixPrecisionOptimizerMon(MixPrecisionOptimizerMon): class 
DeepSpeedZeroOptimizerStage0Mon(OptimizerMon): - def fetch_mv(self, monitor, torch_opt, params2name): - return self._fetch_mv_in_adam(monitor, torch_opt, params2name) + def get_group_index(self, params2name, torch_opt): + fp16_groups = torch_opt.bf16_groups + param2group = defaultdict() + for group_idx, fp16_group in enumerate(fp16_groups): + for param in fp16_group: + param2group[param] = group_idx + return param2group + + def fetch_mv(self, monitor, torch_opt, params2name, name2indices=None): + param2group = self.get_group_index(params2name, torch_opt) + exp_avg_dict = defaultdict(float) + exp_avg_sq_dict = defaultdict(float) + update_dict = defaultdict() + ratio_dict = defaultdict() + + param_slice_mappings = torch_opt.state_dict()['param_slice_mappings'] + for param, name in params2name.items(): + group_idx = param2group[param] + state = torch_opt.state[torch_opt.fp32_groups_flat_partition[group_idx]] + if state.get('exp_avg', None) is None: + logger.warning(f"optimizer state is None. 
Something is wrong if this is not the first step") + break + param_slice_mapping = param_slice_mappings[group_idx] + hp_address = param_slice_mapping.get(torch_opt.param_names[param]) + if hp_address is None: + continue + start = hp_address.start + numel = hp_address.numel + + if monitor.mv_distribution: + exp_avg_dict[name] = state['exp_avg'].narrow(0, start, numel) + exp_avg_sq_dict[name] = state['exp_avg_sq'].narrow(0, start, numel) + if monitor.mg_direction: + exp_avg_dict[name] = state['exp'].narrow(0, start, numel) + if monitor.ur_distribution: + if len(torch_opt.param_groups) > 1: + logger.info(f"the length of torch_opt.param_groups is {len(torch_opt.param_groups)}.") + if 'step' in state: + step = state['step'] # Optimizer from pytorch or FusedAdam from apex(used by megatron) + elif 'step' in torch_opt.param_groups[0]: + step = torch_opt.param_groups[0]['step'] # AdamW from mindspeed + else: + logger.warning(f"step of {name} is None, maybe something wrong happened.") + continue + exp_avg = state['exp_avg'].narrow(0, start, numel) + exp_avg_sq = state['exp_avg_sq'].narrow(0, start, numel) + exp_avg_hat = exp_avg / (1 - torch_opt.defaults['betas'][0] ** step) + exp_avg_sq_hat = exp_avg_sq / (1 - torch_opt.defaults['betas'][1] ** step) + update_dict[name] = exp_avg_hat / (torch.sqrt(exp_avg_sq_hat) + torch_opt.defaults['eps']) + ratio_dict[name] = exp_avg_hat / torch.sqrt(exp_avg_sq_hat) + monitor.update_heatmap_visualizer[name].pre_cal(update_dict[name]) + monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) + return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) class DeepSpeedZeroOptimizerStage3Mon(OptimizerMon): -- Gitee From 2520aca6e9e26bc4549446d2a3d4c9753c0e979f Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:30:32 +0800 Subject: [PATCH 234/333] Update mindspore_processor.py --- .../core/data_dump/data_processor/mindspore_processor.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 2df4d930346..9c9d59bb929 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -163,7 +163,7 @@ class MindsporeDataProcessor(BaseDataProcessor): } # 如果配置了延迟 CPU 搬运,则将统计值存入全局 buffer,并返回占位索引 - if tensor_atat.stack_tensor_stat is None: + if tensor_stat.stack_tensor_stat is None: stat_values = [ tensor_stat.max, tensor_stat.min, -- Gitee From 1f882f3b638ffba31081cbba4cb1633291574f14 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:32:48 +0800 Subject: [PATCH 235/333] Update pytorch_processor.py --- .../data_processor/pytorch_processor.py | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 66523da9c55..0a6e5f212dc 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -65,6 +65,8 @@ class PytorchDataProcessor(BaseDataProcessor): "dtype": self.analyze_dtype_in_kwargs } self._async_dump_cache = {} + self.stat_stack_list = [] + self.config.use_delayed_cpu_transfer = True @staticmethod def get_md5_for_tensor(x): @@ -124,17 +126,17 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_stat.min = np.min(data_abs).item() tensor_stat.mean = np.mean(data_abs).item() elif data.dtype == torch.bool: - tensor_stat.max = torch.any(data).item() - tensor_stat.min = torch.all(data).item() + tensor_stat.max = torch.any(data) + tensor_stat.min = torch.all(data) elif not 
data.shape: - tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() + tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data else: if not data.is_floating_point() or data.dtype == torch.float64: data = data.float() - tensor_stat.max = torch.max(data).item() - tensor_stat.min = torch.min(data).item() - tensor_stat.mean = torch.mean(data).item() - tensor_stat.norm = torch.norm(data).item() + tensor_stat.max = torch.max(data) + tensor_stat.min = torch.min(data) + tensor_stat.mean = torch.mean(data) + tensor_stat.norm = torch.norm(data) return tensor_stat @staticmethod @@ -143,7 +145,7 @@ class PytorchDataProcessor(BaseDataProcessor): if data.is_meta: return tensor_stat data_clone = data.detach() - if not data_clone.numel() or not data_clone.data_ptr(): + if data_clone.numel() == 0: return tensor_stat else: if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump: @@ -228,7 +230,7 @@ class PytorchDataProcessor(BaseDataProcessor): if isinstance(element, dist.ProcessGroup): return self._analyze_process_group(element) if isinstance(element, dist.P2POp): - return self._analyze_p2pop(element, Const.SEP.join([str(suffix) for suffix in suffix_stack])) + return self._analyze_p2pop(element) if isinstance(element, dist.ReduceOp): return self._analyze_reduce_op(element) converted_numpy, numpy_type = self._convert_numpy_to_builtin(element) @@ -247,10 +249,10 @@ class PytorchDataProcessor(BaseDataProcessor): module_input_output.update_output_with_args_and_kwargs() return super().analyze_forward_output(name, module, module_input_output) - def _analyze_p2pop(self, arg, suffix): + def _analyze_p2pop(self, arg): p2pop_info = {"class_type": "torch.distributed.P2POp"} try: - tensor_info = self._analyze_tensor(arg.tensor, suffix) + tensor_info = self._analyze_tensor(arg.tensor, []) p2pop_info.update({"tensor": tensor_info}) p2pop_info.update({"op": arg.op.__name__}) p2pop_info.update({"peer": arg.peer}) @@ -269,10 +271,15 
@@ class PytorchDataProcessor(BaseDataProcessor): tensor_json.update({'dtype': str(tensor.dtype)}) tensor_json.update({"shape": tensor.shape}) if tensor_stat.stack_tensor_stat is None: - tensor_json.update({"Max": tensor_stat.max}) - tensor_json.update({"Min": tensor_stat.min}) - tensor_json.update({"Mean": tensor_stat.mean}) - tensor_json.update({"Norm": tensor_stat.norm}) + stat_values = [ + tensor_stat.max, + tensor_stat.min, + tensor_stat.mean, + tensor_stat.norm + ] + placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) + + tensor_json.update({"tensor_stat_index": placeholder_index}) tensor_json.update({"requires_grad": tensor.requires_grad}) if tensor_stat.max is not None: if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max): -- Gitee From b745baaf90d838b637805c1dc4d2ef346168e174 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 10 Mar 2025 17:34:06 +0800 Subject: [PATCH 236/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81parameters=5Fgrad=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../builder/test_graph_builder.py | 20 ++++++++++ .../visualization/builder/graph_builder.py | 39 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py b/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py index 706dc8bf82e..9b69e8bc2a7 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py @@ -111,3 +111,23 @@ class TestGraphBuilder(unittest.TestCase): self.assertEqual(graph.root.subnodes[2].op, NodeOp.module) self.assertEqual(len(graph.root.subnodes[0].subnodes), 0) self.assertEqual(graph.root.subnodes[0].id, 'Module.a.0') + + def test_add_parameters_grad(self): + 
graph = Graph('TestNet') + graph.add_node(NodeOp.module, 'Module.a.backward.0', graph.root) + graph.add_node(NodeOp.module, 'Module.b.backward.0', graph.root) + graph.add_node(NodeOp.module, 'Module.a.backward.1', graph.root) + graph.add_node(NodeOp.module, 'Module.aa.backward.0', graph.get_node('Module.a.backward.0')) + graph.add_node(NodeOp.module, 'Module.aaa.backward.0', graph.get_node('Module.a.backward.0')) + graph.add_node(NodeOp.module, 'Module.aa.backward.1', graph.get_node('Module.a.backward.1')) + graph.add_node(NodeOp.module, 'Module.aaa.backward.1', graph.get_node('Module.a.backward.1')) + + data_dict = {'Module.a.parameters_grad': {}, 'Module.aaa.parameters_grad': {}} + GraphBuilder._add_parameters_grad(graph, data_dict) + root_nodes_id = [node.id for node in graph.get_node('TestNet').subnodes] + sub_nodes_id0 = [node.id for node in graph.get_node('Module.a.backward.0').subnodes] + sub_nodes_id1 = [node.id for node in graph.get_node('Module.a.backward.1').subnodes] + + self.assertEqual(root_nodes_id[-1], 'Module.a.backward.1') + self.assertEqual(sub_nodes_id0[-1], 'Module.aaa.backward.0') + self.assertEqual(sub_nodes_id1[-1], 'Module.a.parameters_grad') diff --git a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py index 814882e6b81..a5ace4001c8 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py @@ -51,6 +51,7 @@ class GraphBuilder: graph = Graph(model_name, data_path=dump_dict.get('dump_data_dir', ''), dump_data=data_dict) GraphBuilder._init_nodes(graph, construct_dict, data_dict, stack_dict) GraphBuilder._collect_apis_between_modules(graph) + GraphBuilder._add_parameters_grad(graph, data_dict) return graph @staticmethod @@ -235,6 +236,44 @@ class GraphBuilder: graph.root.subnodes = output + @staticmethod + def _add_parameters_grad(graph, data_dict): + """ + 
将parameters_grad信息添加到graph中, + 对应模块的parameters_grad节点添加到对应模块的最后一次backward节点(backward计数最大)内作为子节点 + + 例如,graph有节点Module.a.backward.0, Module.a.backward.1, Module.a.backward.2 + 则Module.a.parameters_grad添加在Module.a.backward.2内作为子节点 + """ + prefixes = [] + suffix = Const.SEP + Const.PARAMS_GRAD + for node_id, data in data_dict.items(): + if node_id not in graph.node_map and node_id.endswith(suffix): + prefixes.append(node_id.replace(suffix, '')) + + max_info = {prefix: 0 for prefix in prefixes} + + for key in graph.node_map.keys(): + for prefix in prefixes: + # 构建正则表达式,匹配以 "backward.数字" 结尾的键 + pattern = re.compile(r'^' + re.escape(prefix) + r'\.backward\.(\d+)$') + match = pattern.match(key) + if match: + num = int(match.group(1)) + if num > max_info[prefix]: + max_info[prefix] = num + + for prefix, num in max_info.items(): + node_id = prefix + Const.SEP + Const.BACKWARD + Const.SEP + str(num) + node = graph.get_node(node_id) + if node: + parameters_grad_node_id = graph.add_node(NodeOp.module, prefix + suffix, up_node=node) + # 添加输入输出数据 + node_data = data_dict.get(parameters_grad_node_id, {}) + input_data, output_data = get_input_output(node_data, parameters_grad_node_id) + # 更新数据 + graph.get_node(parameters_grad_node_id).set_input_output(input_data, output_data) + class GraphExportConfig: def __init__(self, graph_n, graph_b=None, tool_tip=None, node_colors=None, micro_steps=None, task='', -- Gitee From 6b0172c95fbd423d917e98a24dc30573e133bc92 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 10 Mar 2025 17:37:54 +0800 Subject: [PATCH 237/333] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/data_dump/data_processor/mindspore_processor.py | 1 - .../msprobe/core/data_dump/data_processor/pytorch_processor.py | 1 - 2 files changed, 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py 
b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 9c9d59bb929..ed6d4d33978 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -45,7 +45,6 @@ class MindsporeDataProcessor(BaseDataProcessor): } self._async_dump_cache = {} self.stat_stack_list = [] - self.config.use_delayed_cpu_transfer = True @staticmethod def get_md5_for_tensor(x): diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 0a6e5f212dc..b8437e9f10b 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -66,7 +66,6 @@ class PytorchDataProcessor(BaseDataProcessor): } self._async_dump_cache = {} self.stat_stack_list = [] - self.config.use_delayed_cpu_transfer = True @staticmethod def get_md5_for_tensor(x): -- Gitee From 39cb020728c765281754d59ecb17346304d86c3c Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Mon, 10 Mar 2025 15:36:33 +0800 Subject: [PATCH 238/333] adapt to distributed api --- .../mindspore/dump/hook_cell/api_register.py | 22 ++++++++----- .../run_ut/data_generate.py | 23 +++++++------- .../run_ut/run_distributed_check.py | 31 ++++++++++--------- .../pytorch/hook_module/api_register.py | 5 ++- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py index 6563ed70086..0f4621b3a03 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py @@ -104,22 +104,28 @@ class ApiTemplate(HOOKCell): api_register = None +stub_tensor_set = 
False -def get_api_register(): - global api_register +def get_api_register(return_new=False): + global stub_tensor_set def stub_method(method): def wrapped_method(*args, **kwargs): return method(*args, **kwargs) return wrapped_method + if not is_mindtorch() and not stub_tensor_set: + for attr_name in dir(StubTensor): + attr = getattr(StubTensor, attr_name) + api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, []) + if attr_name in api_names and callable(attr): + setattr(StubTensor, attr_name, stub_method(attr)) + stub_tensor_set = True + + if return_new: + return ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate) + global api_register if api_register is None: - if not is_mindtorch(): - for attr_name in dir(StubTensor): - attr = getattr(StubTensor, attr_name) - api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, []) - if attr_name in api_names and callable(attr): - setattr(StubTensor, attr_name, stub_method(attr)) api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate) return api_register diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py index f5ee9282ebd..15e14b68c7d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py @@ -1,9 +1,7 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # @@ -15,23 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import math -import torch +import os + import numpy +import torch -from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api +from msprobe.core.common.const import Const, FileCheckConst, CompareConst, DistributedCheckConst +from msprobe.core.common.file_utils import FileChecker, load_npy from msprobe.pytorch.api_accuracy_checker.common.utils import check_object_type, get_full_data_path, \ CompareException, get_module_and_atttribute_name, get_attribute -from msprobe.core.common.file_utils import FileChecker, load_npy +from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import load_pt -from msprobe.core.common.const import Const, FileCheckConst, CompareConst, DistributedCheckConst -from msprobe.pytorch.hook_module.wrap_distributed import get_distributed_ops - +from msprobe.pytorch.hook_module.api_register import get_api_register -distribute_api_list = list(get_distributed_ops()) +api_register = get_api_register(return_new=True) +api_register.initialize_hook(None) +distribute_api_key = Const.PT_FRAMEWORK + Const.SEP + Const.PT_API_TYPE_DIST +distribute_api_list = list(api_register.ori_api_attr.get(distribute_api_key, {}).keys()) TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 2c6793d801b..1b0c0b1217e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -1,9 +1,7 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # @@ -15,39 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import os import sys import time -import argparse from collections import namedtuple -import torch import torch_npu import torch.distributed as dist import torch.multiprocessing as mp - -from msprobe.core.common.file_utils import FileChecker, write_csv, create_directory from msprobe.core.common.const import Const, FileCheckConst, DistributedCheckConst, CompareConst +from msprobe.core.common.file_utils import FileChecker, write_csv, create_directory from msprobe.core.compare.utils import check_and_return_dir_contents -from msprobe.pytorch.hook_module.wrap_distributed import distributed_func -from msprobe.pytorch.pt_config import parse_json_config -from msprobe.pytorch.common.log import logger -from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward +from msprobe.pytorch.api_accuracy_checker.common.config import CheckerConfig +from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_device_params, get_group_info, \ is_port_in_use from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import get_api_info from msprobe.pytorch.api_accuracy_checker.run_ut.distributed_function_registry import distributed_func_registry -from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments -from 
msprobe.pytorch.api_accuracy_checker.common.config import CheckerConfig +from msprobe.pytorch.common.log import logger +from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward +from msprobe.pytorch.hook_module.api_register import get_api_register +from msprobe.pytorch.pt_config import parse_json_config + +api_register = get_api_register(return_new=True) +api_register.initialize_hook(None) +distribute_api_key = Const.PT_FRAMEWORK + Const.SEP + Const.PT_API_TYPE_DIST +distributed_func = api_register.ori_api_attr.get(distribute_api_key, {}) os.environ['HCCL_DETERMINISTIC'] = str(True) current_time = time.strftime("%Y%m%d%H%M%S") RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" RESULT_CSV_HEADER = [['API_NAME', 'RANK', 'COMPARE_RESULT', 'MESSAGE']] -DistributedCheckParams = namedtuple("DistributedCheckParams", ["api_full_name", "all_args", "all_kwargs", - "group_ranks", "result_file_path", "checker_config"]) +DistributedCheckParams = namedtuple("DistributedCheckParams", ["api_full_name", "all_args", "all_kwargs", + "group_ranks", "result_file_path", "checker_config"]) def cleanup(): diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py index 4154646c0f4..30a45a84d87 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py @@ -121,7 +121,10 @@ class ApiTemplate(HOOKModule): api_register = None -def get_api_register(): +def get_api_register(return_new=False): + if return_new: + return ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate) + global api_register if api_register is None: api_register = ApiRegistry(_api_types, _inner_used_api, _supported_api_list_path, ApiTemplate) -- Gitee From 4e8f84cb7e04c3218031df9082a84598544147e4 Mon Sep 17 00:00:00 2001 From: lcw Date: Mon, 24 Feb 2025 14:55:44 +0800 Subject: [PATCH 
239/333] =?UTF-8?q?=E3=80=90feature=E3=80=91=E9=9D=99?= =?UTF-8?q?=E6=80=81=E9=85=8D=E7=BD=AE=E9=A1=B9=E6=A3=80=E6=9F=A5=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 6 +- .../msprobe/core/common/file_utils.py | 123 +++++++++++++++ debug/accuracy_tools/msprobe/msprobe.py | 6 + .../config_checking/checkers/__init__.py | 1 - .../config_checking/checkers/base_checker.py | 13 ++ .../checkers/checkpoint_checker.py | 8 +- .../config_checking/checkers/code_checker.py | 144 ------------------ .../checkers/dataset_checker.py | 13 +- .../checkers/env_args_checker.py | 5 +- .../config_checking/checkers/pip_checker.py | 10 +- .../checkers/weights_checker.py | 20 +-- .../pytorch/config_checking/config_checker.py | 12 +- .../{__main__.py => config_checking.py} | 17 +-- .../pytorch/config_checking/utils/compare.py | 120 --------------- .../config_checking/utils/config_compare.py | 135 ---------------- .../pytorch/config_checking/utils/hash.py | 61 -------- .../pytorch/config_checking/utils/packing.py | 111 -------------- .../pytorch/config_checking/utils/utils.py | 69 +++------ .../config_checking/test_config_checking.py | 13 +- 19 files changed, 215 insertions(+), 672 deletions(-) delete mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/code_checker.py rename debug/accuracy_tools/msprobe/pytorch/config_checking/{__main__.py => config_checking.py} (76%) delete mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/utils/compare.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/utils/config_compare.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/utils/hash.py delete mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/utils/packing.py diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 
b49b4fffd5e..0d9f9dfa9e5 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -504,6 +504,7 @@ class FileCheckConst: XLSX_SUFFIX = ".xlsx" YAML_SUFFIX = ".yaml" IR_SUFFIX = ".ir" + ZIP_SUFFIX = ".zip" MAX_PKL_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 MAX_NUMPY_SIZE = 10737418240 # 10 * 1024 * 1024 * 1024 MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 @@ -512,6 +513,8 @@ class FileCheckConst: MAX_XLSX_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 MAX_YAML_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 MAX_IR_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 + MAX_ZIP_SIZE = 10737418240 # 10 * 1024 * 1024 * 1024 + MAX_FILE_IN_ZIP_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 COMMOM_FILE_SIZE = 1048576 # 1 * 1024 * 1024 DIR = "dir" FILE = "file" @@ -525,7 +528,8 @@ class FileCheckConst: CSV_SUFFIX: MAX_CSV_SIZE, XLSX_SUFFIX: MAX_XLSX_SIZE, YAML_SUFFIX: MAX_YAML_SIZE, - IR_SUFFIX: MAX_IR_SIZE + IR_SUFFIX: MAX_IR_SIZE, + ZIP_SUFFIX: MAX_ZIP_SIZE } CSV_BLACK_LIST = r'^[+-=%@\+\-=%@]|;[+-=%@\+\-=%@]' diff --git a/debug/accuracy_tools/msprobe/core/common/file_utils.py b/debug/accuracy_tools/msprobe/core/common/file_utils.py index fdc626ca6a1..38eb9cd3daf 100644 --- a/debug/accuracy_tools/msprobe/core/common/file_utils.py +++ b/debug/accuracy_tools/msprobe/core/common/file_utils.py @@ -20,6 +20,9 @@ import stat import json import re import shutil +import sys +import zipfile +import multiprocessing from datetime import datetime, timezone from dateutil import parser import yaml @@ -30,6 +33,8 @@ from msprobe.core.common.log import logger from msprobe.core.common.exceptions import FileCheckException from msprobe.core.common.const import FileCheckConst +proc_lock = multiprocessing.Lock() + class FileChecker: """ @@ -671,3 +676,121 @@ def read_xlsx(file_path): logger.error(f"The xlsx file failed to load. 
Please check the path: {file_path}.") raise RuntimeError(f"Read xlsx file {file_path} failed.") from e return result_df + + +def create_file_with_list(result_list, filepath): + check_path_before_create(filepath) + filepath = os.path.realpath(filepath) + try: + with FileOpen(filepath, 'w', encoding='utf-8') as file: + fcntl.flock(file, fcntl.LOCK_EX) + for item in result_list: + file.write(item + '\n') + fcntl.flock(file, fcntl.LOCK_UN) + except Exception as e: + logger.error(f'Save list to file "{os.path.basename(filepath)}" failed.') + raise RuntimeError(f"Save list to file {os.path.basename(filepath)} failed.") from e + change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY) + + +def create_file_with_content(data, filepath): + check_path_before_create(filepath) + filepath = os.path.realpath(filepath) + try: + with FileOpen(filepath, 'w', encoding='utf-8') as file: + fcntl.flock(file, fcntl.LOCK_EX) + file.write(data) + fcntl.flock(file, fcntl.LOCK_UN) + except Exception as e: + logger.error(f'Save content to file "{os.path.basename(filepath)}" failed.') + raise RuntimeError(f"Save content to file {os.path.basename(filepath)} failed.") from e + change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY) + + +def add_file_to_zip(zip_file_path, file_path, arc_path=None): + """ + Add a file to a ZIP archive, if zip does not exist, create one. 
+ + :param zip_file_path: Path to the ZIP archive + :param file_path: Path to the file to add + :param arc_path: Optional path inside the ZIP archive where the file should be added + """ + check_file_suffix(zip_file_path, FileCheckConst.ZIP_SUFFIX) + check_file_size(file_path, FileCheckConst.MAX_FILE_IN_ZIP_SIZE) + zip_size = os.path.getsize(zip_file_path) if os.path.exists(zip_file_path) else 0 + if zip_size + os.path.getsize(file_path) > FileCheckConst.MAX_ZIP_SIZE: + raise RuntimeError(f"ZIP file size exceeds the limit of {FileCheckConst.MAX_ZIP_SIZE} bytes") + check_path_before_create(zip_file_path) + try: + proc_lock.acquire() + with zipfile.ZipFile(zip_file_path, 'a') as zip_file: + zip_file.write(file_path, arc_path) + except Exception as e: + logger.error(f'add file to zip "{os.path.basename(zip_file_path)}" failed.') + raise RuntimeError(f"add file to zip {os.path.basename(zip_file_path)} failed.") from e + finally: + proc_lock.release() + change_mode(zip_file_path, FileCheckConst.DATA_FILE_AUTHORITY) + + +def create_file_in_zip(zip_file_path, file_name, content): + """ + Create a file with content inside a ZIP archive. 
+ + :param zip_file_path: Path to the ZIP archive + :param file_name: Name of the file to create + :param content: Content to write to the file + """ + check_file_suffix(zip_file_path, FileCheckConst.ZIP_SUFFIX) + check_path_before_create(zip_file_path) + zip_size = os.path.getsize(zip_file_path) if os.path.exists(zip_file_path) else 0 + if zip_size + sys.getsizeof(content) > FileCheckConst.MAX_ZIP_SIZE: + raise RuntimeError(f"ZIP file size exceeds the limit of {FileCheckConst.MAX_ZIP_SIZE} bytes") + try: + proc_lock.acquire() + with zipfile.ZipFile(zip_file_path, 'a') as zip_file: + zip_info = zipfile.ZipInfo(file_name) + zip_info.compress_type = zipfile.ZIP_DEFLATED + zip_file.writestr(zip_info, content) + except Exception as e: + logger.error(f'Save content to file "{os.path.basename(zip_file_path)}" failed.') + raise RuntimeError(f"Save content to file {os.path.basename(zip_file_path)} failed.") from e + finally: + proc_lock.release() + change_mode(zip_file_path, FileCheckConst.DATA_FILE_AUTHORITY) + + +def extract_zip(zip_file_path, extract_dir): + """ + Extract the contents of a ZIP archive to a specified directory. 
+ + :param zip_file_path: Path to the ZIP archive + :param extract_dir: Directory to extract the contents to + """ + check_file_suffix(zip_file_path, FileCheckConst.ZIP_SUFFIX) + try: + proc_lock.acquire() + with zipfile.ZipFile(zip_file_path, 'r') as zip_file: + total_size = 0 + if len(zip_file.infolist()) > FileCheckConst.MAX_FILE_IN_ZIP_SIZE: + raise ValueError(f"Too many files in {os.path.basename(zip_file_path)}") + for file_info in zip_file.infolist(): + if file_info.file_size > FileCheckConst.MAX_FILE_IN_ZIP_SIZE: + raise ValueError(f"File {file_info.filename} is too large to extract") + + total_size += file_info.file_size + if total_size > FileCheckConst.MAX_ZIP_SIZE: + raise ValueError(f"Total extracted size exceeds the limit of {FileCheckConst.MAX_ZIP_SIZE} bytes") + except Exception as e: + logger.error(f'Save content to file "{os.path.basename(zip_file_path)}" failed.') + raise RuntimeError(f"Save content to file {os.path.basename(zip_file_path)} failed.") from e + finally: + proc_lock.release() + with zipfile.ZipFile(zip_file_path, 'r') as zip_file: + zip_file.extractall(extract_dir) + + +def split_zip_file_path(zip_file_path): + check_file_suffix(zip_file_path, FileCheckConst.ZIP_SUFFIX) + zip_file_path = os.path.realpath(zip_file_path) + return os.path.dirname(zip_file_path), os.path.basename(zip_file_path) diff --git a/debug/accuracy_tools/msprobe/msprobe.py b/debug/accuracy_tools/msprobe/msprobe.py index 8e0386fde6d..127e042f65a 100644 --- a/debug/accuracy_tools/msprobe/msprobe.py +++ b/debug/accuracy_tools/msprobe/msprobe.py @@ -51,6 +51,7 @@ def main(): graph_service_cmd_parser = subparsers.add_parser('graph') op_generate_cmd_parser = subparsers.add_parser('op_generate') merge_result_parser = subparsers.add_parser('merge_result') + config_checking_parser = subparsers.add_parser('config_checking') _compare_parser(compare_cmd_parser) _merge_result_parser(merge_result_parser) @@ -71,6 +72,8 @@ def main(): from msprobe.visualization.graph_service 
import _pt_graph_service_parser, _pt_graph_service_command from msprobe.pytorch.api_accuracy_checker.generate_op_script.op_generator import _op_generator_parser, \ _run_operator_generate_commond + from msprobe.pytorch.config_checking.config_checking import _config_checking_parser, \ + _run_config_checking_command _run_ut_parser(run_ut_cmd_parser) _run_ut_parser(multi_run_ut_cmd_parser) @@ -80,6 +83,7 @@ def main(): _run_overflow_check_parser(run_overflow_check_cmd_parser) _pt_graph_service_parser(graph_service_cmd_parser) _op_generator_parser(op_generate_cmd_parser) + _config_checking_parser(config_checking_parser) elif framework_args.framework == Const.MS_FRAMEWORK: from msprobe.mindspore.api_accuracy_checker.cmd_parser import add_api_accuracy_checker_argument from msprobe.visualization.graph_service import _ms_graph_service_parser, _ms_graph_service_command @@ -118,6 +122,8 @@ def main(): compare_cli(args) elif sys.argv[3] == "merge_result": merge_result_cli(args) + elif sys.argv[3] == "config_checking": + _run_config_checking_command(args) else: if not is_module_available(Const.MS_FRAMEWORK): logger.error("MindSpore does not exist, please install MindSpore library") diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py index 403d01e4380..47e5e614716 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py @@ -15,7 +15,6 @@ __all__ = ['BaseChecker', ] -import msprobe.pytorch.config_checking.checkers.code_checker import msprobe.pytorch.config_checking.checkers.env_args_checker import msprobe.pytorch.config_checking.checkers.pip_checker import msprobe.pytorch.config_checking.checkers.checkpoint_checker diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py 
b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py index 45b0cfcc1f8..7eb0babb9d4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py @@ -16,6 +16,8 @@ import os from abc import ABC, abstractmethod +from msprobe.core.common.const import FileCheckConst + class PackInput: @@ -26,6 +28,17 @@ class PackInput: self.need_pip_data = config_dict.get("pip data", None) self.output_zip_path = config_dict.get("output zip path", "./config_check_pack.zip") self.model = model + self.check_input_params() + + def check_input_params(self): + if self.code_path is not None: + if not isinstance(self.output_zip_path, str): + raise Exception(f"code_path must be a string") + if self.ckpt_path is not None: + if not isinstance(self.ckpt_path, str): + raise Exception(f"ckpt_path must be a string") + if not isinstance(self.output_zip_path, str) or not self.output_zip_path.endswith(FileCheckConst.ZIP_SUFFIX): + raise Exception(f"output zip path must be a string and ends with '.zip'") class BaseChecker(ABC): diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/checkpoint_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/checkpoint_checker.py index 789f5614ff8..f3208acbb05 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/checkpoint_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/checkpoint_checker.py @@ -18,12 +18,10 @@ import os import torch +from msprobe.core.common.file_utils import load_json, create_file_with_list, create_file_in_zip from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker from msprobe.pytorch.config_checking.config_checker import register_checker_item -from msprobe.pytorch.config_checking.utils.hash import bytes_hash -from msprobe.pytorch.config_checking.utils.packing import create_file_in_zip -from 
msprobe.pytorch.config_checking.utils.utils import config_checking_print -from msprobe.pytorch.config_checking.utils.utils import load_json, compare_dict, write_list_to_file +from msprobe.pytorch.config_checking.utils.utils import config_checking_print, compare_dict, bytes_hash def tensor_to_hash(tensor): @@ -67,5 +65,5 @@ class CheckpointChecker(BaseChecker): cmp_ckpt_data = load_ckpt_file_in_zip(cmp_dir) result = compare_dict(bench_ckpt_data, cmp_ckpt_data) output_filepath = os.path.join(output_path, CheckpointChecker.result_filename) - write_list_to_file(result, output_filepath) + create_file_with_list(result, output_filepath) diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/code_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/code_checker.py deleted file mode 100644 index e287978aa88..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/code_checker.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import json -import filecmp -import difflib - -from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker -from msprobe.pytorch.config_checking.utils.packing import DirPacker, add_file_to_zip -from msprobe.pytorch.config_checking.config_checker import register_checker_item -from msprobe.pytorch.config_checking.utils.utils import write_list_to_file -from msprobe.pytorch.config_checking.utils.utils import config_checking_print -from msprobe.pytorch.config_checking.utils.config_compare import CONFIG_EXTENSIONS, ConfigComparator - - -def is_constructed_file(filepath): - for suffix in CONFIG_EXTENSIONS: - if filepath.endswith(suffix): - return True - return False - - -def file_is_identical(filepath1, filepath2): - return filecmp.cmp(filepath1, filepath2) - - -def compare_directories(dir1, dir2, output_dir): - # Create the output directory if it doesn't exist - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - # Walk through the directory trees - for root1, _, files1 in os.walk(dir1): - for root2, _, files2 in os.walk(dir2): - # Find the corresponding directory in the other tree - rel_path1 = os.path.relpath(root1, dir1) - rel_path2 = os.path.relpath(root2, dir2) - if rel_path1 == rel_path2: - # Create the corresponding directory in the output dir - output_root = os.path.join(output_dir, rel_path1) - if not os.path.exists(output_root): - os.makedirs(output_root) - - # Compare files in the current directory - for file1 in files1: - file_path1 = os.path.join(root1, file1) - file_path2 = os.path.join(root2, file1) - if os.path.exists(file_path2): - if is_constructed_file(file_path1): - diff, has_diff = ConfigComparator().compare(file_path1, file_path2) - diff = json.dumps(diff, indent=4) - # Compare file contents - else: - has_diff = not file_is_identical(file_path1, file_path2) - if has_diff: - with open(file_path1, 'r', encoding='utf-8') as f1, open(file_path2, 'r', - encoding='utf-8') as f2: - diff = 
difflib.ndiff(f1.readlines(), f2.readlines()) - if has_diff: - diff_file = os.path.join(output_root, f"{file1}.diff") - with open(diff_file, 'w', encoding='utf-8') as f: - f.writelines(diff) - else: - # File only exists in dir1, mark as deleted - deleted_file = os.path.join(output_root, f"{file1}.deleted") - with open(deleted_file, 'w', encoding='utf-8') as f: - f.write("File deleted") - - for file2 in files2: - file_path2 = os.path.join(root2, file2) - if not os.path.exists(os.path.join(root1, file2)): - # File only exists in dir2, mark as added - added_file = os.path.join(output_root, f"{file2}.added") - with open(added_file, 'w', encoding='utf-8') as f: - f.write("File added") - - -def get_all_files(directory): - file_list = [] - for root, _, files in os.walk(directory): - for file in files: - file_path = os.path.relpath(os.path.join(root, file), directory) - file_list.append(file_path) - return file_list - - -def file_suffix_to_label(filepaths): - add = [] - delete = [] - diff = [] - for filepath in filepaths: - head, tail = filepath.rsplit('.', 1) - if tail == 'added': - add.append(f'[add] {head}') - if tail == 'deleted': - delete.append(f'[delete] {head}') - if tail == 'diff': - diff.append(f'[diff] {head}') - return add + delete + diff - - -@register_checker_item("code") -class CodeChecker(BaseChecker): - input_needed = "code_path" - - target_name_in_zip = "code" - result_dirname = "code" - result_filename = "code_change_summary.txt" - - @staticmethod - def pack(pack_input): - code_path = pack_input.code_path - output_zip_path = pack_input.output_zip_path - for dirname, pathname in code_path.items(): - if os.path.isdir(pathname): - DirPacker(pathname, output_zip_path, os.path.join(CodeChecker.target_name_in_zip, dirname)) - config_checking_print(f"add code {dirname} {pathname} to zip") - elif os.path.isfile(pathname): - dest_path_in_zip = os.path.join(CodeChecker.target_name_in_zip, dirname, os.path.basename(pathname)) - add_file_to_zip(output_zip_path, 
pathname, dest_path_in_zip) - config_checking_print(f"add code {dirname} {pathname} to zip") - - @staticmethod - def compare(bench_dir, cmp_dir, output_path): - bench_code_dir = os.path.join(bench_dir, CodeChecker.target_name_in_zip) - cmp_code_dir = os.path.join(cmp_dir, CodeChecker.target_name_in_zip) - code_output_dir = os.path.join(output_path, CodeChecker.result_dirname) - compare_directories(bench_code_dir, cmp_code_dir, code_output_dir) - filepaths = get_all_files(output_path) - code_output_filepath = os.path.join(output_path, CodeChecker.result_filename) - write_list_to_file(file_suffix_to_label(filepaths), code_output_filepath) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/dataset_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/dataset_checker.py index 9afeb307e13..0426f1db9c0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/dataset_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/dataset_checker.py @@ -17,11 +17,12 @@ import os import json import torch +from msprobe.core.common.file_utils import create_file_with_list, create_file_with_content, create_file_in_zip +from msprobe.pytorch.common.utils import get_rank_id from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker from msprobe.pytorch.config_checking.config_checker import register_checker_item, register_pre_forward_fun_list -from msprobe.pytorch.config_checking.utils.packing import create_file_in_zip -from msprobe.pytorch.config_checking.utils.utils import write_list_to_file, config_checking_print, get_rank, \ - get_tensor_features, read_rank_result_to_dict, compare_dicts, write_content_to_file +from msprobe.pytorch.config_checking.utils.utils import config_checking_print, \ + get_tensor_features, read_rank_result_to_dict, compare_dicts def get_tuple_input_features(tup): @@ -54,8 +55,8 @@ class DatasetChecker(BaseChecker): features = 
get_tuple_input_features(data_input) else: raise ValueError("Unsupported input type when pack dataset") - - dataset_filepath = os.path.join(DatasetChecker.target_name_in_zip, f"rank{get_rank()}.json") + + dataset_filepath = os.path.join(DatasetChecker.target_name_in_zip, f"rank{get_rank_id()}.json") create_file_in_zip(output_zip_path, dataset_filepath, json.dumps(features, indent=4)) config_checking_print(f"add first dataset input features to zip") @@ -69,4 +70,4 @@ class DatasetChecker(BaseChecker): cmp_dataset = read_rank_result_to_dict(cmp_dataset_pack_path) deleted, added, changed, result = compare_dicts(bench_dataset, cmp_dataset) output_filepath = os.path.join(output_path, DatasetChecker.result_filename) - write_content_to_file(json.dumps(result, indent=4), output_filepath) + create_file_with_content(json.dumps(result, indent=4), output_filepath) diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/env_args_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/env_args_checker.py index 112e188f49d..668e4815c3a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/env_args_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/env_args_checker.py @@ -16,9 +16,8 @@ import os import json +from msprobe.core.common.file_utils import load_json, load_yaml, create_file_with_content, create_file_in_zip from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker -from msprobe.pytorch.config_checking.utils.packing import create_file_in_zip -from msprobe.pytorch.config_checking.utils.utils import load_json, load_yaml, merge_keys, write_content_to_file from msprobe.pytorch.config_checking.config_checker import register_checker_item from msprobe.pytorch.config_checking.utils.utils import config_checking_print @@ -75,4 +74,4 @@ class EnvArgsChecker(BaseChecker): env_error_message, env_warning_message = compare_env_data(bench_env_data, cmp_env_data) output_filepath = 
os.path.join(output_path, EnvArgsChecker.result_filename) result = f"-env_error_message:\n{env_error_message}\n-env_warning_message:\n{env_warning_message}" - write_content_to_file(result, output_filepath) + create_file_with_content(result, output_filepath) diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/pip_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/pip_checker.py index 271537dbbb7..7d122028ae8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/pip_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/pip_checker.py @@ -20,15 +20,15 @@ try: except ImportError: import importlib_metadata as metadata +from msprobe.core.common.file_utils import load_yaml, create_file_with_content, create_file_in_zip from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker -from msprobe.pytorch.config_checking.utils.packing import create_file_in_zip -from msprobe.pytorch.config_checking.utils.utils import load_yaml, merge_keys, write_content_to_file +from msprobe.pytorch.config_checking.utils.utils import merge_keys from msprobe.pytorch.config_checking.config_checker import register_checker_item from msprobe.pytorch.config_checking.utils.utils import config_checking_print from msprobe.core.common.file_utils import FileOpen dirpath = os.path.dirname(__file__) -dependpath = os.path.join(dirpath, "../resource/dependency.yaml") +depend_path = os.path.join(dirpath, "../resource/dependency.yaml") def load_pip_txt(file_path): @@ -52,7 +52,7 @@ def collect_pip_data(): def compare_pip_data(npu_path, bench_path): error_message = "" warning_message = "" - necessary_dependency = load_yaml(dependpath)["dependency"] + necessary_dependency = load_yaml(depend_path)["dependency"] npu_data = load_pip_txt(npu_path) bench_data = load_pip_txt(bench_path) key_list = merge_keys(npu_data, bench_data) @@ -91,4 +91,4 @@ class PipPackageChecker(BaseChecker): pip_error_message, 
pip_warning_message = compare_pip_data(bench_pip_data, cmp_pip_data) output_filepath = os.path.join(output_path, PipPackageChecker.result_filename) result = f"-pip_error_message:\n {pip_error_message}\n-pip_warning_message:\n {pip_warning_message}" - write_content_to_file(result, output_filepath) + create_file_with_content(result, output_filepath) diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/weights_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/weights_checker.py index 8f119425ddf..0275eacf362 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/weights_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/weights_checker.py @@ -14,19 +14,17 @@ # limitations under the License. import os -import subprocess -import re -import zlib import json import torch +from msprobe.core.common.file_utils import create_file_with_list, create_file_with_content, load_yaml, load_json, \ + create_file_in_zip +from msprobe.pytorch.common.utils import get_rank_id from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker -from msprobe.pytorch.config_checking.utils.packing import create_file_in_zip -from msprobe.pytorch.config_checking.utils.utils import load_yaml, merge_keys, write_content_to_file, load_json from msprobe.pytorch.config_checking.config_checker import register_checker_item, register_pre_forward_fun_list -from msprobe.pytorch.config_checking.utils.utils import (config_checking_print, get_rank, get_tensor_features, - read_rank_result_to_dict, compare_dicts, write_list_to_file, - write_content_to_file) +from msprobe.pytorch.config_checking.utils.utils import (merge_keys, config_checking_print, + get_tensor_features, + read_rank_result_to_dict, compare_dicts) def collect_weights_data(model): @@ -34,8 +32,6 @@ def collect_weights_data(model): for name, param in model.named_parameters(): if param.dtype == torch.bfloat16: param = param.float() - data_bytes 
= param.cpu().detach().numpy().tobytes() - hash_value = zlib.crc32(data_bytes) weights_data[name] = get_tensor_features(param) return weights_data @@ -65,7 +61,7 @@ class WeightsChecker(BaseChecker): def collect_weights(model, data_input): weights_data_dict = collect_weights_data(model) - weights_data_filepath = os.path.join(WeightsChecker.target_name_in_zip, f"rank{get_rank()}.json") + weights_data_filepath = os.path.join(WeightsChecker.target_name_in_zip, f"rank{get_rank_id()}.json") create_file_in_zip(output_zip_path, weights_data_filepath, json.dumps(weights_data_dict, indent=4)) config_checking_print(f"add weights info to zip") register_pre_forward_fun_list(collect_weights) @@ -78,4 +74,4 @@ class WeightsChecker(BaseChecker): cmp_weight = read_rank_result_to_dict(cmp_weight_pack_path) deleted, added, changed, result = compare_dicts(bench_weight, cmp_weight) output_filepath = os.path.join(output_path, WeightsChecker.result_filename) - write_content_to_file(json.dumps(result, indent=4), output_filepath) + create_file_with_content(json.dumps(result, indent=4), output_filepath) diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py index ce1a76aa730..4b541e92189 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py @@ -19,8 +19,7 @@ import shutil import torch import torch.distributed as dist -from msprobe.pytorch.config_checking.utils.utils import load_json -from msprobe.pytorch.config_checking.utils.packing import extract_zip +from msprobe.core.common.file_utils import load_json, split_zip_file_path, create_directory, extract_zip from msprobe.pytorch.config_checking.checkers.base_checker import PackInput from msprobe.pytorch.config_checking.utils.utils import config_checking_print @@ -35,7 +34,14 @@ class ConfigChecker: raise Exception(f"{model} is not a 
torch.nn.Module") config_dict = load_json(config_filepath) self.pack_input = PackInput(config_dict, model) - self.pack() + file_path, file_name = split_zip_file_path(self.pack_input.output_zip_path) + if not os.path.exists(file_path): + create_directory(file_path) + self.pack() + else: + if os.path.exists(self.pack_input.output_zip_path): + raise Exception("The output file path already exist!") + self.pack() @staticmethod diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/__main__.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py similarity index 76% rename from debug/accuracy_tools/msprobe/pytorch/config_checking/__main__.py rename to debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py index b86564f49a1..87d16747d61 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/__main__.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py @@ -13,13 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os -import json -import argparse from msprobe.pytorch.config_checking.config_checker import ConfigChecker -from msprobe.pytorch.config_checking.utils.packing import extract_zip -from msprobe.pytorch.config_checking.utils.utils import load_json -from msprobe.pytorch.config_checking.checkers.base_checker import PackInput +from msprobe.pytorch.common.log import logger def pack(config_filepath): @@ -30,16 +25,18 @@ def compare(bench_zip_path, cmp_zip_path, outpath): ConfigChecker.compare(bench_zip_path, cmp_zip_path, outpath) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Configuration Checker!') +def _config_checking_parser(parser): parser.add_argument('-p', '--pack', help='Pack a directory into a zip file') parser.add_argument('-c', '--compare', nargs=2, help='Compare two zip files') parser.add_argument('-o', '--output', help='output path, default is current directory') - args = parser.parse_args() + + +def _run_config_checking_command(args): if args.pack: pack(args.pack) elif args.compare: output_dirpath = args.output if args.output else "./config_check_result" compare(args.compare[0], args.compare[1], output_dirpath) else: - parser.print_help() + logger.error("The param is not correct, you need to give '-p' for pack or '-c' for compare.") + raise Exception("The param is not correct, you need to give '-p' for pack or '-c' for compare.") diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/compare.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/compare.py deleted file mode 100644 index f19c614af34..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/compare.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import configparser -from collections.abc import Mapping, Sequence -import difflib -import xmltodict -import yaml - - -def compare_json(obj1, obj2, ignore_order=False): - """ - compare two given json objects and return the differences as AUD (add, update, delete) - - Args: - obj1 (dict): json object 1 - obj2 (dict): json object 2 - ignore_order (bool): whether to ignore differences in lists order, default to False - - Returns: - dict: a set of AUD changes of comparison of obj1 and obj2 - """ - def _new_diff(): - return { - 'add': [], - 'delete': [], - 'update': [] - } - - def _compare_dicts(d1, d2, path=''): - """ compare two dicts """ - differences = _new_diff() - keys1 = set(d1.keys()) - keys2 = set(d2.keys()) - - for key in keys1 - keys2: - differences['delete'].append(f"{path}/{key}") - - for key in keys2 - keys1: - differences['add'].append(f"{path}/{key}") - - for key in keys1 & keys2: - new_path = f"{path}/{key}" if path else key - sub_diff = _dfs(d1[key], d2[key], new_path) - for k, v in sub_diff.items(): - differences[k].extend(v) - return differences - - def _compare_lists(l1, l2, path=''): - """ compare two lists """ - differences = _new_diff() - if ignore_order: - l1 = sorted(l1) - l2 = sorted(l2) - - if len(l1) != len(l2): - differences['update'].append(f"{path}: Length {len(l1)} -> {len(l2)}") - for idx, (item1, item2) in enumerate(zip(l1, l2)): - new_path = f"{path}/[{idx}]" - sub_diff = _dfs(item1, item2, new_path) - for k, v in sub_diff.items(): - differences[k].extend(v) - - return differences - - def _dfs(v1, v2, path=''): - """ 
compare two values by its type """ - diffs = _new_diff() - if isinstance(v1, Mapping) and isinstance(v2, Mapping): - return _compare_dicts(v1, v2, path) - elif isinstance(v1, Sequence) and isinstance(v2, Sequence) and not isinstance(v1, str): - return _compare_lists(v1, v2, path) - else: - if v1 != v2: - diffs['update'].append(f"{path}: {v1} -> {v2}") - return diffs - - return _compare_dicts(obj1, obj2) - - -def cmp_file(bench_filepath, cmp_filepath, outpath): - bench_lines = open(bench_filepath, 'r', encoding='utf-8').readlines() - cmp_lines = open(cmp_filepath, 'r', encoding='utf-8').readlines() - html_content = difflib.HtmlDiff().make_file(bench_lines, cmp_lines) - with open(outpath, 'w+', encoding='utf-8') as f: - f.write(html_content) - - -def open_xml_to_dict(filepath): - with open(filepath, 'r', encoding='utf-8') as f: - return xmltodict.parse(f.read()) - - -def open_yaml_to_dict(filepath): - with open(filepath, 'r', encoding='utf-8') as f: - return yaml.safe_load(f) - - -def open_ini_to_dict(filepath): - config = configparser.ConfigParser() - config.read(filepath) - - data = {} - for section in config.sections(): - data[section] = {} - for key, value in config.items(section): - data[section][key] = value - return data diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/config_compare.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/config_compare.py deleted file mode 100644 index 231fbc6bb96..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/config_compare.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import configparser -import json -import xml.etree.ElementTree as ET -from abc import ABC, abstractmethod -from pathlib import Path - -import yaml - -from msprobe.pytorch.config_checking.utils.utils import config_checking_print -from msprobe.pytorch.config_checking.utils.compare import compare_json - -CONFIG_EXTENSIONS = ('.json', '.yaml', '.xml', '.ini', '.yml') - - -class Parser(ABC): - @abstractmethod - def parse(self, file_path: str) -> dict: - pass - - def run(self, file_path: str) -> dict: - """ - 统一对外调用接口 - :param file_path: 需解析的文件路径 - :return: - """ - try: - result = self.parse(file_path) - except Exception as exc: - config_checking_print(f"{self.__class__} parsing error, skip file path: {file_path}, error: {exc}") - result = {} - return result - - -class JsonParser(Parser): - def parse(self, file_path: str) -> dict: - with open(file_path, 'r') as f: - return json.load(f) - - -class IniParser(Parser): - def parse(self, file_path: str) -> dict: - config = configparser.ConfigParser() - config.read(file_path) - return {section: dict(config.items(section)) for section in config.sections()} - - -class YamlParser(Parser): - def parse(self, file_path: str) -> dict: - with open(file_path, 'r') as f: - cont = f.read() - return yaml.safe_load(cont) - - -class XmlParser(Parser): - def parse(self, file_path: str) -> dict: - tree = ET.parse(file_path) - root = tree.getroot() - return self._element_to_dict(root) - - def _element_to_dict(self, element): - return { - element.tag: {child.tag: self._element_to_dict(child) for child in element} - if list(element) - 
else element.text - } - - -class ParserFactory: - __ParserDict = { - '.json': JsonParser(), - '.ini': IniParser(), - '.yaml': YamlParser(), - '.yml': YamlParser(), - '.xml': XmlParser() - } - - def get_parser(self, file_type: str) -> Parser: - parser = self.__ParserDict[file_type] - if not parser: - raise ValueError(f'Invalid parser type: {file_type}') - return parser - - -class ConfigComparator: - def __init__(self): - self.parser_factory = ParserFactory() - - @staticmethod - def __check_diff(diff): - for _, val in diff.items(): - if len(val) != 0: - return True - return False - - @staticmethod - def __file_extension(filepath): - return Path(filepath).suffix - - def compare(self, file_a, file_b): - tya = self.__file_extension(file_a) - tyb = self.__file_extension(file_b) - if tya != tyb: - raise ValueError(f'File extensions do not match: {tya} != {tyb}') - diff = self.__compare_files(file_a, tya, file_b, tyb) - return diff, self.__check_diff(diff) - - def __compare_files(self, file_path1: str, file_type1: str, file_path2: str, file_type2: str) -> dict: - """ - Compare two config files. - Args: - Returns: - dict: a set of AUD changes of comparison of obj1 and obj2 - """ - parser1 = self.parser_factory.get_parser(file_type1) - parser2 = self.parser_factory.get_parser(file_type2) - - config1 = parser1.run(file_path1) - config2 = parser2.run(file_path2) - - return compare_json(config1, config2) diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/hash.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/hash.py deleted file mode 100644 index de073d9f3b1..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/hash.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import hashlib -import os -from concurrent.futures import ThreadPoolExecutor - -BLOCK_SIZE = 64 << 20 # 64MB -MAX_THREAD_WORKERS = 16 - - -def __hash_block(block_data): - """ - 计算每一个块的hash - :param block_data: - :return: - """ - hasher = hashlib.blake2b() - hasher.update(block_data) - return hasher.digest() - - -def calculate_hash(file_path, max_workers=MAX_THREAD_WORKERS): - file_size = os.path.getsize(file_path) - num_blocks = (file_size + BLOCK_SIZE + 1) // BLOCK_SIZE - - with open(file_path, 'rb') as f, ThreadPoolExecutor(max_workers=max_workers) as exe: - futures = [] - for _ in range(num_blocks): - block_data = f.read(BLOCK_SIZE) - futures.append(exe.submit(__hash_block, block_data)) - block_hashes = [ft.result() for ft in futures] - - # merge combination of hash list - final_hasher = hashlib.blake2b() - for blk in block_hashes: - final_hasher.update(blk) - - return final_hasher.hexdigest() - - -def string_hash(input_str): - return hashlib.sha256(input_str.encode('utf-8')).hexdigest() - - -def bytes_hash(obj: bytes): - hex_dig = hashlib.sha256(obj).hexdigest() - short_hash = int(hex_dig, 16) % (2 ** 16) - return short_hash diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/packing.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/packing.py deleted file mode 100644 index 0b62836d6fd..00000000000 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/packing.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import zipfile -import hashlib -import multiprocessing - -from msprobe.pytorch.config_checking.utils.hash import string_hash -from msprobe.pytorch.config_checking.utils.utils import config_checking_print - - -proc_lock = multiprocessing.Lock() - - -def add_file_to_zip(zip_file_path, file_path, arc_path=None): - """ - Add a file to a ZIP archive. - - :param zip_file_path: Path to the ZIP archive - :param file_path: Path to the file to add - :param arc_path: Optional path inside the ZIP archive where the file should be added - """ - proc_lock.acquire() - with zipfile.ZipFile(zip_file_path, 'a') as zip_file: - zip_file.write(file_path, arc_path) - proc_lock.release() - - -def create_file_in_zip(zip_file_path, file_name, content): - """ - Create a file with content inside a ZIP archive. - - :param zip_file_path: Path to the ZIP archive - :param file_name: Name of the file to create - :param content: Content to write to the file - """ - proc_lock.acquire() - with zipfile.ZipFile(zip_file_path, 'a') as zip_file: - zip_info = zipfile.ZipInfo(file_name) - zip_info.compress_type = zipfile.ZIP_DEFLATED - zip_file.writestr(zip_info, content) - proc_lock.release() - - -def extract_zip(zip_file_path, extract_dir): - """ - Extract the contents of a ZIP archive to a specified directory. 
- - :param zip_file_path: Path to the ZIP archive - :param extract_dir: Directory to extract the contents to - """ - with zipfile.ZipFile(zip_file_path, 'r') as zip_file: - zip_file.extractall(extract_dir) - - -class DirPacker: - def __init__(self, root_dir, zip_file, result_dirname): - self.root_dir = root_dir - self.zip_file = zip_file - self.result_dirname = result_dirname - self.zip_handler = zipfile.ZipFile(zip_file, 'a') - self.parse_directory() - self.close_zip() - - def parse_directory(self): - for root, _, files in os.walk(self.root_dir): - for file in files: - file_path = os.path.join(root, file) - rel_path = os.path.relpath(file_path, self.root_dir) - if any(part.startswith('.') or part.startswith('__') for part in rel_path.split(os.sep)): - continue # skip hidden file - file_size = os.path.getsize(file_path) - try: - if file_size < 10 * 1024 * 1024: # 10MB - self.add_file_to_zip(file_path) - else: - self.add_hash_file_to_zip(file_path) - except Exception as e: - config_checking_print(f"add file {file_path} to zip error") - raise e - - def add_file_to_zip(self, file_path): - rel_path = os.path.relpath(file_path, self.root_dir) - target_file_path = os.path.join(self.result_dirname, rel_path) - self.zip_handler.write(file_path, target_file_path) - - def add_hash_file_to_zip(self, file_path): - rel_path = os.path.relpath(file_path, self.root_dir) - hash_file_path = f"{rel_path}.hash" - target_file_path = os.path.join(self.result_dirname, hash_file_path) - with open(file_path, 'rb') as f: - file_hash = string_hash(f.read()) - - zip_info = zipfile.ZipInfo(target_file_path) - self.zip_handler.writestr(zip_info, file_hash) - - def close_zip(self): - self.zip_handler.close() diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/utils.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/utils.py index b0cdcc72e96..5b67d229836 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/utils.py +++ 
b/debug/accuracy_tools/msprobe/pytorch/config_checking/utils/utils.py @@ -15,31 +15,12 @@ import os import re -import json +import hashlib -import yaml import torch -import torch.distributed as dist -from msprobe.pytorch.config_checking.utils.hash import bytes_hash -from msprobe.core.common.file_utils import FileOpen - - -def load_txt(file_path): - with FileOpen(file_path, 'r', encoding='utf-8') as file: - return file.read() - - -def load_yaml(file_path): - with FileOpen(file_path, 'r', encoding='utf-8') as file: - output = yaml.safe_load(file) - return output - - -def load_json(file_path): - with FileOpen(file_path, 'r', encoding='utf-8') as file: - output = json.load(file) - return output +from msprobe.core.common.file_utils import load_json +from msprobe.pytorch.common.log import logger def merge_keys(dir_0, dir_1): @@ -61,19 +42,8 @@ def compare_dict(bench_dict, cmp_dict): return result -def write_list_to_file(result_list, filepath): - with FileOpen(filepath, 'w', encoding='utf-8') as file: - for item in result_list: - file.write(item + '\n') - - -def write_content_to_file(data, filepath): - with FileOpen(filepath, 'w', encoding='utf-8') as file: - file.write(data) - - def config_checking_print(msg): - print(f"[config checking log] {msg}") + logger.info(f"[config checking log] {msg}") def tensor_to_hash(tensor): @@ -82,25 +52,18 @@ def tensor_to_hash(tensor): return bytes_hash(tensor_bytes) -features = { - "hash": tensor_to_hash, - "max": lambda x: torch.max(x).item(), - "min": lambda x: torch.min(x).item(), - "mean": lambda x: torch.mean(x).item(), - "norm": lambda x: torch.norm(x).item(), -} - - def get_tensor_features(tensor): + features = { + "hash": tensor_to_hash, + "max": lambda x: torch.max(x).item(), + "min": lambda x: torch.min(x).item(), + "mean": lambda x: torch.mean(x).item(), + "norm": lambda x: torch.norm(x).item(), + } + if not tensor.is_floating_point() or tensor.dtype == torch.float64: tensor = tensor.float() - return {key: 
features[key](tensor) for key in features} - - -def get_rank(): - if dist.is_initialized(): - return dist.get_rank() - return 0 + return {key: features.get(key)(tensor) for key in features} def read_rank_result_to_dict(dirpath): @@ -140,3 +103,9 @@ def compare_dicts(dict1, dict2, path=''): added.append(f"[Added]: {path + key}") result[key] = "[added]" return deleted, added, changed, result + + +def bytes_hash(obj: bytes): + hex_dig = hashlib.sha256(obj).hexdigest() + short_hash = int(hex_dig, 16) % (2 ** 16) + return short_hash diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py index 8275bcb7182..9c3f98e6210 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py @@ -1,5 +1,6 @@ import os import random +import shutil import unittest import torch import json @@ -7,7 +8,7 @@ import numpy as np import torch.nn as nn from multiprocessing import Process from time import sleep -from msprobe.pytroch.config_checking.config_checker import ConfigChecker +from msprobe.pytorch.config_checking.config_checker import ConfigChecker testdir = os.path.dirname(__file__) config_checking_dir = os.path.dirname(testdir) @@ -48,8 +49,8 @@ def get_test_model(): return test_module -@unittest.mock.patch("msprobe.pytroch.config_checking.checkers.pip_checker.collect_pip_data") -@unittest.mock.patch("msprobe.pytroch.config_checking.checkers.env_args_checker.collect_env_data") +@unittest.mock.patch("msprobe.pytorch.config_checking.checkers.pip_checker.collect_pip_data") +@unittest.mock.patch("msprobe.pytorch.config_checking.checkers.env_args_checker.collect_env_data") def train_test(config_dict, seed, mock_env, mock_pip): mock_env.return_value = {"HCCL_DETERMINISTIC": False} if seed == 1234: @@ -65,7 +66,6 @@ def 
train_test(config_dict, seed, mock_env, mock_pip): config_path = os.path.join(temp_dir, "config.json") json.dump(config_dict, open(config_path, 'w', encoding='utf-8')) ConfigChecker(config_path, test_module) - os.remove(config_path) for input_data, label in get_test_dataset(): output = test_module(input_data) @@ -82,6 +82,9 @@ def process_train_test(config_dict, seed=1234): class TestConfigChecker(unittest.TestCase): + def tearDown(self): + shutil.rmtree(temp_dir) + def test_all(self): config_dict1 = { "env args": True, @@ -104,4 +107,4 @@ class TestConfigChecker(unittest.TestCase): compare_output_dir = os.path.join(temp_dir, "compare_output", "output") with open(os.path.join(compare_output_dir, "pip_data_check_result.txt"), 'r', encoding='utf-8') as file: lines = file.readlines() - self.assertEqual(lines[1], " package_name:transformers, npu_version:0.0.1, bench_version:0.0.2\n") \ No newline at end of file + self.assertEqual(lines[1], " package_name:transformers, npu_version:0.0.1, bench_version:0.0.2\n") -- Gitee From ea94231b5a4b40321741fc89109d7cb30d3e985b Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 10 Mar 2025 19:18:37 +0800 Subject: [PATCH 240/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81parameters=5Fgrad=E4=BF=A1=E6=81=AF?= =?UTF-8?q?=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/visualization/builder/graph_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py index a5ace4001c8..1f70fb7d8ae 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py @@ -247,7 +247,7 @@ class GraphBuilder: """ prefixes = [] suffix = Const.SEP + Const.PARAMS_GRAD - for node_id, data in 
data_dict.items(): + for node_id in data_dict.keys(): if node_id not in graph.node_map and node_id.endswith(suffix): prefixes.append(node_id.replace(suffix, '')) -- Gitee From c85f90e5f61c0a45b1f7c7c92c479d724a2d7f37 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 10 Mar 2025 19:08:29 +0800 Subject: [PATCH 241/333] add notes --- debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index ba76836cb5d..96de730260d 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -411,3 +411,6 @@ ops: - adaptive_avg_pool2d - adaptive_avg_pool3d ``` +### 9.2 不支持模型 + +静态图场景L0级暂不支持Yi模型。 \ No newline at end of file -- Gitee From 7f55a5c28ce038bd9160a0e4aa5ec635bbbe0e8b Mon Sep 17 00:00:00 2001 From: minghangc <29514143@qq.com> Date: Mon, 10 Mar 2025 14:37:22 +0800 Subject: [PATCH 242/333] =?UTF-8?q?[feat][pre-research]=E3=80=91P2P?= =?UTF-8?q?=E7=AE=97=E5=AD=90=E9=85=8D=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprof_analyze/cluster_analyse/README.md | 3 +- .../common_func/table_constant.py | 18 ++ .../recipes/p2p_pairing/__init__.py | 14 + .../recipes/p2p_pairing/p2p_pairing.py | 243 ++++++++++++++++++ .../msprof_analyze/prof_common/constant.py | 44 +++- .../msprof_analyze/prof_common/db_manager.py | 15 ++ .../prof_exports/p2p_pairing_export.py | 71 +++++ 7 files changed, 406 insertions(+), 2 deletions(-) create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/__init__.py create mode 100644 profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/p2p_pairing.py create mode 100644 profiler/msprof_analyze/prof_exports/p2p_pairing_export.py diff --git a/profiler/msprof_analyze/cluster_analyse/README.md 
b/profiler/msprof_analyze/cluster_analyse/README.md index dc5c801e537..5147fa65148 100644 --- a/profiler/msprof_analyze/cluster_analyse/README.md +++ b/profiler/msprof_analyze/cluster_analyse/README.md @@ -82,7 +82,8 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( | cluster_time_summary | 集群场景性能数据分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db和analysis.db文件。--export_type为db时,输出交付件cluster_analysis.db,db里面有ClusterTimeSummary,不支持导出notebook。 | 否 | | cluster_time_compare_summary | 集群场景性能数据对比分析,使用前集群数据必须先分析cluster_time_summary,需要配合--bp参数使用。输入性能数据需要基于cluster_analysis_output下的cluster_analysis.db文件。--export_type为db时,输出交付件cluster_analysis.db,db文件中有对比结果的表ClusterTimeCompareSummary,不支持导出notebook。 | 否 | | slow_rank_pp_stage | 集群场景性能数据pp stage通信对比分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。输入性能数据中MetaData表如果没有包含训练任务的并行策略,则需要通过--tp --pp --dp手动传入,数据类型为正整数。--export_type为db时,输出交付件cluster_analysis.db,db文件中有分析结果PPAnalysisResult和P2PAnalysisResult,不支持导出notebook。 | 否 | - + | p2p_pairing | 集群场景P2P算子生成全局关联索引,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。输出的关联索引会作为一个新的字段`opConnectionId`附在原性能数据ascend_pytorch_profiler_{rank_id}.db文件的`COMMUNICATION_OP`的表中。 | 否 | + --parallel_mode参数示例如下: ```bash diff --git a/profiler/msprof_analyze/cluster_analyse/common_func/table_constant.py b/profiler/msprof_analyze/cluster_analyse/common_func/table_constant.py index 27daae78cb9..3acb8713e21 100644 --- a/profiler/msprof_analyze/cluster_analyse/common_func/table_constant.py +++ b/profiler/msprof_analyze/cluster_analyse/common_func/table_constant.py @@ -39,3 +39,21 @@ class TableConstant: DST_RANK = "dst_rank" TRANSPORT_TYPE = "transport_type" OPNAME = "op_name" + + +class ProfilerTableConstant: + + # COMMUNICATION OP + OP_ID = "opId" + OP_NAME = "opName" + START_NS = "startNS" + END_NS = "endNS" + CONNECTION_ID = "connectionId" + GROUP_NAME = "groupName" + RELAY = "relay" + RETRY = "retry" + DATA_TYPE = "dataType" + ALG_TYPE = "algType" + COUNT = "count" + OP_TYPE = "opType" + 
WAIT_NS = "waitNS" diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/__init__.py b/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/__init__.py new file mode 100644 index 00000000000..a355e5a7f08 --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/p2p_pairing.py b/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/p2p_pairing.py new file mode 100644 index 00000000000..b3cce9d214e --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/p2p_pairing.py @@ -0,0 +1,243 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from json import JSONDecodeError + +import numpy as np +import pandas as pd + +from msprof_analyze.cluster_analyse.recipes.base_recipe_analysis import BaseRecipeAnalysis +from msprof_analyze.cluster_analyse.common_func.table_constant import ProfilerTableConstant +from msprof_analyze.prof_common.constant import Constant +from msprof_analyze.prof_common.db_manager import DBManager +from msprof_analyze.prof_common.file_manager import FileManager +from msprof_analyze.prof_common.logger import get_logger +from msprof_analyze.prof_exports.p2p_pairing_export import P2PPairingExport + + +logger = get_logger() + + +class P2PPairing(BaseRecipeAnalysis): + + P2P_OP_NAME_PATTERN = r"^hcom_([Ss]end|[Rr](ecv|eceive))__\d+_\d+_\d+$" + DOMAIN_ID_EXTRACT_PATTERN = r"__(\d+)_\d+_\d+" + RECEIVE_OP_MATCH_PATTERN = r"[Rr]ecv|[Rr]eceive" + VALID_DST_RANK_TASK_TYPE = [Constant.NOTIFY_RECORD, Constant.NOTIFY_WAIT] + # intermediate dataframe column names + COL_NAME_IS_UNIQUE_VALUE = "isUniqueValue" + COL_NAME_OP_DST_RANK = "opDstRank" + COL_NAME_DOMAIN_ID = "domainId" + COL_NAME_IS_RECEIVE = "isReceive" + COL_NAME_OP_NAMING_INDEX = "opNamingIndex" + # output column name + COL_NAME_P2P_CONNECTION_ID = "opConnectionId" + # export params + TARGET_TABLE_NAME = Constant.TABLE_COMMUNICATION_OP + + def __init__(self, params): + super().__init__(params) + logger.info("P2PPairing init.") + + @property + def base_dir(self): + return os.path.basename(os.path.dirname(__file__)) + + def run(self, context): + self.mapper_func(context) + logger.info("P2PPairing completed.") + + def update_connection_info_to_table(self, df_result, profiler_db_path): + """ + 将生成好的连接ID添加至COMMUNICATION OP表中,新增列`opConnectionId`。目前只处理Send和Recv算子,对应的opId会更新具体的连接ID, + 否则置空 + """ + conn, cursor = DBManager.create_connect_db(profiler_db_path) + ret = DBManager.check_columns_exist(cursor, self.TARGET_TABLE_NAME, {self.COL_NAME_P2P_CONNECTION_ID}) + if ret is None: + logger.error("Failed to connect to the database. 
Please check the database configurations") + return + if self.COL_NAME_P2P_CONNECTION_ID in ret: + logger.error(f"`{self.COL_NAME_P2P_CONNECTION_ID}` already exists in the {self.TARGET_TABLE_NAME}. " + f"Exiting to prevent result overwrite.") + return + DBManager.execute_sql( + conn, + f"ALTER TABLE {self.TARGET_TABLE_NAME} ADD COLUMN {self.COL_NAME_P2P_CONNECTION_ID} TEXT" + ) + DBManager.execute_sql( + conn, + f"UPDATE {self.TARGET_TABLE_NAME} SET {self.COL_NAME_P2P_CONNECTION_ID} = NULL" + ) + DBManager.executemany_sql( + conn, + f""" + UPDATE {self.TARGET_TABLE_NAME} + SET {self.COL_NAME_P2P_CONNECTION_ID} = ? + WHERE {ProfilerTableConstant.OP_ID} = ?;""", + [(row[self.COL_NAME_P2P_CONNECTION_ID], row[P2PPairingExport.CO_OP_NAME]) + for _, row in df_result.iterrows()] + ) + DBManager.destroy_db_connect(conn, cursor) + + def generate_p2p_connection_index(self, df): + """ + 生成每一个P2P的算子的对应连接ID,连接ID的生成规则按照`通信域_Send卡号_Recv卡号_算子index`。 + 其中通信域为通信域字符串的哈希值后三位表示;Send卡和Recv卡分别为这个通信域内的local rank号;算子index是这两张卡之间按时间线排序, + 出现Send和Recv算子已有的频次。比如说,一个算子的名称为`hcom_send_233_58_1`,自己在通信域内的rank号为0,对端的rank号为1;在这之前 + 并没有存在0卡向1卡的Send任务。因此生成的id为`233_0_1_0` + """ + df[self.COL_NAME_DOMAIN_ID] = df[P2PPairingExport.OP_NAME]. \ + str.extract(self.DOMAIN_ID_EXTRACT_PATTERN)[0] + df[self.COL_NAME_IS_RECEIVE] = df[P2PPairingExport.OP_NAME]. \ + str.contains(self.RECEIVE_OP_MATCH_PATTERN) + df.loc[ + df[self.COL_NAME_IS_RECEIVE], [P2PPairingExport.SRC_RANK, self.COL_NAME_OP_DST_RANK] + ] = df.loc[ + df[self.COL_NAME_IS_RECEIVE], [self.COL_NAME_OP_DST_RANK, P2PPairingExport.SRC_RANK] + ].values + df[self.COL_NAME_OP_NAMING_INDEX] = df.sort_values(by=[P2PPairingExport.START_TIME]). 
\ + groupby([P2PPairingExport.SRC_RANK, self.COL_NAME_OP_DST_RANK]).cumcount() + df[self.COL_NAME_P2P_CONNECTION_ID] = (df[self.COL_NAME_DOMAIN_ID].astype(str) + "_" + + df[P2PPairingExport.SRC_RANK].astype(str) + "_" + + df[self.COL_NAME_OP_DST_RANK].astype(str) + "_" + + df[self.COL_NAME_OP_NAMING_INDEX].astype(str)) + return df.reset_index() + + def fine_filtering_src_dst_ranks(self, df: pd.DataFrame): + """ + 精筛符合条件的数据: + 1、小算子任务包含了“Notify_Record”和“Notify_Wait”的数据 + 2、上一步得到的数据中对端卡号是否一致,如果不一致则会抛出warning + 3、步骤1得到数据中本端卡号是否一致,如果不一致则会报出error返回空值 + """ + df = df[df[P2PPairingExport.TASK_TYPE].isin(self.VALID_DST_RANK_TASK_TYPE)] + + def check_dst_rank_unique(group): + return group[P2PPairingExport.DST_RANK].nunique() == 1 + + unique_dst_rank: pd.DataFrame = (df.groupby(P2PPairingExport.OP_NAME) + .apply(check_dst_rank_unique, include_groups=False)) + + def get_dst_rank_value(group): + if group[P2PPairingExport.DST_RANK].nunique() == 1: + return group[P2PPairingExport.DST_RANK].iloc[0] + return np.nan + + dst_rank_value: pd.DataFrame = (df.groupby(P2PPairingExport.OP_NAME, group_keys=False). + apply(get_dst_rank_value, include_groups=False)) + + df = df.copy() + df[self.COL_NAME_IS_UNIQUE_VALUE] = df[P2PPairingExport.OP_NAME].map(unique_dst_rank) + df[self.COL_NAME_OP_DST_RANK] = df[P2PPairingExport.OP_NAME].map(dst_rank_value) + df[self.COL_NAME_OP_DST_RANK] = df[self.COL_NAME_OP_DST_RANK].fillna(Constant.INVALID_RANK_NUM) + df[self.COL_NAME_OP_DST_RANK] = df[self.COL_NAME_OP_DST_RANK].astype(df[P2PPairingExport.DST_RANK].dtype) + + check_dst_rank_unique_false: pd.DataFrame = df[~df[self.COL_NAME_IS_UNIQUE_VALUE]] + if not check_dst_rank_unique_false.empty: + logger.warning(f"There are communication op entries with multiple destination ranks! 
" + f"Please check the corresponding profiler database file.") + + df = df[df[self.COL_NAME_IS_UNIQUE_VALUE]] + + src_rank_unique_values: int = df[P2PPairingExport.SRC_RANK].nunique() + if src_rank_unique_values != 1: + logger.error(f"There are communication op entries with multiple source ranks! " + f"Please check the corresponding profiler database file.") + return None + return df.reset_index() + + def filter_data_by_group_name(self, df: pd.DataFrame): + """ + 初步筛选出目标数据: + 1、筛选出Send和Recv的算子 + 2、筛选出同一opId在COMMUNICATION OP中groupName和COMMUNICATION TASK INFO中groupName一致的数据 + """ + df = df[df[P2PPairingExport.OP_NAME].str.match(self.P2P_OP_NAME_PATTERN)] + filtered_df = df[df[P2PPairingExport.CO_GROUP_NAME] == df[P2PPairingExport.CTI_GROUP_NAME]] + anomaly_group_match = df[df[P2PPairingExport.CO_GROUP_NAME] != df[P2PPairingExport.CTI_GROUP_NAME]] + if not anomaly_group_match.empty: + logger.warning(f"Group name mismatch in {len(anomaly_group_match)} entries. Please check the" + f" profiler database in communication task info.") + return filtered_df.reset_index() + + def extract_pp_group_from_metadata(self, profiler_parent_path) -> any: + """ + 从profiler_metadata.json的文件中获取pp通信域的信息 + """ + metadata_path = os.path.join(profiler_parent_path, Constant.PROFILER_METADATA) + try: + if os.path.exists(metadata_path): + metadata = FileManager.read_json_file(metadata_path) + parallel_group_info: dict = metadata.get(Constant.PARALLEL_GROUP_INFO, None) if metadata else None + else: + raise FileNotFoundError(f"No `{Constant.PROFILER_METADATA}` found in {profiler_parent_path}.") + except (FileNotFoundError, JSONDecodeError) as e: + logger.error(f"Failed to load profiler metadata: {e}") + return None + + if parallel_group_info is None: + logger.error(f"No key name `{Constant.PARALLEL_GROUP_INFO}` found in {metadata_path}") + return None + + pp_group_info = [] + for name in parallel_group_info: + each_group_info: dict = parallel_group_info[name] + if 
each_group_info[Constant.GROUP_NAME] == Constant.PP: + pp_group_info.append(parallel_group_info[name]) + if not pp_group_info: + logger.error(f"No pipeline parallel info found in {metadata_path}") + return None + + return pp_group_info + + def _mapper_func(self, data_map, analysis_class): + profiler_db_path: str = data_map.get(Constant.PROFILER_DB_PATH) + profiler_parent_path: str = os.path.dirname(os.path.dirname(profiler_db_path)) + + df: pd.DataFrame = P2PPairingExport(profiler_db_path, analysis_class).read_export_db() + if df is None or df.empty: + logger.warning(f"There is no stats data in {profiler_db_path}.") + return None + + pp_group_info = self.extract_pp_group_from_metadata(profiler_parent_path) # 暂时没用到,预留给后续确认用全局rank + if pp_group_info is None: + logger.error(f"Cannot obtain pipeline parallel info from the metadata. " + f"Please check the corresponding {Constant.PROFILER_METADATA}") + + df = self.filter_data_by_group_name(df) + if df.empty: + return None + + df_filtered = self.fine_filtering_src_dst_ranks(df.copy()) + if df_filtered is None: + logger.error("Got error when trying to match rank numbers!") + return None + + df_result = df_filtered.groupby([P2PPairingExport.OP_NAME, P2PPairingExport.CO_OP_NAME]).agg( + { + P2PPairingExport.START_TIME: "first", + P2PPairingExport.SRC_RANK: "first", + self.COL_NAME_OP_DST_RANK: "first" + } + ).reset_index() + + df_result = self.generate_p2p_connection_index(df_result) + + df_result = df_result[[P2PPairingExport.CO_OP_NAME, self.COL_NAME_P2P_CONNECTION_ID]] + + self.update_connection_info_to_table(df_result, profiler_db_path) + return data_map.get(Constant.RANK_ID) diff --git a/profiler/msprof_analyze/prof_common/constant.py b/profiler/msprof_analyze/prof_common/constant.py index 8aa499eef45..d77589dbcc0 100644 --- a/profiler/msprof_analyze/prof_common/constant.py +++ b/profiler/msprof_analyze/prof_common/constant.py @@ -114,6 +114,34 @@ class Constant(object): DB = "db" INVALID = "invalid" + # profiler db 
tables + TABLE_AICORE_FREQ = "AICORE_FREQ" + TABLE_CANN_API = "CANN_API" + TABLE_COMMUNICATION_OP = "COMMUNICATION_OP" + TABLE_COMMUNICATION_TASK_INFO = "COMMUNICATION_TASK_INFO" + TABLE_COMPUTE_TASK_INFO = "COMPUTE_TASK_INFO" + TABLE_CONNECTION_IDS = "CONNECTION_IDS" + TABLE_CONNECTION_CATS = "connectionCats" + TABLE_ENUM_API_TYPE = "ENUM_API_TYPE" + TABLE_ENUM_HCCL_DATA_TYPE = "ENUM_HCCL_DATA_TYPE" + TABLE_ENUM_HCCL_LINK_TYPE = "ENUM_HCCL_LINK_TYPE" + TABLE_ENUM_HCCL_RDMA_TYPE = "ENUM_HCCL_RDMA_TYPE" + TABLE_ENUM_TRANSPORT_TYPE = "ENUM_TRANSPORT_TYPE" + TABLE_ENUM_MODULE = "ENUM_MODULE" + TABLE_MSTX_EVENT_TYPE = "MSTX_EVENT_TYPE" + TABLE_HOST_INFO = "HOST_INFO" + TABLE_META_DATA = "META_DATA" + TABLE_NPU_INFO = "NPU_INFO" + TABLE_OVERLAP_ANALYSIS = "OVERLAP_ANALYSIS" + TABLE_PYTORCH_API = "PYTORCH_API" + TABLE_RANK_DEVICE_MAP = "RANK_DEVICE_MAP" + TABLE_SESSION_TIME_INFO = "SESSION_TIME_INFO" + TABLE_STATUS_INFO = "status_info" + TABLE_STEP_TIME = "STEP_TIME" + TABLE_STRING_IDS = "STRING_IDS" + TABLE_TASK = "TASK" + TABLE_TASK_MPU_INFO = "TASK_MPU_INFO" + # export_type NOTEBOOK = "notebook" @@ -139,6 +167,14 @@ class Constant(object): # metadata key DISTRIBUTED_ARGS = "distributed_args" + PARALLEL_GROUP_INFO = "parallel_group_info" + + # parallel_info_key + GROUP_NAME = "group_name" + GLOBAL_RANKS = "global_ranks" + + # group name value + PP = "pp" # mode ALL = "all" @@ -252,6 +288,10 @@ class Constant(object): VOID_STEP = -1 + # communication task type + NOTIFY_RECORD = "Notify_Record" + NOTIFY_WAIT = "Notify_Wait" + # advisor # timeline @@ -435,4 +475,6 @@ class Constant(object): # hccl_sum UINT32_BITS = 32 - UINT32_MASK = 0xffffffff \ No newline at end of file + UINT32_MASK = 0xffffffff + + INVALID_RANK_NUM = 4294967295 diff --git a/profiler/msprof_analyze/prof_common/db_manager.py b/profiler/msprof_analyze/prof_common/db_manager.py index f4012da13d8..8740499c27e 100644 --- a/profiler/msprof_analyze/prof_common/db_manager.py +++ 
b/profiler/msprof_analyze/prof_common/db_manager.py @@ -284,6 +284,21 @@ class DBManager: cls.insert_data_into_table(conn, table_name, data) cls.destroy_db_connect(conn, curs) + @classmethod + def check_columns_exist(cls, curs: any, table_name: str, columns: set) -> any: + """ + check columns exist in table, return empty set if none of them exist, else return the set of existing columns + """ + if not isinstance(curs, sqlite3.Cursor): + return None + try: + curs.execute(f"PRAGMA table_info({table_name})") + table_columns = {col[1] for col in curs.fetchall()} + return columns & table_columns + except sqlite3.Error as err: + logger.error(err) + return None + class CustomizedDictFactory: @staticmethod diff --git a/profiler/msprof_analyze/prof_exports/p2p_pairing_export.py b/profiler/msprof_analyze/prof_exports/p2p_pairing_export.py new file mode 100644 index 00000000000..2f6a7394261 --- /dev/null +++ b/profiler/msprof_analyze/prof_exports/p2p_pairing_export.py @@ -0,0 +1,71 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from string import Template + +from msprof_analyze.cluster_analyse.common_func.table_constant import TableConstant +from msprof_analyze.prof_exports.base_stats_export import BaseStatsExport + + +QUERY = Template(""" +SELECT + co.opName AS "$opNameId", + siii.value AS "$opName", + co.startNs AS "$startTime", + co.endNs AS "$endTime", + rdm.rankId AS "$globalRank", + cti.srcRank AS "$srcRank", + cti.dstRank AS "$dstRank", + siiii.value AS "$taskType", + sii.value AS "$coGroupName", + si.value AS "$ctiGroupName" +FROM + COMMUNICATION_TASK_INFO cti + LEFT JOIN COMMUNICATION_OP co on cti.opId = co.opId + CROSS JOIN RANK_DEVICE_MAP rdm + JOIN STRING_IDS si on cti.groupName = si.id + JOIN STRING_IDS sii on co.groupName = sii.id + JOIN STRING_IDS siii on co.opName = siii.id + JOIN STRING_IDS siiii on cti.taskType = siiii.id +""") + + +class P2PPairingExport(BaseStatsExport): + + CO_OP_NAME = "opNameId" + OP_NAME = "opName" + START_TIME = "startTime" + END_TIME = "endTime" + GLOBAL_RANK = "globalRank" + SRC_RANK = "srcRank" + DST_RANK = "dstRank" + TASK_TYPE = "taskType" + CO_GROUP_NAME = "coGroupName" + CTI_GROUP_NAME = "ctiGroupName" + + + def __init__(self, db_path, recipe_name): + super().__init__(db_path, recipe_name) + self._query = QUERY.safe_substitute( + opNameId=self.CO_OP_NAME, + opName=self.OP_NAME, + startTime=self.START_TIME, + endTime=self.END_TIME, + globalRank=self.GLOBAL_RANK, + srcRank=self.SRC_RANK, + dstRank=self.DST_RANK, + taskType=self.TASK_TYPE, + coGroupName=self.CO_GROUP_NAME, + ctiGroupName=self.CTI_GROUP_NAME + ) -- Gitee From 00bd948a4cfe0eefc86e298f757f5d84872aba17 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 09:59:54 +0800 Subject: [PATCH 243/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py 
index ffc366699e1..e7617553c6a 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -23,7 +23,6 @@ from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, loa from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException from msprobe.core.data_dump.data_processor.mindspore_processor import MindsporeDataProcessor -import mindspore as ms class DataWriter: @@ -40,7 +39,6 @@ class DataWriter: self.cache_stack = {} self.cache_construct = {} self.cache_debug = {} - self.MindsporeDataProcessor = MindsporeDataProcessor self.stat_stack_list = [] @staticmethod -- Gitee From 7df95d7ca205cac5379463b3ac7054bb6b37aef1 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 10:04:04 +0800 Subject: [PATCH 244/333] Update pytorch_processor.py --- .../core/data_dump/data_processor/pytorch_processor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index b8437e9f10b..50567b28cb0 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -144,7 +144,7 @@ class PytorchDataProcessor(BaseDataProcessor): if data.is_meta: return tensor_stat data_clone = data.detach() - if data_clone.numel() == 0: + if not data_clone.numel() or not data_clone.data_ptr(): return tensor_stat else: if data_clone.device.type == Const.CPU_LOWERCASE or not async_dump: @@ -229,7 +229,7 @@ class PytorchDataProcessor(BaseDataProcessor): if isinstance(element, dist.ProcessGroup): return self._analyze_process_group(element) if isinstance(element, dist.P2POp): - return self._analyze_p2pop(element) + return self._analyze_p2pop(element, 
Const.SEP.join([str(suffix) for suffix in suffix_stack])) if isinstance(element, dist.ReduceOp): return self._analyze_reduce_op(element) converted_numpy, numpy_type = self._convert_numpy_to_builtin(element) @@ -248,10 +248,10 @@ class PytorchDataProcessor(BaseDataProcessor): module_input_output.update_output_with_args_and_kwargs() return super().analyze_forward_output(name, module, module_input_output) - def _analyze_p2pop(self, arg): + def _analyze_p2pop(self, arg, suffix): p2pop_info = {"class_type": "torch.distributed.P2POp"} try: - tensor_info = self._analyze_tensor(arg.tensor, []) + tensor_info = self._analyze_tensor(arg.tensor, suffix) p2pop_info.update({"tensor": tensor_info}) p2pop_info.update({"op": arg.op.__name__}) p2pop_info.update({"peer": arg.peer}) -- Gitee From 9cb727f94d3a4860d822b75654edd1b4f1c6836a Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 10:09:12 +0800 Subject: [PATCH 245/333] Update mindspore_processor.py --- .../core/data_dump/data_processor/mindspore_processor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 8a8aa5cc7fd..6fb36ee2167 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -79,10 +79,10 @@ class MindsporeDataProcessor(BaseDataProcessor): if not ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm - tensor_stat.max = mint.max(data).item() - tensor_stat.min = mint.min(data).item() - tensor_stat.mean = mint.mean(data).item() - tensor_stat.norm = get_norm_value(data).item() + tensor_stat.max = mint.max(data) + tensor_stat.min = mint.min(data) + tensor_stat.mean = 
mint.mean(data) + tensor_stat.norm = get_norm_value(data) return tensor_stat @staticmethod -- Gitee From e347e0998ad8e1a3cde080f190f47342cd90534f Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 11 Mar 2025 10:24:06 +0800 Subject: [PATCH 246/333] fix review --- .../api_accuracy_checker/compare/algorithm.py | 40 ++++++++++++++++++ .../run_ut/distributed_bench_function.py | 41 ++++++++----------- .../run_ut/distributed_function_registry.py | 26 ++++++------ .../run_ut/run_distributed_check.py | 33 +++++++-------- 4 files changed, 88 insertions(+), 52 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py index ddee254c2b1..965147232a4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py @@ -261,3 +261,43 @@ def compare_bool_tensor(bench_output, device_output): error_rate = float(error_nums / bench_output.size) result = CompareConst.PASS if error_rate == 0 else CompareConst.ERROR return error_rate, result, "" + + +def reduce_sum(tensors): + return torch.stack(tensors).sum(dim=0) + + +def reduce_product(tensors): + return torch.stack(tensors).prod(dim=0) + + +def reduce_min(tensors): + return torch.stack(tensors).min(dim=0).values + + +def reduce_max(tensors): + return torch.stack(tensors).max(dim=0).values + + +def reduce_band(tensors): + reduce_tensor = tensors[0].clone() + if len(tensors) > 1: + for t in tensors[1:]: + reduce_tensor &= t + return reduce_tensor + + +def reduce_bor(tensors): + reduce_tensor = tensors[0].clone() + if len(tensors) > 1: + for t in tensors[1:]: + reduce_tensor |= t + return reduce_tensor + + +def reduce_bxor(tensors): + reduce_tensor = tensors[0].clone() + if len(tensors) > 1: + for t in tensors[1:]: + reduce_tensor ^= t + return reduce_tensor diff --git 
a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py index 5a0a274c730..e48c1cbf157 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py @@ -20,6 +20,19 @@ import torch from msprobe.core.common.const import DistributedCheckConst from msprobe.pytorch.api_accuracy_checker.common.utils import check_object_type from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import get_distributed_args +from msprobe.pytorch.api_accuracy_checker.compare.algorithm import reduce_sum, reduce_product, reduce_min, \ + reduce_max, reduce_band, reduce_bor, reduce_bxor + + +reduce_ops = { + DistributedCheckConst.REDOPTYPE_SUM: reduce_sum, + DistributedCheckConst.REDOPTYPE_PRODUCT: reduce_product, + DistributedCheckConst.REDOPTYPE_MIN: reduce_min, + DistributedCheckConst.REDOPTYPE_MAX: reduce_max, + DistributedCheckConst.REDOPTYPE_BAND: reduce_band, + DistributedCheckConst.REDOPTYPE_BOR: reduce_bor, + DistributedCheckConst.REDOPTYPE_BXOR: reduce_bxor, +} def mock_broadcast(api_name, input_args, input_kwargs): @@ -55,30 +68,10 @@ def mock_reduce(api_name, input_args, input_kwargs): reduce_tensor = None if not tensors: return reduce_tensor - if reduce_op == DistributedCheckConst.RedOpType_SUM: - reduce_tensor = torch.stack(tensors).sum(dim=0) - elif reduce_op == DistributedCheckConst.RedOpType_PRODUCT: - reduce_tensor = torch.stack(tensors).prod(dim=0) - elif reduce_op == DistributedCheckConst.RedOpType_MIN: - reduce_tensor = torch.stack(tensors).min(dim=0).values - elif reduce_op == DistributedCheckConst.RedOpType_MAX: - reduce_tensor = torch.stack(tensors).max(dim=0).values - elif reduce_op == DistributedCheckConst.RedOpType_BAND: - reduce_tensor = tensors[0].clone() - if len(tensors) > 1: 
- for t in tensors[1:]: - reduce_tensor &= t - elif reduce_op == DistributedCheckConst.RedOpType_BOR: - reduce_tensor = tensors[0].clone() - if len(tensors) > 1: - for t in tensors[1:]: - reduce_tensor |= t - elif reduce_op == DistributedCheckConst.RedOpType_BXOR: - reduce_tensor = tensors[0].clone() - if len(tensors) > 1: - for t in tensors[1:]: - reduce_tensor ^= t - + if reduce_op not in reduce_ops: + raise ValueError(f"Unsupported reduce operation: {reduce_op}") + reduce_tensor = reduce_ops[reduce_op](tensors) + return reduce_tensor diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py index 86d552d81ad..9bd481e7143 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py @@ -39,6 +39,11 @@ class DistributedFunctionRegistry: def register_bench_function(self, api_name: str, function: Callable): self.bench_functions[api_name] = function + + def register_functions(self, functions_dict): + for api_name, (bench_function, compare_function) in functions_dict.items(): + self.register_bench_function(api_name, bench_function) + self.register_compare_function(api_name, compare_function) def get_compare_function(self, api_name: str) -> Callable: if not self.compare_functions.get(api_name): @@ -51,16 +56,13 @@ class DistributedFunctionRegistry: return self.bench_functions.get(api_name) +functions_dict = { + DistributedCheckConst.BROADCAST: (mock_broadcast, compare_broadcast), + DistributedCheckConst.ALL_REDUCE: (mock_reduce, compare_all_reduce), + DistributedCheckConst.SCATTER: (mock_scatter, compare_scatter), + DistributedCheckConst.ALL_GATHER: (mock_all_gather, compare_all_gather), + DistributedCheckConst.ALL_TO_ALL: (mock_all_to_all, compare_all_to_all), + 
DistributedCheckConst.ALL_TO_ALL_SINGLE: (mock_all_to_all_single, compare_all_to_all_single), +} distributed_func_registry = DistributedFunctionRegistry() -distributed_func_registry.register_bench_function(DistributedCheckConst.BROADCAST, mock_broadcast) -distributed_func_registry.register_compare_function(DistributedCheckConst.BROADCAST, compare_broadcast) -distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_REDUCE, mock_reduce) -distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_REDUCE, compare_all_reduce) -distributed_func_registry.register_bench_function(DistributedCheckConst.SCATTER, mock_scatter) -distributed_func_registry.register_compare_function(DistributedCheckConst.SCATTER, compare_scatter) -distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_GATHER, mock_all_gather) -distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_GATHER, compare_all_gather) -distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_TO_ALL, mock_all_to_all) -distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_TO_ALL, compare_all_to_all) -distributed_func_registry.register_bench_function(DistributedCheckConst.ALL_TO_ALL_SINGLE, mock_all_to_all_single) -distributed_func_registry.register_compare_function(DistributedCheckConst.ALL_TO_ALL_SINGLE, compare_all_to_all_single) +distributed_func_registry.register_functions(functions_dict) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 80975130899..59c7ead2cd8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -1,9 +1,7 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- # Copyright (c) 2024-2025, 
Huawei Technologies Co., Ltd. # All rights reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # @@ -15,39 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import os import sys import time -import argparse from collections import namedtuple -import torch import torch_npu import torch.distributed as dist import torch.multiprocessing as mp - -from msprobe.core.common.file_utils import FileChecker, write_csv, create_directory from msprobe.core.common.const import Const, FileCheckConst, DistributedCheckConst, CompareConst +from msprobe.core.common.file_utils import FileChecker, write_csv, create_directory from msprobe.core.compare.utils import check_and_return_dir_contents -from msprobe.pytorch.hook_module.wrap_distributed import distributed_func -from msprobe.pytorch.pt_config import parse_json_config -from msprobe.pytorch.common.log import logger -from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward +from msprobe.pytorch.api_accuracy_checker.common.config import CheckerConfig +from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import generate_device_params, get_group_info, \ is_port_in_use from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut import get_api_info from msprobe.pytorch.api_accuracy_checker.run_ut.distributed_function_registry import distributed_func_registry -from msprobe.pytorch.api_accuracy_checker.common.utils import extract_basic_api_segments -from msprobe.pytorch.api_accuracy_checker.common.config import CheckerConfig +from msprobe.pytorch.common.log import logger +from msprobe.pytorch.common.parse_json import parse_json_info_forward_backward 
+from msprobe.pytorch.hook_module.api_register import get_api_register +from msprobe.pytorch.pt_config import parse_json_config + +api_register = get_api_register(return_new=True) +api_register.initialize_hook(None) +distribute_api_key = Const.PT_FRAMEWORK + Const.SEP + Const.PT_API_TYPE_DIST +distributed_func = api_register.ori_api_attr.get(distribute_api_key, {}) os.environ['HCCL_DETERMINISTIC'] = str(True) current_time = time.strftime("%Y%m%d%H%M%S") RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" RESULT_CSV_HEADER = [['API_NAME', 'RANK', 'COMPARE_RESULT', 'MESSAGE']] DistributedCheckParams = namedtuple("DistributedCheckParams", ["api_full_name", "all_args", "all_kwargs", - "group_ranks", "result_file_path", "checker_config"]) + "group_ranks", "result_file_path", "checker_config"]) special_rank_api_list = [DistributedCheckConst.SCATTER, DistributedCheckConst.ALL_TO_ALL, DistributedCheckConst.ALL_TO_ALL_SINGLE] @@ -150,8 +151,8 @@ def run_distributed_check(forward_contents, real_data_paths, result_file_path, c all_args, all_kwargs = get_distributed_args_kwargs(forward_contents, api_full_name, real_data_paths, group_ranks) try: - distributed_check_params = DistributedCheckParams(api_full_name, all_args, all_kwargs, group_ranks, - result_file_path, checker_config) + distributed_check_params = DistributedCheckParams(api_full_name, all_args, all_kwargs, group_ranks, + result_file_path, checker_config) distributed_check(distributed_check_params) except Exception as e: logger.error("The api {} in rank {} distributed check failed.".format(api_full_name, rank)) -- Gitee From f0a110befedaa9eccc687adc044008d8ffbd4e35 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 10:24:50 +0800 Subject: [PATCH 247/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py 
b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index e7617553c6a..79ba11a2258 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -158,7 +158,8 @@ class DataWriter: # 在写 JSON 前,统一获取统计值 stat_result = self.flush_stat_stack() # 遍历 cache_data,将占位符替换为最终统计值 - self._replace_stat_placeholders(self.cache_data, stat_result) + if stat_result: + self._replace_stat_placeholders(self.cache_data, stat_result) if self.cache_data: self.write_data_json(self.dump_file_path) if self.cache_stack: @@ -172,7 +173,6 @@ class DataWriter: """ 递归搜索 data 中所有包含 'tensor_stat_index' 键的字典, 用 stat_result 中对应下标的统计值替换,并删除占位键。 - 同时打印调试信息,帮助检查哪些占位索引越界了。 """ if isinstance(data, dict): for key, value in list(data.items()): -- Gitee From 2701f8894bce97de73a58ddec5201ce66eee9203 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 11 Mar 2025 10:29:17 +0800 Subject: [PATCH 248/333] fix cleancode --- .../run_ut/distributed_function_registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py index 9bd481e7143..cd53e864d82 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py @@ -56,7 +56,7 @@ class DistributedFunctionRegistry: return self.bench_functions.get(api_name) -functions_dict = { +functions_map = { DistributedCheckConst.BROADCAST: (mock_broadcast, compare_broadcast), DistributedCheckConst.ALL_REDUCE: (mock_reduce, compare_all_reduce), DistributedCheckConst.SCATTER: (mock_scatter, compare_scatter), @@ -65,4 +65,4 @@ functions_dict = { DistributedCheckConst.ALL_TO_ALL_SINGLE: (mock_all_to_all_single, compare_all_to_all_single), } 
distributed_func_registry = DistributedFunctionRegistry() -distributed_func_registry.register_functions(functions_dict) +distributed_func_registry.register_functions(functions_map) -- Gitee From 8ee77140c835356e5fe510639ee92844c8607e65 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 10:32:20 +0800 Subject: [PATCH 249/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 79ba11a2258..1654ef7090e 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -178,14 +178,10 @@ class DataWriter: for key, value in list(data.items()): if key == "tensor_stat_index" and isinstance(value, int): idx = value - # 打印当前占位索引和统计列表长度 - print(f"DEBUG: Found tensor_stat_index = {idx}, stat_result length = {len(stat_result)}") if idx < len(stat_result): stat_values = stat_result[idx] - print(f"DEBUG: Replacing index {idx} with values: {stat_values}") data["Max"], data["Min"], data["Mean"], data["Norm"] = stat_values else: - print(f"ERROR: Index out of range! 
idx = {idx}, but stat_result length = {len(stat_result)}") data["Max"], data["Min"], data["Mean"], data["Norm"] = None, None, None, None del data["tensor_stat_index"] else: -- Gitee From 695d33102db6f6aa3c29fdf719b35b1459d8f732 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 11 Mar 2025 10:33:22 +0800 Subject: [PATCH 250/333] fix bug --- .../run_ut/distributed_function_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py index cd53e864d82..6758b4ff4f8 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_function_registry.py @@ -62,7 +62,7 @@ functions_map = { DistributedCheckConst.SCATTER: (mock_scatter, compare_scatter), DistributedCheckConst.ALL_GATHER: (mock_all_gather, compare_all_gather), DistributedCheckConst.ALL_TO_ALL: (mock_all_to_all, compare_all_to_all), - DistributedCheckConst.ALL_TO_ALL_SINGLE: (mock_all_to_all_single, compare_all_to_all_single), + DistributedCheckConst.ALL_TO_ALL_SINGLE: (mock_all_to_all_single, compare_all_to_all_single) } distributed_func_registry = DistributedFunctionRegistry() distributed_func_registry.register_functions(functions_map) -- Gitee From 4648c1a846646e99cc0b2d9063495184cff4bb7f Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 11 Mar 2025 10:36:44 +0800 Subject: [PATCH 251/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/docs/21.visualization_PyTorch.md | 2 +- 
.../accuracy_tools/msprobe/docs/22.visualization_MindSpore.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/21.visualization_PyTorch.md b/debug/accuracy_tools/msprobe/docs/21.visualization_PyTorch.md index b96331ad9e3..fe7a431a738 100644 --- a/debug/accuracy_tools/msprobe/docs/21.visualization_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/21.visualization_PyTorch.md @@ -470,7 +470,7 @@ yaml文件中只需配置待调试侧与标杆侧模型代码中功能一致但 ### 7.3 自定义映射文件(multi) 支持一对一、一对多、多对一、多对多节点映射配置,**多个节点使用英文逗号,分隔开**。 -配置多个节点时,如果待配置节点为Module.layer3.Linear.forward.0、Module.layer4.Linear.forward.0和Module.layer5.Linear.forward.0,则Module.layer4.Linear.forward.0无需配置,仅取首尾节点配置即可(Module.layer3.Linear.forward.0,Module.layer5.Linear.forward.0)。注意,**配置节点的先后顺序不能乱(construct.json中的节点名称顺序代表先后顺序)**,Module.layer3.Linear.forward.0在前,就不能配置成Module.layer5.Linear.forward.0,Module.layer3.Linear.forward.0,会导致配置无效。 +配置多个节点时,如果待配置节点为Module.layer3.Linear.forward.0、Module.layer4.Linear.forward.0和Module.layer5.Linear.forward.0,则Module.layer4.Linear.forward.0无需配置,仅取首尾节点配置即可(Module.layer3.Linear.forward.0,Module.layer5.Linear.forward.0)。注意,**配置节点的先后顺序不能乱(construct.json中的节点名称顺序代表先后顺序,请参考[dump结果文件介绍](./05.data_dump_PyTorch.md#3-dump-结果文件介绍))**,Module.layer3.Linear.forward.0在前,就不能配置成Module.layer5.Linear.forward.0,Module.layer3.Linear.forward.0,会导致配置无效。 ```yaml # 一对一 diff --git a/debug/accuracy_tools/msprobe/docs/22.visualization_MindSpore.md b/debug/accuracy_tools/msprobe/docs/22.visualization_MindSpore.md index 707b9e88c3a..4da58b028c6 100644 --- a/debug/accuracy_tools/msprobe/docs/22.visualization_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/22.visualization_MindSpore.md @@ -486,7 +486,7 @@ yaml文件中只需配置MindSpore与PyTorch模型代码中功能一致但名称 ### 7.3 自定义映射文件(multi) 支持一对一、一对多、多对一、多对多节点映射配置,**多个节点使用英文逗号,分隔开**。 
-配置多个节点时,如果待配置节点为Cell.layer3.Linear.forward.0、Cell.layer4.Linear.forward.0和Cell.layer5.Linear.forward.0,则Cell.layer4.Linear.forward.0无需配置,仅取首尾节点配置即可(Cell.layer3.Linear.forward.0,Cell.layer5.Linear.forward.0)。注意,**配置节点的先后顺序不能乱(construct.json中的节点名称顺序代表先后顺序)**,Cell.layer3.Linear.forward.0在前,就不能配置成Cell.layer5.Linear.forward.0,Cell.layer3.Linear.forward.0,会导致配置无效。 +配置多个节点时,如果待配置节点为Cell.layer3.Linear.forward.0、Cell.layer4.Linear.forward.0和Cell.layer5.Linear.forward.0,则Cell.layer4.Linear.forward.0无需配置,仅取首尾节点配置即可(Cell.layer3.Linear.forward.0,Cell.layer5.Linear.forward.0)。注意,**配置节点的先后顺序不能乱(construct.json中的节点名称顺序代表先后顺序,请参考[dump结果文件介绍](./06.data_dump_MindSpore.md#82-动态图场景))**,Cell.layer3.Linear.forward.0在前,就不能配置成Cell.layer5.Linear.forward.0,Cell.layer3.Linear.forward.0,会导致配置无效。 ```yaml # 一对一 @@ -504,7 +504,6 @@ Cell.layer1.Linear.forward.0,Cell.layer2.Linear.forward.0: Cell.layer.Linear.for # 多对多 Cell.layer3.Linear.forward.0,Cell.layer5.Linear.forward.0: Cell.layer1.Linear.forward.0,Cell.layer2.Linear.forward.0 ``` - # FAQ 1. 图比对场景,节点呈现灰色,且没有精度比对数据,怎么处理? 
-- Gitee From 6ae38df8da6d610515eedd42e902915b34389cf1 Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 11 Mar 2025 10:53:20 +0800 Subject: [PATCH 252/333] fix bug --- .../pytorch/api_accuracy_checker/run_ut/run_distributed_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 59c7ead2cd8..5d4ad5203cc 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -213,6 +213,7 @@ def run_hccl(rank, distributed_config): device_args, _ = generate_device_params(rank_args, rank_kwargs, False, api_name) logger.info("Start to check distributed api {} in rank {}.".format(api_full_name, local_rank)) distributed_func.get(api_name)(*device_args) + dist.barrier() if api_name in special_rank_api_list: local_rank = rank compare_function = distributed_func_registry.get_compare_function(api_name) -- Gitee From 437a932e35493a967139f60d9b01d47ffc0d2c5c Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 11 Mar 2025 10:54:59 +0800 Subject: [PATCH 253/333] fix review --- debug/accuracy_tools/msprobe/core/common/const.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index 943fdaef610..f55cb253942 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -744,13 +744,13 @@ class DistributedCheckConst: BROADCAST_SRC_INDEX = 1 FIRST_TENSOR_INDEX = 0 - RedOpType_SUM = "RedOpType.SUM" - RedOpType_PRODUCT = "RedOpType.PRODUCT" - RedOpType_MIN = "RedOpType.MIN" - RedOpType_MAX = "RedOpType.MAX" - RedOpType_BAND = "RedOpType.BAND" - RedOpType_BOR = "RedOpType.BOR" - 
RedOpType_BXOR = "RedOpType.BXOR" + REDOPTYPE_SUM = "RedOpType.SUM" + REDOPTYPE_PRODUCT = "RedOpType.PRODUCT" + REDOPTYPE_MIN = "RedOpType.MIN" + REDOPTYPE_MAX = "RedOpType.MAX" + REDOPTYPE_BAND = "RedOpType.BAND" + REDOPTYPE_BOR = "RedOpType.BOR" + REDOPTYPE_BXOR = "RedOpType.BXOR" API_ARGS_INDEX = { "broadcast": { -- Gitee From d65024ca74b6c4162d61efefd6f1dae02fdb3a8c Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 14:36:07 +0800 Subject: [PATCH 254/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 1654ef7090e..a36f30aaea4 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -22,7 +22,6 @@ from msprobe.core.common.const import Const, FileCheckConst from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException -from msprobe.core.data_dump.data_processor.mindspore_processor import MindsporeDataProcessor class DataWriter: -- Gitee From a93e5f71a5e6b5fbc5782fdac4cba390f7a5e3fa Mon Sep 17 00:00:00 2001 From: gitee Date: Tue, 11 Mar 2025 14:51:34 +0800 Subject: [PATCH 255/333] fix param bug --- .../msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index c7a48844ee8..51fe32de810 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -319,7 +319,8 @@ def 
run_torch_api_online(api_full_name, api_data, backward_content): if kwargs.get("device"): del kwargs["device"] - device_out = exec_api(api_type, api_name, Const.CUDA_LOWERCASE, args, kwargs) + device_exec_params = ExecParams(api_type, api_name, current_device, args, kwargs, False, None) + device_out = exec_api(device_exec_params) device_out = move2device_exec(device_out, "cpu") return UtDataInfo(None, None, out, device_out, None, in_fwd_data_list, None, rank=api_data.rank) -- Gitee From 9bd876b81bd7b3327f4e9304d80d3e0235410ec6 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 11 Mar 2025 15:57:21 +0800 Subject: [PATCH 256/333] Update file_utils.py --- .../msprobe/core/common/file_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/file_utils.py b/debug/accuracy_tools/msprobe/core/common/file_utils.py index fdc626ca6a1..39389c2fae5 100644 --- a/debug/accuracy_tools/msprobe/core/common/file_utils.py +++ b/debug/accuracy_tools/msprobe/core/common/file_utils.py @@ -392,14 +392,14 @@ def load_json(json_path): def save_json(json_path, data, indent=None, mode="w"): check_path_before_create(json_path) json_path = os.path.realpath(json_path) - try: - with FileOpen(json_path, mode) as f: - fcntl.flock(f, fcntl.LOCK_EX) - json.dump(data, f, indent=indent) - fcntl.flock(f, fcntl.LOCK_UN) - except Exception as e: - logger.error(f'Save json file "{os.path.basename(json_path)}" failed.') - raise RuntimeError(f"Save json file {json_path} failed.") from e + # try: + with FileOpen(json_path, mode) as f: + fcntl.flock(f, fcntl.LOCK_EX) + json.dump(data, f, indent=indent) + fcntl.flock(f, fcntl.LOCK_UN) + # except Exception as e: + # logger.error(f'Save json file "{os.path.basename(json_path)}" failed.') + # raise RuntimeError(f"Save json file {json_path} failed.") from e change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) -- Gitee From 
c91aea39d7f0b014c53a6d7cbfa22a80c81968dc Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 11 Mar 2025 16:01:41 +0800 Subject: [PATCH 257/333] compare get_name_and_state indexerror protection --- debug/accuracy_tools/msprobe/core/common/utils.py | 1 + debug/accuracy_tools/msprobe/core/compare/utils.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index 38fcddfaead..5e4aed0e87d 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -75,6 +75,7 @@ class MsprobeBaseException(Exception): MERGE_COMPARE_RESULT_ERROR = 33 NAMES_STRUCTS_MATCH_ERROR = 34 INVALID_STATE_ERROR = 35 + INVALID_API_NAME_ERROR = 36 def __init__(self, code, error_info: str = ""): super(MsprobeBaseException, self).__init__() diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index e93ff775e78..a2335857c71 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -549,6 +549,9 @@ def get_name_and_state(name): return name.split(Const.PARAMS_GRAD)[0], Const.PARAMS_GRAD split = re.split(Const.REGEX_FORWARD_BACKWARD, name) + if len(split) < 3: + logger.error(f'Invalid name string: {name}, can not be split by forward/backward, please check.') + raise CompareException(CompareException.INVALID_API_NAME_ERROR) api = f'{split[0]}.{split[1]}.' 
state_str = split[2] match = re.match(r'^(\d+\.)?(input|output|kwargs|parameters)\..+$', state_str) -- Gitee From 3167600e6f7a9dda2ee40bee0c13c9f292e9d559 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Tue, 11 Mar 2025 16:39:35 +0800 Subject: [PATCH 258/333] =?UTF-8?q?=E9=9D=99=E6=80=81=E5=9B=BEL0=E7=BA=A7?= =?UTF-8?q?=E8=B7=A8=E6=A1=86=E6=9E=B6=E6=AF=94=E5=AF=B9=E9=80=82=E9=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs/11.accuracy_compare_MindSpore.md | 28 ++++++++++--------- .../msprobe/mindspore/compare/ms_compare.py | 10 +++++-- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md b/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md index 1b1824a774f..3cfa6c2f8ed 100644 --- a/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md @@ -16,8 +16,10 @@ msprobe精度比对工具主要用于如下场景: - MindSpore与PyTorch跨框架比对 - 通过对同一个网络模型,在整网环境下分别在MindSpore动态图和PyTorch环境下获得API dump数据,以PyTorch数据作为标杆,进行自动比对,从而实现跨框架的精度对比。 - 通过对同一个网络模型,在整网环境下分别在MindSpore动态图和PyTorch环境下获得cell dump数据,由用户指定可以比对的cell list,以PyTorch数据作为标杆,进行自动比对,从而实现跨框架的精度对比。 + - 通过对同一个网络模型,在整网环境下分别在MindSpore静态图和PyTorch环境下获得cell dump数据,由用户指定可以比对的cell list,以PyTorch数据作为标杆,进行自动比对,从而实现跨框架的精度对比。 - 通过对同一个网络模型,在整网环境下分别在MindSpore动态图和PyTorch环境下获得API或模块dump数据,由用户指定可以比对的API或模块,以PyTorch数据作为标杆,进行自动比对,从而实现跨框架的精度对比。 - 通过对同一个网络模型,在整网环境下分别在MindSpore动态图和PyTorch环境下获得API或模块dump数据,由用户指定可以比对的模型代码中的Layer层,以PyTorch数据作为标杆,进行自动比对,从而实现跨框架的精度对比。 + - 通过对同一个网络模型,在整网环境下分别在MindSpore静态图和PyTorch环境下获得模块dump数据,由用户指定可以比对的模型代码中的Layer层,以PyTorch数据作为标杆,进行自动比对,从而实现跨框架的精度对比。 执行精度比对操作需要安装msprobe工具。详见《[MindStudio精度调试工具](../README.md)》的“工具安装”章节。 @@ -35,17 +37,17 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s **完整参数说明** -| 参数名 | 说明 | 是否必选 | -| -------------------- | 
------------------------------------------------------------ | -------- | -| -i或--input_path | 指定比对文件。比对文件内容及示例请参见[比对文件](#31-比对文件)或[比对文件(kernel)](#32-比对文件kernel)(比对文件(kernel)仅[不同版本下的全量kernel比对](#23-不同版本下的全量kernel比对)场景支持)。 | 是 | +| 参数名 | 说明 | 是否必选 | +| -------------------- |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------- | +| -i或--input_path | 指定比对文件。比对文件内容及示例请参见[比对文件](#31-比对文件)或[比对文件(kernel)](#32-比对文件kernel)(比对文件(kernel)仅[不同版本下的全量kernel比对](#23-不同版本下的全量kernel比对)场景支持)。 | 是 | | -o或--output_path | 配置比对结果文件存盘目录,默认会在当前目录创建output目录。文件名称基于时间戳自动生成,格式为:
`compare_result_{timestamp}.xlsx`
`compare_result_{rank_id}_{step_id}_{timestamp}.xlsx`(仅[不同版本下的全量kernel比对](#23-不同版本下的全量kernel比对)场景支持)。 | 否 | -| -s或--stack_mode | 比对结果展示调用栈信息(NPU_Stack_Info)的开关,bool 类型。单卡场景开启时,需要使用[比对文件](#31-比对文件)的单卡场景配置stack_path指定stack.json文件,才能生成详细调用栈信息,否则在比对时会报错;暂不支持多卡场景。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | -| -c或--compare_only | 仅比对开关,bool 类型。该参数默认未配置,会启用自动精度分析,工具自动针对比对结果进行分析,识别到第一个精度可能不达标节点(在比对结果文件中的 Accuracy Reached or Not 列显示为 No),并给出问题可能产生的原因(打屏展示并生成 `advisor_{timestamp}.txt` 文件)。通过配置该参数取消自动精度分析,仅输出比对结果表格。 | 否 | -| -f或--fuzzy_match | 模糊匹配。开启后,对于网络中同一层级且命名仅调用次数不同的API,可匹配并进行比对。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | -| -am或--api_mapping | 跨框架比对。配置该参数时表示开启跨框架API比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(api_mapping)](#33-自定义映射文件api_mapping)。仅[跨框架的API比对](#25-跨框架的api比对)场景需要配置。 | 否 | -| -cm或--cell_mapping | 跨框架比对。配置该参数时表示开启跨框架cell模块比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(cell_mapping)](#34-自定义映射文件cell_mapping)。仅[跨框架的cell模块比对](#26-跨框架的cell模块比对)场景需要配置。 | 否 | -| -dm或--data_mapping | 同框架或跨框架比对。通过映射文件指定两个具体参数的对应关系,可以在L0、L1或mix采集场景下使用。配置该参数的同时需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(data_mapping)](#35-自定义映射文件data_mapping)。 | 否 | -| -lm或--layer_mapping | 跨框架比对。配置该参数时表示开启跨框架Layer层的比对功能,指定模型代码中的Layer层后,可以识别对应dump数据中的模块或API。需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(Layer_mapping)](#36-自定义映射文件layer_mapping)。仅[跨框架的Layer层比对](#27-跨框架的layer层比对)场景需要配置。 | 否 | +| -s或--stack_mode | 比对结果展示调用栈信息(NPU_Stack_Info)的开关,bool 类型。单卡场景开启时,需要使用[比对文件](#31-比对文件)的单卡场景配置stack_path指定stack.json文件,才能生成详细调用栈信息,否则在比对时会报错;暂不支持多卡场景。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | +| -c或--compare_only | 仅比对开关,bool 类型。该参数默认未配置,会启用自动精度分析,工具自动针对比对结果进行分析,识别到第一个精度可能不达标节点(在比对结果文件中的 Accuracy Reached or Not 列显示为 No),并给出问题可能产生的原因(打屏展示并生成 `advisor_{timestamp}.txt` 文件)。通过配置该参数取消自动精度分析,仅输出比对结果表格。 | 否 | +| -f或--fuzzy_match | 模糊匹配。开启后,对于网络中同一层级且命名仅调用次数不同的API,可匹配并进行比对;对于跨框架比对场景不再校验dtype与pytorch侧的一致性,可匹配并进行比对。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | +| -am或--api_mapping | 
跨框架比对。配置该参数时表示开启跨框架API比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(api_mapping)](#33-自定义映射文件api_mapping)。仅[跨框架的API比对](#25-跨框架的api比对)场景需要配置。 | 否 | +| -cm或--cell_mapping | 跨框架比对。配置该参数时表示开启跨框架cell模块比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(cell_mapping)](#34-自定义映射文件cell_mapping)。仅[跨框架的cell模块比对](#26-跨框架的cell模块比对)场景需要配置。 | 否 | +| -dm或--data_mapping | 同框架或跨框架比对。通过映射文件指定两个具体参数的对应关系,可以在L0、L1或mix采集场景下使用。配置该参数的同时需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(data_mapping)](#35-自定义映射文件data_mapping)。 | 否 | +| -lm或--layer_mapping | 跨框架比对。配置该参数时表示开启跨框架Layer层的比对功能,指定模型代码中的Layer层后,可以识别对应dump数据中的模块或API。需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(Layer_mapping)](#36-自定义映射文件layer_mapping)。仅[跨框架的Layer层比对](#27-跨框架的layer层比对)场景需要配置。 | 否 | 动态图模式没有填写任何mapping时,按照同框架比对的方式进行比对,比对数据和标杆数据的Cell或Api名称需要完全相同才能匹配得上。 @@ -137,13 +139,13 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s 4. 执行如下示例命令进行比对: ```shell - msprobe -f mindspore compare -i ./compare.json -o ./output -s -cm + msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -cm ``` 或 ```shell - msprobe -f mindspore compare -i ./compare.json -o ./output -s -cm cell_mapping.yaml + msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -cm cell_mapping.yaml ``` cell_mapping.yaml文件配置请参见[自定义映射文件(cell_mapping)](#34-自定义映射文件cell_mapping)。 @@ -151,7 +153,7 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s 此外,也可以通过data_mapping.yaml文件实现具体参数的匹配,例: ```shell - msprobe -f mindspore compare -i ./compare.json -o ./output -s -dm data_mapping.yaml + msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -dm data_mapping.yaml ``` data_mapping.yaml的写法请参见[自定义映射文件(data_mapping)](#35-自定义映射文件data_mapping)。 diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 4f158512bb4..7811227e590 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ 
b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -269,13 +269,19 @@ class MSComparator(Comparator): npu_df[CompareConst.COMPARE_SHAPE] = npu_df[Const.SHAPE] bench_df[CompareConst.COMPARE_KEY] = bench_df[CompareConst.OP_NAME] bench_df[CompareConst.COMPARE_SHAPE] = bench_df[Const.SHAPE] - match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY, CompareConst.COMPARE_SHAPE], - how='outer') + if self.fuzzy_match: + match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY], how='outer') + else: + match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY, CompareConst.COMPARE_SHAPE], + how='outer') match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) def gen_dtype_condition(): npu_dtype = match_result['dtype_x'] bench_dtype = match_result['dtype_y'] + if self.fuzzy_match: + true_condition = pd.Series(True, index=npu_dtype.index) + return true_condition if self.cross_frame: npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) -- Gitee From 6cd5f34f9005bae0b5b32550fb9eb0c0ebbf1262 Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Tue, 11 Mar 2025 23:11:55 +0800 Subject: [PATCH 259/333] check if registe_full_backward_hook is provided in msadapter --- .../accuracy_tools/msprobe/core/data_dump/api_registry.py | 4 ++-- debug/accuracy_tools/msprobe/mindspore/common/utils.py | 2 +- .../msprobe/mindspore/dump/hook_cell/api_register.py | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py b/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py index 27106044e1e..5aeafd573d6 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py @@ -90,8 +90,8 @@ class ApiWrapper: target_module = api_modules[0] if Const.SEP in api_name: sub_module_name, target_attr = api_name.rsplit(Const.SEP, 1) - target_module = 
getattr(api_modules[0], sub_module_name) - if target_attr in dir(target_module): + target_module = getattr(api_modules[0], sub_module_name, None) + if target_module and target_attr in dir(target_module): names.add(api_name) valid_names[api_type] = names api_names[framework] = valid_names diff --git a/debug/accuracy_tools/msprobe/mindspore/common/utils.py b/debug/accuracy_tools/msprobe/mindspore/common/utils.py index 6da6db0f830..625842da589 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/utils.py +++ b/debug/accuracy_tools/msprobe/mindspore/common/utils.py @@ -179,7 +179,7 @@ def set_register_backward_hook_functions(): from msprobe.mindspore.mindtorch import (_call_impl, register_full_backward_pre_hook, register_full_backward_hook) - if not hasattr(torch, "register_full_backward_hook"): + if not hasattr(torch.nn.Module, "register_full_backward_hook"): setattr(torch.nn.Module, "_call_impl", _call_impl) setattr(torch.nn.Module, "register_full_backward_pre_hook", register_full_backward_pre_hook) setattr(torch.nn.Module, "register_full_backward_hook", register_full_backward_hook) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py index 0f4621b3a03..53271ff07be 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py @@ -27,6 +27,8 @@ from msprobe.mindspore.common.const import Const as MsConst from msprobe.mindspore.common.utils import is_mindtorch from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell + +cur_path = os.path.dirname(os.path.realpath(__file__)) if not is_mindtorch(): _api_types = { Const.MS_FRAMEWORK: { @@ -38,6 +40,7 @@ if not is_mindtorch(): Const.MS_API_TYPE_COM: (comm_func, (comm_func,)) } } + _supported_api_list_path = (os.path.join(cur_path, MsConst.SUPPORTED_API_LIST_FILE),) else: import torch import torch_npu @@ -50,7 +53,8 
@@ else: Const.PT_API_TYPE_DIST: (torch.distributed, (torch.distributed, torch.distributed.distributed_c10d)) } } - + _supported_api_list_path = (os.path.join(cur_path, '../../../pytorch/hook_module', + MsConst.SUPPORTED_API_LIST_FILE),) _inner_used_api = { Const.MS_FRAMEWORK + Const.SEP + Const.MS_API_TYPE_OPS: ( @@ -64,8 +68,6 @@ _inner_used_api = { ) } -_supported_api_list_path = (os.path.join(os.path.dirname(os.path.realpath(__file__)), MsConst.SUPPORTED_API_LIST_FILE),) - class ApiTemplate(HOOKCell): def __init__(self, api_name, api_func, prefix, hook_build_func): -- Gitee From 6075f9ada1566b885b0746def9e649c842c04ca1 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Wed, 12 Mar 2025 09:38:04 +0800 Subject: [PATCH 260/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/mindspore/compare/ms_compare.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py index 7811227e590..843afa1a98f 100644 --- a/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py +++ b/debug/accuracy_tools/msprobe/mindspore/compare/ms_compare.py @@ -269,19 +269,14 @@ class MSComparator(Comparator): npu_df[CompareConst.COMPARE_SHAPE] = npu_df[Const.SHAPE] bench_df[CompareConst.COMPARE_KEY] = bench_df[CompareConst.OP_NAME] bench_df[CompareConst.COMPARE_SHAPE] = bench_df[Const.SHAPE] - if self.fuzzy_match: - match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY], how='outer') - else: - match_result = pd.merge(npu_df, bench_df, on=[CompareConst.COMPARE_KEY, CompareConst.COMPARE_SHAPE], - how='outer') + match_result = pd.merge(npu_df, bench_df, on=([CompareConst.COMPARE_KEY] if self.fuzzy_match + else [CompareConst.COMPARE_KEY, CompareConst.COMPARE_SHAPE]), + how='outer') 
match_result = match_result[match_result['op_name_x'].notna()].fillna(CompareConst.N_A) def gen_dtype_condition(): npu_dtype = match_result['dtype_x'] bench_dtype = match_result['dtype_y'] - if self.fuzzy_match: - true_condition = pd.Series(True, index=npu_dtype.index) - return true_condition if self.cross_frame: npu_dtype = npu_dtype.map(dtype_mapping).fillna(npu_dtype) @@ -294,7 +289,8 @@ class MSComparator(Comparator): ) return equal_condition | match_condition - match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A + if not self.fuzzy_match: + match_result.loc[~gen_dtype_condition(), [i + '_y' for i in bench_df.columns]] = CompareConst.N_A return self.make_result_df(match_result) def modify_compare_data_with_user_mapping(self, npu_df, bench_df): -- Gitee From aa3bb3e99a25398d427931aff8b6c10e45590131 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Wed, 12 Mar 2025 09:54:17 +0800 Subject: [PATCH 261/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=B5=84=E6=96=99?= =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/docs/11.accuracy_compare_MindSpore.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md b/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md index 3cfa6c2f8ed..c4e50f82bb7 100644 --- a/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/11.accuracy_compare_MindSpore.md @@ -43,7 +43,7 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s | -o或--output_path | 配置比对结果文件存盘目录,默认会在当前目录创建output目录。文件名称基于时间戳自动生成,格式为:
`compare_result_{timestamp}.xlsx`
`compare_result_{rank_id}_{step_id}_{timestamp}.xlsx`(仅[不同版本下的全量kernel比对](#23-不同版本下的全量kernel比对)场景支持)。 | 否 | | -s或--stack_mode | 比对结果展示调用栈信息(NPU_Stack_Info)的开关,bool 类型。单卡场景开启时,需要使用[比对文件](#31-比对文件)的单卡场景配置stack_path指定stack.json文件,才能生成详细调用栈信息,否则在比对时会报错;暂不支持多卡场景。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | | -c或--compare_only | 仅比对开关,bool 类型。该参数默认未配置,会启用自动精度分析,工具自动针对比对结果进行分析,识别到第一个精度可能不达标节点(在比对结果文件中的 Accuracy Reached or Not 列显示为 No),并给出问题可能产生的原因(打屏展示并生成 `advisor_{timestamp}.txt` 文件)。通过配置该参数取消自动精度分析,仅输出比对结果表格。 | 否 | -| -f或--fuzzy_match | 模糊匹配。开启后,对于网络中同一层级且命名仅调用次数不同的API,可匹配并进行比对;对于跨框架比对场景不再校验dtype与pytorch侧的一致性,可匹配并进行比对。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | +| -f或--fuzzy_match | 模糊匹配。开启后,对于跨框架比对场景不再校验dtype与pytorch侧的一致性,可匹配并进行比对。通过直接配置该参数开启,默认未配置,表示关闭。 | 否 | | -am或--api_mapping | 跨框架比对。配置该参数时表示开启跨框架API比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(api_mapping)](#33-自定义映射文件api_mapping)。仅[跨框架的API比对](#25-跨框架的api比对)场景需要配置。 | 否 | | -cm或--cell_mapping | 跨框架比对。配置该参数时表示开启跨框架cell模块比对功能,可以指定自定义映射文件*.yaml,不指定映射文件时按照msprobe定义的默认映射关系进行比对。自定义映射文件的格式请参见[自定义映射文件(cell_mapping)](#34-自定义映射文件cell_mapping)。仅[跨框架的cell模块比对](#26-跨框架的cell模块比对)场景需要配置。 | 否 | | -dm或--data_mapping | 同框架或跨框架比对。通过映射文件指定两个具体参数的对应关系,可以在L0、L1或mix采集场景下使用。配置该参数的同时需要指定自定义映射文件*.yaml。自定义映射文件的格式请参见[自定义映射文件(data_mapping)](#35-自定义映射文件data_mapping)。 | 否 | @@ -139,21 +139,26 @@ msprobe -f mindspore compare -i ./compare.json -o ./output -s 4. 
执行如下示例命令进行比对: ```shell - msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -cm + msprobe -f mindspore compare -i ./compare.json -o ./output -s -cm ``` 或 ```shell - msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -cm cell_mapping.yaml + msprobe -f mindspore compare -i ./compare.json -o ./output -s -cm cell_mapping.yaml ``` cell_mapping.yaml文件配置请参见[自定义映射文件(cell_mapping)](#34-自定义映射文件cell_mapping)。 不传入cell_mapping.yaml的情况下仅将Cell改成Module后进行匹配;传入cell_mapping.yaml的情况下将按照cell_mapping.yaml的内容进行匹配。 + 如果跨框架比对场景不需要考虑dtype与pytorch侧的一致性,匹配并进行比对,可以开启-f或--fuzzy_match选项,例: + ```shell + msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -cm cell_mapping.yaml + ``` + 此外,也可以通过data_mapping.yaml文件实现具体参数的匹配,例: ```shell - msprobe -f mindspore compare -i ./compare.json -o ./output -s -f -dm data_mapping.yaml + msprobe -f mindspore compare -i ./compare.json -o ./output -s -dm data_mapping.yaml ``` data_mapping.yaml的写法请参见[自定义映射文件(data_mapping)](#35-自定义映射文件data_mapping)。 -- Gitee From e4d4165261f8ffe0d0bf68e9a486d5eed514fb65 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 12 Mar 2025 11:00:34 +0800 Subject: [PATCH 262/333] compare get_name_and_state indexerror protection --- debug/accuracy_tools/msprobe/core/compare/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index a2335857c71..93db9ff4251 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -545,6 +545,10 @@ def get_name_and_state(name): state type: input, output, kwargs, parameters, parameters_grad """ + if not isinstance(name, str): + logger.error(f'Invalid name string: {name}, type should be string, please check.') + raise CompareException(CompareException.INVALID_API_NAME_ERROR) + if Const.PARAMS_GRAD in name.split(Const.SEP): return name.split(Const.PARAMS_GRAD)[0], Const.PARAMS_GRAD -- Gitee 
From 5c6aa455eeeae9f55e8671f3558292fd988bff3e Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 12 Mar 2025 11:03:03 +0800 Subject: [PATCH 263/333] compare get_name_and_state indexerror protection --- debug/accuracy_tools/msprobe/core/compare/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/utils.py b/debug/accuracy_tools/msprobe/core/compare/utils.py index 93db9ff4251..66dc9ba94ee 100644 --- a/debug/accuracy_tools/msprobe/core/compare/utils.py +++ b/debug/accuracy_tools/msprobe/core/compare/utils.py @@ -546,7 +546,7 @@ def get_name_and_state(name): state type: input, output, kwargs, parameters, parameters_grad """ if not isinstance(name, str): - logger.error(f'Invalid name string: {name}, type should be string, please check.') + logger.error(f'Invalid name: {name}, type should be string, please check.') raise CompareException(CompareException.INVALID_API_NAME_ERROR) if Const.PARAMS_GRAD in name.split(Const.SEP): -- Gitee From 73b36039b2d353ef0262a03eb307a2a29890c6be Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 12 Mar 2025 11:06:09 +0800 Subject: [PATCH 264/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/pytorch_processor.py | 12 +++--- .../msprobe/core/data_dump/json_writer.py | 43 +++++++++++++------ 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 50567b28cb0..f1fbc9ea5fb 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -281,10 +281,10 @@ class PytorchDataProcessor(BaseDataProcessor): 
tensor_json.update({"tensor_stat_index": placeholder_index}) tensor_json.update({"requires_grad": tensor.requires_grad}) if tensor_stat.max is not None: - if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max): + if torch.isinf(tensor_stat.max) or torch.isnan(tensor_stat.max): tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max") if tensor_stat.min is not None: - if np.isinf(tensor_stat.min) or np.isnan(tensor_stat.min): + if torch.isinf(tensor_stat.min) or torch.isnan(tensor_stat.min): tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min") else: @@ -317,7 +317,7 @@ class TensorDataProcessor(PytorchDataProcessor): saved_tensor = tensor.clone().contiguous().detach() save_pt(saved_tensor, file_path) return single_arg - + def _analyze_numpy(self, ndarray, suffix): dump_data_name, file_path = self.get_save_file_path(suffix) save_pt(torch.tensor(ndarray), file_path) @@ -417,8 +417,10 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): def _analyze_maybe_overflow_tensor(self, tensor_json): if tensor_json['Max'] is None or tensor_json['Min'] is None: return - self.has_overflow = np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']) or \ - np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min']) + + stat_stack_list + self.has_overflow = torch.isinf(tensor_json['Max']) or torch.isnan(tensor_json['Max']) or \ + torch.isinf(tensor_json['Min']) or torch.isnan(tensor_json['Min']) def _analyze_tensor(self, tensor, suffix): dump_data_name, file_path = self.get_save_file_path(suffix) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index a36f30aaea4..7b066a1a53c 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -50,7 +50,7 @@ class DataWriter: spawn_writer = csv.writer(csv_file) if not is_exists: 
spawn_writer.writerow(result_header) - spawn_writer.writerows([result,]) + spawn_writer.writerows([result, ]) is_new_file = not is_exists if is_new_file: change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) @@ -145,6 +145,7 @@ class DataWriter: return [] result = [ [ + x.cpu().detach().numpy().tolist() if hasattr(x, "cpu") else x.asnumpy().tolist() if hasattr(x, "asnumpy") else x for x in stat_values ] @@ -156,9 +157,11 @@ class DataWriter: def write_json(self): # 在写 JSON 前,统一获取统计值 stat_result = self.flush_stat_stack() + print(f"before:{self.cache_data}") # 遍历 cache_data,将占位符替换为最终统计值 if stat_result: self._replace_stat_placeholders(self.cache_data, stat_result) + print(f"after:{self.cache_data}") if self.cache_data: self.write_data_json(self.dump_file_path) if self.cache_stack: @@ -169,20 +172,36 @@ class DataWriter: self.write_debug_info_json(self.debug_file_path) def _replace_stat_placeholders(self, data, stat_result): - """ - 递归搜索 data 中所有包含 'tensor_stat_index' 键的字典, - 用 stat_result 中对应下标的统计值替换,并删除占位键。 - """ if isinstance(data, dict): - for key, value in list(data.items()): + keys = list(data.keys()) # 获取当前所有键 + for key in keys: # 避免遍历时修改字典 + value = data[key] if key == "tensor_stat_index" and isinstance(value, int): idx = value - if idx < len(stat_result): - stat_values = stat_result[idx] - data["Max"], data["Min"], data["Mean"], data["Norm"] = stat_values - else: - data["Max"], data["Min"], data["Mean"], data["Norm"] = None, None, None, None - del data["tensor_stat_index"] + stat_values = stat_result[idx] if idx < len(stat_result) else [None] * 4 + + # 构建新字段并删除旧键 + new_entries = { + "type": data["type"], + "dtype": data["dtype"], + "shape": data["shape"], + "Max": stat_values[0], + "Min": stat_values[1], + "Mean": stat_values[2], + "Norm": stat_values[3] + } + del data[key] + + # 重构字典顺序 + updated_dict = {} + # 先插入统计字段 + updated_dict.update(new_entries) + # 保留原字典其他字段(排除已删除的tensor_stat_index) + for k in data: + if k not in new_entries: + updated_dict[k] = 
data[k] + data.clear() + data.update(updated_dict) else: self._replace_stat_placeholders(value, stat_result) elif isinstance(data, list): -- Gitee From 875996bbf0b995f4e54c7fa46ec2a5bffb3fdcf9 Mon Sep 17 00:00:00 2001 From: pxp1 <958876660@qq.com> Date: Wed, 12 Mar 2025 10:57:53 +0800 Subject: [PATCH 265/333] =?UTF-8?q?=E4=B8=8D=E9=87=87=E9=9B=86fsdp?= =?UTF-8?q?=E5=B1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/pytorch/dump/module_dump/module_processer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py index b5ca1da461f..e2239316492 100644 --- a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py +++ b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py @@ -109,6 +109,8 @@ class ModuleProcesser: for name, module in modules_and_names: if module == model: continue + if module.__class__.__name__ == "FullyShardedDataParallel": + continue module_index = (index + Const.SEP) if index != "-1" else "" prefix_name = (BaseScope.Module_Type_Module + Const.SEP + module_index + name + Const.SEP + module.__class__.__name__ + Const.SEP) -- Gitee From ad915bab624fd041fccc68f0451fd43c617efcdc Mon Sep 17 00:00:00 2001 From: curry3 <485078529@qq.com> Date: Wed, 12 Mar 2025 11:43:12 +0800 Subject: [PATCH 266/333] =?UTF-8?q?=E3=80=90=E5=AE=89=E5=85=A8=E3=80=91?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=87=BD=E6=95=B0=E9=80=92=E5=BD=92=E6=B7=B1?= =?UTF-8?q?=E5=BA=A6=E5=88=A4=E6=96=AD=E5=92=8C=E5=8F=8D=E5=90=91hook?= =?UTF-8?q?=E6=A2=AF=E5=BA=A6=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/pytorch/dump/module_dump/module_processer.py | 2 ++ .../msprobe/pytorch/hook_module/hook_module.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py index b5ca1da461f..0cfb86629ba 100644 --- a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py +++ b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py @@ -17,6 +17,7 @@ from functools import wraps import torch from msprobe.core.common.const import Const +from msprobe.core.common.utils import recursion_depth_decorator from msprobe.core.data_dump.scope import BaseScope, ModuleRangeScope, MixRangeScope from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import replace_last_occurrence @@ -58,6 +59,7 @@ class ModuleProcesser: return clone_return_value_func @staticmethod + @recursion_depth_decorator("ModuleDump: ModuleProcesser.clone_if_tensor") def clone_if_tensor(result): if isinstance(result, torch.Tensor): return result.clone() diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py index 71dbfa8aeb1..1eba9897b08 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -110,6 +110,10 @@ class HOOKModule(nn.Module): return result else: return result + + if not (var.requires_grad and torch.is_grad_enabled()): + return result + grad_fn = var.grad_fn if grad_fn is not None: for hook in non_full_backward_hooks: -- Gitee From 4864a86ff3a0acb5a8adb5a3eeb4e89e5f1861ca Mon Sep 17 00:00:00 2001 From: qianggee Date: Wed, 12 Mar 2025 06:10:35 +0000 Subject: [PATCH 267/333] rename bit16 groups --- .../msprobe/pytorch/monitor/optimizer_collect.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py index b7eb4da9c66..8478ed329fa 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py @@ -206,16 +206,16 @@ class MegatronChainedMixPrecisionOptimizerMon(MixPrecisionOptimizerMon): class DeepSpeedZeroOptimizerStage0Mon(OptimizerMon): - def get_group_index(self, params2name, torch_opt): - fp16_groups = torch_opt.bf16_groups + def get_group_index(self, torch_opt): + bit16_groups = torch_opt.bf16_groups param2group = defaultdict() - for group_idx, fp16_group in enumerate(fp16_groups): - for param in fp16_group: + for group_idx, bit16_group in enumerate(bit16_groups): + for param in bit16_group: param2group[param] = group_idx return param2group def fetch_mv(self, monitor, torch_opt, params2name, name2indices=None): - param2group = self.get_group_index(params2name, torch_opt) + param2group = self.get_group_index(torch_opt) exp_avg_dict = defaultdict(float) exp_avg_sq_dict = defaultdict(float) update_dict = defaultdict() @@ -234,7 +234,7 @@ class DeepSpeedZeroOptimizerStage0Mon(OptimizerMon): continue start = hp_address.start numel = hp_address.numel - + if monitor.mv_distribution: exp_avg_dict[name] = state['exp_avg'].narrow(0, 
start, numel) exp_avg_sq_dict[name] = state['exp_avg_sq'].narrow(0, start, numel) @@ -260,7 +260,6 @@ class DeepSpeedZeroOptimizerStage0Mon(OptimizerMon): monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) - class DeepSpeedZeroOptimizerStage3Mon(OptimizerMon): def get_param_index(self, params2name, name2index, torch_opt): fp16_groups = torch_opt.fp16_partitioned_groups -- Gitee From 8bf8b16aff1835058918bc82afd02e8b1c67924d Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 12 Mar 2025 14:25:58 +0800 Subject: [PATCH 268/333] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=BA=A2=E5=87=BA?= =?UTF-8?q?=E6=A3=80=E6=B5=8Bbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_dump/data_processor/mindspore_processor.py | 6 +++--- .../data_dump/data_processor/pytorch_processor.py | 13 +++++++------ .../msprobe/core/data_dump/json_writer.py | 7 +++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 6fb36ee2167..02fb81afa95 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -259,11 +259,11 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor): self.cached_tensors_and_file_paths = {} def _analyze_maybe_overflow_tensor(self, tensor_json): - if tensor_json['Max'] is None: + if self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) is None: return - if np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']): + if ops.isinf(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or 
ops.isnan(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])): self.has_overflow = True - if np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min']): + if ops.isinf(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])): self.has_overflow = True def _analyze_tensor(self, tensor, suffix): diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index f1fbc9ea5fb..eb94cbfb6c0 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -65,7 +65,6 @@ class PytorchDataProcessor(BaseDataProcessor): "dtype": self.analyze_dtype_in_kwargs } self._async_dump_cache = {} - self.stat_stack_list = [] @staticmethod def get_md5_for_tensor(x): @@ -415,12 +414,15 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): raise RuntimeError(f"overflow check failed") from e def _analyze_maybe_overflow_tensor(self, tensor_json): - if tensor_json['Max'] is None or tensor_json['Min'] is None: + print(f"tensor_json['tensor_stat_index']:{tensor_json['tensor_stat_index']}") + print(f"self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']):{self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])}") + print(f"tensor_json:{tensor_json}") + if self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) is None or self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) is None : + return - stat_stack_list - self.has_overflow = torch.isinf(tensor_json['Max']) or torch.isnan(tensor_json['Max']) or \ - torch.isinf(tensor_json['Min']) or torch.isnan(tensor_json['Min']) + self.has_overflow = 
torch.isinf(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or torch.isnan(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or \ + torch.isinf(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) or torch.isnan(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) def _analyze_tensor(self, tensor, suffix): dump_data_name, file_path = self.get_save_file_path(suffix) @@ -434,7 +436,6 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): self._analyze_maybe_overflow_tensor(single_arg) return single_arg - class FreeBenchmarkDataProcessor(PytorchDataProcessor): def __init__(self, config, data_writer): diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 7b066a1a53c..79c792561dc 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -136,6 +136,13 @@ class DataWriter: self.stat_stack_list.append(stat_vector) return len(self.stat_stack_list) - 1 + def get_buffer_values_max(self, index): + return self.stat_stack_list[index][0] + + def get_buffer_values_min(self, index): + print(f"self.stat_stack_list[index]:{self.stat_stack_list[index]}") + return self.stat_stack_list[index][1] + def flush_stat_stack(self): """ 在 flush 阶段,将所有存储的统计值从设备搬到 CPU, -- Gitee From ab6b3243fb8abf29c8573a8c26ebb48ee78777ea Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 12 Mar 2025 14:29:01 +0800 Subject: [PATCH 269/333] Update file_utils.py --- .../msprobe/core/common/file_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/file_utils.py b/debug/accuracy_tools/msprobe/core/common/file_utils.py index 39389c2fae5..fdc626ca6a1 100644 --- a/debug/accuracy_tools/msprobe/core/common/file_utils.py +++ 
b/debug/accuracy_tools/msprobe/core/common/file_utils.py @@ -392,14 +392,14 @@ def load_json(json_path): def save_json(json_path, data, indent=None, mode="w"): check_path_before_create(json_path) json_path = os.path.realpath(json_path) - # try: - with FileOpen(json_path, mode) as f: - fcntl.flock(f, fcntl.LOCK_EX) - json.dump(data, f, indent=indent) - fcntl.flock(f, fcntl.LOCK_UN) - # except Exception as e: - # logger.error(f'Save json file "{os.path.basename(json_path)}" failed.') - # raise RuntimeError(f"Save json file {json_path} failed.") from e + try: + with FileOpen(json_path, mode) as f: + fcntl.flock(f, fcntl.LOCK_EX) + json.dump(data, f, indent=indent) + fcntl.flock(f, fcntl.LOCK_UN) + except Exception as e: + logger.error(f'Save json file "{os.path.basename(json_path)}" failed.') + raise RuntimeError(f"Save json file {json_path} failed.") from e change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) -- Gitee From d5043b3ee3c159284ec10cd0da5346616c8a7d93 Mon Sep 17 00:00:00 2001 From: qianggee Date: Wed, 12 Mar 2025 06:39:10 +0000 Subject: [PATCH 270/333] add line --- .../accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py index 8478ed329fa..86b984af651 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py @@ -260,6 +260,7 @@ class DeepSpeedZeroOptimizerStage0Mon(OptimizerMon): monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) + class DeepSpeedZeroOptimizerStage3Mon(OptimizerMon): def get_param_index(self, params2name, name2index, torch_opt): fp16_groups = torch_opt.fp16_partitioned_groups -- Gitee From dc7c11363f6cd2ecd669822efb3e2dade6cb2219 Mon Sep 
17 00:00:00 2001 From: gitee Date: Wed, 12 Mar 2025 16:05:46 +0800 Subject: [PATCH 271/333] fix safe problem --- .../api_accuracy_checker/run_ut/multi_run_ut.py | 3 +++ .../tensor_transport_layer/attl.py | 16 +++++++++++----- .../pytorch/online_dispatch/dump_compare.py | 9 +++++++-- .../msprobe/pytorch/online_dispatch/utils.py | 14 +++++++++----- .../run_ut/test_multi_run_ut.py | 2 +- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index 498102b475f..3eb7fc0df96 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -50,6 +50,9 @@ def split_json_file(input_file, num_splits, filter_api): backward_data[f"{data_name}.backward"] = backward_data.pop(data_name) input_data = load_json(input_file) + if "dump_data_dir" not in input_data.keys(): + logger.error("Invalid input file, 'dump_data_dir' field is missing") + raise CompareException("Invalid input file, 'dump_data_dir' field is missing") if input_data.get("data") is None: logger.error("Invalid input file, 'data' field is missing") raise CompareException("Invalid input file, 'data' field is missing") diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py index f31c29c6bb6..236c87b1105 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py @@ -27,6 +27,9 @@ from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.client import T from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.server import TCPServer from 
msprobe.core.common.file_utils import remove_path from msprobe.pytorch.common.utils import logger, save_api_data, load_api_data, save_pkl, load_pkl +from msprobe.core.common.const import Const +from msprobe.core.common.utils import CompareException + BufferType = Union[ApiData, Dict[str, Any], str] # Union[Tensor, Tuple[Optional[Tensor]]] @@ -168,19 +171,22 @@ class ATTL: return buffer -def move2device_exec(obj, device): +def move2device_exec(obj, device, depth=0): + if depth > Const.MAX_DEPTH: + logger.error("Maximum recursion depth exceeded") + raise CompareException(CompareException.RECURSION_LIMIT_ERROR) if isinstance(obj, (tuple, list)): - data_list = [move2device_exec(val, device) for val in obj] + data_list = [move2device_exec(val, device, depth=depth + 1) for val in obj] return data_list if isinstance(obj, list) else tuple(data_list) - if isinstance(obj, dict): - return {key: move2device_exec(val, device) for key, val in obj.items()} + if isinstance(obj, dict): + return {key: move2device_exec(val, device, depth=depth + 1) for key, val in obj.items()} elif isinstance(obj, torch.Tensor): obj = obj.detach() if obj.device.type != device: obj = obj.to(device) return obj elif "return_types" in str(type(obj)): - return move2device_exec(tuple(obj), device) + return move2device_exec(tuple(obj), device, depth=depth + 1) elif isinstance(obj, torch._C.device): return torch.device(device) else: diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py index b185bc1110d..810f1ea2756 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py @@ -19,6 +19,8 @@ import os from datetime import datetime, timezone import torch +from msprobe.core.common.const import Const +from msprobe.core.common.utils import CompareException from msprobe.core.common.file_utils import FileOpen, save_npy, 
save_json from msprobe.pytorch.common.log import logger @@ -91,10 +93,13 @@ def support_basic_type(data): return False -def dump_data(data, prefix, dump_path): +def dump_data(data, prefix, dump_path, depth=0): + if depth > Const.MAX_DEPTH: + logger.error(f'dump data depth exceeds max depth:{Const.MAX_DEPTH}') + raise CompareException(CompareException.RECURSION_LIMIT_ERROR) if isinstance(data, (tuple, list)) and data: for i, item in enumerate(data): - dump_data(item, "{}.{}".format(prefix, i), dump_path) + dump_data(item, "{}.{}".format(prefix, i), dump_path, depth=depth + 1) return elif support_basic_type(data): if isinstance(data, torch.Tensor) and data.is_meta: diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py index ae8b9435a34..acd630e8ef4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py @@ -26,7 +26,8 @@ except ImportError: else: pta_cpu_device = torch.device("cpu") -from msprobe.core.common.const import CompareConst +from msprobe.core.common.const import CompareConst, Const +from msprobe.core.common.utils import CompareException from msprobe.pytorch.common.log import logger cpu_device = torch._C.device("cpu") @@ -85,7 +86,10 @@ def get_callstack(): return callstack -def data_to_cpu(data, deep, data_cpu): +def data_to_cpu(data, deep, data_cpu, depth=0): + if depth > Const.MAX_DEPTH: + logger.error("Failed to convert data to cpu, depth exceeds max depth:{}".format(Const.MAX_DEPTH)) + raise CompareException(CompareException.RECURSION_LIMIT_ERROR) global cpu_device list_cpu = [] if isinstance(data, torch.Tensor): @@ -101,13 +105,13 @@ def data_to_cpu(data, deep, data_cpu): return tensor_copy elif isinstance(data, list): for v in data: - list_cpu.append(data_to_cpu(v, deep + 1, data_cpu)) + list_cpu.append(data_to_cpu(v, deep + 1, data_cpu, depth=depth + 1)) if deep == 0: 
data_cpu.append(list_cpu) return list_cpu elif isinstance(data, tuple): for v in data: - list_cpu.append(data_to_cpu(v, deep + 1, data_cpu)) + list_cpu.append(data_to_cpu(v, deep + 1, data_cpu, depth=depth + 1)) tuple_cpu = tuple(list_cpu) if deep == 0: data_cpu.append(tuple_cpu) @@ -115,7 +119,7 @@ def data_to_cpu(data, deep, data_cpu): elif isinstance(data, dict): dict_cpu = {} for k, v in data.items(): - dict_cpu[k] = data_to_cpu(v, deep + 1, data_cpu) + dict_cpu[k] = data_to_cpu(v, deep + 1, data_cpu, depth=depth + 1) if deep == 0: data_cpu.append(dict_cpu) return dict_cpu diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py index 1ad191a0d4e..8eb8fde4fdc 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py @@ -136,7 +136,7 @@ class TestMultiRunUT(unittest.TestCase): def setUp(self): self.test_json_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "dump.json") - self.test_data = {'data': {'key1': 'TRUE', 'key2': 'TRUE', 'key3': 'TRUE'}} + self.test_data = {'dump_data_dir': '/test', 'data': {'key1': 'TRUE', 'key2': 'TRUE', 'key3': 'TRUE'}} self.test_json_content = json.dumps(self.test_data) self.forward_split_files_content = [ {'key1': 'TRUE', 'key2': 'TRUE'}, -- Gitee From 5c5319c089d876c5311b4c828f002a4f252f24e0 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 12 Mar 2025 16:13:52 +0800 Subject: [PATCH 272/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9ms=E6=BA=A2=E5=87=BA?= =?UTF-8?q?=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_dump/data_processor/mindspore_processor.py | 13 +++++++++++-- .../msprobe/core/data_dump/json_writer.py | 1 - 2 files changed, 11 
insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 02fb81afa95..96314f7aa98 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -258,12 +258,21 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor): f"current overflow times: {self.real_overflow_nums}.") self.cached_tensors_and_file_paths = {} + @staticmethod + def convert_to_numpy(self, value): + return value.asnumpy() if hasattr(value, "asnumpy") else value + def _analyze_maybe_overflow_tensor(self, tensor_json): if self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) is None: return - if ops.isinf(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])): + max_value = self.convert_to_numpy(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) + min_value = self.convert_to_numpy(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) + # if ops.isinf(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])): + if np.isinf(max_value) or np.isnan(max_value): self.has_overflow = True - if ops.isinf(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])): + + # if ops.isinf(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])): + if np.isinf(min_value) or np.isnan(min_value): self.has_overflow = True def _analyze_tensor(self, tensor, suffix): diff 
--git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 79c792561dc..f831c5d1535 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -132,7 +132,6 @@ class DataWriter: 直接使用 Python list 存储 stat_vector, 将 stat_vector 存入 self.stat_stack_list 的方式 """ - # stat_vector 是一个已经经过 ensure_nonzero_rank 包裹的 tensor 列表,形如 [max, min, mean, norm] self.stat_stack_list.append(stat_vector) return len(self.stat_stack_list) - 1 -- Gitee From cd2eb942ce9cb3c50f8db5c79dafb783b9ded536 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Thu, 13 Mar 2025 09:25:39 +0800 Subject: [PATCH 273/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E9=80=92=E5=BD=92=E9=99=90=E5=88=B6=E6=B7=B1=E5=BA=A6?= =?UTF-8?q?=EF=BC=8C=E6=A3=80=E6=B5=8B=E6=88=90=E7=8E=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/utils.py | 4 +- .../compare/test_mode_adapter.py | 3 +- .../visualization_ut/graph/test_base_node.py | 4 +- .../test/visualization_ut/graph/test_graph.py | 11 --- .../visualization/compare/graph_comparator.py | 80 ++++++++++--------- .../msprobe/visualization/graph/base_node.py | 9 ++- .../msprobe/visualization/graph/graph.py | 9 --- 7 files changed, 56 insertions(+), 64 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index 38fcddfaead..b4c567bcfa9 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -485,13 +485,13 @@ recursion_depth = defaultdict(int) # 装饰一个函数,当函数递归调用超过限制时,抛出异常并打印函数信息。 -def recursion_depth_decorator(func_info): +def recursion_depth_decorator(func_info, max_depth=Const.MAX_DEPTH): def decorator(func): @wraps(func) def wrapper(*args, **kwargs): func_id = id(func) recursion_depth[func_id] 
+= 1 - if recursion_depth[func_id] > Const.MAX_DEPTH: + if recursion_depth[func_id] > max_depth: msg = f"call {func_info} exceeds the recursion limit." logger.error_log_with_exp( msg, diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py index 87d1f9ee5f0..4c38e4e6200 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py @@ -2,7 +2,8 @@ import json import unittest from unittest.mock import patch, MagicMock from msprobe.visualization.compare.mode_adapter import ModeAdapter -from msprobe.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.visualization.graph.base_node import BaseNode +from msprobe.visualization.graph.node_op import NodeOp from msprobe.visualization.utils import GraphConst, ToolTip from msprobe.core.common.const import CompareConst diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_base_node.py b/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_base_node.py index 480b95620e6..64b7101c6b0 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_base_node.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_base_node.py @@ -1,6 +1,6 @@ import unittest -from msprobe.visualization.graph.base_node import BaseNode, NodeOp -from msprobe.visualization.utils import GraphConst +from msprobe.visualization.graph.base_node import BaseNode +from msprobe.visualization.graph.node_op import NodeOp class TestBaseNode(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_graph.py b/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_graph.py index 81f9fdca527..24f39cbb808 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_graph.py +++ 
b/debug/accuracy_tools/msprobe/test/visualization_ut/graph/test_graph.py @@ -55,17 +55,6 @@ class TestGraph(unittest.TestCase): self.assertIsNotNone(matched_node) self.assertEqual(ancestors, ['node_id_a']) - def test_dfs(self): - graph = Graph("model_name") - graph.add_node(NodeOp.module, "node_a") - graph.add_node(NodeOp.module, "node_b") - node_a = BaseNode(self.node_op, self.node_id) - result = {} - graph.dfs(node_a, result) - self.assertEqual(result, {'node_id': {'id': 'node_id', 'node_type': 0, 'data': {}, - 'output_data': {}, 'input_data': {}, 'upnode': 'None', 'subnodes': [], - 'matched_node_link': [], 'suggestions': {}, 'stack_info': []}}) - def test_split_nodes_by_micro_step(self): nodes = [BaseNode(NodeOp.module, 'a.forward.0'), BaseNode(NodeOp.module, 'a.backward.0'), BaseNode(NodeOp.api_collection, 'apis.0'), BaseNode(NodeOp.module, 'a.forward.1'), diff --git a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py index 902d721a8d1..3f695d23483 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py @@ -17,12 +17,14 @@ import re from msprobe.visualization.builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data from msprobe.visualization.utils import GraphConst, load_json_file, load_data_json_file, get_csv_df from msprobe.visualization.graph.graph import Graph, NodeOp -from msprobe.visualization.graph.node_colors import NodeColors from msprobe.visualization.compare.mode_adapter import ModeAdapter from msprobe.core.common.const import Const +from msprobe.core.common.utils import recursion_depth_decorator class GraphComparator: + MAX_DEPTH = 1000 + def __init__(self, graphs, dump_path_param, args, mapping_dict=None): self.graph_n = graphs[0] self.graph_b = graphs[1] @@ -41,7 +43,7 @@ class GraphComparator: else: 
self._compare_nodes(self.graph_n.root) self._postcompare() - + def add_compare_result_to_node(self, node, compare_result_list): """ 将比对结果添加到节点的输入输出数据中 @@ -66,43 +68,8 @@ class GraphComparator: self.ma.parse_result(node, [compare_in_dict, compare_out_dict])) node.data[GraphConst.JSON_INDEX_KEY] = precision_index node.data.update(other_dict) - - def _parse_param(self, dump_path_param, output_path): - self.dump_path_param = dump_path_param - self.output_path = output_path - compare_mode = get_compare_mode(self.dump_path_param) - self.ma = ModeAdapter(compare_mode) - self.data_n_dict = load_data_json_file(dump_path_param.get('npu_json_path')) - self.data_b_dict = load_data_json_file(dump_path_param.get('bench_json_path')) - self.stack_json_data = load_json_file(dump_path_param.get('stack_json_path')) - - def _postcompare(self): - self._handle_api_collection_index() - if not self.ma.compare_mode == GraphConst.REAL_DATA_COMPARE: - return - df = get_csv_df(True, self.ma.csv_data, self.ma.compare_mode) - df = run_real_data(self.dump_path_param, df, self.framework, True if self.mapping_dict else False) - compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} - for node in self.ma.compare_nodes: - precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) - node.data[GraphConst.JSON_INDEX_KEY] = precision_index - - def _handle_api_collection_index(self): - """ - api集合的指标, md5模式使用集合中所有api最小的指标,statistics和tensor模式使用集合中所有api最大的指标 - md5模式下指标为0代表最差,statistics和tensor模式下指标为1代表最差 - """ - for node in self.graph_n.root.subnodes: - if node.op == NodeOp.api_collection: - precision_index = GraphConst.MAX_INDEX_KEY if self.ma.compare_mode == GraphConst.MD5_COMPARE \ - else GraphConst.MIN_INDEX_KEY - for api in node.subnodes: - precision_index = min(precision_index, - api.data.get(GraphConst.JSON_INDEX_KEY, GraphConst.MAX_INDEX_KEY)) \ - if self.ma.compare_mode == GraphConst.MD5_COMPARE \ - else max(precision_index, api.data.get(GraphConst.JSON_INDEX_KEY, 
GraphConst.MIN_INDEX_KEY)) - node.data[GraphConst.JSON_INDEX_KEY] = precision_index + @recursion_depth_decorator('GraphComparator._compare_nodes', max_depth=MAX_DEPTH) def _compare_nodes(self, node_n): """ 递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 @@ -126,6 +93,7 @@ class GraphComparator: for subnode in node_n.subnodes: self._compare_nodes(subnode) + @recursion_depth_decorator('GraphComparator._compare_nodes_fuzzy', max_depth=MAX_DEPTH) def _compare_nodes_fuzzy(self, node_n): if node_n.op != NodeOp.function_api: # 模块经过模糊匹配 @@ -146,6 +114,42 @@ class GraphComparator: for sub_node in node_n.subnodes: self._compare_nodes_fuzzy(sub_node) + def _parse_param(self, dump_path_param, output_path): + self.dump_path_param = dump_path_param + self.output_path = output_path + compare_mode = get_compare_mode(self.dump_path_param) + self.ma = ModeAdapter(compare_mode) + self.data_n_dict = load_data_json_file(dump_path_param.get('npu_json_path')) + self.data_b_dict = load_data_json_file(dump_path_param.get('bench_json_path')) + self.stack_json_data = load_json_file(dump_path_param.get('stack_json_path')) + + def _postcompare(self): + self._handle_api_collection_index() + if not self.ma.compare_mode == GraphConst.REAL_DATA_COMPARE: + return + df = get_csv_df(True, self.ma.csv_data, self.ma.compare_mode) + df = run_real_data(self.dump_path_param, df, self.framework, True if self.mapping_dict else False) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.ma.compare_nodes: + precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + + def _handle_api_collection_index(self): + """ + api集合的指标, md5模式使用集合中所有api最小的指标,statistics和tensor模式使用集合中所有api最大的指标 + md5模式下指标为0代表最差,statistics和tensor模式下指标为1代表最差 + """ + for node in self.graph_n.root.subnodes: + if node.op == NodeOp.api_collection: + precision_index = GraphConst.MAX_INDEX_KEY if self.ma.compare_mode == 
GraphConst.MD5_COMPARE \ + else GraphConst.MIN_INDEX_KEY + for api in node.subnodes: + precision_index = min(precision_index, + api.data.get(GraphConst.JSON_INDEX_KEY, GraphConst.MAX_INDEX_KEY)) \ + if self.ma.compare_mode == GraphConst.MD5_COMPARE \ + else max(precision_index, api.data.get(GraphConst.JSON_INDEX_KEY, GraphConst.MIN_INDEX_KEY)) + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + def _get_and_add_result(self, node_n, node_b): compare_result_list = compare_node([node_n.id, node_b.id], [self.data_n_dict, self.data_b_dict], diff --git a/debug/accuracy_tools/msprobe/visualization/graph/base_node.py b/debug/accuracy_tools/msprobe/visualization/graph/base_node.py index 2642ff1e97e..fd1541b87bf 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph/base_node.py +++ b/debug/accuracy_tools/msprobe/visualization/graph/base_node.py @@ -12,10 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ from msprobe.core.overflow_check.level import OverflowLevel -from msprobe.visualization.graph.node_op import NodeOp from msprobe.visualization.utils import GraphConst from msprobe.visualization.builder.msprobe_adapter import format_node_data, compare_data, compare_data_fuzzy +from msprobe.core.common.log import logger class BaseNode: @@ -114,7 +115,13 @@ class BaseNode: """ ancestors = [] current_node = self.upnode + seen_nodes = set() while current_node: + if current_node.id in seen_nodes: + logger.warning(f'Detected a cycle in the node structure and cannot get node ancestors, ' + f'current node is {current_node.id}.') + return [] + seen_nodes.add(current_node.id) ancestors.append(current_node.id) current_node = current_node.upnode return list(reversed(ancestors)) diff --git a/debug/accuracy_tools/msprobe/visualization/graph/graph.py b/debug/accuracy_tools/msprobe/visualization/graph/graph.py index 5ce12d1cadb..569d8ea21b5 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph/graph.py +++ b/debug/accuracy_tools/msprobe/visualization/graph/graph.py @@ -19,7 +19,6 @@ from msprobe.visualization.utils import GraphConst from msprobe.core.common.log import logger from msprobe.core.common.const import Const - MAX_RECUR_LEVEL = 100 @@ -67,7 +66,6 @@ class Graph: ancestors_b = node_b.get_ancestors() return node_b, ancestors_n, ancestors_b - @staticmethod def fuzzy_match(node_n, node_b): if not node_n or not node_b or not node_n.fuzzy_eq(node_b): @@ -76,13 +74,6 @@ class Graph: ancestors_b = node_b.get_ancestors() return node_b, ancestors_n, ancestors_b - @staticmethod - def dfs(node, result): - info = node.to_dict() - result[node.id] = info - for subnode in node.subnodes: - Graph.dfs(subnode, result) - @staticmethod def split_nodes_by_micro_step(nodes): """ -- Gitee From 826a3b5b9ca35e56c34d1bde6d972e4d88ab8d11 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 13 Mar 2025 09:34:45 +0800 Subject: [PATCH 274/333] 
=?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=BA=A2=E5=87=BA?= =?UTF-8?q?=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/mindspore_processor.py | 19 ++++++------- .../data_processor/pytorch_processor.py | 27 ++++++++++++++----- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 96314f7aa98..78dec13adc9 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -63,9 +63,8 @@ class MindsporeDataProcessor(BaseDataProcessor): def get_stat_info_sync(data): tensor_stat = TensorStatInfo() if data.dtype == ms.bool_: - # 如果是 bool 类型,可以直接用算子在 NPU 上运算 - tensor_stat.max = ops.ReduceMax()(data) - tensor_stat.min = ops.ReduceMin()(data) + tensor_stat.max = mint.any(data) + tensor_stat.min = mint.all(data) elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data elif data.dtype == ms.complex64 or data.dtype == ms.complex128: @@ -75,7 +74,6 @@ class MindsporeDataProcessor(BaseDataProcessor): tensor_stat.mean = np.mean(data_abs).item() tensor_stat.norm = np.linalg.norm(data_abs).item() else: - # 对于其它数据,确保为浮点类型(避免立即搬运到 CPU) if not ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm @@ -263,16 +261,15 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor): return value.asnumpy() if hasattr(value, "asnumpy") else value def _analyze_maybe_overflow_tensor(self, tensor_json): - if self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) is None: + max_tensor = self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) + min_tensor = 
self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) + if max_tensor is None or min_tensor is None: return - max_value = self.convert_to_numpy(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) - min_value = self.convert_to_numpy(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) - # if ops.isinf(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])): - if np.isinf(max_value) or np.isnan(max_value): + + if mint.isinf(max_tensor) or mint.isnan(max_tensor): self.has_overflow = True - # if ops.isinf(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) or ops.isnan(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])): - if np.isinf(min_value) or np.isnan(min_value): + if mint.isinf(min_tensor) or mint.isnan(min_tensor): self.has_overflow = True def _analyze_tensor(self, tensor, suffix): diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index eb94cbfb6c0..e45ebfa513a 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -414,15 +414,30 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): raise RuntimeError(f"overflow check failed") from e def _analyze_maybe_overflow_tensor(self, tensor_json): - print(f"tensor_json['tensor_stat_index']:{tensor_json['tensor_stat_index']}") - print(f"self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']):{self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])}") - print(f"tensor_json:{tensor_json}") - if self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) is None or 
self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) is None : + max_tensor = self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) + min_tensor = self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) + if max_tensor is None or min_tensor is None : return - self.has_overflow = torch.isinf(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or torch.isnan(self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index'])) or \ - torch.isinf(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) or torch.isnan(self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index'])) + if torch.isinf(max_tensor) or torch.isnan(max_tensor): + self.has_overflow = True + + if torch.isinf(min_tensor) or torch.isnan(min_tensor): + self.has_overflow = True + + def _analyze_tensor(self, tensor, suffix): + dump_data_name, file_path = self.get_save_file_path(suffix) + if not path_len_exceeds_limit(file_path): + self.cached_tensors_and_file_paths.update({file_path: tensor}) + else: + logger.warning(f'The file path {file_path} length exceeds limit.') + single_arg = super()._analyze_tensor(tensor, suffix) + single_arg.update({"data_name": dump_data_name}) + if not self.has_overflow and self.support_inf_nan: + self._analyze_maybe_overflow_tensor(single_arg) + return single_arg + def _analyze_tensor(self, tensor, suffix): dump_data_name, file_path = self.get_save_file_path(suffix) -- Gitee From 62ca145936371330e5ebd127290939591bab181f Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 13 Mar 2025 10:11:15 +0800 Subject: [PATCH 275/333] fix review --- .../tensor_transport_layer/attl.py | 15 ++++++--------- .../pytorch/online_dispatch/dump_compare.py | 10 ++++------ .../msprobe/pytorch/online_dispatch/utils.py | 15 +++++++-------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py 
b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py index 236c87b1105..f858067b661 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py @@ -27,8 +27,7 @@ from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.client import T from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.server import TCPServer from msprobe.core.common.file_utils import remove_path from msprobe.pytorch.common.utils import logger, save_api_data, load_api_data, save_pkl, load_pkl -from msprobe.core.common.const import Const -from msprobe.core.common.utils import CompareException +from msprobe.core.common.utils import recursion_depth_decorator BufferType = Union[ApiData, Dict[str, Any], str] # Union[Tensor, Tuple[Optional[Tensor]]] @@ -171,22 +170,20 @@ class ATTL: return buffer -def move2device_exec(obj, device, depth=0): - if depth > Const.MAX_DEPTH: - logger.error("Maximum recursion depth exceeded") - raise CompareException(CompareException.RECURSION_LIMIT_ERROR) +@recursion_depth_decorator("move2device_exec") +def move2device_exec(obj, device): if isinstance(obj, (tuple, list)): - data_list = [move2device_exec(val, device, depth=depth + 1) for val in obj] + data_list = [move2device_exec(val, device) for val in obj] return data_list if isinstance(obj, list) else tuple(data_list) if isinstance(obj, dict): - return {key: move2device_exec(val, device, depth=depth + 1) for key, val in obj.items()} + return {key: move2device_exec(val, device) for key, val in obj.items()} elif isinstance(obj, torch.Tensor): obj = obj.detach() if obj.device.type != device: obj = obj.to(device) return obj elif "return_types" in str(type(obj)): - return move2device_exec(tuple(obj), device, depth=depth + 1) + return move2device_exec(tuple(obj), device) elif isinstance(obj, torch._C.device): return torch.device(device) 
else: diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py index 810f1ea2756..a154064755e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/dump_compare.py @@ -20,7 +20,7 @@ from datetime import datetime, timezone import torch from msprobe.core.common.const import Const -from msprobe.core.common.utils import CompareException +from msprobe.core.common.utils import recursion_depth_decorator from msprobe.core.common.file_utils import FileOpen, save_npy, save_json from msprobe.pytorch.common.log import logger @@ -93,13 +93,11 @@ def support_basic_type(data): return False -def dump_data(data, prefix, dump_path, depth=0): - if depth > Const.MAX_DEPTH: - logger.error(f'dump data depth exceeds max depth:{Const.MAX_DEPTH}') - raise CompareException(CompareException.RECURSION_LIMIT_ERROR) +@recursion_depth_decorator("dump_data") +def dump_data(data, prefix, dump_path): if isinstance(data, (tuple, list)) and data: for i, item in enumerate(data): - dump_data(item, "{}.{}".format(prefix, i), dump_path, depth=depth + 1) + dump_data(item, "{}.{}".format(prefix, i), dump_path) return elif support_basic_type(data): if isinstance(data, torch.Tensor) and data.is_meta: diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py index acd630e8ef4..825c8493a72 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py @@ -27,9 +27,10 @@ else: pta_cpu_device = torch.device("cpu") from msprobe.core.common.const import CompareConst, Const -from msprobe.core.common.utils import CompareException +from msprobe.core.common.utils import recursion_depth_decorator from msprobe.pytorch.common.log import logger + cpu_device = torch._C.device("cpu") 
COLOR_RED = '\033[31m' COLOR_GREEN = '\033[32m' @@ -86,10 +87,8 @@ def get_callstack(): return callstack -def data_to_cpu(data, deep, data_cpu, depth=0): - if depth > Const.MAX_DEPTH: - logger.error("Failed to convert data to cpu, depth exceeds max depth:{}".format(Const.MAX_DEPTH)) - raise CompareException(CompareException.RECURSION_LIMIT_ERROR) +@recursion_depth_decorator("data_to_cpu") +def data_to_cpu(data, deep, data_cpu): global cpu_device list_cpu = [] if isinstance(data, torch.Tensor): @@ -105,13 +104,13 @@ def data_to_cpu(data, deep, data_cpu, depth=0): return tensor_copy elif isinstance(data, list): for v in data: - list_cpu.append(data_to_cpu(v, deep + 1, data_cpu, depth=depth + 1)) + list_cpu.append(data_to_cpu(v, deep + 1, data_cpu)) if deep == 0: data_cpu.append(list_cpu) return list_cpu elif isinstance(data, tuple): for v in data: - list_cpu.append(data_to_cpu(v, deep + 1, data_cpu, depth=depth + 1)) + list_cpu.append(data_to_cpu(v, deep + 1, data_cpu)) tuple_cpu = tuple(list_cpu) if deep == 0: data_cpu.append(tuple_cpu) @@ -119,7 +118,7 @@ def data_to_cpu(data, deep, data_cpu, depth=0): elif isinstance(data, dict): dict_cpu = {} for k, v in data.items(): - dict_cpu[k] = data_to_cpu(v, deep + 1, data_cpu, depth=depth + 1) + dict_cpu[k] = data_to_cpu(v, deep + 1, data_cpu) if deep == 0: data_cpu.append(dict_cpu) return dict_cpu -- Gitee From a34d08ed8a9b25cd2024cc095d7aede551e91227 Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 13 Mar 2025 10:12:15 +0800 Subject: [PATCH 276/333] fix review --- debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py index 825c8493a72..2116186cc04 100644 --- a/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/online_dispatch/utils.py @@ -26,7 +26,7 @@ except ImportError: 
else: pta_cpu_device = torch.device("cpu") -from msprobe.core.common.const import CompareConst, Const +from msprobe.core.common.const import CompareConst from msprobe.core.common.utils import recursion_depth_decorator from msprobe.pytorch.common.log import logger -- Gitee From 4dc4db4a958aba5d8685158858d6f90c649b6e5d Mon Sep 17 00:00:00 2001 From: Henry Shi Date: Fri, 7 Mar 2025 18:06:33 +0800 Subject: [PATCH 277/333] fix jit grad --- debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py index a9a543a8fac..634b1576752 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/jit_dump.py @@ -16,6 +16,7 @@ import os from collections import defaultdict +import mindspore from mindspore._c_expression import PyNativeExecutor_ try: from mindspore.common.api import _MindsporeFunctionExecutor @@ -105,7 +106,10 @@ class JitDump(_MindsporeFunctionExecutor): def grad(self, obj, grad, weights, grad_position, *args, **kwargs): if JitDump.jit_dump_switch and JitDump.jit_enable: _api_register.restore_all_api() - output = self._executor.grad(grad, obj, weights, grad_position, *args, *(kwargs.values())) + if mindspore.__version__ >= "2.5": + output = self._executor.grad(grad, obj, weights, grad_position, False, *args, *(kwargs.values())) + else: + output = self._executor.grad(grad, obj, weights, grad_position, *args, *(kwargs.values())) if JitDump.jit_dump_switch and JitDump.jit_enable: dump_jit(obj, args, None, False) _api_register.register_all_api() -- Gitee From 05467a0bade00929effae3854a57968a9d49a1eb Mon Sep 17 00:00:00 2001 From: l30044004 Date: Thu, 13 Mar 2025 14:12:16 +0800 Subject: [PATCH 278/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E7=B2=BE=E7=AE=80=E5=A0=86=E6=A0=88=E5=A4=B1=E6=95=88?= 
=?UTF-8?q?=E9=97=AE=E9=A2=98=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/visualization/builder/graph_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py index 1f70fb7d8ae..0b3305e8c6a 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py @@ -27,8 +27,8 @@ from msprobe.visualization.utils import save_json_file, GraphConst class GraphBuilder: backward_pattern = re.compile(r"(\.backward\.)(\d+)$") forward_pattern = re.compile(r"(\.forward\.)(\d+)$") - # 匹配以大写字母开头,后接任意字母,并以Template(结尾 - template_pattern = re.compile(r'\b[A-Z][a-zA-Z]*Template\(') + # 匹配以大写字母开头,后接任意字母,并以Template(结尾,或包含api_template(的字符串 + template_pattern = re.compile(r'\b([A-Z][a-zA-Z]*Template|api_template)\(') @staticmethod def build(construct_path, data_path, stack_path, model_name='DefaultModel', complete_stack=False): -- Gitee From 8ba89ecadfc833adff31fcc190a0f10279e2a900 Mon Sep 17 00:00:00 2001 From: qianggee Date: Thu, 13 Mar 2025 06:41:29 +0000 Subject: [PATCH 279/333] fix bug if ChainedOpt has state --- .../msprobe/pytorch/monitor/optimizer_collect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py index 86b984af651..df2c9d1c407 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/optimizer_collect.py @@ -185,7 +185,7 @@ class MegatronChainedDistributedOptimizerMon(MegatronDistributedOptimizerMon): for opt in torch_opt.chained_optimizers: self.map_fp16_tp_fp32_param(opt) - if not isinstance(torch_opt, 
torch.optim.Optimizer): + if not isinstance(torch_opt, torch.optim.Optimizer) and not hasattr(torch_opt, 'state'): torch_opt.state = {} for opt in torch_opt.chained_optimizers: torch_opt.state.update(opt.optimizer.state) @@ -198,7 +198,7 @@ class MegatronChainedMixPrecisionOptimizerMon(MixPrecisionOptimizerMon): for opt in torch_opt.chained_optimizers: self.map_fp16_tp_fp32_param(opt) - if not isinstance(torch_opt, torch.optim.Optimizer): + if not isinstance(torch_opt, torch.optim.Optimizer) and not hasattr(torch_opt, 'state'): torch_opt.state = {} for opt in torch_opt.chained_optimizers: torch_opt.state.update(opt.optimizer.state) -- Gitee From 6b4b496c78b2b6ebf319bfaa4fb36f324a891772 Mon Sep 17 00:00:00 2001 From: qianggee Date: Thu, 13 Mar 2025 09:30:27 +0000 Subject: [PATCH 280/333] resolve safe risk --- .../pytorch/monitor/distributed/wrap_distributed.py | 10 ++++++++-- .../msprobe/pytorch/monitor/module_hook.py | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py b/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py index 6805fe22377..e94763e4787 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/distributed/wrap_distributed.py @@ -24,6 +24,7 @@ import torch.nn as nn from msprobe.core.common.const import MonitorConst from msprobe.core.common.file_utils import load_yaml from msprobe.pytorch.monitor.module_metric import get_metrics, get_summary_writer_tag_name +from msprobe.pytorch.common.log import logger try: import torch_npu @@ -37,6 +38,7 @@ WrapDistributedOps = load_yaml(OpsPath).get("distributed", []) StackBlackListPath = os.path.join(os.path.dirname(__file__), "stack_blacklist.yaml") StackBlackList = load_yaml(StackBlackListPath).get("stack", []) +MAX_STRING_LENGTH = 1000 distributed_func = {} for f in dir(dist): @@ -138,6 +140,8 @@ def 
get_process_group(process_group): def stack_filter(stack): + if len(stack) > MAX_STRING_LENGTH: + logger.warning(f'The character strin contains more than {MAX_STRING_LENGTH}. re match is skipped.') for pattern in StackBlackList: if re.search(pattern, stack): return False @@ -187,10 +191,12 @@ def update_data(old, new): def is_target_line(codeline): - stack = get_callstack() - whole_stack = ';'.join(stack) if codeline == []: return True + stack = get_callstack() + whole_stack = ';'.join(stack) + if len(whole_stack) > MAX_STRING_LENGTH: + logger.warning(f'The character strin contains more than {MAX_STRING_LENGTH}. re match is skipped.') for pattern in codeline: if re.search(pattern, whole_stack): return True diff --git a/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py b/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py index 3032bdcb3fa..2db2a971256 100644 --- a/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py +++ b/debug/accuracy_tools/msprobe/pytorch/monitor/module_hook.py @@ -26,6 +26,7 @@ from torch.utils.hooks import BackwardHook from msprobe.core.common.const import MonitorConst, Const from msprobe.core.common.file_utils import load_json, save_json +from msprobe.core.common.utils import recursion_depth_decorator from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import is_recomputation from msprobe.pytorch.monitor.anomaly_analyse import AnomalyDataWriter @@ -735,6 +736,7 @@ class TrainerMon: logger.info_on_rank_0(f"> {hooked_count} modules are monitored.") + @recursion_depth_decorator('msprobe.pytorch.monitor.clone_if_tensor') def clone_if_tensor(args): if isinstance(args, tuple): return tuple([clone_if_tensor(arg) for arg in args]) -- Gitee From 8a4d3a0bf719e968027837ec982cb954eb85e665 Mon Sep 17 00:00:00 2001 From: lcw Date: Wed, 5 Mar 2025 15:09:38 +0800 Subject: [PATCH 281/333] =?UTF-8?q?=E3=80=90feature=E3=80=91dump=E6=94=AF?= 
=?UTF-8?q?=E6=8C=81mindspeed=E8=87=AA=E5=AE=9A=E4=B9=89=E7=AE=97=E5=AD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 11 +++++--- .../pytorch/hook_module/api_register.py | 11 ++++++++ .../pytorch/hook_module/support_wrap_ops.yaml | 23 +++++++++++++++- .../msprobe/pytorch/hook_module/utils.py | 26 +++++++++++++++++++ 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index ff8d58dc0a3..cc771d0a0bd 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -79,6 +79,8 @@ class Const: NUMPY_SUFFIX = ".npy" NUMPY_PATTERN = "*.npy" PT_SUFFIX = ".pt" + PY_SUFFIX = ".py" + INIT_PY = "init.py" ONE_GB = 1073741824 # 1 * 1024 * 1024 * 1024 TEN_GB = 10737418240 # 10 * 1024 * 1024 * 1024 ONE_MB = 1048576 # 1 * 1024 * 1024 @@ -247,6 +249,7 @@ class Const: PT_API_TYPE_ATEN = "aten" PT_API_TYPE_DIST = "distributed" PT_API_TYPE_NPU_DIST = "npu_distributed" + PT_API_TYPE_MINDSPEED = "mindspeed" MS_API_TYPE_OPS = "ops" MS_API_TYPE_TENSOR = "tensor" @@ -263,6 +266,7 @@ class Const: NPU_API_TYPE_PREFIX = "NPU" ATEN_API_TYPE_PREFIX = "Aten" VF_API_TYPE_PREFIX = "VF" + MINDSPEED_API_TYPE_PREFIX = "MindSpeed" MINT_API_TYPE_PREFIX = "Mint" MINT_FUNC_API_TYPE_PREFIX = "MintFunctional" @@ -276,7 +280,8 @@ class Const: PT_API_TYPE_NPU: PT_API_TYPE_NPU, PT_API_TYPE_ATEN: PT_API_TYPE_ATEN, PT_API_TYPE_DIST: PT_API_TYPE_DIST, - PT_API_TYPE_NPU_DIST: PT_API_TYPE_NPU_DIST + PT_API_TYPE_NPU_DIST: PT_API_TYPE_NPU_DIST, + PT_API_TYPE_MINDSPEED: PT_API_TYPE_MINDSPEED }, MS_FRAMEWORK: { MS_API_TYPE_OPS: MS_API_TYPE_OPS, @@ -304,7 +309,8 @@ class Const: PT_API_TYPE_NPU: NPU_API_TYPE_PREFIX, PT_API_TYPE_ATEN: ATEN_API_TYPE_PREFIX, PT_API_TYPE_DIST: DIST_API_TYPE_PREFIX, - PT_API_TYPE_NPU_DIST: DIST_API_TYPE_PREFIX + PT_API_TYPE_NPU_DIST: 
DIST_API_TYPE_PREFIX, + PT_API_TYPE_MINDSPEED: MINDSPEED_API_TYPE_PREFIX }, MS_FRAMEWORK: { MS_API_TYPE_OPS: FUNCTIONAL_API_TYPE_PREFIX, @@ -744,4 +750,3 @@ class DistributedCheckConst: "src": 1 } } - \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py index 30a45a84d87..f8da9453e83 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/api_register.py @@ -26,6 +26,14 @@ from msprobe.pytorch.common.utils import ( ) from msprobe.pytorch.function_factory import npu_custom_functions from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.hook_module.utils import dynamic_import_op + +try: + import mindspeed.ops +except ImportError: + mindspeed_enable = False +else: + mindspeed_enable = True torch_version_above_2 = torch.__version__.split('+')[0] > '2.0' @@ -57,6 +65,9 @@ if not is_gpu: torch_npu.distributed.distributed_c10d)) } ) + if mindspeed_enable: + _api_types.get(Const.PT_FRAMEWORK).update({Const.PT_API_TYPE_MINDSPEED: (mindspeed.ops, (mindspeed.ops,))}) + dynamic_import_op(mindspeed.ops) _inner_used_api = {} _supported_api_list_path = (os.path.join(os.path.dirname(os.path.realpath(__file__)), Const.SUPPORT_API_FILE_NAME),) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml index 43d750e7d6a..5b92baad826 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml @@ -1916,4 +1916,25 @@ distributed: npu_distributed: - isend - - irecv \ No newline at end of file + - irecv + +mindspeed: + - dropout_add_layer_norm.npu_dropout_add_layer_norm + - npu_rotary_position_embedding.npu_rotary_position_embedding + - 
fusion_attention_v2.npu_fusion_attention + - npu_mm_all_reduce_add_rms_norm.npu_mm_all_reduce_add_rms_norm + - npu_mm_all_reduce_add_rms_norm_.npu_mm_all_reduce_add_rms_norm_ + - gmm.npu_gmm + - gmm.npu_gmm_v2 + - npu_grouped_mat_mul_all_reduce.npu_grouped_mat_mul_all_reduce + - ffn.npu_ffn + - npu_moe_token_permute.npu_moe_token_permute + - npu_moe_token_unpermute.npu_moe_token_unpermute + - npu_ring_attention_update.npu_ring_attention_update + - npu_matmul_add.npu_matmul_add_fp32 + - npu_groupmatmul_add.npu_groupmatmul_add_fp32 + - npu_all_to_all_all_gather_bmm.npu_all_to_all_all_gather_bmm + - npu_bmm_reduce_scatter_all_to_all.npu_bmm_reduce_scatter_all_to_all + - quant_gmm.npu_quant_gmm + - quant_gmm.npu_quant_gmm_v2 + - npu_apply_fused_ema_adamw.npu_apply_fused_ema_adamw \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py index 41869403a54..0992caf0a41 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/utils.py @@ -14,7 +14,12 @@ # limitations under the License. 
import os +import importlib +import inspect + +from msprobe.core.common.const import Const from msprobe.core.common.file_utils import load_yaml +from msprobe.core.common.log import logger def get_ops(): @@ -26,3 +31,24 @@ def get_ops(): wrap_torch = ops.get('torch') wrap_npu_ops = ops.get('torch_npu') return set(wrap_functional) | set(wrap_tensor) | set(wrap_torch) | set(wrap_npu_ops) + + +def dynamic_import_op(package): + package_name = package.__name__ + ops = {} + ops_dir, _ = os.path.split(package.__file__) + for file_name in os.listdir(ops_dir): + if file_name.endswith(Const.PY_SUFFIX) and file_name != Const.INIT_PY: + sub_module_name = file_name[:-3] + module_name = f"{package_name}.{sub_module_name}" + try: + module = importlib.import_module(module_name) + except Exception as e: + logger.warning(f"import {module_name} failed!") + continue + + func_members = inspect.getmembers(module, inspect.isfunction) + for func_member in func_members: + func_name, func = func_member[0], func_member[1] + ops[f"{sub_module_name}.{func_name}"] = func + return ops -- Gitee From aabd1e201672c1528f5b36dea3564150af1fb267 Mon Sep 17 00:00:00 2001 From: pxp1 <958876660@qq.com> Date: Thu, 13 Mar 2025 17:13:35 +0800 Subject: [PATCH 282/333] =?UTF-8?q?=E9=80=92=E5=BD=92=E6=B7=B1=E5=BA=A6?= =?UTF-8?q?=E9=99=90=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../api_accuracy_checker/generate_op_script/op_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py index 797210f09c3..641eada0303 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/generate_op_script/op_generator.py @@ -183,6 +183,7 @@ class APIExtractor: 
self.update_data_name(v, dump_data_dir) return value + @recursion_depth_decorator("OpGenerator: APIExtractor.update_data_name") def update_data_name(self, data, dump_data_dir): if isinstance(data, list): for item in data: -- Gitee From 4d1eb75cbd29f059cd6610f61008f5cd17e225c0 Mon Sep 17 00:00:00 2001 From: gitee Date: Thu, 13 Mar 2025 20:15:07 +0800 Subject: [PATCH 283/333] fix review --- .../api_accuracy_checker/compare/algorithm.py | 40 ------------- .../run_ut/distributed_bench_function.py | 58 +++++++++++++++---- 2 files changed, 47 insertions(+), 51 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py index 965147232a4..ddee254c2b1 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py @@ -261,43 +261,3 @@ def compare_bool_tensor(bench_output, device_output): error_rate = float(error_nums / bench_output.size) result = CompareConst.PASS if error_rate == 0 else CompareConst.ERROR return error_rate, result, "" - - -def reduce_sum(tensors): - return torch.stack(tensors).sum(dim=0) - - -def reduce_product(tensors): - return torch.stack(tensors).prod(dim=0) - - -def reduce_min(tensors): - return torch.stack(tensors).min(dim=0).values - - -def reduce_max(tensors): - return torch.stack(tensors).max(dim=0).values - - -def reduce_band(tensors): - reduce_tensor = tensors[0].clone() - if len(tensors) > 1: - for t in tensors[1:]: - reduce_tensor &= t - return reduce_tensor - - -def reduce_bor(tensors): - reduce_tensor = tensors[0].clone() - if len(tensors) > 1: - for t in tensors[1:]: - reduce_tensor |= t - return reduce_tensor - - -def reduce_bxor(tensors): - reduce_tensor = tensors[0].clone() - if len(tensors) > 1: - for t in tensors[1:]: - reduce_tensor ^= t - return reduce_tensor diff --git 
a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py index e48c1cbf157..9f88e739401 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py @@ -20,19 +20,46 @@ import torch from msprobe.core.common.const import DistributedCheckConst from msprobe.pytorch.api_accuracy_checker.common.utils import check_object_type from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import get_distributed_args -from msprobe.pytorch.api_accuracy_checker.compare.algorithm import reduce_sum, reduce_product, reduce_min, \ - reduce_max, reduce_band, reduce_bor, reduce_bxor -reduce_ops = { - DistributedCheckConst.REDOPTYPE_SUM: reduce_sum, - DistributedCheckConst.REDOPTYPE_PRODUCT: reduce_product, - DistributedCheckConst.REDOPTYPE_MIN: reduce_min, - DistributedCheckConst.REDOPTYPE_MAX: reduce_max, - DistributedCheckConst.REDOPTYPE_BAND: reduce_band, - DistributedCheckConst.REDOPTYPE_BOR: reduce_bor, - DistributedCheckConst.REDOPTYPE_BXOR: reduce_bxor, -} +def reduce_sum(tensors): + return torch.stack(tensors).sum(dim=0) + + +def reduce_product(tensors): + return torch.stack(tensors).prod(dim=0) + + +def reduce_min(tensors): + return torch.stack(tensors).min(dim=0).values + + +def reduce_max(tensors): + return torch.stack(tensors).max(dim=0).values + + +def reduce_band(tensors): + reduce_tensor = tensors[0].clone() + if len(tensors) > 1: + for t in tensors[1:]: + reduce_tensor &= t + return reduce_tensor + + +def reduce_bor(tensors): + reduce_tensor = tensors[0].clone() + if len(tensors) > 1: + for t in tensors[1:]: + reduce_tensor |= t + return reduce_tensor + + +def reduce_bxor(tensors): + reduce_tensor = tensors[0].clone() + if len(tensors) > 1: + for t in tensors[1:]: + reduce_tensor ^= t + 
return reduce_tensor def mock_broadcast(api_name, input_args, input_kwargs): @@ -68,6 +95,15 @@ def mock_reduce(api_name, input_args, input_kwargs): reduce_tensor = None if not tensors: return reduce_tensor + reduce_ops = { + DistributedCheckConst.REDOPTYPE_SUM: reduce_sum, + DistributedCheckConst.REDOPTYPE_PRODUCT: reduce_product, + DistributedCheckConst.REDOPTYPE_MIN: reduce_min, + DistributedCheckConst.REDOPTYPE_MAX: reduce_max, + DistributedCheckConst.REDOPTYPE_BAND: reduce_band, + DistributedCheckConst.REDOPTYPE_BOR: reduce_bor, + DistributedCheckConst.REDOPTYPE_BXOR: reduce_bxor, + } if reduce_op not in reduce_ops: raise ValueError(f"Unsupported reduce operation: {reduce_op}") reduce_tensor = reduce_ops[reduce_op](tensors) -- Gitee From 2d966c9ebc3b1b031d8bc8cfafb1d04f6df3fdfc Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Thu, 13 Mar 2025 22:43:37 +0800 Subject: [PATCH 284/333] set wrapped api names to ori names --- debug/accuracy_tools/msprobe/core/data_dump/api_registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py b/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py index 5aeafd573d6..1bef962232e 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/api_registry.py @@ -69,6 +69,7 @@ class ApiWrapper: def wrap_api_func(api_name, api_func, prefix, hook_build_func, api_template): def api_function(*args, **kwargs): return api_template(api_name, api_func, prefix, hook_build_func)(*args, **kwargs) + api_function.__name__ = api_name return api_function wrapped_functions[api_name] = wrap_api_func(api_name, ori_api, name_prefix, hook_build_func, api_template) -- Gitee From 35dc8c3956e505c0656711cbbe2bfc25a8865f97 Mon Sep 17 00:00:00 2001 From: gitee Date: Fri, 14 Mar 2025 10:51:04 +0800 Subject: [PATCH 285/333] fix review --- .../run_ut/distributed_compare_function.py | 15 +++++++++------ 
.../run_ut/run_distributed_check.py | 8 +++++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py index 70f279e66c3..022c1dfcacf 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py @@ -20,7 +20,7 @@ import torch from msprobe.core.common.const import CompareConst -def compare_broadcast(device_out, bench_out, rank): +def compare_broadcast(device_out, bench_out, **kwargs): if len(device_out) < 1: raise ValueError("device_out should not be empty") compare_result = torch.equal(device_out[0].cpu(), bench_out) @@ -28,7 +28,7 @@ def compare_broadcast(device_out, bench_out, rank): return CompareConst.PASS if compare_result else CompareConst.ERROR -def compare_all_reduce(device_out, bench_out, rank): +def compare_all_reduce(device_out, bench_out, **kwargs): if len(device_out) < 1: raise ValueError("device_out should not be empty") compare_result = torch.equal(device_out[0].cpu(), bench_out) @@ -36,7 +36,8 @@ def compare_all_reduce(device_out, bench_out, rank): return CompareConst.PASS if compare_result else CompareConst.ERROR -def compare_scatter(device_out, bench_out, rank): +def compare_scatter(device_out, bench_out, **kwargs): + rank = kwargs.get("local_rank", 0) if len(device_out) < 1: raise ValueError("device_out should not be empty") if len(bench_out) <= rank: @@ -46,7 +47,7 @@ def compare_scatter(device_out, bench_out, rank): return CompareConst.PASS if compare_result else CompareConst.ERROR -def compare_all_gather(device_out, bench_out, rank): +def compare_all_gather(device_out, bench_out, **kwargs): if len(device_out) < 1: raise ValueError("device_out should not be empty") device_out_cpu = 
[tensor.cpu() for tensor in device_out[0]] @@ -55,7 +56,8 @@ def compare_all_gather(device_out, bench_out, rank): return CompareConst.PASS if compare_result else CompareConst.ERROR -def compare_all_to_all(device_out, bench_out, rank): +def compare_all_to_all(device_out, bench_out, **kwargs): + rank = kwargs.get("local_rank", 0) if len(device_out) < 1: raise ValueError("device_out should not be empty") device_out_cpu = [tensor.cpu() for tensor in device_out[0]] @@ -64,7 +66,8 @@ def compare_all_to_all(device_out, bench_out, rank): return CompareConst.PASS if compare_result else CompareConst.ERROR -def compare_all_to_all_single(device_out, bench_out, rank): +def compare_all_to_all_single(device_out, bench_out, **kwargs): + rank = kwargs.get("local_rank", 0) if len(device_out) < 1: raise ValueError("device_out should not be empty") compare_result = torch.equal(device_out[0].cpu(), bench_out[rank]) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 5d4ad5203cc..24f57d481f4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -18,6 +18,7 @@ import os import sys import time from collections import namedtuple +import copy import torch_npu import torch.distributed as dist @@ -211,13 +212,18 @@ def run_hccl(rank, distributed_config): result_file_path = distributed_config[DistributedCheckConst.RESULT_FILE_PATH] benchmark_result = distributed_config[DistributedCheckConst.BENCHMARK_RESULT] device_args, _ = generate_device_params(rank_args, rank_kwargs, False, api_name) + origin_args = copy.copy(device_args) logger.info("Start to check distributed api {} in rank {}.".format(api_full_name, local_rank)) distributed_func.get(api_name)(*device_args) dist.barrier() if api_name in 
special_rank_api_list: local_rank = rank + kwargs = { + "local_rank": local_rank, + "origin_args": origin_args + } compare_function = distributed_func_registry.get_compare_function(api_name) - status = compare_function(device_args, benchmark_result, local_rank) + status = compare_function(device_args, benchmark_result, **kwargs) message = '' result_rows = [] df_row = list([api_full_name, local_rank, status, message]) -- Gitee From d6091e1eac5300e5e93c13a45f0d9cdf29e92ea0 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 14 Mar 2025 14:55:25 +0800 Subject: [PATCH 286/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../visualization/compare/graph_comparator.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py index ba9c6985cdb..41a7276d168 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/graph_comparator.py @@ -161,15 +161,6 @@ class GraphComparator: for subnode in node_n.subnodes: self._compare_nodes(subnode) - def _compare_node_with_mapping(self, node_n, mapping_dict): - node_b, ancestors_n, ancestors_b = Graph.mapping_match(node_n, self.graph_b, mapping_dict) - if node_b: - ancestors_n.append(node_n.id) - ancestors_b.append(node_b.id) - node_n.matched_node_link = ancestors_b - node_b.matched_node_link = ancestors_n - return node_b - @recursion_depth_decorator('GraphComparator._compare_nodes_fuzzy', max_depth=MAX_DEPTH) def _compare_nodes_fuzzy(self, node_n): if node_n.op != NodeOp.function_api: @@ -191,6 +182,15 @@ class GraphComparator: for sub_node in 
node_n.subnodes: self._compare_nodes_fuzzy(sub_node) + def _compare_node_with_mapping(self, node_n, mapping_dict): + node_b, ancestors_n, ancestors_b = Graph.mapping_match(node_n, self.graph_b, mapping_dict) + if node_b: + ancestors_n.append(node_n.id) + ancestors_b.append(node_b.id) + node_n.matched_node_link = ancestors_b + node_b.matched_node_link = ancestors_n + return node_b + def _parse_param(self, dump_path_param, output_path): self.dump_path_param = dump_path_param self.output_path = output_path -- Gitee From 3744360eab2638d93b4fc5f512073979943c762c Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 14 Mar 2025 17:04:42 +0800 Subject: [PATCH 287/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/visualization_ut/compare/test_multi_mapping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py index f9ea98cd383..7fe14317b2a 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_multi_mapping.py @@ -1,7 +1,8 @@ import unittest from msprobe.visualization.compare.multi_mapping import MultiMapping from msprobe.visualization.graph.graph import Graph -from msprobe.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.visualization.graph.base_node import BaseNode +from msprobe.visualization.graph.node_op import NodeOp from msprobe.visualization.utils import GraphConst -- Gitee From abf56f3f8727ce04bb028e250eb6da67c2829a7b Mon Sep 17 00:00:00 2001 From: lcw Date: Fri, 14 Mar 2025 17:35:04 +0800 Subject: [PATCH 288/333] 
=?UTF-8?q?=E3=80=90bugfix=E3=80=91=E8=A7=A3?= =?UTF-8?q?=E5=86=B3=E5=A4=96=E9=83=A8=E8=BE=93=E5=85=A5=E4=BD=9C=E4=B8=BA?= =?UTF-8?q?=E6=AD=A3=E5=88=99=E8=A1=A8=E8=BE=BE=E5=BC=8F=E7=9A=84=E5=AE=89?= =?UTF-8?q?=E5=85=A8=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py index 66229d36b8d..db731b33824 100644 --- a/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/parse_tool/lib/utils.py @@ -264,7 +264,7 @@ class Util: match = re_pattern.match(name) if not match: continue - if extern_pattern != '' and re_pattern.match(extern_pattern) and not re.match(extern_pattern, name): + if extern_pattern != '' and re_pattern.match(extern_pattern) and not name.startswith(extern_pattern): continue file_list[name] = gen_info_func(name, match, file["root"]) return file_list -- Gitee From 48e5223f32042689325063a2c935fa11a4f179c9 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 16 Mar 2025 19:18:19 +0800 Subject: [PATCH 289/333] recuce pre --- .../msprobe/core/common/const.py | 1 + .../api_accuracy_checker/compare/algorithm.py | 51 ++++++++++++++++++ .../run_ut/distributed_bench_function.py | 30 ++++++++++- .../run_ut/distributed_compare_function.py | 53 ++++++++++++++++--- .../run_ut/run_distributed_check.py | 6 +-- 5 files changed, 131 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index bdb2353c3b9..ccca518002a 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -753,6 +753,7 @@ class DistributedCheckConst: ALL_TO_ALL_SINGLE = "all_to_all_single" 
BROADCAST_SRC_INDEX = 1 FIRST_TENSOR_INDEX = 0 + MAX_CUMSUM_CHECK_NUM = 1000 REDOPTYPE_SUM = "RedOpType.SUM" REDOPTYPE_PRODUCT = "RedOpType.PRODUCT" diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py index ddee254c2b1..abe8f2b4b3c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/algorithm.py @@ -261,3 +261,54 @@ def compare_bool_tensor(bench_output, device_output): error_rate = float(error_nums / bench_output.size) result = CompareConst.PASS if error_rate == 0 else CompareConst.ERROR return error_rate, result, "" + + +def maximize_kahan_loss(cumsum, addend, negative=False): + """ + Calculate the precision loss in Kahan summation and select the maximum or minimum loss. + + Parameters: + cumsum (torch.Tensor): The current cumulative sum. + addend (torch.Tensor): The value to be added in the current step. + negative (bool): Whether to select the negative direction of loss. + Default is False (select positive direction which minimizes the sum). + + Returns: + loss_res (torch.Tensor): The selected maximum or minimum loss value. + mask (torch.Tensor): + A boolean mask indicating whether the loss value should be compensated. + """ + loss_all = (cumsum + addend) - cumsum - addend + if negative: + loss_res = torch.min(loss_all, dim=0)[0] + mask = loss_res <= 0 + else: + loss_res = torch.max(loss_all, dim=0)[0] + mask = loss_res >= 0 + return loss_res, mask + + +def kahan_range(tensors, negative=False): + """ + Perform Kahan summation on a list of tensors and track precision loss. + + Parameters: + tensors (list of torch.Tensor): The list of tensors to be summed. + negative (bool): Whether to select the negative direction of loss. + Default is False (select positive direction which minimizes the sum). 
+ Returns: + sum_max: The summation results. + """ + if len(tensors) < 1: + raise ValueError("tensors should have at least 1 element") + cumsum_temp = torch.clone(tensors[0]).unsqueeze(dim=0) + sum_max = torch.clone(tensors[0]) + loss_max = torch.tensor(0) + + for tensor in tensors[1:]: + addend = tensor - loss_max + loss_max, mask = maximize_kahan_loss(cumsum_temp, addend, negative) + sum_max = sum_max + (addend - torch.where(mask, loss_max, 0)) + loss_max = torch.where(mask, 0, loss_max) + cumsum_temp = torch.cat((cumsum_temp, sum_max.unsqueeze(dim=0))) + return sum_max diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py index 9f88e739401..c78c1957e83 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py @@ -19,11 +19,39 @@ import torch from msprobe.core.common.const import DistributedCheckConst from msprobe.pytorch.api_accuracy_checker.common.utils import check_object_type +from msprobe.pytorch.api_accuracy_checker.compare.algorithm import kahan_range from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import get_distributed_args +def sort_all_input(inputs): + ranks = len(inputs) + combined_tensor = torch.stack(inputs) + sorted_indices = torch.argsort(combined_tensor, descending=True, dim=0) + combined_tensor = torch.gather(combined_tensor, 0, sorted_indices) + sorted_inputs = [combined_tensor[i] for i in range(ranks)] + return sorted_inputs + + def reduce_sum(tensors): - return torch.stack(tensors).sum(dim=0) + min_bound = torch.min( + kahan_range(tensors, negative=False), + kahan_range(tensors[::-1], negative=False), + ) + max_bound = torch.max( + kahan_range(tensors, negative=True), kahan_range(tensors[::-1], negative=True) + ) + tensors_sorted = 
sort_all_input(tensors) + min_sorted_bound = torch.min( + kahan_range(tensors_sorted, negative=False), + kahan_range(tensors_sorted[::-1], negative=False), + ) + max_sorted_bound = torch.max( + kahan_range(tensors_sorted, negative=True), + kahan_range(tensors_sorted[::-1], negative=True), + ) + return torch.min(min_bound, min_sorted_bound), torch.max( + max_bound, max_sorted_bound + ) def reduce_product(tensors): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py index 022c1dfcacf..f7cf95a1d0d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_compare_function.py @@ -15,10 +15,40 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools import torch - -from msprobe.core.common.const import CompareConst - +import tqdm + +from msprobe.core.common.const import CompareConst, DistributedCheckConst + + +def cumulative_check(rank, inputs, output, min_bound, max_bound): + # 检查每个元素是否在最小值和最大值之间 + res = CompareConst.PASS + out_of_bounds = torch.nonzero((output < min_bound) | (output > max_bound)) + if out_of_bounds.shape[0] == 0: + return res + # 对超出范围的值进行累加序遍历检查 + perms = list(itertools.permutations(list(range(len(inputs))))) + if len(out_of_bounds) > DistributedCheckConst.MAX_CUMSUM_CHECK_NUM: + res = CompareConst.WARNING + out_of_bounds = out_of_bounds[: DistributedCheckConst.MAX_CUMSUM_CHECK_NUM] + pbar = tqdm.tqdm( + out_of_bounds, + position=rank + 1, + desc=f"Suspicious cumulative result check for rank{rank}", + ) + for indice in pbar: + indice_tuple = tuple(indice) + input_values = torch.stack([input_[indice_tuple] for input_ in inputs])[perms] + for i in range(1, len(inputs)): + input_values[:, 0] += 
input_values[:, i] + if output[indice_tuple] not in input_values[:, 0]: + res = CompareConst.ERROR + break + pbar.close() + return res + def compare_broadcast(device_out, bench_out, **kwargs): if len(device_out) < 1: @@ -31,9 +61,20 @@ def compare_broadcast(device_out, bench_out, **kwargs): def compare_all_reduce(device_out, bench_out, **kwargs): if len(device_out) < 1: raise ValueError("device_out should not be empty") - compare_result = torch.equal(device_out[0].cpu(), bench_out) - - return CompareConst.PASS if compare_result else CompareConst.ERROR + if isinstance(bench_out, tuple): + rank = kwargs.get("local_rank", 0) + input_args = kwargs.get("input_args", []) + tensors = [] + for arg in input_args: + if len(arg) > 0: + tensors.append(arg[0]) + if len(tensors) < 1: + raise ValueError("input_args should have at least 1 element") + result = cumulative_check(rank, tensors, device_out[0].cpu(), *bench_out) + else: + compare_result = torch.equal(device_out[0].cpu(), bench_out) + result = CompareConst.PASS if compare_result else CompareConst.ERROR + return result def compare_scatter(device_out, bench_out, **kwargs): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index 24f57d481f4..cb00bd8acd9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -207,12 +207,12 @@ def run_hccl(rank, distributed_config): distributed_setup(rank, world_size, master_ip, master_port) api_full_name = distributed_config[DistributedCheckConst.API_FULL_NAME] api_name = distributed_config[DistributedCheckConst.API_NAME] - rank_args = distributed_config[DistributedCheckConst.ALL_ARGS][rank] + input_args = distributed_config[DistributedCheckConst.ALL_ARGS] + rank_args = input_args[rank] rank_kwargs = 
distributed_config[DistributedCheckConst.ALL_KWARGS][rank] result_file_path = distributed_config[DistributedCheckConst.RESULT_FILE_PATH] benchmark_result = distributed_config[DistributedCheckConst.BENCHMARK_RESULT] device_args, _ = generate_device_params(rank_args, rank_kwargs, False, api_name) - origin_args = copy.copy(device_args) logger.info("Start to check distributed api {} in rank {}.".format(api_full_name, local_rank)) distributed_func.get(api_name)(*device_args) dist.barrier() @@ -220,7 +220,7 @@ def run_hccl(rank, distributed_config): local_rank = rank kwargs = { "local_rank": local_rank, - "origin_args": origin_args + "input_args": input_args } compare_function = distributed_func_registry.get_compare_function(api_name) status = compare_function(device_args, benchmark_result, **kwargs) -- Gitee From 34aeb92806e957cb6ef509501ceb421773fbd804 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 17 Mar 2025 16:18:02 +0800 Subject: [PATCH 290/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E6=94=AF=E6=8C=81=E5=A4=9A=E5=AF=B9=E5=A4=9A=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E6=98=A0=E5=B0=84=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/visualization/compare/multi_mapping.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py b/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py index e005df71a18..bcc7c0f3135 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/multi_mapping.py @@ -155,13 +155,15 @@ class MultiMapping: node = merged_items.multi_node # 如果是融合节点,那么其真实数据的存盘data_name需要从融合节点的首节点和尾节点中获取 if node.op == NodeOp.multi_collection: - return merged_items.end_node.output_data.get(full_param_name, {}).get("data_name", "-1") \ - if Const.OUTPUT == state in full_param_name \ 
- else merged_items.start_node.input_data.get(full_param_name, {}).get("data_name", "-1") + data = merged_items.end_node.output_data \ + if Const.OUTPUT == state \ + else merged_items.start_node.input_data else: - return node.output_data.get(full_param_name, {}).get("data_name", "-1") \ - if Const.OUTPUT == state in full_param_name \ - else node.input_data.get(full_param_name, {}).get("data_name", "-1") + data = node.output_data \ + if Const.OUTPUT == state \ + else node.input_data + + return data.get(full_param_name, {}).get("data_name", "-1") @staticmethod def _split_mapping_str(x: str): -- Gitee From e9dd8a7e56e817f0a82e8381aaea1b6c5a1d5513 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 18 Mar 2025 10:15:31 +0800 Subject: [PATCH 291/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E5=88=A0=E9=99=A4=E6=9C=AA=E4=BD=BF=E7=94=A8=E7=9A=84?= =?UTF-8?q?=E5=B8=B8=E9=87=8F=E5=92=8C=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../compare/test_mode_adapter.py | 21 ------------------- .../visualization/builder/msprobe_adapter.py | 1 - .../visualization/compare/mode_adapter.py | 19 ----------------- .../graph/distributed_analyzer.py | 9 -------- .../msprobe/visualization/graph/graph.py | 2 -- .../msprobe/visualization/utils.py | 8 ------- 6 files changed, 60 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py index 4c38e4e6200..5f9a64f04dd 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/compare/test_mode_adapter.py @@ -226,27 +226,6 @@ class TestModeAdapter(unittest.TestCase): self.adapter.add_csv_data(compare_result_list) self.assertEqual(self.adapter.csv_data, compare_result_list) - def test_add_error_key(self): - node_data = {'key': {}} - 
self.adapter.compare_mode = GraphConst.REAL_DATA_COMPARE - self.adapter.add_error_key(node_data) - self.assertEqual(node_data['key'][GraphConst.ERROR_KEY], - [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO]) - node_data = {'key': {}} - self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE - self.adapter.add_error_key(node_data) - self.assertEqual(node_data['key'][GraphConst.ERROR_KEY], - [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR]) - node_data = {'key': []} - self.adapter.add_error_key(node_data) - self.assertEqual(node_data['key'], []) - - node_data = {'key': {}} - self.adapter.compare_mode = '111' - self.adapter.add_error_key(node_data) - self.assertEqual(node_data['key'], {'error_key': []}) - def test_get_tool_tip(self): self.adapter.compare_mode = GraphConst.MD5_COMPARE tips = self.adapter.get_tool_tip() diff --git a/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py index ee5e3f519ed..751006f3e52 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/msprobe_adapter.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import re -import math from msprobe.core.compare.acc_compare import read_op, merge_tensor, get_accuracy from msprobe.core.common.utils import set_dump_path, get_dump_mode from msprobe.visualization.utils import GraphConst diff --git a/debug/accuracy_tools/msprobe/visualization/compare/mode_adapter.py b/debug/accuracy_tools/msprobe/visualization/compare/mode_adapter.py index 535192d80c5..7b961c4e8cd 100644 --- a/debug/accuracy_tools/msprobe/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/msprobe/visualization/compare/mode_adapter.py @@ -14,7 +14,6 @@ # limitations under the License. import json -import math from msprobe.core.common.const import CompareConst, Const from msprobe.visualization.utils import ToolTip, GraphConst, str2float @@ -157,24 +156,6 @@ class ModeAdapter: return self.csv_data.extend(compare_result_list) - def add_error_key(self, node_data): - """ - 根据不同的模式进行提供不同错误信息 - """ - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - if self.compare_mode == GraphConst.SUMMARY_COMPARE: - message = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] - elif self.compare_mode == GraphConst.REAL_DATA_COMPARE: - message = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] - else: - # 输出件优化 - message = [] - value[GraphConst.ERROR_KEY] = message - node_data[key] = value - def get_tool_tip(self): """ 用于前端展示字段的具体含义 diff --git a/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py b/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py index 5e68d6b2528..b675bd633b2 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py +++ b/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py @@ -107,15 +107,6 @@ class DistributedAnalyzer: return None, None return group_ranks, group_id - @staticmethod - def _get_batch_group_info(node, rank): 
- for data in node.input_data.values(): - group_id = data.get('group_id') - if group_id is not None: - return group_id - logger.warning(f'The group_id of node {node.id} does not exist, {CANNOT_MATCH}{rank}') - return None - def distributed_match(self): for rank, graph in self.graphs.items(): nodes = graph.node_map diff --git a/debug/accuracy_tools/msprobe/visualization/graph/graph.py b/debug/accuracy_tools/msprobe/visualization/graph/graph.py index 569d8ea21b5..90574174144 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph/graph.py +++ b/debug/accuracy_tools/msprobe/visualization/graph/graph.py @@ -19,8 +19,6 @@ from msprobe.visualization.utils import GraphConst from msprobe.core.common.log import logger from msprobe.core.common.const import Const -MAX_RECUR_LEVEL = 100 - class Graph: def __init__(self, model_name, data_path='', dump_data=None): diff --git a/debug/accuracy_tools/msprobe/visualization/utils.py b/debug/accuracy_tools/msprobe/visualization/utils.py index f6e8258bb67..679af4f3612 100644 --- a/debug/accuracy_tools/msprobe/visualization/utils.py +++ b/debug/accuracy_tools/msprobe/visualization/utils.py @@ -73,14 +73,6 @@ def str2float(percentage_str): return 0 -def is_integer(s): - try: - int(s) - return True - except Exception: - return False - - def check_directory_content(input_path): """ 检查input_path内容, 是否全是step{数字}命名的文件夹(例如step0), 或者全是rank{数字}命名的文件夹(例如rank0), 或者全是文件 -- Gitee From 14bf84215bee5093ca48a75f678a9496d14fd044 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 18 Mar 2025 10:52:09 +0800 Subject: [PATCH 292/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96save=20json=E6=9C=AA=E8=AE=BE=E7=BD=AE=E6=9D=83?= =?UTF-8?q?=E9=99=90=E9=97=AE=E9=A2=98=E6=95=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/visualization_ut/builder/test_graph_builder.py | 2 +- .../msprobe/visualization/builder/graph_builder.py | 6 +++--- 
debug/accuracy_tools/msprobe/visualization/utils.py | 8 -------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py b/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py index 9b69e8bc2a7..2e41f2a325c 100644 --- a/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py +++ b/debug/accuracy_tools/msprobe/test/visualization_ut/builder/test_graph_builder.py @@ -32,7 +32,7 @@ class TestGraphBuilder(unittest.TestCase): self.assertIsInstance(graph, Graph) self.assertEqual(len(graph.node_map), 3) - @patch('msprobe.visualization.builder.graph_builder.save_json_file') + @patch('msprobe.visualization.builder.graph_builder.save_json') def test_to_json(self, mock_save_json_file): GraphBuilder.to_json("step/rank/output.vis", self.config) mock_save_json_file.assert_called_once() diff --git a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py index 0b3305e8c6a..bec99d675f4 100644 --- a/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/msprobe/visualization/builder/graph_builder.py @@ -16,12 +16,12 @@ import re from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_json +from msprobe.core.common.file_utils import load_json, save_json from msprobe.visualization.builder.msprobe_adapter import get_input_output from msprobe.visualization.builder.msprobe_adapter import op_patterns from msprobe.visualization.graph.graph import Graph from msprobe.visualization.graph.node_op import NodeOp -from msprobe.visualization.utils import save_json_file, GraphConst +from msprobe.visualization.utils import GraphConst class GraphBuilder: @@ -74,7 +74,7 @@ class GraphBuilder: if config.task: result[GraphConst.JSON_TASK_KEY] = config.task result[GraphConst.OVERFLOW_CHECK] = 
config.overflow_check - save_json_file(filename, result) + save_json(filename, result, indent=4) @staticmethod def _simplify_stack(stack_dict): diff --git a/debug/accuracy_tools/msprobe/visualization/utils.py b/debug/accuracy_tools/msprobe/visualization/utils.py index f6e8258bb67..0d4402a406a 100644 --- a/debug/accuracy_tools/msprobe/visualization/utils.py +++ b/debug/accuracy_tools/msprobe/visualization/utils.py @@ -42,14 +42,6 @@ def load_data_json_file(file_path): return load_json_file(file_path).get(GraphConst.DATA_KEY, {}) -def save_json_file(file_path, data): - """ - 保存json文件 - """ - with FileOpen(file_path, 'w') as f: - f.write(json.dumps(data, indent=4)) - - def get_csv_df(stack_mode, csv_data, compare_mode): """ 调用acc接口写入csv -- Gitee From 80e84303af1c1c72a2770ddae08fcfefc02de6cd Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Tue, 18 Mar 2025 10:32:51 +0800 Subject: [PATCH 293/333] adapt to mindspore without StubTensor --- .../mindspore/dump/hook_cell/api_register.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py index 53271ff07be..7a5737662d4 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_register.py @@ -17,7 +17,6 @@ import os from mindspore import Tensor, ops, mint from mindspore.mint.nn import functional -from mindspore.common._stub_tensor import StubTensor from mindspore.communication import comm_func from msprobe.core.common.file_utils import load_yaml @@ -28,18 +27,28 @@ from msprobe.mindspore.common.utils import is_mindtorch from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell +stub_tensor_existed = True +try: + from mindspore.common._stub_tensor import StubTensor +except ImportError: + stub_tensor_existed = False + cur_path = 
os.path.dirname(os.path.realpath(__file__)) if not is_mindtorch(): _api_types = { Const.MS_FRAMEWORK: { Const.MS_API_TYPE_OPS: (ops, (ops,)), Const.MS_API_TYPE_TENSOR: (Tensor, (Tensor,)), - Const.MS_API_TYPE_STUB_TENSOR: (StubTensor, (StubTensor,)), Const.MS_API_TYPE_MINT: (mint, (mint,)), Const.MS_API_TYPE_MINT_FUNC: (functional, (functional,)), Const.MS_API_TYPE_COM: (comm_func, (comm_func,)) } } + if stub_tensor_existed: + _api_types.get(Const.MS_FRAMEWORK).update( + {Const.MS_API_TYPE_STUB_TENSOR: (StubTensor, (StubTensor,))} + ) + _supported_api_list_path = (os.path.join(cur_path, MsConst.SUPPORTED_API_LIST_FILE),) else: import torch @@ -116,10 +125,10 @@ def get_api_register(return_new=False): def wrapped_method(*args, **kwargs): return method(*args, **kwargs) return wrapped_method - if not is_mindtorch() and not stub_tensor_set: + if not is_mindtorch() and stub_tensor_existed and not stub_tensor_set: + api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, []) for attr_name in dir(StubTensor): attr = getattr(StubTensor, attr_name) - api_names = load_yaml(_supported_api_list_path[0]).get(Const.MS_API_TYPE_TENSOR, []) if attr_name in api_names and callable(attr): setattr(StubTensor, attr_name, stub_method(attr)) stub_tensor_set = True -- Gitee From 5404b8540f2add31f599d90dd7f45e8138471d33 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 18 Mar 2025 15:31:09 +0800 Subject: [PATCH 294/333] =?UTF-8?q?=E5=90=8C=E6=AD=A5master?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_dump/data_processor/mindspore_processor.py | 5 ----- .../data_dump/data_processor/pytorch_processor.py | 12 ------------ 2 files changed, 17 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 78dec13adc9..56436de967f 100644 --- 
a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -43,7 +43,6 @@ class MindsporeDataProcessor(BaseDataProcessor): self.mindspore_object_key = { "dtype": self.analyze_dtype_in_kwargs } - self.stat_stack_list = [] self._async_dump_cache = {} self.api_register = get_api_register() @@ -256,10 +255,6 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor): f"current overflow times: {self.real_overflow_nums}.") self.cached_tensors_and_file_paths = {} - @staticmethod - def convert_to_numpy(self, value): - return value.asnumpy() if hasattr(value, "asnumpy") else value - def _analyze_maybe_overflow_tensor(self, tensor_json): max_tensor = self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) min_tensor = self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index e45ebfa513a..35f890bf6d8 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -439,18 +439,6 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): return single_arg - def _analyze_tensor(self, tensor, suffix): - dump_data_name, file_path = self.get_save_file_path(suffix) - if not path_len_exceeds_limit(file_path): - self.cached_tensors_and_file_paths.update({file_path: tensor}) - else: - logger.warning(f'The file path {file_path} length exceeds limit.') - single_arg = super()._analyze_tensor(tensor, suffix) - single_arg.update({"data_name": dump_data_name}) - if not self.has_overflow and self.support_inf_nan: - self._analyze_maybe_overflow_tensor(single_arg) - return single_arg - class FreeBenchmarkDataProcessor(PytorchDataProcessor): def 
__init__(self, config, data_writer): -- Gitee From 10d75d7d2c644432d9d8a18688032b1d80f57990 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 18 Mar 2025 15:35:33 +0800 Subject: [PATCH 295/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index f831c5d1535..33c95d686b4 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -50,7 +50,7 @@ class DataWriter: spawn_writer = csv.writer(csv_file) if not is_exists: spawn_writer.writerow(result_header) - spawn_writer.writerows([result, ]) + spawn_writer.writerows([result,]) is_new_file = not is_exists if is_new_file: change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) -- Gitee From 0031e9f2644ab54a8fed62d4eab416a59de0d4d7 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 18 Mar 2025 15:43:41 +0800 Subject: [PATCH 296/333] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E8=BF=9C=E7=A8=8B?= =?UTF-8?q?=E5=88=86=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/pytorch_processor.py | 6 ------ debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 6 +----- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 35f890bf6d8..172781b811e 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -279,12 +279,6 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_json.update({"tensor_stat_index": 
placeholder_index}) tensor_json.update({"requires_grad": tensor.requires_grad}) - if tensor_stat.max is not None: - if torch.isinf(tensor_stat.max) or torch.isnan(tensor_stat.max): - tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max") - if tensor_stat.min is not None: - if torch.isinf(tensor_stat.min) or torch.isnan(tensor_stat.min): - tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min") else: tensor_json.update({"requires_grad": tensor.requires_grad}) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 33c95d686b4..364237fd3c9 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -139,7 +139,6 @@ class DataWriter: return self.stat_stack_list[index][0] def get_buffer_values_min(self, index): - print(f"self.stat_stack_list[index]:{self.stat_stack_list[index]}") return self.stat_stack_list[index][1] def flush_stat_stack(self): @@ -151,8 +150,7 @@ class DataWriter: return [] result = [ [ - x.cpu().detach().numpy().tolist() if hasattr(x, "cpu") else - x.asnumpy().tolist() if hasattr(x, "asnumpy") else x + x.item() if hasattr(x, "item") else x for x in stat_values ] for stat_values in self.stat_stack_list @@ -163,11 +161,9 @@ class DataWriter: def write_json(self): # 在写 JSON 前,统一获取统计值 stat_result = self.flush_stat_stack() - print(f"before:{self.cache_data}") # 遍历 cache_data,将占位符替换为最终统计值 if stat_result: self._replace_stat_placeholders(self.cache_data, stat_result) - print(f"after:{self.cache_data}") if self.cache_data: self.write_data_json(self.dump_file_path) if self.cache_stack: -- Gitee From 80c3095ba98c28e315574a2d64a919b76f5d5f72 Mon Sep 17 00:00:00 2001 From: TAJh Date: Tue, 18 Mar 2025 16:04:19 +0800 Subject: [PATCH 297/333] bugfix --- debug/accuracy_tools/msprobe/mindspore/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 
1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/__init__.py b/debug/accuracy_tools/msprobe/mindspore/__init__.py index a519d861f57..cbdab34f044 100644 --- a/debug/accuracy_tools/msprobe/mindspore/__init__.py +++ b/debug/accuracy_tools/msprobe/mindspore/__init__.py @@ -17,7 +17,6 @@ import os try: from msprobe.lib import _msprobe_c - os.environ["MS_HOOK_ENABLE"] = "on" os.environ["HOOK_TOOL_PATH"] = _msprobe_c.__file__ except ImportError: from .common.log import logger @@ -26,3 +25,5 @@ except ImportError: from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger from msprobe.mindspore.common.utils import seed_all, MsprobeStep, MsprobeInitStep from msprobe.mindspore.monitor.module_hook import TrainerMon + +os.environ["MS_HOOK_ENABLE"] = "on" -- Gitee From ef17eb6d46847ac463b65c8aa8d6fb0829367fa9 Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Tue, 18 Mar 2025 16:33:08 +0800 Subject: [PATCH 298/333] msprobe dependence bugfix --- debug/accuracy_tools/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 2da7fcf6677..14fd15e3c06 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -24,12 +24,12 @@ import setuptools INSTALL_REQUIRED = [ "wheel", "einops", - "numpy < 2.0", + "numpy >=1.23.0, < 2.0", "pandas >= 1.3.5, < 2.1", "pyyaml", "rich", "tqdm", - "openpyxl", + "openpyxl >= 3.0.6", "pyopenssl", "twisted", "matplotlib", -- Gitee From b965f09f3b1e22f8c4cc227ce2d8170c7c0ae9e0 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 18 Mar 2025 17:13:03 +0800 Subject: [PATCH 299/333] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96=E5=A4=84=E7=90=86=E6=97=A5=E5=BF=97=E8=AF=AD=E6=B3=95?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/visualization/graph/distributed_analyzer.py | 2 +- 
debug/accuracy_tools/msprobe/visualization/graph/node_op.py | 3 +-- debug/accuracy_tools/msprobe/visualization/graph_service.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py b/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py index b675bd633b2..a4b709a1ed1 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py +++ b/debug/accuracy_tools/msprobe/visualization/graph/distributed_analyzer.py @@ -368,7 +368,7 @@ class DistributedAnalyzer: target_api_name = self.config.get(api_name)[0] target_rank = int(id_info[1].replace(Const.RANK, '')) except Exception as e: - logger.warning(f'Failed to parsing batch p2p parameter with error info: {e}.') + logger.warning(f'Failed to parse batch p2p parameter with error info: {e}.') continue target_node = self._get_target_node(rank, unique_group_id, api_name, target_rank, target_api_name) if not target_node: diff --git a/debug/accuracy_tools/msprobe/visualization/graph/node_op.py b/debug/accuracy_tools/msprobe/visualization/graph/node_op.py index 33bfa9cc2e3..85d7e65bc52 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph/node_op.py +++ b/debug/accuracy_tools/msprobe/visualization/graph/node_op.py @@ -24,7 +24,6 @@ class NodeOp(Enum): function_api = 1 api_collection = 9 - @staticmethod def get_node_op(node_name: str): """ @@ -37,5 +36,5 @@ class NodeOp(Enum): pattern = op_patterns[index] if re.match(pattern, node_name): return op - logger.warning(f"Cannot parsing node_name {node_name} into NodeOp, default parsing as module.") + logger.warning(f"Cannot parse node_name {node_name} into NodeOp, default parsing as module.") return NodeOp.module diff --git a/debug/accuracy_tools/msprobe/visualization/graph_service.py b/debug/accuracy_tools/msprobe/visualization/graph_service.py index 75b0014c1c0..d971320a594 100644 --- a/debug/accuracy_tools/msprobe/visualization/graph_service.py +++ 
b/debug/accuracy_tools/msprobe/visualization/graph_service.py @@ -159,7 +159,7 @@ def _compare_graph_steps(input_param, args): bench_steps = sorted(check_and_return_dir_contents(dump_step_b, Const.STEP)) if npu_steps != bench_steps: - logger.error('The number of steps in the two runs are different. Unable to match the steps.') + logger.error('The number of steps in the two runs is different. Unable to match the steps.') raise CompareException(CompareException.INVALID_PATH_ERROR) for folder_step in npu_steps: -- Gitee From de921112c66c866ded7df36389526cd6c335e0df Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Tue, 18 Mar 2025 19:09:57 +0800 Subject: [PATCH 300/333] close file handle before return --- debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp index 3374aa0be31..d26b1a6a2c3 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp @@ -715,6 +715,7 @@ static DebuggerErrno WriteOneTensorStatToDisk(const AclTensorStats& stat) if (i >= retry) { LOG_ERROR(DebuggerErrno::ERROR_SYSCALL_FAILED, "Failed to occupy file " + dumpfile); + close(fd); return DebuggerErrno::ERROR_SYSCALL_FAILED; } -- Gitee From abaa7ee379540f26c3da05001adb6660ad6c799d Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 18 Mar 2025 19:44:12 +0800 Subject: [PATCH 301/333] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E5=BC=82=E6=AD=A5dump?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/mindspore_processor.py | 26 +++++------ .../data_processor/pytorch_processor.py | 44 +++++++++++++------ .../msprobe/mindspore/service.py | 10 ++--- .../accuracy_tools/msprobe/pytorch/service.py | 12 ++--- 4 files changed, 49 
insertions(+), 43 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 56436de967f..f4cddd50451 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -146,20 +146,18 @@ class MindsporeDataProcessor(BaseDataProcessor): 'shape': tensor.shape } - # 如果配置了延迟 CPU 搬运,则将统计值存入全局 buffer,并返回占位索引 - if tensor_stat.stack_tensor_stat is None: - stat_values = [ - tensor_stat.max, - tensor_stat.min, - tensor_stat.mean, - tensor_stat.norm - ] - - placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) - - tensor_json.update({"tensor_stat_index": placeholder_index}) - else: - tensor_json.update({'tensor_stat': tensor_stat.stack_tensor_stat}) + # 将统计值存入全局 buffer,并返回占位索引 + stat_values = [ + tensor_stat.max, + tensor_stat.min, + tensor_stat.mean, + tensor_stat.norm + ] + + placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) + + tensor_json.update({"tensor_stat_index": placeholder_index}) + if self.config.summary_mode == Const.MD5 and not self.config.async_dump: tensor_md5 = self.get_md5_for_tensor(tensor) tensor_json.update({Const.MD5: tensor_md5}) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 172781b811e..5c349c1c18c 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -114,6 +114,26 @@ class PytorchDataProcessor(BaseDataProcessor): ])) return tensor_stat + @staticmethod + def get_stat_info_sync(data): + tensor_stat = TensorStatInfo() + if torch.is_complex(data): + logger.warning("Async dump do not support complex 
data!") + return tensor_stat + elif data.dtype == torch.bool: + tensor_stat.max = torch.any(data) + tensor_stat.min = torch.all(data) + elif not data.shape: + tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data + else: + if not data.is_floating_point() or data.dtype == torch.float64: + data = data.float() + tensor_stat.max = torch.max(data) + tensor_stat.min = torch.min(data) + tensor_stat.mean = torch.mean(data) + tensor_stat.norm = torch.norm(data) + return tensor_stat + @staticmethod def get_stat_info_sync(data): tensor_stat = TensorStatInfo() @@ -268,21 +288,17 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_json.update({'type': 'torch.Tensor'}) tensor_json.update({'dtype': str(tensor.dtype)}) tensor_json.update({"shape": tensor.shape}) - if tensor_stat.stack_tensor_stat is None: - stat_values = [ - tensor_stat.max, - tensor_stat.min, - tensor_stat.mean, - tensor_stat.norm - ] - placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) - - tensor_json.update({"tensor_stat_index": placeholder_index}) - tensor_json.update({"requires_grad": tensor.requires_grad}) - else: - tensor_json.update({"requires_grad": tensor.requires_grad}) - tensor_json.update({"tensor_stat": tensor_stat.stack_tensor_stat}) + stat_values = [ + tensor_stat.max, + tensor_stat.min, + tensor_stat.mean, + tensor_stat.norm + ] + placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) + + tensor_json.update({"tensor_stat_index": placeholder_index}) + tensor_json.update({"requires_grad": tensor.requires_grad}) if self.config.summary_mode == Const.MD5 and not self.config.async_dump: tensor_md5 = self.get_md5_for_tensor(tensor) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 11d6db7a981..b0dfb2809a5 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -274,10 +274,8 @@ class Service: def 
step(self): if self.config.level == Const.LEVEL_DEBUG: return - if self.config.async_dump: - self.data_collector.fill_stack_tensor_data() - if self.config.task == Const.TENSOR: - self.data_collector.data_processor.dump_async_data() + if self.config.async_dump and self.config.task == Const.TENSOR: + self.data_collector.data_processor.dump_async_data() self.data_collector.write_json() self.loop += 1 self.reset_status() @@ -350,9 +348,7 @@ class Service: self.switch = False self.primitive_switch = False self.start_call = False - if self.config.async_dump: - self.data_collector.fill_stack_tensor_data() - if self.config.task == Const.TENSOR: + if self.config.async_dump and self.config.task == Const.TENSOR: self.data_collector.data_processor.dump_async_data() self.data_collector.write_json() JitDump.jit_dump_switch = False diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index b0b2780328d..7fdc4380f51 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -293,10 +293,8 @@ class Service: if self.config.online_run_ut and torch_version_above_or_equal_2: run_ut_dispatch(self.attl, False, self.config.online_run_ut_recompute) return - if self.config.async_dump: - self.data_collector.fill_stack_tensor_data() - if self.config.task == Const.TENSOR: - self.data_collector.data_processor.dump_async_data() + if self.config.async_dump and self.config.task == Const.TENSOR: + self.data_collector.data_processor.dump_async_data() self.data_collector.write_json() def step(self): @@ -304,10 +302,8 @@ class Service: return if self.should_stop_service: return - if self.config.async_dump: - self.data_collector.fill_stack_tensor_data() - if self.config.task == Const.TENSOR: - self.data_collector.data_processor.dump_async_data() + if self.config.async_dump and self.config.task == Const.TENSOR: + self.data_collector.data_processor.dump_async_data() 
self.data_collector.write_json() self.loop += 1 self.reset_status() -- Gitee From 90f9c8c0a482626012a69af7e4d4f5876bb364c7 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 18 Mar 2025 20:00:33 +0800 Subject: [PATCH 302/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/data_dump/data_collector.py | 3 --- .../core/data_dump/data_processor/base.py | 3 +-- .../data_processor/mindspore_processor.py | 17 +++++++----- .../data_processor/pytorch_processor.py | 23 +--------------- .../msprobe/core/data_dump/json_writer.py | 26 ------------------- .../test_mindspore_processor.py | 23 ---------------- .../data_processor/test_pytorch_processor.py | 21 --------------- 7 files changed, 12 insertions(+), 104 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py index 20e4489f89e..622c441a27d 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py @@ -213,9 +213,6 @@ class DataCollector: data_info = self.data_processor.analyze_params(grad_name, param_name, data) self.handle_data(grad_name, data_info, flush=self.data_processor.is_terminated) - def fill_stack_tensor_data(self): - self.data_writer.fill_stack_tensor_data() - def debug_data_collect_forward(self, variable, name_with_count): data_info = self.data_processor.analyze_debug_forward(variable, name_with_count) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 775a80b2418..962dc527c59 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -79,12 +79,11 @@ class 
ModuleBackwardOutputs: class TensorStatInfo: - def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None, stack_tensor_stat=None): + def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None): self.max = max_val self.min = min_val self.mean = mean_val self.norm = norm_val - self.stack_tensor_stat = stack_tensor_stat class BaseDataProcessor: diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index f4cddd50451..60a61879c65 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -85,19 +85,22 @@ class MindsporeDataProcessor(BaseDataProcessor): @staticmethod def get_stat_info_async(data): tensor_stat = TensorStatInfo() - if data.dtype == ms.complex64 or data.dtype == ms.complex128: + if data.dtype == ms.bool_: + tensor_stat.max = mint.any(data) + tensor_stat.min = mint.all(data) + elif not data.shape: + tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data + elif data.dtype == ms.complex64 or data.dtype == ms.complex128: logger.warning("Async dump do not support complex data!") return tensor_stat - elif data.dtype == ms.bool_: - tensor_stat.stack_tensor_stat = (["Max", "Min"], ops.stack([data.any(), data.all()])) - elif not data.shape: - tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack([data, data, data, data])) else: if not ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm - tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], ops.stack( - [mint.max(data), mint.min(data), mint.mean(data), get_norm_value(data)])) + tensor_stat.max = mint.max(data) + tensor_stat.min = mint.min(data) + tensor_stat.mean = mint.mean(data) + 
tensor_stat.norm = get_norm_value(data) return tensor_stat @staticmethod diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 5c349c1c18c..471b0d449fd 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -92,30 +92,9 @@ class PytorchDataProcessor(BaseDataProcessor): def analyze_dtype_in_kwargs(element): return {"type": "torch.dtype", "value": str(element)} - @staticmethod - def get_stat_info_async(data): - tensor_stat = TensorStatInfo() - if torch.is_complex(data): - logger.warning("Async dump do not support complex data!") - return tensor_stat - elif data.dtype == torch.bool: - tensor_stat.stack_tensor_stat = (["Max", "Min"], torch.stack( - [torch.any(data), torch.all(data)])) - elif not data.shape: - tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], torch.stack([data, data, data, data])) - else: - if not data.is_floating_point() or data.dtype == torch.float64: - data = data.float() - tensor_stat.stack_tensor_stat = (["Max", "Min", "Mean", "Norm"], torch.stack([ - torch.max(data), - torch.min(data), - torch.mean(data), - torch.norm(data) - ])) - return tensor_stat @staticmethod - def get_stat_info_sync(data): + def get_stat_info_async(data): tensor_stat = TensorStatInfo() if torch.is_complex(data): logger.warning("Async dump do not support complex data!") diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 364237fd3c9..22778999624 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -209,29 +209,3 @@ class DataWriter: elif isinstance(data, list): for item in data: self._replace_stat_placeholders(item, stat_result) - - def 
fill_stack_tensor_data(self): - self.process_stat_data_recursive(self.cache_data) - - def process_stat_data_recursive(self, data, depth=0): - if depth > Const.MAX_DEPTH: - logger.error(f"The maximum depth of recursive process stat data, {Const.MAX_DEPTH} is reached.") - raise MsprobeException(MsprobeException.RECURSION_LIMIT_ERROR) - if isinstance(data, dict): - if "tensor_stat" in data.keys(): - tensor_stat = data["tensor_stat"] - if len(tensor_stat) != Const.TENSOR_STAT_LEN or len(tensor_stat[0]) != len(tensor_stat[1]): - logger.warning("Some bad data in async dump") - else: - tensor_stat_index, tensor_stat_data = tensor_stat[0], tensor_stat[1] - if hasattr(tensor_stat_data, "device") and tensor_stat_data.device != Const.CPU_LOWERCASE: - tensor_stat_data = tensor_stat_data.cpu() - for index, stat in zip(tensor_stat_index, tensor_stat_data): - data.update({index: stat.item()}) - del data["tensor_stat"] - else: - for key in data.keys(): - self.process_stat_data_recursive(data[key], depth + 1) - elif isinstance(data, (list, tuple)): - for i in data: - self.process_stat_data_recursive(i, depth + 1) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py index b593d34c5d8..ed20e4f73cd 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py @@ -66,15 +66,6 @@ class TestMindsporeDataProcessor(unittest.TestCase): self.assertEqual(result.mean, 2.0) self.assertEqual(result.norm, ms.ops.norm(tensor).item()) - def test_get_stat_info_float_async(self): - self.config.async_dump = True - tensor = ms.tensor([1.0, 2.0, 3.0]) - result = self.processor.get_stat_info(tensor).stack_tensor_stat[1] - self.assertEqual(result[0].item(), 3.0) - 
self.assertEqual(result[1].item(), 1.0) - self.assertEqual(result[2].item(), 2.0) - self.assertEqual(result[3].item(), ms.ops.norm(tensor).item()) - def test_get_stat_info_int(self): self.config.async_dump = False tensor = ms.Tensor([1, 2, 3], dtype=ms.int32) @@ -84,13 +75,6 @@ class TestMindsporeDataProcessor(unittest.TestCase): self.assertEqual(result.mean, 2) self.assertEqual(result.norm, ms.ops.norm(tensor).item()) - def test_get_stat_info_int_async(self): - self.config.async_dump = True - tensor = ms.tensor([1, 2, 3]) - result = self.processor.get_stat_info(tensor).stack_tensor_stat[1] - self.assertEqual(result[0].item(), 3.0) - self.assertEqual(result[1].item(), 1.0) - def test_get_stat_info_bool(self): self.config.async_dump = False tensor = ms.Tensor([True, False, True]) @@ -100,13 +84,6 @@ class TestMindsporeDataProcessor(unittest.TestCase): self.assertIsNone(result.mean) self.assertIsNone(result.norm) - def test_get_stat_info_bool_async(self): - self.config.async_dump = True - tensor = ms.Tensor([True, False, True]) - result = self.processor.get_stat_info(tensor).stack_tensor_stat[1] - self.assertEqual(result[0].item(), True) - self.assertEqual(result[1].item(), False) - @patch.object(MindsporeDataProcessor, 'get_md5_for_tensor') def test__analyze_tensor(self, get_md5_for_tensor): get_md5_for_tensor.return_value = "test_md5" diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py index 3d31a1bb516..ed79e16fa98 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py @@ -80,14 +80,6 @@ class TestPytorchDataProcessor(unittest.TestCase): self.assertEqual(result.mean, 2.0) self.assertEqual(result.norm, torch.norm(tensor).item()) - def test_get_stat_info_float_async(self): 
- tensor = torch.tensor([1.0, 2.0, 3.0]) - result = self.processor.get_stat_info_async(tensor).stack_tensor_stat[1] - self.assertEqual(result[0].item(), 3.0) - self.assertEqual(result[1].item(), 1.0) - self.assertEqual(result[2].item(), 2.0) - self.assertEqual(result[3].item(), torch.norm(tensor).item()) - def test_get_stat_info_int(self): tensor = torch.tensor([1, 2, 3], dtype=torch.int32) result = self.processor.get_stat_info(tensor) @@ -96,13 +88,6 @@ class TestPytorchDataProcessor(unittest.TestCase): self.assertEqual(result.mean, 2) self.assertEqual(result.norm, torch.norm(tensor.float()).item()) - def test_get_stat_info_int_async(self): - tensor = torch.tensor([1, 2, 3]) - result = self.processor.get_stat_info_async(tensor).stack_tensor_stat[1] - self.assertEqual(result[0].item(), 3.0) - self.assertEqual(result[1].item(), 1.0) - self.assertEqual(result[2].item(), 2.0) - self.assertEqual(result[3].item(), torch.norm(tensor.float()).item()) def test_get_stat_info_empty(self): tensor = torch.tensor([]) @@ -120,12 +105,6 @@ class TestPytorchDataProcessor(unittest.TestCase): self.assertIsNone(result.mean) self.assertIsNone(result.norm) - def test_get_stat_info_bool_async(self): - tensor = torch.tensor([True, False, True]) - result = self.processor.get_stat_info_async(tensor).stack_tensor_stat[1] - self.assertEqual(result[0].item(), True) - self.assertEqual(result[1].item(), False) - def test_get_stat_info_with_scalar_tensor(self): scalar_tensor = torch.tensor(42.0) result = PytorchDataProcessor.get_stat_info(scalar_tensor) -- Gitee From e727c7e6524bf0a517168f7856a7c2f82cf26a86 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 18 Mar 2025 20:09:28 +0800 Subject: [PATCH 303/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/mindspore_processor.py | 2 +- 
.../msprobe/core/data_dump/data_processor/pytorch_processor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 60a61879c65..848347a50e4 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -94,7 +94,7 @@ class MindsporeDataProcessor(BaseDataProcessor): logger.warning("Async dump do not support complex data!") return tensor_stat else: - if not ops.is_floating_point(data) or data.dtype == ms.float64: + if not data.dtype == ms.float64 or ops.is_floating_point(data): data = data.to(ms.float32) get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm tensor_stat.max = mint.max(data) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 471b0d449fd..92f2ed06e42 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -128,7 +128,7 @@ class PytorchDataProcessor(BaseDataProcessor): elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data else: - if not data.is_floating_point() or data.dtype == torch.float64: + if not data.dtype == torch.float64 or data.is_floating_point(): data = data.float() tensor_stat.max = torch.max(data) tensor_stat.min = torch.min(data) -- Gitee From ce56f4d0a1d74edb436a8fa3f97a0283fe48ad9c Mon Sep 17 00:00:00 2001 From: lcw Date: Tue, 11 Mar 2025 16:51:18 +0800 Subject: [PATCH 304/333] =?UTF-8?q?=E3=80=90feature=E3=80=91config=5Fcheck?= =?UTF-8?q?ing=20=E6=94=AF=E6=8C=81=E8=B6=85=E5=8F=82=E6=AF=94=E5=AF=B9?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/const.py | 1 + .../config_checking/checkers/__init__.py | 1 + .../config_checking/checkers/base_checker.py | 1 + .../checkers/hyperparameter_checker.py | 218 ++++++++++++++++++ .../pytorch/config_checking/config_checker.py | 7 +- .../config_checking/config_checking.py | 6 +- .../test/pytorch_ut/config_checking/bench.sh | 25 ++ .../test/pytorch_ut/config_checking/cmp.sh | 25 ++ .../config_checking/test_config_checking.py | 34 +-- 9 files changed, 296 insertions(+), 22 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/bench.sh create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/cmp.sh diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index bdb2353c3b9..0855942857e 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -167,6 +167,7 @@ class Const: LEFT_MOVE_INDEX = -1 RIGHT_MOVE_INDEX = 1 LAST_INDEX = -1 + MAX_TRAVERSAL_DEPTH = 5 TOP_LAYER = "TopLayer" CELL = "Cell" diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py index 47e5e614716..bc698ff7cee 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/__init__.py @@ -20,6 +20,7 @@ import msprobe.pytorch.config_checking.checkers.pip_checker import msprobe.pytorch.config_checking.checkers.checkpoint_checker import msprobe.pytorch.config_checking.checkers.dataset_checker import msprobe.pytorch.config_checking.checkers.weights_checker +import msprobe.pytorch.config_checking.checkers.hyperparameter_checker from 
msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py index 7eb0babb9d4..fb6c36938cc 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/base_checker.py @@ -27,6 +27,7 @@ class PackInput: self.need_env_args = config_dict.get("env args", None) self.need_pip_data = config_dict.get("pip data", None) self.output_zip_path = config_dict.get("output zip path", "./config_check_pack.zip") + self.shell_path = config_dict.get("shell path", None) self.model = model self.check_input_params() diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py new file mode 100644 index 00000000000..dd0ae266855 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/checkers/hyperparameter_checker.py @@ -0,0 +1,218 @@ +# Copyright (c) 2025-2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import json +import re +import tempfile +from difflib import SequenceMatcher + +from typing import Union, List, Dict, Any + +from msprobe.pytorch.config_checking.checkers.base_checker import BaseChecker +from msprobe.pytorch.config_checking.config_checker import register_checker_item +from msprobe.pytorch.config_checking.utils.utils import compare_dict, config_checking_print +from msprobe.core.common.file_utils import (os_walk_for_files, create_file_in_zip, load_json, create_file_with_list, + FileOpen) +from msprobe.core.common.const import FileCheckConst, Const + + +@register_checker_item("hyperparameter") +class HyperparameterChecker(BaseChecker): + input_needed = "shell_path" + target_name_in_zip = "hyperparameters" + result_filename = "hyperparameter_diff.txt" + + PARAMETER_NAME_MAPPING = { + "learning_rate": ["lr", "learningrate"], + "batch_size": ["batch", "bs", "batch_size_per_gpu"], + "epochs": ["num_epochs", "max_epochs", "epoch"], + "weight_decay": ["wd", "weightdecay"], + "dropout_rate": ["dropout", "drop_rate"], + } + + @staticmethod + def pack(pack_input): + shell_path = pack_input.shell_path + output_zip_path = pack_input.output_zip_path + + if not isinstance(shell_path, list): + raise TypeError("shell_path should be a list of file paths.") + + for index, script_path in enumerate(shell_path): + if os.path.isfile(script_path): + hyperparameters = HyperparameterChecker._extract_hyperparameters_from_script(script_path) + if hyperparameters: + create_file_in_zip(output_zip_path, os.path.join(HyperparameterChecker.target_name_in_zip, + HyperparameterChecker.target_name_in_zip + + Const.REPLACEMENT_CHARACTER + str(index) + + FileCheckConst.JSON_SUFFIX), + json.dumps(hyperparameters, indent=4)) + config_checking_print(f"add hyperparameters args to zip") + else: + config_checking_print(f"Warning: Failed to extract hyperparameters from script {script_path}") + else: + config_checking_print(f"Warning: Script path {script_path} is not a file.") + + 
@staticmethod + def compare(bench_dir, cmp_dir, output_path): + bench_model_dir = os.path.join(bench_dir, HyperparameterChecker.target_name_in_zip) + cmp_model_dir = os.path.join(cmp_dir, HyperparameterChecker.target_name_in_zip) + output_filepath = os.path.join(output_path, HyperparameterChecker.result_filename) + bench_hyperparameters = HyperparameterChecker.load_hyperparameters(bench_model_dir) + cmp_hyperparameters = HyperparameterChecker.load_hyperparameters(cmp_model_dir) + + if len(bench_hyperparameters) != len(cmp_hyperparameters): + config_checking_print("The shell path length dose not match!") + raise Exception("The shell path length dose not match!") + + all_diffs = [] + all_files = set(bench_hyperparameters.keys()) | set(cmp_hyperparameters.keys()) + + for filename in all_files: + bench_params = bench_hyperparameters.get(filename, {}) + cmp_params = cmp_hyperparameters.get(filename, {}) + + if bench_params and cmp_params: + all_diffs.extend(HyperparameterChecker.compare_param(bench_params, cmp_params, filename)) + + elif bench_params is not None: + all_diffs.append(f"[Only in benchmark] File: {filename}") + else: + all_diffs.append(f"[Only in compare] File: {filename}") + create_file_with_list(all_diffs, output_filepath) + + @staticmethod + def compare_param(bench_params, cmp_params, filename): + all_diffs = [] + file_diffs = [] + bench_param_names = bench_params.keys() + for bench_param_name in bench_param_names: + matched_cmp_param_name = HyperparameterChecker._fuzzy_match_parameter(bench_param_name, cmp_params) + if matched_cmp_param_name: + bench_param_value = bench_params[bench_param_name] + cmp_param_value = cmp_params[matched_cmp_param_name] + if bench_param_value != cmp_param_value: + diff = compare_dict({bench_param_name: bench_param_value}, + {matched_cmp_param_name: cmp_param_value}) + if diff: + file_diffs.extend( + [f" Parameter '{bench_param_name}' (matched with '{matched_cmp_param_name}'): {d}" + for d in diff]) + del 
cmp_params[matched_cmp_param_name] + else: + file_diffs.append( + f" [Only in benchmark] Parameter: '{bench_param_name}': {bench_params[bench_param_name]}") + for cmp_param_name, cmp_param_value in cmp_params.items(): + file_diffs.append(f" [Only in compare] Parameter: '{cmp_param_name}': {cmp_param_value}") + if file_diffs: + file_diffs.sort() + all_diffs.append(f"File: {filename}") + all_diffs.extend(file_diffs) + return all_diffs + + @staticmethod + def load_hyperparameters(model_dir): + hyperparameters = {} + if not os.path.exists(model_dir): + return hyperparameters + subfiles = os_walk_for_files(model_dir, Const.MAX_TRAVERSAL_DEPTH) + for files in subfiles: + if files["file"].endswith(FileCheckConst.JSON_SUFFIX): + filepath = os.path.join(files["root"], files["file"]) + relative_filepath = os.path.relpath(filepath, model_dir) + params = load_json(filepath) + if params: + hyperparameters[relative_filepath] = params + return hyperparameters + + @staticmethod + def _extract_hyperparameters_from_script(script_path: str) -> Dict[str, Any]: + """ + Extracts arguments from bash script used to run a model training. 
+ """ + hyperparameters = {} + script_content_list = [] + with FileOpen(script_path, 'r') as file: + for line in file: + stripped_line = line.lstrip() + if not stripped_line.startswith('#'): + line = line.split('#')[0].rstrip() + '\n' + if line.strip(): + script_content_list.append(line) + script_content = ''.join(script_content_list) + + command_line = re.search(r'torchrun\s[^|]*|python -m torch.distributed.launch\s[^|]*', script_content, + re.DOTALL) + if command_line: + command_line = command_line.group() + + blocks = re.findall(r'([a-zA-Z0-9_]{1,20}_ARGS)="(.*?)"', script_content, re.DOTALL) + block_contents = {} + for block_name, block_content in blocks: + block_content = block_content.replace('\n', ' ') + block_contents[block_name] = block_content + command_line = command_line.replace(f"${block_name}", block_content) + + matches = re.findall(r'--([\w-]+)(?:\s+([^\s\\]+))?', command_line) + for match in matches: + key, value = match + args_key = re.match(r'\$\{?(\w+)}?', value) + if args_key: + env_vars = re.findall(rf'{args_key.group(1)}=\s*(.+)', script_content) + if env_vars: + value = env_vars[-1] + hyperparameters[key] = value if value else True + + return hyperparameters + + @staticmethod + def _fuzzy_match_parameter(param_name: str, available_params: Dict[str, Any]) -> Union[str, None]: + """ + Fuzzy matches a parameter name against available parameter names using predefined + mappings and string similarity. 
+ """ + if param_name in available_params: + return param_name + + canonical_name = None + for standard_name, aliases in HyperparameterChecker.PARAMETER_NAME_MAPPING.items(): + if param_name == standard_name or param_name in aliases: + canonical_name = standard_name + break + + if canonical_name: + if canonical_name in available_params: + return canonical_name + for alias in HyperparameterChecker.PARAMETER_NAME_MAPPING[canonical_name]: + if alias in available_params: + config_checking_print( + f"Matched '{param_name}' to alias '{alias}' via canonical name '{canonical_name}'") + return alias + + best_match_name = None + best_match_ratio = 0.8 + for available_param_name in available_params: + ratio = SequenceMatcher(None, param_name.lower(), available_param_name.lower()).ratio() + if ratio > best_match_ratio: + best_match_ratio = ratio + best_match_name = available_param_name + + if best_match_name: + config_checking_print( + f"Fuzzy matched parameter '{param_name}' to '{best_match_name}' (similarity: {best_match_ratio:.2f})") + return best_match_name + + return None diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py index 4b541e92189..619c4687e74 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checker.py @@ -19,7 +19,7 @@ import shutil import torch import torch.distributed as dist -from msprobe.core.common.file_utils import load_json, split_zip_file_path, create_directory, extract_zip +from msprobe.core.common.file_utils import load_json, split_zip_file_path, create_directory, extract_zip, make_dir from msprobe.pytorch.config_checking.checkers.base_checker import PackInput from msprobe.pytorch.config_checking.utils.utils import config_checking_print @@ -56,13 +56,14 @@ class ConfigChecker: config_checking_print(f"extract zip file {cmp_zip_path} to {cmp_dir}") 
output_dir = os.path.join(outpath, "output") - os.mkdir(output_dir) + make_dir(output_dir) for checker in ConfigChecker.checkers.values(): checker.compare_ex(bench_dir, cmp_dir, output_dir) - config_checking_print(f"config checking result save to {output_dir}") + config_checking_print(f"config checking result save to {os.path.realpath(output_dir)}") def pack(self): + config_checking_print(f"pack result zip path {os.path.realpath(self.pack_input.output_zip_path)}") if dist.is_initialized() and dist.get_rank() == 0: config_checking_print(f"pack result zip path {self.pack_input.output_zip_path}") if os.path.exists(self.pack_input.output_zip_path): diff --git a/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py index 87d16747d61..414bc5ae6e1 100644 --- a/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py +++ b/debug/accuracy_tools/msprobe/pytorch/config_checking/config_checking.py @@ -26,7 +26,7 @@ def compare(bench_zip_path, cmp_zip_path, outpath): def _config_checking_parser(parser): - parser.add_argument('-p', '--pack', help='Pack a directory into a zip file') + parser.add_argument('-pack', '--pack', help='Pack a directory into a zip file') parser.add_argument('-c', '--compare', nargs=2, help='Compare two zip files') parser.add_argument('-o', '--output', help='output path, default is current directory') @@ -38,5 +38,5 @@ def _run_config_checking_command(args): output_dirpath = args.output if args.output else "./config_check_result" compare(args.compare[0], args.compare[1], output_dirpath) else: - logger.error("The param is not correct, you need to give '-p' for pack or '-c' for compare.") - raise Exception("The param is not correct, you need to give '-p' for pack or '-c' for compare.") + logger.error("The param is not correct, you need to give '-pack' for pack or '-c' for compare.") + raise Exception("The param is not correct, you need to give '-pack' 
for pack or '-c' for compare.") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/bench.sh b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/bench.sh new file mode 100644 index 00000000000..217676ef0f4 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/bench.sh @@ -0,0 +1,25 @@ +MASTER_PORT=6000 +NNODES=1 +NODE_RANK=0 +CKPT_SAVE_DIR="your model save ckpt path" +DATA_PATH="your data path" +TOKENIZER_MODEL="your tokenizer path" +CKPT_LOAD_DIR="your model ckpt path" +TP=1 + +DISTRIBUTED_ARGS=" + --master_port $MASTER_PORT +" + +GPT_ARGS=" + --tensor-model-parallel-size ${TP} \ + --sequence-parallel \ + --tokenizer-model ${TOKENIZER_MODEL} \ +" + +torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ + $GPT_ARGS \ + --distributed-backend nccl \ + --load $CKPT_LOAD_DIR \ + --save $CKPT_SAVE_DIR \ + | tee logs/train_llama2_7b.log \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/cmp.sh b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/cmp.sh new file mode 100644 index 00000000000..8df9e650797 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/cmp.sh @@ -0,0 +1,25 @@ +MASTER_PORT=6001 +NNODES=1 +NODE_RANK=0 +CKPT_SAVE_DIR="./aaa" +DATA_PATH="./aaa" +TOKENIZER_MODEL="./aaa" +CKPT_LOAD_DIR="./aaa" +TP=2 + +DISTRIBUTED_ARGS=" + --master_port $MASTER_PORT +" + +GPT_ARGS=" + --tensor-model-parallel-size ${TP} \ + --sequence-parallel \ + --tokenizer-model ${TOKENIZER_MODEL} \ +" + +torchrun $DISTRIBUTED_ARGS pretrain_gpt.py \ + $GPT_ARGS \ + --distributed-backend nccl \ + --load $CKPT_LOAD_DIR \ + --save $CKPT_SAVE_DIR \ + | tee logs/train_llama2_7b.log \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py index 9c3f98e6210..95d6b7dbd7f 100644 --- 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/config_checking/test_config_checking.py @@ -6,8 +6,6 @@ import torch import json import numpy as np import torch.nn as nn -from multiprocessing import Process -from time import sleep from msprobe.pytorch.config_checking.config_checker import ConfigChecker testdir = os.path.dirname(__file__) @@ -67,18 +65,15 @@ def train_test(config_dict, seed, mock_env, mock_pip): json.dump(config_dict, open(config_path, 'w', encoding='utf-8')) ConfigChecker(config_path, test_module) - for input_data, label in get_test_dataset(): - output = test_module(input_data) - loss = loss_fun(output, label) - optimizer.zero_grad() - loss.backward() - optimizer.step() - - -def process_train_test(config_dict, seed=1234): - p = Process(target=train_test, args=(config_dict, seed)) - p.start() - sleep(3) + try: + for input_data, label in get_test_dataset(): + output = test_module(input_data) + loss = loss_fun(output, label) + optimizer.zero_grad() + loss.backward() + optimizer.step() + except Exception: + pass class TestConfigChecker(unittest.TestCase): @@ -89,16 +84,18 @@ class TestConfigChecker(unittest.TestCase): config_dict1 = { "env args": True, "pip data": True, + "shell path": [os.path.join(testdir, "cmp.sh")], "output zip path": os.path.join(temp_dir, "config_check_pack1.zip") } - process_train_test(config_dict1, 1234) + train_test(config_dict1, 1234) config_dict2 = { "env args": True, "pip data": True, + "shell path": [os.path.join(testdir, "bench.sh")], "output zip path": os.path.join(temp_dir, "config_check_pack2.zip") } - process_train_test(config_dict2, 1233) + train_test(config_dict2, 1233) ConfigChecker.compare(config_dict1["output zip path"], config_dict2["output zip path"], @@ -107,4 +104,9 @@ class TestConfigChecker(unittest.TestCase): compare_output_dir = os.path.join(temp_dir, "compare_output", "output") with 
open(os.path.join(compare_output_dir, "pip_data_check_result.txt"), 'r', encoding='utf-8') as file: lines = file.readlines() + with open(os.path.join(compare_output_dir, "hyperparameter_diff.txt"), 'r', encoding='utf-8') as file: + hyperparameter_diff = file.readlines() self.assertEqual(lines[1], " package_name:transformers, npu_version:0.0.1, bench_version:0.0.2\n") + self.assertEqual(len(hyperparameter_diff), 6) + self.assertEqual(hyperparameter_diff[1], + ' Parameter \'load\' (matched with \'load\'): load: "./aaa" -> "your model ckpt path"\n') -- Gitee From 09ef0d26da9631d58c9acd883f87c2177e5a5447 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Tue, 18 Mar 2025 20:29:05 +0800 Subject: [PATCH 305/333] add wqc01202410 to approvers --- debug/OWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/OWNERS b/debug/OWNERS index 0bda9243569..d15a8cdab8a 100644 --- a/debug/OWNERS +++ b/debug/OWNERS @@ -4,6 +4,7 @@ approvers: - wangchao285 - kun_8 - brightlyking +- wqc01202410 reviewers: - lv-kaimeng - TAJh @@ -13,4 +14,3 @@ reviewers: - louyujing - yang_chen_2001_02_14 - shawnzhu1 -- wqc01202410 -- Gitee From d3c3c7a28c427ee99109effde5195ce030e53fe9 Mon Sep 17 00:00:00 2001 From: jiandaobao Date: Tue, 18 Mar 2025 20:30:20 +0800 Subject: [PATCH 306/333] undo deter --- .../api_accuracy_checker/run_ut/distributed_bench_function.py | 2 ++ .../api_accuracy_checker/run_ut/run_distributed_check.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py index c78c1957e83..18ff05bc00c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/distributed_bench_function.py @@ -25,6 +25,8 @@ from 
msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import get_distrib def sort_all_input(inputs): ranks = len(inputs) + if ranks <= 1: + return inputs combined_tensor = torch.stack(inputs) sorted_indices = torch.argsort(combined_tensor, descending=True, dim=0) combined_tensor = torch.gather(combined_tensor, 0, sorted_indices) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py index cb00bd8acd9..54f3790bbc0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_distributed_check.py @@ -44,7 +44,6 @@ api_register.initialize_hook(None) distribute_api_key = Const.PT_FRAMEWORK + Const.SEP + Const.PT_API_TYPE_DIST distributed_func = api_register.ori_api_attr.get(distribute_api_key, {}) -os.environ['HCCL_DETERMINISTIC'] = str(True) current_time = time.strftime("%Y%m%d%H%M%S") RESULT_FILE_NAME = "accuracy_checking_result_" + current_time + ".csv" RESULT_CSV_HEADER = [['API_NAME', 'RANK', 'COMPARE_RESULT', 'MESSAGE']] -- Gitee From 8b32ab3cff30abeeecb7db9a50da3035baa2ec95 Mon Sep 17 00:00:00 2001 From: jiandaobao Date: Tue, 18 Mar 2025 20:31:46 +0800 Subject: [PATCH 307/333] add ut --- .../compare/test_algorithm.py | 42 ++++++++++++++++++ .../run_ut/test_distributed_bench_function.py | 43 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py index 377a29f2237..8ce876c496a 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +++ 
b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py @@ -208,3 +208,45 @@ class TestAlgorithmMethods(unittest.TestCase): ulp_err = alg.calc_ulp_err(self.bench_data, self.device_data, eb, exponent_num, data_type) expected_ulp_err = (self.device_data.astype(data_type) - self.bench_data).astype(data_type) * np.exp2(-eb + exponent_num) self.assertTrue(np.allclose(ulp_err, expected_ulp_err)) + + +class TestKahanLossRange(unittest.TestCase): + + def setUp(self): + self.cumsum = torch.tensor( + [[1000, 1, 10], [30, 20, 10]], dtype=torch.bfloat16) + self.addend = torch.tensor([[3], [0.1]], dtype=torch.bfloat16) + self.tensors = [ + torch.tensor([1000], dtype=torch.bfloat16), + torch.tensor([1004], dtype=torch.bfloat16), + torch.tensor([103], dtype=torch.bfloat16), + torch.tensor([4], dtype=torch.bfloat16)] + + def test_kahan_loss_positive(self): + # 测试最大化需要补偿的正损失, loss_res为历史损失中最大值,且mask会遮蔽小于0的部分 + loss_res, mask = alg.maximize_kahan_loss(self.cumsum, self.addend, negative=False) + expected_loss = torch.tensor([[1], [-0.1001]], dtype=torch.bfloat16) + expected_mask = expected_loss >= 0 + self.assertTrue(torch.equal(loss_res, expected_loss)) + self.assertTrue(torch.equal(mask, expected_mask)) + + def test_kahan_loss_negative(self): + # 测试最大化需要补偿的负损失, loss_res为历史损失中最小值,且mask会遮蔽大于0的部分 + loss_res, mask = alg.maximize_kahan_loss(self.cumsum, self.addend, negative=True) + expected_loss = torch.tensor([[0], [-0.1001]], dtype=torch.bfloat16) + expected_mask = expected_loss <= 0 + self.assertTrue(torch.equal(loss_res, expected_loss)) + self.assertTrue(torch.equal(mask, expected_mask)) + + def test_kahan_range_empty_list(self): + # 测试输入为空列表的情况 + with self.assertRaises(ValueError): + alg.kahan_range([], negative=self.negative) + + def test_kahan_range_min_max(self): + min_ = alg.kahan_range(self.tensors, negative=False) + max_ = alg.kahan_range(self.tensors, negative=False) + expected_min = torch.tensor(2096) + expected_max = 
torch.tensor(2112) + self.assertTrue(torch.equal(min_, expected_min)) + self.assertTrue(torch.equal(max_, expected_max)) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py new file mode 100644 index 00000000000..be5a6f67116 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py @@ -0,0 +1,43 @@ +import torch +import unittest + +from msprobe.pytorch.api_accuracy_checker.run_ut.distributed_bench_function import sort_all_input + +class TestSortAllInput(unittest.TestCase): + def setUp(self): + self.inputs = [ + torch.tensor([3.0, 2.0, 1.0]), + torch.tensor([6.0, 5.0, 4.0]), + torch.tensor([9.0, 8.0, 7.0]) + ] + + def test_normal_case(self): + # 测试正常情况 + sorted_inputs = sort_all_input(self.inputs) + expected_sorted_inputs = [ + torch.tensor([9.0, 8.0, 7.0]), + torch.tensor([6.0, 5.0, 4.0]), + torch.tensor([3.0, 2.0, 1.0]) + ] + for result, expected in zip(sorted_inputs, expected_sorted_inputs): + self.assertTrue(torch.equal(result, expected)) + + def test_single_tensor(self): + # 测试只有一个张量的情况 + single_input = [torch.tensor([2.0])] + sorted_inputs = sort_all_input(single_input) + self.assertTrue(torch.equal(sorted_inputs[0], single_input[0])) + + def test_empty_list(self): + # 测试输入为空列表的情况 + with self.assertRaises(IndexError): + sort_all_input([]) + + def test_inconsistent_shapes(self): + # 测试输入张量形状不一致的情况 + inconsistent_inputs = [ + torch.tensor([3.0, 2.0, 1.0]), + torch.tensor([6.0, 5.0]) + ] + with self.assertRaises(RuntimeError): + sort_all_input(inconsistent_inputs) -- Gitee From da951c63a0a2739110cb62e44cc94b51b4723bcb Mon Sep 17 00:00:00 2001 From: curry3 <485078529@qq.com> Date: Mon, 17 Mar 2025 10:08:04 +0800 Subject: [PATCH 308/333] =?UTF-8?q?=E3=80=90feature=E3=80=91msprobe=20pyto?= 
=?UTF-8?q?rch=20dump=E6=94=AF=E6=8C=81FP8=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/common/exceptions.py | 4 +- .../data_processor/pytorch_processor.py | 40 ++++++++--- .../msprobe/pytorch/common/utils.py | 18 ++++- .../dump/module_dump/module_processer.py | 4 +- .../pytorch/hook_module/hook_module.py | 4 +- .../data_processor/test_pytorch_processor.py | 10 +++ .../test/pytorch_ut/common/test_pt_utils.py | 72 ++++++++++++------- 7 files changed, 112 insertions(+), 40 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/exceptions.py b/debug/accuracy_tools/msprobe/core/common/exceptions.py index d71d30224b6..252860aee75 100644 --- a/debug/accuracy_tools/msprobe/core/common/exceptions.py +++ b/debug/accuracy_tools/msprobe/core/common/exceptions.py @@ -28,12 +28,14 @@ class MsprobeException(CodedException): OVERFLOW_NUMS_ERROR = 1 RECURSION_LIMIT_ERROR = 2 INTERFACE_USAGE_ERROR = 3 + UNSUPPORTED_TYPE_ERROR = 4 err_strs = { INVALID_PARAM_ERROR: "[msprobe] 无效参数:", OVERFLOW_NUMS_ERROR: "[msprobe] 超过预设溢出次数 当前溢出次数:", RECURSION_LIMIT_ERROR: "[msprobe] 递归调用超过限制:", - INTERFACE_USAGE_ERROR: "[msprobe] Invalid interface usage: " + INTERFACE_USAGE_ERROR: "[msprobe] Invalid interface usage: ", + UNSUPPORTED_TYPE_ERROR: "[msprobe] Unsupported type: " } diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 66523da9c55..a8289d58a66 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -24,14 +24,14 @@ from torch import distributed as dist from torch.distributed.distributed_c10d import _get_default_group from msprobe.core.common.const import Const +from msprobe.core.common.exceptions import MsprobeException from 
msprobe.core.common.file_utils import path_len_exceeds_limit from msprobe.core.common.log import logger -from msprobe.core.common.utils import convert_tuple +from msprobe.core.common.utils import convert_tuple, recursion_depth_decorator from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, ModuleBackwardInputsOutputs, \ ModuleForwardInputsOutputs, TensorStatInfo -from msprobe.pytorch.common.utils import save_pt, load_pt +from msprobe.pytorch.common.utils import Const as PtConst, save_pt, is_hifloat8_tensor, is_float8_tensor from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow -from msprobe.core.common.utils import recursion_depth_decorator is_gpu = False try: @@ -214,6 +214,18 @@ class PytorchDataProcessor(BaseDataProcessor): logger.warning(f"Failed to get value of torch.distributed.ReduceOp with error info: {e}.") return {"type": "torch.distributed.ReduceOp", "value": op_type} + @staticmethod + def _cast_to_float_if_fp8(tensor): + dtype = str(tensor.dtype) + if is_float8_tensor(tensor): + dtype = PtConst.HIFLOAT8_TYPE if is_hifloat8_tensor(tensor) else dtype + logger.debug( + f"The {dtype} tensor analyzing/saving is unsupported in dump function." + f"Casting to float for processing." 
+ ) + tensor = tensor.float() + return tensor, dtype + @classmethod def get_special_types(cls): return super().get_special_types() + cls.pytorch_special_type @@ -263,10 +275,11 @@ class PytorchDataProcessor(BaseDataProcessor): return p2pop_info def _analyze_tensor(self, tensor, suffix): + tensor, dtype = self._cast_to_float_if_fp8(tensor) tensor_stat = self.get_stat_info(tensor, self.config.async_dump) tensor_json = {} tensor_json.update({'type': 'torch.Tensor'}) - tensor_json.update({'dtype': str(tensor.dtype)}) + tensor_json.update({'dtype': dtype}) tensor_json.update({"shape": tensor.shape}) if tensor_stat.stack_tensor_stat is None: tensor_json.update({"Max": tensor_stat.max}) @@ -305,6 +318,7 @@ class TensorDataProcessor(PytorchDataProcessor): dump_data_name, file_path = self.get_save_file_path(suffix) single_arg = super()._analyze_tensor(tensor, suffix) single_arg.update({"data_name": dump_data_name}) + tensor, _ = self._cast_to_float_if_fp8(tensor) if self.config.async_dump: self._async_dump_cache[file_path] = tensor.clone().detach() else: @@ -383,7 +397,8 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): self._analyze_maybe_overflow_flag() if self.has_overflow: for file_path, tensor in self.cached_tensors_and_file_paths.items(): - save_pt(tensor, file_path) + tensor, _ = self._cast_to_float_if_fp8(tensor) + save_pt(tensor.clone().contiguous().detach(), file_path) self.real_overflow_nums += 1 if self.overflow_nums != -1 and self.real_overflow_nums >= self.overflow_nums: logger.info(f"[{Const.TOOL_NAME}] Reached the preset overflow times, " @@ -508,11 +523,13 @@ class KernelDumpDataProcessor(PytorchDataProcessor): return if self.config.is_backward_kernel_dump: - self.forward_args = self.clone_and_detach_tensor(module_input_output.args) - self.forward_kwargs = self.clone_and_detach_tensor(module_input_output.kwargs) try: + self.forward_args = self.clone_and_detach_tensor(module_input_output.args) + self.forward_kwargs = 
self.clone_and_detach_tensor(module_input_output.kwargs) output = module.forward(*self.forward_args, **self.forward_kwargs) - except Exception: + except Exception as e: + if isinstance(e, MsprobeException): + logger.warning(str(e)) self._print_unsupported_log(name) self.enable_kernel_dump = False return @@ -557,6 +574,11 @@ class KernelDumpDataProcessor(PytorchDataProcessor): @recursion_depth_decorator("KernelDump: KernelDumpDataProcessor.clone_and_detach_tensor") def clone_and_detach_tensor(self, input_params): if isinstance(input_params, torch.Tensor): + if is_float8_tensor(input_params): + raise MsprobeException( + MsprobeException.UNSUPPORTED_TYPE_ERROR, + f"L2 backward dump does not support float8 type." + ) if input_params.requires_grad: return input_params.clone().detach().requires_grad_() return input_params.clone() @@ -571,6 +593,8 @@ class KernelDumpDataProcessor(PytorchDataProcessor): def analyze_single_element(self, element, suffix_stack): if isinstance(element, torch.Tensor): + if is_float8_tensor(element): + return {} if not self.is_found_output_tensor: if element.requires_grad: self.forward_output_tensor = element diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index 40360ef44c3..2191e545287 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -261,6 +261,10 @@ class Const: NPU = 'NPU' DISTRIBUTED = 'Distributed' + HIFLOAT8_TYPE = "torch_npu.HiFloat8Tensor" + FLOAT8_E5M2_TYPE = "torch.float8_e5m2" + FLOAT8_E4M3FN_TYPE = "torch.float8_e4m3fn" + RAISE_PRECISION = { torch.float16: torch.float32, torch.bfloat16: torch.float32, @@ -473,3 +477,15 @@ def replace_last_occurrence(text, old, new): if index != -1: return text[:index] + text[index:].replace(old, new, 1) return text + + +def is_hifloat8_tensor(tensor): + if not is_gpu and hasattr(torch_npu, "HiFloat8Tensor") and isinstance(tensor, torch_npu.HiFloat8Tensor): + return True + return False + + +def is_float8_tensor(tensor): + if str(tensor.dtype) in [Const.FLOAT8_E5M2_TYPE, Const.FLOAT8_E4M3FN_TYPE]: + return True + return is_hifloat8_tensor(tensor) diff --git a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py index c1c433276fb..37611f4db32 100644 --- a/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py +++ b/debug/accuracy_tools/msprobe/pytorch/dump/module_dump/module_processer.py @@ -20,7 +20,7 @@ from msprobe.core.common.const import Const from msprobe.core.common.utils import recursion_depth_decorator from msprobe.core.data_dump.scope import BaseScope, ModuleRangeScope, MixRangeScope from msprobe.pytorch.common.log import logger -from msprobe.pytorch.common.utils import replace_last_occurrence +from msprobe.pytorch.common.utils import replace_last_occurrence, is_float8_tensor from torch.utils.checkpoint import checkpoint as origin_checkpoint from torch.utils.checkpoint import set_checkpoint_early_stop from torch.utils.hooks import BackwardHook @@ -61,7 +61,7 @@ class ModuleProcesser: @staticmethod @recursion_depth_decorator("ModuleDump: ModuleProcesser.clone_if_tensor") def clone_if_tensor(result): - if isinstance(result, torch.Tensor): + if 
isinstance(result, torch.Tensor) and not is_float8_tensor(result): return result.clone() elif type(result) is tuple: return tuple(ModuleProcesser.clone_if_tensor(x) for x in result) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py index 1eba9897b08..dccf9c7a922 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py @@ -21,6 +21,8 @@ import torch import torch.nn as nn import torch.utils.hooks as full_hooks +from msprobe.pytorch.common.utils import is_float8_tensor + torch_version_above_or_equal_2 = torch.__version__.split('+')[0] >= '2.0' @@ -111,7 +113,7 @@ class HOOKModule(nn.Module): else: return result - if not (var.requires_grad and torch.is_grad_enabled()): + if is_float8_tensor(var) or not (var.requires_grad and torch.is_grad_enabled()): return result grad_fn = var.grad_fn diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py index 3d31a1bb516..bda61faadf7 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py @@ -335,6 +335,16 @@ class TestPytorchDataProcessor(unittest.TestCase): self.assertEqual(result['Max_except_inf_nan'], 1.0) self.assertEqual(result['Min_except_inf_nan'], 1.0) + def test_cast_to_float_if_fp8(self): + tensor = MagicMock() + tensor.dtype = "torch.float8_e5m2" + _, dtype = self.processor._cast_to_float_if_fp8(tensor) + self.assertEqual(dtype, "torch.float8_e5m2") + + tensor.dtype = "torch.float8_e4m3fn" + _, dtype = self.processor._cast_to_float_if_fp8(tensor) + self.assertEqual(dtype, "torch.float8_e4m3fn") + class TestTensorDataProcessor(unittest.TestCase): diff 
--git a/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py index 42035932e56..0a25e6edf59 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/common/test_pt_utils.py @@ -13,20 +13,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import io +import os +import tempfile import unittest from unittest.mock import MagicMock, patch -import tempfile import torch import torch.distributed as dist - -from msprobe.core.common.file_utils import FileCheckConst from msprobe.core.common.exceptions import DistributedNotInitializedError +from msprobe.core.common.file_utils import FileCheckConst from msprobe.pytorch.api_accuracy_checker.common.utils import ApiData -from msprobe.pytorch.common.utils import parameter_adapter, get_rank_if_initialized, \ - get_tensor_rank, get_rank_id, print_rank_0, load_pt, save_pt, save_api_data, load_api_data, save_pkl, load_pkl +from msprobe.pytorch.common.utils import ( + parameter_adapter, + get_rank_if_initialized, + get_tensor_rank, + get_rank_id, + print_rank_0, + load_pt, + save_pt, + save_api_data, + load_api_data, + save_pkl, + load_pkl, + is_float8_tensor, + is_hifloat8_tensor +) class TestParameterAdapter(unittest.TestCase): @@ -201,6 +213,12 @@ class TestSavePT(unittest.TestCase): self.tensor = torch.tensor([1, 2, 3]) self.filepath = 'temp_tensor.pt' + def tearDown(self): + try: + os.remove(self.filepath) + except FileNotFoundError: + pass + @patch('msprobe.pytorch.common.utils.save_pt') @patch('os.path.realpath', return_value='temp_tensor.pt') @patch('msprobe.core.common.file_utils.check_path_before_create') @@ -208,21 +226,6 @@ class TestSavePT(unittest.TestCase): def test_save_pt_success(self, mock_change_mode, mock_check_path, mock_realpath, mock_torch_save): mock_torch_save(self.tensor, 
self.filepath) mock_torch_save.assert_called_once_with(self.tensor, self.filepath) - mock_change_mode.assert_called_once_with(self.filepath, FileCheckConst.DATA_FILE_AUTHORITY) - -class TestSavePT(unittest.TestCase): - - def setUp(self): - self.tensor = torch.tensor([1, 2, 3]) - self.filepath = 'temp_tensor.pt' - - @patch('torch.save') - @patch('os.path.realpath', return_value='temp_tensor.pt') - @patch('msprobe.core.common.file_utils.check_path_before_create') - @patch('msprobe.core.common.file_utils.change_mode') - def test_save_pt_success(self, mock_change_mode, mock_check_path, mock_realpath, mock_torch_save): - save_pt(self.tensor, self.filepath) - mock_torch_save.assert_called_once_with(self.tensor, self.filepath) @patch('torch.save', side_effect=Exception("Save failed")) @patch('os.path.realpath', return_value='temp_tensor.pt') @@ -233,12 +236,6 @@ class TestSavePT(unittest.TestCase): save_pt(self.tensor, self.filepath) self.assertIn("save pt file temp_tensor.pt failed", str(context.exception)) - def tearDown(self): - try: - os.remove(self.filepath) - except FileNotFoundError: - pass - class TestSaveApiData(unittest.TestCase): @@ -314,3 +311,24 @@ class TestSavePkl(unittest.TestCase): load_pkl(self.filepath) self.assertIn("Unsupported object type: os.system", str(context.exception)) os.remove(self.filepath) + +class TestFloat8Tensor(unittest.TestCase): + def setUp(self): + self.tensor = MagicMock() + + def test_is_float8_tensor(self): + self.tensor.dtype = "torch.float8_e5m2" + res = is_float8_tensor(self.tensor) + self.assertTrue(res) + + self.tensor.dtype = "torch.float8_e4m3fn" + res = is_float8_tensor(self.tensor) + self.assertTrue(res) + + def test_is_not_float8_tensor(self): + self.tensor.dtype = 123 + res = is_float8_tensor(self.tensor) + self.assertFalse(res) + + res = is_hifloat8_tensor(self.tensor) + self.assertFalse(res) -- Gitee From 867eecfd0f862fa9867c0d7464ba387ffb9ce5fe Mon Sep 17 00:00:00 2001 From: jiandaobao Date: Tue, 18 Mar 2025 21:07:50 
+0800 Subject: [PATCH 309/333] adjust ut --- .../compare/test_algorithm.py | 28 +++++++++---------- .../run_ut/test_distributed_bench_function.py | 14 ---------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py index 8ce876c496a..f1cc0d31363 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/compare/test_algorithm.py @@ -214,8 +214,8 @@ class TestKahanLossRange(unittest.TestCase): def setUp(self): self.cumsum = torch.tensor( - [[1000, 1, 10], [30, 20, 10]], dtype=torch.bfloat16) - self.addend = torch.tensor([[3], [0.1]], dtype=torch.bfloat16) + [[1000, 30], [1, 20], [10, 10]], dtype=torch.bfloat16) + self.addend = torch.tensor([[3, 0.2]], dtype=torch.bfloat16) self.tensors = [ torch.tensor([1000], dtype=torch.bfloat16), torch.tensor([1004], dtype=torch.bfloat16), @@ -225,28 +225,28 @@ class TestKahanLossRange(unittest.TestCase): def test_kahan_loss_positive(self): # 测试最大化需要补偿的正损失, loss_res为历史损失中最大值,且mask会遮蔽小于0的部分 loss_res, mask = alg.maximize_kahan_loss(self.cumsum, self.addend, negative=False) - expected_loss = torch.tensor([[1], [-0.1001]], dtype=torch.bfloat16) + expected_loss = torch.tensor([1, 0.0498], dtype=torch.bfloat16) expected_mask = expected_loss >= 0 - self.assertTrue(torch.equal(loss_res, expected_loss)) - self.assertTrue(torch.equal(mask, expected_mask)) + self.assertTrue(torch.allclose(loss_res, expected_loss)) + self.assertTrue(torch.allclose(mask, expected_mask)) def test_kahan_loss_negative(self): # 测试最大化需要补偿的负损失, loss_res为历史损失中最小值,且mask会遮蔽大于0的部分 loss_res, mask = alg.maximize_kahan_loss(self.cumsum, self.addend, negative=True) - expected_loss = torch.tensor([[0], [-0.1001]], dtype=torch.bfloat16) + expected_loss = 
torch.tensor([0, -0.0127], dtype=torch.bfloat16) expected_mask = expected_loss <= 0 - self.assertTrue(torch.equal(loss_res, expected_loss)) - self.assertTrue(torch.equal(mask, expected_mask)) + self.assertTrue(torch.allclose(loss_res, expected_loss)) + self.assertTrue(torch.allclose(mask, expected_mask)) def test_kahan_range_empty_list(self): # 测试输入为空列表的情况 with self.assertRaises(ValueError): - alg.kahan_range([], negative=self.negative) + alg.kahan_range([]) def test_kahan_range_min_max(self): + max_ = alg.kahan_range(self.tensors, negative=True) min_ = alg.kahan_range(self.tensors, negative=False) - max_ = alg.kahan_range(self.tensors, negative=False) - expected_min = torch.tensor(2096) - expected_max = torch.tensor(2112) - self.assertTrue(torch.equal(min_, expected_min)) - self.assertTrue(torch.equal(max_, expected_max)) + expected_min = torch.tensor(2096, dtype=torch.bfloat16) + expected_max = torch.tensor(2112, dtype=torch.bfloat16) + self.assertTrue(torch.allclose(min_, expected_min)) + self.assertTrue(torch.allclose(max_, expected_max)) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py index be5a6f67116..0b21a9559e9 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_distributed_bench_function.py @@ -27,17 +27,3 @@ class TestSortAllInput(unittest.TestCase): single_input = [torch.tensor([2.0])] sorted_inputs = sort_all_input(single_input) self.assertTrue(torch.equal(sorted_inputs[0], single_input[0])) - - def test_empty_list(self): - # 测试输入为空列表的情况 - with self.assertRaises(IndexError): - sort_all_input([]) - - def test_inconsistent_shapes(self): - # 测试输入张量形状不一致的情况 - inconsistent_inputs = [ - torch.tensor([3.0, 2.0, 1.0]), - 
torch.tensor([6.0, 5.0]) - ] - with self.assertRaises(RuntimeError): - sort_all_input(inconsistent_inputs) -- Gitee From b75167fe3140732cdb75cfd55b56653b13245027 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Tue, 18 Mar 2025 23:16:41 +0800 Subject: [PATCH 310/333] add shawnzhu1 to approvers --- debug/OWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/OWNERS b/debug/OWNERS index d15a8cdab8a..bd8412edd67 100644 --- a/debug/OWNERS +++ b/debug/OWNERS @@ -5,6 +5,7 @@ approvers: - kun_8 - brightlyking - wqc01202410 +- shawnzhu1 reviewers: - lv-kaimeng - TAJh @@ -13,4 +14,3 @@ reviewers: - zhengxinqian - louyujing - yang_chen_2001_02_14 -- shawnzhu1 -- Gitee From 8286a445e4887fd278726ddd43a536a197e13a58 Mon Sep 17 00:00:00 2001 From: gitee Date: Wed, 19 Mar 2025 11:41:06 +0800 Subject: [PATCH 311/333] delete useless code --- .../msprobe/docs/07.accuracy_checker_PyTorch.md | 2 +- .../api_accuracy_checker/compare/api_precision_compare.py | 6 +----- .../pytorch/api_accuracy_checker/run_ut/multi_run_ut.py | 4 ---- .../api_accuracy_checker/run_ut/run_overflow_check.py | 6 +----- .../msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py | 6 ++++-- 5 files changed, 7 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/07.accuracy_checker_PyTorch.md b/debug/accuracy_tools/msprobe/docs/07.accuracy_checker_PyTorch.md index b07568e25a2..ba7f978b09a 100644 --- a/debug/accuracy_tools/msprobe/docs/07.accuracy_checker_PyTorch.md +++ b/debug/accuracy_tools/msprobe/docs/07.accuracy_checker_PyTorch.md @@ -107,7 +107,7 @@ msprobe -f pytorch multi_run_ut -api_info ./dump_path/step{step_number}/rank{ran | -save_error_data | 保存精度未达标的 API 输入输出数据。 | 否 | | -o 或 --out_path | 指定 run_ut 执行结果存盘路径,默认“./”。 | 否 | | -j 或 --jit_compile | 开启 jit 编译。 | 否 | -| -n | 同时执行 run_ut 线程的数量,默认为 8,最大支持 64,但每个 Device 最大支持 8 个线程。当指定多个线程和多个 Device 时,线程数在每张卡上均分。 | 否 | +| -n 或 --num_splits | 同时执行 run_ut 线程的数量,默认为 8,最大支持 64,但每个 Device 最大支持 8 个线程。当指定多个线程和多个 Device 
时,线程数在每张卡上均分。 | 否 | | -d 或 --device | 指定 Device ID,选择 UT 代码运行所在的卡,默认值为 0,支持同时指定 0~7,共 8 个 Device。 | 否 | | -csv_path 或 --result_csv_path | 指定本次运行中断时生成的 `accuracy_checking_result_{timestamp}.csv` 文件路径,执行 run_ut 中断时,若想从中断处继续执行,配置此参数即可。需要指定为上次中断的 `accuracy_checking_result_{timestamp}.csv` 文件。详见 [3.3 断点续检](#33-断点续检)。 | run_ut 操作中断后继续执行场景下必须配置 | | -f 或 --filter_api | 过滤模型中除最大值和最小值以外其他参数和结构相同的 API。适用于模型较大且重复 API 较多的场景。 | 否 | diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py index 8f7db73b58f..cd60d8bc15f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py @@ -430,6 +430,7 @@ def _api_precision_compare(parser=None): _api_precision_compare_parser(parser) args = parser.parse_args(sys.argv[1:]) _api_precision_compare_command(args) + logger.info("Compare task completed.") def _api_precision_compare_command(args): @@ -457,8 +458,3 @@ def _api_precision_compare_parser(parser): parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, help=" The api precision compare task result out path.", required=False) - - -if __name__ == '__main__': - _api_precision_compare() - logger.info("Compare task completed.") diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index 3eb7fc0df96..1354e2dea17 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -224,7 +224,3 @@ def main(): args = parser.parse_args() config = prepare_config(args) run_parallel_ut(config) - - -if __name__ == '__main__': - main() diff --git 
a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py index 6214d892906..f0490ed62ed 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py @@ -161,6 +161,7 @@ def _run_overflow_check(parser=None): _run_overflow_check_parser(parser) args = parser.parse_args(sys.argv[1:]) _run_overflow_check_command(args) + logger.info("UT task completed.") def _run_overflow_check_command(args): @@ -175,8 +176,3 @@ def _run_overflow_check_command(args): logger.error(f"Set NPU device id failed. device id is: {args.device_id}") raise NotImplementedError from error run_overflow_check(api_info) - - -if __name__ == '__main__': - _run_overflow_check() - logger.info("UT task completed.") diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 51fe32de810..0f13f3a4980 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -84,6 +84,9 @@ tqdm_params = { } +seed_all() + + def run_ut(config): logger.info("start UT test") if config.online_config.is_online: @@ -582,9 +585,8 @@ def run_ut_command(args): } run_ut_config = checker_config.get_run_ut_config(**config_params) run_ut(run_ut_config) + logger.info("UT task completed.") if __name__ == '__main__': - seed_all() _run_ut() - logger.info("UT task completed.") -- Gitee From e62d32a0f2cd2609ef29826da4eb9ce4b86bdd6e Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Wed, 19 Mar 2025 14:15:50 +0800 Subject: [PATCH 312/333] compare remove files add warning --- debug/accuracy_tools/msprobe/core/compare/acc_compare.py | 4 +++- 
.../msprobe/core/compare/merge_result/merge_result.py | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index f2aa8c479ec..bd4126a8857 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -430,7 +430,9 @@ class Comparator: logger.info("Please check whether the input data belongs to you. If not, there may be security risks.") file_name = add_time_with_xlsx("compare_result" + suffix) file_path = os.path.join(os.path.realpath(output_path), file_name) - remove_path(file_path) + if os.path.exists(file_path): + logger.warning(f"{file_path} will be recovered") + remove_path(file_path) highlight_dict = {"red_rows": set(), "yellow_rows": set(), "red_lines": [], "yellow_lines": []} npu_json = input_param.get("npu_json_path") diff --git a/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py b/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py index b605bd59fca..c43958f40b2 100644 --- a/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py +++ b/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2024, Huawei Technologies Co., Ltd. +# Copyright (c) 2024-2025, Huawei Technologies Co., Ltd. # All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,7 +21,8 @@ from functools import partial import pandas as pd from tqdm import tqdm -from msprobe.core.common.file_utils import load_yaml, logger, FileChecker, save_excel, read_xlsx, create_directory +from msprobe.core.common.file_utils import load_yaml, logger, FileChecker, save_excel, read_xlsx, create_directory, \ + remove_path from msprobe.core.common.const import FileCheckConst, Const, CompareConst from msprobe.core.common.utils import CompareException, add_time_with_xlsx from msprobe.core.compare.utils import table_value_is_valid @@ -329,6 +330,10 @@ def generate_merge_result(all_compare_index_dict_list, all_rank_num_list, all_co for i, df in enumerate(merge_df_list): # merge_df_list中df与compare_index_list中compare_index一一对应 final_result_df_list.append((df, compare_index_list[i])) + + if os.path.exists(output_path): + logger.warning(f"{output_path} will be recovered") + remove_path(output_path) save_excel(output_path, final_result_df_list) logger.info(f"The compare results of the multi-ranks are merged and saved in: {output_path}.") -- Gitee From 0801cdb4460baaff4631ae63bb581cb048e045ce Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 14:55:39 +0800 Subject: [PATCH 313/333] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/mindspore_processor.py | 2 +- .../msprobe/core/data_dump/data_processor/pytorch_processor.py | 2 +- debug/accuracy_tools/msprobe/mindspore/service.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 848347a50e4..7472298d9bf 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ 
b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -94,7 +94,7 @@ class MindsporeDataProcessor(BaseDataProcessor): logger.warning("Async dump do not support complex data!") return tensor_stat else: - if not data.dtype == ms.float64 or ops.is_floating_point(data): + if data.dtype == ms.float64 or not ops.is_floating_point(data): data = data.to(ms.float32) get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm tensor_stat.max = mint.max(data) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 92f2ed06e42..6ae515f86a1 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -128,7 +128,7 @@ class PytorchDataProcessor(BaseDataProcessor): elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data else: - if not data.dtype == torch.float64 or data.is_floating_point(): + if data.dtype == torch.float64 or not data.is_floating_point(): data = data.float() tensor_stat.max = torch.max(data) tensor_stat.min = torch.min(data) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index b0dfb2809a5..1d30b19e311 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -349,7 +349,7 @@ class Service: self.primitive_switch = False self.start_call = False if self.config.async_dump and self.config.task == Const.TENSOR: - self.data_collector.data_processor.dump_async_data() + self.data_collector.data_processor.dump_async_data() self.data_collector.write_json() JitDump.jit_dump_switch = False -- Gitee From 28f6dbb378b10e4b500ed6e731baaab54274b0d5 Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Wed, 19 Mar 2025 15:34:58 +0800 
Subject: [PATCH 314/333] fix doc mistake --- .../msprobe/docs/02.config_introduction.md | 2 +- .../msprobe/docs/06.data_dump_MindSpore.md | 9 +++++---- .../msprobe/docs/13.overflow_check_MindSpore.md | 4 +++- .../msprobe/docs/29.data_dump_MSAdapter.md | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md index 60bfac71f6b..d77a2a7a14c 100644 --- a/debug/accuracy_tools/msprobe/docs/02.config_introduction.md +++ b/debug/accuracy_tools/msprobe/docs/02.config_introduction.md @@ -91,7 +91,7 @@ PyTorch、MSAdapter 以及 MindSpore 动态图场景下,"level"须为"L0"或"L | 参数 | 解释 | 是否必选 | | ------------- | ---------------------- | -------- | -| overflow_nums | 最大溢出次数,int 类型,默认为 1,仅 PyTorch 与 MindSpore 动态图场景支持。表示第 N 次溢出后,不再进行溢出检测。过程中检测到溢出 API 对应的 输入输出 数据均 dump。
**配置示例**:"overflow_nums": 3。配置为 -1 时,表示持续检测溢出直到训练结束。 | 否 | +| overflow_nums | 最大溢出次数,int 类型,默认为 1,仅 PyTorch、MSAdapter 以及 MindSpore 动态图场景支持。表示第 N 次溢出后,不再进行溢出检测。过程中检测到溢出 API 对应的 输入输出 数据均 dump。
**配置示例**:"overflow_nums": 3。配置为 -1 时,表示持续检测溢出直到训练结束。 | 否 | | check_mode | 溢出类型,str 类型,仅 MindSpore v2.3.0 以下版本的静态图场景支持,可选参数:
"aicore":开启 AI Core 的溢出检测;
"atomic":开启 Atomic 的溢出检测;
"all":开启算子的溢出检测,默认值。
**配置示例**:"check_mode": "all"。 | 否 | ### 1.6 task 配置为 free_benchmark diff --git a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md index fbbbd387644..ef04ea9cc0d 100644 --- a/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/06.data_dump_MindSpore.md @@ -31,7 +31,7 @@ dump 的"tensor"模式采集数据量大小,可以参考[数据量基线](data ## 5. 场景介绍 ### 5.1 静态图场景 -在静态图场景下,msprobe 支持 **L0 Level** 和 **L2 Level** 的数据采集。 +在静态图场景下,msprobe 支持 **L0 Level** 和 **L2 Level** 的数据采集。且当 MindSpore 版本高于 2.5.0 时,若需采集 **L2 Level** 数据,必须使用编包时添加了`--include-mod=adump`选项的 mindstudio-probe whl 包进行 msprobe 工具安装。 - **L0 Level(Cell 级)** :采集 `Cell` 对象的数据,适用于需要分析特定网络模块的情况。 - **L2 Level(Kernel 级)** :采集底层算子的输入输出数据,适用于深入分析算子级别的精度问题。 @@ -377,9 +377,10 @@ trainer.train(1, train_dataset) L0 级别 dump 的目录结构与动态图场景下目录结构一致。
L2 级别 dump 的目录结构如下所示: -若jit_level=O2,且使用mindstudio-probe发布包或源码编包时添加了`--include-mod=adump`选项,目录结构示例如下: +若jit_level=O2,MindSpore 版本不低于 2.5.0,且使用mindstudio-probe发布包或源码编包时添加了`--include-mod=adump`选项,目录结构示例如下: ``` ├── dump_path +│ ├── acl_dump_{device_id}.json │ ├── rank_0 │ | ├── {timestamp} │ | │ ├── step_0 @@ -403,9 +404,9 @@ L2 级别 dump 的目录结构如下所示: **说明** 1. 若配置文件中指定落盘npy格式,但是实际数据格式不在npy支持范围内(如bf16、int4等),则该tensor会以原始码流落盘,并不会转换为npy格式。 2. 若原始文件全名长度超过255个字符,则文件基础名会被转换为长度为32位的随机数字字符串,原始文件名与转换后文件名的对应关系会保存在同目录下的`mapping.csv`文件中。 +3. acl_dump_{device_id}.json 为在 Dump 接口调用过程中生成的中间文件,一般情况下无需关注。 - -其他场景请参见 MindSpore 官方文档中的[数据对象目录](https://www.mindspore.cn/docs/zh-CN/r2.4.0/model_train/debug/dump.html)。 +其他场景下,除 kernel_kbyk_dump.json(jit_level=O0/O1)、kernel_graph_dump.json(jit_level=O2)等无需关注的中间文件外的其他 dump 结果文件请参见 MindSpore 官方文档中的[ Ascend 下 O0/O1 模式 Dump 数据对象目录和数据文件介绍](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/debug/dump.html#%E6%95%B0%E6%8D%AE%E5%AF%B9%E8%B1%A1%E7%9B%AE%E5%BD%95%E5%92%8C%E6%95%B0%E6%8D%AE%E6%96%87%E4%BB%B6%E4%BB%8B%E7%BB%8D)与[ Ascend 下 O2 模式 Dump 数据对象目录和数据文件介绍](https://www.mindspore.cn/docs/zh-CN/r2.5.0/model_train/debug/dump.html#%E6%95%B0%E6%8D%AE%E5%AF%B9%E8%B1%A1%E7%9B%AE%E5%BD%95%E5%92%8C%E6%95%B0%E6%8D%AE%E6%96%87%E4%BB%B6%E4%BB%8B%E7%BB%8D-1)。 ### 8.2 动态图场景 diff --git a/debug/accuracy_tools/msprobe/docs/13.overflow_check_MindSpore.md b/debug/accuracy_tools/msprobe/docs/13.overflow_check_MindSpore.md index ef83aa17237..3b674a35e40 100644 --- a/debug/accuracy_tools/msprobe/docs/13.overflow_check_MindSpore.md +++ b/debug/accuracy_tools/msprobe/docs/13.overflow_check_MindSpore.md @@ -28,4 +28,6 @@ export MS_ASCEND_CHECK_OVERFLOW_MODE="INFNAN_MODE" ## 3 溢出检测结果文件介绍 -溢出检测结果文件目录结构与含义与数据采集任务一致,但仅保存溢出 API 或 kernel 的真实数据或统计信息。详见MindSpore 场景的精度数据采集中的["**3 dump 结果文件介绍**"](./06.data_dump_MindSpore.md#3-dump-结果文件介绍)章节。 +溢出检测结果文件目录结构与含义与数据采集任务一致,但仅保存溢出 API 或 kernel 的真实数据或统计信息。详见MindSpore 场景的精度数据采集中的["**8. 
dump 结果文件介绍**"](./06.data_dump_MindSpore.md#8-dump-结果文件介绍)章节。 + +**说明**:在静态图 O2 编译等级下,若 MindSpore 版本为 2.4,或者 MindSpore 版本为 2.5,且未使用编包时添加了`--include-mod=adump`选项的 mindstudio-probe whl 包,则会产生 kernel_graph_overflow_check.json 中间文件,一般情况下无需关注。 \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/docs/29.data_dump_MSAdapter.md b/debug/accuracy_tools/msprobe/docs/29.data_dump_MSAdapter.md index cefcabafbcb..f67b28af517 100644 --- a/debug/accuracy_tools/msprobe/docs/29.data_dump_MSAdapter.md +++ b/debug/accuracy_tools/msprobe/docs/29.data_dump_MSAdapter.md @@ -130,7 +130,7 @@ import torch.nn as nn import torch.nn.functional as F # 导入工具的数据采集接口 -from msprobe.pytorch import PrecisionDebugger +from msprobe.mindspore import PrecisionDebugger # 在模型训练开始前实例化PrecisionDebugger debugger = PrecisionDebugger(config_path='./config.json') -- Gitee From 33db809000b399eddab7f0c1ce7655ee2bc36978 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 16:45:42 +0800 Subject: [PATCH 315/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/mindspore_processor.py | 1 - .../msprobe/core/data_dump/json_writer.py | 76 ++++++++++--------- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 7472298d9bf..86836889592 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -46,7 +46,6 @@ class MindsporeDataProcessor(BaseDataProcessor): self._async_dump_cache = {} self.api_register = get_api_register() - @staticmethod def get_md5_for_tensor(x): x = convert_bf16_to_fp32(x) diff --git 
a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 22778999624..09e81bca453 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -19,7 +19,7 @@ import copy import numpy as np from msprobe.core.common.const import Const, FileCheckConst -from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json +from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json, recursion_depth_decorator from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException @@ -55,6 +55,44 @@ class DataWriter: if is_new_file: change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) + @recursion_depth_decorator("JsonWriter: DataWriter._replace_stat_placeholders") + def _replace_stat_placeholders(self, data, stat_result): + if isinstance(data, dict): + keys = list(data.keys()) # 获取当前所有键 + for key in keys: # 避免遍历时修改字典 + value = data[key] + if key == "tensor_stat_index" and isinstance(value, int): + idx = value + stat_values = stat_result[idx] if idx < len(stat_result) else [None] * 4 + + # 构建新字段并删除旧键 + new_entries = { + "type": data["type"], + "dtype": data["dtype"], + "shape": data["shape"], + "Max": stat_values[0], + "Min": stat_values[1], + "Mean": stat_values[2], + "Norm": stat_values[3] + } + del data[key] + + # 重构字典顺序 + updated_dict = {} + # 先插入统计字段 + updated_dict.update(new_entries) + # 保留原字典其他字段(排除已删除的tensor_stat_index) + for k in data: + if k not in new_entries: + updated_dict[k] = data[k] + data.clear() + data.update(updated_dict) + else: + self._replace_stat_placeholders(value, stat_result) + elif isinstance(data, (list, tuple)): + for item in data: + self._replace_stat_placeholders(item, stat_result) + def reset_cache(self): self.cache_data = {} self.cache_stack = {} @@ -173,39 +211,3 @@ class DataWriter: if self.cache_debug: 
self.write_debug_info_json(self.debug_file_path) - def _replace_stat_placeholders(self, data, stat_result): - if isinstance(data, dict): - keys = list(data.keys()) # 获取当前所有键 - for key in keys: # 避免遍历时修改字典 - value = data[key] - if key == "tensor_stat_index" and isinstance(value, int): - idx = value - stat_values = stat_result[idx] if idx < len(stat_result) else [None] * 4 - - # 构建新字段并删除旧键 - new_entries = { - "type": data["type"], - "dtype": data["dtype"], - "shape": data["shape"], - "Max": stat_values[0], - "Min": stat_values[1], - "Mean": stat_values[2], - "Norm": stat_values[3] - } - del data[key] - - # 重构字典顺序 - updated_dict = {} - # 先插入统计字段 - updated_dict.update(new_entries) - # 保留原字典其他字段(排除已删除的tensor_stat_index) - for k in data: - if k not in new_entries: - updated_dict[k] = data[k] - data.clear() - data.update(updated_dict) - else: - self._replace_stat_placeholders(value, stat_result) - elif isinstance(data, list): - for item in data: - self._replace_stat_placeholders(item, stat_result) -- Gitee From afc98933b815b01e286891b4b3035cdf92539ed0 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 16:56:15 +0800 Subject: [PATCH 316/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 09e81bca453..7bacd05a31b 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -19,7 +19,8 @@ import copy import numpy as np from msprobe.core.common.const import Const, FileCheckConst -from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json, recursion_depth_decorator +from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json \ +from msprobe.core.common.utils import 
recursion_depth_decorator from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException @@ -79,9 +80,9 @@ class DataWriter: # 重构字典顺序 updated_dict = {} - # 先插入统计字段 + # 通过插入排序后字段保证字段写入json的有序 updated_dict.update(new_entries) - # 保留原字典其他字段(排除已删除的tensor_stat_index) + # 遍历原字典其他字段(排除已删除的tensor_stat_index) for k in data: if k not in new_entries: updated_dict[k] = data[k] -- Gitee From 92c34f88659fe4d32f84e19f9c1867f7a1aba7f3 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 16:57:46 +0800 Subject: [PATCH 317/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 7bacd05a31b..b2fb892bd39 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -19,7 +19,7 @@ import copy import numpy as np from msprobe.core.common.const import Const, FileCheckConst -from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json \ +from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json from msprobe.core.common.utils import recursion_depth_decorator from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException -- Gitee From 66c34405693760cd09e50d1c606fbc77eff419e6 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 17:00:13 +0800 Subject: [PATCH 318/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index b2fb892bd39..fa2c972daba 100644 --- 
a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -63,9 +63,11 @@ class DataWriter: for key in keys: # 避免遍历时修改字典 value = data[key] if key == "tensor_stat_index" and isinstance(value, int): - idx = value + if idx > 0: + idx = value + else: + return stat_values = stat_result[idx] if idx < len(stat_result) else [None] * 4 - # 构建新字段并删除旧键 new_entries = { "type": data["type"], -- Gitee From f63b894a089de2991870418b29c511d95a2a2197 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 17:03:49 +0800 Subject: [PATCH 319/333] Update json_writer.py --- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index fa2c972daba..e937774c533 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -63,7 +63,7 @@ class DataWriter: for key in keys: # 避免遍历时修改字典 value = data[key] if key == "tensor_stat_index" and isinstance(value, int): - if idx > 0: + if value > 0: idx = value else: return -- Gitee From 1d397342d159422d62e466f75c6a07f017969423 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 17:08:07 +0800 Subject: [PATCH 320/333] Update pytorch_processor.py --- .../msprobe/core/data_dump/data_processor/pytorch_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 6ae515f86a1..9d7249dbce7 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -406,7 +406,7 
@@ class OverflowCheckDataProcessor(PytorchDataProcessor): max_tensor = self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) min_tensor = self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) - if max_tensor is None or min_tensor is None : + if max_tensor is None or min_tensor is None: return if torch.isinf(max_tensor) or torch.isnan(max_tensor): -- Gitee From 09d4afebd427bd7de6af62ef45c2ae204a57014e Mon Sep 17 00:00:00 2001 From: curry3 <485078529@qq.com> Date: Wed, 19 Mar 2025 17:00:57 +0800 Subject: [PATCH 321/333] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E5=AF=B9getitem=20API=E5=B0=81=E8=A3=85=E5=AF=BC?= =?UTF-8?q?=E8=87=B4dump=E6=8A=A5=E9=94=99=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/docs/FAQ.md | 14 +++----------- .../pytorch/hook_module/support_wrap_ops.yaml | 1 - 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/FAQ.md b/debug/accuracy_tools/msprobe/docs/FAQ.md index 833ca07a236..252fc94e97f 100644 --- a/debug/accuracy_tools/msprobe/docs/FAQ.md +++ b/debug/accuracy_tools/msprobe/docs/FAQ.md @@ -58,11 +58,7 @@ 答:对于 fp16 的数据,CPU 会上升一个精度 fp32 去计算,这是和算子那边对齐的精度结论,CPU 用更高精度去计算会更接近真实值。 -6. 添加预检工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 - - 答:注释工具目录 `mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml` 文件中 Tensor: 下的 `- __getitem__`,工具会跳过采集该 API。如果是需要 dump 关键位置 API 也可以考虑根据报错堆栈信息注释引发报错的类型检查。 - -7. Tensor 魔法函数具体对应什么操作? +6. Tensor 魔法函数具体对应什么操作? 答: @@ -202,15 +198,11 @@ def npu_forward_fused_softmax(self, input_, mask): 答:正常现象,dataloader 通过 raise 结束程序,堆栈信息可忽略。 -10. 
添加 msprobe 工具后截取操作报错:`IndexError: too many indices for tensor of dimension x` 或 `TypeError: len() of a 0-d tensor`。 - - 答:注释工具目录 `mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml` 文件中 `Tensor: ` 下的 `- __getitem__`,工具会跳过采集该 API。如果是需要采集关键位置 API 也可以考虑根据报错堆栈信息注释引发报错的类型检查。 - -11. 使用 msprobe 工具数据采集功能后,模型出现报错,报错信息为:`activation_func must be F.gelu` 或 `ValueError(Only support fusion of gelu and swiglu)`。 +10. 使用 msprobe 工具数据采集功能后,模型出现报错,报错信息为:`activation_func must be F.gelu` 或 `ValueError(Only support fusion of gelu and swiglu)`。 答:这一类报错常见于 Megatron/MindSpeed/ModelLink 等加速库或模型仓中,原因是工具本身会封装 torch 的 API(API类型和地址会发生改变),而有些 API 在工具使能前类型和地址就已经确定,此时工具无法对这类 API 再进行封装,而加速库中会对某些 API 进行类型检查,即会把工具无法封装的原始的 API和工具封装之后的 API 进行判断,所以会报错。 规避方式有3种:①将PrecisionDebugger的实例化放在文件的开始位置,即导包后的位置,确保所有API都被封装;②注释 `mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml` 文件中的 `-gelu` 或者 `-silu`,工具会跳过采集该 API。③ 可以考虑根据报错堆栈信息注释引发报错的类型检查。 -12. 添加 msprobe 工具后触发与 AsStrided 算子相关、或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 +11. 
添加 msprobe 工具后触发与 AsStrided 算子相关、或者编译相关的报错,如:`Failed to compile Op [AsStrided]`。 答:注释工具目录 `mstt/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml` 文件中 `Tensor: `下的 `-t` 和 `- transpose`。 diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml index 5b92baad826..6be86e0dfbc 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml @@ -151,7 +151,6 @@ tensor: - __eq__ - __ge__ - __gt__ - - __getitem__ - __iadd__ - __iand__ - __idiv__ -- Gitee From 69e799ef7547fb675f318629ef19775cc5eb8644 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Wed, 19 Mar 2025 17:44:42 +0800 Subject: [PATCH 322/333] =?UTF-8?q?=E5=88=A0=E9=99=A4ut?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_mindspore_processor.py | 102 ------------------ .../data_processor/test_pytorch_processor.py | 55 ---------- .../test/mindspore_ut/test_ms_debug_save.py | 38 +------ .../test/pytorch_ut/test_pt_debug_save.py | 40 ------- 4 files changed, 1 insertion(+), 234 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py index ed20e4f73cd..7406e0d1cc7 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_mindspore_processor.py @@ -84,57 +84,6 @@ class TestMindsporeDataProcessor(unittest.TestCase): self.assertIsNone(result.mean) self.assertIsNone(result.norm) - @patch.object(MindsporeDataProcessor, 'get_md5_for_tensor') - def test__analyze_tensor(self, get_md5_for_tensor): - get_md5_for_tensor.return_value = "test_md5" - tensor = 
ms.Tensor(np.array([1, 2, 3], dtype=np.int32)) - self.config.summary_mode = 'md5' - self.config.async_dump = False - suffix = "test_tensor" - expected_result = { - 'type': 'mindspore.Tensor', - 'dtype': 'Int32', - 'shape': (3,), - 'Max': 3, - 'Min': 1, - 'Mean': 2, - 'Norm': ms.ops.norm(tensor).item(), - 'md5': 'test_md5', - } - result = self.processor._analyze_tensor(tensor, suffix) - self.assertEqual(result, expected_result) - - -class TestTensorDataProcessor(unittest.TestCase): - - def setUp(self): - self.config = MagicMock() - self.data_writer = MagicMock() - self.processor = TensorDataProcessor(self.config, self.data_writer) - self.data_writer.dump_tensor_data_dir = "./dump_data" - self.processor.current_api_or_module_name = "test_api" - self.processor.api_data_category = "input" - - @patch('msprobe.core.data_dump.data_processor.mindspore_processor.save_tensor_as_npy') - def test_analyze_tensor(self, mock_save): - self.config.framework = "mindspore" - self.config.async_dump = False - tensor = ms.Tensor([1.0, 2.0, 3.0]) - suffix = 'suffix' - result = self.processor._analyze_tensor(tensor, suffix) - mock_save.assert_called_once() - expected = { - 'type': 'mindspore.Tensor', - 'dtype': str(tensor.dtype), - 'shape': tensor.shape, - 'Max': 3.0, - 'Min': 1.0, - 'Mean': 2.0, - 'Norm': ms.ops.norm(tensor).item(), - 'data_name': 'test_api.input.suffix.npy' - } - self.assertEqual(expected, result) - class TestOverflowCheckDataProcessor(unittest.TestCase): def setUp(self): @@ -195,57 +144,6 @@ class TestOverflowCheckDataProcessor(unittest.TestCase): self.data_processor.overflow_nums = 3 self.assertFalse(self.data_processor.is_terminated) - def test__analyze_maybe_overflow_tensor(self): - self.data_processor.has_overflow = False - tensor_json = {"Max": None, "Min": 0} - self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertFalse(self.data_processor.has_overflow) - tensor_json.update({"Max": -np.inf}) - 
self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.data_processor.has_overflow) - self.data_processor.has_overflow = False - tensor_json.update({"Max": np.inf}) - self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.data_processor.has_overflow) - self.data_processor.has_overflow = False - tensor_json.update({"Max": np.nan}) - self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.data_processor.has_overflow) - tensor_json.update({"Max": 0}) - self.data_processor.has_overflow = False - tensor_json.update({"Min": -np.inf}) - self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.data_processor.has_overflow) - self.data_processor.has_overflow = False - tensor_json.update({"Min": np.inf}) - self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.data_processor.has_overflow) - self.data_processor.has_overflow = False - tensor_json.update({"Min": np.nan}) - self.data_processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.data_processor.has_overflow) - - @patch("msprobe.core.data_dump.data_processor.mindspore_processor.logger.warning") - @patch.object(OverflowCheckDataProcessor, "get_save_file_path") - @patch.object(MindsporeDataProcessor, "_analyze_tensor") - def test__analyze_tensor(self, mock_super, mock_get_file_path, mock_warning): - mock_get_file_path.return_value = ("dump_data_name", "file_path") - single_arg = {"Max": None} - mock_super.return_value = single_arg - - with patch("msprobe.core.data_dump.data_processor.mindspore_processor.path_len_exceeds_limit", - return_value=False): - ret = self.data_processor._analyze_tensor("tensor", "suffix") - self.assertEqual(self.data_processor.cached_tensors_and_file_paths, {"file_path": "tensor"}) - mock_warning.assert_not_called() - mock_super.assert_called_with("tensor", "suffix") - self.assertEqual(ret.get("Max"), None) - 
self.assertEqual(ret.get("data_name"), "dump_data_name") - - with patch("msprobe.core.data_dump.data_processor.mindspore_processor.path_len_exceeds_limit", - return_value=True): - self.data_processor._analyze_tensor("tensor", "suffix") - mock_warning.assert_called_with("The file path file_path length exceeds limit.") class TestKernelDumpDataProcessor(unittest.TestCase): def setUp(self): diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py index ed79e16fa98..dc1df442141 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py @@ -280,40 +280,6 @@ class TestPytorchDataProcessor(unittest.TestCase): expected_result = self.processor._analyze_builtin(Ellipsis) self.assertEqual(result, expected_result) - @patch.object(PytorchDataProcessor, 'get_md5_for_tensor') - def test_analyze_tensor(self, get_md5_for_tensor): - get_md5_for_tensor.return_value = 'mocked_md5' - tensor = torch.tensor([1.0, 2.0, 3.0]) - self.config.summary_mode = 'md5' - self.config.async_dump = False - result = self.processor._analyze_tensor(tensor, 'suffix') - expected = { - 'type': 'torch.Tensor', - 'dtype': str(tensor.dtype), - 'shape': tensor.shape, - 'Max': 3.0, - 'Min': 1.0, - 'Mean': 2.0, - 'Norm': torch.norm(tensor).item(), - 'requires_grad': tensor.requires_grad, - 'md5': 'mocked_md5' - } - self.assertDictEqual(expected, result) - - def test_analyze_tensor_with_empty_tensor(self): - tensor = torch.tensor([]) - result = self.processor._analyze_tensor(tensor, 'suffix') - self.assertEqual(result['Max'], None) - self.assertEqual(result['Min'], None) - self.assertEqual(result['Mean'], None) - self.assertEqual(result['Norm'], None) - - def test_analyze_tensor_with_inf_and_nan(self): - tensor = 
torch.tensor([1.0, float('inf'), float('nan'), -float('inf')]) - result = self.processor._analyze_tensor(tensor, 'suffix') - self.assertEqual(result['Max_except_inf_nan'], 1.0) - self.assertEqual(result['Min_except_inf_nan'], 1.0) - class TestTensorDataProcessor(unittest.TestCase): @@ -325,27 +291,6 @@ class TestTensorDataProcessor(unittest.TestCase): self.processor.current_api_or_module_name = "test_api" self.processor.api_data_category = "input" - @patch('torch.save') - def test_analyze_tensor(self, mock_save): - self.config.framework = "pytorch" - self.config.async_dump = False - tensor = torch.tensor([1.0, 2.0, 3.0]) - suffix = 'suffix' - result = self.processor._analyze_tensor(tensor, suffix) - mock_save.assert_called_once() - expected = { - 'type': 'torch.Tensor', - 'dtype': 'torch.float32', - 'shape': tensor.shape, - 'Max': 3.0, - 'Min': 1.0, - 'Mean': 2.0, - 'Norm': torch.norm(tensor).item(), - 'requires_grad': False, - 'data_name': 'test_api.input.suffix.pt' - } - self.assertEqual(expected, result) - class TestOverflowCheckDataProcessor(unittest.TestCase): diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_debug_save.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_debug_save.py index 495eedbf413..7af7dd89727 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_debug_save.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_debug_save.py @@ -38,40 +38,4 @@ class TestMindsporeDebuggerSave(TestCase): task_config = BaseConfig(statistics_task_json) with patch("msprobe.mindspore.debugger.precision_debugger.parse_json_config", return_value=(common_config, task_config)), \ patch("msprobe.mindspore.debugger.precision_debugger.set_register_backward_hook_functions"): - self.debugger = PrecisionDebugger() - - def test_forward_and_backward(self): - def forward_func(x, y): - PrecisionDebugger.save(x, "x_tensor") - return x * y - x = mindspore.Tensor([1.]) - y = mindspore.Tensor([2.]) - result_json = { - "task": 
"statistics", - "level": "debug", - "framework": "mindspore", - "dump_data_dir": None, - "data": { - "x_tensor.0": { - "type": "mindspore.Tensor", - "dtype": "Float32", - "shape": (1,), - "Max": 1.0, - "Min": 1.0, - "Mean": 1.0, - "Norm": 1.0 - }, - "x_tensor_grad.0": { - "type": "mindspore.Tensor", - "dtype": "Float32", - "shape": (1,), - "Max": 2.0, - "Min": 2.0, - "Mean": 2.0, - "Norm": 2.0 - } - } - } - grad_fn = mindspore.value_and_grad(forward_func, (0, 1)) - grad_fn(x, y) - self.assertEqual(self.debugger.service.data_collector.data_writer.cache_debug, result_json) \ No newline at end of file + self.debugger = PrecisionDebugger() \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_debug_save.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_debug_save.py index 534437260e6..d68f28066fa 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_debug_save.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_debug_save.py @@ -38,43 +38,3 @@ class TestPytorchDebuggerSave(TestCase): task_config = BaseConfig(statistics_task_json) with patch("msprobe.pytorch.debugger.precision_debugger.parse_json_config", return_value=(common_config, task_config)): self.debugger = PrecisionDebugger() - - def test_forward_and_backward(self): - def forward_func(x, y): - PrecisionDebugger.save(x, "x_tensor") - return x * y - x = torch.tensor([1.]) - y = torch.tensor([2.]) - x.requires_grad = True - y.requires_grad = True - result_json = { - "task": "statistics", - "level": "debug", - "framework": "pytorch", - "dump_data_dir": None, - "data": { - "x_tensor.0": { - "type": "torch.Tensor", - "dtype": "torch.float32", - "shape": torch.Size([1]), - "Max": 1.0, - "Min": 1.0, - "Mean": 1.0, - "Norm": 1.0, - "requires_grad": True - }, - "x_tensor_grad.0": { - "type": "torch.Tensor", - "dtype": "torch.float32", - "shape": torch.Size([1]), - "Max": 2.0, - "Min": 2.0, - "Mean": 2.0, - "Norm": 2.0, - "requires_grad": False - } - } - 
} - loss = forward_func(x, y) - loss.backward() - self.assertEqual(self.debugger.service.data_collector.data_writer.cache_debug, result_json) \ No newline at end of file -- Gitee From 9982d0b5b5a95db0126347c2560448a4c75fcb89 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 20 Mar 2025 09:09:31 +0800 Subject: [PATCH 323/333] Update test_pytorch_processor.py --- .../data_processor/test_pytorch_processor.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py index dc1df442141..713a507a9ad 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py @@ -382,19 +382,6 @@ class TestOverflowCheckDataProcessor(unittest.TestCase): self.processor._is_support_inf_nan() self.assertTrue(self.processor.support_inf_nan) - def test_analyze_maybe_overflow_tensor(self): - tensor_json = {'Max': None, 'Min': None} - self.processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertFalse(self.processor.has_overflow) - - tensor_json = {'Max': float('inf'), 'Min': 1.0} - self.processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.processor.has_overflow) - - tensor_json = {'Max': 1.0, 'Min': float('inf')} - self.processor._analyze_maybe_overflow_tensor(tensor_json) - self.assertTrue(self.processor.has_overflow) - @patch('msprobe.core.common.file_utils.path_len_exceeds_limit', return_value=False) @patch.object(BaseDataProcessor, 'get_save_file_path', return_value=['test_api_name', 'test_api_name.0.forward.input.pt']) -- Gitee From e869c784145b368aff2938adb961b644069791ea Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 20 Mar 2025 09:13:26 +0800 Subject: [PATCH 324/333] Update 
test_pytorch_processor.py --- .../data_processor/test_pytorch_processor.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py index 713a507a9ad..72a6807c90b 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py @@ -382,20 +382,6 @@ class TestOverflowCheckDataProcessor(unittest.TestCase): self.processor._is_support_inf_nan() self.assertTrue(self.processor.support_inf_nan) - @patch('msprobe.core.common.file_utils.path_len_exceeds_limit', return_value=False) - @patch.object(BaseDataProcessor, 'get_save_file_path', - return_value=['test_api_name', 'test_api_name.0.forward.input.pt']) - def test_analyze_tensor(self, mock_path_len_exceeds_limit, _): - tensor = torch.tensor([1.0, 2.0, 3.0]) - suffix = 'suffix' - expected = {'Max': 3.0, 'Min': 1.0, 'data_name': 'test_api_name'} - with patch.object(PytorchDataProcessor, '_analyze_tensor', - return_value={'Max': 3.0, 'Min': 1.0}) as mock_super_analyze_tensor: - result = self.processor._analyze_tensor(tensor, suffix) - mock_super_analyze_tensor.assert_called_once_with(tensor, suffix) - mock_path_len_exceeds_limit.assert_called_once() - self.assertEqual(expected, result) - class TestFreeBenchmarkDataProcessor(unittest.TestCase): -- Gitee From a310f9ea3363d8c38c9fba20e437bd902f3be238 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Wed, 19 Mar 2025 16:22:28 +0800 Subject: [PATCH 325/333] fix monitor readme --- debug/accuracy_tools/msprobe/docs/19.monitor.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/docs/19.monitor.md b/debug/accuracy_tools/msprobe/docs/19.monitor.md index fa1b7d06d6c..099bba30c0d 100644 --- 
a/debug/accuracy_tools/msprobe/docs/19.monitor.md +++ b/debug/accuracy_tools/msprobe/docs/19.monitor.md @@ -411,14 +411,13 @@ python3 -m msprobe.pytorch.monitor.anomaly_analyse -d $MONITOR_OUTPUT_DIR/anomal from msprobe.pytorch.monitor.csv2tb import csv2tensorboard_by_step # 前三个参数用来指定需要转换的一批文件,指定monitor输出目录及一个时间范围,会对这个范围内的文件进行转换 # process_num指定拉起的进程个数,默认为1,更多的进程个数可以加速转换 -# data_type_list是一个列表,指定需要转换的数据类型, 数据类型应来自输出件文件前缀,所有类型数据: -# ["actv", "actv_grad", "exp_avg", "exp_avg_sq", "grad_unreduced", "grad_reduced", "param"] -# 不指定就转换全部数据 -# output_dirpath可指定输出目录, 不传值时保存到"{curtime}_csv2tensorboard_by_step"文件夹,其中curtime为自动获取的当前时间戳 +# data_type_list是一个列表,指定需要转换的数据类型,默认转换全部数据,数据类型应来自输出件文件前缀,所有类型数据: +# ["actv", "actv_grad", "exp_avg", "exp_avg_sq", "grad_unreduced", "grad_reduced", "param"] +# output_dirpath可指定输出目录,默认保存到"{curtime}_csv2tensorboard_by_step"文件夹,其中curtime为自动获取的当前时间戳 csv2tensorboard_by_step( - monitor_path="~/monitor_output", - time_start="Dec03_21-34-40", - time_end="Dec03_21-34-42", + monitor_path="~/monitor_output", # 必填 + time_start="Dec03_21-34-40", # 必填 + time_end="Dec03_21-34-42", # 必填 process_num=8, data_type_list=["param"] ) @@ -500,7 +499,7 @@ csv2tensorboard_by_step(monitor_path, time_start, time_end, process_num=1, data_ | time_end | 结束时间戳。搭配time_start一起使用。指定一个时间范围,会对这个范围内的文件进行转换。左闭右闭的区间。 | 是 | | process_num | 指定拉起的进程个数,默认为1,更多的进程个数可以加速转换。 | 否 | | data_type_list | 指定需要转换的数据类型, 数据类型应来自输出件文件前缀,所有类型数据:
["actv", "actv_grad", "exp_avg", "exp_avg_sq", "grad_unreduced", "grad_reduced", "param"]。
不指定就转换全部数据。 | 否 | - +| output_dirpath | 指定转换后的输出路径,默认输出到"{curtime}_csv2tensorboard_by_step"文件夹,其中curtime为自动获取的当前时间戳。 | 否 | - 在模型任意位置获取当前参数**梯度**统计量 ```python TrainerMon.generate_wgrad_metrics() -> tuple[dict, dict] -- Gitee From 3f6ea453679bb896dc17c8cba49a86b0c6b5f3c0 Mon Sep 17 00:00:00 2001 From: wangqingcai Date: Thu, 20 Mar 2025 10:30:13 +0800 Subject: [PATCH 326/333] fix online_run_ut path check --- debug/accuracy_tools/msprobe/pytorch/pt_config.py | 13 +++++++------ .../msprobe/test/pytorch_ut/test_pt_config.py | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index fabe265f230..d97aff11ff4 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -16,9 +16,10 @@ import os import re -from msprobe.core.common.const import Const +from msprobe.core.common.const import Const, FileCheckConst from msprobe.core.common.exceptions import MsprobeException -from msprobe.core.common.file_utils import FileOpen, load_json, check_file_or_directory_path, check_crt_valid +from msprobe.core.common.file_utils import FileOpen, load_json, check_file_or_directory_path, check_crt_valid, \ + FileChecker from msprobe.core.common.log import logger from msprobe.core.common.utils import is_int from msprobe.core.common_config import BaseConfig, CommonConfig @@ -273,13 +274,13 @@ class RunUTConfig(BaseConfig): @classmethod def check_nfs_path_config(cls, nfs_path): - if nfs_path and not os.path.exists(nfs_path): - raise Exception("nfs_path: %s does not exist" % nfs_path) + if nfs_path: + FileChecker(nfs_path, FileCheckConst.DIR, FileCheckConst.READ_ABLE).common_check() @classmethod def check_tls_path_config(cls, tls_path): - if tls_path and not os.path.exists(tls_path): - raise Exception("tls_path: %s does not exist" % tls_path) + if tls_path: + FileChecker(tls_path, FileCheckConst.DIR, 
FileCheckConst.READ_ABLE).common_check() @classmethod def check_master_ip_config(cls, master_ip): diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py index c1b8bac47fd..0724581bc79 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py @@ -397,13 +397,13 @@ class TestRunUTConfig(unittest.TestCase): def test_check_nfs_path_config_not_exist(self, mock_exists): with self.assertRaises(Exception) as context: RunUTConfig.check_nfs_path_config("./invalid_nfs") - self.assertIn("does not exist", str(context.exception)) + self.assertIn("[msprobe] 非法文件路径:", str(context.exception)) @patch('os.path.exists', return_value=False) def test_check_tls_path_config_not_exist(self, mock_exists): with self.assertRaises(Exception) as context: RunUTConfig.check_tls_path_config("./invalid_tls") - self.assertIn("does not exist", str(context.exception)) + self.assertIn("[msprobe] 非法文件路径:", str(context.exception)) def test_check_run_ut_config(self): with patch.object(RunUTConfig, 'check_filter_list_config') as mock_filter, \ -- Gitee From 253a12dc75ffa8c93454df0816ccdd47e747146c Mon Sep 17 00:00:00 2001 From: Linwei-Ying Date: Thu, 20 Mar 2025 10:48:52 +0800 Subject: [PATCH 327/333] compare logger bugfix --- debug/accuracy_tools/msprobe/core/common/utils.py | 2 +- debug/accuracy_tools/msprobe/core/compare/acc_compare.py | 3 +++ .../msprobe/core/compare/merge_result/merge_result.py | 1 + debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index 963ca825456..e08e40f1ee6 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -284,7 +284,7 @@ def set_dump_path(input_param): 
npu_path_valid = npu_path is not None and npu_path.endswith("dump.json") bench_path_valid = bench_path is not None and bench_path.endswith("dump.json") if not npu_path_valid or not bench_path_valid: - logger.error(f"Please check the json path is valid. npu_path: {npu_path}, bench_path: {bench_path}") + logger.error(f"Please check the json path is valid and ensure that neither npu_path nor bench_path is None.") raise CompareException(CompareException.INVALID_PATH_ERROR) input_param['npu_dump_data_dir'] = os.path.join(os.path.dirname(npu_path), Const.DUMP_TENSOR_DATA) input_param['bench_dump_data_dir'] = os.path.join(os.path.dirname(bench_path), Const.DUMP_TENSOR_DATA) diff --git a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py index f2aa8c479ec..9f8b1321134 100644 --- a/debug/accuracy_tools/msprobe/core/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/core/compare/acc_compare.py @@ -282,6 +282,8 @@ class Comparator: result = [] bench_ops_all[CompareConst.N_A] = self._generate_na_data(bench_ops_all) for ms_op_name, bench_op_name in self.data_mapping_dict.items(): + check_op_str_pattern_valid(ms_op_name) + check_op_str_pattern_valid(bench_op_name) if ms_op_name in npu_ops_all and bench_op_name in bench_ops_all: npu_stack_info = npu_ops_all.get(ms_op_name).get("stack_info", None) bench_stack_info = bench_ops_all.get(bench_op_name).get("stack_info", None) @@ -333,6 +335,7 @@ class Comparator: pt_data_name = bench_ops_all.get(bench_op_name).get("data_name", None) result_item.append([ms_data_name, pt_data_name]) result.append(result_item) + logger.info(f"{ms_op_name}, {bench_op_name} compared.") elif ms_op_name not in npu_ops_all: logger.warning(f'Can not find npu op name : `{ms_op_name}` in npu dump json file.') elif bench_op_name not in npu_ops_all: diff --git a/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py 
b/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py index b605bd59fca..3c32c89003d 100644 --- a/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py +++ b/debug/accuracy_tools/msprobe/core/compare/merge_result/merge_result.py @@ -63,6 +63,7 @@ def get_result_path(input_dir): for f in os.listdir(input_dir) if f.endswith(FileCheckConst.XLSX_SUFFIX)] filt_compare_result_path_list = [] for file_path in compare_result_path_list: + FileChecker(file_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE).common_check() file_name = os.path.basename(file_path) if check_compare_result_name(file_name): compare_result_path_checker = FileChecker(file_path, FileCheckConst.FILE, FileCheckConst.READ_ABLE) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py index 61766ed27c0..69abbf12b4c 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/common/test_utils.py @@ -206,7 +206,7 @@ class TestUtils(TestCase): with self.assertRaises(CompareException) as context: set_dump_path(input_param) self.assertEqual(context.exception.code, CompareException.INVALID_PATH_ERROR) - mock_error.assert_called_with("Please check the json path is valid. 
npu_path: None, bench_path: bench_path") + mock_error.assert_called_with("Please check the json path is valid and ensure that neither npu_path nor bench_path is None.") @patch.object(logger, "error") def test_get_dump_mode(self, mock_error): -- Gitee From 1ca3e0ffd787ebd735a72ce0c6748fd06c985eaf Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 20 Mar 2025 11:29:45 +0800 Subject: [PATCH 328/333] Update test_pytorch_processor.py --- .../data_processor/test_pytorch_processor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py index e11e234d1c1..5cf644526be 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/data_dump/data_processor/test_pytorch_processor.py @@ -1,3 +1,19 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" import hashlib import os import sys -- Gitee From bc9aa501030f1c0d54d65470193cab73195e87da Mon Sep 17 00:00:00 2001 From: lcw Date: Thu, 20 Mar 2025 11:26:38 +0800 Subject: [PATCH 329/333] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE?= =?UTF-8?q?=E5=A4=8Dclip=20grad=20=E5=9C=BA=E6=99=AF=E4=B8=8B=E8=BF=94?= =?UTF-8?q?=E5=9B=9E=E5=80=BC=E6=9C=89=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/pytorch/hook_module/register_optimizer_hook.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/register_optimizer_hook.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/register_optimizer_hook.py index 75be9fc4532..b4f9a5f5063 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/register_optimizer_hook.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/register_optimizer_hook.py @@ -32,8 +32,9 @@ def register_optimizer_hook(data_collector): def patch_clip_grad(func): def wrapper(*args, **kwargs): data_collector.optimizer_status = Const.CLIP_GRAD - func(*args, **kwargs) + result = func(*args, **kwargs) data_collector.optimizer_status = Const.END_PREFIX + Const.CLIP_GRAD + return result return wrapper -- Gitee From 6588880041c3faf80fa37b8d94a38dc162a102ac Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 20 Mar 2025 15:12:43 +0800 Subject: [PATCH 330/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/mindspore_processor.py | 2 +- .../msprobe/core/data_dump/data_processor/pytorch_processor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 
86836889592..96b56902d99 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -93,7 +93,7 @@ class MindsporeDataProcessor(BaseDataProcessor): logger.warning("Async dump do not support complex data!") return tensor_stat else: - if data.dtype == ms.float64 or not ops.is_floating_point(data): + if not ops.is_floating_point(data) or data.dtype == ms.float64: data = data.to(ms.float32) get_norm_value = mint.norm if hasattr(mint, "norm") else ops.norm tensor_stat.max = mint.max(data) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index ecaa6c879a2..7cc552c0bcc 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -105,7 +105,7 @@ class PytorchDataProcessor(BaseDataProcessor): elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data else: - if not data.is_floating_point() or data.dtype == torch.float64: + if data.dtype == torch.float64 or not data.is_floating_point(): data = data.float() tensor_stat.max = torch.max(data) tensor_stat.min = torch.min(data) -- Gitee From 67780fef2de87ceb7def29de2b9e96f232ea6423 Mon Sep 17 00:00:00 2001 From: lcw Date: Wed, 19 Mar 2025 14:44:24 +0800 Subject: [PATCH 331/333] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E5=A4=9A=E7=BA=BF=E7=A8=8B=E4=B8=8B=E5=81=B6=E7=8E=B0?= =?UTF-8?q?json.dump=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/data_dump/json_writer.py | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py 
b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index b1e26d16f97..2d836479177 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -16,13 +16,15 @@ import csv import os import copy -import numpy as np +import threading from msprobe.core.common.const import Const, FileCheckConst from msprobe.core.common.file_utils import change_mode, FileOpen, save_json, load_json from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException +lock = threading.Lock() + class DataWriter: @@ -90,28 +92,32 @@ class DataWriter: self.write_json() def update_data(self, new_data): - if not isinstance(new_data, dict) or len(new_data.keys()) != 1: - logger.warning(f"The data info({new_data}) should be a dict with only one outer key.") - return - dump_data = self.cache_data.get(Const.DATA) - if not isinstance(dump_data, dict): - logger.warning(f"The dump data({dump_data}) should be a dict.") - return - - key = next(iter(new_data.keys())) - if key in dump_data: - dump_data.get(key).update(new_data.get(key)) - else: - dump_data.update(new_data) + with lock: + if not isinstance(new_data, dict) or len(new_data.keys()) != 1: + logger.warning(f"The data info({new_data}) should be a dict with only one outer key.") + return + dump_data = self.cache_data.get(Const.DATA) + if not isinstance(dump_data, dict): + logger.warning(f"The dump data({dump_data}) should be a dict.") + return + + key = next(iter(new_data.keys())) + if key in dump_data: + dump_data.get(key).update(new_data.get(key)) + else: + dump_data.update(new_data) def update_stack(self, new_data): - self.cache_stack.update(new_data) + with lock: + self.cache_stack.update(new_data) def update_construct(self, new_data): - self.cache_construct.update(new_data) + with lock: + self.cache_construct.update(new_data) def update_debug(self, new_data): - self.cache_debug['data'].update(new_data) + with lock: + 
self.cache_debug['data'].update(new_data) def write_data_json(self, file_path): logger.info(f"dump.json is at {os.path.dirname(os.path.dirname(file_path))}. ") @@ -127,14 +133,15 @@ class DataWriter: save_json(file_path, self.cache_debug, indent=1) def write_json(self): - if self.cache_data: - self.write_data_json(self.dump_file_path) - if self.cache_stack: - self.write_stack_info_json(self.stack_file_path) - if self.cache_construct: - self.write_construct_info_json(self.construct_file_path) - if self.cache_debug: - self.write_debug_info_json(self.debug_file_path) + with lock: + if self.cache_data: + self.write_data_json(self.dump_file_path) + if self.cache_stack: + self.write_stack_info_json(self.stack_file_path) + if self.cache_construct: + self.write_construct_info_json(self.construct_file_path) + if self.cache_debug: + self.write_debug_info_json(self.debug_file_path) def fill_stack_tensor_data(self): self.process_stat_data_recursive(self.cache_data) -- Gitee From 7df9b649a8f8ed06ed441c19ccf1906fa29a9ffb Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 20 Mar 2025 17:36:43 +0800 Subject: [PATCH 332/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/mindspore_processor.py | 9 +++++++-- .../data_dump/data_processor/pytorch_processor.py | 9 +++++++-- .../msprobe/core/data_dump/json_writer.py | 14 ++++++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 96b56902d99..6bded03c2c7 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -256,13 +256,18 @@ class 
OverflowCheckDataProcessor(MindsporeDataProcessor): self.cached_tensors_and_file_paths = {} def _analyze_maybe_overflow_tensor(self, tensor_json): - max_tensor = self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) - min_tensor = self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) + tensor_stat_index = tensor_json.get('tensor_stat_index') + if tensor_stat_index is None: + logger.warning("tensor_stat_index does not exist in tensor_json.") + return + max_tensor = self.data_writer.get_buffer_values_max(tensor_stat_index) + min_tensor = self.data_writer.get_buffer_values_min(tensor_stat_index) if max_tensor is None or min_tensor is None: return if mint.isinf(max_tensor) or mint.isnan(max_tensor): self.has_overflow = True + return if mint.isinf(min_tensor) or mint.isnan(min_tensor): self.has_overflow = True diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 7cc552c0bcc..60ef834601b 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -418,14 +418,19 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): raise RuntimeError(f"overflow check failed") from e def _analyze_maybe_overflow_tensor(self, tensor_json): - max_tensor = self.data_writer.get_buffer_values_max(tensor_json['tensor_stat_index']) - min_tensor = self.data_writer.get_buffer_values_min(tensor_json['tensor_stat_index']) + tensor_stat_index = tensor_json.get('tensor_stat_index') + if tensor_stat_index is None: + logger.warning("tensor_stat_index does not exist in tensor_json.") + return + max_tensor = self.data_writer.get_buffer_values_max(tensor_stat_index) + min_tensor = self.data_writer.get_buffer_values_min(tensor_stat_index) if max_tensor is None or min_tensor is None: return if torch.isinf(max_tensor) or 
torch.isnan(max_tensor): self.has_overflow = True + return if torch.isinf(min_tensor) or torch.isnan(min_tensor): self.has_overflow = True diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index e937774c533..5e38ec64897 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -177,10 +177,20 @@ class DataWriter: return len(self.stat_stack_list) - 1 def get_buffer_values_max(self, index): - return self.stat_stack_list[index][0] + if 0 <= index < len(self.stat_stack_list) and len(self.stat_stack_list[index]) >= 1: + return self.stat_stack_list[index][0] + else: + logger.warning(f"stat_stack_list[{index}] The internal data is incomplete," + f" and the maximum value cannot be obtained.") + return None def get_buffer_values_min(self, index): - return self.stat_stack_list[index][1] + if 0 <= index < len(self.stat_stack_list) and len(self.stat_stack_list[index]) >= 1: + return self.stat_stack_list[index][1] + else: + logger.warning(f"stat_stack_list[{index}] Internal data is incomplete" + f" and minimum values cannot be obtained.") + return None def flush_stat_stack(self): """ -- Gitee From 7aa76eb51211e3dd93bfec221ffcde3a33609910 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 20 Mar 2025 19:38:46 +0800 Subject: [PATCH 333/333] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/core/common/const.py | 1 + .../core/data_dump/data_processor/mindspore_processor.py | 4 ++-- .../core/data_dump/data_processor/pytorch_processor.py | 4 ++-- debug/accuracy_tools/msprobe/core/data_dump/json_writer.py | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py 
b/debug/accuracy_tools/msprobe/core/common/const.py index 64132f3f3ec..ff9f29e4d29 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -219,6 +219,7 @@ class Const: MEAN = 'Mean' NORM = 'Norm' DATA_NAME = 'data_name' + TENSOR_STAT_INDEX = 'tensor_stat_index' CODE_STACK = 'Code Stack' OP_NAME = 'Op Name' diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 6bded03c2c7..b2d8c611130 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -158,7 +158,7 @@ class MindsporeDataProcessor(BaseDataProcessor): placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) - tensor_json.update({"tensor_stat_index": placeholder_index}) + tensor_json.update({Const.TENSOR_STAT_INDEX: placeholder_index}) if self.config.summary_mode == Const.MD5 and not self.config.async_dump: tensor_md5 = self.get_md5_for_tensor(tensor) @@ -256,7 +256,7 @@ class OverflowCheckDataProcessor(MindsporeDataProcessor): self.cached_tensors_and_file_paths = {} def _analyze_maybe_overflow_tensor(self, tensor_json): - tensor_stat_index = tensor_json.get('tensor_stat_index') + tensor_stat_index = tensor_json.get(Const.TENSOR_STAT_INDEX) if tensor_stat_index is None: logger.warning("tensor_stat_index does not exist in tensor_json.") return diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index 60ef834601b..a54dbe60d40 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -289,7 +289,7 @@ class 
PytorchDataProcessor(BaseDataProcessor): ] placeholder_index = self.data_writer.append_stat_to_buffer(stat_values) - tensor_json.update({"tensor_stat_index": placeholder_index}) + tensor_json.update({Const.TENSOR_STAT_INDEX: placeholder_index}) tensor_json.update({"requires_grad": tensor.requires_grad}) if self.config.summary_mode == Const.MD5 and not self.config.async_dump: @@ -418,7 +418,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): raise RuntimeError(f"overflow check failed") from e def _analyze_maybe_overflow_tensor(self, tensor_json): - tensor_stat_index = tensor_json.get('tensor_stat_index') + tensor_stat_index = tensor_json.get(Const.TENSOR_STAT_INDEX) if tensor_stat_index is None: logger.warning("tensor_stat_index does not exist in tensor_json.") return diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index 3ae731410b2..7e315226676 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -62,9 +62,9 @@ class DataWriter: def _replace_stat_placeholders(self, data, stat_result): if isinstance(data, dict): keys = list(data.keys()) # 获取当前所有键 - for key in keys: # 避免遍历时修改字典 + for key in keys: # 递归所有变量 value = data[key] - if key == "tensor_stat_index" and isinstance(value, int): + if key == Const.TENSOR_STAT_INDEX and isinstance(value, int): if value > 0: idx = value else: -- Gitee