From a8e033c08c265f5a7b35d90240e8ec42ff5e4567 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Tue, 4 Jun 2024 21:43:15 +0800 Subject: [PATCH 001/141] compare finished --- .../generate_op_script/op_generator.py | 261 +++++++++++++++ .../operator_replication.template | 307 ++++++++++++++++++ 2 files changed, 568 insertions(+) create mode 100644 debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py create mode 100644 debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py new file mode 100644 index 0000000000..ba731f7239 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -0,0 +1,261 @@ +import json +import os +import math +import numpy as np +import torch +try: + import torch_npu +except ImportError: + pass + + +TENSOR_DATA_LIST = ["torch.Tensor"] +TORCH_BOOL_TYPE = ["torch.bool"] +TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", + "torch.int64", "torch.long"] +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", + "torch.float64", "torch.double"] +TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] +NUMPY_TYPE = ["numpy.int8", "numpy.int16", "numpy.int32", "numpy.int64", "numpy.uint8", "numpy.uint16", "numpy.uint32", + "numpy.uint64", "numpy.float16", "numpy.float32", "numpy.float64", "numpy.float128", "numpy.complex64", + "numpy.complex128", "numpy.complex256", "numpy.bool_", "numpy.string_", "numpy.bytes_", "numpy.unicode_"] +RAISE_PRECISION = { + "torch.float16": "torch.float32", + "torch.half": "torch.float32", + "torch.bfloat16": "torch.float32", + "torch.float32": "torch.float64", + "torch.float": "torch.float64" 
+} + + +''' +user_settings could be adjusted by user. +keys: + full_api_name: api_type.api_name.ordinal_number + direction_status: forward or backward + json_path : path of forward json file or backward json file + mode : random_data_mode or real_data_mode + random_seed: if mode is random_data_mode, random seed is random_seed + iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter + real_data_path: path of real data + output_path : path of output files +''' +user_settings = { + "full_api_name": "Torch.matmul.83", + "direction_status": "forward", + "json_path": "", + "mode": "real_data", + "random_seed": 1234, + "iter_times": 5, + "real_data_path": "", + "output_path": "" +} + + +def check_full_api_name(full_api_name): + ''' + {api_type}_{api_name}_{api调用次数} + two things: new name format, what parts of full_api_name is needed + ''' + pass + + +def check_user_settings(user_settings): + check_full_api_name(user_settings["full_api_name"]) + if user_settings["mode"] != "random" and user_settings["mode"] != "real_data": + raise Exception("Error: mode must be random or real_data!") + if user_settings["mode"] == "real_data": + pass + with open(user_settings["json_path"]) as f: + json_content = json.load(f) + (api_full_name, api_info_dict) = list(json_content.items())[0] + return api_info_dict + + +def get_settings(): + ''' + internal_settings contain all information needed for the program. 
+ keys: + full_api_name: api_type.api_name.ordinal_number + api_type: type of api, should be Functional, Torch or Tensor + api_name: name of api + ordinal_number: how many times the same api has been called + direction_status: forward or backward + json_path : path of forward json file or backward json file + mode : random_data_mode or real_data_mode + random_seed: if mode is random_data_mode, random seed is random_seed + iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter + real_data_path: path of real data + output_path : path of output files + ''' + api_info_dict = check_user_settings(user_settings) + args_info = api_info_dict.get("args") + kwargs_info = api_info_dict.get("kwargs") + + internal_settings = {} + internal_settings["full_api_name"] = user_settings.get("full_api_name") + parts_of_full_api_name = internal_settings["full_api_name"].split(".", -1) + if parts_of_full_api_name[0] == "Functional": + internal_settings["api_type"] = "torch.nn.functional" + elif parts_of_full_api_name[0] == "Tensor": + internal_settings["api_type"] = "torch.Tensor" + else: + internal_settings["api_type"] = "torch" + internal_settings["api_name"] = parts_of_full_api_name[1] + internal_settings["ordinal_number"] = parts_of_full_api_name[2] + internal_settings["random_seed"] = user_settings.get("random_seed") + if user_settings.get("mode") == "random_data_mode": + internal_settings["iter_times"] = 1 + else: + internal_settings["iter_times"] = user_settings.get("iter_times") + internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info) + internal_settings["args_list_generator_device"] = generate_args_list_device(args_info) + internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info) + internal_settings["kwargs_value_assignment"] = generate_kwargs_value_assignment_code(kwargs_info) + internal_settings["kwargs_dict_generator_device"] = 
generate_kwargs_dict_device(kwargs_info) + internal_settings["kwargs_dict_generator_bench"] = generate_kwargs_dict_bench(kwargs_info) + return internal_settings + + +def recursive_args_element_assignment(args_info, name_number): + args_element_assignment = "" + for index, arg in enumerate(args_info): + if isinstance(arg, (list, tuple)): + new_args_element_assignment = recursive_args_element_assignment(arg, name_number + "_" + str(index)) + args_element_assignment += new_args_element_assignment + else: + arg["parameter_name"] = "arg" + name_number + "_" + str(index) + args_element_assignment += " " + "arg_info" + name_number + "_" + str(index) + " = " + "{}".format(str(arg)) + "\n" + args_element_assignment += " " + "arg" + name_number + "_" + str(index) + " = " + "generate_data(arg_info" + name_number + "_" + str(index) + ")" + "\n" + return args_element_assignment + + +def generate_args_element_assignment_code(args_info): + args_element_assignment = recursive_args_element_assignment(args_info, "") + return args_element_assignment + + +def recursive_args_list(args_info, flag_device=False, flag_bench=False): + args_list_generator = "" + for index, arg in enumerate(args_info): + if isinstance(arg, list): + args_list_generator += "[" + new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) + args_list_generator += new_args_list_generator + args_list_generator += "]" + elif isinstance(arg, tuple): + args_list_generator += "(" + new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) + args_list_generator += new_args_list_generator + args_list_generator += ")" + else: + args_list_generator += arg.get("parameter_name") + if arg.get("type") in TENSOR_DATA_LIST: + if flag_device: + args_list_generator += ".to(device)" + if flag_bench: + data_dtype = arg.get("dtype") + raised_dtype = RAISE_PRECISION.get(data_dtype) + if raised_dtype: + args_list_generator += ".to(" + raised_dtype + ")" + 
args_list_generator += ", " + return args_list_generator + + +def generate_args_list_device(args_info): + args_list_generator_device = recursive_args_list(args_info, flag_device=True) + return args_list_generator_device + + +def generate_args_list_bench(args_info): + args_list_generator_bench = recursive_args_list(args_info, flag_bench=True) + return args_list_generator_bench + + +def recursive_kwargs_value_assignment(info, key_name, name_number): + kwargs_value_assignment = "" + if isinstance(info, dict): + if info.get("type") == "torch.device" or info.get("type") == "torch.dtype": + kwargs_value_assignment += " " + "kwarg_" + key_name + name_number + " = " + info.get("value") + else: + kwargs_value_assignment += " " + "kwarg_info_" + key_name + name_number + " = " + "{}".format(str(info)) + "\n" + kwargs_value_assignment += " " + "kwarg_" + key_name + name_number + " = " + "generate_data(kwarg_info_" + key_name + name_number + ")" + "\n" + info["parameter_name"] = "kwarg_" + key_name + name_number + else: + for index, arg in enumerate(info): + new_kwargs_value_assignment = recursive_kwargs_value_assignment(arg, key_name, name_number + "_" + str(index)) + kwargs_value_assignment += new_kwargs_value_assignment + return kwargs_value_assignment + + +def generate_kwargs_value_assignment_code(kwargs_info): + kwargs_value_assignment = "" + for key, value in kwargs_info.items(): + kwargs_value_assignment += recursive_kwargs_value_assignment(value, key, "") + return kwargs_value_assignment + + +def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): + kwargs_dict_generator = "" + if isinstance(info, dict): + kwargs_dict_generator += info.get("parameter_name") + if info.get("type") in TENSOR_DATA_LIST: + if flag_device: + kwargs_dict_generator += "to(device)" + if flag_bench: + data_dtype = info.get("dtype") + raised_dtype = RAISE_PRECISION.get(data_dtype) + if raised_dtype: + kwargs_dict_generator += "to(" + raised_dtype + ")" + else: + kwargs_dict_generator 
= "" + if isinstance(info, list): + kwargs_dict_generator += "[" + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += "]" + else: + kwargs_dict_generator += "(" + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += ")" + return kwargs_dict_generator + + +def generate_kwargs_dict_device(kwargs_info): + kwargs_dict_generator_device = "" + for key, value in kwargs_info.items(): + kwargs_dict_generator_device += '"' + key + '"' + ": " + kwargs_dict_generator_device += recursive_kwargs_dict(value, flag_device=True) + ", " + return kwargs_dict_generator_device + + +def generate_kwargs_dict_bench(kwargs_info): + kwargs_dict_generator_bench = "" + for key, value in kwargs_info.items(): + kwargs_dict_generator_bench += '"' + key + '"' + ": " + kwargs_dict_generator_bench += recursive_kwargs_dict(value, flag_bench=True) + ", " + return kwargs_dict_generator_bench + + +def main(): + internal_settings = get_settings() + + template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") + operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("full_api_name"))) + + try: + with open(template_path, 'r') as ftemp, open(operator_script_path, 'w') as fout: + code_template = ftemp.read() + fout.write(code_template.format(**internal_settings)) + except OSError: + print(f"Failed to open file. 
Please check file {template_path} or {operator_script_path}.") + + print(f"Generate operator script successfully and the name is {operator_script_path}.") + + +if __name__ == "__main__": + main() + print("Job done successfully.") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template new file mode 100644 index 0000000000..54d1ab01e3 --- /dev/null +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -0,0 +1,307 @@ +import json +import os +import math +import torch +try: + import torch_npu +except ImportError: + pass + +from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi + + +TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] +TORCH_BOOL_TYPE = ["torch.bool"] +TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "torch.int32", "torch.int", + "torch.int64", "torch.long"] +TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", + "torch.float64", "torch.double"] +TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] + + +def get_device(): + if torch.cuda.is_available(): + device = torch.device("cuda") + elif torch_npu.npu.is_available(): + device = torch.device("npu") + else: + raise Exception("Error: This device is not NPU or GPU!") + return device + + +def generate_bool_tensor(low, high, shape): + low, high = int(low), int(high) + tensor = torch.randint(low, high + 1, shape) + bool_tensor = torch.gt(tensor, 0) + return bool_tensor + + +def generate_numerical_tensor(low, high, shape, data_dtype): + if data_dtype in TORCH_FLOAT_TYPE: + scale = high - low + rand01 = torch.rand(shape, dtype=eval(data_dtype)) + tensor = rand01 * scale + low + elif data_dtype in 
TORCH_INT_TYPE: + low, high = int(low), int(high) + tensor = torch.randint(low, high + 1, shape, dtype=eval(data_dtype)) + else: + raise NotImplementedError(f"{{data_dtype}} is not supported!") + if torch.numel(tensor) == 0: + return tensor + tmp_tensor = tensor.reshape(-1) + tmp_tensor[0] = low + tmp_tensor[-1] = high + data = tmp_tensor.reshape(shape) + return data + + +def generate_random_tensor(info): + low, high = info.get('Min'), info.get('Max') + data_dtype = info.get('dtype') + shape = tuple(info.get('shape')) + if data_dtype == "torch.bool": + data = generate_bool_tensor(low, high, shape) + else: + data = generate_numerical_tensor(low, high, shape, data_dtype) + return data + + +def generate_real_tensor(data_path): + data_path = os.path.realpath(data_path) + data = torch.load(data_path) + return data + + +def generate_data(info): + data_type = info.get("type") + data_path = info.get("datapath") + if data_type in TENSOR_DATA_LIST: + if data_path: + data = generate_real_tensor(data_path) + else: + data = generate_random_tensor(info) + else: + data = info.get("value") + return data + + +def get_input(): +{args_element_assignment} + args_device = [{args_list_generator_device}] + args_bench = [{args_list_generator_bench}] +{kwargs_value_assignment} + kwargs_device = {{{kwargs_dict_generator_device}}} + kwargs_bench = {{{kwargs_dict_generator_bench}}} + return args_device, kwargs_device, args_bench, kwargs_bench + + +def exec_api_device(args, kwargs): + output_device = {api_type}.{api_name}(*args, **kwargs) + return output_device + + +def exec_api_bench(args, kwargs): + output_bench = {api_type}.{api_name}(*args, **kwargs) + return output_bench + + +def compute_inf_nan_proportion(inf_nan_mask, out_device, out_bench, abs_bench_with_eps, rtol): + out_bench = out_bench.to(out_device.dtype) + min = torch.finfo(out_device.dtype).min + max = torch.finfo(out_device.dtype).max + bench_clip = torch.clamp(out_bench, min=min, max=max) + device_clip = 
torch.clamp(out_device, min=min, max=max) + clipped_abs_ae = torch.abs(device_clip - bench_clip) + clipped_re = clipped_abs_ae / abs_bench_with_eps + pass_mask = torch.less_equal(clipped_re, rtol) + both_nan_mask = torch.logical_and(torch.isnan(out_device), torch.isnan(bench_clip)) + pass_mask = torch.logical_or(pass_mask, both_nan_mask) + not_pass_mask = torch.logical_not(pass_mask) + not_pass_mask = torch.logical_and(not_pass_mask, inf_nan_mask) + inf_nan_err_cnt = torch.sum(not_pass_mask) + return 0 if torch.sum(inf_nan_mask) == 0 else inf_nan_err_cnt / torch.sum(inf_nan_mask) + + +def compute_rmse(abs_err, normal_value_mask): + if torch.sum(normal_value_mask) == 0: + return 0 + else: + masked_ae = torch.where(normal_value_mask, abs_err, 0) + mse = torch.sum(torch.square(masked_ae)) / torch.sum(normal_value_mask) + rmse = torch.sqrt(mse) + return rmse + + +def compute_error_balance(out_device, out_bench): + larger_count = torch.sum(torch.greater(out_device - out_bench.to(out_device.dtype), 0)) + smaller_count = torch.sum(torch.less(out_device - out_bench.to(out_device.dtype), 0)) + total_count = torch.numel(out_bench) + error_balance = abs(larger_count - smaller_count) / total_count + return error_balance + + +def compare_tensor(out_device, out_bench, api_name): + if out_device.shape != out_bench.shape: + print("ERROR: shape of out_device and out_bench is not equal!") + return None + if torch.numel(out_bench) == 0: + print("Both out_device and out_bench have zero elements.") + return None + dtype_device = out_device.dtype + dtype_bench = out_bench.dtype + if str(dtype_device) in TORCH_FLOAT_TYPE and str(dtype_bench) in TORCH_FLOAT_TYPE \ + or str(dtype_device) in TORCH_INT_TYPE and str(dtype_bench) in TORCH_INT_TYPE \ + or str(dtype_device) in TORCH_BOOL_TYPE and str(dtype_bench) in TORCH_BOOL_TYPE: + out_device = out_device.to(torch.device("cpu")) + if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or api_name in BinaryStandardApi: 
+ print("compare standard: binary consistency standard:") + error_number = torch.sum(out_device != out_bench).item() + error_rate = error_number / torch.numel(out_bench) + print(f"error rate is {{error_rate}}.") + else: + abs_err = torch.abs(out_device - out_bench) + abs_bench = torch.abs(out_bench) + if dtype_bench == torch.float32: + eps = 2 ** -23 + if dtype_bench == torch.float64: + eps = 2 ** -52 + abs_bench_with_eps = abs_bench + eps + rel_err = torch.abs(abs_err / abs_bench_with_eps) + device_finite_mask = torch.isfinite(out_device) + bench_finite_mask = torch.isfinite(out_bench.to(dtype_device)) + both_finite_mask = torch.logical_and(device_finite_mask, bench_finite_mask) + inf_nan_mask = torch.logical_not(both_finite_mask) + if api_name in AbsoluteStandardApi: + if dtype_device == torch.float16: + rtol, small_value, small_value_atol = 1.0e-3, 1.0e-3, 1.0e-5 + elif dtype_device == torch.bfloat16: + rtol, small_value, small_value_atol = 4.0e-3, 1.0e-3, 1.0e-5 + else: + rtol, small_value, small_value_atol = 1.0e-6, 1.0e-6, 1.0e-9 + small_value_mask = torch.less_equal(abs_bench, small_value) + small_value_mask = torch.logical_and(small_value_mask, both_finite_mask) + normal_value_mask = torch.logical_and(both_finite_mask, torch.logical_not(small_value_mask)) + inf_nan_proportion = compute_inf_nan_proportion(inf_nan_mask, out_device, out_bench, abs_bench_with_eps, rtol) + rel_err_mask = torch.greater(rel_err, rtol) + rel_err_mask = torch.logical_and(rel_err_mask, normal_value_mask) + if torch.sum(normal_value_mask) == 0: + rel_err_proportion = 0 + else: + rel_err_proportion = torch.sum(rel_err_mask) / torch.sum(normal_value_mask) + abs_err_mask = torch.greater(abs_err, small_value_atol) + abs_err_mask = torch.logical_and(abs_err_mask, small_value_mask) + if torch.sum(small_value_mask) == 0: + abs_err_proportion = 0 + else: + abs_err_proportion = torch.sum(abs_err_mask) / torch.sum(small_value_mask) + print("compare standard: absolute threshold standard") + 
print(f"relative error ratio is {{rel_err_proportion}}") + print(f"absolute error ratio is {{abs_err_proportion}}") + elif api_name in ULPStandardApi: + if dtype_device == torch.float16: + min_eb, exponent_num = -14, 10 + elif dtype_device == torch.bfloat16: + min_eb, exponent_num = -126, 7 + else: + min_eb, exponent_num = -126, 23 + eb = torch.where(abs_bench == 0, torch.zeros(out_bench.shape), torch.floor(torch.log2(abs_bench))) + eb = torch.maximum(eb, min_eb) + if dtype_device == torch.float32: + ulp_err = (out_device.to(torch.float64) - out_bench).to(torch.float64) * torch.exp2(-eb + exponent_num).to(torch.float64) + else: + ulp_err = (out_device.to(torch.float32) - out_bench).to(torch.float32) * torch.exp2(-eb + exponent_num).to(torch.float32) + ulp_err = torch.abs(ulp_err) + max_ulp_err = torch.max(ulp_err) + mean_ulp_err = torch.mean(ulp_err) + if dtype_device == torch.float32: + ulp_err_proportion = torch.sum(ulp_err > 32) / torch.numel(out_bench) + else: + ulp_err_proportion = torch.sum(ulp_err > 1) / torch.numel(out_bench) + print("compare standard: ulp error standard") + print(f"maximum ulp error is {{max_ulp_err}}") + print(f"mean ulp error is {{mean_ulp_err}}") + print(f"ulp error proportion is {{ulp_err_proportion}}") + else: + if dtype_device == torch.float16: + small_value, small_value_atol = 1.0e-3, 1.0e-5 + elif dtype_device == torch.bfloat16: + small_value, small_value_atol = 1.0e-3, 1.0e-5 + else: + small_value, small_value_atol = 1.0e-6, 1.0e-9 + small_value_mask = torch.less_equal(abs_bench, small_value) + small_value_mask = torch.logical_and(small_value_mask, both_finite_mask) + normal_value_mask = torch.logical_and(both_finite_mask, torch.logical_not(small_value_mask)) + abs_err_mask = torch.greater(abs_err, small_value_atol) + abs_err_mask = torch.logical_and(abs_err_mask, small_value_mask) + if torch.sum(small_value_mask) == 0: + small_value_err_proportion = 0 + else: + small_value_err_proportion = torch.sum(abs_err_mask) / 
torch.sum(small_value_mask) + rel_err = torch.where(normal_value_mask, rel_err, -1 * torch.ones(out_device.shape)) + if torch.max(rel_err) >= 0: + max_rel_err = torch.max(rel_err) + else: + max_rel_err = 0 + if torch.sum(normal_value_mask) == 0: + mean_rel_err = 0 + else: + mean_rel_err = torch.sum(torch.clamp(rel_err, min=0)) / torch.sum(normal_value_mask) + rmse = compute_rmse(abs_err, normal_value_mask) + error_balance = compute_error_balance(out_device, out_bench) + print("compare standard: benchmark standard") + print(f"small value error proportion is {{small_value_error_proportion}}") + print(f"maximum relative error is {{max_rel_err}}") + print(f"mean relative error is {{mean_rel_err}}") + print(f"root mean squared error is {{rmse}}") + print(f"error balance is {{error_balance}}") + else: + print(f"ERROR: out_device dtype is {{dtype_device}}, out_bench dtype is {{dtype_bench}}, not comparable.") + return None + + +def compare_element(out_device, out_bench, api_name): + if type(out_device) != type(out_bench): + print("ERROR: out_device and out_bench is not the same type!") + return None + if isinstance(out_bench, torch.Tensor): + print(f"data type: {{type(out_bench)}}") + compare_tensor(out_device, out_bench, api_name) + elif isinstance(out_bench, (bool, int, float, str)): + print(f"data type: {{type(out_bench)}}") + if out_device == out_bench: + print("PASS: out_device and out_bench equals.") + else: + print("ERROR: out_device and out_bench is not equal!") + else: + print(f"ERROR: comparison of type {{type(out_bench)}} is not supported.") + return None + + +def compare(out_device, out_bench, api_name): + print("Compare result:") + if type(out_device) != type(out_bench): + print("ERROR: out_device and out_bench is not the same type!") + print("Compare finished.") + return None + if isinstance(out_bench, (list, tuple)): + print(f"data type: {{type(out_bench)}}") + if len(out_device) != len(out_bench): + print("ERROR: len of out_device and out_bench is 
different!") + print("Compare finished.") + return None + for index, _ in enumerate(out_bench): + print(f"index {{index}}:") + compare_element(out_device[index], out_bench[index], api_name) + else: + compare_element(out_device, out_bench, api_name) + print("Compare finished.") + + +device = get_device() +api_name = {api_name} +torch.manual_seed({random_seed}) +for i in range({iter_times}): + args_device, kwargs_device, args_bench, kwargs_bench = get_input() + output_device = exec_api_device(args_device, kwargs_device) + output_bench = exec_api_bench(args_bench, kwargs_bench) + compare(output_device, output_bench, api_name) -- Gitee From c6b713996d81ab95564d2454aeadd5a25927bdd8 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Tue, 4 Jun 2024 22:40:03 +0800 Subject: [PATCH 002/141] bugfix test forward compare --- .../generate_op_script/operator_replication.template | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 54d1ab01e3..5e4f7cae51 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -205,7 +205,7 @@ def compare_tensor(out_device, out_bench, api_name): else: min_eb, exponent_num = -126, 23 eb = torch.where(abs_bench == 0, torch.zeros(out_bench.shape), torch.floor(torch.log2(abs_bench))) - eb = torch.maximum(eb, min_eb) + eb = torch.maximum(eb, min_eb * torch.ones(out_bench.shape)) if dtype_device == torch.float32: ulp_err = (out_device.to(torch.float64) - out_bench).to(torch.float64) * torch.exp2(-eb + exponent_num).to(torch.float64) else: @@ -249,7 +249,7 @@ def compare_tensor(out_device, out_bench, api_name): rmse = compute_rmse(abs_err, normal_value_mask) error_balance = 
compute_error_balance(out_device, out_bench) print("compare standard: benchmark standard") - print(f"small value error proportion is {{small_value_error_proportion}}") + print(f"small value error proportion is {{small_value_err_proportion}}") print(f"maximum relative error is {{max_rel_err}}") print(f"mean relative error is {{mean_rel_err}}") print(f"root mean squared error is {{rmse}}") @@ -298,9 +298,10 @@ def compare(out_device, out_bench, api_name): device = get_device() -api_name = {api_name} +api_name = "{api_name}" torch.manual_seed({random_seed}) for i in range({iter_times}): + print(f"iter: {{i}}:") args_device, kwargs_device, args_bench, kwargs_bench = get_input() output_device = exec_api_device(args_device, kwargs_device) output_bench = exec_api_bench(args_bench, kwargs_bench) -- Gitee From 4eaa6a0df3166b9b0162a5ceb6cb179b55c0b324 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 02:37:04 +0800 Subject: [PATCH 003/141] fix op_generator.py --- .../generate_op_script/op_generator.py | 102 ++++++++++-------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index ba731f7239..69a76a2bc8 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -16,9 +16,6 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -NUMPY_TYPE = ["numpy.int8", "numpy.int16", "numpy.int32", "numpy.int64", "numpy.uint8", "numpy.uint16", "numpy.uint32", - "numpy.uint64", "numpy.float16", 
"numpy.float32", "numpy.float64", "numpy.float128", "numpy.complex64", - "numpy.complex128", "numpy.complex256", "numpy.bool_", "numpy.string_", "numpy.bytes_", "numpy.unicode_"] RAISE_PRECISION = { "torch.float16": "torch.float32", "torch.half": "torch.float32", @@ -29,82 +26,94 @@ RAISE_PRECISION = { ''' -user_settings could be adjusted by user. +user_settings could be set by user. keys: - full_api_name: api_type.api_name.ordinal_number direction_status: forward or backward json_path : path of forward json file or backward json file - mode : random_data_mode or real_data_mode - random_seed: if mode is random_data_mode, random seed is random_seed - iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter - real_data_path: path of real data - output_path : path of output files + mode : random_data or real_data + random_seed: if mode is random_data, random seed is random_seed; if mode is real_data, random_seed does not matter + iter_times: if mode is random_data, generate iter_times groups of data; if mode is real_data, iter_times does not matter ''' user_settings = { - "full_api_name": "Torch.matmul.83", "direction_status": "forward", "json_path": "", - "mode": "real_data", + "mode": "random_data", "random_seed": 1234, - "iter_times": 5, - "real_data_path": "", - "output_path": "" + "iter_times": 5 } -def check_full_api_name(full_api_name): +def check_json(json_path): ''' + api_full_name: api_type.api_name.ordinal_number {api_type}_{api_name}_{api调用次数} - two things: new name format, what parts of full_api_name is needed + two things: new name format, what parts of api_full_name is needed ''' - pass + json_file = os.path.realpath(json_path) + with open(json_file) as f: + json_content = json.load(f) + if not isinstance(json_content, dict): + raise ValueError("content of json file is not a dictionary!") + if len(list(json_content.items())) > 1: + raise ValueError("json file has more than one API, 
only one API is allowed!") + (api_full_name, api_info_dict) = list(json_content.items())[0] + (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + if api_type not in ("Functional", "Tensor", "Torch"): + raise ValueError("this type of API is not supported!") + return (api_full_name, api_info_dict) def check_user_settings(user_settings): - check_full_api_name(user_settings["full_api_name"]) - if user_settings["mode"] != "random" and user_settings["mode"] != "real_data": - raise Exception("Error: mode must be random or real_data!") - if user_settings["mode"] == "real_data": - pass - with open(user_settings["json_path"]) as f: - json_content = json.load(f) - (api_full_name, api_info_dict) = list(json_content.items())[0] - return api_info_dict + if user_settings.get("direction_status") not in ("forward", "backward"): + raise ValueError("direction_status should be forward or backward!") + if user_settings.get("mode") not in ("random_data","real_data"): + raise ValueError("mode should be random_data or real_data!") + r_seed = user_settings.get("random_seed") + if not isinstance(r_seed, int): + raise ValueError("random_seed should be an integer!") + iter_t = user_settings.get("iter_times") + if not isinstance(iter_t, int) or iter_t <= 0: + raise ValueError("iter_times should be an integer bigger than zero!") + (api_full_name, api_info_dict) = check_json(user_settings.get("json_path")) + return api_full_name, api_info_dict def get_settings(): ''' - internal_settings contain all information needed for the program. + internal_settings contain all information needed for the operator program. 
keys: - full_api_name: api_type.api_name.ordinal_number - api_type: type of api, should be Functional, Torch or Tensor - api_name: name of api + api_full_name: api_type.api_name.ordinal_number + api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch + api_name: name of API ordinal_number: how many times the same api has been called direction_status: forward or backward - json_path : path of forward json file or backward json file - mode : random_data_mode or real_data_mode - random_seed: if mode is random_data_mode, random seed is random_seed - iter_times: if mode is random_data_mode, generate iter_times group of data; if mode is real_data_mode, iter_times does not matter - real_data_path: path of real data - output_path : path of output files + random_seed: if mode is random_data, random seed is random_seed + iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter + args_element_assignment: code for args assignment + args_list_generator_device: code for generate args list on device + args_list_generator_bench: code for generate args list on bench + kwargs_value_assignment: code for kwargs assignment + kwargs_dict_generator_device: code for generate kwargs dict on device + kwargs_dict_generator_bench: code for generate kwargs dict on bench ''' - api_info_dict = check_user_settings(user_settings) + api_full_name, api_info_dict = check_user_settings(user_settings) args_info = api_info_dict.get("args") kwargs_info = api_info_dict.get("kwargs") internal_settings = {} - internal_settings["full_api_name"] = user_settings.get("full_api_name") - parts_of_full_api_name = internal_settings["full_api_name"].split(".", -1) - if parts_of_full_api_name[0] == "Functional": + internal_settings["api_full_name"] = api_full_name + (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) + if api_type == "Functional": internal_settings["api_type"] = "torch.nn.functional" - elif 
parts_of_full_api_name[0] == "Tensor": + elif api_type == "Tensor": internal_settings["api_type"] = "torch.Tensor" else: internal_settings["api_type"] = "torch" - internal_settings["api_name"] = parts_of_full_api_name[1] - internal_settings["ordinal_number"] = parts_of_full_api_name[2] + internal_settings["api_name"] = api_name + internal_settings["ordinal_number"] = ordinal_number + internal_settings["direction_status"] = user_settings.get("direction_status") internal_settings["random_seed"] = user_settings.get("random_seed") - if user_settings.get("mode") == "random_data_mode": + if user_settings.get("mode") == "real_data": internal_settings["iter_times"] = 1 else: internal_settings["iter_times"] = user_settings.get("iter_times") @@ -244,7 +253,7 @@ def main(): internal_settings = get_settings() template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") - operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("full_api_name"))) + operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) try: with open(template_path, 'r') as ftemp, open(operator_script_path, 'w') as fout: @@ -258,4 +267,3 @@ def main(): if __name__ == "__main__": main() - print("Job done successfully.") -- Gitee From e5fb95bb631683c9f9641dac01b28121bb13b2e2 Mon Sep 17 00:00:00 2001 From: TAJh <2559659915@qq.com> Date: Wed, 5 Jun 2024 01:14:05 +0000 Subject: [PATCH 004/141] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: TAJh <2559659915@qq.com> --- debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py b/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py index 
993b5e2cf1..6ee91dd7e8 100644 --- a/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py +++ b/debug/accuracy_tools/api_checker/ut_case/ArgMaxWithValue_ut.py @@ -21,12 +21,13 @@ class ArgMaxWithValueUT(UTBase): len_args = len(args) self.axis = self.kwargs.get("axis") if self.kwargs else 0 self.keep_dims = self.kwargs.get("keep_dims") if self.kwargs else False + def forward_mindspore_impl(self, *args): x = args[0] net = ArgMaxWithValue(self.axis, self.keep_dims) out = net(x) return out - + def forward_pytorch_impl(self, *args): input_pt_x = args[0] value, index = torch.max(input_pt_x, self.axis, self.keep_dims) -- Gitee From 6e3e1112c233382f269c064754b48d54c098f453 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:11:56 +0800 Subject: [PATCH 005/141] bugfix real_data --- .../generate_op_script/op_generator.py | 21 +++++-------------- .../operator_replication.template | 8 ++++++- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 69a76a2bc8..d5edca540d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -16,13 +16,6 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -RAISE_PRECISION = { - "torch.float16": "torch.float32", - "torch.half": "torch.float32", - "torch.bfloat16": "torch.float32", - "torch.float32": "torch.float64", - "torch.float": "torch.float64" -} ''' @@ -163,10 +156,8 @@ def recursive_args_list(args_info, flag_device=False, flag_bench=False): 
if flag_device: args_list_generator += ".to(device)" if flag_bench: - data_dtype = arg.get("dtype") - raised_dtype = RAISE_PRECISION.get(data_dtype) - if raised_dtype: - args_list_generator += ".to(" + raised_dtype + ")" + args_list_generator += '.to(torch.device("cpu"))' + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("paramter_name") + ".dtype))" args_list_generator += ", " return args_list_generator @@ -210,12 +201,10 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += info.get("parameter_name") if info.get("type") in TENSOR_DATA_LIST: if flag_device: - kwargs_dict_generator += "to(device)" + kwargs_dict_generator += ".to(device)" if flag_bench: - data_dtype = info.get("dtype") - raised_dtype = RAISE_PRECISION.get(data_dtype) - if raised_dtype: - kwargs_dict_generator += "to(" + raised_dtype + ")" + kwargs_dict_generator += '.to(torch.device("cpu"))' + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("paramter_name") + ".dtype))" else: kwargs_dict_generator = "" if isinstance(info, list): diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 5e4f7cae51..714f60e300 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -17,7 +17,13 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] - +RAISE_PRECISION = 
{ + "torch.float16": torch.float32, + "torch.half": torch.float32, + "torch.bfloat16": torch.float32, + "torch.float32": torch.float64, + "torch.float": torch.float64 +} def get_device(): if torch.cuda.is_available(): -- Gitee From b3aad7c2e896d3df259162181e85309cd88ed92e Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:21:49 +0800 Subject: [PATCH 006/141] bugfix real_data --- .../api_accuracy_checker/generate_op_script/op_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index d5edca540d..1964227f73 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -157,7 +157,7 @@ def recursive_args_list(args_info, flag_device=False, flag_bench=False): args_list_generator += ".to(device)" if flag_bench: args_list_generator += '.to(torch.device("cpu"))' - args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("paramter_name") + ".dtype))" + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("parameter_name") + ".dtype))" args_list_generator += ", " return args_list_generator @@ -204,7 +204,7 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += ".to(device)" if flag_bench: kwargs_dict_generator += '.to(torch.device("cpu"))' - kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("paramter_name") + ".dtype))" + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("parameter_name") + ".dtype))" else: kwargs_dict_generator = "" if isinstance(info, list): -- Gitee 
From 45235137d93b90df94abd86fc1f7124e35524bbc Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:24:41 +0800 Subject: [PATCH 007/141] bugfix real_data --- .../generate_op_script/operator_replication.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index 714f60e300..ad9664e177 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -17,13 +17,13 @@ TORCH_INT_TYPE = ["torch.uint8", "torch.int8", "torch.int16", "torch.short", "to TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.float32", "torch.float", "torch.float64", "torch.double"] TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -RAISE_PRECISION = { +RAISE_PRECISION = {{ "torch.float16": torch.float32, "torch.half": torch.float32, "torch.bfloat16": torch.float32, "torch.float32": torch.float64, "torch.float": torch.float64 -} +}} def get_device(): if torch.cuda.is_available(): -- Gitee From c8854fc92c0acda1ddb0bc12d4cc60d234f7e1a9 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 11:29:39 +0800 Subject: [PATCH 008/141] bugfix real_data --- .../api_accuracy_checker/generate_op_script/op_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 1964227f73..b50437e599 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ 
-157,7 +157,7 @@ def recursive_args_list(args_info, flag_device=False, flag_bench=False): args_list_generator += ".to(device)" if flag_bench: args_list_generator += '.to(torch.device("cpu"))' - args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), default=" + arg.get("parameter_name") + ".dtype))" + args_list_generator += ".to(RAISE_PRECISION.get(str(" + arg.get("parameter_name") + ".dtype), " + arg.get("parameter_name") + ".dtype))" args_list_generator += ", " return args_list_generator @@ -204,7 +204,7 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += ".to(device)" if flag_bench: kwargs_dict_generator += '.to(torch.device("cpu"))' - kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), default=" + info.get("parameter_name") + ".dtype))" + kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), " + info.get("parameter_name") + ".dtype))" else: kwargs_dict_generator = "" if isinstance(info, list): -- Gitee From a166c2db2590998751864d1b3b0b9541dd81cd06 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 12:54:09 +0800 Subject: [PATCH 009/141] bugfix from others opinion --- .../api_accuracy_checker/generate_op_script/op_generator.py | 4 ++-- .../generate_op_script/operator_replication.template | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index b50437e599..97d4ba1d8a 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -52,7 +52,7 @@ def check_json(json_path): (api_full_name, api_info_dict) = list(json_content.items())[0] (api_type, api_name, ordinal_number) = api_full_name.split(".", -1) 
if api_type not in ("Functional", "Tensor", "Torch"): - raise ValueError("this type of API is not supported!") + raise ValueError("type {0} of API is not supported!".format(api_type)) return (api_full_name, api_info_dict) @@ -61,7 +61,7 @@ def check_user_settings(user_settings): raise ValueError("direction_status should be forward or backward!") if user_settings.get("mode") not in ("random_data","real_data"): raise ValueError("mode should be random_data or real_data!") - r_seed = user_settings.get("random_seed") + r_seed = user_settings.get("random_seed", 1234) if not isinstance(r_seed, int): raise ValueError("random_seed should be an integer!") iter_t = user_settings.get("iter_times") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index ad9664e177..c0a571208b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -153,6 +153,9 @@ def compare_tensor(out_device, out_bench, api_name): if torch.numel(out_bench) == 0: print("Both out_device and out_bench have zero elements.") return None + print(f"shape is {{out_bench.shape}}") + print(f"dtype of out_device is {{out_device.dtype}}") + print(f"dtype of out_bench is {{out_bench.dtype}}") dtype_device = out_device.dtype dtype_bench = out_bench.dtype if str(dtype_device) in TORCH_FLOAT_TYPE and str(dtype_bench) in TORCH_FLOAT_TYPE \ -- Gitee From 738da5a838787dcdae650b212627c2fa0bcb84c4 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Wed, 5 Jun 2024 17:37:32 +0800 Subject: [PATCH 010/141] script generated not rely on accuracy_tools --- .../generate_op_script/op_generator.py | 15 +++++++++++++- .../operator_replication.template | 20 +++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git 
a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 97d4ba1d8a..b1bd26e711 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -8,6 +8,8 @@ try: except ImportError: pass +from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi + TENSOR_DATA_LIST = ["torch.Tensor"] TORCH_BOOL_TYPE = ["torch.bool"] @@ -39,7 +41,7 @@ user_settings = { def check_json(json_path): ''' api_full_name: api_type.api_name.ordinal_number - {api_type}_{api_name}_{api调用次数} + {api_type}.{api_name}.{api调用次数} two things: new name format, what parts of api_full_name is needed ''' json_file = os.path.realpath(json_path) @@ -71,6 +73,16 @@ def check_user_settings(user_settings): return api_full_name, api_info_dict +def get_compare_standard(api_name): + if api_name in BinaryStandardApi: + return "CompareStandard.BINARY_EQUALITY_STANDARD" + if api_name in AbsoluteStandardApi: + return "CompareStandard.ABSOLUTE_THRESHOLD_STANDARD" + if api_name in ULPStandardApi: + return "CompareStandard.ULP_ERROR_STANDARD" + return "CompareStandard.BENCHMARK_STANDARD" + + def get_settings(): ''' internal_settings contain all information needed for the operator program. 
@@ -103,6 +115,7 @@ def get_settings(): else: internal_settings["api_type"] = "torch" internal_settings["api_name"] = api_name + internal_settings["compare_standard"] = get_compare_standard(api_name) internal_settings["ordinal_number"] = ordinal_number internal_settings["direction_status"] = user_settings.get("direction_status") internal_settings["random_seed"] = user_settings.get("random_seed") diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index c0a571208b..aff89c6a4d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -1,14 +1,13 @@ import json import os import math +from enum import Enum import torch try: import torch_npu except ImportError: pass -from api_accuracy_checker.compare.compare_utils import BinaryStandardApi, AbsoluteStandardApi, ULPStandardApi - TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] TORCH_BOOL_TYPE = ["torch.bool"] @@ -25,6 +24,14 @@ RAISE_PRECISION = {{ "torch.float": torch.float64 }} + +class CompareStandard(Enum): + BINARY_EQUALITY_STANDARD = auto() + ABSOLUTE_THRESHOLD_STANDARD = auto() + ULP_ERROR_STANDARD = auto() + BENCHMARK_STANDARD = auto() + + def get_device(): if torch.cuda.is_available(): device = torch.device("cuda") @@ -162,8 +169,8 @@ def compare_tensor(out_device, out_bench, api_name): or str(dtype_device) in TORCH_INT_TYPE and str(dtype_bench) in TORCH_INT_TYPE \ or str(dtype_device) in TORCH_BOOL_TYPE and str(dtype_bench) in TORCH_BOOL_TYPE: out_device = out_device.to(torch.device("cpu")) - if str(dtype_device) in TORCH_BOOL_TYPE or str(dtype_device) in TORCH_INT_TYPE or api_name in BinaryStandardApi: - print("compare standard: binary consistency standard:") + if str(dtype_device) in TORCH_BOOL_TYPE or 
str(dtype_device) in TORCH_INT_TYPE or compare_standard == CompareStandard.BINARY_EQUALITY_STANDARD: + print("compare standard: binary equality standard:") error_number = torch.sum(out_device != out_bench).item() error_rate = error_number / torch.numel(out_bench) print(f"error rate is {{error_rate}}.") @@ -180,7 +187,7 @@ def compare_tensor(out_device, out_bench, api_name): bench_finite_mask = torch.isfinite(out_bench.to(dtype_device)) both_finite_mask = torch.logical_and(device_finite_mask, bench_finite_mask) inf_nan_mask = torch.logical_not(both_finite_mask) - if api_name in AbsoluteStandardApi: + if compare_standard == CompareStandard.ABSOLUTE_THRESHOLD_STANDARD: if dtype_device == torch.float16: rtol, small_value, small_value_atol = 1.0e-3, 1.0e-3, 1.0e-5 elif dtype_device == torch.bfloat16: @@ -206,7 +213,7 @@ def compare_tensor(out_device, out_bench, api_name): print("compare standard: absolute threshold standard") print(f"relative error ratio is {{rel_err_proportion}}") print(f"absolute error ratio is {{abs_err_proportion}}") - elif api_name in ULPStandardApi: + elif compare_standard == CompareStandard.ULP_ERROR_STANDARD: if dtype_device == torch.float16: min_eb, exponent_num = -14, 10 elif dtype_device == torch.bfloat16: @@ -308,6 +315,7 @@ def compare(out_device, out_bench, api_name): device = get_device() api_name = "{api_name}" +compare_standard = {compare_standard} torch.manual_seed({random_seed}) for i in range({iter_times}): print(f"iter: {{i}}:") -- Gitee From 7cdd0637b593f764d31e457b444e8af0122e25d0 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 12:41:32 +0800 Subject: [PATCH 011/141] add argparse --- .../generate_op_script/op_generator.py | 88 ++++++++++--------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index b1bd26e711..48f64fb1e6 100644 --- 
a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -1,3 +1,20 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +import argparse import json import os import math @@ -20,30 +37,7 @@ TORCH_FLOAT_TYPE = ["torch.float16", "torch.half", "torch.bfloat16", "torch.floa TORCH_COMPLEX_TYPE = ["torch.complex32", "torch.chalf", "torch.complex64", "torch.cfloat", "torch.complex128", "torch.cdouble"] -''' -user_settings could be set by user. 
-keys: - direction_status: forward or backward - json_path : path of forward json file or backward json file - mode : random_data or real_data - random_seed: if mode is random_data, random seed is random_seed; if mode is real_data, random_seed does not matter - iter_times: if mode is random_data, generate iter_times groups of data; if mode is real_data, iter_times does not matter -''' -user_settings = { - "direction_status": "forward", - "json_path": "", - "mode": "random_data", - "random_seed": 1234, - "iter_times": 5 -} - - def check_json(json_path): - ''' - api_full_name: api_type.api_name.ordinal_number - {api_type}.{api_name}.{api调用次数} - two things: new name format, what parts of api_full_name is needed - ''' json_file = os.path.realpath(json_path) with open(json_file) as f: json_content = json.load(f) @@ -58,18 +52,11 @@ def check_json(json_path): return (api_full_name, api_info_dict) -def check_user_settings(user_settings): - if user_settings.get("direction_status") not in ("forward", "backward"): - raise ValueError("direction_status should be forward or backward!") - if user_settings.get("mode") not in ("random_data","real_data"): - raise ValueError("mode should be random_data or real_data!") - r_seed = user_settings.get("random_seed", 1234) - if not isinstance(r_seed, int): - raise ValueError("random_seed should be an integer!") - iter_t = user_settings.get("iter_times") - if not isinstance(iter_t, int) or iter_t <= 0: +def check_user_settings(cmd_args): + iter_t = cmd_args.iter_times + if iter_t <= 0: raise ValueError("iter_times should be an integer bigger than zero!") - (api_full_name, api_info_dict) = check_json(user_settings.get("json_path")) + (api_full_name, api_info_dict) = check_json(cmd_args.forward_json_path) return api_full_name, api_info_dict @@ -83,7 +70,7 @@ def get_compare_standard(api_name): return "CompareStandard.BENCHMARK_STANDARD" -def get_settings(): +def get_settings(cmd_args): ''' internal_settings contain all information needed for 
the operator program. keys: @@ -91,7 +78,7 @@ def get_settings(): api_type: type of API, one of torch.nn.functional, torch.Tensor or Torch api_name: name of API ordinal_number: how many times the same api has been called - direction_status: forward or backward + direction_status: forward random_seed: if mode is random_data, random seed is random_seed iter_times: if mode is random_data, generate iter_times group of data; if mode is real_data, iter_times does not matter args_element_assignment: code for args assignment @@ -101,7 +88,7 @@ def get_settings(): kwargs_dict_generator_device: code for generate kwargs dict on device kwargs_dict_generator_bench: code for generate kwargs dict on bench ''' - api_full_name, api_info_dict = check_user_settings(user_settings) + api_full_name, api_info_dict = check_user_settings(cmd_args) args_info = api_info_dict.get("args") kwargs_info = api_info_dict.get("kwargs") @@ -117,12 +104,12 @@ def get_settings(): internal_settings["api_name"] = api_name internal_settings["compare_standard"] = get_compare_standard(api_name) internal_settings["ordinal_number"] = ordinal_number - internal_settings["direction_status"] = user_settings.get("direction_status") - internal_settings["random_seed"] = user_settings.get("random_seed") - if user_settings.get("mode") == "real_data": + internal_settings["direction_status"] = "forward" + internal_settings["random_seed"] = cmd_args.random_seed + if cmd_args.mode == "real_data": internal_settings["iter_times"] = 1 else: - internal_settings["iter_times"] = user_settings.get("iter_times") + internal_settings["iter_times"] = cmd_args.iter_times internal_settings["args_element_assignment"] = generate_args_element_assignment_code(args_info) internal_settings["args_list_generator_device"] = generate_args_list_device(args_info) internal_settings["args_list_generator_bench"] = generate_args_list_bench(args_info) @@ -251,8 +238,25 @@ def generate_kwargs_dict_bench(kwargs_info): return kwargs_dict_generator_bench 
+def op_generator_parser(parser): + parser.add_argument("-forward", "forward_json_path", dest="forward_json_path", type=str, + help=" Path of forward API json file.", + required=True) + parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), + help=" Execute mode, should be random_data or real_data.", + required=True) + parser.add_argement("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, + help=" If mode is random_data, it is random seed.", + required=False) + parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, + help=" If mode is random_data, generate iter_times group of data." + required=False) + + def main(): - internal_settings = get_settings() + parser = argparse.ArgumentParser() + cmd_args = parser.parse_args() + internal_settings = get_settings(cmd_args) template_path = os.path.join(os.path.dirname(__file__), "operator_replication.template") operator_script_path = os.path.join(os.path.dirname(__file__), "{0}.py".format(internal_settings.get("api_full_name"))) -- Gitee From eb723ff50b1c71202559fee491594533f1699720 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:43:00 +0800 Subject: [PATCH 012/141] forward full version --- .../generate_op_script/op_generator.py | 31 ++++++------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 48f64fb1e6..8fda0dfac1 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -140,16 +140,12 @@ def generate_args_element_assignment_code(args_info): def recursive_args_list(args_info, flag_device=False, flag_bench=False): args_list_generator = "" for index, arg in enumerate(args_info): - if isinstance(arg, list): - 
args_list_generator += "[" - new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) - args_list_generator += new_args_list_generator - args_list_generator += "]" - elif isinstance(arg, tuple): - args_list_generator += "(" + if isinstance(arg, (list, tuple)): + (left_bracket, right_bracket) = ("[", "]") if isinstance(arg, list) else ("(", ")") + args_list_generator += left_bracket new_args_list_generator = recursive_args_list(arg, flag_device=flag_device, flag_bench=flag_bench) args_list_generator += new_args_list_generator - args_list_generator += ")" + args_list_generator += right_bracket else: args_list_generator += arg.get("parameter_name") if arg.get("type") in TENSOR_DATA_LIST: @@ -206,19 +202,12 @@ def recursive_kwargs_dict(info, flag_device=False, flag_bench=False): kwargs_dict_generator += '.to(torch.device("cpu"))' kwargs_dict_generator += ".to(RAISE_PRECISION.get(str(" + info.get("parameter_name") + ".dtype), " + info.get("parameter_name") + ".dtype))" else: - kwargs_dict_generator = "" - if isinstance(info, list): - kwargs_dict_generator += "[" - for arg in info: - kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) - kwargs_dict_generator += ", " - kwargs_dict_generator += "]" - else: - kwargs_dict_generator += "(" - for arg in info: - kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) - kwargs_dict_generator += ", " - kwargs_dict_generator += ")" + (left_bracket, right_bracket) = ("[", "]") if isinstance(info, list) else ("(", ")") + kwargs_dict_generator += left_bracket + for arg in info: + kwargs_dict_generator += recursive_kwargs_dict(arg, flag_device=flag_device, flag_bench=flag_bench) + kwargs_dict_generator += ", " + kwargs_dict_generator += right_bracket return kwargs_dict_generator -- Gitee From 8fc903a8cd5d58e1d2b55cb88375cae99a952e9e Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 
14:49:50 +0800 Subject: [PATCH 013/141] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 8fda0dfac1..25ee5e3d96 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -238,7 +238,7 @@ def op_generator_parser(parser): help=" If mode is random_data, it is random seed.", required=False) parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, - help=" If mode is random_data, generate iter_times group of data." + help=" If mode is random_data, generate iter_times group of data.", required=False) -- Gitee From bf58fddd6b60e9f5421c91a64b0fcaa4d614ed5d Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:54:45 +0800 Subject: [PATCH 014/141] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 25ee5e3d96..09548d260d 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -243,7 +243,8 @@ def op_generator_parser(parser): def main(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser() + op_generator_parser(parser) cmd_args = parser.parse_args() internal_settings = get_settings(cmd_args) -- Gitee From c1171b5c8580d785da2f0881fba89851c1d96c91 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:56:52 +0800 Subject: [PATCH 015/141] bugfix --- 
.../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index 09548d260d..d9356cf7a3 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -228,7 +228,7 @@ def generate_kwargs_dict_bench(kwargs_info): def op_generator_parser(parser): - parser.add_argument("-forward", "forward_json_path", dest="forward_json_path", type=str, + parser.add_argument("-forward", "--forward_json_path", dest="forward_json_path", type=str, help=" Path of forward API json file.", required=True) parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), -- Gitee From 922873bb72c656f9d49201d237dceb21257260d8 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 14:58:40 +0800 Subject: [PATCH 016/141] bugfix --- .../api_accuracy_checker/generate_op_script/op_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py index d9356cf7a3..7d3e2b226b 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/op_generator.py @@ -234,7 +234,7 @@ def op_generator_parser(parser): parser.add_argument("-m", "--mode", dest="mode", type=str, choices=("random_data", "real_data"), help=" Execute mode, should be random_data or real_data.", required=True) - parser.add_argement("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, + parser.add_argument("-rs", "--random_seed", dest = "random_seed", type=int, default=1234, help=" If mode is 
random_data, it is random seed.", required=False) parser.add_argument("-it", "--iter_times", dest="iter_times", type=int, default=5, -- Gitee From c684a2bbd12112be134d1beae10acb229bba5763 Mon Sep 17 00:00:00 2001 From: zhangruoyu2 Date: Thu, 6 Jun 2024 15:04:06 +0800 Subject: [PATCH 017/141] bugfix --- .../generate_op_script/operator_replication.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template index aff89c6a4d..7630839aa9 100644 --- a/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template +++ b/debug/accuracy_tools/api_accuracy_checker/generate_op_script/operator_replication.template @@ -1,7 +1,7 @@ import json import os import math -from enum import Enum +from enum import Enum, auto import torch try: import torch_npu -- Gitee From 56dbcfe85d673d944921665a0b7d32b022ca6620 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Thu, 20 Jun 2024 19:27:43 +0800 Subject: [PATCH 018/141] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/atat/pytorch/__init__.py | 1 + .../atat/pytorch/compare/acc_compare.py | 3 +- .../atat/pytorch/visualization/__init__.py | 0 .../pytorch/visualization/compare_tree.py | 313 ++++++++++++++++++ .../pytorch/visualization/graph/__init__.py | 0 .../pytorch/visualization/graph/base_node.py | 97 ++++++ .../atat/pytorch/visualization/graph/graph.py | 28 ++ .../visualization/graph/graph_builder.py | 53 +++ .../pytorch/visualization/graph/node_op.py | 24 ++ .../atat/pytorch/visualization/graph_utils.py | 31 ++ .../pytorch/visualization/json_parse_graph.py | 211 ++++++++++++ 11 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 
debug/accuracy_tools/atat/pytorch/visualization/__init__.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py diff --git a/debug/accuracy_tools/atat/pytorch/__init__.py b/debug/accuracy_tools/atat/pytorch/__init__.py index 482e850f7b..198cea96de 100644 --- a/debug/accuracy_tools/atat/pytorch/__init__.py +++ b/debug/accuracy_tools/atat/pytorch/__init__.py @@ -2,3 +2,4 @@ from .debugger.precision_debugger import PrecisionDebugger from .common.utils import seed_all from .compare.acc_compare import compare from .compare.distributed_compare import compare_distributed +from .visualization.json_parse_graph import compare_graph, build_graph diff --git a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py index be98a51c5a..1de3c2addb 100644 --- a/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/atat/pytorch/compare/acc_compare.py @@ -861,8 +861,9 @@ def compare_process(file_handles, stack_mode, fuzzy_match, summary_compare=False if npu_ops_queue: for npu_data in npu_ops_queue: get_un_match_accuracy(result, npu_data, md5_compare, summary_compare) + result_to_csv(md5_compare, summary_compare, stack_mode, result, output_csv_handle) - header = [] +def result_to_csv(md5_compare, summary_compare, stack_mode, result, output_csv_handle): if md5_compare: header = 
CompareConst.MD5_COMPARE_RESULT_HEADER[:] elif summary_compare: diff --git a/debug/accuracy_tools/atat/pytorch/visualization/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py new file mode 100644 index 0000000000..15d744280a --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -0,0 +1,313 @@ +import os +import json +import stat +from .graph_utils import ToolTip, Suggestions +from .graph.node_op import NodeOp +from ..compare.acc_compare import read_op, merge_tensor, get_accuracy, result_to_csv +from ...core.utils import CompareConst, Const + + +class CompareTree: + def __init__(self, trees, datas, stack_json_data, csv_path, compare_modes, stack_mode=True): + self.tree_n = trees[0] + self.tree_b = trees[1] + self.data_n_dict = datas[0] + self.data_b_dict = datas[1] + self.csv_path = csv_path + self.b_nodes_by_value = {} + self.to_csv_result = [] + self.md5_compare = compare_modes[0] + self.summary_compare = compare_modes[1] + self.real_data_compare = self.summary_compare is False and self.md5_compare is False + self.stack_mode = stack_mode + self.stack_json_data = stack_json_data + self.real_data_compare_nodes = [] + self.fill_b_nodes_dict(self.tree_b) + self.compare_nodes(self.tree_n) + + # 获取节点所有祖先的列表 + @staticmethod + def get_ancestors(node): + ancestors = [] + current_node = node.upnode + while current_node: + ancestors.append(current_node.type) + current_node = current_node.upnode + return list(reversed(ancestors)) + + @staticmethod + def add_real_compare_node_error_key(node_data): + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + value['error_key'] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + node_data[key] = value + + @staticmethod + def 
add_summary_compare_node_error_key(node_data): + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + value['error_key'] = ['Max Magnitude Diff', 'Min Magnitude Diff', 'Mean Magnitude Diff', + 'L2norm Magnitude Diff'] + node_data[key] = value + + @staticmethod + def add_real_compare_suggestions(node): + if node.op == NodeOp.module: + node.suggestions['text'] = Suggestions.Module + node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + if node.op == NodeOp.function_api: + node.suggestions['text'] = Suggestions.API + node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + @staticmethod + def _match_data(data_dict, compare_data, key_list, id_list): + if len(key_list) != len(id_list): + return + for i, key in enumerate(key_list): + data = compare_data[id_list[i]] + if data is not None and 'nan' not in str(data): + data_dict[key] = compare_data[id_list[i]] + + @staticmethod + def _del_item_by_list(data_dict, del_list): + if isinstance(data_dict, dict): + for item in del_list: + if item in data_dict: + del data_dict[item] + + def have_same_ancestors(self, node_a, node_b): + """ + 比较两个节点的所有祖先是否相同 + Args: + node_a: NPU节点 + node_b: Bench节点 + Returns: bool + """ + ancestors_a = self.get_ancestors(node_a) + ancestors_b = self.get_ancestors(node_b) + return ancestors_a == ancestors_b, ancestors_a + + def fill_b_nodes_dict(self, node): + """ + 将树展开为dict,key为node唯一名称,value为node自身,方便根据node唯一名称查找node + """ + if node.type not in self.b_nodes_by_value: + self.b_nodes_by_value[node.type] = [] + self.b_nodes_by_value[node.type].append(node) + for subnode in node.subnodes: + self.fill_b_nodes_dict(subnode) + + def result_to_csv(self): + with os.fdopen(os.open(self.csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), + 'w+') as file_out: + result_to_csv(self.md5_compare, self.summary_compare, self.stack_mode, self.to_csv_result, file_out) + + def compare_nodes(self, node_n): + """ + 
递归比较NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查它们的祖先和参数信息,检查一致则进行精度数据比对 + Args: + node_n: NPU节点 + """ + if node_n.type in self.b_nodes_by_value: + for node_b in self.b_nodes_by_value[node_n.type]: + # 检查两个节点是否有完全相同的祖先链 + flag, ancestors = self.have_same_ancestors(node_n, node_b) + flag = flag and node_n.data_info == node_b.data_info + if flag: + # 如果祖先链相同,data_info相同,将node_b及其祖先添加到node_n的matched_node_link属性中 + ancestors.append(node_b.type) + node_n.matched_node_link = ancestors + node_b.matched_node_link = ancestors + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程比对接口 + compare_result_list = self.compare_node(node_n, node_b) + if compare_result_list: + self.to_csv_result.extend(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + + for subnode in node_n.subnodes: + self.compare_nodes(subnode) + + def compare_node(self, node_n, node_b): + """ + 调用acc_compare.py中的get_accuracy获得精度比对指标 + 真实数据比对模式无法获得精度比对指标,需要调用多进程比对接口 + Args: + node_n: NPU节点 + node_b: Bench节点 + + Returns: 包含参数信息和比对指标(真实数据比对模式除外)的list + """ + result = [] + merge_n = self.parse_node(node_n, self.data_n_dict) + merge_b = self.parse_node(node_b, self.data_b_dict) + get_accuracy(result, merge_n, merge_b, self.summary_compare, self.md5_compare) + return result + + def parse_node(self, node, data_dict): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) + if node.type in self.stack_json_data: + op_parsed_list.append( + {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) + else: + op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) + return merge_tensor(op_parsed_list, self.summary_compare, self.md5_compare) + + def add_compare_result_to_node(self, node, compare_result_list): + """ + 将比对结果添加到节点的输入输出数据中 + Args: + node: 节点 + compare_result_list: 包含参数信息和比对指标(真实数据比对模式除外)的list + """ + # 真实数据比对,先暂存节点,在多进程比对得到精度指标后,再将指标添加到节点 + if self.real_data_compare: + self.real_data_compare_nodes.append(node) 
+ return + compare_in_dict = {} + compare_out_dict = {} + # input和output比对数据分开 + for item in compare_result_list: + if 'output' in item[0]: + compare_out_dict[item[0]] = item + else: + compare_in_dict[item[0]] = item + if self.md5_compare: + precision_status_in = self.add_md5_compare_data(node.input_data, compare_in_dict) + precision_status_out = self.add_md5_compare_data(node.output_data, compare_out_dict) + # 所有输入输出md5比对通过,这个节点才算通过 + precision_status = precision_status_in and precision_status_out + node.data['precision_status'] = precision_status + # md5比对通过为1,否则0 + node.data['precision_index'] = 1 if precision_status else 0 + node.data['md5 Compare Result'] = CompareConst.PASS if precision_status else CompareConst.DIFF + elif self.summary_compare: + precision_status_in, precision_index_in = self.add_summary_compare_data(node.input_data, compare_in_dict) + precision_status_out, precision_index_out = self.add_summary_compare_data(node.output_data, + compare_out_dict) + precision_status = precision_status_in and precision_status_out + precision_index = min(precision_index_in, precision_index_out) + node.data['precision_status'] = precision_status + node.data['precision_index'] = precision_index + if not precision_status: + self.add_summary_compare_node_error_key(node.output_data) + self.add_real_compare_suggestions(node) + + def add_summary_compare_data(self, node_data, compare_data_dict): + precision_status = True + precision_index = 1 + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, + CompareConst.NORM_DIFF] + # 取npu和bench数据进行比较,用完删除 + del_list = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, + CompareConst.NPU_NORM, CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, + CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM] + key_list.extend(del_list) + id_list = [6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17] + self._match_data(value, compare_data, key_list, id_list) + # summary比对是否通过 + precision_status, precision_index = self._summary_compare_judgment(value, precision_status, + precision_index) + self._del_item_by_list(value, del_list) + node_data[key] = value + return precision_status, precision_index + + @staticmethod + def _summary_compare_judgment(data_dict, precision_status, precision_index): + max_magnitude_diff = 0 + item_dict = {(CompareConst.NPU_MAX, CompareConst.BENCH_MAX): (CompareConst.MAX_DIFF, 'Max Magnitude Diff'), + (CompareConst.NPU_MIN, CompareConst.BENCH_MIN): (CompareConst.MIN_DIFF, 'Min Magnitude Diff'), + (CompareConst.NPU_MEAN, CompareConst.BENCH_MEAN): (CompareConst.MEAN_DIFF, 'Mean Magnitude Diff'), + (CompareConst.NPU_NORM, CompareConst.BENCH_NORM): ( + CompareConst.NORM_DIFF, 'L2norm Magnitude Diff')} + for key, value in item_dict.items(): + if isinstance(data_dict.get(key[0]), (float, int)) and isinstance(data_dict.get(key[1]), (float, int)) \ + and isinstance(data_dict.get(value[0]), (float, int)): + magnitude_diff = abs(data_dict.get(value[0])) / ( + max(abs(data_dict.get(key[0])), abs(data_dict.get(key[1]))) + 1e-10) + magnitude_diff = 1 if magnitude_diff > 1 else magnitude_diff + data_dict[value[1]] = magnitude_diff + if magnitude_diff > 0.3: + precision_status = False + max_magnitude_diff = max(max_magnitude_diff, magnitude_diff) + precision_index = 1 - max_magnitude_diff + return precision_status, precision_index + + def add_md5_compare_data(self, node_data, compare_data_dict): + precision_status = True + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = ['md5 Compare Result'] + id_list = [8] + self._match_data(value, compare_data, key_list, id_list) + # md5比对是否通过 + if value.get('md5 Compare Result') != CompareConst.PASS: + precision_status = False + node_data[key] = value + return 
precision_status + + def add_real_compare_data(self, node_data, compare_data_dict): + min_thousandth = float(1) + numbers = [] + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + id_list = [6, 7, 8, 9, 10] + self._match_data(value, compare_data, key_list, id_list) + # 获取一个节点所有的输入或输出最小的双千指标 + thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) + # 可能是None,可能是非数字内容str + try: + thousandth = float(thousandth) + except (ValueError, TypeError): + thousandth = None + if thousandth is not None: + numbers.append(thousandth) + node_data[key] = value + # 双千指标都是None的异常情况 + if not numbers: + min_thousandth = None + else: + min_thousandth = min(numbers + [min_thousandth]) + return min_thousandth + + def get_tool_tip(self): + """ + 用于前端展示字段的具体含义 + """ + if self.summary_compare: + tips = { + CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, + CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, + CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, + CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} + elif self.md5_compare: + tips = { + Const.MD5: ToolTip.MD5} + else: + tips = { + CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.COSINE: ToolTip.COSINE, + CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, + CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} + return json.dumps(tips) + + + + diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py new file mode 100644 index 0000000000..8dfcfbe90b --- /dev/null +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -0,0 +1,97 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BaseNode: + def __init__(self, node_op, node_type, up_node=None, is_forward=True): + self.op = node_op + self.type = node_type + self.id = node_type + self.data = {} + self.outputs = [] + self.inputs = [] + self.output_data = {} + self.input_data = {} + self.upnode = up_node + self.subnodes = [] + if up_node: + up_node.add_subnode(self) + self.is_forward = is_forward + self.pair = None + self.matched_node_link = [] + self.data_info = '' + self.suggestions = {} + + def __str__(self): + info = f'id:\t{self.id}' + return info + + @staticmethod + def _handle_item(data_dict): + del_list = ['requires_grad', 'data_name', 'full_op_name'] + for key, value in data_dict.items(): + if not isinstance(value, dict): + continue + for item in del_list: + if item in value: + del value[item] + BaseNode._formate_floats(value) + + return data_dict + + @staticmethod + def _formate_floats(data_dict): + for key, value in data_dict.items(): + if isinstance(value, float): + data_dict[key] = round(value, 6) + if isinstance(value, str): + # 将单引号删掉,None换成null避免前端解析错误 + value = value.replace("'", "").replace('None', 'null') + if value is None: + value = 'null' + if not isinstance(value, (list, tuple, dict, str)): + value = str(value) + data_dict[key] = value + + def 
get_info(self): + info = f'{self.id}\t{self.op}' + if not self.is_forward: + info += '(b)' + for key in self.data: + info += f'\n{key}:\t{self.data.get(key)}' + return info + + def add_subnode(self, node): + if node.id == self.id: + return + self.subnodes.append(node) + + def get_yaml_dict(self): + result = {} + result['id'] = self.id + result['node_type'] = self.op.value + result['type'] = self.type + result['data'] = self.data + result['output_data'] = self._handle_item(self.output_data) + result['input_data'] = self._handle_item(self.input_data) + result['outputs'] = [(edge_id, node.id) for edge_id, node in self.outputs] + result['inputs'] = [(edge_id, node.id) for edge_id, node in self.inputs] + result['upnode'] = self.upnode.id if self.upnode else 'None' + result['subnodes'] = [node.id for node in self.subnodes] + result['is_forward'] = self.is_forward + result['pair'] = self.pair.id if self.pair else 'None' + result['matched_node_link'] = self.matched_node_link + result['suggestions'] = self.suggestions + return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py new file mode 100644 index 0000000000..849a07a108 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -0,0 +1,28 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class Graph: + def __init__(self): + self.root = None + self.recent_node = None + self.depth = 0 + self.node_map = {} + self.rawid_map = {} + + def __str__(self): + infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] + info = "\n".join(infos) + return info diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py new file mode 100644 index 0000000000..22bb2739f9 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py @@ -0,0 +1,53 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json + +from ....core.file_check_util import FileOpen + + +class GraphBuilder: + + @staticmethod + def export_to_json(filename, graph): + result = {} + result['root'] = graph.root.id if graph.root else 'None' + result['node'] = {} + GraphBuilder._export_dfs(graph.root, result['node']) + with FileOpen(filename, 'w') as f: + f.write(json.dumps(result, indent=4)) + + @staticmethod + def get_graph_result(graph): + result = {} + result['root'] = graph.root.id if graph.root else 'None' + result['node'] = {} + GraphBuilder._export_dfs(graph.root, result['node']) + return result + + @staticmethod + def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): + result = {} + result['NPU'] = GraphBuilder.get_graph_result(graph_n) + result['Bench'] = GraphBuilder.get_graph_result(graph_b) + result['Tooltip'] = tool_tip + with FileOpen(filename, 'w') as f: + f.write(json.dumps(result, indent=4)) + + @staticmethod + def _export_dfs(node, result): + info = node.get_yaml_dict() + result[node.id] = info + for subnode in node.subnodes: + GraphBuilder._export_dfs(subnode, result) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py new file mode 100644 index 0000000000..3249df10c4 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from enum import Enum + + +class NodeOp(Enum): + module = 1 + function_api = 2 + module_api = 3 + tensor = 4 + output = 5 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py new file mode 100644 index 0000000000..193dbaf18e --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py @@ -0,0 +1,31 @@ +class ToolTip: + MAX_DIFF = 'NPU与标杆API统计信息比对,最大值的差值' + MIN_DIFF = 'NPU与标杆API统计信息比对,最小值的差值' + MEAN_DIFF = 'NPU与标杆API统计信息比对,平均值的差值' + NORM_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值' + MAX_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最大值的差值相对误差' + MIN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最小值的差值相对误差' + MEAN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,平均值的差值相对误差' + NORM_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值相对误差' + MD5 = '数据MD5信息,用于比较两个数据信息是否完全一致' + ONE_THOUSANDTH_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差大于千分之一的比例占总元素个数的比例小于千分之一' + COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' + MAX_ABS_ERR = '当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001' + MAX_RELATIVE_ERR = '当最大相对误差越接近0表示其计算的误差越小。当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象' + + +class Suggestions: + Module = '此模块精度比对结果疑似异常,请使用ptdbg工具对模块中的api进行dump比对' + API = '此api精度比对结果疑似异常,请使用api accuracy checker工具对api进行精度检测' + PTDBG = 'ptdbg工具' + PTDBG_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/ptdbg_ascend' + API_ACCURACY_CHECKER = 'api accuracy checker工具' + API_ACCURACY_CHECKER_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker' + + +class Const: + CONSTRUCT_FILE = 'construct.json' + DUMP_FILE = 'dump.json' + STACK_FILE = 'stack.json' + GRAPH_FILE = 'graph.vis' + CSV_NAME = 'compare_result' \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py 
b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py new file mode 100644 index 0000000000..178b1b3d28 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py @@ -0,0 +1,211 @@ +import os +import json +import re +import time +import pandas as pd +from .compare_tree import CompareTree +from .graph_utils import Const +from .graph.graph import Graph +from .graph.base_node import BaseNode +from .graph.node_op import NodeOp +from .graph.graph_builder import GraphBuilder +from ..compare.acc_compare import read_op, task_dumppath_get, _do_multi_process +from ...core.utils import add_time_as_suffix +from ...core.file_check_util import FileOpen, FileChecker, FileCheckConst, create_directory + + +def _load_json_file(file_path): + try: + with FileOpen(file_path, 'r') as file: + file_dict = json.load(file) + if not isinstance(file_dict, dict): + return {} + return file_dict + except json.JSONDecodeError: + return {} + + +def _get_data_inputs_outputs(data_dict: dict): + input_args = data_dict.get('input_args', []) + input_kwargs = data_dict.get('input_kwargs', {}) + output = data_dict.get('output', []) + + input_args = input_args if isinstance(input_args, list) else [] + input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} + output = output if isinstance(output, list) else [] + return input_args, input_kwargs, output + + +def _add_node_data(node_data, node): + """ + acc_compare read_op 解析数据 + """ + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node.type) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if 'output' in full_op_name: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + node.input_data = input_data + node.output_data = output_data + + +def _get_data_info(item): + if isinstance(item, dict): + return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) + elif 
isinstance(item, (list, tuple)): + return str([_get_data_info(sub_item) for sub_item in item]) + return '' + + +def _process_node_data_info(items): + info_str = '' + for item in items: + info_str += _get_data_info(item) + return info_str + + +# 节点所有输入、输出的type、dtype和shape要一样 +def _get_node_data_info(input_args, input_kwargs, output): + return _process_node_data_info(input_args) + _process_node_data_info(input_kwargs) + _process_node_data_info(output) + + +def _get_node_op(node_name: str): + pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' + match = re.match(pattern, node_name) + if match: + return NodeOp.function_api + else: + return NodeOp.module + + +def build_tree(construct_dict, data_dict, root_name='NPU'): + # 创建一个字典来存储已经创建的节点,以便重用 + created_nodes = {} + root_node = BaseNode(NodeOp.module, root_name) + + # 创建一个函数来递归地创建或获取节点 + def get_or_create_node(op, name, up_node=None): + if name not in created_nodes: + # add data + base_node = BaseNode(op, name, up_node) + node_data = data_dict.get(name, {}) + input_args, input_kwargs, output = _get_data_inputs_outputs(node_data) + # 添加输入输出数据 + _add_node_data(node_data, base_node) + + # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 + data_info = _get_node_data_info(input_args, input_kwargs, output) + base_node.data_info = data_info + created_nodes[name] = base_node + elif up_node: + # 如果节点已经存在,但我们现在才知道它的上级节点 + created_nodes[name].upnode = up_node + up_node.add_subnode(created_nodes[name]) + return created_nodes[name] + + # 遍历字典,为每个键值对创建或获取节点 + for subnode, upnode in construct_dict.items(): + if upnode: + up_node = get_or_create_node(_get_node_op(upnode), upnode) + else: + up_node = root_node + get_or_create_node(_get_node_op(subnode), subnode, up_node) + + return root_node, created_nodes + + +def do_build_graph(construct_path, data_path, output_path): + construct_dict = _load_json_file(construct_path) + data_dict = _load_json_file(data_path).get('data', {}) + root_node, created_nodes = build_tree(construct_dict, data_dict, 
'root_node') + graph = Graph() + graph.root = root_node + graph.node_map = created_nodes + GraphBuilder.export_to_json(output_path, graph) + + +def do_compare_graph(construct_path_list, data_path_list, stack_path, output_path, csv_path): + dump_path_param = { + "npu_json_path": data_path_list[0], + "bench_json_path": data_path_list[1], + "stack_json_path": stack_path, + "is_print_compare_log": True + } + # 判断比对模式 + summary_compare, md5_compare = task_dumppath_get(dump_path_param) + + construct_n_dict = _load_json_file(construct_path_list[0]) + data_n_dict = _load_json_file(data_path_list[0]).get('data', {}) + root_n_node, created_n_nodes = build_tree(construct_n_dict, data_n_dict) + construct_b_dict = _load_json_file(construct_path_list[1]) + data_b_dict = _load_json_file(data_path_list[1]).get('data', {}) + root_b_node, created_b_nodes = build_tree(construct_b_dict, data_b_dict) + stack_json_data = _load_json_file(stack_path) + + compare_tree = CompareTree([root_n_node, root_b_node], [data_n_dict, data_b_dict], stack_json_data, + csv_path,[md5_compare, summary_compare]) + compare_tree.result_to_csv() + + if summary_compare is False and md5_compare is False: + # 真实数据比对,开启多进程比对得到精度指标,再写进已创建的csv中 + _do_multi_process(dump_path_param, csv_path) + # 从csv文件读取精度指标,添加到node节点中 + df = pd.read_csv(csv_path) + compare_data_dict = {row[0]: row.tolist() for index, row in df.iterrows()} + for node in compare_tree.real_data_compare_nodes: + min_thousandth_in = compare_tree.add_real_compare_data(node.input_data, compare_data_dict) + min_thousandth_out = compare_tree.add_real_compare_data(node.output_data, compare_data_dict) + if min_thousandth_in and min_thousandth_out: + change_percentage = abs(min_thousandth_in - min_thousandth_out) + else: + change_percentage = 0 + precision_status = True + if change_percentage > 0.1: + precision_status = False + # 精度不达标,双千指标标红 + CompareTree.add_real_compare_node_error_key(node.output_data) + # 添加建议 + 
CompareTree.add_real_compare_suggestions(node) + node.data['precision_status'] = precision_status + node.data['precision_index'] = 0 if change_percentage > 1 else 1 - change_percentage + + graph_n = Graph() + graph_n.root = root_n_node + graph_n.node_map = created_n_nodes + graph_b = Graph() + graph_b.root = root_b_node + graph_n.node_map = created_b_nodes + start_time = time.time() + GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, compare_tree.get_tool_tip()) + end_time = time.time() + print('export_graphs_to_yaml', end_time - start_time) + + +def compare_graph(dump_path_n, dump_path_b, out_path): + create_directory(out_path) + n_path_checker = FileChecker(dump_path_n, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) + n_path_checker.common_check() + b_path_checker = FileChecker(dump_path_b, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) + b_path_checker.common_check() + construct_path_n = os.path.join(dump_path_n, Const.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, Const.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, Const.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, Const.DUMP_FILE) + stack_path = os.path.join(dump_path_n, Const.STACK_FILE) + output_path = os.path.join(out_path, Const.GRAPH_FILE) + csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(Const.CSV_NAME)) + do_compare_graph([construct_path_n, construct_path_b], [data_path_n, data_path_b], + stack_path, output_path,csv_path) + + +def build_graph(dump_path, out_path): + create_directory(out_path) + path_checker = FileChecker(dump_path, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) + path_checker.common_check() + construct_path = os.path.join(dump_path, Const.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, Const.DUMP_FILE) + do_build_graph(construct_path, data_path, out_path) -- Gitee From e50c85984a04e5f9a16d0c378bcc44d5c5b8e603 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 21 Jun 2024 10:47:08 +0800 
Subject: [PATCH 019/141] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/compare_tree.py | 96 +++++++++---------- .../pytorch/visualization/graph/base_node.py | 39 +++++++- .../atat/pytorch/visualization/graph_utils.py | 5 +- .../pytorch/visualization/json_parse_graph.py | 27 +++--- 4 files changed, 99 insertions(+), 68 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py index 15d744280a..f402624807 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -1,7 +1,7 @@ import os import json import stat -from .graph_utils import ToolTip, Suggestions +from .graph_utils import ToolTip, Suggestions, GraphConst from .graph.node_op import NodeOp from ..compare.acc_compare import read_op, merge_tensor, get_accuracy, result_to_csv from ...core.utils import CompareConst, Const @@ -25,9 +25,11 @@ class CompareTree: self.fill_b_nodes_dict(self.tree_b) self.compare_nodes(self.tree_n) - # 获取节点所有祖先的列表 @staticmethod def get_ancestors(node): + """ + 获取节点所有祖先的列表 + """ ancestors = [] current_node = node.upnode while current_node: @@ -37,10 +39,14 @@ class CompareTree: @staticmethod def add_real_compare_node_error_key(node_data): + """ + 精度疑似有问题,这些指标将在前端标红 + """ for key, value in node_data.items(): if not isinstance(value, dict): continue - value['error_key'] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + value[GraphConst.ERROR_KEY] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] node_data[key] = value @staticmethod @@ -48,12 +54,15 @@ class CompareTree: for key, value in node_data.items(): if not isinstance(value, dict): continue - value['error_key'] = ['Max Magnitude Diff', 
'Min Magnitude Diff', 'Mean Magnitude Diff', - 'L2norm Magnitude Diff'] + value[GraphConst.ERROR_KEY] = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] node_data[key] = value @staticmethod def add_real_compare_suggestions(node): + """ + 精度疑似有问题,给一些建议 + """ if node.op == NodeOp.module: node.suggestions['text'] = Suggestions.Module node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL @@ -61,21 +70,33 @@ class CompareTree: node.suggestions['text'] = Suggestions.API node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + @staticmethod + def convert_percentage_to_float(percentage_str): + """ + 百分比字符串转换为浮点型 + Args: + percentage_str: '0.00%'、'23.4%' + Returns: float 0.00、0.234 + """ + try: + percentage_str = percentage_str.replace('%', '') + return float(percentage_str) / 100 + except ValueError: + return 0 + @staticmethod def _match_data(data_dict, compare_data, key_list, id_list): + """ + 绑定精度指标到node的input_data和output_data中 + """ if len(key_list) != len(id_list): return for i, key in enumerate(key_list): data = compare_data[id_list[i]] - if data is not None and 'nan' not in str(data): + if data is not None and 'nan' not in str(data) and str(data) != ' ': data_dict[key] = compare_data[id_list[i]] - - @staticmethod - def _del_item_by_list(data_dict, del_list): - if isinstance(data_dict, dict): - for item in del_list: - if item in data_dict: - del data_dict[item] + else: + data_dict[key] = 'null' def have_same_ancestors(self, node_a, node_b): """ @@ -199,47 +220,26 @@ class CompareTree: def add_summary_compare_data(self, node_data, compare_data_dict): precision_status = True - precision_index = 1 + max_relative_err = 0 for key, value in node_data.items(): if not isinstance(value, dict): continue compare_data = compare_data_dict.get(key) if compare_data: + # 对应比对结果csv的列 key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, - 
CompareConst.NORM_DIFF] - # 取npu和bench数据进行比较,用完删除 - del_list = [CompareConst.NPU_MAX, CompareConst.NPU_MIN, CompareConst.NPU_MEAN, - CompareConst.NPU_NORM, CompareConst.BENCH_MAX, CompareConst.BENCH_MIN, - CompareConst.BENCH_MEAN, CompareConst.BENCH_NORM] - key_list.extend(del_list) - id_list = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + id_list = [6, 7, 8, 9, 10, 11, 12, 13] self._match_data(value, compare_data, key_list, id_list) - # summary比对是否通过 - precision_status, precision_index = self._summary_compare_judgment(value, precision_status, - precision_index) - self._del_item_by_list(value, del_list) + # 相对误差大于0.5疑似有精度问题 + for item in key_list[4:]: + relative_err = CompareTree.convert_percentage_to_float(value.get(item)) + max_relative_err = max(max_relative_err, relative_err) node_data[key] = value - return precision_status, precision_index - - @staticmethod - def _summary_compare_judgment(data_dict, precision_status, precision_index): - max_magnitude_diff = 0 - item_dict = {(CompareConst.NPU_MAX, CompareConst.BENCH_MAX): (CompareConst.MAX_DIFF, 'Max Magnitude Diff'), - (CompareConst.NPU_MIN, CompareConst.BENCH_MIN): (CompareConst.MIN_DIFF, 'Min Magnitude Diff'), - (CompareConst.NPU_MEAN, CompareConst.BENCH_MEAN): (CompareConst.MEAN_DIFF, 'Mean Magnitude Diff'), - (CompareConst.NPU_NORM, CompareConst.BENCH_NORM): ( - CompareConst.NORM_DIFF, 'L2norm Magnitude Diff')} - for key, value in item_dict.items(): - if isinstance(data_dict.get(key[0]), (float, int)) and isinstance(data_dict.get(key[1]), (float, int)) \ - and isinstance(data_dict.get(value[0]), (float, int)): - magnitude_diff = abs(data_dict.get(value[0])) / ( - max(abs(data_dict.get(key[0])), abs(data_dict.get(key[1]))) + 1e-10) - magnitude_diff = 1 if magnitude_diff > 1 else magnitude_diff - data_dict[value[1]] = magnitude_diff - if magnitude_diff > 
0.3: - precision_status = False - max_magnitude_diff = max(max_magnitude_diff, magnitude_diff) - precision_index = 1 - max_magnitude_diff + if max_relative_err > 0.5: + precision_status = False + precision_index = 1 - max_relative_err return precision_status, precision_index def add_md5_compare_data(self, node_data, compare_data_dict): @@ -307,7 +307,3 @@ class CompareTree: CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} return json.dumps(tips) - - - - diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 8dfcfbe90b..049eb48755 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import re class BaseNode: @@ -47,24 +48,52 @@ class BaseNode: for item in del_list: if item in value: del value[item] - BaseNode._formate_floats(value) + BaseNode._format_data(value) return data_dict @staticmethod - def _formate_floats(data_dict): + def _format_data(data_dict): + """ + 格式化数据,小数保留6位,处理一些异常值 + """ for key, value in data_dict.items(): - if isinstance(value, float): - data_dict[key] = round(value, 6) if isinstance(value, str): # 将单引号删掉,None换成null避免前端解析错误 value = value.replace("'", "").replace('None', 'null') - if value is None: + value = BaseNode._format_decimal_string(value) + if value is None or value == ' ': value = 'null' + if isinstance(value, float): + value = round(value, 6) if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value + @staticmethod + def _format_decimal_string(s): + """ + 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + """ + pattern = re.compile(r'\d+\.\d+%?') + matches = pattern.findall(s) + for match in matches: + is_percent = match.endswith('%') + number_str = match.rstrip('%') + decimal_part = number_str.split('.')[1] + # 如果小数位数大于6,进行处理 + if len(decimal_part) > 6: + number_float = float(number_str) + if is_percent: + number_float /= 100 + formatted_number = f"{number_float:.6f}" + # 如果原来是百分数,加回百分号 + if is_percent: + formatted_number += '%' + # 替换原字符串中的数值部分 + s = s.replace(match, formatted_number) + return s + def get_info(self): info = f'{self.id}\t{self.op}' if not self.is_forward: diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py index 193dbaf18e..1f3598a442 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py @@ -23,9 +23,10 @@ class Suggestions: API_ACCURACY_CHECKER_URL = 'https://gitee.com/ascend/att/tree/master/debug/accuracy_tools/api_accuracy_checker' -class Const: +class GraphConst: CONSTRUCT_FILE = 'construct.json' 
DUMP_FILE = 'dump.json' STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' - CSV_NAME = 'compare_result' \ No newline at end of file + CSV_NAME = 'compare_result' + ERROR_KEY = 'error_key' \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py index 178b1b3d28..8bf29c3687 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py @@ -4,7 +4,7 @@ import re import time import pandas as pd from .compare_tree import CompareTree -from .graph_utils import Const +from .graph_utils import GraphConst from .graph.graph import Graph from .graph.base_node import BaseNode from .graph.node_op import NodeOp @@ -54,6 +54,9 @@ def _add_node_data(node_data, node): def _get_data_info(item): + """ + 将api的参数信息拼接成字符串进行匹配 + """ if isinstance(item, dict): return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) elif isinstance(item, (list, tuple)): @@ -68,8 +71,10 @@ def _process_node_data_info(items): return info_str -# 节点所有输入、输出的type、dtype和shape要一样 def _get_node_data_info(input_args, input_kwargs, output): + """ + 节点所有输入、输出的type、dtype和shape要一样 + """ return _process_node_data_info(input_args) + _process_node_data_info(input_kwargs) + _process_node_data_info(output) @@ -191,13 +196,13 @@ def compare_graph(dump_path_n, dump_path_b, out_path): n_path_checker.common_check() b_path_checker = FileChecker(dump_path_b, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) b_path_checker.common_check() - construct_path_n = os.path.join(dump_path_n, Const.CONSTRUCT_FILE) - construct_path_b = os.path.join(dump_path_b, Const.CONSTRUCT_FILE) - data_path_n = os.path.join(dump_path_n, Const.DUMP_FILE) - data_path_b = os.path.join(dump_path_b, Const.DUMP_FILE) - stack_path = os.path.join(dump_path_n, Const.STACK_FILE) - output_path = 
os.path.join(out_path, Const.GRAPH_FILE) - csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(Const.CSV_NAME)) + construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) + stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) + output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) + csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(GraphConst.CSV_NAME)) do_compare_graph([construct_path_n, construct_path_b], [data_path_n, data_path_b], stack_path, output_path,csv_path) @@ -206,6 +211,6 @@ def build_graph(dump_path, out_path): create_directory(out_path) path_checker = FileChecker(dump_path, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) path_checker.common_check() - construct_path = os.path.join(dump_path, Const.CONSTRUCT_FILE) - data_path = os.path.join(dump_path, Const.DUMP_FILE) + construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) do_build_graph(construct_path, data_path, out_path) -- Gitee From 52ae930160301b53713a888620b4b2763a8cc147 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Fri, 21 Jun 2024 11:01:33 +0800 Subject: [PATCH 020/141] =?UTF-8?q?=E5=88=86=E7=BA=A7=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/json_parse_graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py index 8bf29c3687..026bd47c47 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py @@ -213,4 +213,5 @@ def build_graph(dump_path, out_path): path_checker.common_check() construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) - do_build_graph(construct_path, data_path, out_path) + output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) + do_build_graph(construct_path, data_path, output_path) -- Gitee From b3f8020a22515309b0e5165b2fc36a194c8a8bad Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 24 Jun 2024 11:42:03 +0800 Subject: [PATCH 021/141] =?UTF-8?q?=E7=BB=9F=E8=AE=A1=E5=80=BC=E6=AF=94?= =?UTF-8?q?=E5=AF=B9bug=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../accuracy_tools/atat/pytorch/visualization/compare_tree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py index f402624807..0ca80be88c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -239,6 +239,7 @@ class CompareTree: node_data[key] = value if max_relative_err > 0.5: precision_status = False + max_relative_err = 1 if max_relative_err > 1 else max_relative_err precision_index = 1 - max_relative_err return precision_status, precision_index @@ -254,7 +255,7 @@ class CompareTree: self._match_data(value, compare_data, key_list, id_list) # md5比对是否通过 if value.get('md5 Compare Result') != CompareConst.PASS: - precision_status = False + precision_status = False node_data[key] = value return precision_status -- Gitee From a1cdec6b6890a00b19f50ad5475c26afaa8aec6c Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 25 Jun 2024 11:19:08 +0800 Subject: [PATCH 022/141] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= 
=?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/compare_tree.py | 4 +++- .../atat/pytorch/visualization/graph/base_node.py | 7 ++++--- .../atat/pytorch/visualization/graph_utils.py | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py index 0ca80be88c..3f2fb406af 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py @@ -31,6 +31,8 @@ class CompareTree: 获取节点所有祖先的列表 """ ancestors = [] + if not node: + return ancestors current_node = node.upnode while current_node: ancestors.append(current_node.type) @@ -237,7 +239,7 @@ class CompareTree: relative_err = CompareTree.convert_percentage_to_float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value - if max_relative_err > 0.5: + if max_relative_err > GraphConst.MAX_RELATIVE_ERR: precision_status = False max_relative_err = 1 if max_relative_err > 1 else max_relative_err precision_index = 1 - max_relative_err diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 049eb48755..26713200ea 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import re +from ..graph_utils import GraphConst class BaseNode: @@ -65,7 +66,7 @@ class BaseNode: if value is None or value == ' ': value = 'null' if isinstance(value, float): - value = round(value, 6) + value = round(value, GraphConst.DECIMAL) if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value @@ -82,11 +83,11 @@ class BaseNode: number_str = match.rstrip('%') decimal_part = number_str.split('.')[1] # 如果小数位数大于6,进行处理 - if len(decimal_part) > 6: + if len(decimal_part) > GraphConst.DECIMAL: number_float = float(number_str) if is_percent: number_float /= 100 - formatted_number = f"{number_float:.6f}" + formatted_number = f"{number_float:.{GraphConst.DECIMAL}f}" # 如果原来是百分数,加回百分号 if is_percent: formatted_number += '%' diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py index 1f3598a442..eae5084677 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py @@ -29,4 +29,6 @@ class GraphConst: STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' CSV_NAME = 'compare_result' - ERROR_KEY = 'error_key' \ No newline at end of file + ERROR_KEY = 'error_key' + DECIMAL = 6 + MAX_RELATIVE_ERR = 0.5 \ No newline at end of file -- Gitee From 1e099f5a5cf28b6d375dc2c06013c059f64ea416 Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:27:33 +0800 Subject: [PATCH 023/141] =?UTF-8?q?=E5=8F=AF=E8=A7=86=E5=8C=96=E9=87=8D?= =?UTF-8?q?=E6=9E=84=EF=BC=8C=E4=B8=8D=E4=BF=AE=E6=94=B9=E5=B7=B2=E7=9F=A5?= =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=8C=E4=BF=9D=E8=AF=81=E8=BE=93=E5=85=A5?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/__init__.py | 0 .../{graph => builder}/graph_builder.py | 45 +-- .../visualization/builder/graph_parser.py | 108 ++++++ 
.../pytorch/visualization/compare/__init__.py | 0 .../comparator.py} | 356 ++++++------------ .../visualization/compare/graph_comparator.py | 175 +++++++++ .../pytorch/visualization/graph/base_node.py | 32 +- .../atat/pytorch/visualization/graph/graph.py | 8 +- .../pytorch/visualization/graph/node_op.py | 9 + .../pytorch/visualization/json_parse_graph.py | 217 ----------- .../atat/pytorch/visualization/test.py | 81 ++++ .../{graph_utils.py => utils.py} | 65 +++- 12 files changed, 605 insertions(+), 491 deletions(-) create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py rename debug/accuracy_tools/atat/pytorch/visualization/{graph => builder}/graph_builder.py (59%) create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py rename debug/accuracy_tools/atat/pytorch/visualization/{compare_tree.py => compare/comparator.py} (31%) create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py delete mode 100644 debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/test.py rename debug/accuracy_tools/atat/pytorch/visualization/{graph_utils.py => utils.py} (50%) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py similarity index 59% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py rename to debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 22bb2739f9..76d476a996 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph_builder.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# Copyright (c) 2024, Huawei Technologies Co., Ltd. # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,39 +12,42 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json -from ....core.file_check_util import FileOpen +from .graph_parser import GraphParser +from ..utils import load_json_file, save_json_file class GraphBuilder: - + @staticmethod + def build(construct_path, data_path, model_name): + construct_dict = load_json_file(construct_path) + data_dict = load_json_file(data_path).get('data', {}) + graph = GraphParser().parse(construct_dict, data_dict, model_name) + return graph + @staticmethod def export_to_json(filename, graph): + result = GraphBuilder._get_graph_dict(graph) + save_json_file(filename, result) + + # todo 吧两个export归一 + @staticmethod + def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): result = {} - result['root'] = graph.root.id if graph.root else 'None' - result['node'] = {} - GraphBuilder._export_dfs(graph.root, result['node']) - with FileOpen(filename, 'w') as f: - f.write(json.dumps(result, indent=4)) - + result['NPU'] = GraphBuilder._get_graph_dict(graph_n) + result['Bench'] = GraphBuilder._get_graph_dict(graph_b) + result['Tooltip'] = tool_tip + save_json_file(filename, result) + @staticmethod - def get_graph_result(graph): + def _get_graph_dict(graph): result = {} result['root'] = graph.root.id if graph.root else 'None' result['node'] = {} + # todo 可以把这个遍历删除 GraphBuilder._export_dfs(graph.root, result['node']) return result - - @staticmethod - def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): - result = {} - result['NPU'] = GraphBuilder.get_graph_result(graph_n) - result['Bench'] = 
GraphBuilder.get_graph_result(graph_b) - result['Tooltip'] = tool_tip - with FileOpen(filename, 'w') as f: - f.write(json.dumps(result, indent=4)) - + @staticmethod def _export_dfs(node, result): info = node.get_yaml_dict() diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py new file mode 100644 index 0000000000..2227710b74 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py @@ -0,0 +1,108 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ...compare.acc_compare import read_op +from ..graph.graph import Graph +from ..graph.base_node import BaseNode +from ..graph.node_op import NodeOp + + +class GraphParser: + def __init__(self): + pass + + def parse(self, construct_dict, data_dict, model_name): + self.graph = Graph() + self.data_dict = data_dict + self.graph.root = BaseNode(NodeOp.module, model_name) + self.graph.node_map[model_name] = self.graph.root + self._init_nodes(construct_dict) + self.data_dict.clear() + return self.graph + + def _init_nodes(self, construct_dict): + for subnode, upnode in construct_dict.items(): + if upnode: + up_node = self._get_or_create_node(NodeOp.get_node_op(upnode), upnode) + else: + up_node = self.graph.root + self._get_or_create_node(NodeOp.get_node_op(subnode), subnode, up_node) + + # todo 这个函数也得改改 + def _get_or_create_node(self, op, name, up_node=None): + if name not in self.graph.node_map: + # add data + base_node = BaseNode(op, name, up_node) + node_data = self.data_dict.get(name, {}) + input_args, input_kwargs, output = GraphParser._get_data_inputs_outputs(node_data) + # 添加输入输出数据 + GraphParser._add_node_data(node_data, base_node) + + # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 + # 这个东西必须改了,todo + data_info = GraphParser._get_node_data_info(input_args, input_kwargs, output) + base_node.data_info = data_info + self.graph.node_map[name] = base_node + elif up_node: + # 如果节点已经存在,但是我们刚刚获取了他的上级节点 + # todo 这里要加个函数 + self.graph.node_map[name].upnode = up_node + up_node.add_subnode(self.graph.node_map[name]) + return self.graph.node_map[name] + + @staticmethod + def _get_data_inputs_outputs(data_dict: dict): + input_args = data_dict.get('input_args', []) + input_kwargs = data_dict.get('input_kwargs', {}) + output = data_dict.get('output', []) + + input_args = input_args if isinstance(input_args, list) else [] + input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} + output = output if isinstance(output, list) else [] + return input_args, input_kwargs, output + 
+ # todo 要加入basenode + @staticmethod + def _add_node_data(node_data, node): + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node.type) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if 'output' in full_op_name: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + node.input_data = input_data + node.output_data = output_data + + @staticmethod + def _get_data_info(item): + if isinstance(item, dict): + return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) + elif isinstance(item, (list, tuple)): + return str([GraphParser._get_data_info(sub_item) for sub_item in item]) + return '' + + @staticmethod + def _process_node_data_info(items): + info_str = '' + for item in items: + info_str += GraphParser._get_data_info(item) + return info_str + + @staticmethod + def _get_node_data_info(input_args, input_kwargs, output): + return GraphParser._process_node_data_info(input_args) + GraphParser._process_node_data_info(input_kwargs) + GraphParser._process_node_data_info(output) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py similarity index 31% rename from debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py rename to debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py index 3f2fb406af..21f0ae9f39 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare_tree.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py @@ -1,95 +1,126 @@ -import os -import json -import stat -from .graph_utils import ToolTip, Suggestions, GraphConst -from .graph.node_op import NodeOp -from 
..compare.acc_compare import read_op, merge_tensor, get_accuracy, result_to_csv -from ...core.utils import CompareConst, Const - - -class CompareTree: - def __init__(self, trees, datas, stack_json_data, csv_path, compare_modes, stack_mode=True): - self.tree_n = trees[0] - self.tree_b = trees[1] - self.data_n_dict = datas[0] - self.data_b_dict = datas[1] - self.csv_path = csv_path - self.b_nodes_by_value = {} - self.to_csv_result = [] - self.md5_compare = compare_modes[0] - self.summary_compare = compare_modes[1] - self.real_data_compare = self.summary_compare is False and self.md5_compare is False - self.stack_mode = stack_mode - self.stack_json_data = stack_json_data - self.real_data_compare_nodes = [] - self.fill_b_nodes_dict(self.tree_b) - self.compare_nodes(self.tree_n) +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
- @staticmethod - def get_ancestors(node): - """ - 获取节点所有祖先的列表 - """ - ancestors = [] - if not node: - return ancestors - current_node = node.upnode - while current_node: - ancestors.append(current_node.type) - current_node = current_node.upnode - return list(reversed(ancestors)) +import json +from ....core.utils import CompareConst, Const +from ..utils import ToolTip, GraphConst, convert_percentage_to_float - @staticmethod - def add_real_compare_node_error_key(node_data): - """ - 精度疑似有问题,这些指标将在前端标红 - """ - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - value[GraphConst.ERROR_KEY] = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, - CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] - node_data[key] = value - @staticmethod - def add_summary_compare_node_error_key(node_data): +class Comparator: + def __init__(self, summary_compare, md5_compare): + if summary_compare: #0 summary mode, 1 md5 mode, 2 true data mode + self.compare_mode = GraphConst.SUMMARY_COMPARE + elif md5_compare: + self.compare_mode = GraphConst.MD5_COMPARE + else: + self.compare_mode = GraphConst.REAL_DATA_COMPARE + self.csv_data = [] + self.compare_nodes = [] + + def parse_result(self, node, compare_data_dict): + """ + 根据结果返回数据,分别是precision_status,precision_index,和附加数据 + """ + other_dict = {} + if self.is_md5_compare(): + precision_status_in = Comparator.add_md5_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out = Comparator.add_md5_compare_data(node.output_data, compare_data_dict[1]) + # 所有输入输出md5对比通过,这个节点才算通过 + precision_status = precision_status_in and precision_status_out + precision_index = 1 if precision_status else 0 + other_result = CompareConst.PASS if precision_status else CompareConst.DIFF + other_dict['md5 Compare Result'] = other_result + elif self.is_summary_compare(): + precision_status_in, precision_index_in = Comparator.add_summary_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out, precision_index_out = 
Comparator.add_summary_compare_data(node.output_data, compare_data_dict[1]) + precision_status = precision_status_in and precision_status_out + precision_index = min(precision_index_in, precision_index_out) + else: + min_thousandth_in = Comparator.add_real_compare_data(node.input_data, compare_data_dict[0]) + min_thousandth_out = Comparator.add_real_compare_data(node.output_data, compare_data_dict[0]) + if min_thousandth_in and min_thousandth_out: + change_percentage = abs(min_thousandth_in - min_thousandth_out) + else: + change_percentage = 0 + precision_status = True + if change_percentage > 0.1: + precision_status = False + precision_index = 0 if change_percentage > 1 else 1 - change_percentage + return precision_status, precision_index, other_dict + + def prepare_real_data(self, node): + if self.is_real_data_compare(): + self.compare_nodes.append(node) + return True + return False + + # todo 改成私有 + def is_summary_compare(self): + return self.compare_mode == GraphConst.SUMMARY_COMPARE + + def is_md5_compare(self): + return self.compare_mode == GraphConst.MD5_COMPARE + + def is_real_data_compare(self): + return self.compare_mode == GraphConst.REAL_DATA_COMPARE + + def add_csv_data(self, compare_result_list): + if not self.is_real_data_compare(): + return + self.csv_data.extend(compare_result_list) + + def add_error_key(self, node_data): for key, value in node_data.items(): if not isinstance(value, dict): continue - value[GraphConst.ERROR_KEY] = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + if self.is_summary_compare(): + message = [CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + elif self.is_real_data_compare(): + message = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + else: + # todo 这个应该是bug,应该修复 + message = [] + value[GraphConst.ERROR_KEY] = message 
node_data[key] = value - - @staticmethod - def add_real_compare_suggestions(node): - """ - 精度疑似有问题,给一些建议 - """ - if node.op == NodeOp.module: - node.suggestions['text'] = Suggestions.Module - node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL - if node.op == NodeOp.function_api: - node.suggestions['text'] = Suggestions.API - node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL - - @staticmethod - def convert_percentage_to_float(percentage_str): + + def get_tool_tip(self): """ - 百分比字符串转换为浮点型 - Args: - percentage_str: '0.00%'、'23.4%' - Returns: float 0.00、0.234 + 用于前端展示字段的具体含义 """ - try: - percentage_str = percentage_str.replace('%', '') - return float(percentage_str) / 100 - except ValueError: - return 0 + if self.is_summary_compare(): + tips = { + CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, + CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, + CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, + CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} + elif self.is_md5_compare(): + tips = {Const.MD5: ToolTip.MD5} + else: + tips = { + CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, + CompareConst.COSINE: ToolTip.COSINE, + CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, + CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} + # todo 这个要放在外面去 + return json.dumps(tips) @staticmethod def _match_data(data_dict, compare_data, key_list, id_list): """ - 绑定精度指标到node的input_data和output_data中 + 绑定精度指标到node的input_data和output_data """ if len(key_list) != len(id_list): return @@ -99,128 +130,9 @@ class CompareTree: data_dict[key] = compare_data[id_list[i]] else: data_dict[key] = 'null' - - def have_same_ancestors(self, node_a, node_b): - """ - 比较两个节点的所有祖先是否相同 - Args: - node_a: NPU节点 - node_b: Bench节点 - Returns: bool - """ - ancestors_a = self.get_ancestors(node_a) - ancestors_b = self.get_ancestors(node_b) - return ancestors_a == ancestors_b, ancestors_a - - def fill_b_nodes_dict(self, node): - """ - 
将树展开为dict,key为node唯一名称,value为node自身,方便根据node唯一名称查找node - """ - if node.type not in self.b_nodes_by_value: - self.b_nodes_by_value[node.type] = [] - self.b_nodes_by_value[node.type].append(node) - for subnode in node.subnodes: - self.fill_b_nodes_dict(subnode) - - def result_to_csv(self): - with os.fdopen(os.open(self.csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), - 'w+') as file_out: - result_to_csv(self.md5_compare, self.summary_compare, self.stack_mode, self.to_csv_result, file_out) - - def compare_nodes(self, node_n): - """ - 递归比较NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查它们的祖先和参数信息,检查一致则进行精度数据比对 - Args: - node_n: NPU节点 - """ - if node_n.type in self.b_nodes_by_value: - for node_b in self.b_nodes_by_value[node_n.type]: - # 检查两个节点是否有完全相同的祖先链 - flag, ancestors = self.have_same_ancestors(node_n, node_b) - flag = flag and node_n.data_info == node_b.data_info - if flag: - # 如果祖先链相同,data_info相同,将node_b及其祖先添加到node_n的matched_node_link属性中 - ancestors.append(node_b.type) - node_n.matched_node_link = ancestors - node_b.matched_node_link = ancestors - # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程比对接口 - compare_result_list = self.compare_node(node_n, node_b) - if compare_result_list: - self.to_csv_result.extend(compare_result_list) - self.add_compare_result_to_node(node_n, compare_result_list) - - for subnode in node_n.subnodes: - self.compare_nodes(subnode) - - def compare_node(self, node_n, node_b): - """ - 调用acc_compare.py中的get_accuracy获得精度比对指标 - 真实数据比对模式无法获得精度比对指标,需要调用多进程比对接口 - Args: - node_n: NPU节点 - node_b: Bench节点 - - Returns: 包含参数信息和比对指标(真实数据比对模式除外)的list - """ - result = [] - merge_n = self.parse_node(node_n, self.data_n_dict) - merge_b = self.parse_node(node_b, self.data_b_dict) - get_accuracy(result, merge_n, merge_b, self.summary_compare, self.md5_compare) - return result - - def parse_node(self, node, data_dict): - """ - 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 - """ - op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) - if node.type in 
self.stack_json_data: - op_parsed_list.append( - {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) - else: - op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) - return merge_tensor(op_parsed_list, self.summary_compare, self.md5_compare) - - def add_compare_result_to_node(self, node, compare_result_list): - """ - 将比对结果添加到节点的输入输出数据中 - Args: - node: 节点 - compare_result_list: 包含参数信息和比对指标(真实数据比对模式除外)的list - """ - # 真实数据比对,先暂存节点,在多进程比对得到精度指标后,再将指标添加到节点 - if self.real_data_compare: - self.real_data_compare_nodes.append(node) - return - compare_in_dict = {} - compare_out_dict = {} - # input和output比对数据分开 - for item in compare_result_list: - if 'output' in item[0]: - compare_out_dict[item[0]] = item - else: - compare_in_dict[item[0]] = item - if self.md5_compare: - precision_status_in = self.add_md5_compare_data(node.input_data, compare_in_dict) - precision_status_out = self.add_md5_compare_data(node.output_data, compare_out_dict) - # 所有输入输出md5比对通过,这个节点才算通过 - precision_status = precision_status_in and precision_status_out - node.data['precision_status'] = precision_status - # md5比对通过为1,否则0 - node.data['precision_index'] = 1 if precision_status else 0 - node.data['md5 Compare Result'] = CompareConst.PASS if precision_status else CompareConst.DIFF - elif self.summary_compare: - precision_status_in, precision_index_in = self.add_summary_compare_data(node.input_data, compare_in_dict) - precision_status_out, precision_index_out = self.add_summary_compare_data(node.output_data, - compare_out_dict) - precision_status = precision_status_in and precision_status_out - precision_index = min(precision_index_in, precision_index_out) - node.data['precision_status'] = precision_status - node.data['precision_index'] = precision_index - if not precision_status: - self.add_summary_compare_node_error_key(node.output_data) - self.add_real_compare_suggestions(node) - - def add_summary_compare_data(self, node_data, compare_data_dict): + + @staticmethod 
+ def add_summary_compare_data( node_data, compare_data_dict): precision_status = True max_relative_err = 0 for key, value in node_data.items(): @@ -233,19 +145,19 @@ class CompareTree: CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] id_list = [6, 7, 8, 9, 10, 11, 12, 13] - self._match_data(value, compare_data, key_list, id_list) + Comparator._match_data(value, compare_data, key_list, id_list) # 相对误差大于0.5疑似有精度问题 for item in key_list[4:]: - relative_err = CompareTree.convert_percentage_to_float(value.get(item)) + relative_err = convert_percentage_to_float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value - if max_relative_err > GraphConst.MAX_RELATIVE_ERR: + if max_relative_err > 0.5: precision_status = False - max_relative_err = 1 if max_relative_err > 1 else max_relative_err precision_index = 1 - max_relative_err return precision_status, precision_index - def add_md5_compare_data(self, node_data, compare_data_dict): + @staticmethod + def add_md5_compare_data( node_data, compare_data_dict): precision_status = True for key, value in node_data.items(): if not isinstance(value, dict): @@ -254,14 +166,15 @@ class CompareTree: if compare_data: key_list = ['md5 Compare Result'] id_list = [8] - self._match_data(value, compare_data, key_list, id_list) + Comparator._match_data(value, compare_data, key_list, id_list) # md5比对是否通过 if value.get('md5 Compare Result') != CompareConst.PASS: precision_status = False node_data[key] = value return precision_status - - def add_real_compare_data(self, node_data, compare_data_dict): + + @staticmethod + def add_real_compare_data(node_data, compare_data_dict): min_thousandth = float(1) numbers = [] for key, value in node_data.items(): @@ -272,7 +185,7 @@ class CompareTree: key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, 
CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] id_list = [6, 7, 8, 9, 10] - self._match_data(value, compare_data, key_list, id_list) + Comparator._match_data(value, compare_data, key_list, id_list) # 获取一个节点所有的输入或输出最小的双千指标 thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) # 可能是None,可能是非数字内容str @@ -288,25 +201,4 @@ class CompareTree: min_thousandth = None else: min_thousandth = min(numbers + [min_thousandth]) - return min_thousandth - - def get_tool_tip(self): - """ - 用于前端展示字段的具体含义 - """ - if self.summary_compare: - tips = { - CompareConst.MAX_DIFF: ToolTip.MAX_DIFF, - CompareConst.MIN_DIFF: ToolTip.MIN_DIFF, - CompareConst.MEAN_DIFF: ToolTip.MEAN_DIFF, - CompareConst.NORM_DIFF: ToolTip.NORM_DIFF} - elif self.md5_compare: - tips = { - Const.MD5: ToolTip.MD5} - else: - tips = { - CompareConst.ONE_THOUSANDTH_ERR_RATIO: ToolTip.ONE_THOUSANDTH_ERR_RATIO, - CompareConst.COSINE: ToolTip.COSINE, - CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, - CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} - return json.dumps(tips) + return min_thousandth \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py new file mode 100644 index 0000000000..51f6bc34d4 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -0,0 +1,175 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import json +import pandas as pd +from ..utils import Suggestions, GraphConst, load_json_file, write_csv_data +from ..graph.node_op import NodeOp +from .comparator import Comparator +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy +from ....core.utils import Const +from ...compare.acc_compare import task_dumppath_get, _do_multi_process + + +class GraphComparator: + def __init__(self, graphs, data_paths, stack_path, output_path): + self.graph_n = graphs[0] + self.graph_b = graphs[1] + self._parse_param(data_paths, stack_path, output_path) + + def _parse_param(self, data_paths, stack_path, output_path): + self.dump_path_param = { + 'npu_json_path': data_paths[0], + 'bench_json_path': data_paths[1], + 'stack_json_path': stack_path, + 'is_print_compare_log': True + } + self.output_path = output_path + summary_compare, md5_compare = task_dumppath_get(self.dump_path_param) + self.comparator = Comparator(summary_compare, md5_compare) + self.data_n_dict = load_json_file(data_paths[0]).get('data', {}) + self.data_b_dict = load_json_file(data_paths[1]).get('data', {}) + self.stack_json_data = load_json_file(stack_path) + + def compare(self): + self._compare_nodes(self.graph_n.root) + self._postcompare() + + def _postcompare(self): + if not self.comparator.is_real_data_compare(): + return + csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) + write_csv_data(csv_path, self.comparator.is_md5_compare(), self.comparator.is_summary_compare(), True, self.comparator.csv_data) + _do_multi_process(self.dump_path_param, csv_path) + # 从csv文件读取精度指标,添加到node节点中 + df = pd.read_csv(csv_path) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.comparator.compare_nodes: + precision_status, precision_index, _ = self.comparator.parse_result(node, [compare_data_dict]) + # todo 常量改成变量 + 
node.data['precision_status'] = precision_status + node.data['precision_index'] = precision_index + if not precision_status: + self.comparator.add_error_key(node.output_data) + self.add_suggestions(node) + if os.path.isfile(csv_path): + os.remove(csv_path) + + @staticmethod + def add_suggestions(node): + """ + 精度疑似有问题时,提供一些建议 + """ + if node.op == NodeOp.module: + node.suggestions['text'] = Suggestions.Module + node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + elif node.op == NodeOp.function_api: + node.suggestions['text'] = Suggestions.API + node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + def _compare_nodes(self, node_n): + """ + 递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 + 这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 + Args: + node_n: NPU节点 + """ + # todo 这个函数也需要改改,把is_matched删掉, + is_matched, node_b, ancestors = GraphComparator._match_node(node_n, self.graph_b) + if is_matched: + ancestors.append(node_b.type) + node_n.matched_node_link = ancestors + node_b.matched_node_link = ancestors + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 + compare_result_list = self.compare_node(node_n, node_b) + if compare_result_list: + self.comparator.add_csv_data(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + for subnode in node_n.subnodes: + self._compare_nodes(subnode) + + + @staticmethod + def _match_node(node_n, graph_b): + """ + 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 + 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 + 返回匹配结果,匹配到的系欸但,以及祖先列表 + """ + if node_n.id not in graph_b.node_map: + return False, None, None + node_b = graph_b.node_map[node_n.id] + if node_n.data_info != node_b.data_info: + return False, None, None + ancestors_n = node_n.get_ancestors() + ancestors_b = node_b.get_ancestors() + if ancestors_n != ancestors_b: + return False, None, None + return True, node_b, ancestors_n + + def compare_node(self, node_n, node_b): + """ + 调用acc_compare.py中的get_accuracy获得精度对比指标 
+ 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + Args: + node_n: NPU节点 + node_b: Bench节点 + Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + result = [] + # todo 写一个atat adpator + merge_n = self.parse_node(node_n, self.data_n_dict) + merge_b = self.parse_node(node_b, self.data_b_dict) + get_accuracy(result, merge_n, merge_b, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) + return result + + def parse_node(self, node, data_dict): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) + if node.type in self.stack_json_data: + op_parsed_list.append( + {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) + else: + op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) + return merge_tensor(op_parsed_list, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) + + def add_compare_result_to_node(self, node, compare_result_list): + """ + 将比对结果添加到节点的输入输出数据中 + Args: + node: 节点 + compare_result_list: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + # 真实数据比对,先暂存节点,在多进程对比得到精度指标后,再将指标添加到节点中 + if self.comparator.prepare_real_data(node): + return + compare_in_dict = {} + compare_out_dict = {} + # input和output对比数据分开 + for item in compare_result_list: + if 'output' in item[0]: + compare_out_dict[item[0]] = item + else: + compare_in_dict[item[0]] = item + precision_status, precision_index, other_dict = self.comparator.parse_result(node, [compare_in_dict, compare_out_dict]) + node.data['precision_status'] = precision_status + node.data['precision_index'] = precision_index + node.data.update(other_dict) + if not precision_status: + self.comparator.add_error_key(node.output_data) + self.add_suggestions(node) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 26713200ea..5628e3e0ee 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -12,15 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import re -from ..graph_utils import GraphConst class BaseNode: - def __init__(self, node_op, node_type, up_node=None, is_forward=True): + def __init__(self, node_op, node_id, up_node=None): self.op = node_op - self.type = node_type - self.id = node_type + self.type = node_id + self.id = node_id self.data = {} self.outputs = [] self.inputs = [] @@ -30,16 +30,17 @@ class BaseNode: self.subnodes = [] if up_node: up_node.add_subnode(self) - self.is_forward = is_forward + self.is_forward = True self.pair = None self.matched_node_link = [] self.data_info = '' self.suggestions = {} - + # todo 这些都在做什么,都应该确认一下 + def __str__(self): info = f'id:\t{self.id}' return info - + @staticmethod def _handle_item(data_dict): del_list = ['requires_grad', 'data_name', 'full_op_name'] @@ -66,7 +67,7 @@ class BaseNode: if value is None or value == ' ': value = 'null' if isinstance(value, float): - value = round(value, GraphConst.DECIMAL) + value = round(value, 6) if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value @@ -83,11 +84,11 @@ class BaseNode: number_str = match.rstrip('%') decimal_part = number_str.split('.')[1] # 如果小数位数大于6,进行处理 - if len(decimal_part) > GraphConst.DECIMAL: + if len(decimal_part) > 6: number_float = float(number_str) if is_percent: number_float /= 100 - formatted_number = f"{number_float:.{GraphConst.DECIMAL}f}" + formatted_number = f"{number_float:.6f}" # 如果原来是百分数,加回百分号 if is_percent: formatted_number += '%' @@ -125,3 +126,14 @@ class BaseNode: result['matched_node_link'] = self.matched_node_link result['suggestions'] = self.suggestions return result + + def get_ancestors(self): + """ + 获取节点所有祖先的列表 + """ + ancestors = [] + current_node = self.upnode + while 
current_node: + ancestors.append(current_node.id) + current_node = current_node.upnode + return list(reversed(ancestors)) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index 849a07a108..347e8c2c88 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -15,14 +15,14 @@ class Graph: + # todo,这里应该加入一些和图相关的操作 + # 可以把root node 的初始化放进Graph里面 def __init__(self): self.root = None - self.recent_node = None - self.depth = 0 self.node_map = {} - self.rawid_map = {} - + def __str__(self): infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] info = "\n".join(infos) return info + \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index 3249df10c4..015a83abda 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -14,6 +14,7 @@ # limitations under the License. 
from enum import Enum +import re class NodeOp(Enum): @@ -22,3 +23,11 @@ class NodeOp(Enum): module_api = 3 tensor = 4 output = 5 + + @staticmethod + def get_node_op(node_name: str): + pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' + if re.match(pattern, node_name): + return NodeOp.function_api + else: + return NodeOp.module diff --git a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py b/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py deleted file mode 100644 index 026bd47c47..0000000000 --- a/debug/accuracy_tools/atat/pytorch/visualization/json_parse_graph.py +++ /dev/null @@ -1,217 +0,0 @@ -import os -import json -import re -import time -import pandas as pd -from .compare_tree import CompareTree -from .graph_utils import GraphConst -from .graph.graph import Graph -from .graph.base_node import BaseNode -from .graph.node_op import NodeOp -from .graph.graph_builder import GraphBuilder -from ..compare.acc_compare import read_op, task_dumppath_get, _do_multi_process -from ...core.utils import add_time_as_suffix -from ...core.file_check_util import FileOpen, FileChecker, FileCheckConst, create_directory - - -def _load_json_file(file_path): - try: - with FileOpen(file_path, 'r') as file: - file_dict = json.load(file) - if not isinstance(file_dict, dict): - return {} - return file_dict - except json.JSONDecodeError: - return {} - - -def _get_data_inputs_outputs(data_dict: dict): - input_args = data_dict.get('input_args', []) - input_kwargs = data_dict.get('input_kwargs', {}) - output = data_dict.get('output', []) - - input_args = input_args if isinstance(input_args, list) else [] - input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} - output = output if isinstance(output, list) else [] - return input_args, input_kwargs, output - - -def _add_node_data(node_data, node): - """ - acc_compare read_op 解析数据 - """ - input_data = {} - output_data = {} - op_parsed_list = read_op(node_data, node.type) - for 
item in op_parsed_list: - full_op_name = item.get('full_op_name', '') - if 'output' in full_op_name: - output_data[full_op_name] = item - else: - input_data[full_op_name] = item - node.input_data = input_data - node.output_data = output_data - - -def _get_data_info(item): - """ - 将api的参数信息拼接成字符串进行匹配 - """ - if isinstance(item, dict): - return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) - elif isinstance(item, (list, tuple)): - return str([_get_data_info(sub_item) for sub_item in item]) - return '' - - -def _process_node_data_info(items): - info_str = '' - for item in items: - info_str += _get_data_info(item) - return info_str - - -def _get_node_data_info(input_args, input_kwargs, output): - """ - 节点所有输入、输出的type、dtype和shape要一样 - """ - return _process_node_data_info(input_args) + _process_node_data_info(input_kwargs) + _process_node_data_info(output) - - -def _get_node_op(node_name: str): - pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' - match = re.match(pattern, node_name) - if match: - return NodeOp.function_api - else: - return NodeOp.module - - -def build_tree(construct_dict, data_dict, root_name='NPU'): - # 创建一个字典来存储已经创建的节点,以便重用 - created_nodes = {} - root_node = BaseNode(NodeOp.module, root_name) - - # 创建一个函数来递归地创建或获取节点 - def get_or_create_node(op, name, up_node=None): - if name not in created_nodes: - # add data - base_node = BaseNode(op, name, up_node) - node_data = data_dict.get(name, {}) - input_args, input_kwargs, output = _get_data_inputs_outputs(node_data) - # 添加输入输出数据 - _add_node_data(node_data, base_node) - - # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 - data_info = _get_node_data_info(input_args, input_kwargs, output) - base_node.data_info = data_info - created_nodes[name] = base_node - elif up_node: - # 如果节点已经存在,但我们现在才知道它的上级节点 - created_nodes[name].upnode = up_node - up_node.add_subnode(created_nodes[name]) - return created_nodes[name] - - # 遍历字典,为每个键值对创建或获取节点 - for subnode, upnode in 
construct_dict.items(): - if upnode: - up_node = get_or_create_node(_get_node_op(upnode), upnode) - else: - up_node = root_node - get_or_create_node(_get_node_op(subnode), subnode, up_node) - - return root_node, created_nodes - - -def do_build_graph(construct_path, data_path, output_path): - construct_dict = _load_json_file(construct_path) - data_dict = _load_json_file(data_path).get('data', {}) - root_node, created_nodes = build_tree(construct_dict, data_dict, 'root_node') - graph = Graph() - graph.root = root_node - graph.node_map = created_nodes - GraphBuilder.export_to_json(output_path, graph) - - -def do_compare_graph(construct_path_list, data_path_list, stack_path, output_path, csv_path): - dump_path_param = { - "npu_json_path": data_path_list[0], - "bench_json_path": data_path_list[1], - "stack_json_path": stack_path, - "is_print_compare_log": True - } - # 判断比对模式 - summary_compare, md5_compare = task_dumppath_get(dump_path_param) - - construct_n_dict = _load_json_file(construct_path_list[0]) - data_n_dict = _load_json_file(data_path_list[0]).get('data', {}) - root_n_node, created_n_nodes = build_tree(construct_n_dict, data_n_dict) - construct_b_dict = _load_json_file(construct_path_list[1]) - data_b_dict = _load_json_file(data_path_list[1]).get('data', {}) - root_b_node, created_b_nodes = build_tree(construct_b_dict, data_b_dict) - stack_json_data = _load_json_file(stack_path) - - compare_tree = CompareTree([root_n_node, root_b_node], [data_n_dict, data_b_dict], stack_json_data, - csv_path,[md5_compare, summary_compare]) - compare_tree.result_to_csv() - - if summary_compare is False and md5_compare is False: - # 真实数据比对,开启多进程比对得到精度指标,再写进已创建的csv中 - _do_multi_process(dump_path_param, csv_path) - # 从csv文件读取精度指标,添加到node节点中 - df = pd.read_csv(csv_path) - compare_data_dict = {row[0]: row.tolist() for index, row in df.iterrows()} - for node in compare_tree.real_data_compare_nodes: - min_thousandth_in = compare_tree.add_real_compare_data(node.input_data, 
compare_data_dict) - min_thousandth_out = compare_tree.add_real_compare_data(node.output_data, compare_data_dict) - if min_thousandth_in and min_thousandth_out: - change_percentage = abs(min_thousandth_in - min_thousandth_out) - else: - change_percentage = 0 - precision_status = True - if change_percentage > 0.1: - precision_status = False - # 精度不达标,双千指标标红 - CompareTree.add_real_compare_node_error_key(node.output_data) - # 添加建议 - CompareTree.add_real_compare_suggestions(node) - node.data['precision_status'] = precision_status - node.data['precision_index'] = 0 if change_percentage > 1 else 1 - change_percentage - - graph_n = Graph() - graph_n.root = root_n_node - graph_n.node_map = created_n_nodes - graph_b = Graph() - graph_b.root = root_b_node - graph_n.node_map = created_b_nodes - start_time = time.time() - GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, compare_tree.get_tool_tip()) - end_time = time.time() - print('export_graphs_to_yaml', end_time - start_time) - - -def compare_graph(dump_path_n, dump_path_b, out_path): - create_directory(out_path) - n_path_checker = FileChecker(dump_path_n, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) - n_path_checker.common_check() - b_path_checker = FileChecker(dump_path_b, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) - b_path_checker.common_check() - construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) - construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) - data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) - data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) - stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) - output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) - csv_path = os.path.join(os.path.realpath(out_path), add_time_as_suffix(GraphConst.CSV_NAME)) - do_compare_graph([construct_path_n, construct_path_b], [data_path_n, data_path_b], - stack_path, output_path,csv_path) - - -def build_graph(dump_path, 
out_path): - create_directory(out_path) - path_checker = FileChecker(dump_path, FileCheckConst.DIR, FileCheckConst.READ_WRITE_ABLE) - path_checker.common_check() - construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) - data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) - output_path = os.path.join(out_path, GraphConst.GRAPH_FILE) - do_build_graph(construct_path, data_path, output_path) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py b/debug/accuracy_tools/atat/pytorch/visualization/test.py new file mode 100644 index 0000000000..3bd43362a2 --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/test.py @@ -0,0 +1,81 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +import time +import shutil +import filecmp +from .compare.graph_comparator import GraphComparator +from .utils import GraphConst +from .builder.graph_builder import GraphBuilder + + +def compare_graph(dump_path_n, dump_path_b, out_path): + # 对两个数据进行构图 + construct_path_n = os.path.join(dump_path_n, GraphConst.CONSTRUCT_FILE) + construct_path_b = os.path.join(dump_path_b, GraphConst.CONSTRUCT_FILE) + data_path_n = os.path.join(dump_path_n, GraphConst.DUMP_FILE) + data_path_b = os.path.join(dump_path_b, GraphConst.DUMP_FILE) + graph_n = GraphBuilder.build(construct_path_n, data_path_n, 'TestNet') + graph_b = GraphBuilder.build(construct_path_b, data_path_b, 'TestNet') + # 基于graph、stack和data进行比较 + stack_path = os.path.join(dump_path_n, GraphConst.STACK_FILE) + graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) + graph_comparator.compare() + output_path = os.path.join(out_path, 'compare.vis') + GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, graph_comparator.comparator.get_tool_tip()) + +def build_graph(dump_path, out_path): + construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) + data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) + output_path = os.path.join(out_path, 'build.vis') + graph = GraphBuilder.build(construct_path, data_path, 'TestNet') + GraphBuilder.export_to_json(output_path, graph) + +def run_st(data_path): + start_time = time.time() + run_bench(data_path, 'output2') + end_time = time.time() + print('run_st time cost:', end_time - start_time) + # 比较output2的结果和output1 的bench结果差距 + for data_dir in os.listdir(data_path): + data_dir = os.path.join(data_path, data_dir) + if not os.path.isdir(data_dir): + continue + output1 = os.path.join(data_dir, 'output1') + output2 = os.path.join(data_dir, 'output2') + files = ['build.vis', 'compare.vis'] + for vis_file in files: + file1 = os.path.join(output1, vis_file) + file2 = os.path.join(output2, 
vis_file) + result = filecmp.cmp(file1, file2) + if result: + print('pass ' + file1) + else: + print('not pass ' + file1) + +def run_bench(data_path, output_dir): + for data_dir in os.listdir(data_path): + data_dir = os.path.join(data_path, data_dir) + if not os.path.isdir(data_dir): + continue + run_data_path = os.path.join(data_dir, 'data') + output_path = os.path.join(data_dir, output_dir) + if os.path.exists(output_path): + shutil.rmtree(output_path) + os.makedirs(output_path) + build_graph(run_data_path, output_path) + compare_graph(run_data_path, run_data_path, output_path) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py similarity index 50% rename from debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py rename to debug/accuracy_tools/atat/pytorch/visualization/utils.py index eae5084677..ab02ef7d20 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph_utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -1,12 +1,62 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import os +import stat +from ...core.file_check_util import FileOpen +from ..compare.acc_compare import result_to_csv + + +def load_json_file(file_path): + try: + with FileOpen(file_path, 'r') as f: + file_dict = json.load(f) + if not isinstance(file_dict, dict): + return {} + return file_dict + except json.JSONDecodeError: + return {} + +def save_json_file(file_path, data): + with FileOpen(file_path, 'w') as f: + f.write(json.dumps(data, indent=4)) + +def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): + with os.fdopen(os.open(csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), 'w+') as f: + result_to_csv(md5_compare, summary_compare, stack, csv_data, f) + +def convert_percentage_to_float(percentage_str): + """ + 百分比字符串转换转换为浮点型 + Args: + percentage_str: '0.00%', '23.4%' + Returns: float 0.00, 0.234 + """ + try: + percentage_str = percentage_str.replace('%', '') + return float(percentage_str) / 100 + except ValueError: + return 0 + + class ToolTip: MAX_DIFF = 'NPU与标杆API统计信息比对,最大值的差值' MIN_DIFF = 'NPU与标杆API统计信息比对,最小值的差值' MEAN_DIFF = 'NPU与标杆API统计信息比对,平均值的差值' NORM_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值' - MAX_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最大值的差值相对误差' - MIN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,最小值的差值相对误差' - MEAN_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,平均值的差值相对误差' - NORM_MAGNITUDE_DIFF = 'NPU与标杆API统计信息比对,2范数(平方根)的差值相对误差' MD5 = '数据MD5信息,用于比较两个数据信息是否完全一致' ONE_THOUSANDTH_ERR_RATIO = 'Tensor中的元素逐个与对应的标杆数据对比,相对误差大于千分之一的比例占总元素个数的比例小于千分之一' COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' @@ -28,7 +78,8 @@ class GraphConst: DUMP_FILE = 'dump.json' STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' - CSV_NAME = 'compare_result' + CSV_FILE = 'tmp.csv' ERROR_KEY = 'error_key' - DECIMAL = 6 - MAX_RELATIVE_ERR = 0.5 \ No newline at end of file + SUMMARY_COMPARE = 0 + MD5_COMPARE = 1 + REAL_DATA_COMPARE = 2 \ No newline at end of file -- Gitee From 
91b11f0511f31754a70519df3c9b36ad2be223e8 Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:29:27 +0800 Subject: [PATCH 024/141] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=EF=BC=9Bmsprobe=E4=BE=9D=E8=B5=96=E5=88=86?= =?UTF-8?q?=E7=A6=BB=EF=BC=9B=E4=B8=BABaseNode=E5=92=8CGraph=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=9B=B4=E5=A4=9A=E5=9F=BA=E7=A1=80=E8=83=BD=E5=8A=9B?= =?UTF-8?q?=EF=BC=9B=E5=87=8F=E5=B0=91=E6=A8=A1=E5=9D=97=E9=97=B4=E4=BE=9D?= =?UTF-8?q?=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../visualization/builder/graph_builder.py | 87 ++++--- .../visualization/builder/graph_parser.py | 108 --------- .../visualization/builder/msprobe_adapter.py | 174 ++++++++++++++ .../visualization/compare/graph_comparator.py | 200 ++++++---------- .../{comparator.py => mode_adapter.py} | 214 +++++++++--------- .../pytorch/visualization/graph/base_node.py | 123 ++++------ .../atat/pytorch/visualization/graph/graph.py | 79 ++++++- .../pytorch/visualization/graph/node_op.py | 3 + .../atat/pytorch/visualization/test.py | 15 +- .../atat/pytorch/visualization/utils.py | 37 ++- 10 files changed, 582 insertions(+), 458 deletions(-) delete mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py create mode 100644 debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py rename debug/accuracy_tools/atat/pytorch/visualization/compare/{comparator.py => mode_adapter.py} (74%) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 76d476a996..8cce30f0d2 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -13,44 +13,75 @@ # See the License for the specific language governing permissions and # limitations under the 
License. -from .graph_parser import GraphParser -from ..utils import load_json_file, save_json_file +from ..graph.graph import Graph +from ..graph.node_op import NodeOp +from ..utils import load_json_file, load_data_json_file, save_json_file, GraphConst +from .msprobe_adapter import parse_raw_data, get_input_output, get_node_tag class GraphBuilder: @staticmethod - def build(construct_path, data_path, model_name): + def build(construct_path, data_path, model_name='DefaultModel'): + """ + GraphBuilder的对外提供的构图方法 + Args: + construct_path: construct.json路径 + data_path: dump.json路径 + model_name: 模型名字,依赖外部输入 + Returns: Graph,代表图的数据结构 + """ construct_dict = load_json_file(construct_path) - data_dict = load_json_file(data_path).get('data', {}) - graph = GraphParser().parse(construct_dict, data_dict, model_name) + data_dict = load_data_json_file(data_path) + graph = Graph(model_name) + GraphBuilder._init_nodes(graph, construct_dict, data_dict) return graph @staticmethod - def export_to_json(filename, graph): - result = GraphBuilder._get_graph_dict(graph) - save_json_file(filename, result) - - # todo 吧两个export归一 - @staticmethod - def export_graphs_to_json(filename, graph_n, graph_b, tool_tip): + def export_to_json(filename, graph_n, graph_b=None, tool_tip=None): + """ + 将graph到处成.vis文件的接口 + Args: + filename: 输出文件路径 + graph_n: Graph + graph_b: bench Graph,为空是只输出graph_b,不为空会同时输出两个graph,作为对比的结果 + tool_tip: 在对比模型下输出的意见 + """ result = {} - result['NPU'] = GraphBuilder._get_graph_dict(graph_n) - result['Bench'] = GraphBuilder._get_graph_dict(graph_b) - result['Tooltip'] = tool_tip + if graph_b: + result[GraphConst.JSON_NPU_KEY] = graph_n.get_dict2() + result[GraphConst.JSON_BENCH_KEY] = graph_b.get_dict2() + else: + result = graph_n.get_dict2() + if tool_tip: + result[GraphConst.JSON_TIP_KEY] = tool_tip save_json_file(filename, result) @staticmethod - def _get_graph_dict(graph): - result = {} - result['root'] = graph.root.id if graph.root else 'None' - result['node'] = {} - # todo 
可以把这个遍历删除 - GraphBuilder._export_dfs(graph.root, result['node']) - return result - + def _init_nodes(graph, construct_dict, data_dict): + for subnode_id, upnode_id in construct_dict.items(): + if upnode_id: + upnode_op = NodeOp.get_node_op(upnode_id) + upnode = GraphBuilder._create_or_get_node(graph, data_dict, upnode_op, upnode_id) + else: + upnode = graph.root + node_op = NodeOp.get_node_op(subnode_id) + GraphBuilder._create_or_get_node(graph, data_dict, node_op, subnode_id, upnode) + @staticmethod - def _export_dfs(node, result): - info = node.get_yaml_dict() - result[node.id] = info - for subnode in node.subnodes: - GraphBuilder._export_dfs(subnode, result) + def _create_or_get_node(graph, data_dict, op, name, upnode=None): + if name in graph.node_map: + node = graph.get_node(name) + else: + graph.add_node(op, name, upnode) + node = graph.get_node(name) + node_data = data_dict.get(name, {}) + input_args, input_kwargs, output = parse_raw_data(node_data) + # 添加输入输出数据 + input_data, output_data = get_input_output(node_data, node.id) + # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 + tag = get_node_tag([input_args, input_kwargs, output]) + # 跟新数据 + node.set_input_output_tag(input_data, output_data, tag) + # 添加节点 + node.add_upnode(upnode) + return node \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py deleted file mode 100644 index 2227710b74..0000000000 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_parser.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ...compare.acc_compare import read_op -from ..graph.graph import Graph -from ..graph.base_node import BaseNode -from ..graph.node_op import NodeOp - - -class GraphParser: - def __init__(self): - pass - - def parse(self, construct_dict, data_dict, model_name): - self.graph = Graph() - self.data_dict = data_dict - self.graph.root = BaseNode(NodeOp.module, model_name) - self.graph.node_map[model_name] = self.graph.root - self._init_nodes(construct_dict) - self.data_dict.clear() - return self.graph - - def _init_nodes(self, construct_dict): - for subnode, upnode in construct_dict.items(): - if upnode: - up_node = self._get_or_create_node(NodeOp.get_node_op(upnode), upnode) - else: - up_node = self.graph.root - self._get_or_create_node(NodeOp.get_node_op(subnode), subnode, up_node) - - # todo 这个函数也得改改 - def _get_or_create_node(self, op, name, up_node=None): - if name not in self.graph.node_map: - # add data - base_node = BaseNode(op, name, up_node) - node_data = self.data_dict.get(name, {}) - input_args, input_kwargs, output = GraphParser._get_data_inputs_outputs(node_data) - # 添加输入输出数据 - GraphParser._add_node_data(node_data, base_node) - - # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 - # 这个东西必须改了,todo - data_info = GraphParser._get_node_data_info(input_args, input_kwargs, output) - base_node.data_info = data_info - self.graph.node_map[name] = base_node - elif up_node: - # 如果节点已经存在,但是我们刚刚获取了他的上级节点 - # todo 这里要加个函数 - self.graph.node_map[name].upnode = up_node - up_node.add_subnode(self.graph.node_map[name]) - return self.graph.node_map[name] - - @staticmethod - def 
_get_data_inputs_outputs(data_dict: dict): - input_args = data_dict.get('input_args', []) - input_kwargs = data_dict.get('input_kwargs', {}) - output = data_dict.get('output', []) - - input_args = input_args if isinstance(input_args, list) else [] - input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} - output = output if isinstance(output, list) else [] - return input_args, input_kwargs, output - - # todo 要加入basenode - @staticmethod - def _add_node_data(node_data, node): - input_data = {} - output_data = {} - op_parsed_list = read_op(node_data, node.type) - for item in op_parsed_list: - full_op_name = item.get('full_op_name', '') - if 'output' in full_op_name: - output_data[full_op_name] = item - else: - input_data[full_op_name] = item - node.input_data = input_data - node.output_data = output_data - - @staticmethod - def _get_data_info(item): - if isinstance(item, dict): - return str(item.get('type', 'na')) + '_' + str(item.get('dtype', 'na')) + '_' + str(item.get('shape', 'na')) - elif isinstance(item, (list, tuple)): - return str([GraphParser._get_data_info(sub_item) for sub_item in item]) - return '' - - @staticmethod - def _process_node_data_info(items): - info_str = '' - for item in items: - info_str += GraphParser._get_data_info(item) - return info_str - - @staticmethod - def _get_node_data_info(input_args, input_kwargs, output): - return GraphParser._process_node_data_info(input_args) + GraphParser._process_node_data_info(input_kwargs) + GraphParser._process_node_data_info(output) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py new file mode 100644 index 0000000000..d668598f1c --- /dev/null +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -0,0 +1,174 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, task_dumppath_get, _do_multi_process +from ..utils import GraphConst +from ....core.utils import print_info_log + + +def get_compare_mode(dump_path_param): + """ + 获得比较模式,包括summary、MD5和真实数据三种模式 + Args: + dump_path_param: 调用acc_compare接口所以来的参数结构 + Returns: 0 summary mode, 1 md5 mode, 2 true data mode + """ + summary_compare, md5_compare = task_dumppath_get(dump_path_param) + if summary_compare: + compare_mode = GraphConst.SUMMARY_COMPARE + elif md5_compare: + compare_mode = GraphConst.MD5_COMPARE + else: + compare_mode = GraphConst.REAL_DATA_COMPARE + return compare_mode + +def run_real_data(dump_path_param, csv_path): + """ + 多进程运行生成真实数据 + Args: + dump_path_param: 调用acc_compare接口所以来的参数结构 + csv_path: 生成文件路径 + """ + _do_multi_process(dump_path_param, csv_path) + +def parse_raw_data(data_dict: dict): + """ + 进行dump的原始数据解析,提取三个关键字段以进一步处理 + """ + input_args = data_dict.get('input_args', []) + input_kwargs = data_dict.get('input_kwargs', {}) + output = data_dict.get('output', []) + + input_args = input_args if isinstance(input_args, list) else [] + input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} + output = output if isinstance(output, list) else [] + + return input_args, input_kwargs, output + +def get_input_output(node_data, node_id): + """ + 将dump的原始数据进行拆解,分解为output和input两个数据 + Args: + node_data: 属于单个节点的dump数据 + 
node_id: 节点名字 + """ + input_data = {} + output_data = {} + op_parsed_list = read_op(node_data, node_id) + for item in op_parsed_list: + full_op_name = item.get('full_op_name', '') + if 'output' in full_op_name: + output_data[full_op_name] = item + else: + input_data[full_op_name] = item + return input_data, output_data + +def get_node_tag(inputs): + """ + 基于inputs生成节点专属tag,一次判断节点是否相同 + """ + result = "" + for single_input in inputs: + info = '' + for item in single_input: + info += _get_single_tag(item) + result += info + return result + +def format_node_data(data_dict): + """ + 批量进行节点数据的输出 + """ + del_list = ['requires_grad', 'data_name', 'full_op_name'] + for _, value in data_dict.items(): + if not isinstance(value, dict): + continue + for item in del_list: + if item in value: + del value[item] + _format_data(value) + return data_dict + +def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_md5_compare): + """ + 调用acc_compare.py中的get_accuracy获得精度对比指标 + 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list + """ + merge_n = _parse_node(node_ids[0], data_dicts[0], stack_json_data, is_summary_compare, is_md5_compare) + merge_b = _parse_node(node_ids[1], data_dicts[1], stack_json_data, is_summary_compare, is_md5_compare) + result = [] + get_accuracy(result, merge_n, merge_b, is_summary_compare, is_md5_compare) + return result + +def _get_single_tag(item): + if isinstance(item, dict): + return str(item.get('type', GraphConst.TAG_NA)) + '_' + str(item.get('dtype', GraphConst.TAG_NA)) + '_' + str(item.get('shape', GraphConst.TAG_NA)) + elif isinstance(item, (list, tuple)): + return str([_get_single_tag(sub_item) for sub_item in item]) + return '' + +def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_compare): + """ + 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + """ + op_parsed_list = read_op(data_dict.get(node_id, {}), node_id) + if node_id in stack_json_data: + op_parsed_list.append( + 
{'full_op_name': node_id, 'full_info': stack_json_data[node_id]}) + else: + op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) + return merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + +def _format_decimal_string(s): + """ + 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + """ + pattern = re.compile(r'\d{1,20}\.\d{1,20}%?') + matches = pattern.findall(s) + for match in matches: + is_percent = match.endswith('%') + number_str = match.rstrip('%') + decimal_part = number_str.split('.')[1] + # 如果小数位数大于6,进行处理 + if len(decimal_part) > GraphConst.ROUND_TH: + number_float = float(number_str) + if is_percent: + number_float /= 100 + formatted_number = f"{number_float:.{GraphConst.ROUND_TH}f}" + # 如果原来是百分数,加回百分号 + if is_percent: + formatted_number += '%' + # 替换原字符串中的数值部分 + s = s.replace(match, formatted_number) + return s + +def _format_data(data_dict): + """ + 格式化数据,小数保留6位,处理一些异常值 + """ + for key, value in data_dict.items(): + if isinstance(value, str): + # 将单引号删掉,None换成null避免前端解析错误 + value = value.replace("'", "").replace('None', 'null') + value = _format_decimal_string(value) + if value is None or value == ' ': + value = 'null' + if isinstance(value, float): + value = round(value, GraphConst.ROUND_TH) + if not isinstance(value, (list, tuple, dict, str)): + value = str(value) + data_dict[key] = value diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 51f6bc34d4..5426679ae8 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -14,14 +14,12 @@ # limitations under the License. 
import os -import json import pandas as pd -from ..utils import Suggestions, GraphConst, load_json_file, write_csv_data -from ..graph.node_op import NodeOp -from .comparator import Comparator -from ...compare.acc_compare import read_op, merge_tensor, get_accuracy -from ....core.utils import Const -from ...compare.acc_compare import task_dumppath_get, _do_multi_process +from ....core.utils import Const, print_info_log +from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data +from ..utils import GraphConst, load_json_file, load_data_json_file, write_csv_data +from ..graph.graph import Graph +from .mode_adapter import ModeAdapter class GraphComparator: @@ -29,124 +27,18 @@ class GraphComparator: self.graph_n = graphs[0] self.graph_b = graphs[1] self._parse_param(data_paths, stack_path, output_path) - - def _parse_param(self, data_paths, stack_path, output_path): - self.dump_path_param = { - 'npu_json_path': data_paths[0], - 'bench_json_path': data_paths[1], - 'stack_json_path': stack_path, - 'is_print_compare_log': True - } - self.output_path = output_path - summary_compare, md5_compare = task_dumppath_get(self.dump_path_param) - self.comparator = Comparator(summary_compare, md5_compare) - self.data_n_dict = load_json_file(data_paths[0]).get('data', {}) - self.data_b_dict = load_json_file(data_paths[1]).get('data', {}) - self.stack_json_data = load_json_file(stack_path) - - def compare(self): - self._compare_nodes(self.graph_n.root) - self._postcompare() - - def _postcompare(self): - if not self.comparator.is_real_data_compare(): - return - csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) - write_csv_data(csv_path, self.comparator.is_md5_compare(), self.comparator.is_summary_compare(), True, self.comparator.csv_data) - _do_multi_process(self.dump_path_param, csv_path) - # 从csv文件读取精度指标,添加到node节点中 - df = pd.read_csv(csv_path) - compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} - for node in 
self.comparator.compare_nodes: - precision_status, precision_index, _ = self.comparator.parse_result(node, [compare_data_dict]) - # todo 常量改成变量 - node.data['precision_status'] = precision_status - node.data['precision_index'] = precision_index - if not precision_status: - self.comparator.add_error_key(node.output_data) - self.add_suggestions(node) - if os.path.isfile(csv_path): - os.remove(csv_path) - - @staticmethod - def add_suggestions(node): - """ - 精度疑似有问题时,提供一些建议 - """ - if node.op == NodeOp.module: - node.suggestions['text'] = Suggestions.Module - node.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL - elif node.op == NodeOp.function_api: - node.suggestions['text'] = Suggestions.API - node.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL - - def _compare_nodes(self, node_n): - """ - 递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 - 这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 - Args: - node_n: NPU节点 - """ - # todo 这个函数也需要改改,把is_matched删掉, - is_matched, node_b, ancestors = GraphComparator._match_node(node_n, self.graph_b) - if is_matched: - ancestors.append(node_b.type) - node_n.matched_node_link = ancestors - node_b.matched_node_link = ancestors - # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 - compare_result_list = self.compare_node(node_n, node_b) - if compare_result_list: - self.comparator.add_csv_data(compare_result_list) - self.add_compare_result_to_node(node_n, compare_result_list) - for subnode in node_n.subnodes: - self._compare_nodes(subnode) - - - @staticmethod - def _match_node(node_n, graph_b): - """ - 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 - 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 - 返回匹配结果,匹配到的系欸但,以及祖先列表 - """ - if node_n.id not in graph_b.node_map: - return False, None, None - node_b = graph_b.node_map[node_n.id] - if node_n.data_info != node_b.data_info: - return False, None, None - ancestors_n = node_n.get_ancestors() - ancestors_b = node_b.get_ancestors() - if ancestors_n != ancestors_b: - 
return False, None, None - return True, node_b, ancestors_n - def compare_node(self, node_n, node_b): + def compare(self): """ - 调用acc_compare.py中的get_accuracy获得精度对比指标 - 真实数据对比模式无法获得精度对比指标,需要调用多进程比对接口 + 比较函数,初始化结束后单独调用。比较结果写入graph_n Args: - node_n: NPU节点 - node_b: Bench节点 - Returns: 包含参数信息和对比指标(真实数据对比模式除外)的list - """ - result = [] - # todo 写一个atat adpator - merge_n = self.parse_node(node_n, self.data_n_dict) - merge_b = self.parse_node(node_b, self.data_b_dict) - get_accuracy(result, merge_n, merge_b, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) - return result - - def parse_node(self, node, data_dict): - """ - 转换节点,使其能够作为acc_compare.py中的get_accuracy的入参 + construct_path: construct.json路径 + data_path: dump.json路径 + model_name: 模型名字,依赖外部输入 + Returns: Graph,代表图的数据结构 """ - op_parsed_list = read_op(data_dict.get(node.type, {}), node.type) - if node.type in self.stack_json_data: - op_parsed_list.append( - {'full_op_name': node.type, 'full_info': self.stack_json_data[node.type]}) - else: - op_parsed_list.append({'full_op_name': node.type, 'full_info': None}) - return merge_tensor(op_parsed_list, self.comparator.is_summary_compare(), self.comparator.is_md5_compare()) + self._compare_nodes(self.graph_n.root) + self._postcompare() def add_compare_result_to_node(self, node, compare_result_list): """ @@ -156,7 +48,7 @@ class GraphComparator: compare_result_list: 包含参数信息和对比指标(真实数据对比模式除外)的list """ # 真实数据比对,先暂存节点,在多进程对比得到精度指标后,再将指标添加到节点中 - if self.comparator.prepare_real_data(node): + if self.ma.prepare_real_data(node): return compare_in_dict = {} compare_out_dict = {} @@ -166,10 +58,64 @@ class GraphComparator: compare_out_dict[item[0]] = item else: compare_in_dict[item[0]] = item - precision_status, precision_index, other_dict = self.comparator.parse_result(node, [compare_in_dict, compare_out_dict]) - node.data['precision_status'] = precision_status - node.data['precision_index'] = precision_index + precision_status, precision_index, other_dict = 
self.ma.parse_result(node, [compare_in_dict, compare_out_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index node.data.update(other_dict) if not precision_status: - self.comparator.add_error_key(node.output_data) - self.add_suggestions(node) + self.ma.add_error_key(node.output_data) + node.add_suggestions() + + def _parse_param(self, data_paths, stack_path, output_path): + self.dump_path_param = { + 'npu_json_path': data_paths[0], + 'bench_json_path': data_paths[1], + 'stack_json_path': stack_path, + 'is_print_compare_log': True + } + self.output_path = output_path + compare_mode = get_compare_mode(self.dump_path_param) + self.ma = ModeAdapter(compare_mode) + self.data_n_dict = load_data_json_file(data_paths[0]) + self.data_b_dict = load_data_json_file(data_paths[1]) + self.stack_json_data = load_json_file(stack_path) + + def _postcompare(self): + if not self.ma.is_real_data_compare(): + return + csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) + try: + write_csv_data(csv_path, self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) + run_real_data(self.dump_path_param, csv_path) + # 从csv文件读取精度指标,添加到node节点中 + df = pd.read_csv(csv_path) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.ma.compare_nodes: + precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + if not precision_status: + self.ma.add_error_key(node.output_data) + node.add_suggestions() + except (FileNotFoundError, IOError) as e: + print_info_log('File error in _postcompare: {e}') + finally: + if os.path.isfile(csv_path): + os.remove(csv_path) + + def _compare_nodes(self, node_n): + #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 + #这里采用先序遍历,好处在于当这个节点被比较时,他的先序已经被匹配,这可以为后续的模糊匹配提供重要信息 + node_b, 
ancestors = Graph.match(self.graph_n, node_n, self.graph_b) + if node_b: + ancestors.append(node_b.id) + node_n.add_link(node_b, ancestors) + # 真实数据比对只会得到基本信息,并没有精度指标,需要调用多进程对比接口 + compare_result_list = compare_node([node_n.id, node_b.id], [self.data_n_dict, self.data_b_dict], + self.stack_json_data, self.ma.is_summary_compare(), + self.ma.is_md5_compare()) + if compare_result_list: + self.ma.add_csv_data(compare_result_list) + self.add_compare_result_to_node(node_n, compare_result_list) + for subnode in node_n.subnodes: + self._compare_nodes(subnode) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py similarity index 74% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py rename to debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index 21f0ae9f39..9d5e3aab4f 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -18,55 +18,142 @@ from ....core.utils import CompareConst, Const from ..utils import ToolTip, GraphConst, convert_percentage_to_float -class Comparator: - def __init__(self, summary_compare, md5_compare): - if summary_compare: #0 summary mode, 1 md5 mode, 2 true data mode - self.compare_mode = GraphConst.SUMMARY_COMPARE - elif md5_compare: - self.compare_mode = GraphConst.MD5_COMPARE - else: - self.compare_mode = GraphConst.REAL_DATA_COMPARE +class ModeAdapter: + def __init__(self, compare_mode): + self.compare_mode = compare_mode self.csv_data = [] self.compare_nodes = [] + @staticmethod + def _add_md5_compare_data(node_data, compare_data_dict): + precision_status = True + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [GraphConst.JSON_MD5_KEY] + headers = 
CompareConst.MD5_COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # md5比对是否通过 + if value.get(GraphConst.JSON_MD5_KEY) != CompareConst.PASS: + precision_status = False + node_data[key] = value + return precision_status + + @staticmethod + def _add_real_compare_data(node_data, compare_data_dict): + min_thousandth = float(1) + numbers = [] + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, + CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] + headers = CompareConst.COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + ModeAdapter._match_data(value, compare_data, key_list, id_list) + # 获取一个节点所有的输入或输出最小的双千指标 + thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) + # 可能是None,可能是非数字内容str + try: + thousandth = float(thousandth) + except (ValueError, TypeError): + thousandth = None + if thousandth is not None: + numbers.append(thousandth) + node_data[key] = value + # 双千指标都是None的异常情况 + if not numbers: + min_thousandth = None + else: + min_thousandth = min(numbers + [min_thousandth]) + return min_thousandth + + @staticmethod + def _add_summary_compare_data( node_data, compare_data_dict): + precision_status = True + max_relative_err = 0 + for key, value in node_data.items(): + if not isinstance(value, dict): + continue + compare_data = compare_data_dict.get(key) + if compare_data: + # 对应比对结果csv的列 + key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, + CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, + CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] + headers = CompareConst.SUMMARY_COMPARE_RESULT_HEADER + id_list = [headers.index(x) for x in key_list] + 
ModeAdapter._match_data(value, compare_data, key_list, id_list) + # 相对误差大于0.5疑似有精度问题 + for item in key_list[4:]: + relative_err = convert_percentage_to_float(value.get(item)) + max_relative_err = max(max_relative_err, relative_err) + node_data[key] = value + if max_relative_err > GraphConst.MAX_RELATIVE_ERR_TH: + precision_status = False + max_relative_err = 1 if max_relative_err > 1 else max_relative_err + precision_index = 1 - max_relative_err + return precision_status, precision_index + + @staticmethod + def _match_data(data_dict, compare_data, key_list, id_list): + """ + 绑定精度指标到node的input_data和output_data + """ + if len(key_list) != len(id_list): + return + for id, key in zip(id_list, key_list): + data = compare_data[id] + if data is not None and 'nan' not in str(data) and str(data) != ' ': + data_dict[key] = data + else: + data_dict[key] = 'null' + def parse_result(self, node, compare_data_dict): """ 根据结果返回数据,分别是precision_status,precision_index,和附加数据 """ other_dict = {} if self.is_md5_compare(): - precision_status_in = Comparator.add_md5_compare_data(node.input_data, compare_data_dict[0]) - precision_status_out = Comparator.add_md5_compare_data(node.output_data, compare_data_dict[1]) + precision_status_in = ModeAdapter._add_md5_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out = ModeAdapter._add_md5_compare_data(node.output_data, compare_data_dict[1]) # 所有输入输出md5对比通过,这个节点才算通过 precision_status = precision_status_in and precision_status_out precision_index = 1 if precision_status else 0 other_result = CompareConst.PASS if precision_status else CompareConst.DIFF - other_dict['md5 Compare Result'] = other_result + other_dict[GraphConst.JSON_MD5_KEY] = other_result elif self.is_summary_compare(): - precision_status_in, precision_index_in = Comparator.add_summary_compare_data(node.input_data, compare_data_dict[0]) - precision_status_out, precision_index_out = Comparator.add_summary_compare_data(node.output_data, compare_data_dict[1]) + 
precision_status_in, precision_index_in = ModeAdapter._add_summary_compare_data(node.input_data, compare_data_dict[0]) + precision_status_out, precision_index_out = ModeAdapter._add_summary_compare_data(node.output_data, compare_data_dict[1]) precision_status = precision_status_in and precision_status_out precision_index = min(precision_index_in, precision_index_out) else: - min_thousandth_in = Comparator.add_real_compare_data(node.input_data, compare_data_dict[0]) - min_thousandth_out = Comparator.add_real_compare_data(node.output_data, compare_data_dict[0]) + min_thousandth_in = ModeAdapter._add_real_compare_data(node.input_data, compare_data_dict[0]) + min_thousandth_out = ModeAdapter._add_real_compare_data(node.output_data, compare_data_dict[0]) if min_thousandth_in and min_thousandth_out: change_percentage = abs(min_thousandth_in - min_thousandth_out) else: change_percentage = 0 precision_status = True - if change_percentage > 0.1: + if change_percentage > GraphConst.REAL_DATA_TH: precision_status = False precision_index = 0 if change_percentage > 1 else 1 - change_percentage return precision_status, precision_index, other_dict def prepare_real_data(self, node): + """ + 为真实数据比较模式准备节点信息 + """ if self.is_real_data_compare(): self.compare_nodes.append(node) return True return False - # todo 改成私有 def is_summary_compare(self): return self.compare_mode == GraphConst.SUMMARY_COMPARE @@ -82,6 +169,9 @@ class Comparator: self.csv_data.extend(compare_result_list) def add_error_key(self, node_data): + """ + 根据不同的模式进行提供不同错误信息 + """ for key, value in node_data.items(): if not isinstance(value, dict): continue @@ -91,7 +181,7 @@ class Comparator: elif self.is_real_data_compare(): message = [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] else: - # todo 这个应该是bug,应该修复 + # 输出件优化 message = [] value[GraphConst.ERROR_KEY] = message node_data[key] = value @@ -114,91 +204,5 @@ class Comparator: CompareConst.COSINE: ToolTip.COSINE, 
CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} - # todo 这个要放在外面去 + # 输出件优化 return json.dumps(tips) - - @staticmethod - def _match_data(data_dict, compare_data, key_list, id_list): - """ - 绑定精度指标到node的input_data和output_data - """ - if len(key_list) != len(id_list): - return - for i, key in enumerate(key_list): - data = compare_data[id_list[i]] - if data is not None and 'nan' not in str(data) and str(data) != ' ': - data_dict[key] = compare_data[id_list[i]] - else: - data_dict[key] = 'null' - - @staticmethod - def add_summary_compare_data( node_data, compare_data_dict): - precision_status = True - max_relative_err = 0 - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - compare_data = compare_data_dict.get(key) - if compare_data: - # 对应比对结果csv的列 - key_list = [CompareConst.MAX_DIFF, CompareConst.MIN_DIFF, CompareConst.MEAN_DIFF, - CompareConst.NORM_DIFF, CompareConst.MAX_RELATIVE_ERR, CompareConst.MIN_RELATIVE_ERR, - CompareConst.MEAN_RELATIVE_ERR, CompareConst.NORM_RELATIVE_ERR] - id_list = [6, 7, 8, 9, 10, 11, 12, 13] - Comparator._match_data(value, compare_data, key_list, id_list) - # 相对误差大于0.5疑似有精度问题 - for item in key_list[4:]: - relative_err = convert_percentage_to_float(value.get(item)) - max_relative_err = max(max_relative_err, relative_err) - node_data[key] = value - if max_relative_err > 0.5: - precision_status = False - precision_index = 1 - max_relative_err - return precision_status, precision_index - - @staticmethod - def add_md5_compare_data( node_data, compare_data_dict): - precision_status = True - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - compare_data = compare_data_dict.get(key) - if compare_data: - key_list = ['md5 Compare Result'] - id_list = [8] - Comparator._match_data(value, compare_data, key_list, id_list) - # md5比对是否通过 - if value.get('md5 Compare Result') != CompareConst.PASS: - precision_status = False - 
node_data[key] = value - return precision_status - - @staticmethod - def add_real_compare_data(node_data, compare_data_dict): - min_thousandth = float(1) - numbers = [] - for key, value in node_data.items(): - if not isinstance(value, dict): - continue - compare_data = compare_data_dict.get(key) - if compare_data: - key_list = [CompareConst.COSINE, CompareConst.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR, - CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO] - id_list = [6, 7, 8, 9, 10] - Comparator._match_data(value, compare_data, key_list, id_list) - # 获取一个节点所有的输入或输出最小的双千指标 - thousandth = value.get(CompareConst.ONE_THOUSANDTH_ERR_RATIO) - # 可能是None,可能是非数字内容str - try: - thousandth = float(thousandth) - except (ValueError, TypeError): - thousandth = None - if thousandth is not None: - numbers.append(thousandth) - node_data[key] = value - # 双千指标都是None的异常情况 - if not numbers: - min_thousandth = None - else: - min_thousandth = min(numbers + [min_thousandth]) - return min_thousandth \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 5628e3e0ee..84bba4d17b 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -13,116 +13,81 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import re +from ..utils import Suggestions, GraphConst +from ..builder.msprobe_adapter import format_node_data class BaseNode: def __init__(self, node_op, node_id, up_node=None): self.op = node_op - self.type = node_id self.id = node_id self.data = {} - self.outputs = [] - self.inputs = [] self.output_data = {} self.input_data = {} - self.upnode = up_node + self.upnode = None + self.add_upnode(up_node) self.subnodes = [] - if up_node: - up_node.add_subnode(self) - self.is_forward = True - self.pair = None self.matched_node_link = [] - self.data_info = '' + self.tag = '' self.suggestions = {} - # todo 这些都在做什么,都应该确认一下 def __str__(self): info = f'id:\t{self.id}' return info - - @staticmethod - def _handle_item(data_dict): - del_list = ['requires_grad', 'data_name', 'full_op_name'] - for key, value in data_dict.items(): - if not isinstance(value, dict): - continue - for item in del_list: - if item in value: - del value[item] - BaseNode._format_data(value) - - return data_dict - @staticmethod - def _format_data(data_dict): + def get_suggestions(self): """ - 格式化数据,小数保留6位,处理一些异常值 + 精度疑似有问题时,提供一些建议 """ - for key, value in data_dict.items(): - if isinstance(value, str): - # 将单引号删掉,None换成null避免前端解析错误 - value = value.replace("'", "").replace('None', 'null') - value = BaseNode._format_decimal_string(value) - if value is None or value == ' ': - value = 'null' - if isinstance(value, float): - value = round(value, 6) - if not isinstance(value, (list, tuple, dict, str)): - value = str(value) - data_dict[key] = value + if self.op == NodeOp.module: + self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.Module + self.suggestions[Suggestions.PTDBG] = Suggestions.PTDBG_URL + elif self.op == NodeOp.function_api: + self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.API + self.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL + + def set_input_output_tag(self, input_data, output_data, tag): + self.input_data = input_data + self.output_data = 
output_data + self.tag = tag - @staticmethod - def _format_decimal_string(s): + def add_upnode(self, node): """ - 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 + 绑定upnode,用于对两个节点进行上下级关联 """ - pattern = re.compile(r'\d+\.\d+%?') - matches = pattern.findall(s) - for match in matches: - is_percent = match.endswith('%') - number_str = match.rstrip('%') - decimal_part = number_str.split('.')[1] - # 如果小数位数大于6,进行处理 - if len(decimal_part) > 6: - number_float = float(number_str) - if is_percent: - number_float /= 100 - formatted_number = f"{number_float:.6f}" - # 如果原来是百分数,加回百分号 - if is_percent: - formatted_number += '%' - # 替换原字符串中的数值部分 - s = s.replace(match, formatted_number) - return s - - def get_info(self): - info = f'{self.id}\t{self.op}' - if not self.is_forward: - info += '(b)' - for key in self.data: - info += f'\n{key}:\t{self.data.get(key)}' - return info - - def add_subnode(self, node): - if node.id == self.id: + if not node or node.id == self.id or self.upnode: return - self.subnodes.append(node) + self.upnode = node + node.subnodes.append(self) + + def add_link(self, node, ancestors): + """ + 在节点匹配成功后进行匹配数据的录入 + Args: + node: 和self相互匹配的节点 + ancestors: 对面节点的祖先信息 + """ + self.matched_node_link = ancestors + node.matched_node_link = ancestors def get_yaml_dict(self): + """ + 输出数据 + """ + # 输出件优化 result = {} result['id'] = self.id result['node_type'] = self.op.value - result['type'] = self.type + result['type'] = self.id result['data'] = self.data - result['output_data'] = self._handle_item(self.output_data) - result['input_data'] = self._handle_item(self.input_data) - result['outputs'] = [(edge_id, node.id) for edge_id, node in self.outputs] - result['inputs'] = [(edge_id, node.id) for edge_id, node in self.inputs] + result['output_data'] = format_node_data(self.output_data) + result['input_data'] = format_node_data(self.input_data) + result['outputs'] = [] + result['inputs'] = [] result['upnode'] = self.upnode.id if self.upnode else 'None' result['subnodes'] = [node.id for node 
in self.subnodes] - result['is_forward'] = self.is_forward - result['pair'] = self.pair.id if self.pair else 'None' + result['is_forward'] = True + result['pair'] = 'None' result['matched_node_link'] = self.matched_node_link result['suggestions'] = self.suggestions return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index 347e8c2c88..b53a691c02 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -13,16 +13,85 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .base_node import BaseNode +from .node_op import NodeOp +from ..utils import GraphConst + class Graph: - # todo,这里应该加入一些和图相关的操作 - # 可以把root node 的初始化放进Graph里面 - def __init__(self): - self.root = None + def __init__(self, model_name): self.node_map = {} + self.add_node(NodeOp.module, model_name) + self.root = self.get_node(model_name) def __str__(self): infos = [f'{str(self.node_map.get(node_id))}' for node_id in self.node_map] info = "\n".join(infos) return info - \ No newline at end of file + + @staticmethod + def match(graph_n, node_n, graph_b): + """ + 给定节点n,在另一个graph中匹配它对应的节点。前置条件是它的父节点匹配已经完成 + 目前采用完全匹配的方式,后续可能在这里加入一定的模糊匹配逻辑 + 返回匹配结果,匹配到的节点,以及祖先树。没匹配到则返回None, [] + """ + if not node_n or node_n.id not in graph_b.node_map: + return None, [] + node_b = graph_b.node_map.get(node_n.id) + if node_n.tag != node_b.tag: + return None, [] + + ancestors_n = node_n.get_ancestors() + ancestors_b = node_b.get_ancestors() + if ancestors_n != ancestors_b: + return None, [] + return node_b, ancestors_n + + @staticmethod + def dfs(node, result): + info = node.get_yaml_dict() + result[node.id] = info + for subnode in node.subnodes: + Graph.dfs(subnode, result) + + def add_node(self, node_op, node_id, up_node=None): + """ + 在graph中进行节点的添加 + Args: + node_op: 需要添加的节点类型 + 
node_id: 需要添加的节点id + up_node:对应节点的父节点 + """ + if node_id in self.node_map: + return self.node_map.get(node_id) + node = BaseNode(node_op, node_id, up_node) + self.node_map[node_id] = node + + def get_node(self, node_id): + """ + 返回节点,不存在返回None + """ + return self.node_map.get(node_id, None) + + def get_dict(self): + """ + 用于数据输出 + """ + result = {} + result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' + result[GraphConst.JSON_NODE_KEY] = {} + for node_id in self.node_map: + info = self.node_map.get(node_id).get_yaml_dict() + result[GraphConst.JSON_NODE_KEY][node_id] = info + return result + + def get_dict2(self): + # 输出件优化 + # 为了防止输出件变动临时使用方法,会在后续重构中删除 + # 递归遍历,在正式交付中尽量避免 + result = {} + result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' + result[GraphConst.JSON_NODE_KEY] = {} + Graph.dfs(self.root, result[GraphConst.JSON_NODE_KEY]) + return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index 015a83abda..ad7be35f0c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -26,6 +26,9 @@ class NodeOp(Enum): @staticmethod def get_node_op(node_name: str): + """ + 基于代表节点的字符串,解析节点种类 + """ pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' if re.match(pattern, node_name): return NodeOp.function_api diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py b/debug/accuracy_tools/atat/pytorch/visualization/test.py index 3bd43362a2..dcd0b9d1f9 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/test.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/test.py @@ -21,6 +21,8 @@ import filecmp from .compare.graph_comparator import GraphComparator from .utils import GraphConst from .builder.graph_builder import GraphBuilder +from ...core.utils import print_info_log +from ...core.file_check_util import 
FileOpen, create_directory def compare_graph(dump_path_n, dump_path_b, out_path): @@ -36,7 +38,8 @@ def compare_graph(dump_path_n, dump_path_b, out_path): graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) graph_comparator.compare() output_path = os.path.join(out_path, 'compare.vis') - GraphBuilder.export_graphs_to_json(output_path, graph_n, graph_b, graph_comparator.comparator.get_tool_tip()) + GraphBuilder.export_to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) + def build_graph(dump_path, out_path): construct_path = os.path.join(dump_path, GraphConst.CONSTRUCT_FILE) @@ -45,11 +48,12 @@ def build_graph(dump_path, out_path): graph = GraphBuilder.build(construct_path, data_path, 'TestNet') GraphBuilder.export_to_json(output_path, graph) + def run_st(data_path): start_time = time.time() run_bench(data_path, 'output2') end_time = time.time() - print('run_st time cost:', end_time - start_time) + print_info_log(f'run_st time cost: {end_time - start_time}') # 比较output2的结果和output1 的bench结果差距 for data_dir in os.listdir(data_path): data_dir = os.path.join(data_path, data_dir) @@ -63,9 +67,10 @@ def run_st(data_path): file2 = os.path.join(output2, vis_file) result = filecmp.cmp(file1, file2) if result: - print('pass ' + file1) + print_info_log('pass ' + file1) else: - print('not pass ' + file1) + print_info_log('not pass ' + file1) + def run_bench(data_path, output_dir): for data_dir in os.listdir(data_path): @@ -76,6 +81,6 @@ def run_bench(data_path, output_dir): output_path = os.path.join(data_dir, output_dir) if os.path.exists(output_path): shutil.rmtree(output_path) - os.makedirs(output_path) + create_directory(output_path) build_graph(run_data_path, output_path) compare_graph(run_data_path, run_data_path, output_path) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index ab02ef7d20..6505ce2990 100644 --- 
a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -21,6 +21,9 @@ from ..compare.acc_compare import result_to_csv def load_json_file(file_path): + """ + 加载json文件 + """ try: with FileOpen(file_path, 'r') as f: file_dict = json.load(f) @@ -30,14 +33,32 @@ def load_json_file(file_path): except json.JSONDecodeError: return {} + +def load_data_json_file(file_path): + """ + 加载dump.json中的data字段 + """ + return load_json_file(file_path).get(GraphConst.DATA_KEY, {}) + + def save_json_file(file_path, data): + """ + 保存json文件 + """ with FileOpen(file_path, 'w') as f: f.write(json.dumps(data, indent=4)) + def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): + """ + 调用acc接口写入csv + """ + if os.path.exists(csv_path): + os.remove(csv_path) with os.fdopen(os.open(csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), 'w+') as f: result_to_csv(md5_compare, summary_compare, stack, csv_data, f) + def convert_percentage_to_float(percentage_str): """ 百分比字符串转换转换为浮点型 @@ -82,4 +103,18 @@ class GraphConst: ERROR_KEY = 'error_key' SUMMARY_COMPARE = 0 MD5_COMPARE = 1 - REAL_DATA_COMPARE = 2 \ No newline at end of file + REAL_DATA_COMPARE = 2 + JSON_NPU_KEY = 'NPU' + JSON_BENCH_KEY = 'Bench' + JSON_TIP_KEY = 'Tooltip' + JSON_MD5_KEY = 'md5 Compare Result' + JSON_ROOT_KEY = 'root' + JSON_NODE_KEY = 'node' + DATA_KEY = 'data' + REAL_DATA_TH = 0.1 + MAX_RELATIVE_ERR_TH = 0.5 + ROUND_TH = 6 + JSON_STATUS_KEY = 'precision_status' + JSON_INDEX_KEY = 'precision_index' + SUGGEST_KEY = 'text' + TAG_NA = 'na' -- Gitee From 85e6d5410b2065a54e85f1fa2ed028c30439319a Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:29:53 +0800 Subject: [PATCH 025/141] =?UTF-8?q?NodeOp=E8=A7=A3=E6=9E=90=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E3=80=81BaseNode=E6=AF=94=E8=BE=83=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=8C=E7=A7=BB=E9=99=A4tag=E5=B1=9E?= =?UTF-8?q?=E6=80=A7?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../visualization/builder/graph_builder.py | 17 ++--- .../visualization/builder/msprobe_adapter.py | 67 ++++++++++--------- .../visualization/compare/mode_adapter.py | 4 +- .../pytorch/visualization/graph/base_node.py | 18 +++-- .../atat/pytorch/visualization/graph/graph.py | 13 ++-- .../pytorch/visualization/graph/node_op.py | 17 ++--- .../atat/pytorch/visualization/test.py | 4 +- .../atat/pytorch/visualization/utils.py | 5 +- 8 files changed, 78 insertions(+), 67 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 8cce30f0d2..85634a461c 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -16,7 +16,7 @@ from ..graph.graph import Graph from ..graph.node_op import NodeOp from ..utils import load_json_file, load_data_json_file, save_json_file, GraphConst -from .msprobe_adapter import parse_raw_data, get_input_output, get_node_tag +from .msprobe_adapter import get_input_output class GraphBuilder: @@ -37,9 +37,9 @@ class GraphBuilder: return graph @staticmethod - def export_to_json(filename, graph_n, graph_b=None, tool_tip=None): + def to_json(filename, graph_n, graph_b=None, tool_tip=None): """ - 将graph到处成.vis文件的接口 + 将graph导出成.vis文件的接口 Args: filename: 输出文件路径 graph_n: Graph @@ -48,10 +48,10 @@ class GraphBuilder: """ result = {} if graph_b: - result[GraphConst.JSON_NPU_KEY] = graph_n.get_dict2() - result[GraphConst.JSON_BENCH_KEY] = graph_b.get_dict2() + result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict2() + result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict2() else: - result = graph_n.get_dict2() + result = graph_n.to_dict2() if tool_tip: result[GraphConst.JSON_TIP_KEY] = tool_tip save_json_file(filename, result) @@ -75,13 +75,10 @@ 
class GraphBuilder: graph.add_node(op, name, upnode) node = graph.get_node(name) node_data = data_dict.get(name, {}) - input_args, input_kwargs, output = parse_raw_data(node_data) # 添加输入输出数据 input_data, output_data = get_input_output(node_data, node.id) - # 添加输入输出数据信息组成的标识,用来匹配npu和标杆的节点 - tag = get_node_tag([input_args, input_kwargs, output]) # 跟新数据 - node.set_input_output_tag(input_data, output_data, tag) + node.set_input_output(input_data, output_data) # 添加节点 node.add_upnode(upnode) return node \ No newline at end of file diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index d668598f1c..960e1c4f1a 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -19,6 +19,13 @@ from ..utils import GraphConst from ....core.utils import print_info_log +# 用于将节点名字解析成对应的NodeOp的规则 +op_patterns = [ + r'^(Module)', #NodeOp.module + r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' #NodeOp.function_api +] + + def get_compare_mode(dump_path_param): """ 获得比较模式,包括summary、MD5和真实数据三种模式 @@ -35,6 +42,7 @@ def get_compare_mode(dump_path_param): compare_mode = GraphConst.REAL_DATA_COMPARE return compare_mode + def run_real_data(dump_path_param, csv_path): """ 多进程运行生成真实数据 @@ -44,19 +52,6 @@ def run_real_data(dump_path_param, csv_path): """ _do_multi_process(dump_path_param, csv_path) -def parse_raw_data(data_dict: dict): - """ - 进行dump的原始数据解析,提取三个关键字段以进一步处理 - """ - input_args = data_dict.get('input_args', []) - input_kwargs = data_dict.get('input_kwargs', {}) - output = data_dict.get('output', []) - - input_args = input_args if isinstance(input_args, list) else [] - input_kwargs = input_kwargs if isinstance(input_kwargs, dict) else {} - output = output if isinstance(output, list) else [] - - return input_args, input_kwargs, output def 
get_input_output(node_data, node_id): """ @@ -70,23 +65,36 @@ def get_input_output(node_data, node_id): op_parsed_list = read_op(node_data, node_id) for item in op_parsed_list: full_op_name = item.get('full_op_name', '') - if 'output' in full_op_name: + if not full_op_name: + continue + splits = full_op_name.split('.') + if len(splits) <= GraphConst.OUTPUT_INDEX: + continue + if 'output' in splits[GraphConst.OUTPUT_INDEX]: output_data[full_op_name] = item else: input_data[full_op_name] = item return input_data, output_data -def get_node_tag(inputs): - """ - 基于inputs生成节点专属tag,一次判断节点是否相同 - """ - result = "" - for single_input in inputs: - info = '' - for item in single_input: - info += _get_single_tag(item) - result += info - return result + +def compare_data(data_dict1, data_dict2): + """ + 比较get_input_output中输出的结果是否结构一致,比较一致返回True + """ + if len(data_dict1) != len(data_dict2): + return False + # 用于比较两个节点是否相等的关键字段 + tag_keys = ['type', 'dtype', 'shape'] + for key1, key2 in zip(data_dict1, data_dict2): + dict1 = data_dict1[key1] + dict2 = data_dict2[key2] + for tag_key in tag_keys: + tag_value1 = dict1.get(tag_key, None) + tag_value2 = dict2.get(tag_key, None) + if tag_value1 != tag_value2: + return False + return True + def format_node_data(data_dict): """ @@ -102,6 +110,7 @@ def format_node_data(data_dict): _format_data(value) return data_dict + def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_md5_compare): """ 调用acc_compare.py中的get_accuracy获得精度对比指标 @@ -114,12 +123,6 @@ def compare_node(node_ids, data_dicts, stack_json_data, is_summary_compare, is_m get_accuracy(result, merge_n, merge_b, is_summary_compare, is_md5_compare) return result -def _get_single_tag(item): - if isinstance(item, dict): - return str(item.get('type', GraphConst.TAG_NA)) + '_' + str(item.get('dtype', GraphConst.TAG_NA)) + '_' + str(item.get('shape', GraphConst.TAG_NA)) - elif isinstance(item, (list, tuple)): - return str([_get_single_tag(sub_item) for sub_item in 
item]) - return '' def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_compare): """ @@ -133,6 +136,7 @@ def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_ op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) return merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + def _format_decimal_string(s): """ 使用正则表达式匹配包含数字、小数点和可选的百分号的字符串 @@ -156,6 +160,7 @@ def _format_decimal_string(s): s = s.replace(match, formatted_number) return s + def _format_data(data_dict): """ 格式化数据,小数保留6位,处理一些异常值 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index 9d5e3aab4f..18c6bac6ba 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -15,7 +15,7 @@ import json from ....core.utils import CompareConst, Const -from ..utils import ToolTip, GraphConst, convert_percentage_to_float +from ..utils import ToolTip, GraphConst, str2float class ModeAdapter: @@ -91,7 +91,7 @@ class ModeAdapter: ModeAdapter._match_data(value, compare_data, key_list, id_list) # 相对误差大于0.5疑似有精度问题 for item in key_list[4:]: - relative_err = convert_percentage_to_float(value.get(item)) + relative_err = str2float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value if max_relative_err > GraphConst.MAX_RELATIVE_ERR_TH: diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 84bba4d17b..74868f83db 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -14,7 +14,7 @@ # limitations under the License. 
from ..utils import Suggestions, GraphConst -from ..builder.msprobe_adapter import format_node_data +from ..builder.msprobe_adapter import format_node_data, compare_data class BaseNode: @@ -28,12 +28,21 @@ class BaseNode: self.add_upnode(up_node) self.subnodes = [] self.matched_node_link = [] - self.tag = '' self.suggestions = {} def __str__(self): info = f'id:\t{self.id}' return info + + def __eq__(self,other): + """ + 用来判断两个节点是否可以被匹配上,认为结构上是否一致 + """ + if not compare_data(self.input_data, other.input_data): + return False + if not compare_data(self.output_data, other.output_data): + return False + return True def get_suggestions(self): """ @@ -46,10 +55,9 @@ class BaseNode: self.suggestions[GraphConst.SUGGEST_KEY] = Suggestions.API self.suggestions[Suggestions.API_ACCURACY_CHECKER] = Suggestions.API_ACCURACY_CHECKER_URL - def set_input_output_tag(self, input_data, output_data, tag): + def set_input_output(self, input_data, output_data): self.input_data = input_data self.output_data = output_data - self.tag = tag def add_upnode(self, node): """ @@ -70,7 +78,7 @@ class BaseNode: self.matched_node_link = ancestors node.matched_node_link = ancestors - def get_yaml_dict(self): + def to_dict(self): """ 输出数据 """ diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index b53a691c02..7af11fceec 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -39,9 +39,8 @@ class Graph: if not node_n or node_n.id not in graph_b.node_map: return None, [] node_b = graph_b.node_map.get(node_n.id) - if node_n.tag != node_b.tag: + if node_n != node_b: return None, [] - ancestors_n = node_n.get_ancestors() ancestors_b = node_b.get_ancestors() if ancestors_n != ancestors_b: @@ -50,7 +49,7 @@ class Graph: @staticmethod def dfs(node, result): - info = node.get_yaml_dict() + info = node.to_dict() result[node.id] = 
info for subnode in node.subnodes: Graph.dfs(subnode, result) @@ -64,7 +63,7 @@ class Graph: up_node:对应节点的父节点 """ if node_id in self.node_map: - return self.node_map.get(node_id) + return node = BaseNode(node_op, node_id, up_node) self.node_map[node_id] = node @@ -74,7 +73,7 @@ class Graph: """ return self.node_map.get(node_id, None) - def get_dict(self): + def to_dict(self): """ 用于数据输出 """ @@ -82,11 +81,11 @@ class Graph: result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' result[GraphConst.JSON_NODE_KEY] = {} for node_id in self.node_map: - info = self.node_map.get(node_id).get_yaml_dict() + info = self.node_map.get(node_id).to_dict() result[GraphConst.JSON_NODE_KEY][node_id] = info return result - def get_dict2(self): + def to_dict2(self): # 输出件优化 # 为了防止输出件变动临时使用方法,会在后续重构中删除 # 递归遍历,在正式交付中尽量避免 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index ad7be35f0c..a5bf8a4438 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -15,22 +15,23 @@ from enum import Enum import re +from ..builder.msprobe_adapter import op_patterns class NodeOp(Enum): module = 1 function_api = 2 - module_api = 3 - tensor = 4 - output = 5 @staticmethod def get_node_op(node_name: str): """ 基于代表节点的字符串,解析节点种类 """ - pattern = r'^(Tensor|Torch|Functional|NPU|VF|Distributed|Aten)' - if re.match(pattern, node_name): - return NodeOp.function_api - else: - return NodeOp.module + for op in NodeOp: + index = op.value - 1 + if index < 0 or index >= len(op_patterns): + raise Exception("NodeOp and op_patterns in MsprobeAdapter do not match") + pattern = op_patterns[index] + if re.match(pattern, node_name): + return op + raise Exception("Cannot parse node_name {node_name} into NodeOp") diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py 
b/debug/accuracy_tools/atat/pytorch/visualization/test.py index dcd0b9d1f9..9784a96b80 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/test.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/test.py @@ -38,7 +38,7 @@ def compare_graph(dump_path_n, dump_path_b, out_path): graph_comparator = GraphComparator([graph_n, graph_b], [data_path_n, data_path_b], stack_path, out_path) graph_comparator.compare() output_path = os.path.join(out_path, 'compare.vis') - GraphBuilder.export_to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) + GraphBuilder.to_json(output_path, graph_n, graph_b, graph_comparator.ma.get_tool_tip()) def build_graph(dump_path, out_path): @@ -46,7 +46,7 @@ def build_graph(dump_path, out_path): data_path = os.path.join(dump_path, GraphConst.DUMP_FILE) output_path = os.path.join(out_path, 'build.vis') graph = GraphBuilder.build(construct_path, data_path, 'TestNet') - GraphBuilder.export_to_json(output_path, graph) + GraphBuilder.to_json(output_path, graph) def run_st(data_path): diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 6505ce2990..692ac63270 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -59,7 +59,7 @@ def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): result_to_csv(md5_compare, summary_compare, stack, csv_data, f) -def convert_percentage_to_float(percentage_str): +def str2float(percentage_str): """ 百分比字符串转换转换为浮点型 Args: @@ -67,7 +67,7 @@ def convert_percentage_to_float(percentage_str): Returns: float 0.00, 0.234 """ try: - percentage_str = percentage_str.replace('%', '') + percentage_str = percentage_str.strip('%') return float(percentage_str) / 100 except ValueError: return 0 @@ -118,3 +118,4 @@ class GraphConst: JSON_INDEX_KEY = 'precision_index' SUGGEST_KEY = 'text' TAG_NA = 'na' + OUTPUT_INDEX = -2 -- 
Gitee From 1744839f6e88fecb4c5e3f14d014c6d885760fcf Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Wed, 10 Jul 2024 10:30:49 +0800 Subject: [PATCH 026/141] =?UTF-8?q?=E9=80=82=E9=85=8Dacc=5Fcompare?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/atat/pytorch/__init__.py | 3 +- .../visualization/builder/msprobe_adapter.py | 7 +++-- .../visualization/compare/graph_comparator.py | 31 +++++++------------ .../atat/pytorch/visualization/utils.py | 8 ++--- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/__init__.py b/debug/accuracy_tools/atat/pytorch/__init__.py index 198cea96de..dcdf4cb3a3 100644 --- a/debug/accuracy_tools/atat/pytorch/__init__.py +++ b/debug/accuracy_tools/atat/pytorch/__init__.py @@ -2,4 +2,5 @@ from .debugger.precision_debugger import PrecisionDebugger from .common.utils import seed_all from .compare.acc_compare import compare from .compare.distributed_compare import compare_distributed -from .visualization.json_parse_graph import compare_graph, build_graph +from .visualization.builder.graph_builder import GraphBuilder +from .visualization.compare.graph_comparator import GraphComparator diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 960e1c4f1a..56f9e4da61 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -50,7 +50,7 @@ def run_real_data(dump_path_param, csv_path): dump_path_param: 调用acc_compare接口所以来的参数结构 csv_path: 生成文件路径 """ - _do_multi_process(dump_path_param, csv_path) + return _do_multi_process(dump_path_param, csv_path) def get_input_output(node_data, node_id): @@ -134,7 +134,10 @@ def _parse_node(node_id, data_dict, stack_json_data, is_summary_compare, is_md5_ {'full_op_name': node_id, 
'full_info': stack_json_data[node_id]}) else: op_parsed_list.append({'full_op_name': node_id, 'full_info': None}) - return merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + result = merge_tensor(op_parsed_list, is_summary_compare, is_md5_compare) + if not result: + result['op_name'] = [] + return result def _format_decimal_string(s): diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 5426679ae8..94c6f76bb9 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -17,7 +17,7 @@ import os import pandas as pd from ....core.utils import Const, print_info_log from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data -from ..utils import GraphConst, load_json_file, load_data_json_file, write_csv_data +from ..utils import GraphConst, load_json_file, load_data_json_file, get_csv_df from ..graph.graph import Graph from .mode_adapter import ModeAdapter @@ -83,25 +83,16 @@ class GraphComparator: def _postcompare(self): if not self.ma.is_real_data_compare(): return - csv_path = os.path.join(self.output_path, GraphConst.CSV_FILE) - try: - write_csv_data(csv_path, self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) - run_real_data(self.dump_path_param, csv_path) - # 从csv文件读取精度指标,添加到node节点中 - df = pd.read_csv(csv_path) - compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} - for node in self.ma.compare_nodes: - precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) - node.data[GraphConst.JSON_STATUS_KEY] = precision_status - node.data[GraphConst.JSON_INDEX_KEY] = precision_index - if not precision_status: - self.ma.add_error_key(node.output_data) - node.add_suggestions() - except (FileNotFoundError, IOError) as e: - 
print_info_log('File error in _postcompare: {e}') - finally: - if os.path.isfile(csv_path): - os.remove(csv_path) + df = get_csv_df(self.ma.is_md5_compare(), self.ma.is_summary_compare(), True, self.ma.csv_data) + df = run_real_data(self.dump_path_param, df) + compare_data_dict = {row[0]: row.tolist() for _, row in df.iterrows()} + for node in self.ma.compare_nodes: + precision_status, precision_index, _ = self.ma.parse_result(node, [compare_data_dict]) + node.data[GraphConst.JSON_STATUS_KEY] = precision_status + node.data[GraphConst.JSON_INDEX_KEY] = precision_index + if not precision_status: + self.ma.add_error_key(node.output_data) + node.add_suggestions() def _compare_nodes(self, node_n): #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 692ac63270..3dda565388 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -49,14 +49,11 @@ def save_json_file(file_path, data): f.write(json.dumps(data, indent=4)) -def write_csv_data(csv_path, md5_compare, summary_compare, stack, csv_data): +def get_csv_df(md5_compare, summary_compare, stack, csv_data): """ 调用acc接口写入csv """ - if os.path.exists(csv_path): - os.remove(csv_path) - with os.fdopen(os.open(csv_path, os.O_RDWR | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP), 'w+') as f: - result_to_csv(md5_compare, summary_compare, stack, csv_data, f) + return result_to_csv(md5_compare, summary_compare, stack, csv_data, None) def str2float(percentage_str): @@ -99,7 +96,6 @@ class GraphConst: DUMP_FILE = 'dump.json' STACK_FILE = 'stack.json' GRAPH_FILE = 'graph.vis' - CSV_FILE = 'tmp.csv' ERROR_KEY = 'error_key' SUMMARY_COMPARE = 0 MD5_COMPARE = 1 -- Gitee From f1f56e778ef60787b3aed8217805920be96b5eee Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Thu, 11 Jul 2024 12:31:09 +0800 Subject: [PATCH 
027/141] =?UTF-8?q?=E6=A3=80=E8=A7=86=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/graph_builder.py | 2 +- .../visualization/builder/msprobe_adapter.py | 14 +++++++------- .../visualization/compare/graph_comparator.py | 5 ----- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 85634a461c..9edc260589 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -77,7 +77,7 @@ class GraphBuilder: node_data = data_dict.get(name, {}) # 添加输入输出数据 input_data, output_data = get_input_output(node_data, node.id) - # 跟新数据 + # 更新数据 node.set_input_output(input_data, output_data) # 添加节点 node.add_upnode(upnode) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 56f9e4da61..cb39538ac9 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -30,7 +30,7 @@ def get_compare_mode(dump_path_param): """ 获得比较模式,包括summary、MD5和真实数据三种模式 Args: - dump_path_param: 调用acc_compare接口所以来的参数结构 + dump_path_param: 调用acc_compare接口所依赖的参数 Returns: 0 summary mode, 1 md5 mode, 2 true data mode """ summary_compare, md5_compare = task_dumppath_get(dump_path_param) @@ -47,7 +47,7 @@ def run_real_data(dump_path_param, csv_path): """ 多进程运行生成真实数据 Args: - dump_path_param: 调用acc_compare接口所以来的参数结构 + dump_path_param: 调用acc_compare接口所依赖的参数 csv_path: 生成文件路径 """ return _do_multi_process(dump_path_param, csv_path) @@ -77,17 +77,17 @@ def get_input_output(node_data, node_id): return 
input_data, output_data -def compare_data(data_dict1, data_dict2): +def compare_data(data_dict_list1, data_dict_list2): """ 比较get_input_output中输出的结果是否结构一致,比较一致返回True """ - if len(data_dict1) != len(data_dict2): + if len(data_dict_list1) != len(data_dict_list2): return False # 用于比较两个节点是否相等的关键字段 tag_keys = ['type', 'dtype', 'shape'] - for key1, key2 in zip(data_dict1, data_dict2): - dict1 = data_dict1[key1] - dict2 = data_dict2[key2] + for key1, key2 in zip(data_dict_list1, data_dict_list2): + dict1 = data_dict_list1[key1] + dict2 = data_dict_list2[key2] for tag_key in tag_keys: tag_value1 = dict1.get(tag_key, None) tag_value2 = dict2.get(tag_key, None) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 94c6f76bb9..221b091ec6 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -31,11 +31,6 @@ class GraphComparator: def compare(self): """ 比较函数,初始化结束后单独调用。比较结果写入graph_n - Args: - construct_path: construct.json路径 - data_path: dump.json路径 - model_name: 模型名字,依赖外部输入 - Returns: Graph,代表图的数据结构 """ self._compare_nodes(self.graph_n.root) self._postcompare() -- Gitee From ae12e7df789067d3d80d3d57153d54f17f1b8412 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 15 Jul 2024 16:51:36 +0800 Subject: [PATCH 028/141] =?UTF-8?q?=E5=A4=84=E7=90=86=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E5=B7=B2=E7=9F=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 6 ++++-- .../atat/pytorch/visualization/compare/graph_comparator.py | 4 ++-- .../atat/pytorch/visualization/graph/base_node.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py 
b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index cb39538ac9..c73a7c5879 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -153,8 +153,6 @@ def _format_decimal_string(s): # 如果小数位数大于6,进行处理 if len(decimal_part) > GraphConst.ROUND_TH: number_float = float(number_str) - if is_percent: - number_float /= 100 formatted_number = f"{number_float:.{GraphConst.ROUND_TH}f}" # 如果原来是百分数,加回百分号 if is_percent: @@ -168,6 +166,7 @@ def _format_data(data_dict): """ 格式化数据,小数保留6位,处理一些异常值 """ + pattern = r'^[+-]?(\d+(.\d*)?|.\d+)([eE][+-]?\d+)$' for key, value in data_dict.items(): if isinstance(value, str): # 将单引号删掉,None换成null避免前端解析错误 @@ -175,6 +174,9 @@ def _format_data(data_dict): value = _format_decimal_string(value) if value is None or value == ' ': value = 'null' + # 科学计数法1.123123123123e-11,格式化为1.123123e-11 + if isinstance(value, float) and re.match(pattern, str(value)): + value = "{:.6e}".format(value) if isinstance(value, float): value = round(value, GraphConst.ROUND_TH) if not isinstance(value, (list, tuple, dict, str)): diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py index 221b091ec6..18b905f338 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py @@ -59,7 +59,7 @@ class GraphComparator: node.data.update(other_dict) if not precision_status: self.ma.add_error_key(node.output_data) - node.add_suggestions() + node.get_suggestions() def _parse_param(self, data_paths, stack_path, output_path): self.dump_path_param = { @@ -87,7 +87,7 @@ class GraphComparator: node.data[GraphConst.JSON_INDEX_KEY] = precision_index if not precision_status: self.ma.add_error_key(node.output_data) - 
node.add_suggestions() + node.get_suggestions() def _compare_nodes(self, node_n): #递归遍历NPU树中的节点,如果在Bench中找到具有相同名称的节点,检查他们的祖先和参数信息,检查一致则及逆行精度数据对比 diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 74868f83db..21b1db7fc2 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .node_op import NodeOp from ..utils import Suggestions, GraphConst from ..builder.msprobe_adapter import format_node_data, compare_data @@ -34,7 +35,7 @@ class BaseNode: info = f'id:\t{self.id}' return info - def __eq__(self,other): + def __eq__(self, other): """ 用来判断两个节点是否可以被匹配上,认为结构上是否一致 """ -- Gitee From 2b3630d2f641ff34356368000f3709068fd96303 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Mon, 15 Jul 2024 17:34:47 +0800 Subject: [PATCH 029/141] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index c73a7c5879..77bda68140 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -172,13 +172,13 @@ def _format_data(data_dict): # 将单引号删掉,None换成null避免前端解析错误 value = value.replace("'", "").replace('None', 'null') value = _format_decimal_string(value) - if value is None or value == ' ': + elif value is None or value == ' ': value = 'null' # 
科学计数法1.123123123123e-11,格式化为1.123123e-11 - if isinstance(value, float) and re.match(pattern, str(value)): + elif isinstance(value, float) and re.match(pattern, str(value)): value = "{:.6e}".format(value) - if isinstance(value, float): + elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) - if not isinstance(value, (list, tuple, dict, str)): + elif not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value -- Gitee From bfb6286b7dac55fff62b027c12ebb26054f05a88 Mon Sep 17 00:00:00 2001 From: l30044004 Date: Tue, 16 Jul 2024 11:18:07 +0800 Subject: [PATCH 030/141] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 2 +- debug/accuracy_tools/atat/pytorch/visualization/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 77bda68140..738f8dac54 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -175,7 +175,7 @@ def _format_data(data_dict): elif value is None or value == ' ': value = 'null' # 科学计数法1.123123123123e-11,格式化为1.123123e-11 - elif isinstance(value, float) and re.match(pattern, str(value)): + elif isinstance(value, float) and len(value) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): value = "{:.6e}".format(value) elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 3dda565388..9a26c2cdfe 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ 
b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -115,3 +115,4 @@ class GraphConst: SUGGEST_KEY = 'text' TAG_NA = 'na' OUTPUT_INDEX = -2 + STR_MAX_LEN = 50 -- Gitee From 50b6eb72ea1d567d289ad8928d6ae6f92f039201 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 19 Jul 2024 10:56:19 +0800 Subject: [PATCH 031/141] =?UTF-8?q?=E5=B0=8F=E5=80=BC=E5=9F=9F=E4=B8=8D?= =?UTF-8?q?=E6=98=BE=E7=A4=BA=E7=9B=B8=E5=AF=B9=E8=AF=AF=E5=B7=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/compare/mode_adapter.py | 8 ++++++-- debug/accuracy_tools/atat/pytorch/visualization/utils.py | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index 18c6bac6ba..b291db60d8 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -89,8 +89,12 @@ class ModeAdapter: headers = CompareConst.SUMMARY_COMPARE_RESULT_HEADER id_list = [headers.index(x) for x in key_list] ModeAdapter._match_data(value, compare_data, key_list, id_list) - # 相对误差大于0.5疑似有精度问题 - for item in key_list[4:]: + # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 + for index, item in enumerate(key_list[4:]): + value_diff = value.get(index) + if abs(value_diff) < GraphConst.SMALL_VALUE: + value[index] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) + continue relative_err = str2float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) node_data[key] = value diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/atat/pytorch/visualization/utils.py index 9a26c2cdfe..3e07122a94 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/utils.py @@ -80,6 
+80,7 @@ class ToolTip: COSINE = '通过计算两个向量的余弦值来判断其相似度,数值越接近于1说明计算出的两个张量越相似,实际可接受阈值为大于0.99。在计算中可能会存在nan,主要由于可能会出现其中一个向量为0' MAX_ABS_ERR = '当最大绝对误差越接近0表示其计算的误差越小,实际可接受阈值为小于0.001' MAX_RELATIVE_ERR = '当最大相对误差越接近0表示其计算的误差越小。当dump数据中存在0或Nan时,比对结果中最大相对误差则出现inf或Nan的情况,属于正常现象' + SMALL_VALUE_TIP = '{} 小于1e-3,不计算相对误差' class Suggestions: @@ -116,3 +117,4 @@ class GraphConst: TAG_NA = 'na' OUTPUT_INDEX = -2 STR_MAX_LEN = 50 + SMALL_VALUE = 1e-3 -- Gitee From e3af5d50fe6a6c8562ee8f890e6d8640c6757eb0 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 19 Jul 2024 12:43:07 +0800 Subject: [PATCH 032/141] bugfix --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 2 +- .../atat/pytorch/visualization/compare/mode_adapter.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 738f8dac54..2b77b7c5bf 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -175,7 +175,7 @@ def _format_data(data_dict): elif value is None or value == ' ': value = 'null' # 科学计数法1.123123123123e-11,格式化为1.123123e-11 - elif isinstance(value, float) and len(value) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): + elif isinstance(value, float) and len(str(value)) < GraphConst.STR_MAX_LEN and re.match(pattern, str(value)): value = "{:.6e}".format(value) elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index b291db60d8..f4b56494f0 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -91,9 +91,9 
@@ class ModeAdapter: ModeAdapter._match_data(value, compare_data, key_list, id_list) # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 for index, item in enumerate(key_list[4:]): - value_diff = value.get(index) - if abs(value_diff) < GraphConst.SMALL_VALUE: - value[index] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) + value_diff = value.get(key_list[index]) + if isinstance(value_diff, float) and abs(value_diff) < GraphConst.SMALL_VALUE: + value[item] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) continue relative_err = str2float(value.get(item)) max_relative_err = max(max_relative_err, relative_err) -- Gitee From 794ae5f397f3989ee156fc4890c3ad79a9a27405 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 19 Jul 2024 15:17:25 +0800 Subject: [PATCH 033/141] =?UTF-8?q?=E7=BB=9D=E5=AF=B9=E8=AF=AF=E5=B7=AE?= =?UTF-8?q?=E4=B8=BA0=E6=97=B6=E8=AE=A1=E7=AE=97=E7=9B=B8=E5=AF=B9?= =?UTF-8?q?=E8=AF=AF=E5=B7=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/compare/mode_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index f4b56494f0..c26c6759ab 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -92,7 +92,7 @@ class ModeAdapter: # 相对误差大于0.5疑似有精度问题,小值域1e-3不比较相对误差 for index, item in enumerate(key_list[4:]): value_diff = value.get(key_list[index]) - if isinstance(value_diff, float) and abs(value_diff) < GraphConst.SMALL_VALUE: + if isinstance(value_diff, float) and value_diff != 0 and abs(value_diff) < GraphConst.SMALL_VALUE: value[item] = ToolTip.SMALL_VALUE_TIP.format(key_list[index]) continue relative_err = str2float(value.get(item)) -- Gitee From 298e6dc98322734303252dd9c007f4dde9f01628 Mon Sep 17 
00:00:00 2001 From: huxiaobo Date: Tue, 23 Jul 2024 14:45:04 +0800 Subject: [PATCH 034/141] =?UTF-8?q?=E6=9B=BF=E6=8D=A2graph=20to=5Fdict?= =?UTF-8?q?=E6=96=B9=E6=B3=95=EF=BC=8C=E9=80=92=E5=BD=92=E6=94=B9=E9=81=8D?= =?UTF-8?q?=E5=8E=86=EF=BC=9B=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99=E7=9A=84?= =?UTF-8?q?json.dumps=EF=BC=9B=E6=B8=85=E7=90=86=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E7=9A=84node=E6=95=B0=E6=8D=AE=E8=BE=93=E5=87=BA=EF=BC=9BNodeO?= =?UTF-8?q?p=E4=B8=8B=E6=A0=87=E4=BF=AE=E6=94=B9=E4=B8=BA=E4=BB=8E0?= =?UTF-8?q?=E5=BC=80=E5=A7=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/graph_builder.py | 6 +++--- .../atat/pytorch/visualization/compare/mode_adapter.py | 3 +-- .../atat/pytorch/visualization/graph/base_node.py | 6 ------ .../atat/pytorch/visualization/graph/graph.py | 10 ---------- .../atat/pytorch/visualization/graph/node_op.py | 6 +++--- 5 files changed, 7 insertions(+), 24 deletions(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py index 9edc260589..f623a48ae3 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py @@ -48,10 +48,10 @@ class GraphBuilder: """ result = {} if graph_b: - result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict2() - result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict2() + result[GraphConst.JSON_NPU_KEY] = graph_n.to_dict() + result[GraphConst.JSON_BENCH_KEY] = graph_b.to_dict() else: - result = graph_n.to_dict2() + result = graph_n.to_dict() if tool_tip: result[GraphConst.JSON_TIP_KEY] = tool_tip save_json_file(filename, result) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py index c26c6759ab..3ce2f414c5 100644 --- 
a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py @@ -208,5 +208,4 @@ class ModeAdapter: CompareConst.COSINE: ToolTip.COSINE, CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR, CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR} - # 输出件优化 - return json.dumps(tips) + return tips diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py index 21b1db7fc2..f04f367f59 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py @@ -83,20 +83,14 @@ class BaseNode: """ 输出数据 """ - # 输出件优化 result = {} result['id'] = self.id result['node_type'] = self.op.value - result['type'] = self.id result['data'] = self.data result['output_data'] = format_node_data(self.output_data) result['input_data'] = format_node_data(self.input_data) - result['outputs'] = [] - result['inputs'] = [] result['upnode'] = self.upnode.id if self.upnode else 'None' result['subnodes'] = [node.id for node in self.subnodes] - result['is_forward'] = True - result['pair'] = 'None' result['matched_node_link'] = self.matched_node_link result['suggestions'] = self.suggestions return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py index 7af11fceec..6bae10ad3f 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py @@ -84,13 +84,3 @@ class Graph: info = self.node_map.get(node_id).to_dict() result[GraphConst.JSON_NODE_KEY][node_id] = info return result - - def to_dict2(self): - # 输出件优化 - # 为了防止输出件变动临时使用方法,会在后续重构中删除 - # 递归遍历,在正式交付中尽量避免 - result = {} - result[GraphConst.JSON_ROOT_KEY] = self.root.id if self.root else 'None' - 
result[GraphConst.JSON_NODE_KEY] = {} - Graph.dfs(self.root, result[GraphConst.JSON_NODE_KEY]) - return result diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py index a5bf8a4438..ed06e0ef73 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py @@ -19,8 +19,8 @@ from ..builder.msprobe_adapter import op_patterns class NodeOp(Enum): - module = 1 - function_api = 2 + module = 0 + function_api = 1 @staticmethod def get_node_op(node_name: str): @@ -28,7 +28,7 @@ class NodeOp(Enum): 基于代表节点的字符串,解析节点种类 """ for op in NodeOp: - index = op.value - 1 + index = op.value if index < 0 or index >= len(op_patterns): raise Exception("NodeOp and op_patterns in MsprobeAdapter do not match") pattern = op_patterns[index] -- Gitee From eb8904f2638f9ff784015dff4841f881dc26a6f7 Mon Sep 17 00:00:00 2001 From: huxiaobo Date: Tue, 23 Jul 2024 17:38:42 +0800 Subject: [PATCH 035/141] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D=E5=89=8D=E7=AB=AF=E6=97=A0=E6=B3=95=E8=A7=A3=E6=9E=90?= =?UTF-8?q?Inf=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../atat/pytorch/visualization/builder/msprobe_adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py index 2b77b7c5bf..adee140eea 100644 --- a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py @@ -179,6 +179,7 @@ def _format_data(data_dict): value = "{:.6e}".format(value) elif isinstance(value, float): value = round(value, GraphConst.ROUND_TH) - elif not isinstance(value, (list, tuple, dict, str)): + # 
Inf会走入这里,确保转成Inf。另外给其他不符合预期的类型做兜底方案 + if not isinstance(value, (list, tuple, dict, str)): value = str(value) data_dict[key] = value -- Gitee From 40bdba690a3873b201c0505e52ecf67f53e662f4 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Thu, 25 Jul 2024 16:51:02 +0800 Subject: [PATCH 036/141] =?UTF-8?q?=E5=8F=AF=E8=A7=86=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E7=A7=BB=E5=8A=A8=E5=88=B0msprobe=E5=8C=85=E5=86=85?= =?UTF-8?q?=EF=BC=8C=E9=80=82=E9=85=8D=E6=94=B9=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{atat => msprobe}/pytorch/visualization/__init__.py | 0 .../{atat => msprobe}/pytorch/visualization/builder/__init__.py | 0 .../pytorch/visualization/builder/graph_builder.py | 0 .../pytorch/visualization/builder/msprobe_adapter.py | 0 .../{atat => msprobe}/pytorch/visualization/compare/__init__.py | 0 .../pytorch/visualization/compare/graph_comparator.py | 0 .../pytorch/visualization/compare/mode_adapter.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/__init__.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/base_node.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/graph.py | 0 .../{atat => msprobe}/pytorch/visualization/graph/node_op.py | 0 .../{atat => msprobe}/pytorch/visualization/test.py | 0 .../{atat => msprobe}/pytorch/visualization/utils.py | 0 13 files changed, 0 insertions(+), 0 deletions(-) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/builder/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/builder/graph_builder.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/builder/msprobe_adapter.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/compare/__init__.py (100%) rename debug/accuracy_tools/{atat => 
msprobe}/pytorch/visualization/compare/graph_comparator.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/compare/mode_adapter.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/__init__.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/base_node.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/graph.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/graph/node_op.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/test.py (100%) rename debug/accuracy_tools/{atat => msprobe}/pytorch/visualization/utils.py (100%) diff --git a/debug/accuracy_tools/atat/pytorch/visualization/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/builder/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/builder/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/builder/graph_builder.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/builder/graph_builder.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/builder/msprobe_adapter.py rename to 
debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/compare/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/graph_comparator.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/compare/mode_adapter.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/__init__.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/__init__.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/__init__.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/base_node.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/base_node.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py similarity index 100% rename from 
debug/accuracy_tools/atat/pytorch/visualization/graph/graph.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/graph.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/graph/node_op.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/test.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/test.py diff --git a/debug/accuracy_tools/atat/pytorch/visualization/utils.py b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py similarity index 100% rename from debug/accuracy_tools/atat/pytorch/visualization/utils.py rename to debug/accuracy_tools/msprobe/pytorch/visualization/utils.py -- Gitee From 43a1ab1c3be7e4678baf2a29a36a138eeba0f41b Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 26 Jul 2024 14:17:24 +0800 Subject: [PATCH 037/141] =?UTF-8?q?=E5=8F=AF=E8=A7=86=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E9=80=82=E9=85=8D=E5=B7=A5=E5=85=B7=E9=87=8D=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/builder/msprobe_adapter.py | 4 ++-- .../pytorch/visualization/compare/graph_comparator.py | 3 --- .../pytorch/visualization/compare/mode_adapter.py | 2 +- .../msprobe/pytorch/visualization/test.py | 11 +++++------ .../msprobe/pytorch/visualization/utils.py | 4 +--- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py index 2b77b7c5bf..4dfbf4c7fa 100644 --- 
a/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/builder/msprobe_adapter.py @@ -14,9 +14,9 @@ # limitations under the License. import re -from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, task_dumppath_get, _do_multi_process +from ...compare.acc_compare import read_op, merge_tensor, get_accuracy, _do_multi_process +from ....core.common.utils import task_dumppath_get from ..utils import GraphConst -from ....core.utils import print_info_log # 用于将节点名字解析成对应的NodeOp的规则 diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py index 18b905f338..3d5f297246 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/graph_comparator.py @@ -13,9 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import pandas as pd -from ....core.utils import Const, print_info_log from ..builder.msprobe_adapter import compare_node, get_compare_mode, run_real_data from ..utils import GraphConst, load_json_file, load_data_json_file, get_csv_df from ..graph.graph import Graph diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py index c26c6759ab..d8a7c8f21f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/compare/mode_adapter.py @@ -14,7 +14,7 @@ # limitations under the License. 
import json -from ....core.utils import CompareConst, Const +from ....core.common.const import CompareConst, Const from ..utils import ToolTip, GraphConst, str2float diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py index 9784a96b80..61f3b788f9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py @@ -14,15 +14,14 @@ # limitations under the License. import os -import re import time import shutil import filecmp from .compare.graph_comparator import GraphComparator from .utils import GraphConst from .builder.graph_builder import GraphBuilder -from ...core.utils import print_info_log -from ...core.file_check_util import FileOpen, create_directory +from ...core.common.log import logger +from ...core.common.file_check import create_directory def compare_graph(dump_path_n, dump_path_b, out_path): @@ -53,7 +52,7 @@ def run_st(data_path): start_time = time.time() run_bench(data_path, 'output2') end_time = time.time() - print_info_log(f'run_st time cost: {end_time - start_time}') + logger.info(f'run_st time cost: {end_time - start_time}') # 比较output2的结果和output1 的bench结果差距 for data_dir in os.listdir(data_path): data_dir = os.path.join(data_path, data_dir) @@ -67,9 +66,9 @@ def run_st(data_path): file2 = os.path.join(output2, vis_file) result = filecmp.cmp(file1, file2) if result: - print_info_log('pass ' + file1) + logger.info('pass ' + file1) else: - print_info_log('not pass ' + file1) + logger.info('not pass ' + file1) def run_bench(data_path, output_dir): diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py index 3e07122a94..fb046f9758 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/utils.py @@ -14,9 +14,7 @@ # limitations under the License. 
import json -import os -import stat -from ...core.file_check_util import FileOpen +from ...core.common.file_check import FileOpen from ..compare.acc_compare import result_to_csv -- Gitee From 5961e71345bc701a187849e56efa0f3461a5d4f8 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Fri, 26 Jul 2024 17:58:47 +0800 Subject: [PATCH 038/141] =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/pytorch/visualization/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py index 61f3b788f9..165d54ce17 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/test.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/test.py @@ -20,7 +20,7 @@ import filecmp from .compare.graph_comparator import GraphComparator from .utils import GraphConst from .builder.graph_builder import GraphBuilder -from ...core.common.log import logger +from ...pytorch.common.log import logger from ...core.common.file_check import create_directory -- Gitee From 2943fde2d13c6fac76cf598992088c4768de5eda Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Thu, 1 Aug 2024 09:58:14 +0800 Subject: [PATCH 039/141] update_profiler_pre_research_code --- .idea/workspace.xml | 81 ++++++ profiler/README.md | 1 + profiler/advisor/README.md | 11 +- profiler/advisor/analyzer/base_analyzer.py | 8 - .../computation/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_analyzer.py | 36 +++ .../ai_core_freq/ai_core_freq_checker.py | 100 ++++++++ .../computation/aicpu/aicpu_checker.py | 6 +- .../computation/profiling_analyzer.py | 9 +- .../dataloader/dataloader_analyzer.py | 30 +++ .../analyzer/dataloader/dataloader_checker.py | 84 ++++++ .../graph_fusion/graph_fusion_checker.py | 2 +- 
.../analyzer/overall/overall_analyzer.py | 45 ---- .../overall/overall_summary_analyzer.py | 240 ++++++++---------- .../analyzer/schedule/syncbn/__init__.py | 0 .../schedule/syncbn/syncbn_analyzer.py | 30 +++ .../schedule/syncbn/syncbn_checker.py | 70 +++++ .../schedule/synchronize_stream/__init__.py | 0 .../synchronize_stream_analyzer.py | 32 +++ .../synchronize_stream_checker.py | 89 +++++++ .../schedule/timeline_base_checker.py | 91 +++++++ profiler/advisor/common/analyzer_scopes.py | 4 + profiler/advisor/common/constant.py | 9 +- profiler/advisor/common/graph/graph_parser.py | 9 +- profiler/advisor/common/profiling/ge_info.py | 3 +- profiler/advisor/common/profiling/msprof.py | 3 +- .../advisor/common/profiling/op_summary.py | 4 +- profiler/advisor/common/profiling/tasktime.py | 4 +- .../advisor/common/timeline/fusion_ops_db.py | 6 +- profiler/advisor/config/config.ini | 1 + profiler/advisor/config/config.py | 7 + .../config/profiling_data_version_config.yaml | 17 +- .../advisor/dataset/ai_core_freq/__init__.py | 0 .../ai_core_freq/ai_core_freq_dataset.py | 148 +++++++++++ .../advisor/dataset/profiling/device_info.py | 2 + .../dataset/profiling/profiling_dataset.py | 15 +- .../dataset/profiling/profiling_parser.py | 27 +- .../advisor/dataset/timeline_event_dataset.py | 176 ++++++++++--- .../html/templates/ai_core_frequency.html | 27 ++ .../html/templates/slow_dataloader.html | 18 ++ .../html/templates/sync_batchnorm.html | 30 +++ .../html/templates/synchronize_stream.html | 57 +++++ profiler/advisor/img/overall.png | Bin 64492 -> 49616 bytes profiler/advisor/img/overall_0.png | Bin 0 -> 56377 bytes profiler/advisor/interface/interface.py | 18 +- profiler/advisor/result/item.py | 2 +- profiler/advisor/result/result.py | 18 +- profiler/advisor/rules/dataloader.yaml | 9 + profiler/advisor/rules/sync_batchnorm.yaml | 41 +++ profiler/advisor/rules/synchronize.yaml | 8 + profiler/advisor/utils/utils.py | 64 ++++- profiler/cli/__init__.py | 2 +- 
profiler/cli/analyze_cli.py | 3 - profiler/cli/compare_cli.py | 2 + .../common_func/file_manager.py | 19 ++ profiler/compare_tools/README.md | 82 +++++- .../comparator/api_compare_comparator.py | 32 +++ .../comparator/kernel_compare_comparator.py | 35 +++ .../compare_bean/api_compare_bean.py | 47 ++++ .../compare_bean/kernel_compare_bean.py | 75 ++++++ .../origin_data_bean/kernel_details_bean.py | 6 + .../data_prepare/operator_data_prepare.py | 17 ++ .../disaggregate/overall_perf_interface.py | 28 +- .../generator/detail_performance_generator.py | 22 +- .../profiling_parser/base_profiling_parser.py | 19 +- .../profiling_parser/gpu_profiling_parser.py | 5 + .../profiling_parser/npu_profiling_parser.py | 24 ++ .../compare_backend/utils/args_manager.py | 13 +- .../compare_backend/utils/compare_args.py | 4 + .../compare_backend/utils/constant.py | 7 +- .../compare_backend/utils/excel_config.py | 48 +++- .../compare_backend/utils/torch_op_node.py | 8 + .../compare_backend/utils/tree_builder.py | 3 +- .../view/work_sheet_creator.py | 12 +- profiler/compare_tools/img/OverallMetrics.png | Bin 0 -> 66941 bytes profiler/compare_tools/performance_compare.py | 2 + profiler/test/run_ut.py | 2 + .../test_dataloader_checker.py | 65 +++++ .../timeline_advice/test_syncbn_checker.py | 62 +++++ .../test_synchronize_stream.py | 55 ++++ .../compute_advice/test_frequency_advice.py | 145 +++++++++++ .../test_base_profiling_parser.py | 5 + 82 files changed, 2236 insertions(+), 305 deletions(-) create mode 100644 .idea/workspace.xml create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/__init__.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py create mode 100644 profiler/advisor/analyzer/dataloader/dataloader_analyzer.py create mode 100644 profiler/advisor/analyzer/dataloader/dataloader_checker.py delete mode 100644 
profiler/advisor/analyzer/overall/overall_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/__init__.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py create mode 100644 profiler/advisor/analyzer/schedule/timeline_base_checker.py create mode 100644 profiler/advisor/dataset/ai_core_freq/__init__.py create mode 100644 profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py create mode 100644 profiler/advisor/display/html/templates/ai_core_frequency.html create mode 100644 profiler/advisor/display/html/templates/slow_dataloader.html create mode 100644 profiler/advisor/display/html/templates/sync_batchnorm.html create mode 100644 profiler/advisor/display/html/templates/synchronize_stream.html create mode 100644 profiler/advisor/img/overall_0.png create mode 100644 profiler/advisor/rules/dataloader.yaml create mode 100644 profiler/advisor/rules/sync_batchnorm.yaml create mode 100644 profiler/advisor/rules/synchronize.yaml create mode 100644 profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py create mode 100644 profiler/compare_tools/img/OverallMetrics.png create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_dataloader_checker.py create mode 100644 
profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py create mode 100644 profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py create mode 100644 profiler/test/ut/advisor/compute_advice/test_frequency_advice.py diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000000..a364b7d06a --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + { + "keyToString": { + "RunOnceActivity.OpenProjectViewOnStart": "true", + "RunOnceActivity.ShowReadmeOnStart": "true", + "last_opened_file_path": "D:/mycode/att" + } +} + + + + + + + + + + + + + + + + + + + + + + + + 1716885945639 + + + + \ No newline at end of file diff --git a/profiler/README.md b/profiler/README.md index 1669e3524e..549ffefc14 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -91,6 +91,7 @@ ascend pytorch profiler数据目录结构如下: | profiler版本 | 发布日期 | 下载链接 | 校验码 | | ------------ | ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | + | 1.2.0 | 2024-07-25 | [msprof_analyze-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.0/msprof_analyze-1.2.0-py3-none-any.whl) | 6a4366e3beca40b4a8305080e6e441d6ecafb5c05489e5905ac0265787555f37 | | 1.1.2 | 2024-07-12 | [msprof_analyze-1.1.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.2/msprof_analyze-1.1.2-py3-none-any.whl) | af62125b1f9348bf491364e03af712fc6d0282ccee3fb07458bc9bbef82dacc6 | | 1.1.1 | 2024-06-20 | [msprof_analyze-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.1/msprof_analyze-1.1.1-py3-none-any.whl) | 76aad967a3823151421153d368d4d2f8e5cfbcb356033575e0b8ec5acea8e5e4 | | 1.1.0 | 2024-05-28 | [msprof_analyze-1.1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.0/msprof_analyze-1.1.0-py3-none-any.whl) | 
b339f70e7d1e45e81f289332ca64990a744d0e7ce6fdd84a8d82e814fa400698 | diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md index c650f40b3e..7702711055 100644 --- a/profiler/advisor/README.md +++ b/profiler/advisor/README.md @@ -36,11 +36,11 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 3. 查看结果。 - 分析结果输出相关简略建议到执行终端中,并生成`att_advisor_{timestamp}.html`和`att_advisor_{timestamp}.xlsx`文件供用户预览。 + 分析结果输出相关简略建议到执行终端中,并生成`mstt_advisor_{timestamp}.html`和`mstt_advisor_{timestamp}.xlsx`文件供用户预览。 - `att_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 + `mstt_advisor_{timestamp}.xlsx`文件内容与执行终端输出一致。 - `att_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 + `mstt_advisor_{timestamp}.html`文件分析详见“**报告解析**”。 执行终端输出示例如下: @@ -72,6 +72,7 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 | | block_dim_analysis | block dim算子调优 | | | operator_no_bound_analysis | operator no bound | | | graph | 融合算子图调优 | +| | freq_analysis | AI Core算子降频分析 | | scheduling | timeline_fusion_ops | 亲和API替换调优 | | | timeline_op_dispatch | 识别算子下发问题(路径3/路径5) | @@ -132,6 +133,8 @@ cluster模块的分析包含快慢卡和快慢链路分析,仅识别问题, overall模块的分析包含当前训练任务慢卡的性能拆解,按照计算、通信和下发三个维度进行耗时的统计,可以基于该分析识别到训练性能瓶颈是计算、通信还是下发问题,同样不提供调优建议。 +![输入图片说明](./img/overall_0.png) + ![输入图片说明](./img/overall.png) schedule模块包含亲和API、aclOpCompile、syncBatchNorm、SynchronizeStream等多项检测。 @@ -152,7 +155,7 @@ torch_npu.npu.config.allow_internal_format = False ![schedule_3](./img/schedule_3.png) -computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 +computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape、AI Core算子降频分析等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 ![computation_1](./img/computation_1.png) diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index e0e17320b3..ada1b0bf4f 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -73,14 +73,6 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): def optimize(self, 
**kwargs): pass - @abstractmethod - def make_record(self): - pass - - @abstractmethod - def make_render(self): - pass - def init_dataset_list(self)->None: dataset_cls_list = self.dataset_cls_list if len(dataset_cls_list) == 0: diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py b/profiler/advisor/analyzer/computation/ai_core_freq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py new file mode 100644 index 0000000000..4f25deff7c --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py @@ -0,0 +1,36 @@ +import logging + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_checker import AICoreFreqChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class AICoreFreqAnalyzer(BaseAnalyzer): + dataset_cls_list = [AICoreFreqDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = AICoreFreqDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + self.html = None + + @BaseAnalyzer.check_data((AICoreFreqDataset.get_key(),)) + def optimize(self, **kwargs): + if not Config().get_config("aic_frequency"): + logger.warning("Can not find ai core frequency in info.json*, please check data integrity.") + return self.result + add_render_list = kwargs.get("add_render_list", True) + ai_core_freq_checker = 
AICoreFreqChecker() + ai_core_freq_checker.check_ai_core_freq(self.dataset) + if not ai_core_freq_checker.ai_core_freq_issues: + return self.result + ai_core_freq_checker.make_record(self.result) + self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list) + return self.result diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py new file mode 100644 index 0000000000..5ea4dbd754 --- /dev/null +++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py @@ -0,0 +1,100 @@ +import logging + +from profiler.advisor.dataset.ai_core_freq.ai_core_freq_dataset import AICoreFreqDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.config.config import Config +from profiler.advisor.utils.utils import convert_to_float + +logger = logging.getLogger() + + +class AICoreFreqChecker: + DEFAULT_FREQ = 1800 + DECREASE_FREQ_RATIO = 0.05 + SHOW_TOPK_OPS = 10 + TOTAL_DURATION_INDEX = 2 + DECREASE_FREQ_RATIO_INDEX = 3 + + def __init__(self): + + self.ai_core_freq_issues = False + self.desc = "" + self.suggestions = "" + self.decrease_freq_ops = [] + self.headers = [] + self.op_freq = None + self.rank_id = None + self.stage = None + + def check_ai_core_freq(self, event_dataset: AICoreFreqDataset, rank_id=None, stage=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "op_freq") or not getattr(event_dataset, "op_freq"): + logger.debug("Skip slow ai core frequency checker, " + "because no ai core frequency were recorded in trace_view.json") + return + + self.rank_id = rank_id + self.stage = stage + self.op_freq = event_dataset.op_freq + for op_name, op_info in self.op_freq.items(): + freq_list = op_info.get("freq_list", []) + if not freq_list: + continue + + op_count = 
op_info.get("count", 0) + op_total_duration = round(op_info.get("dur", 0), 2) + max_freq = max(self.DEFAULT_FREQ, convert_to_float(Config().get_config("aic_frequency"))) + + decrease_freq_ratio = sum(max_freq - freq for freq in freq_list) / (max_freq * len(freq_list)) + if decrease_freq_ratio >= self.DECREASE_FREQ_RATIO: + self.ai_core_freq_issues = True + self.decrease_freq_ops.append([op_name, op_count, op_total_duration, + f"{round(decrease_freq_ratio, 4):.2%}", + round(sum(freq_list) / len(freq_list), 2), + max(freq_list), min(freq_list)]) + + if self.decrease_freq_ops: + # 按算子总耗时和降频比率 降序排列 + self.decrease_freq_ops.sort(key= + lambda x: (x[self.TOTAL_DURATION_INDEX], x[self.DECREASE_FREQ_RATIO_INDEX]), + reverse=True) + + self.desc = (f"{len(self.decrease_freq_ops)} operators are found during frequency reduction, and the reduction " + f"ratio is larger than {self.DECREASE_FREQ_RATIO}.") + if self.rank_id: + self.desc = f"For rank {self.rank_id}, " + self.desc.lower() + self.suggestions = "Please check the temperature or max power of your machine." 
+ + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem("AI Core Frequency", self.desc, [self.suggestions]) + result.add(OptimizeRecord(optimization_item)) + + self.headers = ["Operator name", "Count", "Total duration(us)", "AI CORE frequency decreased ratio", + "Average frequency", "Max frequency", "Min frequency"] + if self.rank_id: + self.headers = ["Rank id"] + self.headers + sub_table_name = "AI Core Frequency" if not self.stage else f"Stage-{self.stage}: AI Core Frequency" + result.add_detail(sub_table_name, headers=self.headers) + + for row in self.decrease_freq_ops: + if self.rank_id: + row = [self.rank_id] + row + result.add_detail(sub_table_name, detail=row) + + def make_render(self, html_render, add_render_list=True): + if self.SHOW_TOPK_OPS: + self.desc += f" Only show {self.SHOW_TOPK_OPS} operators here, see latest mstt_advisor.xlsx for details." + return html_render.render_template(key="computation", + template_dir="templates", + template_name="ai_core_frequency.html", + desc=self.desc, + suggestion=self.suggestions, + headers=self.headers, + data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS], + add_render_list=add_render_list) diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py index 4eca1c6c02..0caede4b89 100644 --- a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -3,13 +3,13 @@ import os from functools import partial from typing import List, Dict, Optional -import yaml from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker, logger from profiler.advisor.analyzer.schedule.fusion_ops.timeline_api_stack_checker import OpStackFinder from profiler.advisor.common import constant from profiler.advisor.dataset.dataset import Dataset from 
profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.cluster_analyse.common_func.file_manager import FileManager class AicpuChecker(OperatorChecker): @@ -47,8 +47,8 @@ class AicpuChecker(OperatorChecker): if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) return {} - with open(rule_path, 'r') as f: - self.aicpu_rules = yaml.safe_load(f) + + self.aicpu_rules = FileManager.read_yaml_file(rule_path) self.filter_aicpu_rules(self.aicpu_rules) for checker_name, check_rule in self.aicpu_rules.items(): if not isinstance(check_rule, (list, dict,)): diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py index 8682617700..2021bcd576 100644 --- a/profiler/advisor/analyzer/computation/profiling_analyzer.py +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -1,19 +1,15 @@ import logging from abc import ABC -from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.computation.aicpu.aicpu_checker import AicpuChecker from profiler.advisor.analyzer.computation.bound.block_dim_checker import BlockDimChecker from profiler.advisor.analyzer.computation.bound.operator_bound_checker import OperatorBoundChecker -from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.analyzer.computation.op_compile.dynamic_shape_checker import DynamicShapeChecker from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset -from 
profiler.advisor.utils.utils import get_supported_subclass logger = logging.getLogger() @@ -76,14 +72,15 @@ class BlockDimAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = BlockDimChecker(self.cann_version) - + class OperatorBoundAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) self.checker = OperatorBoundChecker(self.cann_version) + class AicpuAnalyzer(ProfilingAnalyzer): def __init__(self, collection_path, **kwargs) -> None: super().__init__(collection_path, **kwargs) - self.checker = AicpuChecker(self.cann_version) \ No newline at end of file + self.checker = AicpuChecker(self.cann_version) diff --git a/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py new file mode 100644 index 0000000000..291c3a1f94 --- /dev/null +++ b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py @@ -0,0 +1,30 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.dataloader.dataloader_checker import DataloaderChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class DataloaderAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, n_processes: int = 1, **kwargs) -> None: + super().__init__(collection_path, n_processes, **kwargs) + key = TimelineEventDataset.get_key() + self.dataset = self.get_first_data_by_key(self.dataset_list, key) + self.result = OptimizeResult() + self.html_render = HTMLRender() + + @BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + 
dataloader_checker = DataloaderChecker() + dataloader_checker.check_slow_dataloader(self.dataset) + dataloader_checker.make_record(self.result) + dataloader_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/dataloader/dataloader_checker.py b/profiler/advisor/analyzer/dataloader/dataloader_checker.py new file mode 100644 index 0000000000..eb1886284e --- /dev/null +++ b/profiler/advisor/analyzer/dataloader/dataloader_checker.py @@ -0,0 +1,84 @@ +import os +import re +import logging +import yaml + +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.cluster_analyse.common_func.file_manager import FileManager + +logger = logging.getLogger() + + +class DataloaderChecker: + + def __init__(self): + + self.dataloader_issues = False + self.optimization_item = [] + self.desc = "" + self.suggestions = [] + self.dataloader_duration_threshold = None + self._init_rule() + + def check_slow_dataloader(self, event_dataset: TimelineEventDataset): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "dataloader") or not getattr(event_dataset, "dataloader"): + logger.debug("Skip slow dataloader checker, because no dataloader duration larger than %s", + self.dataloader_duration_threshold) + return + for event in event_dataset.dataloader: + + dataloader_duration = float(event.dur) / 1000 + if dataloader_duration < self.dataloader_duration_threshold: + continue + self.desc = self.desc.format(dataloader_duration=dataloader_duration, + dataloader_duration_threshold=self.dataloader_duration_threshold) + self.dataloader_issues = True + + if re.search("singleprocess", event.name.lower()): + self.suggestions = self._reset_suggestions(["I/O", "num_workers"]) + + def make_record(self, result: OptimizeResult): + """ + make record for what 
and how to optimize + """ + if not self.dataloader_issues: + return + + self.optimization_item.append(OptimizeItem("Slow dataloader", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not self.dataloader_issues: + return + html_render.render_template(key="dataloader", + template_dir="templates", + template_name="slow_dataloader.html", + desc=self.desc, + suggestions=self.suggestions) + + def _init_rule(self): + dataloader_rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), + "rules", + "dataloader.yaml" + ) + dataloader_rule = FileManager.read_yaml_file(dataloader_rule_path) + + self.dataloader_duration_threshold = dataloader_rule.get("dataloader_duration_threshold") + self.desc = dataloader_rule.get("problem") + self.suggestions = dataloader_rule.get("solutions") + + def _reset_suggestions(self, suggestion_pattern_list): + + suggestions = [] + for solution in self.suggestions: + for suggestion_pattern in suggestion_pattern_list: + if re.search(suggestion_pattern, solution): + suggestions.append(solution) + return suggestions diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py index e64020fdfe..30bd432379 100644 --- a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py @@ -149,7 +149,7 @@ class GraphFusionRules: optimization_item = OptimizeItem( "fusion issue", f"Found {len(self.candidates)} fusion issues", - ["Check fusion issues detail in att_advisor*.html"] + ["Check fusion issues detail in mstt_advisor*.html"] ) total_time = 0.0 for candidate in self.task_duration_list: diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py deleted file mode 100644 index 
916a396b3d..0000000000 --- a/profiler/advisor/analyzer/overall/overall_analyzer.py +++ /dev/null @@ -1,45 +0,0 @@ -import logging -from typing import Dict, List - -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult -from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface - -logger = logging.getLogger() - - -class OverallSummaryAnalyzer(BaseAnalyzer): - - def __init__(self, profiling_path, benchmark_profiling_path=None, **kwargs): - self.benchmark_profiling_path = benchmark_profiling_path or profiling_path - self.profiling_path = profiling_path - self.html_render = HTMLRender() - self.result = OptimizeResult() - - def optimize(self, **kwargs): - compare_result = ComparisonInterface(self.benchmark_profiling_path, self.profiling_path).compare( - Constant.OVERALL_COMPARE) - - headers = compare_result.get('Model Profiling Time Distribution').get("headers", []) - rows = compare_result.get('Model Profiling Time Distribution').get("rows", []) - - self.make_record() - self.make_render(headers=headers, rows=rows) - return compare_result - - def make_record(self): - pass - - def make_render(self, **kwargs): - headers = kwargs.get("headers") - rows = kwargs.get("rows") - - if not headers or not rows: - logger.info("Empty headers or rows, skip render overall analysis html") - self.html_render.render_template(key="overall", - template_dir="templates", - template_name="overall_analysis.html", - headers=kwargs.get("headers"), - rows=kwargs.get("rows")) diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index c74ae05103..8e93dbda77 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ 
b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -13,27 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -import copy - -import logging -from typing import Dict, List +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult from profiler.compare_tools.compare_backend.utils.constant import Constant -from profiler.advisor.common import constant as const from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface -from profiler.advisor.utils.utils import get_file_path_from_directory, load_parameter class OverallSummaryAnalyzer(BaseAnalyzer): OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis" advice_map = { - "Computing Time": "if you want more detailed advice please go to att_advisor_*.html", - "Uncovered Communication Time": "if you want more detailed advice please go to att_advisor_*.html", - "Free Time": "if you want more detailed advice please go to att_advisor_*.html" + "Computing Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Uncovered Communication Time": "if you want more detailed advice please go to mstt_advisor_*.html", + "Free Time": "if you want more detailed advice please go to mstt_advisor_*.html" } time_name_map = { "Computing Time": "computing", @@ -47,45 +41,37 @@ class OverallSummaryAnalyzer(BaseAnalyzer): 'SDMA Time(Num)': 'SDMA Time' } performance_time_dict = { - "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)', - 'Flash Attention Time(Backward)(Num)', 'Other Time'], - "Uncovered Communication Time(Wait Time)": [], - "Free Time": 
['SDMA Time(Num)'] + "Computing Time": "computing_time_ms", + " -- Flash Attention": "fa_time_ms", + " -- Conv": "conv_time_ms", + " -- Matmul": "matmul_time_ms", + " -- Vector": "vector_time_ms", + " -- SDMA(Tensor Move)": "tensor_move_time_ms", + " -- Other Cube": "other_cube_time_ms", + "Uncovered Communication Time": "uncovered_communication_time_ms", + " -- Wait": "wait_time_ms", + " -- Transmit": "transmit_time_ms", + "Free Time": "free_time_ms", + " -- SDMA": "sdma_time_ms", + " -- Free": "free_ms", + "E2E Time": "e2e_time_ms" } def __init__(self, collection_path: str, n_processes: int = 1, **kwargs): profile_path = get_profile_path(collection_path) super().__init__(profile_path, n_processes, **kwargs) - self.base_collection_path = kwargs.get("base_collection_path", "") - self._has_base_collection = False + self.benchmark_profiling_path = kwargs.get("benchmark_profiling_path", "") + self._has_benchmark_profiling = False self._is_minimal_profiling = False self.cur_data = {} - self.cur_data_table = {} self.cur_bottleneck = {} + self._disaggregate_perf = {} + self._disaggregate_benchmark_perf = {} self.cur_advices = "" - self._headers = [] - self._base_data = [] - self._comparison_data = [] self.html_render = HTMLRender() self.result = OptimizeResult() self.bottleneck_str = "" - self.bottleneck_table = {} - - @staticmethod - def split_duration_and_num(time_value: str) -> tuple: - split_data = time_value.split("s") # time value example: 0.229s(1756) - duration, num = 0.0, None - if len(split_data) >= 2: - try: - num = int(split_data[1].strip("()")) - except ValueError: - pass - if len(split_data) >= 1: - try: - duration = float(split_data[0]) - except ValueError: - print(f"[WARNING] Invalid time value: {time_value}.") - return duration, num + self.over_summary_analysis = {} @staticmethod def calculate_ratio(dividend, divisor): @@ -93,131 +79,121 @@ class OverallSummaryAnalyzer(BaseAnalyzer): return float("inf") return dividend / divisor + @staticmethod + def 
get_time_category_dict(overall_dict: dict): + time_category_dict = { + "Computing Time": round(overall_dict.get('computing_time_ms', 0.0), 3), + "Uncovered Communication Time": round(overall_dict.get('uncovered_communication_time_ms', 0.0), 3), + "Free Time": round(overall_dict.get('free_time_ms', 0.0), 3) + } + return time_category_dict + def path_check(self): - if self.base_collection_path: - if os.path.exists(self.base_collection_path): - self._has_base_collection = True + if self.benchmark_profiling_path: + if os.path.exists(self.benchmark_profiling_path): + self._has_benchmark_profiling = True else: - print(f"[WARNING] Invalid path which not exists: {self.base_collection_path}.") + print(f"[WARNING] Invalid path which not exists: {self.benchmark_profiling_path}.") return os.path.exists(self.collection_path) def process(self): - base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path - result_data = ComparisonInterface(base_collection_path, self.collection_path).compare(Constant.OVERALL_COMPARE) - for data in result_data.values(): - self._headers = data.get("headers", []) - rows = data.get("rows", []) - if len(rows) == 2: - self._base_data = rows[0] - self._comparison_data = rows[1] - if not self._headers or not self._comparison_data: + self._disaggregate_perf = ComparisonInterface(self.collection_path).disaggregate_perf(Constant.OVERALL_COMPARE) + if not self._disaggregate_perf: return - self._is_minimal_profiling = 'E2E Time(Not minimal profiling)' not in self._headers - if self._has_base_collection: - self.cur_data["comparison_result"] = result_data - time_category_dict = {} - for time_category, time_list in self.performance_time_dict.items(): - time_value = self.get_time_value(time_category, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - duration, _ = self.split_duration_and_num(time_value) - time_category = time_category.split("(")[0] - time_category_dict[time_category] = 
duration - self.get_sub_category_time(time_category, time_list, duration) - self.cur_data["overall_data"] = time_category_dict - - def get_time_value(self, header_name: str, data_list: list): - try: - data_index = self._headers.index(header_name) - except ValueError: - return Constant.INVALID_VALUE - try: - time_value = data_list[data_index] - except IndexError: - return Constant.INVALID_VALUE - return time_value - - def get_sub_category_time(self, category: str, time_list: list, total_duration: float): - sub_time_dict = {} - for time_name in time_list: - time_value = self.get_time_value(time_name, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, "")) - duration, num = self.split_duration_and_num(time_value) - sub_time_dict.setdefault(f"Duration(s)", []).append(duration) - sub_time_dict.setdefault(f"Duration Ratio", []).append( - "{:.2%}".format(self.calculate_ratio(duration, total_duration))) - sub_time_dict.setdefault(f"Kernel Number", []).append(num) - self.cur_data[self.time_name_map.get(category)] = sub_time_dict + self._is_minimal_profiling = self._disaggregate_perf.get("minimal_profiling", False) + self.cur_data["overall_data"] = self.get_time_category_dict(self._disaggregate_perf.get('overall', {})) + if self._has_benchmark_profiling: + self._disaggregate_benchmark_perf = ComparisonInterface( + self.benchmark_profiling_path).disaggregate_perf(Constant.OVERALL_COMPARE) def identify_bottleneck(self): overall_data = self.cur_data.get("overall_data") if not overall_data: return e2e_time = '%.3f' % sum([data for data in overall_data.values()]) - overall_bottleneck = f"The Model E2E Time is {e2e_time}s.\n" + overall_bottleneck = f"The Model E2E Time is {e2e_time}ms.\n" comparison_bottleneck = "" for time_type, time_value in overall_data.items(): - # add subtype time bottleneck - self.cur_bottleneck[self.time_name_map.get(time_type)] = 
f"{time_type} is {time_value}s.\n" # add overall bottleneck - overall_bottleneck += f" -- {time_type} is {time_value}s\n" + overall_bottleneck += f" -- {time_type} is {time_value}ms\n" if time_type == "Free Time" and self._is_minimal_profiling and self.calculate_ratio(time_value, e2e_time) > 0.1: overall_bottleneck += "percentage of free time exceed the threshold 10%." - if not self._has_base_collection: + if not self._has_benchmark_profiling: continue # add comparison bottleneck - time_type_origin = "Uncovered Communication Time(Wait Time)" \ - if time_type == "Uncovered Communication Time" else time_type - base_duration, _ = self.split_duration_and_num(self.get_time_value(time_type_origin, self._base_data)) + base_duration = self.get_time_category_dict( + self._disaggregate_benchmark_perf.get('overall', {}) + ).get(time_type) if time_value > base_duration: ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" self.cur_bottleneck["overall_data"] = overall_bottleneck if comparison_bottleneck: self.cur_bottleneck["comparison_result"] = comparison_bottleneck + def optimize(self, **kwargs): if self.path_check(): self.process() self.identify_bottleneck() self.format_bottleneck() - self.format_cur_data() + self.format_over_summary_analysis() self.make_record() self.make_render() return self.result def format_bottleneck(self): result = '' - headers = [] - data_list = [] - data = [] - for key, value in self.cur_bottleneck.items(): + for _, value in self.cur_bottleneck.items(): if not value: continue - result += f'{key}: {value} \n' - headers.append(key) - data.append(value) - data_list.append(data) + result += f'{value} \n' self.bottleneck_str = result - self.bottleneck_table["headers"] = headers - self.bottleneck_table["data"] = data_list - def format_cur_data(self): - if not self.cur_data: - return - for data_type, data in self.cur_data.items(): - if not data: - 
continue - if data_type not in list(self.time_name_map.values()): - data_list = list(data.values()) - else: - data_list = [','.join(map(str, value)) for value in data.values()] - headers = list(data.keys()) - data_table = {"headers": headers, "data": [data_list]} - self.cur_data_table[data_type] = copy.deepcopy(data_table) + def format_over_summary_analysis(self): + headers = ['Performance Index', 'Duration(ms)', 'Duration Ratio'] + performance_data = self.get_analysis_data(self._disaggregate_perf) + benchmark_data = self.get_analysis_data(self._disaggregate_benchmark_perf) + if self._has_benchmark_profiling: + headers.append('Diff Duration(ms)') + self.format_analysis_with_benchmark(performance_data, benchmark_data, headers) + else: + self.format_analysis_only(performance_data, headers) + + def get_analysis_data(self, data_dict: dict): + if not data_dict: + return {} + return { + **data_dict.get("overall"), + **data_dict.get("computing_time_disaggregate"), + **data_dict.get("communication_time_disaggregate"), + **data_dict.get("free_time_disaggregate"), + } + def format_analysis_only(self, performance_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, total_duration))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res + + def format_analysis_with_benchmark(self, performance_data: dict, benchmark_data: dict, headers: list): + res = [] + total_duration = performance_data.get('e2e_time_ms', 0.0) + for time_name, time_key in self.performance_time_dict.items(): + row = [time_name] + duration = performance_data.get(time_key, 0.0) + row.append("{:.3f}".format(duration)) + row.append("{:.2%}".format(self.calculate_ratio(duration, 
total_duration))) + row.append("{:.3f}".format(duration - benchmark_data.get(time_key, 0.0))) + res.append(row) + self.over_summary_analysis["headers"] = headers + self.over_summary_analysis["data"] = res def make_record(self): """ @@ -232,20 +208,23 @@ class OverallSummaryAnalyzer(BaseAnalyzer): ) self.result.add(OptimizeRecord(optimization_item)) - self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - for data_type, data_dict in self.cur_data_table.items(): - if data_dict: - self.result.add_detail(const.DATA + data_type, data_dict["headers"], data_dict["data"][0]) + self.result.add_detail( + OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, + headers=self.over_summary_analysis["headers"] + ) + for data in self.over_summary_analysis["data"]: + self.result.add_detail(OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, detail=data) def make_render(self): if not self.bottleneck_str and not self.cur_advices: return + # 将\n替换为html换行 + bottleneck_str = self.bottleneck_str.replace('\n', '
') result_for_html = { - "Description" : self.bottleneck_str, - "suggestion" : self.cur_advices, - "details" : [self.bottleneck_table] + "Description": bottleneck_str, + "suggestion": self.cur_advices, + "details": [self.over_summary_analysis] } - self.html_render.render_template(key="overall", title=OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER, template_dir="templates", @@ -254,9 +233,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): torch_version=self.torch_version, result=result_for_html) + def get_profile_path(collection_path): for root, dirs, files in os.walk(collection_path): for file in files: if file.startswith("profiler_info"): return root - return "" \ No newline at end of file + return "" diff --git a/profiler/advisor/analyzer/schedule/syncbn/__init__.py b/profiler/advisor/analyzer/schedule/syncbn/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py new file mode 100644 index 0000000000..fc6dfce5f0 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py @@ -0,0 +1,30 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class SyncBNAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, **kwargs): + super().__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render = HTMLRender() + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + 
@BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + syncbn_checker = SyncBNChecker() + syncbn_checker.check_syncbn(self.timeline_event_dataset) + syncbn_checker.make_record(self.result) + syncbn_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py new file mode 100644 index 0000000000..83988c4e60 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py @@ -0,0 +1,70 @@ +import logging +import os + +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.cluster_analyse.common_func.file_manager import FileManager + +logger = logging.getLogger() + + +class SyncBNChecker: + + def __init__(self): + self.optimization_item = [] + self.syncbn_issues = False + self.desc = "" + self.suggestions = [] + self.solutions = None + self.max_syncbn_num = None + self._init_rule() + + def check_syncbn(self, event_dataset: TimelineEventDataset): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "sync_batchnorm") or not getattr(event_dataset, "sync_batchnorm"): + logger.debug("Skip syncbn checker, because no syncbn found") + return + + syncbn_num = len(event_dataset.sync_batchnorm) + self.syncbn_issues = syncbn_num >= self.max_syncbn_num + self.desc = self.desc.format(syncbn_num=syncbn_num) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.syncbn_issues: + return + + self.optimization_item.append(OptimizeItem("SyncBatchNorm", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not 
self.syncbn_issues: + return + html_render.render_template(key="schedule", + template_dir="templates", + template_name="sync_batchnorm.html", + desc=self.desc, + solutions=self.solutions) + + def _init_rule(self): + syncbn_rule_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))), + "rules", + "sync_batchnorm.yaml" + ) + + syncbn_rule = FileManager.read_yaml_file(syncbn_rule_path) + + self.max_syncbn_num = syncbn_rule.get("max_syncbn_num") + self.desc = syncbn_rule.get("problem") + + self.solutions = syncbn_rule.get("solutions") + for solution in self.solutions: + for key, val in solution.items(): + self.suggestions.append(f"{key}, {val.get('desc')}") diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py b/profiler/advisor/analyzer/schedule/synchronize_stream/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py new file mode 100644 index 0000000000..88e55449c5 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py @@ -0,0 +1,32 @@ +import logging + +from typing import List, Dict, Any + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.display.html.render import HTMLRender +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class SynchronizeStreamAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, **kwargs): + super().__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render 
= HTMLRender() + + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + @BaseAnalyzer.check_data((TimelineEventDataset.get_key(),)) + def optimize(self, **kwargs): + + synchronize_stream_checker = SynchronizeStreamChecker() + synchronize_stream_checker.check_synchronize(self.timeline_event_dataset, kwargs.get("profiling_with_stack")) + synchronize_stream_checker.make_record(self.result) + synchronize_stream_checker.make_render(self.html_render) + return self.result diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py new file mode 100644 index 0000000000..03d88d281c --- /dev/null +++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py @@ -0,0 +1,89 @@ +import logging + +from profiler.advisor.common import constant as const +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.analyzer.schedule.timeline_base_checker import TimelineBaseChecker +from profiler.advisor.utils.utils import format_timeline_result + +logger = logging.getLogger() + + +class SynchronizeStreamChecker(TimelineBaseChecker): + + def __init__(self): + super().__init__(n_processes=1) + self.optimization_item = [] + self.synchronize_issues = False + self.desc = "" + self.suggestions = [] + self.solutions = [] + self.max_synchronize_num = None + + def check_synchronize(self, event_dataset: TimelineEventDataset, profiling_with_stack=None): + """ + :Param event_dataset: dataset of timeline event + """ + if not hasattr(event_dataset, "synchronize_stream") or not getattr(event_dataset, "synchronize_stream"): + logger.debug("Skip synchronize stream checker, because no synchronize stream found") + 
return + + synchronize_num = event_dataset.synchronize_stream.total_count + slow_synchronize_stream = event_dataset.synchronize_stream.slow_synchronize_stream + total_slow_synchronize_time = sum((float(sync_stream.dur) for sync_stream in slow_synchronize_stream)) + + synchronize_stream_rule = event_dataset.synchronize_stream.rule + self.max_synchronize_num = synchronize_stream_rule.get("max_synchronize_num") + self.synchronize_issues = synchronize_num >= self.max_synchronize_num and len(slow_synchronize_stream) > 0 + if not self.synchronize_issues: + return + + for sync_stream in slow_synchronize_stream: + if sync_stream.name not in self._matched_op_index: + self._matched_op_index[sync_stream.name] = [] + self._matched_op_index[sync_stream.name].append(sync_stream.dataset_index) + self.query_stack(event_dataset, profiling_with_stack) + + self.desc = synchronize_stream_rule.get("problem") + self.desc = self.desc.format(synchronize_num=synchronize_num, + slow_synchronize_num=len(slow_synchronize_stream), + total_synchronize_stream_time=total_slow_synchronize_time) + + solutions = synchronize_stream_rule.get("solutions") + for solution in solutions: + renderer_solution = {} + for key, val in solution.items(): + if self.empty_stacks and self.framework_black_list: + # 如果堆栈源于torch, torch_npu等框架,则不提示修改的代码 + if "modify code" in key.lower(): + continue + self.suggestions.append(f"{key}, {val.get('desc')}") + renderer_solution.update({key: val}) + self.solutions.append(renderer_solution) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.synchronize_issues: + return + + self.optimization_item.append(OptimizeItem("SynchronizeStream", self.desc, self.suggestions)) + for optimization in self.optimization_item: + result.add(OptimizeRecord(optimization)) + + def make_render(self, html_render): + if not self.synchronize_issues: + return + + format_result_for_html = 
format_timeline_result(dict(self.matched_op_stacks), dump_html=True) + html_render.render_template(key="schedule", + template_dir="templates", + template_name="synchronize_stream.html", + desc=self.desc, + solutions=self.solutions, + result=format_result_for_html, + with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, + empty_stacks=self.empty_stacks, + framework_black_list=self.framework_black_list) diff --git a/profiler/advisor/analyzer/schedule/timeline_base_checker.py b/profiler/advisor/analyzer/schedule/timeline_base_checker.py new file mode 100644 index 0000000000..8bc6915026 --- /dev/null +++ b/profiler/advisor/analyzer/schedule/timeline_base_checker.py @@ -0,0 +1,91 @@ +from abc import ABC, abstractmethod +import multiprocessing +import logging + +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.result import OptimizeResult + +logger = logging.getLogger() + + +class TimelineBaseChecker(ABC): + + def __init__(self, n_processes: int = 1): + self.n_processes = n_processes + self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() + self.matched_op_stacks = {} + self.empty_stacks = True + self.framework_black_list = False + + @abstractmethod + def make_record(self, result: OptimizeResult): + pass + + @abstractmethod + def make_render(self, html_render): + pass + + def query_stack(self, event_dataset: TimelineEventDataset = None, profiling_with_stack: str = None): + if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): + return + + event_dataset = event_dataset if not profiling_with_stack else TimelineEventDataset( + collection_path=profiling_with_stack, data={}, _datasets={}, analysis_mode="fusion_ops", + build_dataset=False) + + op_stack_list = 
event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) + for op_stack in op_stack_list: + for op, stack in op_stack.items(): + if op not in self.matched_op_stacks: + self.matched_op_stacks[op] = {} + if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: + continue + if stack not in self.matched_op_stacks[op]: + self.matched_op_stacks[op][stack] = 0 + self.matched_op_stacks[op][stack] += 1 + + def _query_stack_by_matched_index(self, index, event): + stack_record = {} + event = TimelineEvent(event) + + matched_ops = [] + for op, matched_index in self._matched_op_index.items(): + if index not in matched_index: + continue + + matched_ops.append(op) + stack = event.args.get(const.CALL_STACKS) + + if not stack: + logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) + continue + + if not self._is_keep_stack(stack): + self.framework_black_list = True + logger.debug("Drop stack from framework %s", const.FRAMEWORK_STACK_BLACK_LIST) + continue + + if self.empty_stacks and stack: + self.empty_stacks = False + + stack_record[op] = stack + + if matched_ops and not stack_record: + for op in matched_ops: + stack_record[op] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG + + return stack_record + + def _is_keep_stack(self, stack): + # 过滤掉torch, torch_npu, megatron, deepspeed等框架下的堆栈,这些源码基本是不能被修改的 + stack_list = stack.replace("\\r\\n", ";").split(";") + if not stack_list: + return False + + final_called_stack = stack_list[0] + for framework in const.FRAMEWORK_STACK_BLACK_LIST: + if framework in final_called_stack.split("/"): + return False + return True diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 592f9d421e..52e3e07554 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -12,3 +12,7 @@ class SupportedScopes: BLOCK_DIM_ANALYSIS = "block_dim_analysis" OPERATOR_NO_BOUND_ANALYSIS = "operator_no_bound_analysis" TIMELINE_OP_DISPATCH = 
"timeline_op_dispatch" + DATALOADER = "dataloader" + SYNCBN = "syncbn" + SYNCHRONIZE_STREAM = "synchronize_stream" + FREQ_ANALYSIS = "freq_analysis" diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 697430ee6c..87245a43ea 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -26,6 +26,7 @@ ENQUEUE = "enqueue" TORCH_TO_NPU = "torch_to_npu" OP_COMPILE_NAME = "AscendCL@aclopCompileAndExecute" OP_COMPILE_ID = "aclopCompileAndExecute" +SYNC_STREAM = "AscendCL@aclrtSynchronizeStream" MAX_OP_COMPILE_NUM = 20 ACL_TO_NPU = "acl_to_npu" TASK_TYPE = "Task Type" @@ -111,7 +112,7 @@ HTTP_PREFIXES = "http://" HTTPS_PREFIXES = "https://" COMMON_YAML_DIR = "modelarts/solution/ma_advisor_rules/" COMMON_ENDPOINT_SUFFIX = "obs.{}.myhuaweicloud.com" -INNER_ENDPOINT_SUFFIX= "obs.{}.ulanqab.huawei.com" +INNER_ENDPOINT_SUFFIX = "obs.{}.ulanqab.huawei.com" AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" FUSION_PASS_YAML_NAME = "op_fusion_pass.yaml" @@ -138,4 +139,8 @@ CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" CLUSTER_COMM_JSON = "cluster_communication.json" BOTTLENECK = "bottleneck" -DATA = "data" \ No newline at end of file +DATA = "data" + +FRAMEWORK_STACK_BLACK_LIST = ["torch", "torch_npu", "megatron", "deepspeed"] +DISABLE_STREAMING_READER = "DISABLE_STREAMING_READER" +MAX_FILE_SIZE = 10**10 diff --git a/profiler/advisor/common/graph/graph_parser.py b/profiler/advisor/common/graph/graph_parser.py index d4c67fc191..ef4dc4d681 100644 --- a/profiler/advisor/common/graph/graph_parser.py +++ b/profiler/advisor/common/graph/graph_parser.py @@ -1,11 +1,12 @@ import os import logging -import yaml import itertools from collections import deque from dataclasses import dataclass from typing import List, Tuple, Dict +from profiler.cluster_analyse.common_func.file_manager import FileManager + logger = logging.getLogger() @@ -344,9 +345,9 @@ class QueryGraphParser: if not os.path.exists(rule_database): raise 
FileNotFoundError(f"Path {rule_database} does not exist.") - with open(rule_database, 'r') as f: - database = yaml.safe_load(f) - self.parse_yaml(database) + + database = FileManager.read_yaml_file(rule_database) + self.parse_yaml(database) def parse_yaml(self, yaml_database): fusion_strategy_list = yaml_database.get("GraphFusion", []) diff --git a/profiler/advisor/common/profiling/ge_info.py b/profiler/advisor/common/profiling/ge_info.py index 9996ec611a..4fd5846d88 100644 --- a/profiler/advisor/common/profiling/ge_info.py +++ b/profiler/advisor/common/profiling/ge_info.py @@ -17,12 +17,13 @@ class GeInfo(ProfilingParser): """ ge info file """ - FILE_PATTERN = r"ge_info.db" FILE_PATTERN_MSG = "ge_info.db" FILE_INFO = "ge info" STATIC_OP_STATE = "0" DYNAMIC_OP_STATE = "1" + file_pattern_list = [r"ge_info.db"] + def __init__(self, path: str) -> None: super().__init__(path) self.op_state_info_list = None diff --git a/profiler/advisor/common/profiling/msprof.py b/profiler/advisor/common/profiling/msprof.py index 9453986b82..750c5481e6 100644 --- a/profiler/advisor/common/profiling/msprof.py +++ b/profiler/advisor/common/profiling/msprof.py @@ -33,10 +33,11 @@ class Msprof(ProfilingParser): msprof """ - FILE_PATTERN = r"^msprof[_\d]+.json$" FILE_PATTERN_MSG = "msprof_*.json" FILE_INFO = "msprof" + file_pattern_list = [r"^msprof[_\d]+.json$"] + def __init__(self, path: str) -> None: super().__init__(path) self._tasks: List[TaskInfo] = [] diff --git a/profiler/advisor/common/profiling/op_summary.py b/profiler/advisor/common/profiling/op_summary.py index d79439dbad..4744b5029a 100644 --- a/profiler/advisor/common/profiling/op_summary.py +++ b/profiler/advisor/common/profiling/op_summary.py @@ -16,13 +16,13 @@ class OpSummary(ProfilingParser): """ op summary """ - - FILE_PATTERN = r"^op_summary_[_\d]+\.csv$" FILE_PATTERN_MSG = "op_summary_*.csv" FILE_INFO = "op summary" STATIC_OP_STATE = "static" DYNAMIC_OP_STATE = "dynamic" + file_pattern_list = 
[r"^op_summary_[_\d]+\.csv$"] + def __init__(self, path: str) -> None: super().__init__(path) self.op_list: List[OpInfo] = [] diff --git a/profiler/advisor/common/profiling/tasktime.py b/profiler/advisor/common/profiling/tasktime.py index 3ce09a7838..732ff0f367 100644 --- a/profiler/advisor/common/profiling/tasktime.py +++ b/profiler/advisor/common/profiling/tasktime.py @@ -17,11 +17,11 @@ class TaskTime(ProfilingParser): """ task time info """ - - FILE_PATTERN = r"^task_time_[_\d]+\.json$" FILE_PATTERN_MSG = "task_time*.json" FILE_INFO = "task time" + file_pattern_list = [r"^task_time_[_\d]+\.json$"] + def __init__(self, path: str) -> None: super().__init__(path) self._tasks: List[TaskInfo] = [] diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py index 8637befd1a..64cc849295 100644 --- a/profiler/advisor/common/timeline/fusion_ops_db.py +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -1,13 +1,12 @@ import logging import os -import yaml - from profiler.advisor.common import constant from profiler.advisor.common.timeline.fusion_ops_rule import OpRule from profiler.advisor.common.timeline.fusion_ops_rule_handler import TimelineOpRuleHandler from profiler.advisor.utils.log import get_log_level from profiler.advisor.utils.utils import get_file_path_by_walk +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() logger.setLevel(get_log_level()) @@ -241,8 +240,7 @@ class FusionOperatorDB: logger.debug("The rule yaml file is successfully found in path: %s", os.path.abspath(file_path)) - with open(file_path, "rb") as file: - db_content = yaml.safe_load(file) + db_content = FileManager.read_yaml_file(file_path) if not self._is_version_supported(db_content): self.is_empty = True diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini index c56c1dad9f..06e9931601 100644 --- a/profiler/advisor/config/config.ini +++ 
b/profiler/advisor/config/config.ini @@ -9,6 +9,7 @@ tune_ops_file = operator_tuning_file.cfg [THRESHOLD] # operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker operator_bound_ratio = 0.8 +frequency_threshold = 0.05 [RULE-BUCKET] # region : URL of different regions where can download rule yaml file cn-north-9 = cnnorth9-modelarts-sdk diff --git a/profiler/advisor/config/config.py b/profiler/advisor/config/config.py index 12f4526f8c..4f36dfedfc 100644 --- a/profiler/advisor/config/config.py +++ b/profiler/advisor/config/config.py @@ -97,6 +97,13 @@ class Config: """ return float(self.config.get("THRESHOLD", "operator_bound_ratio")) + @property + def frequency_threshold(self) -> float: + """ + frequency_threshold + """ + return float(self.config.get("THRESHOLD", "frequency_threshold")) + def set_log_path(self, result_file: str, log_path: str = None): self.log_path = log_path if log_path is not None else os.path.join(self._work_path, "log") os.makedirs(self.log_path, exist_ok=True) diff --git a/profiler/advisor/config/profiling_data_version_config.yaml b/profiler/advisor/config/profiling_data_version_config.yaml index 4ef76105a0..b8c92fe074 100644 --- a/profiler/advisor/config/profiling_data_version_config.yaml +++ b/profiler/advisor/config/profiling_data_version_config.yaml @@ -1,18 +1,19 @@ versions: - version: 8.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: - mindstudio_profiler_output: - [ op_summary, msprof ] + mindstudio_profiler_output: [ op_summary, msprof ] class_attr: op_summary: OpSummary msprof: Msprof file_attr: - op_summary: ^op_summary_\d{14}\.csv$ msprof: ^msprof_\d{14}\.json$ + op_summary: [ kernel_details.csv, '^op_summary_\d{14}\.csv$' ] - version: 7.0.0 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -28,13 +29,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: 
^op_summary_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 7.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -50,13 +52,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 6.3.RC2 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -72,9 +75,7 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+\.csv$'] task_time: ^task_time_\d+_\d+\.json$ msprof: ^msprof_\d+_\d+\.json$ ge_info: ge_info.db - - diff --git a/profiler/advisor/dataset/ai_core_freq/__init__.py b/profiler/advisor/dataset/ai_core_freq/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py new file mode 100644 index 0000000000..c99baea656 --- /dev/null +++ b/profiler/advisor/dataset/ai_core_freq/ai_core_freq_dataset.py @@ -0,0 +1,148 @@ +import json +import logging +import math +import os +import traceback + +import ijson +from tqdm import tqdm + +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.utils.utils import get_file_path_from_directory +from profiler.advisor.utils.utils import convert_to_float, parse_json_with_generator +from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser +from profiler.advisor.config.config import Config 
+ +logger = logging.getLogger() + + +class AICoreFreqDataset: + + def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None: + + self._profiler_step = [] + self._ai_core_ops = [] + self._ai_core_freq: [TimelineEvent] = [] + self._previous_freq_index = -1 + + self.timeline_dir = collection_path + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) + + self.step = kwargs.get("step") + self.op_freq = {} + info = DeviceInfoParser(collection_path) + info.parse_data() + if not Config().get_config("aic_frequency"): + return + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) + + @property + def profiler_step(self): + return self._profiler_step + + @property + def ai_core_freq(self): + return self._ai_core_freq + + @property + def ai_core_ops(self): + return self._ai_core_ops + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', maxsplit=1)[-1] + + def parse(self): + + if len(self.timeline_data_list) == 0: + logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) + return False + + if len(self.timeline_data_list) > 1: + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) + + _ = parse_json_with_generator(sorted(self.timeline_data_list)[0], self._add_event) + + target_ai_core_ops = self._get_target_ai_core_ops() + self._get_op_frequency(target_ai_core_ops) + return True + + def _add_profiler_step(self, event): + if event.name.startswith("ProfilerStep"): + self._profiler_step.append(event) + + def _add_ai_core_ops(self, event): + if event.args.get("Task Type") in ["MIX_AIC", "AI_CORE"]: + self._ai_core_ops.append(event) + + def _add_ai_core_freq(self, event): + if event.name == "AI Core Freq": + if self._previous_freq_index != -1: + 
self._ai_core_freq[self._previous_freq_index]["end"] = event.get("ts", float(math.inf)) + self._previous_freq_index += 1 + event.setdefault("end", float(math.inf)) + self._ai_core_freq.append(event) + + def _add_event(self, index, event): + event["dataset_index"] = index + if not isinstance(event, TimelineEvent): + event = TimelineEvent(event) + + self._add_profiler_step(event) + self._add_ai_core_ops(event) + self._add_ai_core_freq(event) + + return True + + def _get_target_ai_core_ops(self): + target_ai_core_ops = [] + if not self.step or f"ProfilerStep#{self.step}" not in [event.name for event in self._profiler_step]: + target_ai_core_ops = self._ai_core_ops + else: + for step_event in self._profiler_step: + if step_event.name != f"ProfilerStep#{self.step}": + continue + + for ai_core_op_event in self._ai_core_ops: + if step_event.ts_include(ai_core_op_event): + target_ai_core_ops.append(ai_core_op_event) + target_ai_core_ops = sorted(target_ai_core_ops, key=lambda x: float(x.ts)) + return target_ai_core_ops + + def _get_op_frequency(self, ai_core_ops): + ai_core_freq = sorted(self._ai_core_freq, key=lambda x: float(x.ts)) + + op_index, freq_index = 0, 0 + while op_index < len(ai_core_ops) and freq_index < len(ai_core_freq): + op_event = ai_core_ops[op_index] + op_end_time = convert_to_float(op_event.ts) + convert_to_float(op_event.dur) + op_freq_list = [] + while freq_index < len(ai_core_freq): + freq_event = ai_core_freq[freq_index] + if convert_to_float(freq_event.end) < op_end_time: + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + freq_index += 1 + continue + elif convert_to_float(freq_event.ts) < op_end_time: + if op_event.name not in self.op_freq: + self.op_freq[op_event.name] = {"count": 0, "dur": 0, "freq_list": []} + self.op_freq[op_event.name]["count"] += 1 + self.op_freq[op_event.name]["dur"] += convert_to_float(op_event.dur) + op_freq_list.append(convert_to_float(freq_event.args.MHz)) + 
self.op_freq[op_event.name]["freq_list"].append(min(op_freq_list)) + break + else: + break + + op_index += 1 diff --git a/profiler/advisor/dataset/profiling/device_info.py b/profiler/advisor/dataset/profiling/device_info.py index b58930777f..110cd0794c 100644 --- a/profiler/advisor/dataset/profiling/device_info.py +++ b/profiler/advisor/dataset/profiling/device_info.py @@ -54,6 +54,8 @@ class DeviceInfoParser: config.set_config("device_id", device_info["id"]) if "aiv_num" in device_info: config.set_config("aiv_num", device_info["aiv_num"]) + if "aic_frequency" in device_info: + config.set_config("aic_frequency", device_info["aic_frequency"]) if "ai_core_num" in device_info: config.set_config("ai_core_num", device_info["ai_core_num"]) return True diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 46d4a4fe8b..ebd90951ab 100644 --- a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -10,6 +10,7 @@ from profiler.advisor.common.profiling.tasktime import TaskTime from profiler.advisor.dataset.dataset import Dataset from profiler.advisor.dataset.profiling.device_info import DeviceInfoParser from profiler.advisor.utils.utils import join_prof_path +from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -42,14 +43,21 @@ class ProfilingDataset(Dataset): self.build_from_pattern(value, join_prof_path(current_path, key)) elif isinstance(dirs_pattern, list): for item in dirs_pattern: + if hasattr(self, item) and getattr(self, item): + # 避免重复构建kernel_details.csv, op_summary.csv的数据对象 + continue + file_pattern_list = self.current_version_pattern.get('file_attr').get(item) data_class = globals()[self.current_version_pattern.get('class_attr').get(item)] - data_class.FILE_PATTERN = self.current_version_pattern.get('file_attr').get(item) + if not hasattr(data_class, 
"file_pattern_list"): + continue + setattr(data_class, "file_pattern_list", self.current_version_pattern.get('file_attr').get(item)) data_object = data_class(current_path) is_success = data_object.parse_data() if is_success: setattr(self, item, data_object) else: - logger.warning("Skip parse %s from local path %s", self.current_version_pattern.get('class_attr').get(item), current_path) + logger.info("Skip parse %s with file pattern %s from local path %s", + self.current_version_pattern.get('class_attr').get(item), file_pattern_list, current_path) else: logger.warning(f"Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__) @@ -69,8 +77,7 @@ class ProfilingDataset(Dataset): logger.warning("Skip parse profiling dataset, because %s does not exist.", config_path) return [] - with open(config_path, 'r') as f: - patterns = yaml.safe_load(f) + patterns = FileManager.read_yaml_file(config_path) return patterns diff --git a/profiler/advisor/dataset/profiling/profiling_parser.py b/profiler/advisor/dataset/profiling/profiling_parser.py index bb4caeb29e..51996617c2 100644 --- a/profiler/advisor/dataset/profiling/profiling_parser.py +++ b/profiler/advisor/dataset/profiling/profiling_parser.py @@ -12,10 +12,10 @@ class ProfilingParser: """ profiling """ - FILE_PATTERN = "" FILE_PATTERN_MSG = "" FILE_INFO = "" - FILE_PATH = "" + + file_pattern_list = [] def __init__(self, path: str) -> None: self._path = path @@ -37,15 +37,20 @@ class ProfilingParser: return False def _parse_from_file(self): - file_list = get_file_path_from_directory(self._path, self.file_match_func(self.FILE_PATTERN)) - if not file_list: - return False - ## get last file - file = file_list[-1] - self.FILE_PATH = file - if len(file_list) > 1: - logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, file) - return self.parse_from_file(file) + + if not isinstance(self.file_pattern_list, list): + self.file_pattern_list = [self.file_pattern_list] + + for file_pattern in 
self.file_pattern_list: + file_list = get_file_path_from_directory(self._path, self.file_match_func(file_pattern)) + if not file_list: + continue + ## get last file + target_file = file_list[-1] + if len(file_list) > 1: + logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, target_file) + return self.parse_from_file(target_file) + return False @staticmethod def get_float(data) -> float: diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index d3889e4458..1504e65f54 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ b/profiler/advisor/dataset/timeline_event_dataset.py @@ -1,14 +1,16 @@ +import json import logging -from typing import List +import os +from typing import List, Any +import traceback import ijson -from profiler.advisor.dataset.dataset import Dataset from tqdm import tqdm +import yaml from profiler.advisor.common import constant as const from profiler.advisor.common.timeline.event import TimelineEvent -from profiler.advisor.utils.utils import get_file_path_from_directory -from profiler.advisor.utils.utils import singleton +from profiler.advisor.utils.utils import get_file_path_from_directory, check_path_valid, singleton from profiler.cluster_analyse.common_func.file_manager import FileManager logger = logging.getLogger() @@ -39,37 +41,76 @@ class OpCompileCollector: self._total_op_compile_time = 0.0 +class SynchronizeStreamCollector: + + def __init__(self): + self._synchronize_stream_count = 0 + self._slow_synchronize_stream = [] + self.rule = SynchronizeStreamCollector._load_rule() + + @property + def total_count(self): + return self._synchronize_stream_count + + @property + def slow_synchronize_stream(self): + return self._slow_synchronize_stream + + @staticmethod + def _load_rule(): + sync_stream_rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "rules", + "synchronize.yaml") + + sync_stream_rule = 
FileManager.read_yaml_file(sync_stream_rule_path) + return sync_stream_rule + + def update_sync_stream_count(self): + self._synchronize_stream_count += 1 + + def append_slow_sync_stream(self, event): + if float(event.dur) / 1000 >= self.rule.get("slow_synchronize_threshold", 10): + self._slow_synchronize_stream.append(event) + + def unset(self): + self._synchronize_stream_count = 0 + self._slow_synchronize_stream = [] + + @singleton -class TimelineEventDataset(Dataset): +class TimelineEventDataset: - def __init__(self, collection_path, data: dict, **kwargs) -> None: + def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None: self._ops_with_task_type = {} self._ops_with_stack = {} self._ops_compile = OpCompileCollector() self._torch_to_npu = {} self._acl_to_npu = set() - self._aten: List[str] = [] - self._optimizer: List[str] = [] + self._aten: List[Any] = [] + self._optimizer: List[Any] = [] + self._dataloader: List[Any] = [] + self._sync_batchnorm: List[Any] = [] + self._synchronize_stream = SynchronizeStreamCollector() self.timeline_dir = collection_path - self.timeline_data_list = get_file_path_from_directory(collection_path, lambda file: file.endswith("trace_view.json")) + self.timeline_data_list = get_file_path_from_directory(collection_path, + lambda file: file.endswith("trace_view.json")) self.dataset_len = None self.analysis_mode = kwargs.get("analysis_mode") self.task_type = kwargs.get("task_type") - self.cann_version = kwargs.get("cann_version") - self.torch_version = kwargs.get("torch_version") - if self.analysis_mode in ["fusion_ops", "all"]: - logger.info("Load fusion operators database for cann version '%s' and torch version '%s'", - self.cann_version, self.torch_version) + if not build_dataset: + return - super().__init__(collection_path, data) + if self.parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) if self.analysis_mode in ["op_stack", "all"]: self._task_op_names = 
list(set([event_key.split("-")[0] for event_key in self._ops_with_task_type.keys()])) self._post_process() - @property def ops_with_stack(self): return self._ops_with_stack @@ -102,36 +143,60 @@ class TimelineEventDataset(Dataset): def aten(self): return self._aten - def _parse(self): + @property + def dataloader(self): + return self._dataloader + + @property + def sync_batchnorm(self): + return self._sync_batchnorm + + @property + def synchronize_stream(self): + return self._synchronize_stream + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__module__.rsplit('.', maxsplit=1)[-1] + + def parse(self): if len(self.timeline_data_list) == 0: logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) return False if len(self.timeline_data_list) > 1: - logger.warning("Please ensure only one trace_view.json in %s, there will analyze first timeline profiling data.", self.timeline_dir) - self.timeline_data_list = [self.timeline_data_list[0]] + logger.warning("Found multiple trace_view.json in %s, load the file of device 0 for analysis .", + self.timeline_dir) result = self.parse_data_with_generator(self._add_event) if not self.dataset_len: self.dataset_len = len(result) - return True def parse_data_with_generator(self, func): result = [] + timeline_data_path = sorted(self.timeline_data_list)[0] + if not check_path_valid(timeline_data_path): + return result + try: - json_content = FileManager.read_json_file(self.timeline_data_list[0]) - for i, event in tqdm(enumerate(json_content), leave=False, ncols=100, - desc="Building dataset for timeline analysis", - total=self.dataset_len): - func_res = func(index=i, event=event) - if func_res: - result.append(func_res) - except Exception as e: - logger.warning("Error %s while parsing file %s, continue to timeline analysis", e, - self.timeline_data_list[0]) + with open(timeline_data_path, "r") as f: + for i, event in tqdm(enumerate(ijson.items(f, 
"item")), + leave=False, ncols=100, desc="Building dataset for timeline analysis", + total=self.dataset_len): + func_res = func(index=i, event=event) + if func_res is not None: + result.append(func_res) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) return result def _add_ops_with_task_type(self, event): @@ -169,12 +234,40 @@ class TimelineEventDataset(Dataset): "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur })) + def _add_dataloader(self, event: TimelineEvent): + if "dataloader" in event.name.lower(): + self._dataloader.append(TimelineEvent({ + "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur, + "stack": event.args.get("Call stack") + })) + + def _add_sync_batchnorm(self, event: TimelineEvent): + if event.name.lower() == "syncbatchnorm": + self._sync_batchnorm.append(TimelineEvent({ + "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur + })) + + def _add_synchronize(self, event: TimelineEvent): + if event.name.startswith(const.SYNC_STREAM): + self._synchronize.append(TimelineEvent({ + "name": event.name, "ts": event.ts, "dur": event.dur + })) + + def _add_specific_operator(self, event): + # for analysis of operator aclOpCompile, enable jit_compILE=False + self._add_op_compile(event) + # for analysis of slow dataloader.__next__ + self._add_dataloader(event) + # for analysis of syncBatchNorm operator, prompt users to replace source code of torch_npu's syncbn + self._add_sync_batchnorm(event) + def _add_event(self, index, event): event["dataset_index"] = index if not isinstance(event, TimelineEvent): event = TimelineEvent(event) - self._add_op_compile(event) + self._add_specific_operator(event) + if self.analysis_mode == "fusion_ops": self._add_event_for_fusion_ops(event) elif self.analysis_mode == "op_stack": @@ -190,6 +283,10 @@ class 
TimelineEventDataset(Dataset): self._add_aten(event) return + # 检查cann层同步操作,根据时间窗口索引到host侧的aten算子并给出堆栈 + if event.name.startswith(const.SYNC_STREAM): + self._add_aten(event) + if event.name.startswith(f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"): self._add_optimizer(event) return @@ -215,7 +312,18 @@ class TimelineEventDataset(Dataset): # eliminate sub aten operator of the first level aten operator by 'ts' and 'dur', # keep the first level aten operator contiguous formated_atens = [] - for aten_event in sorted(self._aten, key=lambda x: x.get("ts", -1)): - if not formated_atens or not formated_atens[-1].ts_include(aten_event): - formated_atens.append(aten_event) + for event in sorted(self._aten, key=lambda x: x.get("ts", -1)): + if event.name.startswith(const.ATEN): + if not formated_atens or not formated_atens[-1].ts_include(event): + formated_atens.append(event) + + elif event.name.startswith(const.SYNC_STREAM): + self._synchronize_stream.update_sync_stream_count() + if formated_atens[-1].ts_include(event): + # 使用aten算子的索引,用于查询堆栈 + event["dataset_index"] = formated_atens[-1].get("dataset_index") + self._synchronize_stream.append_slow_sync_stream(event) + + else: + continue self._aten = formated_atens diff --git a/profiler/advisor/display/html/templates/ai_core_frequency.html b/profiler/advisor/display/html/templates/ai_core_frequency.html new file mode 100644 index 0000000000..d045142037 --- /dev/null +++ b/profiler/advisor/display/html/templates/ai_core_frequency.html @@ -0,0 +1,27 @@ +{% if data|length > 0 %} +
+

AI CORE Frequency Issues

+
+ Issue: {{ desc }} +
+ Suggestion: {{ suggestion }} +

+ + + {% for header in headers %} + + {% endfor %} + + + {% for row in data %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|safe }}
+ +
+
+{% endif %} \ No newline at end of file diff --git a/profiler/advisor/display/html/templates/slow_dataloader.html b/profiler/advisor/display/html/templates/slow_dataloader.html new file mode 100644 index 0000000000..ae3a22f283 --- /dev/null +++ b/profiler/advisor/display/html/templates/slow_dataloader.html @@ -0,0 +1,18 @@ +
+

Slow Dataloader Issues

+
+ {{ desc }} + + + + + + {% for suggestion in suggestions %} + + + + {% endfor %} +
Suggestions
{{ loop.index }}. {{ suggestion|safe }}
+ +
+
diff --git a/profiler/advisor/display/html/templates/sync_batchnorm.html b/profiler/advisor/display/html/templates/sync_batchnorm.html new file mode 100644 index 0000000000..0a4cb3e730 --- /dev/null +++ b/profiler/advisor/display/html/templates/sync_batchnorm.html @@ -0,0 +1,30 @@ + +
+

SyncBatchNorm Issues

+
+ {{ desc }} + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ + More efficient code of syncbn forward as follows: + {% for item in solutions %} + {% for key, value in item.items() %} + {% if 'efficient_code' in value %} +
{{ value.efficient_code|safe }}
+ {% endif %} + {% endfor %} + {% endfor %} + +
+
diff --git a/profiler/advisor/display/html/templates/synchronize_stream.html b/profiler/advisor/display/html/templates/synchronize_stream.html new file mode 100644 index 0000000000..fd95b48615 --- /dev/null +++ b/profiler/advisor/display/html/templates/synchronize_stream.html @@ -0,0 +1,57 @@ +
+

Synchronize Stream Issues

+
+ {{ desc }} + + + + + + + {% for item in solutions %} + {% set rowloop = loop %} + {% for key, value in item.items() %} + + + + + {% endfor %} + {% endfor %} +
Suggestions
{{ rowloop.index }}. {{ value.desc }}
+ +
+ {% if not empty_stacks %} + Please click on the collapsible box below to view the detailed code stack that triggers synchronizeStream + {% elif not framework_black_list %} + Suggestion: + These operators have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to + Ascend PyTorch Profiler to set + 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. + {% endif %} + + {% for api_name, stacks in result.items() %} + + {% if empty_stacks %} +
{{api_name|safe}}
+ + {% elif stacks | length > 0 %} + +
{{api_name|safe}}
+
+
+ {% for stack in stacks %} +
No.{{loop.index|safe}} code stack, called {{stack[1]|safe}} times
+ + {% endfor %} +
+
+ {% endif %} + + {% endfor %} + +
+ +
+
diff --git a/profiler/advisor/img/overall.png b/profiler/advisor/img/overall.png index 6d5da107a3f7f6c8c655922bd80d193708fe71aa..1883d4c97388b1cfb774d05fc9e0d368d0c66901 100644 GIT binary patch literal 49616 zcmd42Rajlk(lrVz$ijjINpN=w1PSi$?yztI1b26Wy9ak)!QFxc2<`+6?j*Q#X7;!D z`(^*n|D2n1b8hGC*;2E*yK0P4;YtdUXvp};FfcG^(o$k7FfcDjVPHV)U=Z-cihKzH z28I+yT1;5Y9rmc>QI$Zmm6_*xx?Dwgwq=SkzE}nOjcO15q26oN-|5{p1ovujcG^l7 zjHy9oqmx(e&WT?`;?Q}`k08}B*wvM(y5pqJQq0BANQQs$uh)te|vg z=6NKktE+o{I=y)T5`qE4z>>ni`TI{}-o!300xvHrYOWlq<@eOoRptG7xzXD@UDJ7YHo~&Ivom$`KOMqgz<`9h zhrr)QM}2O$Qx&`JtQ?GtwhwD^o{#O;{GJ~L4cc!$xTQYdTD&MS4d`AP+HGoT?EJBC zGv3gUT+`*-SYO`^efvD$HTHa2SUU%;%gybVflo8kZI1_l<7Olej%c*6^- z7fhT1`s)P|T=^X_<8OF0o`!Cw{~r0EV<-zS6N#Iv@4~7prl%$HpC9&bcW}ng($l)R z%CzHjY$0iE@>^cy?t4~`p@lX!H9A`b=>Df2uo8Pr5X$-aT4m+d)6?~F6%&{d?+vaz z>_1&ug~7mS9783C{`tmV7BlVn&``7U2_KU&H3M%H^uLV*fW}NplIj23xB}C^+p)o8 z^8G*4&&3757lmZK@whwAd4}3~8*qDc?sEBxe1kn-bc6pz`p-Jn>EQIppikR%c3thh z#g!{hhhDLoY&qFs-Sm+dzW=*gM)7VGkSr`JCL=tG9xUvC1_}%HkA~N{d8pbiveVS) z^vTHZtnW(!XKqrI-EJoXk9J6W+~*XlAJPp_{1l&^Gva>T3i5l>d+TT1*LP{doDAn^ z?nqh&%VKJI4!uM(HARK{&!2Zkf&GD8X=r$RuCg;3_KZ#bG~a4hq$w6~`2)SUOaJa@ z*zSSyLeVe7AiZl=ezb@Y^XYCyk|AC2Id4?)q03`;Ek&dFDa5ve9!INVQ#QS;or3cI z>&(<}*JF*L*T|72(?3SAPXjM#KrR)l7=4x~II_gEpY|2^ll|`f(q1RHBzp@#bUg{i za)tGkJZL_V!&l3|Ym8TwtQFH#KNybIV4TU^-1zxKAH4Pc&(vXHNg+s}MjS1_G#$*8 zhhPo10GAy_!(o)bjed(~)mJ^_Piv1qpSn;`B70l&n%T7Ip`V`JPbG6knHYWAYT4aR z(VyXb%#bqUkp7t?DJeuZKwTj4auAE7Pz>5?$FG08vH7kGH{n~WOeaJ{9?5y~a*#!H zN8pL(7wli#OM?vfu&4|1@cZDtU5>k4By>z{_3YHA%mejoPN zp630$LT>yX*TSAYmMvqEt(0^4)pU(2O3@+*nBk}Q=p z#QAVOv6zti`Udu&-wqStLjf@fXgKBkDf}NV13Y~O8x~xd(Hq+v@b7~G29}E)CSY5l zB47TWjUXfsY=m;U2U@~^)^AS%!qBp$Q^IXU%PTV1+yu-M(r@umOe-O^>4bl>yc=LtBtmlqd-S2>T5{v`jb6$WH0 zzW#7~K8vd*Xk%x$<(SH>@3~);x{S=Q^uI@;`>O!Mr?b?qXOid55tc5!zYj5Vu0Zwm zTi`bH{{0mlxZ8jmZagI=+xJqPC+i8g`WF43-5&mT_X~CJgC7QuYODM`Yo+I0;ltz+w8pS@gs1#;o;!|JQ*&LA;9tJq{2~DOCD`I)h^zaFW4}0r9yi-Uw*~_vs!n`zr&k*4F7KWdp}Ia 
zoG+8)|BRo@0~Ue^L;n8j#&D!iMud8v*4ITau>Wc3DggsR0ipcmjkpvr0cNbZt{T&& z1pmxXi3s?Z6#AFXLs&tx7+OsdlK=I;GASTN_7yUqkzRZX20RLTYv-B%D$(gmaTuE~ zxQckY2<6QS&2yZiI^{wTZ9=U2xdnrfQ5{DwH2#_fJe%Jc{nsl7N@i;l9>>DgWuO4~ z05b}>N~)%Ly`cFelt%Z_jeA>`(ptN{_N&|mwSey0`1W7;$+ADJnObDe2Z^}IU>ci; zGQyT{m9~|Bx~T5VslWrP8<7BO$=Y(cc-0qS!$OO&UoB6H!>CP$z;nrBLR085v^W#+ ziUzKhh^&W>IWF4g450nixNB}+!pT0OyA6m{MaXcta4FFk(3E8ZCo}_VmW2(D$kVot z+~Ll=^QE9agN#U=^;)Yjf2?WPVjd-NTMmX<*r2v1w$c?!77dw~PuG>j0W*Rb3CJK! zEh{s}lPM~2-6<+^_L6w8O5%99%fLnb$0UnV21)ootlj=BdE#8hs|UA6+k5R8Ui}I! zYo541gBYA%WgbN9jW)l^cBJ!iwH!-IYbWoe?gD{sB$~ zT3KeZ+plBi=uNI2mV1xgRGS0LR{beic2kjhvsJDz5jug51@9I~)Mr8sIvpZja^_pC zdMCWcB@ON?E*A}YSX%CVBJ7{r6uT#xzOU8aDM*(y2w9x&|)2U854~XPU10E6b3Np5vx4NtY2RBJEuDYU-4Lf-H6DQVNzA{ zg6Z&2EHhjmQ}#UNO8aPA)t7vx@KhEmH2Vx@f9OjQevo56NtSDJ9$#a(51HYXGFpF_ ztJBpdoARY-B6-0Yrw%%V4CI3%9n879_&n*?5AoNSQ+EUCh7bJW8r#;jUzCGEvbEpF z%*~IgFg%=03&31f4O$T^GgLv3w5bFh?@S%vGFfc&k3>hGGA7;6CSOOF_JSduKd-QW zpg*;xje~1J+N8Fy^Y_fRcL)9|{Drs6MPr)#v=zwapL|T5Y^A_I6Yjcyi^-sh%k_Vo z)LI+DAp3%SOjd$rjBiLfm7~*65<5X_=d+oPNeB9pPK-52h$R2ZG%}y=eRBZj7wQ0$ z@DC20LYg$0F9gU7<)l^9XTu!GY~?`leVh5~5gW80UffIef$Ns*druDBV<(n`C|9Qk z1`s-52kU1jZuuBVJD=N@u*_U9B)#TP{IpcePTfQfh0C|ov!k$6(xi0?)yN)F<!ME2oRlV#+LK07Na0rMj~UIH?{~L)gD=;QZ74T{OVWywS6OOz)E=J#F0(Hyd?~_! 
zyY>N<^pkqaF8!}|SY5S(tXzF!Lu9z%mS*!p(+(B;mp+f9mw^}YyhMlJ%F+nAal|rv zn^tC}CpS8U&eJhjGpg5mwh^Xk)4KDyR9UYsA=pebllD2}5Fwm~?);ag*V2Zh6W3`1 zRBt?GzWAB$JrnRy*9CZ>_B5N(4e}+d>!i2Ln!KvHxn)Z-QJg$TFJ3-_P}+acq0H|7 z#UCoTc&dEe@4CWBO|UIz`m*h=_Y-|G*Rlb~+TvEj!rwW17iC;>$tQ2&aTJte*tVn) zLx~69ny56tP`5llt&Wp4!5Ao>iX!^u_wH@)$)LB+UAXjFPVp&9`(19 zu`m(46`g`u7Sx8g#zXEUCQ(W@p~1~6$yEGEi=ywzBULw{8ZHldLNbJwDIITz zbZnh0&TUh-Jm7fmjOmQR#YS0gJzH7CbP({GZbf`uj;fS$!>%NUr% zI8Q1t*=PwDi>0~3BPu=yv5M8m5`whiJ%ki+sHOP~&;yoxsXR6K%p3~T?+V{NApH($lGw>s6)eL?)+SMIP$R1&u)|j_I=fmDhJ3mG zTz-{Qxup1@%vzPctUM}KMEII!<~y@-n$<|lRHO#UKrG^3dEqb2mo&AN!Jmt;%4nF% z>gb~w`JXyH4XYAjRX&qLlMLzHv>RN6bCJ&6+Q1*$%E#8ewY)DCe4VJP2$}3rL037J z&#orrn#M8uP4V&z{eqe2+h0@@g*D|#5#3YENmbN*w7g(_x*JQen4x%vO{~R`-F#m~;VbOGo6%9gH?tEi)77e- z4OK}}>UN5YpQ5frsAXy&4&42`&PQhW!;M}};xf*y;{?7@7B{m^6l*B!&hH`MZXePq zQW9=1z|vX*$2ETvsG`M*=wC(El_DlP)_ljCfdh+%ryZUF)-du)WY!jlV3=gnFiJ*C z@@B7!;|Xefstnr;RKm#b%iH`I9k8&~85_1YGlBa!n6@kqvL5HxI=km}9ccR&FH2|S zSZo|!>W$i0+8q6$MB{M)jY#fzwFNm&cemvYLwy5&1j zY`HRDy6IPf)cL$E<-dD>&>m*>WA^WUUDVmawY{L~4WINb0~GcRU^=w zp`1fC!Jm5TCVkC0Ab=yIS3DRqECF>lLf43ehA?!!ol#MA%d#uk$Wyv^X+186cD!{w znKD&4K1;&VcId6C3>DdBTns8g6fwF}k&4rp_Q_LHlvxO`Vg9x_Kd_|%H)reX!MFNd z0aKb7Bf36pC2k*hIyn{i3l{qU~Ye0@CIkVb6tg4&OJLbwOHw zLr<}Iw~^B9-<{k zG3OAM3eQ8mg?*4k7o>}gGkZVCGi=#PjEAyET2Py$ZjM$i9lVgohVv}XA$q^2&=1O6 zP@8lRx473faa0YU@~J=VSu%L-sjBsiB8Fv`1E5y$R|+~RYp=a_Rm2t;M8~D|>`KC( z#xi-uzZuY%V$|$mlhtM1S*B%(jF_;-LoTC_rCrCIJ!k_}gUhp3U;}{7|FKU%9O(9S ztaM4@xL!nfrZW4OkyESTd*lO@`mV&dm$bhg#tLLF%2LqhXEv={DD=-NVz*y1x0yRM zo3RvQq7ORzwt7G!H2i-&O}JjDBB)`)rF_S^1Uynl`zyw*&tj!9M0ZT${Gj1s!++gk zFJMD9s|XaimgpGKP45)}O>*on0c%HT@an4#6j(GFZ_wS`4O|IEgzbeSlYAyu({7nE zP_hni@h{h0QY&l{1WEW#gsSCvrZQb0)p?>ww=>MYBH*Z75mjivsVbbY*R?e+J=}VZ!Ln zSgtR@z$*2_5Iz)2{3{s~hWx9ysuRv;R}v5;^4`W%iV39t5=raY>|etHy(~-4$>^Fg zwJ};Hbbto9p~?2*!Z9P z)5o-`UVJX0ZE7lhRU7QjG*+vLW{pxlgW+v~^YPEBfprJ_1H)(xN_=DhGV(HP*70IQ zbw2Am83J1-^>Rw6ZFI~A`d?Upt3M$?R*2{en)`&P6{Jvi9l6|30SimE)nccPaPn_24=MHZUA?qx6ajvO}vF}V+gre 
zq~u2M9(#hT0=s$Ol;Vr{G5ksD!-zgBqd@!LxYi0Y3F8pF?s=mQz~iA=N+Ui%zs=|q zeD~mm4ee>oC3~eJl|H^qV$eukv76u5pNK1VCWkuUMy;@-5hswko-O1s12MpiDCOIb zIWX|jchbh{mkY+$=L=3`Vy6l4axxe73A6wMRW+?xG+iISox~dShjBAiwcHT zm{IcOt>nAK&kfup64`Te6Rs^i=u2aioYfc$#wME!6;`2j=| zW3et_kL=3&qLOErZ55+fYcFwNMH|%=U}82M_kSj$&EVWV$kT*PZ$w4%ltR}e<&W+5 zv#{l|d?tRV(JVY!-6{R}^qon#JC1(&PR8@B<0%H$&sxP~^s>g*wF&M~fKzUwT+UgQ z8al3g{DQEgIV4rbi^Ot8Bqrv4#O82u0{P+rKF?~Vc2i;2x2J6f&TU&H#K$WSz%>ayUcKJASzA$?pfzqb2Q4Dw%GLm4o94(}YU!cqWRZ2>9J>^hIGmlv>kfQBA{b zP_~2;R%lO3_$p2CxX{>@e-k-uc^cZE$|S#e5%^m@I(^?yUecuu3p%na$0D1q5SlDa z^Uh&M!S7m~=MI^~-13U(SpyO9Hl+eJDVV&|*e|WI;?ky&>`W3{yFH(7TN$_sisD^bA9R9tAr%HikkM#oxX2*k%E>~}hCxd{zsCT`Jr@R^Z=@E(N^am^Ed z8oHog42_xQ|Ckr!X#FK^_<{V3V~fm(Uy3dm_Nm>*6dJ6qEko{c!8PA2t>FPvio{$M z2iy0KG7FOZd>Ria+n#+nH6bg>*9yTqc|dbVP7+u-RP6-~{iSUuFkX&{bgY-{b z5(<7BlZ#PbNZMD#b=nF;xV7&OR=3!`(nvvtm4=3~R&8qO7BI~=+BRQ|I{Ie>uv=Yb z8abS`_}1{2MnK@jFIh^)s!D2H;V531ohPVn^jg71EdZ`+Ilxyn9ak?0_O!PIZIkN@7qomR6E|>xk8LD^(9AOr}Ro^5?=s zjtqgf#X1Qnd7n4_qXj64e`_=tB_Uh2#wV@d9XC0pSx{8SuQhBVTGje~j)c0^=jp~G zA$nv^TeaQ_fnq+|JWiprx^k~>N6z^^=^HXby=PLbqI9TlT!zF{=de}QiF}N1=9|v6 z&ssXWj)(6wd&Ts63pRM&?@W~UsYh5O#xJ^dVn>8gY<+4a!3J0=c+$mcQ>nu~BH#SD zS@r6(Mg;s@Td5^%7SUl%$4PTFmb3aVxA(-9l}tF_Fo&17;=4$4j(wu96#h&K-r_@aiRf<#TWH4cgetcUiMY=?BOb7pe@@8-o=+tgymyqEMHB;ol9Qj zXY0qaJHxvbdwoi2{qo$|gVi{4-O8v-)D;_t)}uz$l$XysG5%z_Gezl?{>pF+_oS;^ zc0>?s9+i`p^i`>gD;ZC%1wo;L@@?1M(u$zNatcT&eMXNsu~KmP0n5)x+Wl~HH=#re znoY@vs?*xst7V}Z%>-smB*u(N&4m}L^AnTL_Zns-BC|ch0akF!RM5^ny9pq1!tcRC z;FKp(icNL&Q(D`qjDW}wJT8F~ZD-_Foq?n+yIZAZup;`n!CF*Ms%r6$_y-3)65~qd zXV+M40z<*mk4nc9EEG_-uU$rU>Jb9`QN@)D%xFysujVx(&T@dmBwWUG)Re=f23Ge< z9LDO9_D4{IZ-MASkob?LCvi4Cnj@?EM%Q8%LZT7J>(>4ou><25(W#pzf&gGZuBq{^ zxpL(QqbU_EI0CZXv2uGRGB?%_5Ra9dDdQSZ?5j_-xRd9eVKh}N(1yg=d|EPSmW4R< z()Es6f7r&(gR<&%AX@Ji6Xwlcz&7M6qcG-Jp{FP=c+2?VJQP4p23UwDOL}5sV*6>U z2@9sg#I}$SS14R^`L!Hlr?$p3>I5)NBQT5u~jZx}zU<;ye_(-(C+RBW-jyUAs)jUv8yvUTIArV zHrE*z-Ey8bE4_6CUwfY&{@>mucgbq4TxiHp;6~rr9tT{95w)1(rNsP6qCmq4m=WIH3>% 
zF6~VxEpk((g-b(|@=pzO$c%U8FNb!gP1w%vPiyIZ^SxJY?|Eo5nqrfa%$RZ`FE^Yo zSqu^%DV#Y&)NKphUOtY<2|L*42{5ZVj!=&gZ>d;y6tf`HZ(Fyhc_{`mt=8QN=%SV% zy?TGjj}TyHdHxks>q5L>>OS$Iir5#kl%@?7lsL4t5Hjh zEe;pTZ85YO*%0YXsDRfR`TJMx-MCG<3$<4P^eMvoS5c(UWm_ZkCUoD4uLCvB`w-dw zIFCyp!mmiFXIDjdC-z(=@N(q-jRO!z{hz#@` z2C9&K2d}GVrFEDIU#=BfChHX3?O0|s$ruwv0*CUQ0;Vx-aCnnW7$^2sxA!?2^t(w& z(o0bo-w7jxiVrS5DH*+|g}uT!G};x#E-r=LtsFcNfAgvfZvsH$x|i1|6>MzZ26dz@ zl`woAc|f@l(@a{UcR@_EVm~I$0_bBn4LKO>{-iyO)8(_20lFi#Da*FvNXJ6nVMJ(3>7q`{b{+k^$5tWQ851__k@R~nfTy%i#j(EL6f zNCH`vEb*@(0sl1%8sti~D6!BK222T6R!1BA#n$V!)5O5#-es;^9|7I-Va+1YTmpeo z8v!h|f90wH>XI7~0%_%Cur4!p3P_H?JPtc3{EDSTF#khped?lGrnM}2@eC5&r;>bt zI;-lGD!R2*RVCx}OXZILTSq!E&$N~zxD23bI-v#P<{qRXGN|<~g1EF4g8(o}AV`dS zBV{Ag=uUs4`dgl17_HW^Y4FR(!{NH3KD#7)h_x+LHGirsUfg4;K-9l#Fnq@|z_H-X z50QX~Hxx?Q^eSs>8AL~X_@$cXTZ^B*VPILFwCAz;__{oZ=+X*FDVEnyG~EA|^1q9_ z8&DN>$}|ql8l7#q5BklO`C?Z+_S@=rJWGLhwUxwf_1RVR*vs5}a94#g;^MqQkg@^6 zFYgEwx^<+8KiJq%>TlLIZKI?hS4N<2j`oFajm36}i|_-i>ur{utxE{#3te(iHkw2! z=M??=XxJAFGOH3guWE~+_`97OcXI}o3WiST{aXozXuFD&Z~JJq3DfKOt>D+S;%0sO zvUwCk;w~$Vr>?gmU+y#E1-dvT%9606t3~~1V|BZsDK4i1-5!iIa(PLe& z9#y@xYG74wEH(yU#R+FyNo8)7Aqs%Jv2fsx48k-taa4xP*qWCXb>2R#Y|AH~$Cq(r zg5edSC(R-r21>s8v+FDNzZgBnLkJt=C}-EU-IB-JSF}4m3pozS!^JvKv#3t zJGYT45gR$G%>tClSZqYl%u`plpfr0nzIHP5>{DS?I2sSWbvzBdxfHNH?CZN#MDXEH zY#B+bFTwAE-0I)Omc1s+tNt=el)Z)@3-A2}`;eQ>rFiWnduN?euBz@VLJb>cWi-Jt zC3fZ(z|371a3T4`m4G7aDR=fpL$+A4-eBbqms5!p;Q6SxF=hOS zvqD{6TGZZv{_^VLAA@0vL&}08M?~W%Zf9bU4?yG7+8;*3?ofu=l_qg{S-`dpDOsJ^ z;9z`s2jcg*Ii@G$v#XzvN=|DemUVwQPNSy_vwCC3`km%&zE6AvM-6gG!UDaL%(isQ4#YDDxtr z=Px1CqYKcX*|+_f-ztL&njVDjq5P%$n9Jr{<2xh5BWOluOc#yOW8CI}%~Zr!F6heK8q`8osl7z=5T!1GO;97l^V4Oant-iXQWVOB+L7&v>iwac2&7u&dK z3YJE#iEx>OO!@7%0(zB-(h_OI$~i!)TELq za;<<)nhcmIjcWBA(+2uU_LdK(fe5oC{j(yP?J+BNN{jaczUH2?(50&tg(>RO!g%<( z4NE@m{(PeR?mMfRO8QgBhTb`!?UNzg4=Za666_hX0tp3u6J-AOIZw{qB3%7~5!3ya z**S-HeHk%9uK|{~1nH8zmJ+HV7!~*WM^G|}AjL^fKAwzFy%^-fHDvOqG!c;!0=&ex zs{1cWvmvpn<% 
zXk797%fhKz6g%}!ADXTjeHQ+!V*HU)$pgc#r2LAw+vqYe8%rZehVZ1$LSLEHt?H$q zhVvvMWo2nrF=*85_AKekrrEH1mYMYi*7f=08QXz;t7ef^)*{fW8o2}{rG()Jpx|jk9Z-_Gu)md zOQ%ryBI`GE>%6A>Vji@m(I*P&LS%@T@#h}-jQZs=ywj#vlBHBKKOp7rodiW%IQU|@ zze|5Fx~D+x8FQx*l|^pzTwq_XMD&SX`(1`FFUHj>q0Fy(RQQWZnX_5kBI1aQrnfY#IGa`%6PKO+L`i2{ZB47$ORJSj z9TkVRwxLaKFTG7Qn9Xy%id?}*!pX=C z=1u|IX)hTshma=+RmB0p+e!ZS-?L@_55W9x?u;$CIb59q!PDa*ug6RPqEx_o9QLF~9{ z++fda&$+*2Z@RAao`FCnV)nbI3?Q{a=Rz0w581=_Z6}{v_y5f#-L?N;O(K&FH&->O z{9i)bF|ZabZ3QLty|=$V5*)y;WJrZj3i$(}xj!N`CXBiqD8w`-3esaE)Qt+HPUcD? z_?WOzc)7RXFr~&=w3I>JAVj@LAm_4{_X4B6(0vK3=4Osm-aK6$HbD=r6otXjUU~t0 zBkZ-%^QHkvON?j;X0X)01Ymwvh6giREFd*3BLe;2!;IF&`ZHy7_!OZbs?&>quiGbs z@>zRca}oMZJaRTxhd?4-v|9OSl=`<38mm1pwC0S$Kih-?1COE_A0GHKHFCyg(Ib{G z4B=e_N`a`NHBWXIGH+fMqb9|O7?pt#@TwEOb`4lsks*~s>b6$t;`L)+l z{aVHPG$^4mDY&_o4z!A;wQt&dBIRuy{xaauX7Ea|BpdmrjIWN6`Um)eFa}& zq8+1~^Lztv30Yd3@0p#Bn<%|mlSdtvm*k$6r|t|S5^*{gP+?y*R{A++z!fxUzGqJD z@Zpr=v9szJkbAbK^cIJV;tfG-@}5y-;{Kv1qNg^-cm_w&#d?RdV@3=Pe+|-b$7IT9 z@1;-dn7`9g%EJ01**Qbdb7jQ8$m*2t2so40N4--t$SfhIcB&gWedae4=2<+j+u!r6 zX-W>p2`GAx`&gMI=U}D@*G>s_e5GtCdP>ocy>FV<&MG&^@1a4~$5c49lSRwD#LGCQ zcZeTsFK&LrRk#_th5qieA*ES|%1LjQ5SBuaR@0XGjRu?sk4KW&;DVirGS-_ki?zKj zbWeh%utg%W-$XHO^YR9Ywf15>w>am1k zhVQ*~L)1C+TSE??h4Owdig>g|2Crr9Hiz39zM56(rST~W@1u@@*I>@F;bv_&#e-8q zPXxbK5~qA|c<@TO0g~ThL5;OCU;2bA;QT3GA92uO>B24FGbV&T(l5*3KNT*;oObrL z#hFOl0Mnc$>V}TRgr7#5>S{O^7T+UlmW`=k_?^#WIAQATIvh9MnBlvxcBC-H0h6|d zQ4hI#nrIB{LFAf8Pg@qjkw>F3JVwqlH^R0Xj7Swe)Pq$l@>j%Yu4|&7MNo1_JCiL& z9)!fG+BkwSmvWUC*kGyi#W1zlQVXd>456D=JXtpUJ}(XhI5z{r50Xld2dm_r5&PX6 zFuTO}Y(~jh!Zc=etadW1Yw^${(_Y%1y`yti-EdHe4ikzOZ)T_WUwPfJg^6RP& z)=9;Zd+8mxSsEf;Nk!@X`=f&%MTSed$~f)Ms=Nla=!qM7?$JMN({xT{&D!3Cse#XQ zUSG!_=5cBoUn4CNvOV>+3k|{K)!E*+tNb~R&quM(p<<_&){VtYS+bdMYgREr9*svV zf&C7PL#qYOk&eKz@O-MvaYD{DO%T3nAdTKM{bj+g-c6-VwPeRK7lZ)*WAIV5;KSab z@v`dtAME|4O|}My>6!iaw_D1g0{-9>W}C6red>S)yfgNx+&07t_CS}wlkZb_T7US> zL!PiO#k)=PD|*bXdF+2}0yzpOyC~i4gt!-U&y~M*l-pfHS<_MBBG6HeG}aXzcZI5I 
z(I(FWYGDyIQ{{q_23!4#TSE%tvd49wGh#z!#=VRnazhU6^ zR>F;JXedOu6dI9r(?)xF{PYLZ9fVHPUHioy#UAWkUgrpSKG6*cgtdSviiS3{J4D%y+gGEKe2z_Jv1aft@kIX z&{iLX0n3R1#86)BPUmI3?=F%`Ev*4K|A6%M?DBqiWR=T7$9tz2M8-Gg25!(pS6tAD zuj$+Bcv$u3t_uVo8iY<`Ovc~2tf}&A>4ljd{dGlI041Qu>B1`K>m@(Avc#{|kBC*Y zIW(aDwdP8sTNe~rP*WVg8uaH(zkbb&p>c(s*RHSqsy*wWV~r)c;EIjLszBn8!|&Gv zZBKiw^6mJ}ZZpSLu?-ExEsB_We+p-4-N$dJID0HTSIq5sO?iE^ykdCO!Q}fpS;rkc zC93Ho3+iG*R{H||Lhab|58t7I^A|>j70$)ZuFadDe)W?3sa_zQ(-W@zQh4Z@t2V-6 zQiIuoij|6d6eDa`f8=K)z+ck#7E-nZJ`1XOU}5C(tdR)*R<+z z&hNTN))^E2UdM5n(S25#*Rf!4BDg0mHnDRCgpk``dRXw`gmBOZZ&ZkZlCHCbb-XYN z7{NXJ7a9$B5Lxw~u8u~z5s6irv#p{KbsSr44SBeA#ES67=@F3bXWO3gSrWA}{22un zFlb~SSL|^F2mrKj@gI}|VHQ+CNVaIE?S<`*4@%Y$V9))`LMFR^T zJ2>Za3{2^UGm1vt6GWC{($!fzH=ne-1i2?ZU+a5{) z_Lk0#`~FuLjArrE7I^3OW4?DTfF3t+|D^Jdg6I$Q+ZJwJ>;lC7a^X^&dO(}qrRrf0 z06#`;c8aX~8$WJJEZ8x}fYJS)8_W)$ji@g4A`wU^fC?6ZA(TW^dj~ckEilfVV2{dJ z_xFHX*ZOX<{X6MzwNtDY@;n!bjYCIxl@)2JF-}7D_~5_mTbi@myf`Ox98ZDrpZ9>+ zsef{(!hBme*((2dul8@PQ_RV3=|eW)d|o2KaO9v|zwj$QULGKW2V0Gzi{<$cyc`A>f;DFseT`^ z+fS}*qjvmWY-fY2A6c5vm{Nb4o*OX=k&C8pSRL+;;{h3Sr~*fq9!nzqYymc{!Ty#j z%pHy}<1w_T zKOW_p0{C_GV~p6{v~HWXkr$aqRK5QAWbv^;RJ`iu5Bktw4`)ic>o;Xvmtq9rN#M%S zA*V|x6W#x40mwEOaJ0*jXz zf5p6)zSO(ORK*XBQ$9Oh%WPyyqd@@d|Uw_`L1!6RcBE0yvsP_9UMwfB9Iq zh~X#5Y@ri=f|t>e2wQTZW$tTBkGC6)0&QldR{g=WRwBsqW@rlt$9vTBYmrM7Ujwg9)u zg@aTfA7m``7@)52LRNREPqJjyLAi!ZhZ!v_*6tW{p^k&=(@UcOctHX2P6 z+4aJ7>~#M+H0F-?AVKHM(PffAPkp!#6TordUKm}^`V2r-qD$3+Mp5Jc!)AO?3u+@k zf!cQKSp}5%1dW<~SO|hM-W)#TBA%4c zl66$2nsawr79<5xFV?!Zu+Cf;lEygIc4P60I$-O?%JRl;kf{Uplrn(yC4@)v-H7N% z{hTQ!#3!3bf9~?6{gF&J-oteNITNW56&xnW08j=Gs^Of1x1L=M9dar>{#*-TsxeO8 z3FQSqXF~PwuCDhej98iZ^fEf=o&r-lKm}HDMR2~;@=r%ykFtiuT`k6leM)7>kgc;v zuD15E`3Lr_fR*6xa_W-p+AHeoGlCb;UW7;6j0ObBYoY8g z;=!rwy)Ev?>jI=wK2L8tQX3aT?-JHuSBaqUlXGvQOQVNo=nHlG02M_DPni~^Y#gDM zJ!7ofRs@p18y7-1n&9PSR2uS!w9xxgbypt?H4_V<875}LkFb2Bhl_j9F2O>CrFLtr zf?LC&%(BQ4!-PHS&LJ}i1r>PUtW29bfu9*_OtNWqx&a?a<2V%3o{RuR zl4~I%yQ+5tFt0^rswy4um*ze$Thb(nl$%K!p&L3a`n+t|*DH!X`$qIit*o0IfY-e= 
z)NoZqeqAY-vyh%pYpl`yscjH1pq`e-&>B0y3Ei4IiqD^x|Jl9%2NR|`a@_?Oxe--K z0Nb^~{EAWjY!#D{GW1+qx({ECi^N!bp9NoQZDsup*Vn?~tqO~c_lU0e!cR*$}YvnLZ2uiLA`du@Rc=2zKF1~X`&OS$LM3O*Es5ntqAtJoPBBZ_4-9V z+V7_^4kk+(_(S)ijCBHv92n&w;a>W$b+ge#AdA8JUaCb2)yLQY{eNoaE-!7kC2+MA z??3?yB`pBEq}@4-qr!yANxf5gFeQi0?m$P~yEkeg0x{uz=Sofb@<&EeGK|%$l=t=c zp<4z(Wk>u75c)nGETjvS3hJ-lMo+pr%0<+VUrN3f6{{)1)xsAnJ@d~TQB~Y?cxNE! z*J`;J`Twx@mO*j7+q!R%;I6^l-3jh)!JWo~y9a{11b2cv1b3IJ%*Uv*b80DI8SX-A1}F12lHxcHfOR#F3;l&IpAp$cvpUeqq!E1xW^QBfE)Tjh2l zY|EI4k{qDsB@WN$EuDDPlVdR5AFo?Ihv?20mdg=-|O5qhaGKt{LO0Fhi-|b@mh=`W;=+wTB z6%G>zBwozs`SjMR=B`qGWO@-C11o=k{7b zy+D}1sp$JiQQZ&QTKie_WVO08{o_dApxlc8ACv!ofXTbo!5x~w0_t>9*Y8-lF07aj4;y2UYDm#J zl8%4Ab)`MEe+TgPWtV3O=<@+8r$evcdAHo1nkb+Cle*8}$&P2Zd(s{*kccyy# zxu|mUf1@k>npFQwx?dHwyaDJY5qKS0nO~iG7oBp4)+=)L{;Irr$ZsO=qUa)YyMz{| zdDPh)8Bmk2c)GeeHg)D*7pPBO;Sb1XFc`guO+H6FpfeQ7rA3+;HXq3KAw{Km%x)}+ z{?-y4UG|4@PG{}{KdLyFQ7(p4*Y?FVCK~!%X4vcJvp%>HQ`aU@`%iuAGv~ruw#Ch= zb4+7MZuqEeiGnNqoa1m^ogopvA&^+NS}SL~-cIsR>}*d^7E+^w>$e@gT}yamcm>Q? 
z0W%Mq5hd2q+jC}dsAz2|TimvklR~6JZ@_U8m&@;SExtQ#L5eZ+^X$&;B@K3qz4*@) zZd=+C(>Gx$od;2;JBgnIEbaAPci3Si?0(qmGI~8)e{){b-RHJF*|RWU=8>3H2h`e` zD%E(Z4tzpzC~Ijdoo5Du=d{FOau z(#;+E0n;BYciZtjWK^D%)GU9hDZrsrKh0|-bH-!)zr@e)y>E25wEp&_ZjWbh#dX&+ z#5C8e%Q=f}Hhq`+GB0%(z7f?S73^&aQ@E-8vD1W+&gI0x<~_JRnkGKQ;(=nywl=OU zMMbYHc=%;wYmCgHK9V>f#RH-{1pZ$szr2jHe5;KuT_(I19yk2k9zJlRTz_uk*F09N zV8*;C#sCu)T)!JTTD=wJnU7ly;6xWW%C}1xpfZOKtb4soaKA6S*XEX4lQn z%lVTUnuO=MA7?nb!Q+s`u4l+Lxo8rv2xI$cX>c;e`NCz&JH|BfYgrHg-uB5P)H~it zNUjd0)8m(vIsgL-kge+{YEHh}HR$F6n{M%uY}$T_vQ#<>Qa;j&=!_=;zObdmbY+EO zPxfA-XAn1Z>DA+%c$9jL1+C4?pU?_@B<%?r7r{sVLcm8+zahkRY4?+7Tu|Cp2-KmU zyi#D?Wk3$j`1~||2;7X9`gHeW%!oV_i$(082G=6ELeB%5{p|IcZke=b1V}2$YhJn> zboT<#$v?C==Vv;t{FRp-c;bgx3EWcVMAw30Qf>lNIWzBwdmR9Yh!R&8MUd7>*6oe+ z)l+$CYb4P$tI#5=0xmTz*nrPelLpdh%%;KWSNSS&ewh+FuIbk_LB^Q0QiG`>duGn+ zUkD5@aqZ}c5>Qhaap`qpzu{eV<%{Agir&BfF$|Rq$YzjgGnncK6&jqs#D?IrN6yoUCZ-qVgZJ8U<#ibD+NUy_h}!rW z!^8kE9#dgHo?wHq`7l3shi&|5q2Myd?@p5qHvKiIHmvY7=1ynbw9Mzi``*8VdJ~FN zla;ZdmAM;hH8$?X+jQG+s{BeQaL6_Y;t0K(I6^dGCn^0PO3dy@e#ikK#FpR^Z;AB> zwTphWF;r?rOjCq7WYOQW2gd>K@trP0NJ&n*4-kgEVdF#{3wYP?tx7kcARNb%L8dnk zAz)IG)%j?CYV0%fhu88wYR_D7TQto=1WE-iDm28G^;$%BZNiA=&t#fMi*l~~Kv`LU zN-tPI51B+mrb_e4Ly%g{_mn!BD)uT%;@zZZAs|%8civ*3c@*Jo9xb?Q6@D7tkU^L0 z1Ey^S&TLR_K^7mlrYJ5azb)jR*&Z7mI9e3%OI@x0O!mKTqejll5=N+MchYW`OvIg z1Ltrh!l&O`HHxhcey0YEY8s1InbMwOQZJw1s{^X|tJY?fCh+rXLM7d%iXLR-BI!(l zd;U(-=pVaKb*My?Z1+@6+eFUBX20kZ6sh*VM0ikFoI}le(A%d^1;smj^KcCeA*@Ui%`y^~QBMlu*ah4py++T@T6K_vF7O>)X26|s z_-fNB#t|Jqq<%XU_96*j7HSCZ2#enVgG8bC#lbA+P~K{ilbgY4;3*iIj`9HMolsqM z@b((5Ds!R^*;{see!J3zk-NOiD_)fvC>OM~mC|^M@`!MwlDY6(-;2{qss(l!Y}Ft? 
zD2D!+QI2vr)ZThvM){N5#e;w!TZD*OJb}Da^}bm7`mN9osRzD>)bMS)4%LXj%RDvn z@@WAb9Jt*G^zaw{5f-y${))FKn$es@w041jB#g>)Cm-={A-5l5eV7#BEP|i}KLeR( zQ(Jy^j3R*0E&h}5s?e7q1gTmF?w#6-(U7PeG~`^4Xe790(7Pv4#Wy#jW>5rrg?c(oUU_9|1ZB5-96>w7(z-9^7z}>6BSxixE1VP4~2M2VzRp^$@$iQ)L!P3l$z;ymj zhXPD{c#8S7D9B^(>eP<=Df`jJS`WS(>!`}vLm@kS1kqaWlDMk87wzLB0`SD?IDK=o zU^><|guH4Yq73fR3`P5*a7`5%Vbb}P*yvwnwDg>;G= z8&IE{8_FEVgcE5YPFJ@6v(5N*vQylQ1GE`z)FkqL`@)zLUF6{Z=32-hbTdY^9a}$8 zvm@qFa)SuLxUnuZJ@^X#(l!4=Nw%|kuE(HtbnT4FaA8U{gYu6qtN(@8RPLWN7_y*aL`B=3W;CnyJB$46tRB?MWvYd zM#KY5IyOX5p&>HX|8^EraOGoEVO;S}bL}+it|oc_iS^W5p=VQOGp%nmWW)Q%CZLx0 z8<@_D;Q(zNxM~Ub;3;*FcIkpXsNxMPoIw});q@Tkr6?8!EnT@J9FQ=`CvG)eLImROizzVDm*lRiR@7c4f?3NGa*N7eX;rgX znv2IgSftY@<9R?F0Nt3=K?Oa!^NnylVC8N2Tc6=N)DMerO2~}6Z`v_#x|8nA%ProJ)J$ywE4YJ1OzR^A* zC8u^|9rF=>R{7}Rjon?eHG^3-UfpfVaN<%MHzWZ*mdW)YLUoPOadR_GkE&gNJCX&! zXnNPct$^qs>WPnOi;KJPt?5q5WV!CS;Ic-pK1Yicl_(aSvFO^2Fy*AXb&vT>LXQF3 zAcl_Vg@pyFwU*YrLqa^RB~gBw>r7STlETWkC@R{vu4P;t_UDF)NC^1yo>P%6sW4BK zO=69_2kf6a3Yp04oj63ep&X@<@uxr*5Dw2$io3P~Lcx~b-VJCGX(H^-B`eOT}n>|Q98v;Wy{3gPiw?zaR#p7S&#q;OnX6Y|< zEbqeipW0#FJuPW!P@~A&V5^vJOL*~LBbO1?iv?$$2aV}u`pHi7y%C!}!rK%(4;wuV z0P;{!#kX+dsIyNbiDp;a-d-880pE&jyffnttCJsmWOn@}e-&GL)E3L#Xa4}yhb=P- z=$>3Ry#34MF|iFtTPW(IqRS$ObKy8ZO3SCIHSmaEG-}ps^4A;yM~z9Ud05iBE$as` z?mQ4u{Zjcy3{j4`l{Zov^DK#7xp8GI=XbVL7I+?v=saxB^b=eRUkkAN!L0z%G9}v) z2%s`vGK2%>8R%I^eS!t0P3UX+;lLPmjyd{k7}XLw|7&RxP;yG-bAoI;I#Il6*vv}5)7+G`LlF;f1t?}k z)1WnrHx@y)ikKHW_dRbU`Z5cfb3z}HMG2{#2&7*nCXo1+vwcEal;d6M_^LyYtrQNX zW?~=S-VT!AgHFjwh9(o&onmXDEc=(vUo-lR9gQJUPpZ&SxuL({_b$ymI(A67FHUL7 z58w9pSbDW&*OJ@DP9JgmjOy~7XCj@P*$K3hAm=8wV>!h??hcFtX-|qDDuU#IkgN&Q zzNb5#QW{x6)$OBHD*)n;a23d6{el2V(bHL4SrGU@5YaO(yV`T7--sx1z3;gy5F+NR z%;dKN7q@QJmWB~aA0hzECxwq@39OP^yCryN&VsgJ^;K-K!HnjlA>n;Lrp=s@A_T} zFA8E(O{76g>Z{Ecb3Z6+3;PJ0g(*490{x-`s+cV?R0?a-Tv!X{%*GV!EAfXyRvl+O z3#Wh)>_7%#NB0uq;E60aFy_R(u^{vEDb!J^|2WE*WymAb*0Qva7#OS-Cjhlp>3g15DZg~M@CBHpyYSw#cCxZ+Xt;PUgC zurwVZWl@xsBg2cV44tXssoMe(4sKi^({z&AA(2C0Jf-uYjaXg{!-!j=Ryk3z(t3 
z6As}+wPjE>3(`cF-dR!ltKr&AQIk_vHE?lp2DzWv`kZINSlrvoQ~6UM+@}p-vZs7Q zdv8R0?J9KabL?XQDT{`}yRCO^7ZsTit4bCH%JeD@sHl`M6cDHKuJ>UTw_P2!va@}g zd|{WOc>?C_8cqj9a#CdU-mvz74A;mfpxnW@=ple;$p2M#1LBuw$30HZnWVpFrV)>b zcn3HhIq6$(X!rrrZS%{|EzBLu>)71z%`%*~30}dd^&Jx>>!8d93~%CO~s1B?J9rwK#K z&0~JP7NwHD3M)=>;V|bnN)c0-2iDC>Kz(L-h{(x2_HWy&lb~3qWJ;$k%G)iNq^b*d z>4l$+@bvoiRx+A(XvvhfK$c=lpO(Q{Jm)r>pBg1s7SnN|(Y2Tss?H0u^7;x_;&Kj2 zm+IEahLQg7mdf>je}<~On+5op zU+Yz{{}N89Va8DN4Gr4;{#)~5hrX@1JYY*yN1>-EnkjyEJ z=K5~h5*Z4v`O|?Q4r1kXcbTP~jt9fOdCuIW>+f;-m0GHYMF*S{)IKW`6dl@+Kc6cU6)}M{EoPZ&;VgJ*^A6|{sELn`&JJ|8b1fo`xq+^muceSkDUw~< zKfwJ82w4x(R~`vlpPG&@l=AOu+61LnpOy@O_@T>8IGW5*fQ|p_%w)^RgWQaA-uevL zGTJ|{q~!;va6q@ycN)c$4mdP_$3^{V38{SV_x@5r6jnnN zTgze9{tsK^f!4%P0F0H=$$U-*Ud~&i@=lcn79&pUjT`z`^6e)5N)+%?Z6CE4c$E4@ zm{&6s|CQov&*d;y_e*U2a1y>j zc~y0RUBYM{{m;=wU2V9_Y^I5jSRu$L%zj+}n~z@d4H8BPXzKm|&D=laR4gie3L;F( z8*YXmBSrQ`=EMG*%tryhA}as}bFQ#J&M^dykoX?{q8G+=Te?Qke(&c3lT9k?^+u{A z^K$25fg;$BO#-z2X*rqw*-C+oa|GU;q(`B5KK4Y}9yh2}|0F(T%M^rqaPmVYnaiKO z1otAhtpA*=N3pI(l1<)+ENxopn&UtQAK{^ojURgpMM@R6)+4CupWCZe*%iX%)#{yd z0YZ=KJ(QXQfG2iGxoW!sMlb#|j2J~zzqTo43109wi7`=A324NRx0u!9> zk$TZVjQZz3vYTGIVgGMUXmD9*3@h7{O=zdLq#>6;6hH&ovFFjy{d^<*LoRU*TI*nc zaT95iWXtaG&KK5rHNF3Sv$f)XQTchcz3WrFLrvLrlF9EpAx8v$+mw>!17toeSxg*! 
zWrm+Gi5jy&H(ehe*G~&OZUsbwKFGeG zvu4$u$LNgDsj+16u3!Vu_oPH%h&p7BE%JFoF4s>iPW7>JoSZD5ge9+JaA|D11LPbT zT>yl`Ot?Ml;R00b^N-j+n439FafSw>uh_e`maM(lk7S^(q-#Zmc+k}i+`w*4<9L#* zxt_YcCuBNNXkdITescTVW{jdEK36*5B-^LCj|)T`igOdbv>6@iN}#FTxYIHIT4mP$ zL_O3$ab3gmVnBKdL?R6!W4_#xBWhKTFTyH>f+z%0wlIK<9@6w5zt&F$Xdw%p3~91} zi>4p*b2&h)HJ=+WE_CH+`X>++5zM(2@(L(($ulz0Z9CR7k5TsqTo7fH^6R)MBz03o z06x{xKcCR}KJsrMk9Xdy^7gQ7S*3< z9e<)3_n-CY1i8bg?Kw4KQa#>)yQTA*D@ac{DE~7+`8TN-(;D$lwo@@!*sK@s%pnss zCnYuJ^1LOq9j8IQu|zvSzKlG360r|^VNT=TI%0g~^0B$uwAU)_ zY`=U4#imKxbo)?NU#EgFfRO_x205+OxiVL-;yZ7DY1zOI(7>!S&!Xpp#DQa;%DYQj z{t=ae$aSUc|F{q~Zx_OILazvs!vPyFDB9=nprZPb|Rbq)YKCpyW@=Y9I) z00q8VQe(a#{m*-A7)R5iEeVkFy;5sd2QU)@p3TgIys8qi zLuvgBtlt^oY_G$izCB%$gXIwyPdFgd(+f;56CeMJIJfDIEbyxBDJAt7Lj`UwF}Zr^ zqsz76Tz<_5NM_OnIfIm}?@jXrk&^_cB@u|nb@F3|Lho-Rx_oJu5FtkGifd;6wJ5vc zvgxE76n^~Qgr>?@qSlk7yyLRRTBH z#uEVby0%A{B(2syn7T2mV`=mCb=m86o-?}M=IQ=BRv_XTnM1fLk_1Ig^q{}DrIEt% z7Wcgp9m=A8={KG|j@&C$hM$#>42t5q1A&yTQ%xxvBvh`jtVIiju@=>csBrMEKHpf% zeV&;gy*0{amk2A&+?b5xW-VR!vW_}#vaHNcwXCF1b@`srY1)c*L5rN`pTR0AAB4=~g90;f3NAI*u}=i@H+CQFTQ-cAdF&#ZPe!!VS}Q(4Q)|+~ z_><|}sGnU)QZS0Weq>mEGC^X;Jouh`rlAAAfI154f+-tF$@g^89ZTEbU~I}a#ZhSu zYTS+kc}+FYcKSl{!h##3YyR;JMX`?#AOQ#0bMXUe3~+J`5+EI8!%r3f!*#)fj9x_F zIZS*NILhb;$?jL(YK>%vkK@mM1Y%WqLin|9*}y{sY@oQ6W&;@f9L;%oAxi7eSA}K~ z{V=KBa@iyxQJ|wfZ!F*dsxl7Imoi|W^0U%L7Y-P}jeq%55rZm(e^q_S$3y)*O=(qcE=ko*|@XzfcJTND|48nhMsl4B*@%ep(i4p?6?~eE#zyLgPL)At; z15o!c&~q;WO7K)D?5_o2D-~0Ccm*ghu)c0$_sKE=Uj}`WlSfw}%=OlZG9)38M-cnc zIX+mQ@Hok>LjWiuCUd{yAUnL#{jrGyhMtHQxsaF4m-}}lat8-c`Fxn{s)hjSA_(rg z;m~)GNvMeU&~U(^g_>)}GQo6~5bW(sq1F7iM>uc$lEII>`tPx;r6 z26eQK#UYjC&Yxin?g@bWm@~qwDW9r-**fo4fW5Z5^|0X610qvCO_%D&5f|G`Qn)F7 z66rJn$};9|3gGEwvy~zl|Hf5*5bG(;lKq!kt0NU`3I4Tgz zqDw0D&oSVW5n$|DT6U0_m44$&$lG`EgTv6DR>OHnx4aF?b{WBd0e|$!hxu~boz&Hu zq7E&LsVxQ7LHPD(BPo5ztX2kIspJ)gbWtz^Alc?_$y*;6xzs(Z;m30BKb3DVO17-6 z!TLUP2L(7tRyI^gNaCF;82bNw3XVWUdkaHc)igl6Lvir7u|#_p?YQ0^tJUdF8}+0)wF)j9VJ69iyv6u}J`8{I;q#>kURRDdt9M;I^-_uMek 
z>RVWLt^3IPU>FI^0itc&@h_CPXls;!Y<;peBna_Ki1b!JTH%~t99KFIX)$UnkqU9V zn9TJhxlBC){qKhdO8Q&RK?P8I)^4j;(Ol_3uI5(_{Ri@H?~4@t$E$hb zs?W~dNGy^jzvI#+u)iG1NM;21AuOOF2zpMyEA>Ns3z&q|E3Klfay9p0WOIAfuF*-u zN%JWg?aN4j=khd0y*D}k9T@r51Su?GyJa13kYBJoV(Y=YdS`z*ywhMMN^kHi)acpd z>ow;OeL6KM?PgAn)#b5sr&ibZQoHvgtb((dZLtLM+Q~2x=(>(A`REa<+^WS(I%>&j zBV!hsw5NPy7c#p+dQ;fBQW8apr+tk3;k@HD$-8Lta6hE$`(dNM)6p@MdWL3XFz$>} zxb3^!=29o}>3$~+m4>0P!$|np&^xt)2wN`|nY_WE@VQn25^;Msy=F|;?Kn^O(>Hc# z3m%P>q(8?B(Q6v=tWihmex}7LrG2U?89B1Ht4xT9n`Q5a-!(vXg{?V*sh&gazpE2T zrlp6ckA7`)b|xh}+wstP7goPj6xK(DjAUu_yn3Tc#P!Fx{-ux^L)s*12!Cp_44)`A zf9hdXlT}hIk-Ac`Kh6NHx?Ui0k3kE*@`IV@Q78x>c_z|-Ak@7tONb5 zBX*Y8@k@Koajn98Kht388Ce>xG%9=?o4(Ld9?{B=9sBgU-R^R}uhuVPR3tb0-d-M2 zzpL2P*TDZL-b`Ta#R|o4ee_~|Dfm%^^Ukk1GOk5nb17AsL z>hsxS)2NDCUmHz@026vUwQ9_abM>swVR?#n4pOWXV{+ej)n3@{_;*&+G@HZ++DBrs zCA`5rYR2WeECI6^sUS$EmiBBsxPztOr4*5Os)oY^^mwAlv>j-gc5o2Gs!0M!C9EPJ z^SoE?S*0l}ea0yg%WX)5s9N9%wkaqTwKdlm5x1>BwpGx8Igjd%2^eUoUFr= z2;5oJ@bOq|j6m|8>Om9ylhHf9QfF<@bC&F(lhjmh9N!P)IDw@Pc4^(EoxESXG_5k| z!sXk^J649Mn0Da5Qw_0_;@phR4@2aXV77uYm{kpmq}w1RwW}5G)hVEsHkM zobusrVcg;F9!~kvrcRDJcH<>>Qx4Ja0I+X@PqQ#54incnsK0Q|rJKq)-2$a!%0tBs zAy>X;S|MVBqR~Wjy5Ud7I*_^bbSgZ445SqYES(`v7RVI6pBow}fgJKDEs>(CUoEH> zGqJwUq&Ai0U__PqsEMw;rflO?`6KXaoYr+MsB;^T|L^rdxha zCJ~#8V%eVhF^4i+oCy!jowbvkVWJ#O4HxDV_Y@fCw7nSnT;(nJHhk*Jy|jEB zX@o{&Q837uy^)am_;M7|V#Mpo!Rtv=c>JwuCLrcSvP+(t&oeCyXXe$frb289<FGB5q?mT?HGy=daYCR-R?si*<= zPP>Y3xyE{*@d}>v#rY+DBF6>(~n&6602mDwI0 zMEG8*)Haq@kI%6V!Fquvij+(&Lb);I^S1vi|clO z?fg+#f_jCNeoU&mma?1K#p98*S&8~waj%Nu)*(mhg&n%Lw}XgiLUS3HlH<@Qnd{WdPYkf}p|_Ei9+`x|9$8 zGdt|S8u0`V2BE+RIky73=8+qEwUr4|sHRtIhBT0(mXFxkQtt}6oQW6e?ac6M{OjBI zs#k9!#W-PPqR5}8l@U9_R=*prOiT?i1Zw<*tV%;e${Nvh#Uc<)qQ8G%q>Rbtt^FeA zWBY^Ad-QzTBTuA;IK@<4993 zr4lFX}*QSBdk{E0EvWnblt(1w78j zO)J0n4x`ve$ZUoocB{_IMkDi-k zD;Q>iH^KGj;^1}Q+EEg{aREJmu#=96tTXe_- ziS>H~XL#h_slkitlG4aWx>EfZvq{4WNP{h()miCcxF0jo zJ$C%ngiF1y%KM!P-7$FmXmPEtKC+E{@rnb40OnPiy^-AW%Q`k>R4<;H)qUqym({^d 
zkMOYW(?l?6i9)QV3|v+<35G0evj5_ArE6iF(*OQ_*Pn$0xx@!UmP@ynM?D5?^>V>+ z$K*O3&t8v2+5tQLJw~dU@#~V)J@k9rAgN!KY3THG1cOk11zX4b;yOyZ3Bkf3i{k;| zebmpwr(FF{R}gV>l!n#P~4Ktp4s?g)g z%{*v2>nKxbhCgZs&->!2qwpYS-<}r-Fz{6G>aoxvrrz|f8M4Gnnatq=RzNrV_cX|) zwWImkHkUb-R!Hb_;tes(gX?heOpSGG6RDwZP^v0@#D^hIS8M79Un>w=j)hGZgx!K63*0U8-M zxV<-7h3F?#KG^IKXAme&5$}-1f_=HUW3*I>G4h)#6YG(M0*Z5~0aexs zz<6z#fW`8+PoVm=?zna53bQZ6X3H#Mw~iIuIc*n9!I)Sz!k~ku`Uo^m{cpG~*)p}V z_`d*V=|m9Pe1p#1@r>T=(&|E%tFHwuPL3&u`{h0@BjDPV;k%ydt9o3W=~PfxDZ@Im z+y)*bR%+g1Qe0a7^2=?;xgiF48QF}@aMJS`E~jnD(8$H%;2~1%>bo!>#zLUV^ zRn-e0-N`5>S5ds^t(uReYBv}J40}PiOKUYdmy3sMvXYb+G)EXh)JM&M5#!b#yNp6t83ce616P*4mlb``BbLiRCA+(kLdF*?9 z@0f+YHfh}{sp=ks{%+l~5p=}7(a%(mZ714h3uL!>{W!r6^6&wx)RYHFzbgun19|+V zbz~)ekva4sO0tWhj5>LVjkqEh?ppEd1jB`R*RWDHowO1jFl$&dl^FaZw@$q0I;jRR zzHv%heqJ<^Dt4(Zi2|{&DK4bjAbE;Sqk)k>>GvG7u`z}uo~CwSJ>oe+IUr zkbSH|)Evu}>B3*?IGrefg8-L(iEteLjx?daTl6xSg7wuFXIjJWt%F{6L(etC0_teZ zKmJVC@6^6*Qes8s19dcHk_C1!Z58f?a=}TBpE4$WRX8dh(6YrUbVqUm+f3BKE^Mg5 zmqLyg*&L$kl`=C`jWT_)LN6>%LWTT4K5PxMHmq+VhMcAB#kSS{=(O~qk#jkasq};x z=*L&Pqq8l5J!y6L-hKQa?@QbLCl9(EDC_s$Cs7}^!nDTcIZ<4g2|ht#0KGSq;{VY7 zXJToBVZwD6f*w63lUGh^(K%{;-!1g`V}CJ(DBgnSss@q2DGY`JktA>D8MKVaS%(Zv zCV4MFm76J1fRJnq+MaH_C5)8lidEvn~I zAbw5VKBoV*J?J|~O^FTb;n5q~7|0}FBH|5jKj?@EMwuKK6k=fD>rG)3!b}#*NntXt z+TbMKfC3~bF6x}f$Tz3332+J>U@^jlfizk_@plf)Rh2fP16@g+zPO{b!ik`Vdc=1B zE=!2w;y6S=CR_XyAXpBx1DjM(<&bJ0?EMAhe7i0}zJqpgP=xULAU@}`Lt4XD94Rh4 zDDBpT1~u?Os(;oadWt4Pf?6NA@`P!a=w;pD@S_EDV{Shc22J*&aj*ox+afkbGF-y1 z3bMGZGQltIiQ`IX0PYZr+peZdVqb1|!mA!vh6OavnD5P<*~6(h@XJ9cnueF|p$`$< zA4tu=x0ZoMnn@63-)GBc@_C>7h=4NjL{z(I0qL&XGg!Q}3k$2gCtp4rF5cE~k{YHw zNx#IJe|@2>fj$^~l*E^NFhlkOCyce>!bj^=sM4xk#m6bseaR&*SuVJ#g zvcp9ePsHt+CY_8RyQB~o z{Shh2{VeHlyLOoQLQS(Y%NECUy69YU@jd9g!=1gSfT(2M^4jeh;cI&i%8!<7ds0ja zd1vuQN{y3JAiw=Ji*%zeNnO9}*akECVq1ezf{m_aa(3!zP<-o1ik;)!UY2xWjVT_x z6(%P5t5QIW4_Ne*pD(%^q@k4s-#(b4z`t1lObU|xBfIDL&SJN_cnAJc`@;$20Tg&h zuU96)C&Ri3*IMO$HL95?^C{`f-CzWw&q>+Y1ocxYIeWxRKYjVH%!3H?vFL 
z#k31!md{-gvFa`1TBK86nw&;N`*2YUWAv-)l1gtTu_~5C^W9j8ms-xncdz;F$t$vp z)x#5a=bbfmaTMC!pmc}R@3$CGr5u~D3M3m{z@q5j#OUK^4DJtjHl??8Ee&4Vf61ag zf+NvXv=v2ZFuPNINhCgA#Y-}tEPLAM3;&g-;6iwk1qB9O3e#z}J-O-i8hiTZ`_%bh zwqpoW0bM%VPB{H9-vW-d6^VI*NpVprcrKPYXp9TAf$ftzkG=b@|-X64o!j z-+>#VY0f4(?~sPs`YSaHj;`V>N+#qu8<$df>cr^>@B7XWc=#AnF!1x2ugqi#U4?`p zxg*H8NOaL4!Ij1H_g?DA=$W{x5aM!OmJButNXe`pt;6|)$ZF`|^YnUeXC>7Nh7%nU zt^J7;I+_M!o1-f3bFzjEW{?(C3w(lJ`6aVI>a2K<^`Q%EHkT-b?>mGa z!<@!jW1v>aCNgMy<{EqX-4cr2g_V9Hp@Ch{(154lRvU*_cm2GA=8u2n%t8&M+Fl7+ zqLra}CE-D*AM3QB zH^+mVRrOCN?a*4gXT6eh#2aC_`-Gh^z2M38SXlxMY9~LpNv3Va=;MB+rl}f^KW{RG zqGPmjsDNWVnEn1s3jS+sv2Ze-Wfn%=v@)Iw|6FMukHf}CaK^>04GuMNn!@A`8J7b# zO}Op^iXN#icOl9&>t9z(NBGY;aryYc8B0Xw&eF9}{M&+(k1tw4i7@?0fp|!H_klU*s?8C$%9+G6O zy*Ag<+4bP26!aAVxrArz+svEd)9(%m74Lwat69DG*M9o71l}SCVV9vhc!1YNjAJv< z@uc5l3=|aR&;Ar7D%@VV{w`hq8)W-kw+FY^=Q75C_3{_L4 zp8abO`Kbdw_UkL5_PoErt_m6apVvi#xn{amCGiCv>4EJ7$?%Yom_n+r3&pBI!$j~0 z-1D#XV$kNnd@@eDPY+naLW-`Z>hjU0M{GIRC}bN2jipvG^d9 z4D7JD9ICEPKu^yOUOA#@*P$2cgzU(WIogYNq_YJDr$b#Reu;DHn6tA&!V5Z?ZDNHz zEuO(eud{ZezZM@UyjYe5$tu(zF;}`4SK}BM+Rd;u6BFh5e4KLVO}FV+BGb+2%JAa9 zJZye1xF;c9$xEix7_<25C(5NnU@47hXvg72vh~+uzd*PRl`KW z`z%?SCaj~(+=S;5xwjHm*go9M@50xP2c6t+7u=cEEwM#lDeshsewRs4Zhc05fU8h& z1Ik?1Q6i^B2nl$ICS^ojWPq3K4#{+JC8y7a?>_mzIHPuUXNCl#Lox>|nsuaOMjm2c{_TBVtaas=emJdKg24{xod1b1!|> z@a@ac^^;YQwMzZ*ht&+6Us9znL+)RWHwhuJWZCVFBhb}K3;66lX_zFhw|_15N_wSN z(2q-oNoO`GYr05ptD zSP@p&QesuU@K=Z9B;9m8neO;Zume@7eN`L<6Y=DT+o;{=h4%!0Z_{Gm#c7%OiO+=={g7g!x4e0%qfLTY-XG_jI~KyM$?U%%D=PVLTU5p* zUTZoyr$JMxxVrwdAD&^>C9bk9dq^$LN#!_s`#5c2xBeCwG6pn%Eq_`r>{`8}z_Xx1i zA+7>`i{wQHYL$IqChK{B2E)s{)JH%;!fC0}gYm=Uj0q|33=JT0ylHNQYskRssZxDc z8rVO*e@X>8*GwYzzMK@OU`ow3>)@>oC+XytAWNAKu&|;418tJ7Dhv)duqp^angS#% zh41}h>rcBsz)a2|1Pbb#G(4u<0)1%rnG*|e)dG7<8P z0IXGCKau&j%_dG!fkrb%ZBTaVzZwa*K{D};R^$fAwf;IDn9>Om)<4%kCkcrqdO`YU z#b!~sWyf|AAMWlgrEC85a>`xl9~1YjU-?%<^Y|bTa?Sv#^V%YT;H+;UUH|hbd_9gC-U6*QI+*<2lAVi(6@B;{)u@T+Q{&-3pA@ zU*C^^tnC@2Mo0AB9c)d8^`r(AX$eR${`RpWzTKLD8#J^ym8ePol8OK9po2Q6t)}aC 
z@$-1kP*iQm$|)UDZE}~yvQ`xM#*EG~Gr|{aEOS5kE&dA=jFjc&rWPWRlT|TNjo?jJ zm*vwElT)gyUz2xbJW|CmFa2#O+@9>VkGnN2R_#*95(3j+3kVU)x|>GrdAA%r7`3RP ze_Hrj>B6t$9XBO@pf%xsDApK6YXGYqPNgk6p&tR4ZM6%Vx=__voWuq{s$1a^_0ZVy z;U9_bB8R1^Em~V6I(*qVuckizlDhL?=4NT6N4@)?YTD+*h1&Y&$uZ8YF`A>r<`k6v zWT|GHOA_2fLm!V;=-cijjHED1t=HDW;3`6|#(Q%W+tu|tFeAKg;;xR=6n-4;%h9{K zt@3^No^#f;QwLLLHnLeO=fBPFcYS)UNgnGVGxKM1T8@;p;pvx@@8Q${zMvkdw7;VMbuH~lX?E-5U z)K3G3r3GMp{&>`oeKBDn!YwPbl5=fk*}J2Zirn zl_rC4b}|#c-C!w$&45Bp4I={QL|WA67dQU|l0cURE_lU|w{q5ztV!q4c@+4O39lPT z>Z4wH#N+y{Lu%0>{~z|=Yi~E=aOm3~6h#ysVk;V>pSLTw#sJYuRzm(@`&5qU)}oqG zJ;J~&dB>MJ{JI{23U%W|TcEYLzvE}=OksDtE9pcyLr8xZ1W4I*-Oa2QowFx zd|dfma#FB-pIh2Ig#T0QlWAcAZEY71&A2zas=IMH+<-O7hEU?%?ZsL1PS8s9F04!p z=M9O7X*AzRmX;cmL3{uC`f$(Pl26qka?h@Psg9UdB?(P~S!GlAP zpuyc8g1ftGf&}*raGHEsYkh0g-c{$t0n!$p%q2KKURaSR6S^U6zrpHd?zjRY;ElOpz z$RjDGaEDu*U%V537nNf9$gd)q9sJNHM_n)PoQg4sJlQwlxQuotVEyST)Y0U@jxp(t zEIwit&@1p&9zW*sX%M(lFsp3-a7?(T)YjHCv)nV$;()giPG6pVTGXjb0R0{!y@nuGChi}Cdd_@;-YxszQHq41`WNAL67)5a?q=XVNVY{(s)O>2&_y?te`AKe)Dqo^!2DawX6ycM9!EBzG zueLr#5+4?UgrXi6YNi?7nhIVKrB2GrAM?adSG?|ZJaeP2C8?2S2Zwcf+7*c0KUDXbPOQsy#Umwz8t&In6<3x0Yh7aXIVr z$L5s>C$&g;zlyP-(rv!qvzLf9v5G0T87Dg*e$6ZAzFzGVshuJS+*4)KU-n>`567(P zlc2S(o!rT_{b<1*l&?n>?poc+u3CsuO+`)Ik;oxLgAqo@l;34R6TSQ~fWNqrE2{=- zI6tv?B^KY32zW3I4ouv%oXfQn$V9<#7OC?4Fbwos+53<6?DX1C*MxZSSxVYYDq%w> z!LhA|t&G>42W6ZYxE_kvIfPh`h$SMg-Ld0OZzXV^m7}zwTvF6%O@Ukax{PVQBsg`9 zcoI^EsnzvlEBHv_2Y$6#d{Xe$ogNfRw<9>Hca|w2VyuRB@pNm@Qd4GJx=W{_yBM!A zS^RwT;f=9;e-N!>DD`J$Nvw?HU7irgLk0LMX6>9P-;Wbflvv8ix;;zh$W(CF=4@#O zDjE>jfodaGbf2>GL?D+EgaJy? 
z_7+jSSI6aTlxfg)-g|mYz*lUUDs35|ANfym0lt;=y`~tFR%}W3d+NSC=U$_~Y|xAj(mZh&SEH1*qV4AOBFx?vT8 z={{B6ie{o#bGam^F~@T0;7QO}(>EgxP#W>YqCkLiJLzKo>YOSf)P+HIsE8GOThhMNs_PyO>{0-n4V~WZPV50Qk*ZXjpw4KHsn*fuqt<1e6K0hV4L49yT6*B zY0`d~H+4JNSS=>xbBFJHJJ8}g#{Tg`1z_dN1*Dxe$pErt4isZkf+2DU1Edh3+H3ob zuUlGkl|~AH-wL^=-gcjpT0E2fY=xjbi7gz4% z!j}Ergo8cNPmutU`yu2hUF85$uJ@uZkvr!StQ6dq%nv2U`kF3IM9&E46 zD|`A@Ezi|l4>u*~2{w7lOUx+^R;*dUv*6PZ+H5IKuJ&5fyXH&Da0e;AmIki^;icJD z`r+Q&AJ|X|Hbv^!`ekE^s5eh#!X1k%Sylk72POm*RM)XilEfU)-1w!oE=YQCg2jtG zC2vEYpV`LjKGvZ;(bD_aMpU)}qR}esx%EkXDo*fCoK5EiML5}7x)^})OzTLu#M4CI zo1d5#g&bgMCLjpL5(vycO=J%Z-de(wX|hwDr-n}7-jJpCIvetMjBruJ98c$F;wu8U1dP!ZvQo|qA*wm zt=JpYzK^fSPu!Bo4G9viKH>afvVC*Eg67xk=ER(AP98;b5(a=?)?L|ZfZfLncN_jm zx}(hyqV#v+Xw+*fW{q#ZSOVpzaeYg|Q>FR&`kr&2B0#-K ztI!=QuZdz>Iu^OI7L}WA?>U&*yz2icO$rTl`1bQJ){Ix-3_&Y%-DjX7lkOdsA>E#} z3$R0sKnbhB(-4YlNvF09Lw1rt4a)jk<{16Yx_9zL9VOCbvT08rB~;vZ`jJ|w9iq9R z%D1v+Yl_ilqXtoTHZ=7Iqt>T6r*mDzZ>I-H;h{Fg?_D)Ky!k<$$1k(n1A-xkE zypdTc$CCE?HI7ts=n3X~8{fpOK4N+URg%|w`TbNzyVUMyh-zVNLk5wB9A{RD-Zviz z@e2JuJPC@D5MGF?*gQGaRoXR9stv7??yNmty zzTFjXPK*g(X?8yp;_yip3YyJFP0jbA&(tcy`&=l1A46zX zxz)fFZI>`^MTPfLcNLWf9uXc7ugPj}T$J=)mvwtXbo|{-Lfft7Qq(r~_s{*ub zE$+(O#R6s885Bs*Jz2f5ylOV_AEssvlGIl{>hh z@vmlphQQ@pS&b($@wNoFB~;03!czZA7<5bG`6+v*OcG~(i@*(B4t z$YTSyX(C3Gf&fxJcgJL1cRND;e0unPm2Cbr9pr-c_CB#&s(e(w8d`X}_Z{l*LfR!X z=^jp8tCw;{-=~qX_EAkwyhCB%Aqc|aPzyxWCr1+qoc$DJs{7H?M4nB~w@%VH=gu4~ z6NFgWU5R5Q&&U(3A*!@wJ7BewHjdB38G-p-v{Q|!)+>oq(=#D@Nx?*Y2r(*51&+Z) z{h)w-uo)SHB{WF6SkcPT4OJfMyUT82&;_MyU<7a@39`p6nZz0rp9B%IqS)?6wLEx9fjIP3`nzFCRB6 zN=cjRn2n!%LVlNbnFySi(F~ftYakbK)0-*nvu!@8(e11*tP*n=QQGmRZoIS+KFtn! 
zo%pRUk%Y;K8c(y>9V%Fvk!h0rcL{zH#yP}Sk0~eLVNbN&knrWYvy=av$&vQpLZg>t zA!8)oSK1LM>v!~$P8gYy@z^HS5jp0kIZ>9+`0d{9rJNGq41VCgp6bFLL1X#E!WtZ`8d*J zsarE6`}>K3?lwNeWp6fO@tcQR|M;^Xr)^K4ihh=ngyt?`Y?53&pbM%@PwdLx-GQ%e zT2$-WG`wnMjYGiiqCivRS!CH9UMkDcvUw@mVP1TXC(ux2n3^Wtq0JGMCFYvG`LXp8 z6W4-LJuIPY%1-e(Ryr>kaqZt81aEgwe0@G!8_&mZqbcWmdwF?3#UEoUHW1f<&E*y_}ybGHx5^W92>y#e)@*XHdZJyU-*y5aM%)R3H_6 zVpPTK>R^3^VhON;n)sL^9CKlgi?nwOHUg4W+%KR}+g+l+L-v*7esd%5k-RPO?9F^* zT)rJ{@&FJ+OFoc%;w$U;ax!nDi|Oq5jVquY7GwxBA^Kx#5#50VZhQ^49O)$>)hG!p zUi$^m+kW7sMFzq*z41S!UjXnL`O}7k3IJ=rPHZ4@01*{@R3EEw2xxT`i_?(x=U`6@ zTw5)P%^8NRK?6+jUm=jdU8*IEVHLnw5DZ!V9u>$W!j>@-Bcy(KWVlJg(k_<9}cFLOqE76~yWnc`?JgY5U407~r^>Ma0lyfar!QP{;pH zhCFX51_e-aLqomg(C9!WTL`WcwfFR0m)3+{FoCnMN^hwMfr_en6V6`~vi`PP5lIuF zm~{i%KLtoMqY8HoaXh;auoGh52n!h6^Iz(>vxY3u5p6vUUX}vJ$1Mz$2n0L??u>Z(3 z@8xxQ+Hyg5JWM%}{?GRKA%6@LV2{JDbc_N>85#$m+|PEGzrB;@F)RSMk!5LBa3z&s zIR7kp03k=0owT#}M;w2rNVYW|SJ%G>a}WW02v{)j@5GXyuEOa+ofvDsf>yI2!@u?V zqZXli9M>O_>Nz#opuqS%=iYxup9;6AmfGzi{$>{W`hMxYO#)U`Ok`q^_>f94a{u)q z40J)ktxyDXZV{J!0%l|ay3b!K&+#Ye@J@FU8-&&fOII}2>=2WULqO!&gPD7*Ig+lj z16+`*HNu)SsJAa#oCOLVdxVIF1XnfM^hX?s+{rrLJ?e<@8&LB_50~O6^rU@*z)kZq z;(gaHQHBO1AejZz1TZ5)nM97wX%;7+NY>n6ZGkHh;=N%2*`AWrEp@;2TimDQ z-C%$wT6Ag@8to?!My!f)I{ctsJY%b6Bwd+{+q7mEeW(!4T)cCJEemg2kb3kSruo#D zcDxg{C@)56DfjjR;l970T|=M(29=w+NAxh3WK}zV3F_OSx+%{pY$nk%1&R$kwWEh) zhlnbsN(FrdD&yRU-EGV&AcbncqLUMdq* z{?_oPPpTYs{0dD>4F69Bxb@&uzzN07Ug2p`!91+{*(Ah#w_A^p;~vvb9yX$qQv#rW zjM%DOcVoSkF@ufU)SvI(3aq8>QVfC>YT^spJ>|-R-T>ucXVUgA8*{7{_L~*Wj5nMs z`}L)0KjG^E(_8pPyOcx-FKSf}xA*>VF?(6E2iZS%J&?XHx;7_D)1V{G!XrS#%0qA% zQ9$^FkH@ZEW;d%SI z&R0yCFI2DQr;+AwRs-3H%S;x$biy%pPaUIfNI%8M{7fPAX z+Zzcf6v=>~5wD*@)HkbLZ3f;`Vn}Hx+?rT@_GD7;Vlwb6pKj-d%pz`3(C%kV|4gnz zW{i!G{kDqlA^gMvgXOy8gu}v7Mj;hCNf7b~;VUD$6bA6AT5SqsTRxk$pD1bw^WuS7 zeFShKnFClLjaANv`m6rufTcm2L>kX&HK#dQfPXtuOiRQHY#PwDQo%Rmm`=1ALdDOH zHRpv7x3e-=_S_HpIWmi5STpiz_Te+KJI-cMA*h*zvDQc>On1gi*H0tF?CK=2c1+GUX;%yqO{YpBCEy2H>4z3 
z$u}sUYw~ARyDZHru~-%Boqbc)vUTyuUN{^%*=3mAZqAdya~6aP!H)h6Z&^PDj+FnT z=U&FvJoKcUeq2DtcO_#tu~q?;JiLBG#cn;-Y!s9rtUvswgzB8C%mU#xTL7SjYl4pt z-kHOwBVw*I>w}`PE(Ox!&Civy)C^OdDtA)p_nS679qFm9={(IMfbGNUz@)PoklMrV z5-FPGH}`M9Th9?OP8T+1fPmo8uXX%t+W{}O!LDk-K_qy*dOdRB7T+oeB`RHamI0vv zACq+W`U^hAYej+axJ1&oE9nZwgx1>7Y{|Z&RrTZ>BG}?B!4U6;xpt8}u}HMzuFK{n z9Up2^Ts5MI*fPzTw!@72z3Lo#m9lB;v#7+-UcfizhA+2dS_Z25tKfxG_9gA;1p|R5 zHc#smV0(hK@^O?@cl04ppuIoQMsv$-uf4os?5gksK zI^3Rvch1rk5r_UVY?IXytVv|?*K>NoPUV@Vm|D}m^QT;Ow8Ood8K;VQu?L$03dOTX z?ecQup=N5bkYAXdvNyC^Mc4@h6*nI7F9X@2m?y8&R=9}PacY$XN{*^Nt-`(=L%Jq) zlg8OhV%0#&z2+T9jTVLh@lYA3z~c;Y;Bhl(9qu(a90feH!usK++COX+=-${;;a>fa zwA&Co&n!K@2|dS98?ZfXX1&k8%!$^t-!s2Tw5+G$pLa}dU3dozkk5gAonoad=@w+x;efQovsAW*WQP^@NN)80<-QlLt1|e+J7X14+6^2?Y|5`8+vjjL0K=E zC|?+$>J@uXRuVHZZ{hxW10*C5P{lTjBAEVLtQ-jnkqDjx!Mg(3W*I*n<|(A56$t{dF5 z(fi{zJaXJj$dnqsKYvhTw(J9Dkwx%!V|iw*k7EVosK!M%ZqCk>g_V!x@VV=LM1nHv zow5Cuf`4s^r&?0P*(6|`Ehv{AwP|O3AwGW6@%O7~5}j3k#OPmDU62@3tH+_f-+ueB z^G|D7M&<;N}I7?-iP3* zHUP@VnlE$HGgMmna9;-|`t2ZBTI65A^Y(Wo%3W?Ho`G@qiM1rT z7IYrnDy?0z$>DqX&wdQFm4%b44k5mq{L!I4YWnQzDI3^)I`JPaAQznOz;E|YR75wW zM&J@H1iNHJ6p%d9!OymR;Mq=B?a1at;SU9>BH6n~i}lmX`S^z=R!ej0w^QhPAcR@v z%#-F6E!v-IGg~?wKJ-trXxGQc(lxi3`_G?Y~XQDC}1vfmG_C!>oG@qXKp7! 
zqAxUETjN`l<8%k#=WPUD`m)I;3^~z?)li9C;wl*5{UP*7X zZ*+4&lOcLJM&t(!PHHXQ*jm#9)jGry2c&1KrdlQi;8q$a@6p&~GA*4@);a*tpjT83 zLh>O+yU0$?Z-V2_UKDMJq3O)~vl*a~mtuZlu;lW336=I0^pVkZI0TXb&Zj5q3?4}$wejU&ahCSBvCdmOFd08>ea%tc_bJIyW}!uR zrBkHN5`2OHg{EnuM$1*#thQF78Zx`=cy(q>^$XJdp&~;qPUcLKIT0;d}sfpp)VShAWKb?r;@5fB^ zyd=O$P4S)=$q&ii%SgTg58Qotr~8yreT>=0Nhe%R2MUh8Jw?q|ZhyM>t$|QE?0#Z= zX1(Ee-Z4{-eqSjqMq-IOIRi~UB6;~g2`*xemluOR+sE1Gr@^dxPj4!(~g_{D9md=gVZ1pmG$Z0G=5i%Sh7Mkt-o=C*#BtP9~^7L_lI;*RW{tXmp*hq2p zPEKB(n>t-qcP821q~J5Ga2c#8^v(eICxx|{&jEvLId@|%ywo`Gp~Hks@Y5j9&n1y7 zDPDq*%5R-W;dQMPGt2j*kB1Hy#+f?vxYl5AA3L?m;^cgLAKud+^+L2zoxA;wtDa>F zB}Z+jI19Q%7mjZVC%#mIiw?2YtS zg6XD7KC4I)iYniY4#`rNQ>PH7s4$HV?;lLg?5MuB8sS;c8P)%ItwS^5N-MjL(zi~M zoci?(l{X#1HbypyR$!N+G&+1-rxgOFQAbTDL5waXU18ssXIXtA)JUR-ryqcq*@&=L zxtUCV!OJF;Kk(8SmaG20f;sN}y@@XN{{7RwH02BN7%%^k8|R>#N7c06r&p4lFgi%x zd0jJ!k;leYv~A=-{Gjo%Vs>AyrgJGpHb40q(Be&CQcW0cG;w{AuC_mMLF+0>0Dqt+#~v=CI#@t0E}^A>rWMM z?s?*?kYn}97-LR^vGg-OJ&X#k4SFV-N>QmlZ&0*(ae)?zVQ%L=6(QHI$nPvfVQoZZ zvqo$D)+3$mS&Aq>kSf4p)>vuJSkKEV)FweKqq?{D*-kc z$unYzU_#89sehYmy!scNgdGAT<*E&!2PUBNrea zSXEqFB=)=%C2i8Bc9kryi~;5GdP)SFG~8xUkdReILX`{>#?<~EMdW5Ldhqf4gazpN z+b!rmmY$M7=PC@YDQc(>h^ZWUSJpoZx}MhKYb?`2Qy{B>Ur_}3#x^4En9J7=!pmv5IPXS z705un3Crs6BN!lh!6@w6B*0=H>s6_Eg~T4V=!W7B`Q|JDcNcC3{Al8C^?n<=?|~ z&|zI^>__Z~>7jYOVhKGIg}iO;E|iXQnHDdP|0uo6YDc*%&DyhFJk)z;b3L$rPT+Jg zy*BPq8=`bVNSgX|fd6$cg2(FuZ(uKJ$-;jl%iz%C8(1e6ET|a+T!P%FF4GT((*VHY z_q=6yFVe|b30if&88d|VHt&hoDSUvSJX;iF{Z$Y-kn`C2+N z#_c#q!#E2=5+f9lGf$pqIX~b1_H~sU-cJ+CzI8vhfFj886AJU<^y_jHp#S0$7rB@A zO(}T1jePUu_69Iq2inNN)GtyfJJMgvB&bV~_zJqm#M`od#fBQ&VfG_9Z2O#%Hb{xg zTrcR^D$qy8Uh-a3m=BmU;_yX)`s{FZyXp=3MZ)R94@+uvd>ucUxwKF67u)x$3xOA8oGX(${(|pMF%)ye9yN-EIambm(Jc7#s$VKxi zLLO(RmC8 z$qDekMhBPxYtl#4IV;zy=1aDI#pHpw;g+9ZQ@f4+;v%Pab-Z?{PztJiz7cXSOL3ca z-fW8O%XuDGpF=oO0|px{deRKX>oU2>-ajLo2D@$C&$Tz}h@en-zL(RDWAVIf@$ljm z{4$1*q8q!cE}o`%Ja*QMV=(NHY(5zIz9>>QlG;RtzuEv#W>3G6B3M0<^5)L|de+zq z61yt*ONHyYhfkiLjXKiz@#ij7p?=bX+LU?wwx^V>|4@ILW_R>pSi 
z%D~q*#zz~Q=TV_0M-*~wg!>sPeXktdzb5~x)x>Q`w)-BTmM^A)R)ZH1V=E3|Rr4&8 zc&OoztDD=C$JyTqzcpPTGP=^g7a($|8aYqXe#oom%h<8|gdg2-^C7`9X6f-pqiH93 zNzc4B#9>7W8n_VL&8dpS!sFCP9GYQ8$vbNv8kFXB_@%-3g~ey1BHYW~mftV(loK_X zbaydiTtofKP3DxB?+~l#RZvjfxrJ{QLp57QZ%oY!($(vW6c>KUr z7FhN#vY_r*JmH5am!P!VD7$e5p(6Q!MK`S&)?TCn#M0)?LHY8Zx1+-=yAs!)@)axf zV(CCHm7%!NE&(RcRJV(p1vm3~TuYsY`KV)+9&@d-K}4mMKw=h|BGS_C%O;r#q6aN1 zmlxZjE~sb@l6EB#`MlPGS~?vWpTcHWd0L7T&Ph53=*^$fzAUTyGOaV2RdcyrTQC80 zG{R+3)NQ}}+1ZuJ&Ls}e*UKoxUga{tcv4kW5vON|JohIXgvr7@WkrzT_xRgH4U2kB z;%GT*1TFDS5R%K8O7^{K?<2g%B1J&Ov_ZrEsMtHL`db#!PrBM=n3<*+D+|?Rx|+FB zM_$zj-)jra!&(1f39}EE%hkqD%z7`E9_XSTm`fqxDzB%FyzgX>ppTNG+5OfZ)>9z) zb8-{Rp{ZS-LpMW@o&+sF9aD<~G3FU2GR*rXvMS1MFa94qbwFi*3~Ak0Esh@$mRG?m zCzMJUYI$Rkajyvn%=WJ)BU;+30#9b?e1{tjRjN`1WTX~AA5C8N6XZLJU_;{CupyMz zj~f=XE#IY&3ka^X`7@z!HsXEJ;fWlz!bc1_ttx|W%nEQ`|JpI~MZw1c?#b9rSC=qP_;{HX4&s_`9Y$?qt3|JOh%8WlQ+?r!mhe=9EOHY}Y z#ZIIWlMXb^yta?ISR#;sQfgvw;)$^H+T(Z3XLek}f^E#E64fSarZa89{H~6u(U>hs z*Z(crMv7JD*qNt-O72rdaWD8MoHl)ljA2_NVU0=Cd(cl&pxTy$DzVc>UKyjxCn|k7 z@nFu~Gn`Gb=iD*$wHoNU7feoYt~^qpPrnoFU-xpO#Hm4`>-ObHPPp>*7GcG}?jJ>S z$O`9!i#YNi?e1ym&s_;f|17pAqz7MmHEt|lBdaZe$DT%&e+Urg0g1+RftPz89|K;f zeXbhb6i9*b_VH-Am0D7$P8uF5(lHmjEDtLAcACiG)PSS8wdeA_ z<^I*;Jq=J^@5sL*Uu4F@zt6P6Wj|J^qgviPUIIBa(asH;k=Hbn`_S%UR898!R!iK@ z2Ti8bOVI9f=W%;G4FiIQmZ6p#dLMhQyYXssgi@x`Jc8jOD_$BcMnPIb(GX2Oe=Ydo zjk>lpvl(XUtaud=Pei~^@6LUpMyU}k(*oa{tHCG4UMn#*V%;02X}oo|{8`hzM&kA> zE81J95NyyjsP`40^cwfKLKV`Jq2B#KJ1P>@r3v~9(A;&hT@S`I)px&5ode)Gv5RC| z1*BgOog`}F(PzLE2%gp`IOZOLVQ#28(gsZ5K2K2}CBgSL7ycyQF;1>2`E?+er4}8; zO|cQaqK*r)t4)Ky7q~*NJ+Cy6#S@ zASa-y<}l`D%MoA>5Bb>E1EW*BY|xZ^g`3?5=(GmGQfon1*_Upm&(PS@yig&ac{9VR zD(2#VfA$i$3}4|hIcyUAVs;XRniapW{$b*Jw3!d8x@}8VTY+p#*Y2-*3|ooV){$Kf zOPVOJp!lmFF?nsL*XW^2Aw5Z&#vAIQREe6n;Y~4lH#eJtn)GqRH3PBqUkO*DjDVRH zub?K8Icc{ApJ!6GJy=YF0H*q%oo`nw`GfqL?l~Mirw_<&l~=en2x%UiiB!9J(X9P`I*S0Hn7{NzC5^YYhxa`}#VTELojRWaZtzk=s{! 
z%f!Bx&*ZiT+S~?+sM#2G4NCok-A?{Es?5j+9am=K)<6DxB=F5MA^ro9jS!KI4q5(v zGQ#tF^nPlECrl*#1xj9ty4sfe^gPFJXSDtrM%7YxGL{_z0#Z>%LR6KZRkS$$Z{6@9 zm*rfRC}52fm9i1Z1h^J|4LtqR)+Yc4;&8YOzT&?8HfK;tWkW3N>@S+gA&y&*`0OG7 zqbYBbO=j}|qw{U=jN=AZm5;;4sO1y0ngGUsk?qSWq-#Ud#2S@U`lR!U zv_&yR$$6@Tl5<^AKJ>k398Moj3LUGv1(v(JoPeMxu{DyeJMU281AI~6$Fe3KU?1Y+ z{u&MQ{dwGv-S*2Kx%L&#-ER{1ISnY9PS4`^J>W+^zg@e26Z!7Lr+@tOL*H-L`3?Wz z`apu{fu~>Uy-2vOA^LaU-RlA&-iG=SWdGUr_itP+QjfhE5p@6bkEhdU&@a!})&u@I zvY+kA9$y!@7jyT2j9|G6<5=Mxf<`219)j-qUp4=4y`VpO2zu?SpU6sJeH}qJSx(oI zePulnw13wS`C785AglGXj}wjPe=PF%sZe0w^+y)EQVPHWZr*qA`m;V~k--b%bVJb1 zQ0*VG+uw2#c{`G;)Uqu9!=CSGhr;s`@U7V;#yS{DFe2qGg3;ibe-qPNEXT>pQ3 z_^S1kiUjoNAK(6*7M{RUs)4)R8pm^qsF+tM+~rioZh4Xj+(V=i?;qXtBth@*3iH;L zzPf>H*WWPy8-eDZTgco(xmxU>(g3YWMn)j+hkxqzvRKn&%0C;BzgzK{EI^s3 zTz`-_?p%0fC$(97)Q_E=?)*ivs8m1{BYz*4k6mxo=;HOJQAXzqc+o)#HIB*B?PQj}Y(QrU?!b}+05 z-!rhs9@m^&g2%DXf`kwAKgro%Xy9^#|ESv+nQ?j3?VlOvFLjWbPU~6<>s!mesB~t@ zM)%J?Q+~aLWZmEE!N6YsIXv=*Iz6O*2->au@h6jzo8cJy{g({qbvB(z1v0xv!L}2I zj=ck+)dlAkk=_$w#Kf6g7UN%f@oF8G1D6^&?8GF}<+3xd;k&zs4+sv4YUB8-UNwhh zD;SA*^}{Rg(*U8m zj`=YtZGXUmh~NiDi@-f)6}~IQmlwBEUe4}a>soE=@E9dN6qY}69ypU``f{;A2%hP7 z*~7<+onPV4?5_H>T8c4>w25{2c0fdG9m5t~Iyo=1FJZYk+;?2t+=WFp59Hs~LTIoR zYX^sR=q2c_apwuL3<~P!j|CU=>{nJ2XR5oD)T#YY5`EdHRPzhef2svAwii^PCz>by z$?Ge}xt*|3*SX%{D)aSZ(y(GT}R(#H20cwh2Qa(DjpwuXn!cHcc8!C zPlzAM$N<{NPB&)TJmYXF-}6aYbvM_#zr|5OU5_R_IGDo*qb7Vi;WOMj{a7jCvJE&> zLQ-C6H!(P=egW7FP$L=ZL+skKx~cltruWDq#Cc0?PQBT4K#XcI>T))PixDqPE-N~u zy4*UC=GE!^!E^RJ^sKW=%ii-_cPjT}?5#>4&t{cm4-F7@FMaXQ zL~^H)PLGTfGF<9;_A1JJOg_12?zGuV{v>kMvydcWgBml4d8H>rt;BYu0gAk@?{TiD7}!hMF>l(+>d>e&&x5vUB2=S20|Lpikh zG?CqW(LKgwWn6=-=P%Rq2Ow7RvKyP-Wl*?|Uc#N%888PdhI-vBG%~kw| zt!E+y%oVZl4`U_|%3|J7zhC7a*h>Dpmo-13__ijtFh&2nnXpc~cxvMJ!2%&~)<>HY zOTWt6)x{U$3q^^i%e6o5o(uB8_jme8*(0u4_u9nd06f~PUz?@`TT|OfX34$SlPZW% zHdqVVw<}1RDxz~J9|~LN%l+8iA-f(|SP9zARN&$NuonsVm$(r^bhHP?9SsMGE4r@z znz2e7Rd>mF`%jyqj&l7o&OHs-0R`W$3w^wff<#*Yr;h&Zoza9VMMvTF7AWDMlRy?R zoG;zt|SgTMyq 
z(@hoc@z4mDs#A90LdhRzC3N#uxRTAl+>5PdqiSPJN>$S5Gn*Oj2c9US3T2$*`2}{$ zWD+Z_(O+(rX04;R-U1nim|T_gP&dQdWI?hUnxp&-5J8orxMiCPT$ep`vg6!-W6F+l z9@VPa(}56e_}QMUp4#@C@pTc}gPrRVgZoYcyUj~FrD)UQ%pX;5L>%oSjc>%W+8x@J zDHx3#C-p3{2`HW^?sbznLagI|Sh=q0cGbUW>dP<&!KbyBfMJ!>h=tMvD}Dxz)&{7@$v76ODb8aPF|1 za)lHutWGae@#n-|`&>X!ubrzLg+41YHebof_o^V|B-#lY)W3iVV{$$X=dFP4dauS* zR?6jQO+zx)mo0|*mbnqY5Dmzb;t2c%OuO_M9R1z?xHeti-z zH@sFm>V@VD8$99%_tujJ&Pc(P56mh}%;^~49?q$#hR%s>#LKyHzTR3**aRPEqHV{g zviCwM(ioj4#cFms9etC2Mk-ygf5?8$l=;*4egfV4aQpnubnV3wcj0W2%oa8CPz`T1 zr0)QlI06+Nw-{}mGoOum^}C;Jnu*%e!YoB3Ve2G3pI8W1&pVZ>^pD5`tFp7{9xvpI zY-G4NkMOFsUHdbs&`>N>zGlivtzXdAm&vUK%}bOg)kWeD*G|`v5}-KmC5(JF7_Wx7 zrgJ>3hILZKy>=zN|CMg`F*n;tjQFi5cwhZZ^=A&uj8%H3JPd7^hf9o;E}&KIuJ zVrT`6V`Dj44JYn<1+G_1*We7j#PgWOok5Rvn%87VIm;+Z5+mcXTv6cgdHCX_Xzz1u zflWLYI|b1Vx(9SYJ|ut>#SykDAP)LH!&;sdb^ku*>2qrP;iY1dMS&aSkO;TW0V&-&3eEh7&v+*LVR(Src{Du$KvcY&qc2lsD z>Dj`HCbrfe%|t}T>)GBUFvtvM9VwmD=0d(RWWC;o%o(@iT9hY=9se}u(PFx~w9OLu zL-`=FH>3ZQb?Apd=Yjy5(2B!u40eX;kB49U_8X*m3K>`}ty}S_#ub;dDx2oXts45# zwiz@_Uyp{cMD1+{7JikS;)RC)*o4|gKxF9$;qN6Xp~d1wG4pG|l5cnGpNhPl<*on@Mp-M^j{EM1ebS}f^^*5*i5;Tnh~7?jZy zoxAU7Md^n$p3I`|5!dQ7T0EZ5pQpN;v*{z9R?yly@56_o%sgR1!sbE7)~4P}W)$+2 zE+hQ@S9)F+rP)xNx0)}P8uC?au_wzjSi^h|eZ(i^QH6A1)~q90a8PG&)t!7~jWCa% z;bV{(o1IQ&n!iLkd2era3u@ARXc5>w`suk zjO3UdpYhccLLd_dbLnI2`860fMJUX+>~E0YT>em*I-fVyuMvHY8D5jSrkmFDGk1u# z29m1Y)IYNkH)_<+!Z}SZ8WueU-s}{QQ{hvol2hYl(mwbStr7fcs|1J5WNle2=6ryncAlEf^->ThvQ*I zFZ$K}_*W~f!cJJUBF#aREl-PIJb@5^-6$(wul@Flb)=rBGFFu%n_RI`(a~$QBSdbk zU`AwRF+62=Z~*6R{>K!(@q86~c}xt+6isXzrMFK+bmR?EnA2tqgJ{%YT=KEhn3H1Z zduIxkDaq9?nz+)?*XQ0At%sq3a(k2_ZzHtm6jh17KEWimPEzB4t92*jWlfM*D?5Yp z$+EX0cuF5&6dl^QlTXH(ccRVvxDNyDzdg z+H2jM&xQluv@cP1u&UoRN?O%X^o=6jl*YzEX?-C zk5up`a+lj5vScT>HDR-q)_u3lo_&wgx{%yZT5xh)Q+$PYF+KGB>d8cCa_-2t_~|q` z1J>j8uD=G{P_RU=FL>`9Nu$b^VoE-ZO)$jY<0CfT(-t;0#%VgT8L@>M580;&8gTN^n5qwlNv^AVf*T9PRieJVNlvXW7MkKX%LZzt02ZV5tpWsJ8+8SD*hJdp7;|} zF4b3=o?}{fq;|9KSJH~~q%{|o5>SkV_2K>HL;gIPJCFDQ&}8droe}Md 
zbad9F=FWsSvn)_L+pt_5T!hUfK5LgvJ4|4b^{>Yy zmOQYEZeJ?SfNSBh?yky$mG^zB*)<<@^#{Aj zdH6~^gk@rN#+2n8sOw8V0k%&BXvjkDz4KVm)aKwHs0yc>VOqt(aYD|y!lZM>nL*u$ z`f5UmIGvT~owpRYagJGmBUtq?!s^!%UuD=qemyc|cd^jzl<;v1Ys)ArKr4rY zam^Fm(fBjtw6D=l8%FJ{DQT}3Ac2auK;Y1D61*48eI(*E(OUMk{Y}j(cKFzJO8;=V zY`w=H%u4vCYDn8m_2eJ`)39$n~*L8GI9bB z^{OR9uJXb5Z*kpv;_VTu7{gR|PBbAM!K&2$f|X41s4z!gUZJ8w=)=+t_9Sqx2wKx9 z|144739%i%(S_8*qs0$)^)+nqxYe$X=O5DX%iz1B`v+y>L`Kut(-eome`V|rv5Nu$ z%mKVLd-r@Uc-D|ey3B+}vyJ{y$*%U9?bf~#pAqO~$puNbz=?y2ya3KOy*PCspxLdyu@=BrB*kGQ=n zb6jh-qjvVWfv_N7t&~0C1R@fCmdkC)a*e~%TG{lykE2AcW9e5wn8i7#s~4N~YE?_m zb^KL^Fgk7XWE5%+@*cjhGx#e~e-A|J22y`T>NFVV(r4|t>-8xyH|BQC4&JjS`J$uq z>rsWeSinTXAUfw5}9GAqhI%Z~bu zHtt>&g^VmtMSU*#q)eRP{H|8@6~+olWAJRv-8k=OE*Vps zK9;2rivG^%=T5RX7pB-5EQr^wO!B&@+R3zI>9hFIENbqA;U%~H4JjLL;JnS5@WHGKy4ZhfBu_H7l_@ch2mc2w!1m+@jh zaR+MvokM{?kfcrQ-Fw1(u}A6IA^hPCi6J6-+4xpr}|P12KOe8Zx(VT>`Z z)$D9>&u+h|o4osR|Bhv$yW$7_X<4#3soYEuC~r+f=2kriaZ*!AYc^X3;ER$!d93mF zB1PTak~_H=YVpu~_4@g2%w{1c`Yt8iv{jK!(+XxSDx`a}yoO>zhR?NjYT*)xv|Q?L z(M@jaogKsKzJrqgF_)}|TETWzz~n_LG+SQXjRuuoXYRR9nYeSVGs#ZJG-;Kq?8U(L z?l}bx4w{Vf*KF;H=i?W&eyY#CD4|e`Ar-9SuWNO}AA%}nKFjTWmA-iMalNnQs|(q1 ziH@?hS*-Fnw`8+&pFIBTcwvUG5}yB}oLg|`%v}Ke;IjGxzOiC2h`Yt%xcOXlH#V|< zf6nRUT}3>9p6C%L%|m6wE$kv_0&0$bb5!baC*`4K(5&VeejG?Xa>YmG>ccl@Q8i&IOE(Q{@kwzM$U3HP*upSRh%9*K;bWy0R z^scUhZj4w?W|SQksSdpbI;OBp8&>QLE(MU<=O#ZDrhVT|1EEojR&NO+vaL853jix0 zd2)J|5u3x(^|KaIls$yB(h-n;AHg=$<15#W&i zu}`0B3tZ+>D=c(<$Nd>o1ld@)Rc4>ru<)v4E(+c>T(_3mE~7AaTv(9m{GJ-j-Px)1 zBNcQh+#JU`9}#{2Q|FMohygBZZx9ZBU5Nsx&y)K(@O_1+RLnB?`L+SP(50h?P%70L zx|8+Gh#?*79b1k&dWP0R4Xd+}sj|)4m=*u|-c1UC+B%_<`8n42wm=?UI*9W|J~VlR zJu1>0Z*De8!!C;!$ahcZ3+MK^BfVC_`3%*LGxH4xW4};iL5l}}217TIhk^j2%j_aM zmE>n1Zyr7bWqSzcHq(S}uFe2D5i13O)~uTy4$M^S%;ih*Ll`D*)vc{9@k8FgWAdv@ z_c7GSe3Z;NrKl7Tq3-VP78Z7NYg_-!6aTqcw2y9?9UdOe)~W-qRZ|+zsX`qzraN5_8Nf|sy?l+z-Dx{Wy>;ftwJJa? 
z65Kt(!GV`klWw9#`RADaoRuM~n>iO(zGkgsPQmuf7wwk@A*!gih!$QE!zDu+y-7%$ zngT~^hkgbYzb*#gIzKISMmbegpPS7P5Zu52fQX0~Kc~JO@NAt12&Eij!!@1IKDDt) zxVnw&-!(F!s_#D}eLIWnAJ*qjThRCM`r6UamSl!96?0Cf6N(Vt(XEX6tDlwSzI?VT zg0^-RVxG}vB8BDft{YXzD6HHGan*#oc6%v-(f}^}p!}aZ<%eBmp|~8sWC2Slq|9z%fKo1dNF$9A zp)xZ8>E6BMwi1Dm%7jp6YeJZ%RI?TMY7d-6cxj}cQ!&$DqrBR~esTujmo6XKwBn@` z^>t;w;`bj-+Zqj&@*(Rx<7M_qIjn9ApVu4sNDGqQ%2a0ALUUw}OjWDtDmt+OqVk|i zlOh)r_bBWsn_MK^*O%+ePsw8EaA}Lg>SF|(Df$)=_I3M^bf1IiKHuJ@B8ei;bt&NN5+Zs6 zTH`5vGtM7A^?qlRTwNMu9pWb#GSUOsH#xyeQbi5dG_6{2Y zNa_9eJcatBYXPSuZ&-;Po0Kc)?SvYBldUcW!Iov2yoIytidE?LwG)!@$$S5%nfPJ`M{HXJ@X z3KgeU+3n0b=i_*jPd_0=R`sSC{|ynEExu|cqgp@hAQmAQrX>LsGRTs>AY~M~S|J}Y zodaNn+8rUjah#3)!lhJ!lNX!y!8rbU6L)Gsn>+WGNoDA?YG2J#<~2{HJDy<%a5b(( z@=)~{`Xz9x27VYGvP97E&O4yt*gHb*5AJ7c$K)DyLw%D)Gbmk*R(f&5$sRPfxT&Qg;i{1Nc|(@A3hRan_;vSjhpMW+_c&AE+ANzzn%{p-(ccF)=u zHPU*PfnA_puVXo$8v+@h)#5202u#V+g(VwGgnl$BMCy;pId;M3PNWkyDP>ssXX}4T z!|eeO+YVV#4eZUt>bQnXxuq_&3s82}3EdH0NEQ zap*H^GxfusAZub1EldI!aXqccCoSq6!qJ$nd-=-|V)$Yw>1&6c1L9akDPZCS5 z;H&+rwi!@b_=M2^z{a^~Gv=W=75oAtXO@;h0Q(hozZPLak_pPuYetCCg(wr8w5k(} zvBv2G$2r-w^Ww11Nm1JUT0KYR{+aD&GqRMNqpoIc&W22=4IbJDtZJEbv}4;K(+xsN=DFJcxU~Y6wJ{jUZ40Xp{2o5P_QNavvS?5L z_yh%bA!H#P7S3gePvjCJl{UKcP5;J?s?n@+{N4Ee)h#Q496bLo<*S#K!1sVMToLp5 zczLO-b?iM*BJ!`=D2PPmLJ*q-ev3{1MOcAPxuPI{wN=Xc=Gy;78#(FjL03EIrwc%B z;eSCjE~{Yg0fo&r2QBTK!$0dNe)T*bzNPXALyEuJFv@~!PIvSJAN{71LX1NeE)7iFBNsD_UUm1?JRWD99&+eWnxI|HKsGbxeQ^n!Y`T8rc*=;=P#W$C}A zbdmcj>Wmr{2Zy7xHZ~g7j)zL%+1iE%ph2M72QhdryLV3A|v#m0)ImEc8!_{xBSlxyL%K@zXnb}OucpQ8mc3fNc zmsR=IV7Wa2t!~ziPfptHF9S(o{cI&m-z<36Pc)A%>F{A(fO_L8Nt2);u*TKw92^`> zOntN4|4n!eZf=0sfyeQQ@IEYXl6aA&P#l4%LF8RREy5zL!XhFf!ouD@E(o)?Vj3nc zX1V(@#6YIksaV{2Kc!3{M1UcbkeIOx?*TFXKH_CBd-_tZt_~|?LXD61w$VIBqy7#e z5#5cpbUTT5p61W5j3FVPXPK#DKy(rU+4DMi)7~L$bOKdqVsgTDHvkOX@!^*|6-3l{ zSJ5X7$Vn487Ytb&NdLWE)QAf7+T0k+r_1Wt*5PXV_)wN+r-}GtXJ8Z@92hiV$jfcO z^6Ij$@G_73tL-7MJ&PH41|%5K|AH7;go+jxGW@z+3v7`Pb@c3gcJ}M{^$mD!GJYUG`#79P)j)PRSb`gLXOQ`R!B-Q{oe)#^@zn)Axirn%0Bx7Y#2bb(E%(2 
z7+qkHerxNB2_%rSIz%>2po#lAg0;hwdI;>{!0o!r9Svv^7GO?-))#bjQ%Onx2U5UJ zNI06WRXaRtXKwNFV{u!&)R*XDeBO%~5-)TiK;?`x=OPdtiD8Ys5@^6K@gxZx$r(n3 zPbl0K%^aML>FS=Dal3F$ZZn}zdX*vSmA``M{7@3iFdAUKp#v1IxiA6}?IJV8hM2Si+d7Tr{>Mbh)oJWAFD91UF=$6+`Oe2uzQy`P{ z1>{g}fe5)DO1caiz7YLm-Ef4NqAYJ~c+lxRX3D9bii@2hCuZ_gb$D$|gIVx%-j zDgNmR50`hxlkHF3a16+;VesWu}dHij;K<&Xj zojd)G2Zz|tjveM;ksZ}h^|rai(Wdd^u&~B)G%aXT%olCY>jORJP0Cqh#`|bO6KZIF zqH|o}4Yg45betMzGS`px0aL?V{7&3ezOrP74-v@WOCcZzBOZ_{Buo5A*7vCzMn2uMmdoI+feg`ITVNVAD zK&Qew)gfd1DLw&SB=@{UuGVH*kME$v3?Cr{F{I6O@!mOBrv#r%wY_r#{#?zwg@UCc z*vWJ|uoykTT8xQU0P4U=MULUW&!c0I=)zv-;Hcp5)tfuLH!?#?*%M!VUy`*QcT6&* ztqlS7mGuB9;e;X!r9?kk<>KZ?qX==v@g&I}T23`>&f}9dlM;F$W)c;mGV~(U%M(J6 zA=I&5x07j30H?X<=_#|_uEIxaNQPkH-bY?ivjqO0-0pfkQh@2DiEvy}ae8Sbb~YAi z+F+!Hi-A;Km?;MUQ_z`SiVeU1n8H9o)LxaHvt2{^Fi9Ep{qfXuySg&|u{BL9=0~~2 zKMQt#iIP6)Vt&IXiuP)kwYl0rN1%4Q_YatMIRHn04N}HU~^8n%aB~niwg4do5`a0-=qbb zqL>pUm}iLk&i^dc`_;(|z55Qk4Y@}Jm)r~!a+exyicX^p;95kh(L!Ul zj9V>UW(dB|kn?2sR_eP{lhfQ)R6>K}JaD);y&CMOohs9h{s_B;G6t}@X7v{y^DUIT zyKidUlg<$2k~j6mzGI7ksEz{dXYR^8SK}We$k)VPCq_%*9b<_pu9jd9QJ^W+UTDzP z5@p6+gC1;R&v{WG3(4iAzNwv8UZq=RPav+fUS^oQ(TsZw>E=+k=xuZ+=BUHS_2Ls_u*|L}X2A>2$T1cqohUElK4)(0%hG zUHC=S05N?(^%`c2`Bwq&uaCGne&Z{@CKKn}xEeXQ8x0`1K+$MP7ze`RxgOs(?ut9Wc1>=ZGmb$q=57Jp_s&?)eW1@+%>V zSeddfhsyk#(h}i;XQXcd4bfR4*RM+7AFiy&)@tnDiXqpF64UBL_8u`@B-no%-&42pGQe< zi}xm3AIXa|Pk8QtAk+co2IcSNPck##?$L8}azZ*g7xDIKZEXRv1&ku=5Kz~+rYnf$4FyO`T7l6qBET{g5B-|Y6c0Oat@W6|cTt99iCavmUk0bY>K99%oHMV$S2p`{H1k6mP>k^=9P zj~2{+UB_SDCMLqe1Tv*PRWDN^VG&{Pt=a8*r(*y&0Wzs-+f6k9o&l1Mgm!?%-9?-~ zzkmAjW?g3W`9!JlB_5E(VTfz%&&q=#8vFUTqcpo}kw0z>li{)2VCDfwpzY{_@ii3nHx zaS4ONfJGNV8^86h?mu`#yp{d%=NkO_;kz@snvwyY{PPk9p4S}92uRGY-@G}J*-O**;ldAA1AZUPM=wD=`ET9!5jug&lg#UeRu2mdtn2A06X11pM|?R&GnXXStxhTr>o(%r&iHL z3SBe+M)FvG@zVv28iH`gsp6b05W_jxxVZS(lte|uQ-G>~TpyB^o`kq13WSK;)Av+6 zOXI~r4a!Sa%-%~R0N-D)wY9wFqAC$Vk_;TB3j9=GfA$r?qX8?l2NcdQuP3*~J~3X- zh?(|6w7B*crz_i2b-Wx*vTc_D-JBjUKsxX5#XXOceU%aJ92Pgl93>aXLQBGA`8N8U 
zpxYR@N4S8^5(oh*;sAd#TC>v_Us{5X08VxL9rt8S2I5$nnOR<54&Ry%1!`w5Uj>|m z7I1^h$-2zp)ggoSom&^DN$|>?;{9s#`daJrBfx9uyO2w#9(JEIy2I1YR8YMr#z8+r6&`F7+6GnD+4jtjSaitiork zXRksv{{)eMmnFYc!Vl{QHa7tv9w;e86C$Fx2ao~McG7{edOc_52$veTi(~KIGv64J zm&AJrk`m-Dv~R(mmW_fJqC0Pbzuo@BV??}%s4$m_@mtjG(EkEE6oJO^e7KC5_p)5t z5Y@DPs`-{fEDMP!iXxEUPS&N-ZA(3R)QhM(U>~?Y2S_t;2^Ak9kmRRS)Ju>pc)}+G z22IFxWR8K3W+ya+jY9gAc<;pv5iodN%ml0Bqjb*YZ zId*FX6%)H<^VHNUe?{BBYR&&2R*#S;xQni^DcV$FrrsJ^rr$V;qOtxgBv6Ze@xswIx=$ue=1+^F2vdR&9_2D7tO zR?l)>MhQ&lwgze5tcBHi0Cx}((Qb-N^A&3&EywNT>>Ic|R1nL?OH6gv^e z;7knMSbaYP%&%F|ml9>0U}CmTN6@SHGiennoJ^A|zUxm1U0ZgcjGY{8o?hWU9O29*LNoF zV4^$mGMgG7ttQV6)X+AE$NSIL@^SZZRZa%;MCz!_G6o!WyF$PPbkr6!5wWPRBWyX3 z;jmeq7^k|iQZljK#&i1MBEQbYBs1QFs7QOka`q%H1+|NE8C)kd0Cc0c?KD82d2Ff` zLN@@SRUc4!Av6(WDy?NPXU@NN+pYiwOo*mrHgnuw8Q>;K0pC0Q5-1ShiAz-}Vd30= zR(>bRbj_D0(Tv+pXM5SeBkBAqFyW~w5FIUR0!ZqD+p=~Rd)sfQr1fho=cpcsm3-v@ zj@q1526;DD>D(#OU}0hM-g$1K*1*FzokK~hLVuF7J^=m1+haR{p1zQ|gb+e**NM$7 z(|o6;bHqwdkb{C3fI=J5k<^>+XK)~E)i|vqF}CV@(~wSMH-!Rh4xFY9qcOlPe&Td6 zYS#D;na4sxQXSzd3;<~w!7C9moID+$q-EdWS2milJj+|XE>D>^!LUt?rncC%mGpVm zKV|C@GdjV@*t4JWZhE$eW4M$&5?~INGy%bfj6VOxP1hqkA4@fFRVgb)V&ToV=MgTm%R3jZLcf=(!jY30ugE z`?1>`=RTzpRtpmWT4RJX+K?*8HMZ^S&@)Ayju^PTTdVb1n}Li?xw~g&V4FUqdj(IZ;qeIiQdlnY4IR+dP#| zY6MEF6}S?Sh1B>b&KlJ?Ufj) z-Z^%OxBbK0JvHPDOdNED2st9*LdFKocf4gcMc7-<#AQ!{WZVzZ1f+3h%J&(oQ_{bF zpciIGNg=)pTi>wen#&Jpy~g*>EDoFE_F4&IoeJaL`g+e8e$!i%~$R#|E@ML7h?!2yvhtr#`6 zOW;T^!I@_-t!A4nHxun0Xo!QoNWg@*S2}We`UoTm);LZlKcb}c!!5%3vAvOEBu)H2 zL^QAlNXpPfeK5pibl9n2UVFZYDTRun+~+^iBQ8mdvbB467T0Sq^n)i!;YFml5So@y zvS@moQof0o@LLKm&udyk!yY|J)lFsE&+;@#ZOh_$s~{`J#!<~hwIA;<+4((j$;d3; zoXYe$Hg$3ka;*|Op}R6m%(A@12~mZ#c6h4C``p@a)-HOhbUNZShVa^fU0-s(QL;#__;Er_r+Ha2wK{6-$=tdX#%c;JIXa7k zc$-FB>VT(>(PW7oH`(L%j&Dhf)YVzf&V@ljeS8uo^aEkC5V`ZWaVI3aRxFIM9~Ajpn~9 zS{teZYNPqQNU(o5YGT0YmN4Yr>)bJ{m{Ky+;Oh|8K|=*L{wjnCiG*`GuKhp(xHLpv z1Y}ljxL?PzsmR8Y)GbREp|)dXa+)35NLhA~|GUbX{pZlJ>^iv=d57?NtAMnW-$-S< zcNOb46vR@EDGWY=@F-&E-+}39HMnUXJCSZMhw-a?+_P&{^GJBzaV7~=20n0A=y)e5 
zpJ$TZBA(t@LONTXnoktB6jm_q9*C4u+9QrSd%8AB>;6@_rqh4cRj$+r@`kBS#>wKr zl^jJDz#d+TI;DRzyBj2yE;n~w3Baz2da^{{?zIBrygw+~xf7S@+CT(HgUnDOMX@gw4QpO*y{swL8EWr!eD z%x?xzCdiR+HW{n|qM*@_baz}}Shdr&Xs1+nv+y;}X7}k~$DNy#Rtl?gpbbkNw9; zUSn^qUCC~j0x)ijOtR>~`c6#t26gDR_MDoSDg86;cPg?}5o(vz*83gsv3ET+xX@0C z*ct4Um2Mw$!Zs8k5@|pQLP4uph2#UY(#0UIhxn_MY?c=en7YR=rSF25*9(0}1|R0l zFx-j&9BVQ0P3~GseKaroF#fUA*S|UKjxAfT2Z@`iIb4_8y0gS0l<{Ib>)wKxIvv~? zzPi({!-iv)DwnPm5LJ6JF}dx6M^5lkOJ$Rx-wTIMb=O?|m@u%Ln_2=jHH`NvQ13n4 zcS|PL%!a*f$X8_g#*EVAl5)8l*ws=V(Q{ll)-&TZYh*q{JUjC!LR<(-mN{l5xUB9S z{+iZ7W0`JV^Q?QYt4rP47bno8&#vr{XEv_R(5ivo&K7n1hP9aHs>EUzk?l(y75AO^ zY55%Hs^>htCD-S=C2$rG`A}_*#5PmY`JWOX!xx+ zbn3)(j;CaH(ZSQaW9Mlb4E^g_i{vPfdXc8E}H3YK8j>229K9MWg~- z1%yc^0k&`k+e-54RI>M!VPbcXR(e(MqT?u6=vXiN!n30 zylojzb=zwN`Ad3~I9A^@>u*``@{j7mL6wecJhr%3mKelxuB8^<**kMaR`y}w?Y#6O83XVq$xy5HSY6+?{|;+E}{27(bZjIApqX( zt#hfGsGBA9+sl*#0yw3*u;&?v#VpM)VZb>fIeE$i9>8{_Fp-9>ovzP_tD8PY=j~5@ za^5S{jc2^BQv;g)K=#!>47l*?up_b+`{nBe>qJaW4SQju(;gXGn?Ia!#w;CI_I5st z+o^th+@&C1R3e#i=n;kMEpv2Ls_Le8c@&~rmTWWv2<;qK2bYhu<#&4mx-)?Ur%3K$ z=(c@--QX)U)Ml>b`NL`%)^eXU0g=f{tm+Y;IN?Q^c>5if=17nJ(c;x3*kdYH%xi1G5o{gUks{oZ6E06I}X!Yi{m|IR5eDui}29 zdMj6udTw|)z&dowrg=MsrwnI(Y8-8pC96CY4~#rc_CMU1v2NxhFl_}pWYOAV{w?|Z z>FeImf1|eEAeo#hT|At?Prd8!TxcwGO1|ts z6-q-52SVfytE4?~?yC~#r#HTh?aA1t7YWM<6(f-$Lr>Y736Q#t24D}mhsr*Bj7!O> znnq#ZpXPcjX1TK86Esp!FtxYwcCGSCPx9tlsqm0$loH`pxi^3U%Bp^2*``6{Bs(*e25PVUCYl&N10C)ARqM1jMVb3DD zZMCfpfPK|nT)^?1#V(U&f%xooH7}dI!T+TaH0{%RLA~n+9GZkiPajoxQMF(WskS}* z3Y6Lcq!&O(MfS$pGB8ieEIYDFF7?pC3);_w*bcT@Htyo_?kGf3TY| z_c$PR>FG-V_vqy0q-?NaH|y%Oa2exHl*m zHzfopj0b)~=3XMEW4;xQH*t_nfqCPlcKh~;8)=6oJOF}n4_<7?C}Bll;GT6TT^e%0 z$|9oc;l(<=e=`sLEqKa z9@)R*hGO$8Vzl5LG-gv1XaY8v&st#v}_LpwD7j?i5`YAetkBt%Z( zrua3n5KZ=$;3lo3*87&Mg4C}J?fG%@@W!%@oP^%O}IAn`hMKk}NX~Icf{0`V z$qYkAa?UgG_PDouKj$1c-{0?D>;2=Jg?IPeU0q#uT~*b6`)sdN78=#(adFo5tpyqI zr)3}ev&#Fa&FtiA_tHETsG~UJNK*}#v9QnnW=qIcr2~6+4?Fw)OWiwbR$Q_X)gnxr zzu;>%rrp=i8ZfKBL&NbQc!dM9R=~^Xk(!y7Y(z&0UicgGr93x-)0_Pw_1Iq@?#lwh 
z75?2u6w@XoWNYjA{Cvk*UyW$oz^)e)@;;mnf7}xy1MOsHaz5V8hn<@70sDX~v2oBC zvV$HTF3nOlZt!wF-d!oKH+ZD>NNwi1?R?<%6km{UuY$lkEE0BjS zFDB0TRph?Zn|&(=dG6$UyxWx+pUjwW#sLm4!u<}*RLDUKDKa&=&f~ZiWf3tzhkD{m zaiVcjaScxq8f|=Ub8ctpR??k>Gwc}GjVkxG@p%HO6cJw(YPgiI(AYRBm1_10k!(b~ zk54S@TPIe)+^I7#QNjXEAs$|b`#&-)l~d)QwnzNm5>`Z*ri3KUu!N0O*sH#qZOwgs zk0WX0B&qs!qHtujWub3vKJ+j}WwV`bRvdZ!;i*2(hZCPO*6O!lG6}s^$2PJ=vS-~i z!Eqnh&!u@WFZp-Gw+6)<^t!(ro!&DCBY1K=`s)s<98~74Pnq~T;mSY_#7=(bzl43O zx^$TIAb#XDh5_uU_KkPM3yM|-erd4avlS-p1?=0-7F8X+4AhqI`^iovWAe%Is?d8}(mT_{@PubuVxzSy2vNCi73Q@HfrGuZ-uySf9w8-OM*$v+h{h;mu1A&V0<5)(61wDg=C;U$3cE5<>AZvBvg8u=r}R0 zNZ-Q&zr!p)d4|F$L#3&CCv*og8MR1tlFgcXFYACcFFgr)9G+R`5xqVn=Q^X}TPp9A zr_y8J0T;*lB|xnxap4X3>{M1X+pW}y@Izb}#lf_#NHaxqf zq9-g@As-{Rz^175@-Kh+Z%E~@Ep_caHB)aQ&`1f{h-5k`D7j?3!!xzp{jzHF@)DHk zZI0ZnY*asG8nR&RFU=N0`?f;{`V8cYof4k5?dC+$CTB3h%a)R2zW?N!vtT{F_Q)R% zeD5Cxbi2UkJ(AIeKOh^Sw3>$c8u1PoUaUp5kzq7#%sxC9Pj5T zEq>fdrj4Y9I6V$K{RWfAMDK{JeUCFLv1KFP`;+l9jw_l3I9!oU&Aqot$hfgp(loMp zw@A`ed*sb?ZGjS5BWm)F92(dg7 zO3iq5`EbD1t?=h7iV15xGYa8dg@>o~zTZl&DM;poA^l=derKakH)$8i4 zOHs59hpm1&EKwvR;cBc3k+fM^sX@!~t|&87V6tD!cb%p~CW`=QM-Sqd9KmJwt|J{!MS^lp)-ybK&YH*&81Nh(n zYZm@@Gyi9J1{csKR??SG3vvNm_y30XKji$|N&c3exSip|IzJVq{WfL|0osWwil&LnUW&vwfzfVItQbMdVaBn+Nxj4 zv&f|;zmwsfc0X^Cb{afmFSVIMcEEG!u&=LweieSQodtJa6BQPLurfJ& zR*y!vfwA9^OMGD(sEgKPa_K zAs&8WWR@lyk$RsU;9O+A=hoE0)>=<@J!&eX!Dn~4)$gz`Ib8Ixl0Xc#PT;qGR)~mP zX96jTTJL^L1wmzrp=V2bFg<{djD-C#*bv}Y3WiI8q_fkdfuf%t_+|ra3$V3uy~oBN zl`jTb0)&adcd25>`{Cf!_`$EcXWb3g=zsCpV;+>IAMb-Y=PUwBpQA25KlEf!9J^6c z1H?O;o%mp3(;20XZoVNr8`}X0~1d@2%6TH8lmj+7tXfTTr4y5UDJm=th&|#dif95jFPQ*3rw>bEsmk3VQgGhT) z8^ENymIg`Z=Ucs`VLlI<-Jw^zLKLJHqr^^h1!rU!tD4X7i29%_)GSagudPGZgU`C~ z!=X>PF^Hb z+39V55$(6I`%Vf;RmizsMS4`rMr`U2cn2m*ow0!?K4Ja69vOavHQ_x8J?>BgBLj?{ z{Zw69+bjmJl&NWPM`Pq5fcr zK&;khLqg4LKg-l^%()%7ouMG>Ydb(SI}<6xBfe{8(aKJ?KL@OE6DYA}>U1mI0; zX3v?=I=8*7G68lKP)Cp5{f#Zlp7B~=h?B2xwDR{z;9rL)KWUv6p7}*o<-NBG2vch~ 
zrHJ>ThMHLr!DrU(=~7{)UYAJS(~>0sr%n@A9knqMC@WLRLJh7#G)zsV0AnV+rvr+hrD<*Cq+&KcTT0b;3kWU zbW#;vKp;kHGkTUCMX+zk+LPA4p38JnkvTF@Ix?eEH>1fDfT1o#k&oIlFgfRQ$%4i{ zZl_}8bN-?LMA?;6T&RpAUbtg#6G!sqUElgz_*z}6pO4nYf%a_9Tyr7RQgo{6L>EFm zAcpFxn%Um!6Kgmz&YjK&CTIPFEbA9b>--gRmQxpu<1%=IzigNtUlj51>Nd`;2vL!M^cYip#jt^;ieIm5Tfttots#^W9yhK1SWA8Q)&QknE8&GKVR|OeJ@%ibpjD zD^B0#9J2p0&^e!b*U2{S7B%@w1!gEf>8e9>zR6D5kIms*^DZTYBOc_4?9d_f)> z9Ul25!am!a5SCm5v?UiL9c^$tD>0aK`fJrJ7eE3+m%on~>! z|GVPDb3VFT$+Rn)jEstx=oJIZ!e1n57?;(rPM7NK*!a=Q+LRlgx~SqN7?}RIij^SR zi0W}*UOKQtjoihMfewP|oZdT(lUs#TcPXsY=xW2@0bgV{s&!`LD)%5hTE|jLh@Ty9 z{gphYJ81>+f45o%CAy0gcCt~ox5TbT&;m0)CgP%fBDL}J+roVRUID3R*#1SjlPxNV zl#H<|b|z=FIqj+4EZt)LLBtuy3pvjJyUg*8zqHS|d`>4C0QZXFF#l0`DC?qdug>ZI z=lu)tSR-9)MWQTJ!DKu4jPLHU%KbZ~o34~rtpCf*|F#|s9EX~JzAyLRLTESX0hh9? zFrCrd%d+IL*B36HPWqSAB(9<&w{B} zuOA@7i~P%VdpZ@{VND#4o*eb~Y8}IURqrxA0mMfPg*-U|7wcpdw9FRdr*RK6$2 zM}D;~^Zx~62s4#5*M5_Tq7^eVGD=EqfNagRY|Ue!ItUp19W0k(*pCnuWZMCN-bt$C zi8H}^QDXySe-cAj_Lcn(|4YajwUWpOP^yr^)vN67t(N!@6?2qY94YC%1G3&{W2zAF z8-UuAL_Hh=BZ0rzDz-S1Vw7rE{_eH2Ion6&hXU9W10**)*v0(z*8v3qz^v~w0qWEx0yyk4 z;l25fm`6L<8&H6c{he0;`3P1qp$@P%E5Ggx#wB8?)#m0@dUuGh_u)21YJgGU%*Y#&^-3TNy#(;xm5@5SBg53A(qq;5tK3#?MRoN@0BHOHm%AZHMfhGj}tc;+<#1+2hY zbJKH(WwQ|67CPO~ode(0>vYILlI9aAZYem`=5U=cw5k%|mcA1=pI`&?p@ckt4+g<085+)5E z=>WgypMitZ{rHKy3UKy$M1=8!6FT55xW6f`0t>%9)K-Zm`mU9dTHo!AR>3F=%#~M{ zZxSXETwQepyM&CEnx8eVf?5Sqv zl3=N4`h4PzB^P*^f}yx$iWfnS7`U*Em@vt5IRCcPi$>VRM{~07sqHtYIkf$)E?UY) zbTI`u!@6ayLCb;&2{?Gc_v?g7`qaL_%{G!@W6yJLzr@_{KutmzuZ~{`9301W&~jct zaVPEq;EaiV!eI1w1&TE>R~H_E(eJV4VNTH*JQR1V$4W2+Jr-%7kMJz4y#h|OHTGj? 
zk|fbf@n#XWJ8>{(TZR6}O%B=xe7A81ysXVN{>lyNBJA^@J#p*uLcuiT#m!Q*{Ic23 zW8o3K*K$nh)9@2${4H^=e9oG8jcM^fQiMsN7`1(?BJN1}B4d6Dus*tzIt!Z(PFn0gYuZ zw~{gsJ0>T=`dFm^8ObK?GJ`KJfv7W1_aL7J=1LB?`_%kvNgDoO`c;`PzWFGM!@-#l z6Dq8}E8UT_iRw7B_u`{|mVNyt+O1{|Hor*#*2^s%A*mQJMQZ*#7cdHz_~M&z1*Iw# z5CDiB3FJ=*XJsEd@Bh&y4IL1FZ}Xk2+;NlK#b5aLW4PbRQdE?2gI|U7 zE4vMMqlpTVB9Zf0Tf}5!Nj=90e2kM~l^OOG;59`K{{9i3$bKDtUpE`C`L&P*sE|Hl z&-duqK}pGXt0#Pr(mX}mNRdI@Bppi~r(1C-n%IhUfS^i1R?+5d!D8H00d2A(V3>=S zI=%wkQZ}Ld?5X*Em9=O5p4-5+D#0K=Xxq$mIUYIkUDcHkE$~Jg|A(K4Lw^+vU&N?X z9uXOFd^mYriGE)lgOr4pZ^zq?2&YS9i@7ua`x3vGaI~N5!7MXNCZJp6hOPvhK%>)1 zhAF;6?ReFPFi?G(=)IJqsL2G+MS;)onT;fSYYCI8#&;9864q5&Y#3oe{AnT%4j)$= z;SNW)7)8-)GmY8N0rl9W=6F3Zrl2bW<-WPZ?(vrb{$T&zd7xVX0<)1X`RiiOU$|r> z8zC~*5;plr(VQusnfgubHvOw0=afsl%8NPw>P?6l zekSW2*cwKgAbhjEnv?kAvnjVhONa7fC9zM7n;BC}LQ{UjmIhkt8f{^ICqI*l=^bX|}%*uyZ`Ch9}- zPv!!}5hKOX7 z8fjG)y0^1OK;6S3e7SLAEukH2J-G#V$$@3#x9Q5!h-61A8J76GP~)rwBhe|Id(rVm ziBsm!`hMugY_NcpsvsewOsZt_qyoH80X$u9RX97s{iY^ zKesO0#(O$V@+ta7F^1NSUb%Xun`VG#Elkv>p`AX9lOynpSLq^!2FYH#$(?cDor}A6 zysH6I?}CC`LoY+)B2}D|QQdoXhKh?J-9Zf@A7`CJD>%O%bi2|J4=s-}i-FTjU)swC zyjlS$@PA|?-T{+&a5RweVhQ&Jr_pR~q_wTLVlmjK*cPlDbPwDa@Z0tKoSV{a z*ft|fTutK<+Cgmr&n2d3=wzda`95*JsI=EAl?)i2;2-=!HQKjmIN&#?PU^ec(~!O9 z-7Lj8-Z~J!pE=apaN-Kd%e&1*c|AcImLbYeTT1;4Uw`G`S@vu$ zdyOUJ=N`*d)>Fp43H8{pcQ7#A&&~C9yX*WBCX*Bxf*Z3~rTe-WBB^uz_~`ycD3 z(<1mF$rZUfg0@<%ySwJJY;+JqlE{lE-52l8CUgb&5(2|b%R$XwdF+;@0R18~J~du) zBN}>7!@kHuljYK33|Jv-X8+INE~1uxQ_tCCYd6dHXk3e-BAbA>F=mHMp}a`N<8X<|gMR8oY*(M3J>iK)EUM^x0S)4gsG}R8zNc$6t1(fX#Y{A8Bz&i{pm2&VE_HGSzOBZ#fzRU$JBNGyNiT1ExHv z$X2hzp9r9yDAsuI?0W8WSywn6U4q{j>Vt3OR4mll?A+2#GipJ&dZ{B0j$`B43@L9G?2nFihsrpKbg%L* zhPPaOEL*W=P~D7>M#~xuQ%Dy()Icgzx<LyRzE?E~0H@H_V^K&h@c^ z+LxBFbN-rGiLbVbU^2dacbYwJC+vE@(d`%Pz&I}{|ML+7)m*meHaa?%a?m?XPXp7c z2=Y$C7^`2eetX_-i*(7n11p4khIo(!yV@ZATeFgKj}~|9mTF)!@K3w-E0XRBR+ucU zfhySI^-tT5f;BLB9sea>n1fYSis=o2XL{i;}Y}im~ZLsYI z!cb^+=h6_11GzivV$^G-(ueNjPi%&X^-2=kpRaK%8K~m&On*%LP zwh8HP8k2=OcUPWyl_uDY)!%Vw&+{rhx-9raRx@gM0oPi< 
zHn|+Ge!7Qh-t#-LBZ9ZjDw5-ImL8X*d07KKd)dX@2~%~-4eiLHl>9%E8EkZ-av&i|IGUbuve@kW9VTc0?|Ig_f7_9MIpbWwLANU#?Y0!x5#CAer|MEZ?=4g77uAO5yA*8@;{a;lln$(1mNIBG@?O z5+*v1qK_0Kf7NU{VsvSANj4%T*^nSO&6L|HFeE%(fgQE5sdb+wH88h+w<2X`(OV4w z-LEWOOZQp~B}x|8#ID5c&LJ#HEAsTcel3U5ynxwEBOhp|-Z}E@(GR1Dg)E?EzZfth68k^)L2-l@38*o#_Q>a zIqoqFEg$QD(S@>|gt6CA#yIGjYQbHq${O*tS(Kqwy+qVSYGC>85t$Vk_MYG-gEH=D zl1Zm7=E_W3*#3Tlw@c(6&5MGV&ztagRcFze*c= zsX({pB-qyn(ycCb_pPs|8e1Xbu2RszMYv*sd&j!?^?K6lhoBYM41jPMMPMjY1YU6G z-8Jg|yW%GC80v$OdTSye6UtNJ=5NConAA}H(rE3rHtz}ZK#qI{@d+I)Q{3(*YRtBw z1C*%ven=RGsA~cO#(WE7S{xt!ON}|YWUX0Ew{>JA9FQl9kyuQr5<46VM6WU>Z&0gT zc}vgFsP11Ah;iE3mZq36x9JVS_|}2D06m}c2Q?E}f6>QlV!+iXZM`*rOUc3%>9?9i z(0#oMmyqmDqt46Hzltj{W05I{%mG;+e?60$2mzmJ88kg(f5`r8pF~C%>+_i83x~vC zLrc?6{!LTD*6FnAF}qe0(5s-F2bj?-p8tH1@WKs4wz~n{PmW@K2gWr>`Zs$Xb_X>b zNptZ7WOc^^*_g|}6eK{X;1ORopbheXvBeEckI27h+y`wp_98$HeUbRT@*wQ_9V+mD z_$UgOP$%=nL~W<ZSk0k)(2c(GM*y}#trqRp4t@;CJNPH#{lnWO&3^NCAu%Zu49Il40%-FxX*sR9 z$s~|k)8!X)fO<;+>K%}rj`^zSpFqLxpFja~CJ14e(13>}1ogN8$Bun5A)J4#rVp?O zHT47bw7Bw!bN+4rMr9-&f4?yb?>w^tcw;UuPPev8Dds%NE;AT-jP@jWP1Q@lyDxIX zF?}QdOx1hfPf+AY)HDWsbIv~kgn;fY;Phj-1ZbRf+XzF{uY#G9c|F8j1p58@KMY#P zZ*&%7=8+4AIhfLaAKdB5M$NBWbEJ`S&R-V@xO7=76ik2d&s!gd{Sj~$CDG4R zu;3R!P{Lp3x1ps0KP9b&Zi3MV;skd}M%!Yv^TNOKz3@y)ZLMK1<*X>C2_x2JrDlWq z-!_fnpLmzZ#_u@$TLlny(GB`^4fKEI{O4hQ45Apqel%CS6O1Vs|4?gJuM|7gcm%4d zLBPxK0V5M1p#%wdS{j5x!i&^3-hlDGEak+kdNFQrr|A;W4u}qbfTMquAfowqIHDCl zKm`*BAOdTDcbQldV2l?NwpTHtw*`_M@_Gcp1U8s@yVS@%-hhhbU~ypJ{bjFsxbwBy zt6O1V&jA(fsS7FxD!Kq((znFL161@pR{b+{qwc$bNjfPIb|tjn28tknY1L}f&j94k z4&+{s#{>Er2CYh$)bN4auYrV4K5L7=Uks+CP?G$|B}`mr0)fC1G6WlgtO2asv8b9P zP%j3cUiq!%abPSULq*Py*DII=hWCvQ>$MyR%Y0y)Kp5(OSp%z0<_}uER8qMJ_M{mW zt;^R*Qt%6t|A)nVZGV4#u{|SO~U@weZpFopYcWf53|g9~|>mjMRC7D0uZM zFtIy-kioi^bkT}J2~)f;o>kL~kyRzuIAUc$-3qiDdIPrI4e8gIKL>$!*EoMiS`0zD zt*nIj^06jpXQn-0;tb^KsB(U7^;-Li31*lKAS~%pf?2<7j36x8JraSjOgiFfGyll( zQfeMA25`T}u7%%Gcld+{9%crrq4M8UW1=mJ@joO<6pZOf7QBmL*p6B8MdKLI-0Hoe 
zT_S1@5wN|zAejJFv4U*wzpwiL(UJ{@cBYQkI5|5{BaU>_rjKTKg%}wb`5YYbhA#7k ztcVU(As+IP=^ZR-SV!YGD80H_w?@?6endk!HeyN5-$?V7$C^$`PZxUOgc;NUJtznU z*B}6S-*ZH`yI&wM#e{VUhopg}m+rt+9PkG;&V3`#>`?YPhC40wF)F)pqrFZLcP4RI zHg8VedsT9qr897jWMJk}jv$Qi;RpVw?+fw=Yoi#9?Mba-Q4RLJ^cw8QPpB1X&JN+S zO_mYSXB!EDg+pR)x`r!yrY+Z~#)NUZ+O!!(MQl1Mw1fEdR@v4L?e}Kc1e+Yv*dtvm zA6uFaM3Sei?!12_EdF)scF;ZM)O|DjT)(#Nsj|4fv=o9Sv+Y>g;cDYJpB9h+x!nD- z>>4AOn{$i$pgm>^1&wp=?w~w617i{+fPJQg8)kqFRNlxNA)s)(!U7E@bvu*7VXW%M zNwjoQ^ca7Bb)?rbqbG{?Ca&@!i#)7PQp)0y1P#14+5<&;L}8Ii-|R;wpDp3_X2#p( z65ZBRxLsM%BP#^?8EN1hT$g{fyghJs#l6IM&uxb7%IbH!;x!UpMEX*p%3+v$_SQ|C zjK>0xj=K9E>}zD1i9YR!UO(;z$8)rR0c*eL8rothMl+3EtR1G&{=vq^P9Ml{e4}s86^+4B#pKzIRdl>)0joJmB<6F%qteN zf2BspCgC{QAwJAKm-*`X>CrEA5l3d+%l=UPJ?%X!x9{q5~Axp?fhCD>5D{azKZgQh68_SRtfZEs=hA& z8vU0(Mth?6H+Yhe#V@jX;>okt$#TPmx&p=Po0c`qwFMViGC99bwTIZ5Rt&^ehh7oW zC|?NX$>26FpQ5v-%GrU#zP!OqzI7 zB}U~Ay+HcnA=mONE`41v%WC(7lai19#-(vwt&riDDiT;~T@M(K==0?}l*4*+#Cyx! z>a)^=6+`-Ft=eHJ#|2|v=)QR5vLoWQmb_eVjqZZ#;T{t-G-|H@=I5LO~<2RpWC(B3I^OXUE1Yh zEqCF2rFIEU@3mQnKMoaArYpC1_Kk3f2#KGtN75e2dt7DG1r8T>(piQ%a(I^%V_m)_ zrh`lp;rnm^3{vZVf&+zfv_*<$*ZShxz8F9Go}6qu$1T*CoZOS1O%er!SjeBM-^mtR z7=@p+FsC&~tuPnJ)L|b}BG6u5ex)>NC9*?rmqdTC!H0->IN@-u4OzC^?u-A}N|QQq z(|)k1k4bxzJ*%l>S}XsV>vOG+0rs?O=o`abu@J5WO>M307oUwiFJw zpSvU_*X6L3rhi!#hZ4Y}pX+aHti4n6sxZ_znhdlJDC=pr7r|@XEVEu48n0|vfIJvh z9cgisc2$?yD_gWBXNjPE3g>($6|Q4|=K9GiYZY+i=dG|>w#llxEa_hvUWF^l={15~ z;8ZeAb_m+;E~Yp(Dbxt?1QQ9Y(OP(-N~s1WVA=<=j#&slYvmV>-rXt1-U%6)#!CD2e4xa`&|aO`s0)&Hhx;JB=Jp1`R)`h4Ubf%N!o@B>p*>3~ zXb=%=kr-WC*7no7+---|y22Y}E;$P?oljO`N>EA9x3dy9+2L~V;88U22~$J%apXfn zjQRLV@3}}?De6nOTUv$N%BnD5)?o4fsb2noWRTLgxS@R((l^%jeu1T2teG@Hg_nyl zpYpj3!Ov}tN_|t`j}4zF=%m^gj#89{Il3h+_nw>guCrdg^-D0LM!>!|#+5XGA62kS zxN~`rBsHXRVXLp?*N=rx`^S9pf;?;Z?B)@}g!zOLiM&^jlwMC^$wr{*g#)8mfv~^@ zyO@$!4knB%LGe#x~j#L_|Z^5cRaG=)i%i;0^{x?%c%X~oFQnH<4(^iovqfv>au5+ zK)K{e9vUjS$0Phc3)xcg6#3LC>vpnTqUU;dPSuCSy{W-;(Ss}ko)u^!!4cM5ucdtB z?RdJ^0SuM%@Ebw9t8FEx&j_m9#u(4XIT6sFX2rH|)2Tm&c9_QmF 
zCdNNk(NaUpYmGM1$Uk^pJ{(zOnswmnB;@f!2oD)Os{XO;Xats=lUea(+BQdojTu4_ zmO3~6-0x2<0R9^rI;k2(9CqqV45MnCSGyHrq*e&j+M5^pG} z9bhD-W~*|y(B8`J=kodJ(KYVAR<>QUtWmyFdqry<-fCU_mFUh_txrYkTuYCom)zi+ zT5;29J`)TG7V_LL1ahv;(=-J~T<7L`^2U6#C_N0vXYMwQu<4DeKbbD4h{MsoQ(41g z<;|{Plc(`q)}*z1Y#xG(NqQ{tm>kCOgQF{f*S?>9{?iDybKmmqrBZ&*iFT%Y#7 zBQZ76bZ`%Hq7;veoW8u#au}f5b?%!N>KM+KAU&H;=5{?l$D!On%}i$Tso$s!z>T*2 zQXu*SYRxZV{A2gcPZ$Da0SItUkvh+tbN+AefPZX8nJh(pC|MXZA!4|kSl#3dtv#%H zVK&a|zSOCAvlY4(*^-%zpW}(z7r_SBT(-w~rHf$m)^2*O^ZhD=)aB6vAGHpCWAbnv z)YUG95IHR^6%m_0H~Pys?9Hmz>r@7F8Ktx;%Umb&CasbaC=ztJ=NzVV+~T z6D{(Rf>xai+(qzbKMp!3Ip&F`mfpRbY-_bm0NP3+OBNRFhcyp`V3kLQeOu=_4-wIjQO|;nzM?eQ37Wj448ZLdK5T#9~Mho4dFsN zW~qdRrb@dCXPQ%hdXRqEAP!r`o{fQWKv8(|qn*SCuU`NhJ$x%^J ziIwhNS`PP?&mr%-zL2zi2I~h88iIjG@8f(H7D1&MBO6?&XJ)?U9|;MqcH@m1lwNo~ zeEwQ-b#jGH3Z{_eyi(K!@~55|3L&_JLrNxk5uWf03g;#BF0N(NuE+dE`_-ch1}_O5 zgS%GBr0zSuP#W&1p_2-P-QTSYg?$T)qMiK0pM5+!>!aukHAYvC@+m9X*N%4HU0GC6 zvWcd7^mfV;*ZGCent< zP$BPbO{BUi+|A8<=Xz)9#F%-!v}A{#(UFLz_f-RUBW0b6#qGVN!8Kw(Ck|eUuxMxO@IylTrHQ-iUu=j=#I}gGH|5t3>(} z0qUDy^ObLu%=o(Q*jfGxu`(u#Wbl@K(Rc-9&OBREF5D#D$hspvvZr_9%Iy;d$+=sR zL;6*)C!Q$$PHT^q(dk{XDmQ=KopxgeuNY;AfPrz`tY)UM$})WE=MBlc)**M^yMn@#*!EUFV&P|Ha1{2ZSzC?WV#ud((L+CY!=jPf?TvQ*<~>58 zW%3~I#ZjgGPwj%uE@`yQ*NCvnqqV?Rw~{^7PGb5o!c@1#A_&g-=JAdT66?ZC7c!c< zwUs@+>b9gey1iv<#7i8n-^Y$y&w1x#6U;j!Z7t?;Zi4M=1tfB?xT4>U-i^ZNdpBFB zepqng%Sj)NxNC^;^zyc!7;$~Y;1WaS)sP%onUY;wjBqHd9UgL3o}X3BL~dEPBL72W zB56g4n8P+y(ZY@*&6B&kS$^)hLBG@=PT=Ss_3}w918R0w_l#~XSs6K z(|yoSTA7g>KMVJUJ`2MK0`h|(?ZaUGo16l;KaL989?=C49rx=E~tbYyV7SYAAywy71SIw_dZoEb8seEt~ zgwZ^IPu0$#8|}s`i@1bWDQS9?G2}*1aOCZ@;M7ppR=bB~xEzf%skF~;^=MdGwO2RZ z%a0=-Hf}PNX^W(V%R;x2m&>GmJc88yJfyh{jr; zemBrg!f@12=Z9_e++hMGEoHH-C;UOOHR)`nUb3xvp4}m>t||q6z8wvcyx>!BPIX%U z!~qdX8pPE(W)Oxaw~}9HlJO=MMI}|wZfNCQQLk}T$yZ+2F;N?|-Q>fES(RmDX;&rY zWwhpmrV4vT=0R1<3M8GEH?K!g-qXM3Co5-D=Z&knH@9N;s$cul{$STD-c<=6(!52x zM&QkkHsD8r^yUt9p^8WMx(OKc16LB~=!8T))m$HGnr#+GZ7|11N3D>__N7X(^x{1# 
zE_lhtm+K|Q#ARR;c~~jPH(a9b44GLb?AOnJ#yt~$^C-u#R*HoZziqKkP({~&)xx!< zDWnYE!Sm)IN8ZjT0Qr>()QSYvuP)76vuZN>l3U2iSRrBgy8ZnNcY23^QMASniMW@@ zxs7Pr8cTJMSz^h%km@a~y`Ptda;O=KmQc?rEEDQu&emL@-Ri%i`{fhs+ko^EapF~d zt?|K_AR{85UVh}4Vj!Uy} z+y4#s70p7J4+C3Rw$D)O$F3eQ#g8mXWfpNP4Oi$HFO=W^bh~(R*=GC^T}B;+_&{C` z9v}wiwH~up;8V z3nL3Rx*3ZSYgNz+h`Z&Ro76v;Q=}b=TFBSmI!t-R8;QhSRxS z#}SZB_l)@Ag9Dzg?}~J9oa?J=Iv8J$M8_-N3@H7es#KoU%G^h(yIz>lck{d_Wa0{4 zzGDI@WrEh_uirEqB0BWS(6f2=s;UuXIUC|=bY$OaLGD{ig717CxfWf=rOFZvPCh>r zc(f_4d61 zTTuo&rOZsFo&phE#o}a+P|cMqI4x+$ks5`}4jtC)p6@8f?k^U#9#Y_P9gOxRCI&_5 zC{ZLsFD|VuM|U2|;L zn3U}^Z~-w5!opSmsT%CsaYP5&4RT^IHRwLgWQ(pLv`2>BM631a⪙~lKwWjjh{Hj zUM6ucd@m=Q%m@~Qli0sTSm@f2Y&g(y*TS?s=aKK1T8p-F(So5R&2fZbn=lM%8ib*R ze@Ks81Tk)Lwx3)5?M0CvH2BGruGi~3Oc0My+am@>)Jy#OaW@gqehDURSD;-_2o=iBsZ3Z2*7zx4V>=MMFR!-c zrMh`c^XP|8+xsDpZbTQ!?<<9m0+>3r#P0`380^%1gs9_et~) zYCfb1d>r7W+#c)&Ag{^ihFmaft$EoHSbDn)M$?dIGZ6`KB&F7;|2U#>A7AMR`k z+Gw_!TG?TB$~zvFVutqQ8->GbwDs8k_3{tFog!&jIy#2C3G&{azR%~1B9GoGz5P}& zhA8(Ww?zn=S%ohWa8HHGtHxMJXOx;MoTGE+&5wdZ7>Tt(P~>S;Y~acbXoZ^Y!S*!< z%CyeaXnZCIgUF*dUKz)vD_3CpuiJmt-MlzeCqEwBov)*=qWjT4y1t#)}2xBDs!N~K%vL_2m4|*8{*4%QV+P60f zma@0?;wiZx$sPkCrK&&mQ@8U$3{R~;C$ESO!B2Ttt#{P)RK#P1*pJe^ceTEkWBZUR>7P+-SR8>4j(eJWW6rjYX6mF zpor@&eB*M;(*sw=4wvY2v%1FJN@FN${FsF$VWues10D}nMh>Un2!?I>jUMNV*6P>x zoeEg06hr!&_ay8YFBH>Wq?)CIHTq=HGyCsx?H z;??zkMQG7&ifH8yWOnG=<@&KzFdgS+JsZhaV(yD^2#w#ZZyVEqG!BG(DK+A1%Jng< zT&I1%jc_m)V{%?UTH!c1hM1+O-QfF&j+KK;j~2OoODSt{H+~`JM(b8t9dUe#QP)uTK%A+-t8-LayUUO?4=7Q zMn{X(o57zx z)rdX3f2?O`t)BL7V#;8f$TOnYT)aolC0VL@dWn|{9@N4TV+un;OAob07k+S+6m6sx zsCfHD-Q*f;Ybj99AjYXvxi~nQ?+#&Gd2Z+6&t&S2OcA5&tf4*{eQlvQV7~G|@N%@ttmdL_(iX5oy9%?q5E5eyM{uaw6OUml!|I3P zI3(LaSau~=Xi;i@z9$v^Wj^0QVl8EZ^6rz99IdpS!Y)q@_V$bc{_ZaHY&29Q-Ud!z-=dVs|7ggo`XcZLUYL=amDJ>4R{JqUM zrE9Qnpky;&$IE@NVF2y6bFJ9YjC|Es=xR8sRt6dVWilq|qLb=Qsl&NePIA3i((z>q zx4coK(8clBU80jQaQFw_ZHU%K#(zSKz=y~(s_Q*TLzVB1=(6TDecvgtUPukxG44-T zC{iNpx`xQM6y5%*bf-sG|iMUz4*DJ3KgF|UwG+;Ag?*`H-qMBsO9Re9DZwH^^kdOwZMm6xCohh430bhO^<|R 
zp))toR(aV*dyj%_4Er}_Yur>fp6ZedW5X?0^LA{OZfFa}Ezb0g+ezp|4#E)IjY#8iFL6!dYnPPG>H5wdqm@gWns`c%VqIHi zIY#^0v2ir8UX3GoDl0d@4wHJi_p5vthD8BPugmo89$IRy<(a_%%A;4 zSkJ?t?v7^DqFWojlt(`s+gMM_i-9ohw(gG&Vt5$uFCXcCFxReg&ZrPS(J^$5*6fJ7 zlE-gf@;c7+CK+Lpcazd^diRGFD#It!Jh@uy#FCW|s`nI5(?d#fWE_#X)@DI6ITa-V zq5U#ftc)03ZEk`u)ajD+=bD;UWN~p$ujBd4;sLKe_{`WC)h1~WETQ{JmK}RxsqD5$ zhdxd2emSI}Zdb`F@!V+W_!HSBHgUtrl#zj@90j+n%nv$I6(1q2d!s8UdgExfLvBqn zS+I=w1>>)mj`_21MYja6RQEXW4(;~0tA7xw?WlA(SRf>*SPi4Mk2+Zn5gR5NHO+Pj zh6QM*9MFUyOC;%XgW^Hgj2(Zt!JLPFt8pzec+9X0ty=6>pd)v##tfFV{hZjw*9 zyF(jindY5V<6N;!8H-<=TmprfZ(;rL)=hC&)wKW9-gkvXl`Y|##*7FeIf+P6k|LmF zktPaAk_-xnk_E|8L?nk65y?W6bC4Vr5y?sx1Oz1KoZ;4X&diy6ru!Tp?%N&a`{tqD zyY>pTs@AIY*I&&jqYWF&-9jB3as`th>I)GEF5Mxn5qmqdmi`QK3JJ_BgM$)lWlvdb zMHy_J8%)u1TU1a|VP?KAs7Tg;pJo(urxL>2_Gh9qHkUh_Hl@3&bLt6mO~WQt{NER*Ffb*tN_k)01ZSVpgdkc(sqMfa>G0-Xf3Lk9!yKiBMis`0TOhG#<8_ zqJ2L8`-}6Pch{pd_TxpuSI6z zoefCyN$lHvV_lwh+&q(gCQVMxjmtbbqb^GqRdZ^GA;UqZ@B26Q=kkz^xFWSRyPIJf zKDN`wcBwFZckf+OVn@gKz^)=%VzhLNkus@}MP7W-*@nlH4qNzU+=0*Os~P-~ft0$` z;W9VGIPBMZM<@M^#S92b7fD}uMtV45kD# z%0i?@6$Fcv#j;n{k{5(|de(X~3g>IS=*^qgZpbRPrQIU4oHFus?%J_%Hr|SJX^QPC zOrS~*c8tx{^TE@VDd>SQ%1ttwoYhiuaBJ>*)s&UxJzLliP{&Yf>sYB3MbbfK`9R`j zZHpP9Zpdc;;B)Is5X^GvatJvUo&ILkYv#Gva+xtI&`SA^(RM<&VDxtjHJcB&?=Fd9 zPKvKc1eo^@Yqjmp&gzaV#{7A4tPh!-9NEN^pVYNwada|&; zDvJ7oNzk1vCQj{XUy+^*N-7JvGa1i$lqK-x`w5j*1@gF?v8!GFW zrYX-9MRxK}?I`6ZGLdAO+)ZnBxsWa4HaIsclI@dZFjpKF)iASFY_+zWek=5WTiRs6 zqR;1Jd=vS!0%pWTx`CNJ*G{ZdAFw1oxgzAKClJVFoW6l(iFC(Y!KbTB?$)-Py9pZ2N$th-kegOcf?(9fRf$6ryi z&!gs=JiK;aG_|;S)11V?TlcrA7%pjf8+A9!H3P}yd&+M5cJ03@NA+rT6tjAl#xp84 zQ4kkQ3{krTC?N?za%1UdzuL=ZNj8tzWNYQ6_LsX{j^fM-Zyt}9nR8x$zuWbri1jixAQ#{*6o{h8n66->lj@AUVgIIFSThRHrP%zUB*17zgUZbw5L=hK}o3ddi;_4xs%n}by|wo%pBf; z*DB25*5Bodf1c!HyeSXjOwEYdJ9h*vuGK z6*=GuKb8b_2N(t$&ZB}?Gnt_^T>ByWW}llg+|+K}R%SqcK2Li^N4%v1u3)-@pCC6Dqi;W0 z_>0*bv}g=$krHyv-W6>S@Hkw97^Kyvlng+d zg8+SM!MH5|x!o6r59{98FR2Ky;Q#<)L{%s#0@{-vvUrp8xI*CO4luQ6CYU-vfEwUJ 
zA^EnAu$Ig6U{3n$c%a?8B?`AX65=V)$OcHQ4(%9#&sGruK9+g;1uwv>5thy+A z6_)yl_hIP$i^Fgu^&42bMchZFb z;~$50z0r~p0Glg^_HD8$+(1&QpnW-A`2dl(0EqnXEKNSZzha?%wNdjxYCihXkHX92 zFk=&7V7~$Gu4qeq1`;BZ0TcQ;pvwfXxh%9-0*f9i`R{WZIsWqkQ&yU?>l5Aw%i&;1 zE#uII_I-%L7(x3~pnYZo#@7M;ewLInY;K^$f^ZLkB>K-3J1;>hFWk=5JzpCz^(or8 zkxS>iVR)^Xudwa@PaL%2Uz*hTox&?2stn-gkRlJDwc2)DXvxuZNbpvvQbmB+m}4h> z8_%65>uNziOXbvKa2Ws;6-LL%VPMr!Qj94niZ3|D;K1GV~rM z$18Ost(k}?1pQEjX)KGZ=u`lBFHECcG6ExU^Ei6ar;+9x%OXWn+qoJ=T4!eehj)JP zn|&)n)1yR)>79A$`MFx_(w`kX%O~X{Dh<#I0KorSOE||5>xLy|-GR&U?{+Yv3)55x z!SN-4u!dZP^kCud5=gSiLXf!AEqWzR$4Bvs3%WD@wsQ|0^V=@UMFXSJi--1$KP1t{ z#Tz%6_@#ZG$qii;D!HW5e?9j%1P0{$Z?%5@O}KHysr?j;4$wC}ioT_i!UXVr!c$N8 zdH#U~09|YE?U8y?bro@TX0L`MSJhPQZ%&B|^Gc$FmnG4Vc1*89SZGeg=6Z3&UBjOG zf>6N#vLZ?TqLBtwcpo%n#b*%Yt`%I|O)Q6mgVNH%^xufeNI~=)cH(CDnSGvjgaz7M z3T711eS!H#Pt@vkMsj6dJc>c`cq4vSUDVsaafGI?5opfQF!Lv7>=4cT0FlGT(FsRQB`rw}dvlX`7vm++lEv1T?UU)jLv2NZ zOfG3K`8)wXJWr%?yJR`4ANxsBaPU7maI#rc2I;^D=zzEN>q+RqN$3FCPI?G*KoUC8 zoJSOEfqIXzpT?M&JLbtM&_6JYX0B6UR-UEbnGKrHmXJt_*IA7Y9pynfA^m@M!dd}A z2_8xWxSL#TXgfg2Yv69wcghkEI{Gi5+^0Rl24}qCrDy!Sf?CgR#LT zOPiq-bNzu4p*oG)=_x`MX=)=su>p*<%$DDdxgwb(btjA6UiW#Y+yFHoErmtCO057? 
zFvQHT$a8zSAahXu#7XbQbLMElgf9*Sm`XPR?_h&!t_yEtt{K`m@pN>K~!#yJp)iOGKfNFijcp}6CmvWEettyX~9ZVs$?a3iiZ|kMc~KONZ9au2U$Lyy(m> zlKS$(;$Y4?$8XS{%b;j_(`T&#(7yl+b_iAu*ge3s2WCnud4X8D=)V}gXA=KA|6m@V z_KTm$_QRab?)%jw2MZdv^@&9VSQVnQ$n`iqVEZHV3sbHF&IU*(#`>#!#Gh0xgTt6u z2_px3jD7oo?eSX~|t2Pd@ukuTB)EljW_fd=2y_sKNA|BE>SwjQ-GBT1yYEt-F9*qY`VW_V3FHv$?PvD0fKk|Fe_<7 z`Og8<2@}dWA<+bLE(WgCk5^IgCtC4O?+aFnvHv09H8{cZJ!lUL7kL6pQ^KW$q zZPsZ2LJh434Q`2};DVdgV(8%GDfyXm$PF0X((=~r6VSfczZh+B?a$rBgW=Z-SOPnO z?Faq?lmo7PPPpSISne;b{c=k;Mj0%JCbZA)NU9*hKmjxLHA@{d@FiqUF|S3JfCg&( z#SVdMH>nPM4tKEk!SvLZC(S|!?|=Z)E+bN)(Ex%K#$M}-kW?J`Z`ZyIv@#pw*!j~S zhw9Ncvbl_a-QRazY2_cwau`t`qhF9Mjmslu3Y^Y=zWO=`djFHh$ACYb%O8{++hbP0 zjq-HN0l|=A^z6fITuh*ikn^Gc@c2C!1y1Tt$iT-gtmMx!Dsr}0RdEy>0Ac`ytjGh% zipcPdWpHs&X}N94tiVR;VR#ZBISz(-P&Q&fpoELOj%oMv%ufW1gso+6nMkb|0r+TB z8xE-c&rg6db;g-HEkcI*9*#_Vs{LC6M1H+LvJ7 z;i^Z%p$A$T%lvFZvdK=>7;9;FXvO$Xo~U5Dn{E7kU+y9hENt!nN4@?vEWfMQ+HSmr z%;;}81`A1IH>=Q(1HLa8?wW<9W~@UB8^^l`&d9h~rLltR!?k{nrk@!%AYmxc*O5ef zTPAbk{GxFKl|CdLHr9I2+3% z0+xfIuZ)4!0NLtbmEeW!c5xM|q0K%D{XkhQi6}UQ0~QUdkWNUr!%309|9~&>PpXOX zWJ&s}foI1mffzjCFLWb_Csm!2bWD5^k`SU}I;)Ne6jol|Fm4*k>byT;*rEtUw@l^!3CS5)xR! 
zMGk+uJFSWYpGI(VwHLVSJIO*fE{L=e`!g{wZgyOn2r55i<^hLiY)cWWDZGf#aBk1^ z`YtU2?c!Bmj=Zk(Qa(WPU1}zWb6;_ETH|{!l!F9b&0Ze!(I^r!Y1pCgRv#%c{EZSRGSh(~L;8kei+6|rVuB=*n@IcOW+oIndokNdm z9;gT*!dAiL=Qjf`{NJSlqAH#F8rVsI_fep8-V#RU{|N-0V=d9;_B)$H+fFX{P*!YxodvrQ2j8^}%%uoL(24=bI){*G#TRiB)oJ=EfoP|(k!nY+N8+kNI%HNdQ~Ox9ig13-)VWBHxC z_<-{mNEcuLQq_d`Pj+Hu;aJ~zQl$&qu+`TmQep%l@o)NA;vjaL$3cx8E(mloATbc@ zm=K@)c^36@6O87M$Mvxs@Ri7a*v;ziz7)La<b%Xoiq?J-?*Kj>a(ALA-cnx zc8ftqIGk=RkTR>?QTWj;v@;pMJqIPV#;jTW4;l#E#(7#hIX2I@9Czt5^$3BhTUk)Y2>Exx_`4Y6x$+%Fl%yW85G-e#oPzL{jb zW;dAj3O~Ze)0lemN7_>5shTcL&LoL@JMJ~CbB4Uj8xs|=)^7BipO@fm=Te`DZyn3` zq-(=2Rfk!p{q}CYV|EpjpfzX2+hemj`PnWc)uLD+(jb0E!RdU*E0qnqjq^3mTyv%@ zCTW9Fqk1u}y^lF7xaP2{{uAM|`{m;cFcPk7IRf{@R99@P(V4q;Go&rcF*`*s+3sY7 zO3m=DYTa*ACEA$W=J9NZy|DSAqM#q$t###5rnSy;UVX%KcgC$DXqkVebp-OX#kCa2 z`*ABq3mA@gOwZz^JiW8?&OMS-854PPmy(L5J_g|`RCk*QM((;#ykkIUZcXXT*x@TR z3uM(KJ>m8Cx~^k;V*Ahc^%AgUe()Xd+iliqFuoHfv0U9@9$r~|hwXUSZYGmax^@zW z;P9N#N%2Y%6*;rqxgqghhlWckzBC>*${O0NKKi>ItNkVJ$4IV5m5{sH;aZoS$GzzC zDN%7skFNC#y;peQFxMVpSyj~M%yRKyZA+g73ER?q%K$aA;N8^E96juzoqpnI7zc;V zHjfb=z4Ic1lhiDS{Te+w7q7keSRExq67w!Za$r4$Ynn#>g?GKo#g3xNVG-AJ{psqQ zAw30Xx}7MRmo0hB{7nnJD!f7$X$-3ue8ZSJa5nJHiJuM3jC9~Jp-OpPPLppFUF@`D za9JtFr4-ci zp%d6s)P6J`lSVClmQ`p`%TPPGf$E;a_11i&#NZ0{ILtsIv@fX|V>0C(HEQ>^Y(exI zeS)RYh}m29MiFf51=w}oJYiC3kf$H#JA10pYe?c^NKannNWWi~=75)LWz+v}y%LBh;yFFoA;MK7N8T+?L>?aIKhV?@&lu(7`cd zw^8uv5hg;vK+Rb@SNG%DSs8^dC8hMvU)gye6hNJpfAQK?Dj^=O8HFQVy_etGttXM)!^<|Yx((mnC+G(%qP2` z*_mYCrR_|;feZ(`u3n0ZRB(BQO@*d|U@i8_(=!SBOg4HWt|v6@U1nk>mFZjwrFFo1 z)MfBU_2G`;;eiF#TI8zvA}ZImJ`J|H=P=5c5=npLJSd=3KQ;HzvK zwUJP|^KgQe;`o`c$l_PW*gV^kYU3mZ7j#G}clG!Bb;-TCyD2hDl!=&MmJg=&&4@=t zNWDw6Fkkvokf*{Eg(ql}Xn}{rE78h>x;2Rt=#1mo%ENu~Ia(htI_2opSIHdX`q-Tt z?YRSSUQBd2%b1ED^`eWDpfg*3CMuhRja+e7Db*B#pO1Gj_#KiC6fH$Um;1ld>*?939lzG z6kc@L6a@A=v;@dlz3`$CIEQXC#2$yJV)(`JlzIP#274evSmY24g(%JwjrVC7E41jh z@nhbRvN)<4asTh}w9k#fu2}n!#o^A^91@G zX62x7ca7rEnKNhF6cv3>oM_nCu3uc7yOf0?Dk~`8?lSIe6XR1?)7EYtBOxKyhO!zF 
zn)pI+6NA;29I_Fi<>R(lYmS2Y?wZxG)zU^nP0iP^um{E8+@WH9LYwW}H?lHo zD6^iLjpAEtoj9^KC)6-v@W9{|F-hpshAA1jyds7D zQeexb^Zonx?XOq9y~@M41;;?fK$;_ZdKe7mCKT|Xrlc&hZuoB>>2-I92Ll2E4(sX3 zQ46_1g@td^CN734y9`7d&i50Z$9;N_)ZK2+)13WM?fI^|yWGkFx~UI;cHOuoySSmz zU%q^CoUr2HUL7B-hW>+)_uFMmx$c227iBKxzAd^Y#KQx{D?oDHa4nSgB5g6qyAg!< z^sY%GZoy)1tvvI?U#H!V;79YjY_o($IBZzDm}M?cvz$k1E0TmohdV$g@-*3a08k*m zy|g^>4B>gh>Q1x$51cs@eCAAXT^%&gcdR8TDRHvYtG#|W&qas%$E$~BNbkQv*}cZb z#^vSZ_LsOt-8{hA)|SV~`8Y4{#L|*AzgWByosq6Ax;0i*i@x&hOC5D}bxlo8)p^{S zsC3uRB4@d2+fq;`RIbVAil`|Htw5rtrp@M0uGTEM)@r+NABB#j=nbW@a;*IU9e0Z& zW^(4#89>DsP{*A02w#sFzk+k;deo#;c$;sd#8v0t}Pp#5*%`R>BaxA3EG`YXi~?7jX6m=u2YE1n z8wnTAWMq{Ups$>fi^;4c#C4qmdC08uKfYi*HjrhuIZKy+MM3|*{Z|fjR7H|0LYs$e z-jzr`8Nfu3j*U*1`nHP_!2pA;NYgnS?EAOl<{!h%$3Fj4)p9FJB{D?O+pRBfI-;Xx z%x>ZkZ)zAxOiCmk9b1x?zSPi=W$NPJRd8OO-cGSFbO*at-{;mtaP;FxM6Y2bx5iJ! zeUg@QClzZbs@+ zV3W`uP&Gb2ZpR%F5g8d4t*xo5+B`O8p6$gnv;Z0S*oQ=z7G1!20?VeNDMX zSDBR#C#=V{d^oPWB+{RNSz)Cocc$~9UnObu#=ch(p82_hNU_G%CpyF<(i8AF%ved_ z>WI12ulfJo#8eE9Y5rF`a4OKBI~n7oo4h!>Y^R8902K7(k>CWX_iKdfYVwtv&RQ`p zeH2^k2?a_UavJ8RHD(2{PdWW9q!&iXXHIHgvwtXeF@*QqN>bdX`Hr@j@_`_!gdX-9 zSt%mP?!ESYrF#WfH`hiwhzQ-C?@ca_E=$Yk#h?Ho5)@mZ=S>k&bkCkn;CZYCz^bN5 zkbQ+iAf0)^ayK9L=C%9%e$HwXZV6@eT%Eay_pG~o?sh}ySSG%*H(A)OZ&VD8sY}xQ z(G;cO{!x65iVBWwhWwT1!^{Czczy$A>nn^~Yut$na2h|NQR*TYdWX8~(W0t_E#BN! 
znysOsy9^as4~4}v%ng#~ZfO`cfs}(WP91rpIVuzaB}OxgZkg4COo8149_LwhQeWCE z@AeOQCn%(7$ zYGPI4v2dwcrL;GYJG7=>$YC;R_g2&Pm8{~=yEyi^#)m}F;em8JeYMBiOwzaV#>T(y zcCT&SD4A>D%_d7%w_mFr(56XACtGnJ4GiEWv6PsW8y?Kuja}(ATu*k)UhHrsIwojp zZtmk;v|f-?JgiWhPGpwv|MWx8NQ4;rmTQ-|atljGyGwC9eO$1*HGhOSdY4A9x6o*1 zQHVUEx5eXf;-k(l8{CE47@tRq;v1x$+eRA*QU}YLX%GMXFymZ|nIu_Dn41$?Uv;>q zq&r_)-BEhOqRFl!xgS4{H#^HvYg{5hVJ^Ca{C)@%p=gZSPGZhTZSP<`@P&&t@c7ap zK@!=S@)lPnKef8}tUXTNuZz>?JLHUlnR2h2q^4kar1&(V%u`2d^WCN2x23SXkkz!2XjEUO!8;(HzZHxnc25XrAa|Ngr6NDIGC|*pth*PKaz6g`InRnSanO zD7oD?Bkogvb7e@NRTHz{-sX(kqFmg&%OOpfCC4X@3b>r?22XkubCIOlBBLnFso`yYcQ7`zbaPWEfP)~+0us=vfUc;9`#d$)u5 zc}PKP#qNA0T@9;Jz&UOv`b_~ze9cO$%&CTghuX`n6}meZ1$NyNY5qiH*E}EhiTvgsRx>2*4^c%>lvDxw)cPo;3CaYut8qcM zO;eX9FR9=Y!m_ZQ(A_BtT9KBjy<=^^qIdM8C!6hV`SnJ@dp5`R0#*X0%Dq_AiBa4= z@`K%-F^h*I`~}2JI%bE~`*C<5)?+`v7>B0I7>l#7OcO;{C=NbO(>9EkD0`zccWCOw z4X0!^j3()y6z;B{x^#Hnq4}>nd+E*HJ`_ei?G3|pfk>e$gOgXgH zkGrgI%}ej9ge(5Y5<|C7VtksI7vZ>;#~?`sA77=s@TGlI(e)uhcYf&fs`U3KN@;YZ z&iw3Rzdc#ER=;Otw3BjeX=A1#>C~NOuGPCa^<~Q=a-08%NrXR= z$gIfJk8URPX(M~z49yJdbe6@On_R_Yij9&xNrlmMRZYhHw2A6hStmS+&U)Ap9s78M z&1I){e?G%m(Ubk4CynX*rT`A*M+z(F68I!FqiMlt9?2=sjb;^2R}omiLi zKDY3n!5Niu0u);1Y#!hhu?cH`hQyi5CjFUw%2Hpi;Mi?5Ae-KJ2D1;W4=QtzN@Raq z0PxGY?=QxiDoOF3Be9<|CWU`(=d68Xe;YG)m9Nr)wFu4qp8>qR8oTjftUaNHs#q|< zzYvP#J^EVK4SQF1<9;oIx5NJNCJl}$%J90|NtcG$kR1@Kngi$bgErIJ2Fp<3?6jL6X1C z+uX7;IC8SFl&!b8PJXYjIF3W&_Zh-Z0sejEk00pk(^DJ@j{77gpr`RlTwdS6!C`2k zt#C@r;w{d1&m!+`tyRvQJcY!msHmt~3?2suLl8yGXZriTun{L0LkkNVgHbiMwePz- zjuXp$e71+p`^eX43J1*kmvM(tB<@0V4W2A<9BVGO+I+XO)nZu;F*pbjfKZL+V(sl> z{ZLBWMMn*e{>|*HO-;=ecDvgUg@X7N9ECskl@=a3a>TrE@xOfJ`|9`a@E}Cm;HBW& zKplQpK_0#c9re`8V<-@v63dj!0q_~F6_!xP<@tLY+&1h{)X+^b_@hQjBx2XC0-d}1 z`c_uU+PqYL{sIo`^J;870$lkVLhLXs!+BC|J}j(k%`Htb{>?JpwjKA@_jm3PC4_KdqIN}OOvEJtz6J?&S33%HLYv=pmnF18Mpjmq zXpi5ScMunn?x+c->cfX0A4JV87h!@RTrrW7vdKo5zqp=0pmMzU0y#UGw7uVUq6#En4)F?qtr%~`Bu`Su&N>qM3ZkA6%PqrZnh+R? 
z8`W7=bxjs$&dSJ^t`Wt&xRdf4^6~AcJFh~eF=DTl?2(#QZi~E zpkf(z8A?v#@V78sy==;t=|ziUI=T#~+tDVh_E8Bx`VrOmw*3 z7lud#T2yniB+yY7srG^GITL2&+6CLcARYZf0_*VWv)Ft7M7R&mN^v+5naE8BJwa&k z>~QoM?4BJd9QF$m>>I8bATTg x!$0cqm$m(mI{c#!|ER;ChWr18v-rNZ?_IJVUXWL=`#$)SM9GR~irjtre*n3g-NXO@ diff --git a/profiler/advisor/img/overall_0.png b/profiler/advisor/img/overall_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f74cf2dcf131f36df9901e20ea327d509c6fee67 GIT binary patch literal 56377 zcmeFZc{tSVA3yp$tx8nZvR9T$sD!e&ptA2Wwk#3GIt&K$Xdwxe5JE_{5Ms>O%9?#& z$5^wDnXwPXa&GGR{+?=n-{)M{Ip_La=bT^H{4tr&=llNllc1O|boH-4 z{@k^P<_M4cPhX-KR!}OPidMhdnB`{Mk%Krn9lc$g$D0nJGfIq$BqdGy zU02Fg^xjNV%OaAJMyqhfrY4QI!wPdS_a+Bzws#c!jnagpw?ygtq)&5N{#=IqSnqwJW7fE_M!70s#(cQ1s ztW;g=t5e7vTu~)b*UhKHtn0`2U0rBG4)}>Tacn{_4@8?1(Ne*fJ9Xm4a36z|f-Fa3 zS28@$FnrcpmR2crTX(%zuXAr zjh9`S8P4fb!ZrQnW^cNlSsrH0I2_Q5)vxR+3RTFoC9#z)2pK9eVoaZ;cdXx5=TFz! zQ?}NOlT$`!Gw6!a7@AI%B1~t`Rm?62KgSLuYXas7sg@OK>F2*Il7{I|Gm{8)c*tXe zUT|edk?rMpd{y1uxS^WqtG$){l3YA)(<{ygasBeB*mOikSOfv5k6e4)QX(ULKl6B( zKEfS(T5SdDfF#eTi)2(E=P&CiNkFg2T&0*22iA#|%%~ECb9@}3uekIKO>|dTj|_UH zlezbz#3#z|$N1@W+yr9n3#?6c!YTM!b}&56O%`fB)ea#a-qB^WP=y9Og~pZ z^ZB7)G{pyK>a=vMjR;I+u^`X*F2@XHO&BJ!KER%5bf21ZcXL-7d78nDyXUCV#N{hK zy%;eFqf^&ivs&)=vmPCOobU`p7zU*xq9+Fsj!R)&dL+GK&o%FL=w zsd47<2ZMB+{hb3|NGlhIg@;hARFO#2xizHK8>+gja6}b{^2R8}O!x63-@5h6&MQd690V#c#uXvj^M-(h2TyN=bTl+?=U}X`w zlXgv@Y^JaDLh)_`S362p(o#g=X?>Am86E9~=03QHGo>&phuD`h5S6ROm2pK=->8O9OhXa#3)Qi!K4oEgjiWQ-TkbvWUy`OcPg z^P`O=OQX~X1-)0b5sxh(i3BHp{9ryc_xMO$+9G+m63!f$$QrIC-5FZ4Cqkx&MUZam z%=GM1(SEH{Th$#m$uE-P!@1HZ;Z;JaJ||#dUE!GAHfz0=lD$LJL9}9d=Qup)oJ*1VH|8!ONsaCMj`!rJvJWow70uY9-Sme;!yu`7Ae>Q{Cg zw;3}HZ(@LDzsBm8@f+yg&Z}+TSVd4>M)dhsh$U#_NabB0Z1tr6hl-Jif!v#XhFMC zZuC|}e&mf~eVhuOQV(Wa#g2lac-j7g>U0g-8Li(M`YJ=p|Yd#r5g|D z(rLw=<2o3U1A|V=O3$715qg8uPYHN1^db`RQgBxd>$MTftIrP|XH-a0KEvd&-lJ@` z`Y^LDWqy=ZM5*23VZ^dbjncgg5tPF2GY4$PM)!XFyA`QS&Ay6h*7 zKJmh0r}tjPFo%aYG$3QsrGGe8J|2wmVsA$Z$4+!=`MN6*i4_?2Z`&e`dyPJK*dPvD 
ziZ&C17Fu|0Fi4-v0-u;9m=|2fd7(wY&wZUz{Qwrq(;j{?XHFBpieK9^nDItR%!7Q=esg zM7#irmDQC`ZtEFb=6Hb!=VZ+-o7JWF;~l9q4;lj!(T_%P2q{Su`_OAc<27+Y(*#1? zlMEe+1<0`pkuM+e$n-F*T4PHBHoc0H z!?S0L4VQF8th`jGo5EyflrkD>yIK_>F12Y*pfwBGn(0IcZANaf?|09cLmZ}rGA70* zF?MTrRxu8Cfu+wqJk%GvE;)OvgFPj*Li?j7q#-lyGPqIA_%6O-C9BYQ&K978@&b>u5dc13r=>-Dcb0hhrgHIgT^C!IdPwlv8vAqm;Z=dCeDv@-V zMQM7!P}cy>$L$|8+a@nax~=mrW);-CT-lP)bHhY}YY{zz~}Bmv7ILpD{LLTA}sJr@{}a z_@brMR@)Cu1(!3V>D>*~XJo#~1oJ?=3f$L7AnDyZE?`m9L1TInlX(x_S0aO9;u?_g zZoiW9xbo@ep!tzXW_^ouojyw`M!V){OwB(&PLh}zV=C|O8UDsOojBK5oG|K?$*er0 zENf+w?N&P$4)I02&7HhGy-PiILeac4WGDyc6OLcB9gH8rOKga{*g4*DmSHr#!9 ziUs%u{}ACRnkNpC>qb}_<4KF-JEnW{VC7bE=*a6Tb;AO<0Foqi;eu%FzUo%X>193V zkai9}uUbi_cmakA*tpiZ9I2}8Mxcrpm)(e>?UbATXfzXYf0<%eM9+!i8vGf~yqod9 z*UlIv!qc{loEqK4QwYcpg0!)ev6btp$&^?0B>C z?MrB=L;F$aVAgqU`gEeJ?&0)vrsU4c7*%E_(%@QIVDj0KVOH3iClnaE6?(BSdr4}l z^Awsl>e%_t)fom6+|7+R-|5fsumikG9wlE`=>A$$=MOjxD^SySvr_0v3kkf&baug)$MGJ!WT3t3}Y$r?;kWt za^HVBajUKpc3{CP&mrUBQ+BOn)_}~k28@;-PJeXn`PtN_P3NgJU8_h|9Dn2)n_gdO z!`Ty=`aUtayoj@^kdtqO;5O#(lIfqttZR$-N#1tedLL8ZLFu^Jyon=(R%*Wq%EZmz znihxUc&69jKM6bMzt0-^Cf(lQX&$^N-HvG`WR_a_v@Axy7 z4MJ>l79q9@ned0x5q$yQ_3UdYsQmFc*f28^Ic$+g}WhM}dEKzkSh zbdD}vjMRF{BydU&A?3FF(>3#bHfOCrg!xoP>BMqQ>m+4qndjGavC3YN@Xh~p@@fg} zgIdB8Ljxn{U9(Tm?x04E3Bv>U_(A<5ZMA3p-d`S-tBmZ^NXv%_ZiMsPRy|;3P8@zg zxZ9atZ6K36&!FWGu?+x%LK^2bX(vilX@Eg;4d z$yFVwCx^U5$By)QiK(2n>Cg;bb4F+0>^>pc&-fioc>86~4HRA1UD-!RcZ@c9FT>W1 z`r~uToZ&YQWJKr+afyj)``E`;p2&xkG$g-Ds0PFo-*O3%>`YBz z%5IH%`g}Us!tG!tE3ONIcP~b$&j~NjFF zz>=VkC}yn{+T1i5iOs#$hDmkej;fx?G{j%I4@n7S*gd{{ z1BD`i-FjuOGw*2*?H@|pou+&AAlRwLG+K-eK--FL%S5b5Vy@fa2Kpdf4c}H)ZcJUc zQ+mpL4hk(6DBa(;hTENnJlb*mxdeMP$}SVzWTQIs&8%ol@7=)jFpJNwKlUX7>`U5V zTiKBv#KljM71(0hbE{f+zH!VxAp#}fUIk)C#dp;mQJ=_n760vW+R16l_u)RJQ?|4z z;^J~>$HF%$4aOVxN^i1C+QQb6u#)GO`cj(c-O@GH&xF`VqmV2Ewb1#S>yYI%X}n%H zMwK|2yOBq#3y(j(Ie0ql7GHu)_6@eHctITF-RVn+;jj>;=r*xH4qMqaGMsN8r+_)JhbE2)=dybkXV&-4G0^M`pcJS7$*r!!jS2(VM zxRH?6G&LQJrE4u|z2`5b>>dHDc zRp^Vq+=eKdzC<%)Q8^FG 
zg8gA@xHHbDvWIeJ`R}{xQ$}F<>Di7`LV_j?yi1)5Z?yFf2UdPXu9>aK9tjCs8v+3c z4|fm9R67(WMlc@aRlF(vQk%`Kx0pW^(KTN7nE17q@*LmRL{ye+TtZy zi5y&UG#_tEiJ?9q&kM`aM5DN~qhH7@JG|KlZ;nw}oW5t`d=<`*QH8;+>;vrfz1s4{ z2$~QsUFf|+6U}X8%B%jYfv0El&PtV{)p-H)MwghdtFDQYyb8mL!|ukr+;l7KGBFTG zozRStYdy8j2KH4J6fMCh6`y%~h6Hr-G=?Uffq+iHIj2L!v(F|>HfQ=l3pne@ zKgX0mG_l_=x!{_SnJV>1Ic-1v54l_iX%(x_o2C2yJKp+IuKY;+hVJlJ)y+tWUQ~)u z-Nmz>3S-$3tQhU9wm>dRtto2v*hr!hZA&pW2Tjt8-v?8glEYrDow(nTDtX2>*0w`8 zn*CNo_946c_i1P9GPS-q$6muLp{j00Bhwm`8);lY*6ptPxjp; z7;!YaZmFn*3UlQYPZl_hE*pWZ9kqkbr5=;&o6Ad8&=jl?2rcf{mg&5Ne&<*SIr2$1 zS9bkGZ)oUc*wpH*8JUBe`MPO zoUkM7(pYl_8gr1!=3+grdWtH-dED1s;?b&!Xq<$8F|05v{cs)`GE8q1`%9FHK9I ziL-4U4<1gd8`>Xmwyc*EmEq#iKOJ8t;|;vm?B{W%K6p3r{fK4T@ZswvBbSD82eL3x z`nhi_Et|K)X0~KghHKmniRAWAvnkVpR?FoAMe+7R>IG0t8I-a)P@Ctdv>=X^yV;+c z{cPEo9Otz47R~2L$*#Pa@pQQEtK0?%4WCjaGHP_Ertu!RVjKZgXjF27uH)$@BkAIz zjTd#F@O8(YP!jd+f)l(KT&X?1Hh6&jrM^xRoY8 zVl_gy59zWZp987*idp?q(CElep-(vB7`zD6hOots`vWsds+-_rp+5BfT5zBlN5Qp4jUv3Hs`L00OhLB!{o zL@5rnnNXG$)yLuCnk6<{?17yYc1kEsfBb>(AcMSFIH*7)ms%GbHbrmU`EGlE_?-jB zm)AlYY#5M%!*bhuE}Kcqc)NR;B0lNNS0MZ|Hl1~iGTxz~j*im%r9Ig{i8XTx;~?3Z zV(DynrSDpvs@^UKFv@U&K;A3ObH3O`tvivm2DMx)3N_7T~zzZdq;3v+pRFw8iHUpSiY z$-FDT=^ET_u`u|!QTyG@>ZgtW=cqK6YVxocwD6tYbEZ>2g#GjHSLI=Xm~BY>r$2A6 zSAHDiF;V!J=Xlqqsr~-B4rr1BirT10zP|ZuQry)zH}_6y+uxh_&kMgk_-j%RrcC9j zfA}>Vc;X=1&3EVKVyEBlv--m?ocRdusROl9d7fVZmE<_a(kjn8d8d;6Wl_)f{`qg* zRPOW1jNQN7wzGOc!G9A_ZQZ@q+X7U!iih@J^}_N*tNwm!dZDK`1Tt5P%^w>0&rJj} z`rOk)IOy%D4nq{#j!OUX{Bu{-|Ia04@PZnuA`aBpM#?4ObuXyUV zcFWH?&hwu8y?ey9rJ^?j1BA(fj~~tIp!`a2Z|_uuzgUsR^|xth_ibI~1E!|7ukYNx z{^NN$)1)PFYS+v2WbBF*;Lq~i+AvmgB)P++kxPF^iJvS&p3V6y$eD<(LlaKjzDpq>d>d|qBJ>3|q775o27p7*@;rAuDJ>f@?$m&J2DY$ROgcN%uP{ z?o>ta8OH=*C{?HxRbiUl0YS{_LT@0|y@hMb!w?er;osTNf5ucWbw4W*5hANB1*orC zb$iVndMrOr^@oN>Y?CT1+NWj~`{ulTKP_I~v`sFBVD{BP6e(oK`;Wv^Pf@$i3XGY4 z{6HdBw>_L~hc^A$xAGjpWkp{FLyH-wo%s2u9nsxDWmq42;-If?YD-O|c@FxG$F_&a z>NrAWQTK4dSN3(ulrEGsBa~divB=;>GRO3~F&0yI0dFFshdyB7JG5av!YH#iL3Bcp 
z5QO1nH+DG8*E?NK-}8)w)3nA!NKJ+7-m6DqGm%@ap(IHd=94vPQ#F}UdbY!k#7#KP z@sz%KDRyNxGlh7#nQM>Vq`2SkncugJMgUeg26iqi#)qf>{axP&a(V`sv;#zZ_(?N|7b#vQ-G)R5>Ow;L0ZCxSS_g=p7_bYrZ(>$4_~5LcLa~e zX+!oC4c-!^-HgHx&y5b8SY2P}eri?OO33Nfwqikg3`I%}Di6Fr=j>6Kn>p$JBASkh zHL&T)!yp-WhE@Wl`JMZdB0I;y8A{*C6lU1=z?TRz1!)U|kp1r`|Dg|ynLZ)+ zlXL$*a|33e3x>GYAcGgji=*#dBTHrb8s3={H(|fqPARzyL#&OC%;yJjZy6hgB@BGP zdZ_xZe+_IB+A=9k>m9jL5QH)~pP)CF59p{hqLK#z!} zZrEjV6>3*KFdapgVa)#3b(`i=Q@&6{;eutctM4VFeG>i2Vi@?H3&S${l30z)%o6cZ z^KB2SWJaHe1CC?a$62+B?v1q9jAgU@#WS+26Kxx9yYd{aJ0hth*f!oE|HjFFK)8M! z2$kZ>l$=hmye!p|pFBPG)(T5WVXSpLIl*~l&Csgxkyk79BN3f7V5tXL@}7MNO&+;{ z>YVna;ssg?f#8$Z9B`vjgOp;4Xh*5GLD$p;cE_}}R%-<)yXdaqCBmW$2k zubDA&_&CvnC>uN`9It`wg3d9K9laJtwTG`^PNm5C;L`$3wxk;#AeQr)$wzVgA7!YN zKP^Mxnfplcoz(JqGci#4=v!plGXCy5{@g_;3@Y?ldL17so_&vA5{RH;%jr~;lK=hC z9p~}CYJ2+yTcO(8?Y1+rP-+Q|8S}vp#zo5}fesRm1H}iWG{SxgRX?1HLGZ4!^)Zov zefveeEsPHON{Ls$Kqqf?SbVNxivD9Ktqu6QT+Mf$ieFQ#ep4`fwe_@GL~ zb?U`@?#s86TboQm@L@{C#g?jS!^$?7vb(d4i~78@5LBnYyvwdTYom%$okQB`ZCM%8 z!^UQ@{BhvioSn9IhEo?I_}bmOzP=!BpR(&NE%Nr9!Dly*yX>}ZT3lY<9E>#e^<4Y3 zpPFj#$vriTH1+MpVx^qM8+wf#z@24afVIASG`^d4d?E}_*Pk!}2X2Y|F?h&bkcNY7 zUr;~G+jUB7bA5FXa-?ubaIi_C(~0=9=gkw%@#J3$ zr|%L2NXVj0NaJ7%?RNGi!AQ)k*ViD$<|48>TzM%NtwO0dbs%j=^m$9PyNP+4a zkji&G{5>Lrzw{n`eT;jHI6nqAynjLpVWfa-$aY710%T>d?X*Dn9T8h_H&ACOnA&Zq2)KR@`piGFzU zuh^3y!?55hD$?;&0Wti)LtZb_Y)q-PR2v-qSL{iTw_N}9Piy%hpD572`rjchUhUS> zI%?AVc=BJdrw=zDWou9~mW!Zx`WH;{eWFh|H8VC-00R0SVDL|i`WuscL1UUgO+c2n z{}q!|QJ|dt8{_?_f_#1owC2AHDEH*i(reUe->=kqrPAuR$O3Uh2RckB}4i5eo2=p@yi_UpJ2qPt|d?J#Qrp>4PGw{d%i9wP)zi z_QMB#fP0*n7-t!u08>wrR@l`nz0fez(9pkxr;8jA zH`zv#&X2jb(tM(Z`T=BKRaI3xEDZP(2_rA8St}H{Y7n6m5BJ=l`T)ZpK#WgCGAfuA z*VNqDIgbjaVjzF+65F%sHa7x1U|MZ$Z&Va;HHvo@^B% zml47U^-mMpBG6mq>+4`S8TVRjXnxY3A4(249I_XdSYGNLG%l6)AsBjAVXfZ35G;b6 zYQzYMzz`s?s*#fys3fU8A8q&e2ly+B7y(CHCNAdoJv^W2QrWJ^kET+q=V>~Q9Ao1s ze%>64q!2p3y_;MPIrrME3mWWfO*IEp1_>hRrcsQM+BI0wH=0GNfQ;ClePwHBk(HQ1F(5OT#pVC`aYTBE6V%t``j&{)6Q3P^0UOTgM-|%Z+!Ar=NymFEXbASksfTy3apuc3HJNAGG=jFd!v 
z|5{GSKW%qMoi6P-X9Rn|v|Y`@YHQo*N=A6Q!xY^WHtSW(Y6E}V-%5>|*r4rnE`)JPFE%^H8LH1x{0 z*MVSHLQgJ>n~N`}5iMctpGw!JhS2MiXur{|h4|pccgw#TuHV|WgN1IiI@f1|(lp9G zpUX)p3ETPfz433C0zvA*VmJ=V&4FhK?8=I(P(Wp*h~)oTE*hKwbyQap7N7~gsaVd*3K?LezJBtEh*g%AE`jaVdSI{NA{pZFvh64WO(RO zr--21zJ(f-P`e}94XC81at7$RzMSrQ-E`OuhuO-_5Tzg`N3YRQ?cK~_FWPGc1Y<>|Woe#{ zbH_=?vR`2(56!{hiZTZMdE%6m_&9ofm?~}W_$D1~UGN*C z?Ckja{R{|?+rRoZ8y>LL?~|}y`k{}n=)Beq536kFj&P~qF%U+0)6&{mAJYAqHd=cTx-&Z9@O6*aAmqDHgzrnQ)d zAB!2yla%0`$5VAXy@}uACBpG+@$S8?1hXyqW7bLnB%iI|4WDo)@xIq2{6U40Nq4jJ z7%{z1NYZ9^k(b>YQE5pJkBNjny4ujVfi0s{E+y3cWgL=lKt1m37&lS+xSrYUXHo?0 zP|*+z#ew&BL&XEdh1~(}k2;M_l1Av$zcopP-Z(YA&|U7jS4#F^*@3U{vL&Yr2a&{V zH$_8IP51@wJ#(`mVD)lbCjDhqvYc01?;O8W|0VbEuNK843JT&wN~mUD?W+E-8p6>C zh{~cGEP|muU_kQdy^yUZ3^|wiX@3RbocSVy60w)_>RFSpH{_-J27T5ib4HblDE{Rx zt~Dki{thU+E&@hwQ|8%%OG&^0j?xuF38~DvukOj<{N|9b6hxs+ zRg3J%imJ;PAu;ht_l(m*lGevm`Oik-DK$BX*ucvshbHz4+{5YR$mNi93i)Pg-VR9P zF^*?Y1MvI=wHcx2K;wH!Be?Q26(XD=n5Fi}vz@^cH$o#?`^XBn)`sMk%NE85;PjQZ zh+l_e#~ki(;P~$?jjz!Y5ctLt(uT}?*(H6rXoi%0@vktyCBV31dMCwS)B<$JW#CIFXu#myc@EjCmmY0uQc)3;S)DL<8?kf(-B_f6=KNRq zs6)A~HG@3{!ta-Yi7i6#)MAFKCou$y8DvbK#2q|Q5>jp4zP2V;ZsRgJx`sc-`(Vn6 z@2N)@R8}t99C@0)UmX_Vb;8->SfHL;eXg%jE=iG5!#OFjRX6Fc8=2+6+?YB=xgt(D z=CUR0GIf<4r61os98XL8)Oy_iV^9)yLYC%X-wN%sDzD+=8>+wA*B)SBdP&$y%i2)ZMJD7;xdNd znNs*1--cw_pC5$cz1VfS9J2P|DhDp)&1C;P>8@9d>{+#ak({{dJfkpxE}cBYkY^-F zD_hwLHLrcZ6DKIS@}(fv1Uk32uXYTc)Geq!60O9u)XQkc64}l8WI{VdNx|)m1jW-? zy$bSD!#N__B};Z=L{VsfVb5HPkjT=JpfLdWrNuK=Epd5G$>D{x*@LmIPG25f#x%N> z+iJ&jXVcy@^H&00+WspJ#i z@vl|2W@oij9;F94Xh*2k1>T6()aY_4`bVx7LD10s(_aVSy5(H~ zaL&Wl5A?gBmvKJt>>O)E!r&FkM!W99*x%`~)`pI|Je0xhAHdxaSt~pKy8E}BL=f~! 
zKtUJ4Q8pGO6%L$#uPLa4n@bBkFb=PXRhkxW*!drHk5tDfpxhi^>q)8~>kxh_tGxgr zJKiDwhSRGQq@LE`GKjq*R>OBZY4)>N{;tWXaClRv0Vy*)hyr314kX>VtO~4GUD}E?1S4& zR7qt2qdilh0OfZKu0ck2S*AvwsGevRV7c%l5w%VTaZv8QS&wutMsIx$9O!85oyU!Z zbBtKd3MishEyCTrf~iVl|VDYilK3WYkv!qu+%$ zBYUq4(srza4MVoTLy==irCsN_+#K`1= zQUeBhABr%yn48;5+{-}R>Qv??KhD#uXLG)|5|)JRPTD7wwWSK2!iQ!BJKD-XPb=LK zt*g$BpvQh+;EN;6*P0RPmEonec@R<&=q4KzPgfMph~{>;F+Y}#nA{4-Yz78rhO^i0 z#c%r5O9_m$QnaRcL7bsL>Mh0tx1Q{4++Ei1d3von(b^QN`nveHvT3J3C8HI3GMKZG z6R|Nj536vC86X^3DC)?L5+GXZ#&y0f&eZ!zJUpz^@9q0-p}Vi#R2?~#))aPqwKNMF zPPdeZ^+0^0xjy?D9K|~J-Vx>6MIa@m^E-yK3>JMLImSGV)HF)=yDoLM6FF2S`;1}W zJn|W95DkhcG~{8G&V147XPdWLp+>Oc2s|d(apTmXlaa@9`|u<0au4|`U$s1x-(RYH zjqgpp9Zndfnd8*aFkfVK2;Vvz%r^3r9@wHguac(nUq_{G6HQKY%;FmxMLL6>Ml8xf0zBhyg9-{fjdcU6;RP`aLfm(hBNz z&^2NCRO))tNc2QJH=qR|>&j#3Qz9}3(swu(_=vYaPDv>(mFJmQ+{uwYlHMAdl`*D8+_$vxJm}XgdJmO+>GkHVk z4G+hMAbr0l@F(pA4IcV~6+%_f)n7(lAvTx)?gij{=J0T+wL3(_;VYWvBV}p6p$#*0 zmo*#f#w)At()iRVAbGQN3@8LxO62ykB{6iqOAgWzFO_e-VryNisnAs6z8W5Ph!Y{h zu>gWHkT1FP$;0^$1DTR7w9P(}mWIjqBWM3GcuQpvqrfU?9(*q}R`iTdZO8;jB3Uy% zo^z-MiZN&^;E*i=fLGt^9`ZeAwF;9BoQfsD!kC&YfYg@P)B*P*6 z0@^Yu;)w%eSfOjcS;o*XRfFzj?}>yszZd66xu!^0Hi=E9k241MT|KfiZ{w0V%p+60 zfPT>qqE)|^n9WN*PqkmOl&`rUQsG;n5lYRWsSplKm9|w_m|(^|%H|UWy|EK>ucn=# zPj8E|x4G8Tgo_+bu7+Ze!hrzAK@ z@1T)>gxam3*v`!OReswz!cOC$0xF9A)mdsN)^_QQKl>Zt0N^AZ&|e8E`0;(ozpn#q z>-KCg2Y=gNU`N|g1l05Bt7+H%VO9S@VYio_V2Cbmt zJ=Ss3zfe{8h$l6uJ^47JL6JK_iQ9)4Q!;wSjR|KG6TLR_t=4pFtsC%BjXh=>oYW?) 
zXJSU^%P;d734FMpevwQ*e3%YUF6S@E`a*5%K47JoN5QO`jmu;KqXAm4LO}->V4eW3 zIxsM>MVYS`QIB7yCf2{??+k5~uIHjqaBy)Ry1%E}6bWCQU(B}y^oB@7^I&8C9$(m< z;7?}d!PgSDThS!Q6^o1Eg8HRs*cs&J`fN+f=P1#QjSU|kpAzao?^_fqJ3AY2Y~T@q z+*)5<7&OAb#_Gbk-=_c+m(4r@3z)F$irdr;KwJUW1mIC~z=VPMfZ@URgAV|OJ^!OM z|M=o0Li;MI91NR>HUhW&v%yUfhDc#iIV}R0f^Vz=o=3lIYZEYG5|P5j9PI2RPQ%w^ zktgDtz)=4OStV}dfc2o|??27NIBY{C07M!L*$%kMnCR%}H;ex>DfM-90HFXh zy1A+0QkOT@>}{?U(B$^`Em_)ro}Z;h&y!|$P%fWx$)M6I z1YA@EXvr5EAU9BFh*I_PiZ(Y8(gXjlH<+|7<|{a@p5wmtEG?I(|Vx~%*CXwg3` zY+Jz0NB;}C=7*5P2%S6U@$)_3KvtPRHU9O`LtCh@T)$wU-Y8}4N~ z23iM7obH-?uLEzi)Y}WV?@r9U2b1D4r!;=NMd-WKfy(TBNj-7?(q#`&AZ;yU(`rE} zw_9X~5o(1}VTAnr!J!3lCqM{AU4vmicE5c}g_r^({@tGoT>*V?5DOTVN+$F6E~rrG zxced|2A(>wFSF@5Q;Di#+w?1zt)iT2qp%(TD+Mvey+>22o<-R{C8K~6^M6dOBL#F& zY};B9qb$;bkBCfZR`pHvMhur%8ouX+oMTZ{xo^KF zs%*LG=I^Q8P{>aHx!SrJDizn$vs_s{VbTYGCnx8XMc#nW=^z_;a4Su$tF0oESO@o+ zh%CR>%?-%eA|Y~WOl&8fO15)RXgi4;XZ&Jw)6;>*q98oZ#v=1|g%n|cG zVdLXGI5%MA%<1onE-fo7NGs+q&!|ycz>&6Ad$viWiR``Gt&a8T4|qHX98f2``I6PWWs-H*VRoVEwpgN} z7nMlGQ?Z@I5)vofadrX=tEFYovmw}d-hMv|$Bpjlf~{%ksQS>j$#aj>50|)|l#9mu zPLzok$Sjp31YD9<0Zdk}G()PN*dI#tAqED=nDimfMsi=l;1hUgp0+}LVQ(koCk9a9 zt#$1u?9U|1bR#A^p-^>u1b*_f4OmHY=2a&oo{?QjmhH62)GYT_sj!dEY)0AgvvJoH zjJt!E`KYTS7kk451yg4y^GbBkXfUBW4q>icQ-nLU>OEMr6*VlV*-6D8yxY-ag;wZ& zdi(oN?OM8eMWabV*11(Y&#`M+paS-C5q4jfhgJ>szG?v%Sqs4>dRC6Y+CImAdtynACUsddGq zd%3=CrVmP?_Qpo8{(JA+=uDi%JLJ6y9ipe{BMkGGqH!`GninsAXn`6 zKFw|21+8i}{oVZOcFcKPi6Tclt(K1Km>b_5-B@kiA@NA0rP-`eV4I(FP@8rZSVpXg zT#M*S(wTvLdmNA#Zvm?{j{a3xzZ>?cw&~y2v56S01;Fz{x8cJWB~%hRZg;kWV1$-5 zWb<2m(EhzwH4Mq_L!Xh3>z$*All8yS`RW=mNWjzWCf*B(%aX@)zIgl2%CBZwX}&T- zA5lOt(;$BjXy2Le7=uRfNIb=RP&qUJ4fQ-Cw^mLO*rzyOg;VqjEg_rpY0|yKj^;ZA z)qE-~-1;V;RVYqG`)LQ5V0B~%8@n(vtEldfMZFNoRbs*ZMCHlh*nqX5|1axjK&H@Iq#D@IWTI-CTz`$hp~R50njFy4r|xNnA9Mh(t|`mWZS(*NT~ z()`-Y%=he6@oxCHs9OJZ$b3L2`8VJ|OZxfH>tZP=h3B>7EE4WJ3s zB<}SJ=u`zqw?Ln6#r5mgA3g*ha29ZkiWUAPFTS_#!~j4SdX0nuit*3n`xFK`vzM{1 z9@k`Cwh6F~PEuK!^o2P90kGCogN~@e=gp>HC%|J6R0|$eKbI#G;22v~PR@3IE`7my 
zi9$lYjuXy~FzX^r-`YWujL?ALs%IP>1>Mhhs`Fdk%U@js`i58Q!UX{+LcZpJP(Qcb z;@S|Kgx*F<{TVJ~Ci2gz05>HF$b9Z|0Q#jYZy zxIsvs1L=iYrwTP4U^51%Vu}N!sCQa#6OQ57-+0LFr)}ev+vmamKh=Br-|P2L;i-Y6 z9jkc>3r5366aG9s1IACtWmSiK-z`jBJ#wjRULja?amJqhf!XzmuR?Ko8Xp!yh{>Vp zkcCmK-6!&dW4vJg+{g>?LM|pd<`4HBaP#hioge#@xpcE?+g?gE`Q%cnP>4jvO^>#Z zx95&4(vzkq*GHz?c*IO%kK+d@i)*ZQx4L|%Y)vQHGx_4)qTO7rNtQ+dYa)%oJ_Q*i zO`3rP=kg(*Onj8gOZ;~3w~yG%#a)IPU1z6~`pAPfbHLkXI_CoC=0`v0U2R!YbX*zj z>_uZiQGDAlMb^_@^};hq;ADYYbEIa5vU+I@TJy9D*Wdudr%xn;tc0v@&IDl@oX*O% zOMJ7ztXScZCOr9 zisuaEDyi~~LbHW*-gb;bWXl*V%nYYV`OUofJ>!CdenN;g`q*>by=iU}S;Fv(7S5%X z0LmMYhc36Ye~Dhgck4&KezIVzms>a|(2NmMJ8h!V0$xtM_lu)k6kUwohr1t#aXD>q zRg(qdnnBIlg_@}+(5Z#d`v;PPaZ(T7FM(HzaN`#8rgcg-mHFZ)LKW#&XI6PU><6n( z>Tg6=FGFb_SRZB$n`^TepN*JIy&@B}dM7$>Dn8 zZtjJ4uAZ6x<6Vv1yeuyY`>^Qb{Ql)GA>#_sAY~PJKOBdMJGZ3Q(ovivtxOIu|a=&UBqOrw)O7LE-p+#NN2eYW<~HA#hoM$Cc~x3%l@gS z+UNqCHBR>*8_1Kj!LPkH(z`!I*mx)S4m&&28YHS;1kcy4##iI!Mwmsf?~fM=&H^u_ zeb>X9)-J4>NM%39iaaVT3+8m%-?=e+@!jni&tyJ>_CDv! zYmI9Wgqr#jeJW&Hp)b+k&2z{p&>2FTS>?u3{`R9mMV;oU{YWgs2hTO9`!c-{@FF`^ z{$&(1JA})-c4%q5;zDQE(_8b0swS?sSL1NjHP<^MBKFs390X@7b7X0{NXLactsUMi z zWMX$$IeEJe5$FOh`vyjqGe%?>KAmxty&zr!X|8X)>WXhx_JldpjYO|5H4G2(3#dj; zm+|KpUsqOE#{VDez4upBUAqNpM@7VfAR?fGAVhlaf&x;c_fV87y@p-{M5U_`dO%9( zp+g`LkS?LOAcWA78hYsEZr<;FcieO2jPv~g_ZP=-jO}t~?WfK;pS4Iy_Cp$pgzZ1? 
z=lcACQ$a;V#2r(Sd4eBD8V`}4uQPB{ntvr1f86OgC6)<_=RhLS7kb_^nM}%Xy~@j{ zQArY~ZmtZH;+oelMOS-H*~XpoU3YS(kfprd=ru$_zDljc{(S9?Hu#o}IEAwN^Gx*Y zj7v?P7_o0KP&ufVuBy{B0itr zcb1q3zCMRN7R_+4$Rd|PJ_qGqNhA--yB>`tdB$;7Wc{7jt!_rP(P!MBn1j=QzjAC= z6m?7~m_qleMH%)_n*0`7&q?!F-Gs;28o)hzt-yH8v(@X&y6uRPb_`n+L^b2 z^WM}?ymy)-0%u1>M}@*E1^$u%Oq^J(uK3}8t%#1FsFFNxpy%gX`*H4K?A?pT7tITb zDIb(FiGC|!&tC|-#=selGnve-N)FcwtTpZ_3cTI=LHg0BxO*A8@8)@5nUbwH(2#x; zM1;At4o|tqIB{!4vmbk|ixFLNci-;iBvymJZ-#EM~uKugyC^nfrHCb76&cO+BeY_ zHXTZ~NEU^l_B7(%u}|I<^(z=D{3w+S+ZUCe-s>fu5xHuCf?WLp92XnzemhPKN|f$< zHt<%F+a{=&X{wU)EZ4xiOT@Ejix-#<`+lqkGrT>J(i-G%daWn+eh;k@-N<;@8sl> zq7|JJCel=tkP;m<^Ost%9XH3?Mc3zR(kA+B1RP~vrsI@|R^!7g(1V8T3(sM}0z<&GlML1j8u%9WY33c8&1oh3rAiS`48pw z+r-Q`d3lyY%69c`LiXpe1lU8Hj=9npl`NjUAlcr&)fWt7ZCAhU-2NZG_4Kb}CFgnx zFM8ZBWN+q}o_ipTyc2G=VpC4rdMG9@{;K`;xg+q1h|C+cbZK3Ph$atJh&bzhDCp^Y z7~8M#LKG}lJd~Jvq>oi%@}g_8HUIdamKuFO8s}@>ivU>&ml|J3(q`EUA?okLzIYvW z6prfg4RO=Z+|bvf`3P7PDA7@|qX!cmMuuFD785x9~R^>As$0y4dp!1%Xo%n&Da+RzLcE zv3_+`@#hvr`U_{6p$P~N@B_A5Lj;(XgbEvG_Pra%fQOrQ2fEvB?duMMprF<5y&A@~ zH#YW3LqA<--)FJL{h)_x{pQ4;(24OVVI|xLM%QJyqFC%cUUN6XP&1Yqe(vm*OOhIw zWhMsbYW*f{RhMQt;G9a&-v211dB=9aZ(o}AiVf2qkzBiB7S&ol>|TE<=k_}v)S#ag zuzHb~8#j+`L1=-{kHl&qZ}+k;lm8?_~WR+lvfZY=<}g0n&8`kJ2mwGUbMiCC|jK_W z;J_qS53aras`m7v3J(f`AFKCVFM)1beKcgTeYi!>Pj_=MAtPR7tP%4aIKz{`+pg~0 z*6SSA%$J_F)K=euNP?=dn$6lF#;fUMkOft-B~C%Jw)P(8C3E%E3z|ru%cjJO`KAzy zlStqB;VJiL6YoEk1(P}aCk zG)fXiy}7FVs@x7ozG3x3#qK6=;s5H);48EgMtzt5-kElHl;(ve4g!zSwi0e!4 zKkp>)FaLY&{V%ikw}0_rK4zZs3@<-@{=UfhG~5vF_b%7j{S2tqhKX5LJ5H9l(CgWb z0l|H=5}agYiS}sKQxNh=yLdb&gPud#BT{`suvQCLwe-!9SIn?KPVY8q;2H&^A*&+G z>77zj%TBZC`Sz%HUYBX@pd{BXBT^pPjBZ#c?2no{5*`VcC}t!<`!?S{sawmw>qGG{ z$?bopjnh}}`f;w;R&+?jOb44oWdmmlfwmaphOM7dm9m(PXVb%+cHEHhGm$}_4u{); zHS)80UDhxYhg(&wN+{Wger#PQy#)}SuAJDop~QxKtjXfTJqzh$ZGfOxhha%thH|i& zDaXVh^_1p*D;9kAV;r{5bg;MeVc10=X9l(w$R*zXo}{AfW&#FKb6*17b{FlpP zY;wA1qf6!)12+&_O~JEFtdXY2f$Z!Pzl4Q-)4K}GmemwI{)4uuF6 zE(A-l3BFy;P*2&hn^U$hkICJv`n?|5<)t0q(XlqdITmdtwZsYV(CXfG 
zMEldyeaWj&=fsP&J|r2axc-Dh(y`5meJkVhus-V1S6w2iM|Rn;>2O-R`ZmcHfW#lL ziL#tD|3ZJKA?c(YGKWH{C*73O!0CqpbSxYwg6l{zr2Y2sSSF=ouSlH7eGys@Rr_c!(=0jz&UJ zDkHtB6JO`CV!kV7d8NL57Y$l}zSe;7#;ajx)qDir?@!BvQ#6*GtAhMoJq~(=-bcf8 z1j6hzF%o*t!xGKQo%e)hThzwX7DpC~?RMUmzCBPO2^SH%2ljigme5P+l9I_8r6sHO z5Q$Z*NO8JCbFiettkCW&1?y6TCQ1@@bf<)87>086RTrhO?d2vy8`zH_%|Yk2;LHlD@t4~x2#3b#r<=K7 zRo~t1OG0T_EsGHF`$-g`SJh3XcZ=yZ#LozrKP!PyR=a;SJi?9$+xR24N|=>{Z`?C* z&?*^F%%RS|H~6^FvF`9L`j48nB(Y)uss(! z1mVR8*v36B%R%|(W>l;=NoQ57T9p*@92xABQMo3okp~klayNU9PNP5%E8C12(Mm5a zaTvVmJoC+qEMC+|US^qZ(QUaY=eWl!1Ljgt^rx_ETOvI9LM)ru%^^|+7ZC$8Ckt&> z&TMK4;z|{MBuq`)Wj8tQ>&9DT(0(G7Ak(7s83wki1?1Sz8TJe^PrpS!tr#`Hu!1Tw z`*LOi2Q(XGOMm2kQ;-j_cqmi_S6ZnVtS+R-GbLiL<=d6*K69G47T%wa?zOGNRLz5LcN<&79{03%L)`+!z2s1a=R~H`IrqrEV{PRy>1lTZ z$>T=bS7nf{Y5zx*RsnTy!kD4{@zF-YQm4#g`@3dxVXDl zJS>daNBA6B0;xFnShR0~aj}M=5hY;=6w3G z->w$YhGuX1OXT-RQl5?V+rVie zxhE{vpE;g?y!<4C+$ZZ%gW>BVDT&Mu!3eI(!aMjYvVy)9231UaPjV>~uPPo2HA?(l z@A&V$0g^+AoihD8uXuU1K|YN@7QI~XU`Im)&dU-3=DGLgqi2h!q1;Z`ono&NmPE#%}{6|bdmGQu@OGFmg$AuKCe4(4Q9>BjCQYxc?$i18u9}oR4qEu~v5An^* zRlv@STVMz+n?we}_w+01XVG7IYK+#xprE(uJ%pTil5U_b`D?@8HW?miAA}ALMoFMou&~R`1Wh4d3Ov6Z7{e+mD(KBLY!)3A@w~KW|4CM~Saprh z{PG~xrdsRIWmE4fiLE!HB&R#osT3Uy3Ju5?i>lO;RJOCg@3@!fM_(f$;wz2&OhXN= zuF{P|aiyB90?4W%AaAsrU^1;}Px>e67g}>*DTw*=2zdnhIK-*&f}uIIRl9%aZM5ElOgVGFg4>N{VWHDDokMs+CDS#|?cw z@zRq1SzC@ipV|3s<9}5IRXVv6``W^pGV(a#_dI-V<7* zf>-e}z5`;}`0JQvB_NU2qwu#$qMgvLTdz@3S#VO!zKiwG%k~dNfw$BV`Etj~NKUNwT4rhP#Nj%Xi1@yUoDH#dGreRuKi(&()|5Q1BIXTJ?VcKe2qNiUi&s2e)NieKC=L+9O!Hcww2I%>6HIM&`L*wqCX~ z|F5f$I9-7#Y$CvDUeaa#_b)$n7s15;vq`Dl|5_H~B!7>8TEvbr8UqEd3*dJaQ=AN6 zDo;8fqEm{Z4>J($%_F`$V*3Ttm5xZkx;|kKg~nbzHoTsd%UDD2qCnaFhm+om>)Q_& zY4Yu+LydD#Lp#S`qDDYQiY{d;I~MM479Olioi{<+?rU3u3409NeYOzt#ZdKlFfn#3 z!%HQx+oBr|y-owgr(4FCl<+8YHvPCUy!6yo8b+U5e@0aPDFT3QK9H$WDtrY-2)=wc z<>@>wWK+PQ4`h4oF&_vpPE27f@heZGcCp6w^U`IWd_VmXsDbCw!VM zChNg8VIl=KeHXa{ygVrgwx|g&C@3>}_v+HSjjsI`z1?a8!B;;~6O(K(CJqFM_R8e7 zz0_pGd67>Z+!UAkJB+o+12c<*fr+J*c=0?%Oo*{b(4W(09$tX>^ms(4bHS}A-reQk 
zy5rn}MJ(^_m%hx1q*@6daIFrEe{(6@OKCPlO=g=_A-M4>WZuy3=##v~27F|{LADrw z@JX5eq)%-;Z6VMr);MZycQRV7%8SIX`!iwC&9FCk?e^cc#c2o_O`$j)N3b2_%8rN6 zv{siq3$&y}K#M#bOcd#yl~NklOBNT<)ff2^kQ+#xG6NyD*e4O=ooX^bv(%u3AmgSaqz~_Y6GI0%d?~l^sR8{tNKsg^Nw~AbPokQSOLO>|_A8WZC){#a3C^QF zYi0Si{t|k|HFx{Ta$wDSwmBXuc`}aeMH?VkIhCMd8a}1Ks>HOJlgSsKuGaL$FKle9 z?pQYCbXFV+FL{jSAQh)bzr?yP`cmdjQp4Igczy1!rcbQ+?qut-=x??BOsFZR3-3C` zF66|nuzu`2_!M+u3%?&cb^oQ3-z&c~$w;S0$zH_pS32xs`ge(`_#CHF=Nk3+?OYw5 z;e`X43OZLghE*36lPvHMoLz zpZiej%iFol`TLc#f^XgRvVx#!uOtr->l!Xu*Y)0BVb2m^r0o8{o~*2CNn<$_yU7d$o~u2OQ?(+Z#V z9B^(p^F9URbksA~R8(zmv}?G`PVSHmG$GgHsU+CVnZ?W~?=|(MRo_j{J#MC?=V>*S z1KeZRUJ83SmhdqPlu`kJmzQ6j_n?+*>PqaxL5hX8zCS>RpLg0@+u-UN(V8IBXv}l* zL0J)EjNIWXm%P6CVfoD;XmCn(Ml_^4^`$e1G%+2AhwICm*%y3r8^#GNdG0x6@iXik zY0ti@0A1h%`4dBxaNn%1lxk)X@CbXR+(u<10pUsst{3!Lm%iuOb$+UOht|(s6|Ns1 z-rhS5s6gOvDM(C741Yzp5C$BR)rkO7SGJXv4e9$M;_xfE?yv`TF~HjniRc7DNAPkq&#a1ccI=cm||Sg;tLFE zR)vCV4Ky;4uc9xi4O2Q%lLW09o^Qh@&B}fZuX0Z>X>j{lg6Zu=%$U8STz*;=Q}U+< zQZL12KsLlF-*3ePlsM4rfx6P;0Wj10-Cl&_)SU|)V{)pNoRsJqVs9jd>q)Fw>^1xQ(+wvtxbRtj);QvfEVLOUAr zKrDFdhD<%g>^P4z7&~rtLor4quRrJg`sx?7sXjN{+ySw`*HAx1SAg?U!(1AVXtJmk z{f*fDd64;dFkF_C#SKRDy-<<8CEWe-tW8|B2Z_Uyh4_Q2pv?Zg5Rp>Kl9MfPu&f^ z-%I$?FYsWHTgLCotWTRI1MWUq^G1*9&fy4!;Ra}v(1&katJ&9X#>kq#&HXpIGiAng zh1gF&o1RM%!-l-0vftly;|6eRRIg!gqPg;6U?Kstpc53%Sqkjf{Yc6FYIo9b4m{b= zr^w$5EzSDUc28R0p*4|Fo1o64`Qi8G{EB{)o z!tq<|`iJk-Ne{8@pmf^+^x-*BRcPI#A!VVbx>1**-|P1V?|4Qkxb~puPclQ)?mnVR zqHTN7QA;1Zoz~Nz^6q7c^g7x|u*Gxs=nqaJf{$EpD38C_XumN@rbjq-EAaUHaqhRZ+cE6_-#BS$o}Mz@vp zXI}|W>a_c$YA?F3*c3Fbf|ncVND@uZOS`HDiU?X(hb*#zBVzBDW%YzhFh-n{9~el|vw$0i6I zyCu15r$`xhrjNe0yN7KE02EnO(`>sD#Vo>Hol8$p)D5bfEV6DZm4u-+Y>o@bn5W6#XFo5PDV8a@OS zJ55b(H$vAOEl_bEv<+WU?es)RrYHcwN^UBbci}nb*IynMRf^>-S`VZRmU1O&1SDa| zo=I_ga}58>>9p`G_ya+JyQ)>;%C!FNVff#X{(oZCstQm8LL4IbNh{Qb6QL~y{bTst zQA{S(;P=*r64NWp!OAKVn#^y8joG}-eTs%k<;DNaY5{kD@)q9`EAJS5S4{o3%5Et% zNzJvk1~M=Bxf43|u|xtdyaN2he|W8p2N1nPQV$UwhG7(l^je{2!ol%@FR|}^P9WM#^8<|?AplC3co9Pf;DOX9DMo3t=pK3 
z*!-ou&j%4UmY;kYl}`&VeeN_RC7R(Do7aZSUF^%rx}5a{bT(DvZO84!P!ADkBprB& z3ctN|vG%YExr#u+5J7TwzZ+X@Pw6n?!}kCgcFNUx9cI5jP+$L1C29@IE20*au-!F0 zFXg1QHEL(Vl~R|^Tv}F;{?4nSPjrnVe8S&%pIcFRd_0tmYdiJ|IbNYVb?lgWQ;0iM5bIY{^+8yTA^QPTH)6^@9^#6&g*g}2@4 z3bXUF%hCk^cTc2ZZJD{oBQYHgLoA7+?r=QZd{>_d29xXj_}g~6kI+~?H6v6=8h?1W zpQ_#e=-YbRifTFrb1w$jSRMsb$O~Z)AH(8VwG}SXt7kIguDoM=4>B_=jJ7(E0O`kpNQRqQCnZuKqNy_YJ9y)c@6-#SV56+Ad{Ccp7QW@_RRofxY} za@x8({A&o^d#op?Syr?W$xlU%&ndoXz4kVPh%jy5Cm57YHg97a_02b;oMdy!^5EFd zSQJ!fwOTg{8L*c*XOXA(Yk}+AH7!2M_th(3&|3o`JFhIt)e3gxfjxMRb4k%EoXd|+ z&T4@_*48>vUgK*kLMzeZTR4)Qm)jBDgVeuJ{gtTKx`2j#T!_nqo!^tNtqP%q3M+WqreTRbXBuurwa4N_U(an*{&cuuNX%EG z3<^7KFS_290$~ASORTKx#p3nlhlf}@kKUGopr+%(dHpPwVgO26s_h3ptK)eD+@o_9 zpc$UWKIGH@PS%Kts101>9ft3d!P(i%uWT_^%?k9ypMPURsUaMdVuasy)MDX5y>Tr> zi{>oj$l5Aln0s*ZwEXSU9$1}E4sKHS*=Q!3)#hXIuAXGhDEgvvVpj{K9ijWsG?-i26yeeupVF!~e|+uN!eBzI z`dEi?m5eE8TgCn<67Y_STp(Of*|4Ep`aF1_9gk7X{DJvhYu0ShNv9b#`p`$hjVbLc*5<{8!Kp@u?8)c;C5QgJlHpvY8WFBiCP}#5B zmIYkBfxbJ;t%ITwj_XBKRSXf(zfo>^I{0fqQn*iz#|S;hT3!WZW%~6==v&Gv1e(Z- z`|4w^bS+LvwYw+iE^Q_O{Ngv->0ZRrO`L*oJPTi7#nxtL*yy1gOL~6QCznTE?8^2a z@wK~aZzqh8X5O5M29Bkj+HHot@MxGsrlN#!2UMr$nLKEC9A5o-vSgGh&sPkKRa43V z6gdkp5o+hwrgN=4uk1N^G~Q&ym~XSv$W}AwkPF_Xc~NBxG@gm zk`c32v%74P`nknxG#f{Tr8mBILDELt8tRm^^qqUotzSsqJbcG5Y4Z^B6#_u%* zB{K}cWvX{ZU%B9N49OyIqks80_IT-+qzeRS=NN1B@`?AUZa+Yu2=@}c3ZWCn#HQtDHy>A^U_&;2Lm1lU0$KNtYTGrE7rSHsghD zt!7s%9t^6ME(2XW@49Ok%(Z9@C`kX+HYm>8HyT$=3e0R{P z|9`6|&IKP!-VvBa#xg@Se{&{ZblpNOiV1-FlCet0ZfD!mc}%D&(U}_tJ1^FM126c2!55WIE@$$6T(5X9?TR`48O%8vO9B z&Z`39PXY1K-O;g7&)!o~2nRV0L0tA(8)NG2dO*xC*_=hZWSK<$i?M0Eo_q5;Sto_2 zq1UV~k@2-__ir?~&N5E}(0!4DI{vHzI#-ir-;FYt`uP)mC9+&?niX+D)SwW=oD1^; zO%U@Rp0#CDi7(gOAIa4|ny4)PTBl}J)AO>w(iT*Qwe@l<7fg+&6}!rABfJ+^GU@en zig-KE4t~zj^l6^mL3hLaPisX+*E2bSx7mQtsw7P2B;B?HBB23tXqdg*6cIhzW^eFZ zBcV5B=(KLED)au@H8VFv4usvNUxtmgh8!r!KZWOp)etL4TW9R1Rr4p=h!oiHN$^k( z#OqX~BF9k_5#>s5o%(^#19ay-fR#U*Ua4B?orYT$PS{{DUC;SaJ{#~EF>WlSdcnj| z;b5~I2o1gX-IO!aqP-E&zz`C@JDgoIox5QF?35e%UwDqI|0LDlWNkE`)|VsxhC5F? 
zCi)Nm2^m8Flj=g!g|lnNn$-Vf1^fFgr6Ol9;C=2tsXe{V8C`X_erE0PXzkN~vO>+f z``s)5%L3gGip9w7bljc9FUO&APt^QxCSCU8n$)_Y)3p$r|X8HU7 z8^gc8%zt1E+hlD3UEwWm&9Wy4jzVgYSNq^9$G1fggMA3N%v@)6v36~Cx698HibVhm zwI)M*Xphg{21I>O5YiBNZP?%*zmc1pw#;MgF|WqecZRhGiQAj^lpRWcL?=t97sR4$ zwnkk33|jpo4tzOc1$7PWd(2o%`F4osOQo6F^p1$XY;r9XFvph7 z=qU8YQ-FqR7}-vB?$HPT!pR%x0T)0E98i9eNOlloAI7kY`I4fAfs&Q>=)nDP-hd6? zZdSjLD}D@T{XU-|OB)#ef47tQ5Li41O$ZSkL377;`;oyfg;9UaL9`)Yi*>J5#?ixx-C+> zG`~Gz$L?zDYRw~WfbtUjp|GD$LzJXiE{fd7Pz474PP`0p%mIYUks%enT=Rm(b&NJ= zSbt5`xn2Zy<^1A#p0N$o+7TKvsQ9NV#@?sMu*RnC%>WBXq;%kh0_A*k?tV6}hmyFW zMJA@RYhB6AQ%c*gsHA@<5I0GT*};Yg9R(xdZVo=v6^?-(UzuC%=dm=uBoFo5aCsy8 z30MKQtnOK=1ek;Q!+3+-7#Zm0`}gro4^XE9z>Qov9jZ_o68!w`9Fu~P3}IU;yNplB zNa8__O;!JM64^wc4Vm|@sphi)E$yTH=Dbc-)J`d0o`5E_-c96``@5>_?Cu+DHScO7*BVqa@`jga~7Nd5|8_aCPqdL z;;YM5+yeYicFtql+}QWVyId<@Af1iki(rTskkTKkd*0z28oEw@M%uDh2W~hXh^DaA zXzR6rddm-M$38evI!)*A2Gn{S4fmaLYM08~mYib&UhAO74YXrOkdes&bTxLM3b^=d ztK3Y6w;W-c>FhZ?G!$=rdM!mj|B?p2xM?hdWQD~ec*z4Y?FDKk?6s|mMu2A6XRfH2 zaVLtzi|jQ_=`Nnt%r7kxo8K`aKU^WEn$J%@asZwM#Q<%!;k@&J5R3%!``N4-tMiU7zSns3y3g$*&5{JVE^D!a4uHsA9T zYk@*SUw1JnEWCD zKS@uUPbJYe-sH_#ZU>%C>Yi_{|C4gl@QieJTDD+$edJtly93uvUS0d-k~A~t`CaXe zW}|dvI4-8je64xWCGOlJ+tC+wEe82pe`cG7!)S^YwLYtOIvVftG7Ej7#w%$BG{D3x zM^0XcxL>KtdYGY$xkTR_$QJI9Fwd`UbtO*{x3l>!?3rv7=1xo(m^dX1`VwiN9h>@W zAdvPb69yR98+HK`mClg%UF%GcqD{I&33hOE=&lW>T5Sfh+x)_2XX2s3&irJw{A@bu zSU&;k z+#L-4Q|uOy!{j|K^FF0j$S7d-{)moM-m5Y-RuB;WMe_<7oQa;-Dx9sc9N3Be*sBa7 zX;RJ#wo6y+AFNMuEPKCG>-uz%uVPDz9VR|5a`kBXsVpQ7p~k8=_l%^i1#|1*-SFC< zPyD^e(8tpNoD68;SC%e!1u_T;N!{W-_y(MK+naUgQsSUCT_&?&i-Wb!8ms^r1J_n!92JiXpFB@bUZQw! 
zDZRz#=PTl85z+iYi?W03y|z0=l)JmkbAHa8&rq5R?RzzY+AdEX)b2J#kd+zwi612ElodK=VY=*V8ndXQ`$-`!vQd1g(gvCy7LQHtS#{$9EH@NO-wmJ!Q=#kMR6h)YZ|m!jdM3Ws z2wYaY`F^I9ffaT0OVanfbURRQPfi+G`v-uprcG7Awo4yHkvR+GP}tnV3act7i^v|c zqrNmjF{zAk_542Nb~J7!@k<-O3R64D?2Ris#?X;GQM1{Vwl!O}6NSJ5!p|hXvxgX) z$>;`=rFSg#@=QmkJM=t%Z#{q_%AZRdBo|f!f1YRrtU3ah55YY%;}Ld#IM)4x-PR$X zjV&EobM0%omCE=}g{UZ=rs`+H<Kb1o!2h$>o<7>;t1{~_pqQ3Ft zf0(c8q8t;IRbCQafwJJ++-i#ai_m;j=W`i3yRS5h4trBl{a~rYhbzxerxcqlFo08T zkVQC^wr+orwOo+?f$8GFg1aP3^*S=shhAJa)HQMUrf?}>vOtDx&Vf!pdz!FHExP(Jf} zzc!E3T)<8qW}QC60tRMoN9W3bS-0D`d2J4o6@JRiJ&_A=h6#ia7|szplZFFEEy~kd zqhU8rUT=$K>&I`<`?TbO{eE4ExNQFyX#Eoh$N~vG1iAscH+q%^&Hb1^bK2yc9qRxS zrt25?kFwB#Z#E~2!`@>WU2fdgxKR=r_UCze9u~}hbz9o*>eP#jC0oV%O@G8779t01ccQloB7WTA`$toF<)U9gRsL1Gp z5d+}MRcv)$!W*Q=!`G0?fEtf4-FUg@Pntm*lgkANK+?oCz>m29un#{j2QEyNVF7u~ z&B$5H2{WLeJR<{=~Zr$BI9n4|>>SAu5qsx{(vF_LhH?5?9g)G3u%t6iMJKt9< z^$%UXOE;h}W(Mt2FIoU3- z&G3{BYqwwkYiYG9!1sDaUl1w8jKgs)L_LLU5&=dcq1P7u zL!+RE%EXp!7T~QC64CaBvlLFV*a;0Bu%iDIjAW!0NNM z7nw`66FFk)mTUGSd}zOZ?j<|!y}L}{;~l%GQI|RpzHjY?S)q#6EA)vN?_oGka)fs$ zh}U_yqBojrCDhx{N~LecGx%;g4dhd>FG`RC=4zIDJorSV#>-O;?3wN_4Vt|Q5o2MvR-g|eWTqkBE zI{7cU4cJ$#{83}|axR}01ZJE@pTgOnFYo49qu+uK7S&9~|0~@i@42cSC!1d1!D0mh z#y#^+@_wMbVxtqDVO?Pr|BPFE7hR_Kw-x}Z`@5HrybLAE0<;sLdBk@B4Hn;)EWnQB zsd%kyCl^xYzdc|k-J@BOo%Xr*QSdL7{sbRDadaL|g-IiUDVAW(7G`Ays%TV&r?*z$ zR{@1erL3*Yz3Bx#kMPhFn%_b5+q92T%=?nfX*QLOmLULR5&u195M{RtP(wg|C<*^q ziY3St^=$+7x!PXH^lGg$^X7(TUpUUm#fLY1QzOE6E~my+$jnBe*1SPeoXR?&wZzThJBOQpK`~_g2##W{EUU9fcn_4iKl;-kiOF2 z!!M$4d09@kI~+v;d+nEgv^h8P!%3Qoe(feP&9Dp#5z{Fa?67_1eDua0f6K~;qbB*@ zPJyg*`A$vkM0f;y=cLh9)j6}=R}!d>hF_pATK>J4d&#w?xsme6eBa|)1x$AS98hI; z#9)+$s;_VLcpH*;Kk+gla&Aco{f_HiqRz|@2EYH)|)Wfv8H_~xBHNy1|3d+!I4WCZ9G4apy-yN%P1*aNcG`2>?iJ1FB&Q5;v07 zQ=t)tec-9uGC-VaG(fyg#jmrgA{*nAu=$voMj4ja>oFM%*<7%Az3c*eUbFV}H&t#z zr}I4k>pW9y;>VU)J3d^i^^N;#IJ^d7yl2j7jn;oo$CB{C&ZJ8zN#pCSg=jXjZgzWm zVSoevR%9#U<=kxFdp&j~*;fJPciCT~8BGSu9<;S}q30Lq-m8Q0of$PWiWNxBIpmWKTy49w~$n-_0*X&MUvQ{=*WO)cI+q 
z{2vND;GmTaSFsriOazo!1%;`_OQV^3!JG5SrRdzGUm9#-S&f-wpMM|rmNq1sln8T= z4*eEi$&A&xY18%lDfS}{F&>>j* zN;O+8-DWnZ9h=EuhpB-}i}P9q;#KGUkOfQrGobFRj%zEOvAAtFZPP0ULOZ1Qc9;q%y+ z&+?x-fhACbfsr(O<{KEx_Be^dzQ3& zo)c31hgaGUx!Y=$ z4Pax}LkD^Ib%;GNe)QGyxZ7wF(ggAA_Llh_b*26Ks1DJbSSz0glnE}Z^aot=qpRMj zWp@j#tz)>~)mkbJOMWwUz!u>8XDKr2Wpch|h+=KgkF+o3C~Dl*lI3rq#om!ZZ^X!X zFC_1L1c_a`Qg})Dcsep{p5GDVro7#!a5<%rk~_XiwmyfzZk7%SaC*`b6&)rXCU|uF zh|9gv+v;m+@7J4knR%(DNwDG~RoO#Nc488M21(x({>?4CnljO|H;qDcJ7ZV}+(3J4 zUIDe>2?pe3Cz-8OVLk7kuwtve%7|Ao$s9-(a=S8eL=B|G>CJ*sDvf(HS;}E=>ft=L z($p}&EAw#DPU2T<0n;_!bm&lV`(lU?dEn}e3@>hvt@7vTLhe-P&V_<#LQrV@q0`#A zf+)!Jw_vBk_oI#Tu5K(;NlMH4jkVH#4@Y6mifcnMOmRj(ehwA}a$LDX(!1nA! zslm9;vP9G~ie~GV=SPM>%Mmgr(g7Q5?HO-hq%&=K_sDj~(9wXr!`WuLUmMk~ z(!QHhViF9RK=HeV=}rD6TNrabp9bEnJM4VH<~|Ozq|`|>pM+6Ry|2Um$~z;KD;AaS z5*}48Hb2!R+c$>j?a6(uS!i|^j$LmT@pM+WS@05GB(&;-FE);Qjq)5!eyGc_wR!!O z4ETzIdiY*~DD5=E0zt1n-=6+bpIfy1@8dbmuis35Kk%D7m;?Nv z7g1ohKo-*LNr7Mdvbpa$FS-@P_2bxQwpYz4ot!~Q!u8|CpRM^v^&Qks7?g?3OW+&| zV9Xt>e)C@NLKK>6aQ+L&=yu`~&5^_LF98x%j3cXlwHuZoRObE4_n& zroy~E(Q9AQR_>`^&CJ!Yj^LDPH`mFmm&|^LMU&g&N{mdoz^vpIlMD`-!hl>MUP<~b zmS1jcxka~q8SbnsAw>C@UhqPU-W|7k(TQZbuyDO}O40t)@tsQ5qGtO3(3hQWTx5)Q zh_&yz#vVGLYre`7X!*6bE^I%ovufpOI|^)#W}E3&*w@oha4|gDGzGH`?x&9<>*aQS zebZbUOdR%)l-Z)BIM`ul>?$aaE)69Qfe~F1H@b74vuyX~ZVU!5qfwd!>*ujVy6f&rkom)Yae= z4KCH|Jc2>z@e0XG-gjtHtzIxvn9$%CMIB!&x%{INhth1L10Fs9<{*9d?Z5(!x>T!7 zaNs3<|G7tV*^gIU4}M70BUW01KUUMXN%hv!^Y}$mnz6Z_DBoZABH;>oP7a|N0}$0a z>I3MiZfPB1g%HhWhiNiX>$GPM~m-(5<>DEZ|nF#;@^JRoa(VTRqeoL35 z-*UJScgJ_`^Ge?i{=@C2B_ESkZ@N8q7^zk%W<83B(QK~p&nGr{g8k5;2o|TiMkm5V zU9cV=8kuze+wS;_oPJ|$udj=Ct)ErXa3BNYXAXCXv7R5)f>x5n>Q&!kpm-RfPdcX& z))mLsO0vT^_5}Ao3Ej0ZfPG7NS+0Hjt6eQzfOteS+<+=RX?XjZvXIOOLIWaOz}hcn zNiR@LSMq~)0u{dB5r-QDgmu6W71MEbviRD0tqVxXXSJmFy;`&~bzo3B0f*Q@TqQJp zDCr$H9o4wMG|v4$?R|GtQ&|_UA_|TL1QkcBqJjlb5KvmghEf%j&_P8|Ne!cxyfWa3Q(5XP>?I z*=L{M*(H9@?ETz?o>K4jwoPAlw1v5Vz~XJk*U5VlR2uXfCnn~S;l2l)_03)hYuK;G zz06F2X;mjs1A~`8VM<&W?QXac5>p~;q$Z*@bfr@HwN-FUKH}opqW0W~p|fIp-G{(J 
zlydumwvxNG$D-sqE`BL6JCDiA#x|tYyj+|FH>%>?r-j#Cc+U@qlILOZNqZ>9$}-sYvlqGwg2hv z28%6jKfOr@s!o&)DiZfJS!7*1j^}K-aD1OJ-d8E5Ow|G=e|p?nefh}^8f_W}vr6-J zZ@;Lv0^}WtyX0}@jclF)n}U=*SXi2 zU34dN-Ral^3&`DsL{-~DcT4@CIL;TUybi|oe0P!g%={S2nD$$-+?ZAc2xqRw1^ks@ zZT974^+)yg1id}Rw;!mSUauQIUGrGyVDD`^eEs>db9#gHkz_cc`@u5K;Z<4oq+g!1 z!}aFh=QX+d-6Sh+9An`k%u-M92f*N)-m2?o@q8nR_r0&Gmn}OongZSx@Ct0lwdRWN zTkex#;BN4pcg{=~-Wy}>3!uKfO1|?vqCF)b>&x)-BHJhJt69^=uA9bd7G!BA>n@x^ z!Luax^-E5!7;fRbx>(BId;FjwX1kx~PWK`1Ief@JA@VA&8i;wWSi`yMGpE$*hw~h4Ucm2G-osdeMWx0GP0r@>?*$H~ZLVL=wf82=O*gEsGtIUW zpnp;v2gg54YzU5@qrwi+&sinqv6QHx-q$bSX;!eHg5fcdXO0e+*4p_VoSikAt$Dsi zvS9GV9q)$lQ%*|AXl)_23v9%K+kvh8_bURlZe7;%;-6^9t#H3yE6D22Z7kk76SfZ6@_S); zr;n~6PPneIHe)$p`OxONT$u7j5BBDcoSv0nZ5@4%=M&tMnc~DJbM1o?tn=0(yBt5& zo#{wHrt<_T@k%ZYs(LL0`Nb{)neEqAR2VWMR0|mxAuWY1*YN(Sy^7l{yE&2Qe+Hb( z-vzH#reRm&+3=f5-X{06caHO13j@Xb$+}xljFqLih3wj$BXw2>%x;i9TAW?%m_GFj z_6e8}9Xqnms`W*?sVL*XT0oZv0*pO(Cy(c7z>WE;#}OOkcs|$zAx+f1VEX$eSDC`l zdmWaiva~{q^zmUftKx1K6~T>5(27U3EXMj=_vz$WiCDTdJZ)Azw+XA$UUI5lp{hPUIZHgMV?lydtv|EZdWN53Kwmk6UlqVoU}R#C$9Dg*n{O74En4 zLcsH?WZa{vt-f`k6Ajl^2>TmaD-Ud->W|$OROq4`YS)(&KWs^bafUb}m43*O62MjO z0{3Og!Tf|qruL__>BZt+4hAuXs95VWy#&4NPHZ`RAyc(8KciR*#&&JIeJ%l3p;Ms- zfBJ3&-XK)oay9SS&8x;U7xNLcgGdo+{9`|2ZZ~Zfej^Obo=yLNF?{AwXszaKS%(Sl zyYj09h{rMNGm{hNIG&U*;HjNkpHSpC!<<_I;*b@m6%Mxz1kVQ7GE4k z{+qx5S=q~F0vx&qaiX$>7JOlzBDDFNs2f3CNvhe;yrXF-NHq0Zpmx!bcK&xuxnNZ$`}(Kn`@kOk~O9=L-68b2t2Rw#Y9cwSb> z9U{|Fz=Qw$^%cNel+;Fs_<`?KXxIz76fp>B_AH4k1b0xUrh3|na%&eY5()zrk`S=`;4w|(|6ur`ZIdm;F<;*b}FjiBPe9} zLlz!Pp>FZ%gj`4pEp}5hAu6a5VxQqd`ozZXAzP)!5Sf>g#A>ajx z!DHbmRKVcYgXY7bVb+hyFjV>PrHuNV6A*QZK!qBSn#)~Sz;G!Hez4y(Qyv(JSYV1| z^rt6|bb@1YMN%Lu3WBysUt*3917>{jVbjsD)T^^B)1&w=UCzaWEPaB|=q^YitA(9p zWEaQ`3}$01SqvtaKW7}q_w7D016AcrFe@V=pcN6s?*eqder?Iu5dcW0oS6Xu;_gUb zUaE!pr0QzSWxTVDtZYy@M(^PKm7Dz%ln6~PiOKJbaVw$R_?bEmYM1@7Ucby zOP_EA6UMI*u;dv5%~I;p8WO*269rtL5sWN_bPlY*4`0R-`Po}Xm61D6>=Q*F~R=y-5pAB@dq}d*4l}g zvb8pA06yn;r{ETjz3Hze@du4)&MyWp`MGKqPv4K@^*Ww6f)rT)DuMMR+ZcxtMoRfT 
zzt=@rV@B5_`LLvpAXUIMWkl;5&r)0KKc?z`rgCxg`@1SBBb^mhHUXXCNFg0BR-vB7kXM&;AQ_(w4;C75!7-kn+$=J+vKPqWyWnXaHD8Zgv_0TKX3E|D=u#;}~3ck*)nRNh4m{`#LtF!{kY9)X$4 zC^Amt-UOy+oDte`5rOj-iNAPK_+$KrjHmty84IQcgv7IB;l2y`R37J~KX(`tV2B0) zPKZCZHod(NJ>6CzGoS=Kr|_o1kR)a*r^%T&d%5VuZ|f+0PA*)M?95LA4xNKE#pqt4uxdGM+_WGa=@&&*gZ?+r8p1?|B6Awa`voCOed z{rSAuM1`=0=y}F&O@GW$6xZeWuV4@@e*y;7ka)G2Re@raXGMEyXh_L63DUKC;t=wU z?undjrj8_zapya8ftKJR+Sn~a*u5RGM);WLXBh|72~2AW|B(%UUM*uTXM?B*v_J8u zyR~MlV@wz<2j{7s6MXpdur8)-Ys&U4GEt4O5m;aodrSJ|6v3O>h&B%%C0-Q+gcZ?) znw|N(aW@cKg@!U9x6ssci(1H_1aDF~5D$hp>4jC!`IMJ=^9_0u^0&)T42WDQ>ggV7 zv4~Gn9?%L1L<%R( zSrm;h)%f+zm@-G_`TTmzZLjhJxQRf^pgW99W3ki#&-nzH6()4SJ+y<)qVmFWb{II4 z`}rMhxQH356y{NThSS1$H1NTOOSS-_R4$4i#}d33&{G(zv1a~sE~t8aENeM_Ag<`nKGO=4K2Wp4Qk53>v+

1s4)RPwK<#{pTPb$MA zrj6=JUh`#R{naVdBz!SYwuQm-pgqC&1T_Qm?j_R=K|IYe%Lv29k?EJ5rI%ifQ}yQO z-9t!PU)cOxcKGZLNJ(y6bfyygw>B`pfmr^tG^zI&vuM6omQ0YzqtZg}HmgI9hpp~~ zTuv_Gg2UB!y|%>zPz-nkvW#oimY#(uHUXZ>kS0H2TmvU|(C|$C74&ZcO$3wKYw(jc zzGgUOP3Ui5o2I{*SB2j$@Sp!qY@$6rmA+(&^vjb+4bKRP@&G!8>S;gxs%mP%V#mkb zR$fBaT2^mfG4-hN*l`=@Kf>-*R=l|9cH609-+;<~%CQyWhYt&>sI7W7er)r@0P*|n zurs?8C1vN0Cf?jl9E*z)z(6qSaGLUN0pfzPuoD8do((7!YqNc&Xn!&m=k{%zkEQSrHTLSjQhcZ-XYmF>{Hmp000 zw6w=d?6RCNy$2NZi@AmoEekLaMnIMmuFE5_u~Es>6ZSdlBOK~oqDK`H%$3~SvdPOV z^T?y6XZ9F4+1kE$RUbtu$ZO&azna=#bF7*Xb!{ExQX_OVG&S}0(Q3)5&8|iH`8nrX zo|2_>2Rc1VJaQ~m6FdFN-d??0c4pT;4UK~b=b4o9AV!}rRLd^$%Brp)hPk=<4mY zdOFy;dp7>RV)%>JC6UjUKPl}ZgD?KFJ>j$tv8V$4ZdX0ct|au7HUZsxN#;JPysYr_ zp7nk`6V*96%4z8)IpCwUJw1aH6BF9h1CU&^di3iQ_>_O%nS16i2fMn9#cvOv^S^L} zfG*TFy@Uv|$DZk|r!ms?YA+V=uCF(_d2=S&bjJ<#)9Z8E)80gx9mAdYWb4nLQgE_U5Op`%&wFaNaCH=M|$M6-U?&d(U&k1K72`zR;Fl;duKTz3#zbP9?;)W zh;-}IT}@$$dq4Pq=%A?eYS5SeEw-^lIe^{42AQr!G?)p$PsU1=36SH+ox>en*~yQlsA%EfdryqQXiF zloi_B4Rg@iN1!<2{6l)3B`OV}SRrcsBPIzavM8&+lIh>9Mz0fC(to2DeR6IE6e|{m z`map-|3bZT=>IvPMxJT~?M4vPvN)t56Uo2d$Im9$y?Zy7Wd!Ewgs(pcWeOFIbPOhUaPNKhCYE$j9t^B^+N117ph_FN88A&rf$SDPtWEv?XahD?4o|w7D|CZ z=>G+MX;$w^hd0cDTtc$#>lb_Wz0Rjan`O)^$d-Sdp*2W}l@1N64z z7fA8_pl6Rj+4uB?M?QIjh3x(K(MkPaMy)FxZu0j0MgL-_e5OmY@M!0?m4C1MuYZ}_ z5;-{Gwd01hJWO@tFRgU3leN6BKmXgm8cDu_+T8^?qQZg59%N-q>*ahzRLX<&w_(e^ z?NuzUApQt^LGO27uoUzccdpz!S-u_09EI*tpcvyJ$r3d`L|B6`4wYf7F@j+Fp(_G+ z5~@e-5`nWw{`h2DRTTqLX+{b`dMV$fH;|M&m@pODNdoKlvQ!$V4})n&anl`draF<8 ze*FAPBu$452(5`61~-X}o1;qG;G==0tBw%QR4rY98^I2<%posq3`9flL*&#U0)=rh z5y7_aW1XCwaTDr_>so? 
zHI=A|!E!02Iar%4bRU++tZAk^W?GVcW8g66nlms%H($9h{kry3 z8Vt#&)+3!Q_T&0a{&B=8+8&KxkT#0LOoA=UPlN}uy671J#7#2XkM&KLi@V+#5h7kP zr{7PWaCi`J(bro^Y*oVM>MitDlLh!OZRumQJh*)mc%!_9v?a(bXErQQYKOwqx5 zeu#{WJkTEaE|xkk)=F=r#+M>5PUe8GZs}D2h&2r#v&=VUm`lOpGN2T8asU*#BOPJJ zdzz?PqgJD|4aWCQ)xdCvvEGtj02;Fs-`8;5#3)SBLwp@J)jMFE_Fskt{(p7ufN%$lyxGuGUj zPB+o8ZSD0_w#%^k?z*cSG5+VCdcL%vdc~^GHaQPx{h=|EPsycleq|Ggz_8WSZKN~9 zt5-VU>1n>rAqf zSd3>)s=oVS;=(K3fF&n|UEow%R6oV8{c;vx!)IG}J??Tx4jrtas8G^l7_KN@lp|xX z&7MrHDjlB*O@Pw;3`1}dYxX5Rc$nI!^8{ZR=fyo?=7?bgSBSC)<&}cGbjidnZ`6PV zfF8nP2l{do!WaSEg*B3oo3xf6(G7e+*{px(L9D-zJC58YAH;0S(4YjOC40M#f&w|) zB#YBj6a)FnmDM&477&vapPLeDr6U5nsB!R~bdJ?y^x@(1`Z(YdD||f#s2|7D@<2Tf z@6qvQO$YPJap>8MZ1OyS6w}!|J(L)@g$Oj?#L3HgA>%)EoQZ7 zH>IJqsr;8`BjmRgR34O3%kxI-+YQ&(^pX|&%Jy!|*grWwqit1KHS5mE*d!A&lMrZR zeHn_zZY6Lt`*K%zV{CsgkmOsTr}H24kUEFEFlC>`z!Waniy>0IGqm;|*sr4#y1%Iu z3|K-&6cKl^LbO#M#7iWZ*C?)Dy&u;V@aj`g0a9(Lc|hQrsE9>YY2m6~RhHMNKhE z+UUHrRZ0W0a=p3nGTyKbu^jTeNKC$auX`ybZ2axV7=T-szEbxvASb_N7{Z&JTM3G> zpP~}LZHt*IWIgQ6{8ZRBth$gz7(e!Q;EJ_E3^5RuD_mX8BjDV8_%ER~V zq=Gbg*gn1XX9ZD5YMk_ETBvr?iZ6g(A0R`X6!5s@df*7nN)lrdE{98?XOop5lF+n? z{F;YJo5L@2-&N|#Be9cFqk&vSP43&bSZAa`&z;OWlDYjrJ-uK};DyMH$$3xM9VsYZ z1GVY|o&(utUidf3+o0=n_e;?iO{k?GZS|uwnoJXEORxvTUN1~o{-cv@kS+Tm)LO0H z`6I7s?J!kb50YZakKCs0@@HXY5Dr(>C(wEM_d{j$3{o^5l48UUkKT@*L@0Opqt7oO zqNoDJ`@dvUJ+Ccn0Y&j4{J;8npkF$wI_@O-wK^oE?#(PVtnc*ni$G{E6ofgTIYKq3~cSg*15meha9b27{mq7yA3_hCR-NH1z)_l zC80LEE!z8uz8ciT)qX#;Bdrpk<29_T+S=OKcWdWOca#(Cc5M{<_>`O*dmJ3mdKb;| zvPzP@gUthz90?7LgoFfVDlv@RBnt^s3BO)9$N563t*w32(BQDEPtLIM5HMO-upSX5!%{X zH?AI5O+mkW0YZ8HdF@13^@F-8scC5oL(C= zA<<6-@wh{IHhYqc$7$0;?~bc=k+pO^EI_PqZBGFnWR^A7dUs;o&=cqhWUd4;eYbbd zx;%(IN|~~!xe16AX6b31_82o1JM>@G{>l$O^gyWbRu4#I#u~}qo|oKX+<7AOgQ7x| zrNwLenfA! 
z+DhspD9sbz{v#&+X3RxggV6oRO2M7YzgpPJZQ)T+q7w&&`Y#7*!NLFk3l$V9u)=Gi W%r8Mxf7cT5=j1W-qXmaue)}I{>dlJ) literal 0 HcmV?d00001 diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 59bfee77f6..1d3872a178 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -13,23 +13,31 @@ from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyze from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer from profiler.advisor.analyzer.schedule.dispatch.timeline_op_dispatch_analyzer import OpDispatchAnalyzer +from profiler.advisor.analyzer.schedule.syncbn.syncbn_analyzer import SyncBNAnalyzer +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_analyzer import SynchronizeStreamAnalyzer +from profiler.advisor.analyzer.dataloader.dataloader_analyzer import DataloaderAnalyzer +from profiler.advisor.analyzer.computation.ai_core_freq.ai_core_freq_analyzer import AICoreFreqAnalyzer + class Interface: supported_analyzer = { "schedule": OrderedDict({ - SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer, - SupportedScopes.TIMELINE_OP_DISPATCH: OpDispatchAnalyzer + SupportedScopes.SYNCBN: SyncBNAnalyzer, + SupportedScopes.TIMELINE_OP_DISPATCH: OpDispatchAnalyzer, + SupportedScopes.SYNCHRONIZE_STREAM: SynchronizeStreamAnalyzer, + SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer }), "computation": OrderedDict({ SupportedScopes.DYNAMIC_SHAPE_ANALYSIS: DynamicShapeAnalyzer, SupportedScopes.AICPU_ANALYSIS: AicpuAnalyzer, SupportedScopes.OPERATOR_NO_BOUND_ANALYSIS: OperatorBoundAnalyzer, SupportedScopes.BLOCK_DIM_ANALYSIS: BlockDimAnalyzer, - SupportedScopes.GRAPH: FusionOPAnalyzer + SupportedScopes.GRAPH: FusionOPAnalyzer, + SupportedScopes.FREQ_ANALYSIS: AICoreFreqAnalyzer }), "communication": OrderedDict(), "overall": 
OrderedDict({SupportedScopes.OVER_ALL: OverallSummaryAnalyzer}), - "dataloader": OrderedDict(), + "dataloader": OrderedDict({SupportedScopes.DATALOADER: DataloaderAnalyzer}), "cluster": OrderedDict({ SupportedScopes.SLOW_RANK: SlowRankAnalyzer, SupportedScopes.SLOW_LINK: SlowLinkAnalyzer @@ -66,7 +74,7 @@ class Interface: if render_html and result.data: if hasattr(analyzer, "html_render"): analyzer.html_render.render_html() - analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + analyzer.html_render.save_to_file(f'mstt_advisor_{Timer().strftime}.html') return result if not output_dict else dict(result.data) diff --git a/profiler/advisor/result/item.py b/profiler/advisor/result/item.py index fa0ffb5b1c..02db7fdd00 100644 --- a/profiler/advisor/result/item.py +++ b/profiler/advisor/result/item.py @@ -15,7 +15,7 @@ class OptimizeItem: @property def headers(self): - return ["problem", "description", "suggestion"] + return ["category", "description", "suggestion"] class StatisticsItem: diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index c7d7da8663..0d0602ee56 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -93,6 +93,9 @@ class SheetRecoder: if data not in self._sheet_data[sheet_name]["data"]: self._sheet_data[sheet_name]["data"].append(data) + def clear(self): + self._sheet_data.clear() + @singleton class OptimizeResult: @@ -110,12 +113,12 @@ class OptimizeResult: def add_tune_op_list(self, tune_op_list) -> None: """ add tune op name to tune op list - :param tune_op_list: tune op name list to be added + :param tune_op_list: list of operators to be optimized :return: None """ - for op_name in tune_op_list: - if op_name not in self._tune_op_list: - self._tune_op_list.append(op_name) + for operator_name in tune_op_list: + if operator_name not in self._tune_op_list: + self._tune_op_list.append(operator_name) def add(self, overview_item): sheet_name = "problems" @@ -148,6 
+151,9 @@ class OptimizeResult: logger.info("Save problems details file to %s", Config().analysis_result_file) self._save_op_file_list() + def clear(self) -> None: + self.data.clear() + def _save_op_file_list(self) -> None: if not self._tune_op_list: return @@ -173,9 +179,9 @@ class TerminalResult: def __init__(self): self.width, _ = self.get_terminal_size() if self.width is None: - self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"]) + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"]) else: - self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"], + self.table = PrettyTable(["No.", "Category", "Description", "Suggestion"], max_table_width=max(self.width - 20, 180)) self.table.hrules = ALL self.result_list = [] diff --git a/profiler/advisor/rules/dataloader.yaml b/profiler/advisor/rules/dataloader.yaml new file mode 100644 index 0000000000..2bb7a4c0e7 --- /dev/null +++ b/profiler/advisor/rules/dataloader.yaml @@ -0,0 +1,9 @@ +# unit is milliseconds +dataloader_duration_threshold: 10 +problem: "Found slow dataloader, cost {dataloader_duration} milliseconds for one step while profiling, normally less than {dataloader_duration_threshold} milliseconds." +solutions: + - "Please check the disk I/O of your data directory. If you are training model in ModelArts, please move data to '/cache' or mount a more efficient cloud disk for better I/O." + - "Please check if there are any other multiprocess operations in runtime that may have affected the dataloader, such as training process core binding command 'taskset ...' used for launching the training job." + - "Please check the format of your data, avoid file format like tar, tar.gz, zip." + - "Please set 'pin_memory=True' for your dataloader." + - "Try to adjust dataloader parameter 'num_workers'." 
\ No newline at end of file diff --git a/profiler/advisor/rules/sync_batchnorm.yaml b/profiler/advisor/rules/sync_batchnorm.yaml new file mode 100644 index 0000000000..d65bcb0d4a --- /dev/null +++ b/profiler/advisor/rules/sync_batchnorm.yaml @@ -0,0 +1,41 @@ +problem: "Found {syncbn_num} SyncBatchNorm, which can lead to slow python task dispatch and frequent communication between devices and finally reducing training efficiency." +max_syncbn_num: 20 +solutions: + - enable batchnorm: + desc: "disable SyncBatchNorm by remove the code like 'torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)' if possible." + - enable efficient SyncBatchNorm: + desc: "replace the 'forward' method of python script 'torch_npu/utils/syncbatchnorm.py' in your runtime environment." + efficient_code: | + @staticmethod + def forward(self, input_tensor, weight, bias, running_mean, running_var, eps, momentum, process_group, world_size): + input_tensor = input_tensor.contiguous() + input_shape = input_tensor.shape + input_tensor_ = input_tensor.reshape(input_shape[0], input_shape[1], 1, -1) + sum_val, sum_square_val = torch.batch_norm_reduce(input_tensor_, eps) + + count = torch.full((1,), + input_tensor.numel() // input_tensor.size(1), + dtype=sum_val.dtype, + device=sum_val.device) + + num_channels = input_tensor.shape[1] + combined = torch.cat([sum_val, sum_square_val, count], dim=0) + combined_list = torch.empty((world_size,) + combined.shape, dtype=combined.dtype, device=combined.device) + dist.all_gather_togather(combined_list, combined, process_group, async_op=False) + sum_all, square_sum_all, count_all = torch.split(combined_list, num_channels, dim=1) + size = count_all.view(-1).sum() + if size == 1: + raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size)) + + mean, invstd = torch.batch_norm_gather_stats_update(input_tensor, + sum_all, + square_sum_all, + running_mean, + running_var, + momentum, + eps, + count_all.view(-1)) + 
self.save_for_backward(input_tensor, weight, mean, invstd, count_all.to(torch.int32)) + self.process_group = process_group + out = torch.batch_norm_elemt(input_tensor, weight, bias, mean, invstd, eps) + return out \ No newline at end of file diff --git a/profiler/advisor/rules/synchronize.yaml b/profiler/advisor/rules/synchronize.yaml new file mode 100644 index 0000000000..ed105b345c --- /dev/null +++ b/profiler/advisor/rules/synchronize.yaml @@ -0,0 +1,8 @@ +problem: "SynchronizeStream will reduce training efficiency. Found {synchronize_num} SynchronizeStream, {slow_synchronize_num} slow SynchronizeStream cost {total_synchronize_stream_time} us." +max_synchronize_num: 20 +slow_synchronize_threshold: 10 #ms +solutions: + - disable ascend launch blocking: + desc: "please check your env 'ASCEND_LAUNCH_BLOCKING', if ASCEND_LAUNCH_BLOCKING=1, please execute 'unset ASCEND_LAUNCH_BLOCKING' and then start your training job." + - modify code to avoid synchronize stream: + desc: "please try to modify your training code to avoid synchronize stream between cpu and npu." 
\ No newline at end of file diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 84419b6708..83f304c2d3 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -1,5 +1,6 @@ import inspect import json + import logging import multiprocessing as mp import os @@ -11,7 +12,7 @@ import traceback import types from functools import wraps from typing import Any, Set - +import ijson import click import requests from requests.adapters import HTTPAdapter @@ -43,7 +44,7 @@ class ContextObject(object): def debug_option(f): - return click.option('--debug', '-D', + return click.option('--debug', is_flag=True, expose_value=False, is_eager=True, @@ -413,7 +414,17 @@ def format_excel_title(title: str) -> str: title = title.replace("(ns)", '') title = title.replace("(%)", '') title = title.replace(" ", "_") - return title + + # 将kernel_details中的列名转为与op_summary_x.csv中一致 + kernel_details_col_name_map = { + "name": "op_name", + "type": "op_type", + "accelerator_core": "task_type", + "start_time": "task_start_time", + "duration": "task_duration", + "wait_time": "wait_time" + } + return kernel_details_col_name_map.get(title, title) def format_float(num: float) -> float: @@ -550,3 +561,50 @@ def get_file_path_by_walk(root, filename): file_path = os.path.join(root, name) return file_path return file_path + + +def check_path_valid(path): + if os.path.islink(os.path.abspath(path)): + logger.error("fThe path is detected as a soft connection. path:%ss", path) + return False + elif not os.access(path, os.R_OK): + logger.error(f"The file is not readable. path:%ss", path) + return False + elif os.path.getsize(path) > const.MAX_FILE_SIZE: + logger.error(f"The file size exceeds the limit. 
path:%ss, MAX_FILE_SIZE:%ss B",path, const.MAX_FILE_SIZE) + return False + return True + + +def parse_json_with_generator(timeline_data_path, func): + result = [] + if not check_path_valid(timeline_data_path): + return result + try: + with open(timeline_data_path, "r") as f: + if os.getenv(const.DISABLE_STREAMING_READER) == "1": + logger.debug("Disable streaming reader.") + file_parser = json.loads(f.read()) + else: + logger.debug("Enable streaming reader.") + file_parser = ijson.items(f, "item") + + for i, event in tqdm(enumerate(file_parser), + leave=False, ncols=100, desc="Building dataset for timeline analysis"): + func_res = func(index=i, event=event) + if func_res is not None: + result.append(func_res) + + except Exception: + logger.warning("Error %s while parsing file %s, continue to timeline analysis", traceback.format_exc(), + timeline_data_path) + return result + + +def convert_to_float(num): + try: + return float(num) + except (ValueError, FloatingPointError): + logger.error(f"Can not convert %ss to float", num) + pass + return 0 diff --git a/profiler/cli/__init__.py b/profiler/cli/__init__.py index eab13571c5..e768e4cb86 100644 --- a/profiler/cli/__init__.py +++ b/profiler/cli/__init__.py @@ -1,4 +1,4 @@ from profiler.advisor.config.config import Config from profiler.advisor.utils.utils import Timer -Config().set_log_path(f"att_advisor_{Timer().strftime}.xlsx") +Config().set_log_path(f"mstt_advisor_{Timer().strftime}.xlsx") diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 2e173dc870..f400a265b7 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -83,9 +83,6 @@ def analyze_cli(**kwargs): help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_all(**kwargs) -> None: - # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 - if not kwargs.get("benchmark_profiling_path"): - kwargs["benchmark_profiling_path"] = 
kwargs.get("profiling_path") try: _analyze(Interface.all_dimension, **kwargs) except RuntimeError as e: diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index e794578da8..f9add948ea 100644 --- a/profiler/cli/compare_cli.py +++ b/profiler/cli/compare_cli.py @@ -32,6 +32,8 @@ from profiler.compare_tools.compare_backend.comparison_generator import Comparis @click.option('--enable_operator_compare', is_flag=True) @click.option('--enable_memory_compare', is_flag=True) @click.option('--enable_communication_compare', is_flag=True) +@click.option('--enable_api_compare', is_flag=True) +@click.option('--enable_kernel_compare', is_flag=True) @click.option('--disable_details', is_flag=True) @click.option('--output_path', '-o', 'output_path', type=click.Path()) @click.option('--max_kernel_num', 'max_kernel_num', type=int, help="The number of kernels per torch op is limited.") diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index e7e2d5adca..380192f87b 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -17,6 +17,8 @@ import os import csv import json +import yaml + from common_func.constant import Constant from common_func.path_manager import PathManager @@ -60,6 +62,23 @@ class FileManager: raise RuntimeError(f"Failed to read the file: {base_name}") from e return result_data + @classmethod + def read_yaml_file(cls, file_path: str) -> dict: + PathManager.check_path_readable(file_path) + base_name = os.path.basename(file_path) + file_size = os.path.getsize(file_path) + if file_size <= 0: + return {} + if file_size > Constant.MAX_JSON_SIZE: + raise RuntimeError(f"The file({base_name}) size exceeds the preset max value.") + + try: + with open(file_path, "r") as yaml_file: + result_data = yaml.safe_load(yaml_file) + except Exception as e: + raise RuntimeError(f"Failed to read the file: {base_name}") from e + return 
result_data + @classmethod def create_csv_file(cls, profiler_path: str, data: list, file_name: str, headers: list = None) -> None: if not data: diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index 78ea5d8971..b40f19e92f 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ -145,6 +145,8 @@ python performance_compare.py [基准性能数据文件所在路径] [比对性 | --enable_operator_compare | 开启算子性能比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | | --enable_communication_compare | 开启通信性能比对。 | 否 | | --enable_memory_compare | 开启算子内存比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | +| --enable_kernel_compare | 开启kernel性能比对。仅针对NPU与NPU比对的场景。需要使用性能数据中的kernel_details.csv文件。 | 否 | +| --enable_api_compare | 开启API性能比对。需要使用性能数据中的trace_view.csv文件。 | 否 | | --disable_details | 隐藏明细比对,只进行统计级比对。 | 否 | 说明:以上开关均不设置的情况下,**工具默认开启所有的性能比对**,当用户设置了以上开关,则按照用户设置的开关进行性能比对,示例如下: @@ -174,9 +176,13 @@ python performance_compare.py [基准性能数据文件] [比对性能数据文 MindSpore场景仅支持**总体性能**和**通信性能**的对比。 +比对结果分为打屏和performance_comparison_result_{timestamp}.csv两种形式输出,其中打屏输出为概要信息,csv文件保存详细结果。 + ### 总体性能 -总体性能比对结果以打屏的形式呈现。 +#### 打屏结果 + +总体性能比对结果以打屏的形式呈现时,字段如下: | 字段 | 说明 | | --------------------------------------- | ------------------------------------------------------------ | @@ -196,6 +202,54 @@ MindSpore场景仅支持**总体性能**和**通信性能**的对比。 | E2E Time(Not minimal profiling) | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | | Other Time | AI CPU、DSA、TensorMove等其他算子耗时。 | +#### csv文件结果 + +总体性能比对结果在performance_comparison_result_*.xlsx中OverallMetrics的sheet页呈现时,示例如下: + +![OverallMetrics](./img/OverallMetrics.png) + +表头字段说明: + +| 字段 | 说明 | +| -------------- | --------------------------- | +| Index | 指标。 | +| Duration(ms) | 执行耗时,单位ms。 | +| Duration Ratio | 执行耗时占E2E总耗时的比例。 | +| Number | 计算算子的数量。 | + +Index列字段说明: + +| 字段 | | | 说明 | +| ---------------------------- | ------------------ | ----------------------------------- | 
------------------------------------------------------------ | +| Computing Time | | | 计算流耗时,计算流所有event耗时总和。如果有多条并发计算,计算流耗时对重叠部分只会计算一次。 | +| | Flash Attention | | Flash Attention算子。 | +| | | Flash Attention (Forward) (Cube) | Flash Attention前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Forward) (Vector) | Flash Attention前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Flash Attention (Backward) (Cube) | Flash Attention反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Backward) (Vector) | Flash Attention反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Conv | | Conv算子。 | +| | | Conv (Forward) (Cube) | Conv前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Forward) (Vector) | Conv前向Vector算子。Conv前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Conv (Backward) (Cube) | Conv反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Backward) (Vector) | Conv反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Matmul | | Matmul算子。 | +| | | Matmul (Cube) | Matmul算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Matmul (Vector) | Matmul算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Paged Attention | | Paged Attention算子。 | +| | Vector | | Vector算子。 | +| | | Vector (Trans) | 转换类Vector算子,主要包含Cast、TransPose、TransData算子。(仅针对NPU数据) | +| | | Vector ( No Trans) | 非转换类Vector算子。 | +| | Cube | | 未识别出Flash Attention、Conv和Matmul的Cube算子。 | +| | SDMA (Tensor Move) | | 拷贝类任务。 | +| | Other | | AI CPU、DSA等其他算子。 | +| Uncovered Communication Time | | | 通信未掩盖耗时,包含卡间等待时间。 | +| | Wait | | 卡间同步等待耗时。(仅针对NPU数据) | +| | Transmit | | 通信传输耗时。 | +| Free Time | | | 调度耗时 = E2E耗时 - 算子耗时 - 通信不可掩盖耗时。Free的定义为Device侧既不在通信又不在计算的时间,因此包含拷贝时间(SDMA Time)。 | +| | SDMA | | NPU为除Tensor Move外的拷贝类任务,GPU为所有拷贝类任务。 | +| | Free | | 排除SDMA的空闲耗时。 | +| E2E Time | | | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | + 可以采取最简性能数据采集的方式来减少E2E耗时的性能膨胀,示例代码如下: ```python @@ -300,3 +354,29 @@ MindSpore场景暂不支持。 
步骤1:查看MemoryCompareStatistic页,找出内存占用差距TOP的算子。 步骤2:查看MemoryCompare页,搜索内存占用差距TOP的算子,查看具体占用的子算子。 + +### kernel性能 + +仅针对NPU与NPU比对的场景。 + +kernel比对结果在performance_comparison_result_*.xlsx中KernelCompare页呈现。 + +按照Kernel(Kernel类型)和Input Shapes(输入Shape)分组统计,统计信息包括: + +- Total Duration(us):总耗时,单位us。 +- Avg Duration(us):平均耗时,单位us。 +- Max Duration(us):最大耗时,单位us。 +- Min Duration(us):最小耗时,单位us。 +- Calls:调用次数。 + +### API性能 + +API比对结果在performance_comparison_result_*.xlsx中ApiCompare页呈现。 + +按照api name(API名称)组统计,统计信息包括: + +- Total Duration(ms):总耗时,单位ms。 +- Self Time(ms):Self耗时(排除掉子event),单位ms。 +- Avg Duration(ms):平均耗时,单位ms。 +- Calls:调用次数。 + diff --git a/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py new file mode 100644 index 0000000000..bc5810068b --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py @@ -0,0 +1,32 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class ApiCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_api_by_name(cls, ops: list): + ops_dict = {} + for op in ops: + ops_dict.setdefault(op.name, []).append(op) + return ops_dict + + def _compare(self): + if not self._origin_data: + return + base_ops = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_ops = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_ops or not comparison_ops: + return + base_aggregated_ops = self._aggregated_api_by_name(base_ops) + comparison_aggregated_ops = self._aggregated_api_by_name(comparison_ops) + for op_name, base_data in base_aggregated_ops.items(): + comparsion_data = comparison_aggregated_ops.pop(op_name, []) + self._rows.append(self._bean(op_name, base_data, 
comparsion_data).row) + if comparison_aggregated_ops: + for op_name, comparison_data in comparison_aggregated_ops.items(): + self._rows.append(self._bean(op_name, [], comparison_data).row) + update_order_id(self._rows) diff --git a/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py new file mode 100644 index 0000000000..13c0f776af --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py @@ -0,0 +1,35 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class KernelCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_kernel_by_type_and_shape(cls, kernels: dict): + result_dict = {} + for type_shape, shape_values in kernels.items(): + for shape, kernel_data in shape_values.items(): + kernel = [single[1] for single in kernel_data] + result_list = [type_shape, shape, sum(kernel), len(kernel), max(kernel), min(kernel)] + result_dict.setdefault(f"{type_shape}{shape}", []).extend(result_list) + return result_dict + + def _compare(self): + if not self._origin_data: + return + base_kernels = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_kernels = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_kernels or not comparison_kernels: + return + base_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(base_kernels) + comparison_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(comparison_kernels) + for type_shape, base_data in base_aggregated_kernels.items(): + comparsion_data = comparison_aggregated_kernels.pop(type_shape, []) + self._rows.append(self._bean(base_data, comparsion_data).row) + if comparison_aggregated_kernels: + for _, 
comparison_data in comparison_aggregated_kernels.items(): + self._rows.append(self._bean([], comparison_data).row) + update_order_id(self._rows) \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py new file mode 100644 index 0000000000..55e08a86be --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py @@ -0,0 +1,47 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class ApiInfo: + def __init__(self, op_name: str, data_list: list): + self._data_list = data_list + self.name = op_name + self.total_dur = 0.0 + self.self_time = 0.0 + self.avg_dur = 0.0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for data in self._data_list: + self.total_dur += data.api_dur + self.self_time += data.api_self_time + self.total_dur /= 1000.0 + self.self_time /= 1000.0 + self.avg_dur = self.total_dur / self.number if self.number else 0.0 + + +class ApiCompareBean: + TABLE_NAME = Constant.API_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, op_name: str, base_api: list, comparison_api: list): + self._name = op_name + self._base_api = ApiInfo(op_name, base_api) + self._comparison_api = ApiInfo(op_name, comparison_api) + + @property + def row(self): + row = [None, self._name, + self._base_api.total_dur, self._base_api.self_time, self._base_api.avg_dur, self._base_api.number, + self._comparison_api.total_dur, self._comparison_api.self_time, + self._comparison_api.avg_dur, self._comparison_api.number] + diff_fields = [calculate_diff_ratio(self._base_api.total_dur, self._comparison_api.total_dur)[1], + calculate_diff_ratio(self._base_api.self_time, self._comparison_api.self_time)[1], 
+ calculate_diff_ratio(self._base_api.avg_dur, self._comparison_api.avg_dur)[1], + calculate_diff_ratio(self._base_api.number, self._comparison_api.number)[1]] + row.extend(diff_fields) + return row + diff --git a/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py new file mode 100644 index 0000000000..df96addc4f --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py @@ -0,0 +1,75 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class KernelCompareInfo: + def __init__(self, data_list: list): + self._kernel_type = None + self._input_shapes = None + self._total_dur = None + self._number = None + self._max_dur = None + self._min_dur = None + if not data_list: + return + self._kernel_type = data_list[0] + self._input_shapes = data_list[1] + self._total_dur = data_list[2] + self._number = data_list[3] + self._max_dur = data_list[4] + self._min_dur = data_list[5] + + @property + def kernel_type(self): + return self._kernel_type + + @property + def input_shapes(self): + return self._input_shapes + + @property + def total_dur(self): + return self._total_dur if self._total_dur else 0.0 + + @property + def number(self): + return self._number + + @property + def max_dur(self): + return self._max_dur + + @property + def min_dur(self): + return self._min_dur + + @property + def avg_dur(self): + return self._total_dur / self._number if self._total_dur and self._number else 0.0 + + +class KernelCompareBean: + TABLE_NAME = Constant.KERNEL_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, base_kernel: list, comparison_kernel: list): + self._base_kernel = KernelCompareInfo(base_kernel) + self._comparison_kernel = 
KernelCompareInfo(comparison_kernel) + self._kernel_type = self._base_kernel.kernel_type \ + if self._base_kernel.kernel_type else self._comparison_kernel.kernel_type + self._input_shapes = self._base_kernel.input_shapes \ + if self._base_kernel.input_shapes else self._comparison_kernel.input_shapes + + @property + def row(self): + row = [None, self._kernel_type, self._input_shapes, + self._base_kernel.total_dur, self._base_kernel.avg_dur, + self._base_kernel.max_dur, self._base_kernel.min_dur, self._base_kernel.number, + self._comparison_kernel.total_dur, self._comparison_kernel.avg_dur, + self._comparison_kernel.max_dur, self._comparison_kernel.min_dur, self._comparison_kernel.number] + diff_fields = [calculate_diff_ratio(self._base_kernel.total_dur, self._comparison_kernel.total_dur)[1], + calculate_diff_ratio(self._base_kernel.avg_dur, self._comparison_kernel.avg_dur)[1]] + row.extend(diff_fields) + return row \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index 9c4825c0e8..c15396e9c5 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -12,6 +12,7 @@ class KernelDetailsBean: self._data = data self._op_type = "" self._name = "" + self._input_shapes = "" self._aiv_vec_time = 0.0 self._aicore_time = 0.0 self._mac_time = 0.0 @@ -27,6 +28,10 @@ class KernelDetailsBean: def name(self) -> str: return self._name + @property + def input_shapes(self) -> str: + return self._input_shapes + @property def aiv_vec_time(self) -> float: if self._aiv_vec_time == "" or self._aiv_vec_time == "N/A": @@ -109,6 +114,7 @@ class KernelDetailsBean: def init(self): self._op_type = self._data.get('Type', "") self._name = self._data.get('Name', "") + 
self._input_shapes = self._data.get('Input Shapes', "") self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py index fdce23c6ab..3106527c41 100644 --- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py +++ b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py @@ -17,3 +17,20 @@ class OperatorDataPrepare: else: result_data.append(level1_node) return result_data + + def get_all_layer_ops(self) -> any: + root_node = TreeBuilder.build_tree(self.profiling_data.torch_op_data, [], []) + level1_child_nodes = root_node.child_nodes + node_queue = [] + result_data = [] + for level1_node in level1_child_nodes: + if level1_node.is_step_profiler(): + node_queue.extend(level1_node.child_nodes) + else: + node_queue.append(level1_node) + while len(node_queue) > 0: + node = node_queue.pop(0) + result_data.append(node) + if node.child_nodes: + node_queue.extend(node.child_nodes) + return result_data \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py index c89e845193..7bac2b0335 100644 --- a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py +++ b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py @@ -31,4 +31,30 @@ class OverallPerfInterface: def _generate_result(self): overall_data = self._profiling_data.overall_metrics - self._result_data = getattr(overall_data, "__dict__", {}) + + self._result_data = { + "profiling_type": overall_data.profiling_type, + "minimal_profiling": overall_data.minimal_profiling, + "overall": {"e2e_time_ms": 
overall_data.e2e_time_ms, + "computing_time_ms": overall_data.compute_time_ms, + "uncovered_communication_time_ms": overall_data.communication_not_overlapped_ms, + "free_time_ms": overall_data.free_time_ms}, + "computing_time_disaggregate": {"fa_time_ms": overall_data.fa_total_time, + "conv_time_ms": overall_data.conv_total_time, + "matmul_time_ms": overall_data.mm_total_time, + "page_attention_time_ms": overall_data.page_attention_time, + "vector_time_ms": overall_data.vector_total_time, + "tensor_move_time_ms": overall_data.sdma_time_tensor_move, + "other_cube_time_ms": overall_data.other_cube_time}, + "computing_num_disaggregate": {"fa_num": overall_data.fa_total_num, + "conv_num": overall_data.conv_total_num, + "matmul_num": overall_data.mm_total_num, + "page_attention_num": overall_data.page_attention_num, + "vector_num": overall_data.vector_total_num, + "tensor_move_num": overall_data.sdma_num_tensor_move, + "other_cube_num": overall_data.other_cube_num}, + "communication_time_disaggregate": {"wait_time_ms": overall_data.wait_time_ms, + "transmit_time_ms": overall_data.transmit_time_ms}, + "free_time_disaggregate": {"sdma_time_ms": overall_data.sdma_time_stream, + "free_ms": overall_data.free_time_ms - overall_data.sdma_time_stream} + } diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 292e312815..6fe693fb06 100644 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -8,6 +8,8 @@ from compare_backend.comparator.module_comparetor import ModuleComparator from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator from compare_backend.comparator.operator_comparator import OperatorComparator from compare_backend.comparator.operator_statistic_comparator import 
OperatorStatisticComparator +from compare_backend.comparator.api_compare_comparator import ApiCompareComparator +from compare_backend.comparator.kernel_compare_comparator import KernelCompareComparator from compare_backend.comparator.overall_metrics_comparator import OverallMetricsComparator from compare_backend.compare_bean.communication_bean import CommunicationBean from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean @@ -16,6 +18,8 @@ from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean +from compare_backend.compare_bean.api_compare_bean import ApiCompareBean +from compare_backend.compare_bean.kernel_compare_bean import KernelCompareBean from compare_backend.compare_bean.overall_metrics_bean import OverallMetricsBean from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare @@ -39,8 +43,10 @@ class DetailPerformanceGenerator(BaseGenerator): return op_compare_result def compare(self): - if self._args.enable_operator_compare or self._args.enable_memory_compare or \ - self._args.enable_communication_compare: + enable_compare = [self._args.enable_operator_compare, self._args.enable_memory_compare, + self._args.enable_communication_compare, self._args.enable_api_compare, + self._args.enable_kernel_compare] + if any(enable_compare): print("[INFO] Start to compare performance detail data, please wait.") comparator_list = self._create_comparator() else: @@ -97,6 +103,18 @@ class DetailPerformanceGenerator(BaseGenerator): comparator_list.append(OperatorStatisticComparator(op_compare_result, MemoryStatisticBean)) if not self._args.disable_details: 
comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) + if self._args.enable_api_compare: + api_compare_result = { + Constant.BASE_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.BASE_DATA)).get_all_layer_ops(), + Constant.COMPARISON_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.COMPARISON_DATA)).get_all_layer_ops()} + comparator_list.append(ApiCompareComparator(api_compare_result, ApiCompareBean)) + if self._args.enable_kernel_compare: + kernel_compare_result = { + Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).kernel_details, + Constant.COMPARISON_DATA: self._profiling_data_dict.get(Constant.COMPARISON_DATA).kernel_details} + comparator_list.append(KernelCompareComparator(kernel_compare_result, KernelCompareBean)) return comparator_list def match_torch_op(self) -> list: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 6ee07a6569..9daaa55ef1 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -20,6 +20,7 @@ class ProfilingResult: self.overall_metrics = ProfilingInfo(profiling_type) self.python_function_data = [] self.fwdbwd_dict = {} + self.kernel_details = {} def update_torch_op_data(self, event: TraceEventBean): event.is_torch_op = True @@ -43,6 +44,9 @@ class ProfilingResult: def update_comm_task_data(self, comm_name: str, task_event: TraceEventBean): self.communication_dict.setdefault(comm_name, {}).setdefault("comm_task", {}).setdefault( task_event.name, []).append(task_event.dur) + + def update_kernel_details(self, kernels: dict): + self.kernel_details = kernels class BaseProfilingParser(ABC): @@ -57,6 +61,8 @@ class BaseProfilingParser(ABC): self._enable_operator_compare = args.enable_operator_compare 
self._enable_memory_compare = args.enable_memory_compare self._enable_communication_compare = args.enable_communication_compare + self._enable_api_compare = args.enable_api_compare + self._enable_kernel_compare = args.enable_kernel_compare self._dispatch_func = self._get_dispatch_func() self._result_data = ProfilingResult(self._profiling_type) self._memory_events = [] @@ -80,6 +86,10 @@ class BaseProfilingParser(ABC): self._cpu_cube_op = cpu_cube_op return self._cpu_cube_op + @abstractmethod + def _update_kernel_details(self): + raise NotImplementedError("Function _update_kernel_details need to be implemented.") + @abstractmethod def _update_memory_list(self): raise NotImplementedError("Function _update_memory_list need to be implemented.") @@ -112,6 +122,8 @@ class BaseProfilingParser(ABC): self._update_memory_list() if self._enable_profiling_compare: self._update_overall_metrics() + if self._enable_kernel_compare: + self._update_kernel_details() self._check_result_data() return self._result_data @@ -291,7 +303,7 @@ class BaseProfilingParser(ABC): task_index += 1 def _check_result_data(self): - if self._enable_operator_compare or self._enable_memory_compare: + if self._enable_operator_compare or self._enable_memory_compare or self._enable_api_compare: if not self._result_data.torch_op_data: print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") if self._enable_operator_compare and not self._result_data.kernel_dict: @@ -300,6 +312,11 @@ class BaseProfilingParser(ABC): print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") if self._enable_communication_compare and not self._result_data.communication_dict: print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") + if self._enable_kernel_compare and not self._result_data.kernel_details: + if self._profiling_type == Constant.GPU: + print(f"[WARNING] kernel compare between GPU data and NPU data is not supported.") + else: + 
print(f"[WARNING] Can't find any kernel details in the file: {self._profiling_path}") def _read_trace_event(self): try: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index 7b1ae1a5a1..0aeeba83ef 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -33,6 +33,9 @@ class GPUProfilingParser(BaseProfilingParser): def __is_sdma_time(cls, name: str): return any(mask in name.lower() for mask in cls.SDMA_MARK_LIST) + def _update_kernel_details(self): + pass + def _update_memory_list(self): if not self._enable_memory_compare: return @@ -171,6 +174,8 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_memory_event) if self._enable_profiling_compare: func_set.add(self._picking_flow_event) + if self._enable_api_compare: + func_set.add(self._picking_torch_op_event) return list(func_set) def _infer_compute_stream_id(self): diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 457a3b6be5..cb25c252c6 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -53,8 +53,32 @@ class NPUProfilingParser(BaseProfilingParser): func_list.add(self._picking_kernel_event) func_list.add(self._picking_hccl_event) func_list.add(self._picking_flow_event) + if self._enable_api_compare: + func_list.add(self._picking_torch_op_event) return list(func_list) + def _update_kernel_details(self): + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except FileNotFoundError: + print("[WARNING] The file kernel_details.csv does not exist.") 
+ except Exception: + print("[ERROR] Failed to read kernel_details.csv.") + return + if not kernel_details: + return + kernels_dict = {} + for kernel in kernel_details: + if kernel.is_invalid(): + continue + input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A' + kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append( + [kernel.name, kernel.duration]) + if len(kernels_dict) == 1: + print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") + return + self._result_data.update_kernel_details(kernels_dict) + def _update_memory_list(self): try: memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py index 4b5947fa7b..ab9fb43a96 100644 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ b/profiler/compare_tools/compare_backend/utils/args_manager.py @@ -69,6 +69,14 @@ class ArgsManager: def enable_communication_compare(self): return self._args.enable_communication_compare + @property + def enable_api_compare(self): + return self._args.enable_api_compare + + @property + def enable_kernel_compare(self): + return self._args.enable_kernel_compare + @classmethod def check_profiling_path(cls, file_path: str): PathManager.input_path_common_check(file_path) @@ -119,11 +127,14 @@ class ArgsManager: raise RuntimeError(msg) if not any([self._args.enable_profiling_compare, self._args.enable_operator_compare, - self._args.enable_memory_compare, self._args.enable_communication_compare]): + self._args.enable_memory_compare, self._args.enable_communication_compare, + self._args.enable_api_compare, self._args.enable_kernel_compare]): self._args.enable_profiling_compare = True self._args.enable_operator_compare = True self._args.enable_memory_compare = True self._args.enable_communication_compare = True + 
self._args.enable_api_compare = True + self._args.enable_kernel_compare = True base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py index ab9bc364f4..9e6291e89e 100644 --- a/profiler/compare_tools/compare_backend/utils/compare_args.py +++ b/profiler/compare_tools/compare_backend/utils/compare_args.py @@ -6,6 +6,8 @@ class Args: enable_operator_compare: bool = False, enable_memory_compare: bool = False, enable_communication_compare: bool = False, + enable_api_compare: bool = False, + enable_kernel_compare: bool = False, output_path: str = "", max_kernel_num: int = None, op_name_map: dict = {}, @@ -17,6 +19,8 @@ class Args: self.enable_operator_compare = enable_operator_compare self.enable_memory_compare = enable_memory_compare self.enable_communication_compare = enable_communication_compare + self.enable_api_compare = enable_api_compare + self.enable_kernel_compare = enable_kernel_compare self.output_path = output_path self.max_kernel_num = max_kernel_num self.op_name_map = op_name_map diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py index e200258802..252aa536e1 100644 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ b/profiler/compare_tools/compare_backend/utils/constant.py @@ -39,13 +39,16 @@ class Constant(object): # compare type OPERATOR_COMPARE = "OperatorCompare" MEMORY_COMPARE = "MemoryCompare" - + API_COMPARE = "ApiCompare" + KERNEL_COMPARE = "KernelCompare" # sheet name OPERATOR_SHEET = "OperatorCompare" MEMORY_SHEET = "MemoryCompare" OPERATOR_TOP_SHEET = "OperatorCompareStatistic" MEMORY_TOP_SHEET = "MemoryCompareStatistic" COMMUNICATION_SHEET = "CommunicationCompare" + API_SHEET = "ApiCompare" + KERNEL_SHEET = "KernelCompare" # table name OPERATOR_TABLE 
= "OperatorCompare" @@ -57,6 +60,8 @@ class Constant(object): MODULE_TABLE = "ModuleCompare" MODULE_TOP_TABLE = "ModuleCompareStatistic" OVERALL_METRICS_TABLE = "OverallMetrics" + API_TABLE = "ApiCompare" + KERNEL_TABLE = "KernelCompare" # memory SIZE = "Size(KB)" diff --git a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py index ae808863e7..b6be0ae2eb 100644 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ b/profiler/compare_tools/compare_backend/utils/excel_config.py @@ -57,7 +57,7 @@ class ExcelConfig(object): DEVICE_SELF_TIME = "Device Self Time(ms)" DEVICE_TOTAL_TIME = "Device Total Time(ms)" DIFF_SELF_TIME = "Device Self Time Diff(ms)" - DIFF_TOTAL_RATIO = "Total Diff Ratio" + DIFF_TOTAL_RATIO = "Diff Total Ratio" DIFF_TOTAL_TIME = "Device Total Time Diff(ms)" DEVICE_SELF_TIME_US = "Device Self Time(us)" DEVICE_TOTAL_TIME_US = "Device Total Time(us)" @@ -71,6 +71,14 @@ class ExcelConfig(object): DURATION = "Duration(ms)" DURATION_RATIO = "Duration Ratio" DIFF_DUR_MS = "Diff Duration(ms)" + API_NAME = "api name" + TOTAL_DURATION_MS = "Total Duration(ms)" + AVG_DURATION_MS = "Avg Duration(ms)" + SELF_TIME_MS = "Self Time(ms)" + DIFF_SELF_RATIO = "Diff Self Ratio" + DIFF_AVG_RATIO = "Diff Avg Ratio" + DIFF_CALLS_RATIO = "Diff Calls Ratio" + KERNEL = "Kernel" HEADERS = { Constant.OPERATOR_TABLE: [ @@ -193,7 +201,39 @@ class ExcelConfig(object): {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, {"name": DIFF_DUR_MS, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 10}, - + ], + Constant.API_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": API_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + 
{"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_SELF_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_CALLS_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + ], + Constant.KERNEL_COMPARE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": KERNEL, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, ] } @@ -201,7 +241,9 @@ class ExcelConfig(object): Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], 
Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], Constant.MODULE_TABLE: ["E1:H1", "I1:L1"], - Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"]} + Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"], + Constant.API_TABLE: ["C1:F1", "G1:J1"], + Constant.KERNEL_TABLE: ["D1:H1", "I1:M1"]} # overall metrics index # computing time diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py index 690c46cd51..69ee92d123 100644 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ b/profiler/compare_tools/compare_backend/utils/torch_op_node.py @@ -64,6 +64,14 @@ class TorchOpNode: def device_dur(self): return sum([kernel.device_dur for kernel in self._kernel_list]) + @property + def api_dur(self): + return self._event.dur + + @property + def api_self_time(self): + return self.api_dur - sum(child.api_dur for child in self._child_nodes) + def add_child_node(self, child_node): self._child_nodes.append(child_node) diff --git a/profiler/compare_tools/compare_backend/utils/tree_builder.py b/profiler/compare_tools/compare_backend/utils/tree_builder.py index 34c1fe1a1f..d5aa787ac2 100644 --- a/profiler/compare_tools/compare_backend/utils/tree_builder.py +++ b/profiler/compare_tools/compare_backend/utils/tree_builder.py @@ -23,7 +23,8 @@ class TreeBuilder: tree_node = TorchOpNode(event, last_node) last_node.add_child_node(tree_node) last_node = tree_node - tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) + if kernel_dict: + tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) else: event.set_name(last_node.name) last_node.set_memory_allocated(event) diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py index dffb7549fc..58bad621b0 100644 --- 
a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py @@ -12,7 +12,7 @@ class WorkSheetCreator: self._work_sheet = None self._row_id = 1 self._field_format = {} - self._diff_ratio_index = None + self._diff_ratio_index = [] self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" def create_sheet(self): @@ -47,8 +47,10 @@ class WorkSheetCreator: self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) self._field_format[index] = header.get("type") - if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): - self._diff_ratio_index = index + ratio_white_list = [ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO, + ExcelConfig.DIFF_AVG_RATIO, ExcelConfig.DIFF_CALLS_RATIO, ExcelConfig.DIFF_SELF_RATIO] + if header.get("name") in ratio_white_list: + self._diff_ratio_index.append(index) self._row_id += 1 def _write_data(self): @@ -56,7 +58,7 @@ class WorkSheetCreator: for data in self._data.get("rows"): for index, cell_data in enumerate(data): cell_format = self._work_book.add_format(self._field_format.get(index)) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) @@ -76,7 +78,7 @@ class WorkSheetCreator: if index == 0: # 0 for Index field cell_style["indent"] = cell_data.count("\t") cell_format = self._work_book.add_format(cell_style) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data 
self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) diff --git a/profiler/compare_tools/img/OverallMetrics.png b/profiler/compare_tools/img/OverallMetrics.png new file mode 100644 index 0000000000000000000000000000000000000000..b130d3607344c983a9304440e38a45fe96a4bb56 GIT binary patch literal 66941 zcmdqIXH=72w>GK|3i^maK`9z~Rhk6pT~NT#M5P8I9So6<5C}-ofb`x$q$tv)geFxW z^j-p?_YyjU651D@x9s=X-`?Yl^W*$EW3aeqa>KgUTC=QcUTfZgS{lkX|Gf9-rAwD? zs;VeyU%GUq;?kwd``5@x-#GP%tdlO6owSvoTq@`Wu8=-lu~JZ1xOAx~?8b@7Rnq6{ z_A2^Lmo5PsFWTi6yYJ?gE}dtoDk;2jGhVBa30+X>=e#GCbCt5!^fDuiCttMp32U@w zzKmza<6AuNaJL6FH=S>!*D?l2TXPchn|QWQ#3xjooOb0fr>vVYC~*hXbD{T%NeNN3 zV#3B}J?BSqY18Ms=O=R8Q5>o})aGieR;o!CZ6!*Q?eF5sz(8Ne!RC})9rAepsO}7V z&ZZj4@$a9u0f$p^I)Q7xECL_n;^H)!q}{fr9hTUeW^YB1{-{mLF|hfHP$;vUSEle+ za!qA5``cF>?mlX0_M@O;q7aXtCiPY=l`zd_*>FXd`LDaej~ZMe=h$z-2(bk)iq*ai ztEyN<;@nH-LZ4xBB}_h50yxO}rIZ#op#l|K@P)axgy=@4RQbO3uwT>pPko)bs%|Fj zZ}^%cmHXaFv624a|qXoBi4P?G&)N)okBgNVW?W8jjnvmfID#}7M#9BR+Ap<>t*aGog0?+ zKo)Uu7K?!%bEZ5qGCW zGOfV56GNE2nd#XTAu_HC;v3%}mVl(o_m2?%s8Ba}AS%v~F^)-2#x37*D-iX}-M(}w zPF6Z)Hl1o=zuqsslU0TTa2vkx6YJ=9z~iLoA%O}tw?%$YJGkrqr6x^>vHQj1&?uQ1 z;y_!9g{q15fThfAN$`NzqF=|hi3w2)^0wh6eakLcj~+&8VPf_{P$NO7CUHlB(>>{R zY56|CR_uDs(Y2?t0Ei*7-N!K}N)Z=8nViwd03@mr1Z?mO(9IK zaPaQdM%Hh(hP#+>bFk34bDcJMO*)+a_8r2Xqi@elHJ|J$AlGQ-F`^Y3ONpR@(zqi_ zf=%c)Q~wc8v+HHIFqcfg$y#ml(KU4Fum@lXB-a-dHyD)^(&!r|)Sau5ZdN2}j`*+O?W$!1S2 zORCbo_4yBgJ!yx07Y=2_BO^K39i)m>=ZkKk&}9hocM69Qk0%}x4_rWU9)(pJ2T=ri zR!odU=q)(m1b!Zy7g3Z4$L%}9$J$sA3a|=$*G^R!0u-Ge-Z|P2uPsBunTX*MffGZR zaK#>703?GMFbHN`yn;}+OEzRCGCd2s(8Z%zueL}!Mb&F6e`FKBo~xA#Ht+oYr7K6| zl*t9xYI*e4(l=IPIi5ZY61~;F29}PzRh81qb>*u@bf|x1ll&8V^+)b%#PI#zb(~o6 z94A9@WrD}&^`|ZA8inaKx<+vXFka|MFs~EP)H-*GxkoW!StZRdG}r<@Q05KBzP_(P z8u{gI*6s((%G@^T%6&On^M)Qzq&la{n0K-sa~DZfa!X;Ji$J&0opCSHRzZfKwb0$G zPXrV?KU^=6C53})vzGPGYMEvl*o@~`RRQktP22#&rPrlsmwpNg^NvOuS%-DT?=5~o zKv#`^<}{X7Sh0(g))aUhf@uV1*1Zq=9)$oJe3D%|aab+iv>%T7H#NdBj&H%z+f%Q?ClN$ 
zr5#xxayJ9CFA)uEY)M(P=6FG#Djc!Z0bdy0V+TiovR=MoeEOkFFw2;AdY&)48@>> z2Sxik<+RJuOT1Ru*~wCmc5%Ij;v)Hi@X8nj=xvd^Tr}a--wg69_xUf@AKg zDzO82LYHvsUGJaKN3HZ3f$ZCi$NXsD=ie%ZVI{=kM5qba2luVm+^FA>z~l=Nn^dXm zHC}ZH)%1{z$nUM z>DyR`rxI$CXzi{OTN6uW!9tw1CPX>0Vtz~eS842-ECtFE4K{A1A~{m zBoYl`1>!0LJ@?QD*@0(l(8WI(;;CdU-honqlhQC$svJPdN}WNsVBwaL$K2|%VhvE6 zt4Z2^`sIf5BG+jqKOlnF1w_9Bxd)D%xQH-AZQ!{>^(|cOtFqwZ8YvTZ;Iwam~b}y@#pzocyh?N_ZWQGvP;IsGV&>hODh&tAE}w>zp6kOHA46)7tFPqJ`pAL@I(0JQx@%pNuCn8}aB{ zUkGX$|UU(nd9c1hjuoG;Rmv_XIaNaOl%LMuOA~jViUk72%Ow>V7sDi1 zU@hUtB|Fu}jaG>i#+Yil#~o{&H!WtIkVjz0^&6!eH>&BX(fP4{c=>9j!YQwCuXG5v zBpX3oRU?`amv=s;^~Py~C_RYYd(#4i=0!mar+~UR)N!!j0MYpEQb;AP2!Us?nZJHR zh8r^vUN*zNJWx(a;a&92^chF(L628FD6yaeaq@X7m;GAvcc6k!C=Tqc&JSKJExVD1 z_G0%x4fN~;Q;ThWh~JIgCgXg-VMccv#)OWta(2bQK#|U-SEbo7%u;HFs9whdyqf?&UrwH-X8h*0DKA>JP ze835ENm>7fY}g+lvIa0bY8axRfm~4-r*sgGW4ArozDIS%vGr3JD5r&tvVFni>VkN` zU(@qJkA){Pp+I9PlIuJb07URR)u)e519zKQ4}5!Yl|w$`>?Cu5lmmDRb+{-njnaEQ zM;#UYx*Mth4`y-mjb$^u`G$5O6|#re>qO)Jr8G_wx$How%%@>sS29EfXz=BG5l*tG z3Sg%Sty`3xo`u>kf~}E9 zU&cdOcf(~TWv1&T+O^$1+y3M;Zl|mvirkJ|G=<{kv!w8_RggrwY^@sV_^GL)@jZ)9 zwrx}Mwi^YHxT>DGJ8jsgZ9aXCvqVx(9IIfv92TU zNUu31$P*YU{%-ZxWYnX*wzKP&JRQTsgKgtj1ZQrU?CAJ!t7K&s+e=HXll|`)b<>h0 z8Jq&r;i8|Gn|fEplDt=G3VBa;8H{E2(iZIqxtpB1;Dzm%h0w?}q#rM#<(`teWlaeI zyfiBLDH!Q!tplxWw*l+bIgsv-pDRe7>YtH`3+-jCIF+BjLoE zsyOtk?%=4`^y)!&8pQ8EQ8eqjMMr;%4{jk8e;~h5Qc?O{sBSm=p#1m3@ASay zhFQ&q0ETuJV;=Pqi@u6X##{PQ<);Lh(8)U4JZYm(+iO0qRr~;(G^R(`<04comBc(Q zH|jW|=bnYM-8*rmvgN1D2#6Fa!pk^?gcoR>VqA)lgp;S=Ky(sHbi|)W4eXE<}Gz2%ZMZEEX`yd1cM979;T?I`%OAF$AfTbX&LwKA94_Ft( zS(F1D8R*s>LKHV{z-?33x#u#*_oHB*v%N?|NBe2WMnf54@kh(0eCY{gC^xe0M!eSw zq~lIdT-jD-?c?m){Va8J3nbi-vn4bgdd~q)qk@*0TktxS`|+EkdQu!16%q;Bz6nP< zL2Q^8-9xVE;$?O2xmR_r-f5!OH++O{K`Nzmoa(I}t5{T=&oaH7K*+6>v_DRj5_{BU zjIGacT6)ymlm9XppUbd#?_4!)fTZdEoMBX{!j!GL`dGzAuWDk(BV0gm&SKrFev0yE z#`_ZbDNKT-C{Gi-yGP_U?N*6ju`^ydQt<_^KD9ESas=pCK^Ey4h)?g|B~REkm_Y46 zZEY}EuKc3D_;RJzyvv}LXRf6zJ8hWiKXGC}QYFx2E)46&60Ruf8J$J(QI89*kc@W~ 
zuP!SCEbPlmFloF+<2RK_qBMfZ!TuccVHOG0u-1<}jmOnjcn zSq9J-D4Utjg^BP$6C2aZi-fs~#~3q3fd$Yb#C>LbzfmHP|?=T8DfZkTPL4Vkpn{a;8u&B1?DWP zNa$EPKWHdsnd8^NperL)llY2p{u$fr6jF43ucl)vYt6g!+bZ~aI%JKP9e0&sRJz7J z6I3ZTA-nou#bijoN?BiQ!g#Dwpaq%d3Mcv@@`GTVR7=@uOt4GGbb3sUZxDn#ZJ8z^_#45Jiw*pC? zPRH0fzQDAgLZ$p7yI=scH?#7prw@;UAzN!qq!|YG8!l`hp?o+*_fz9!iEJi>=xbd_ zWn_gVTXK{u17O*15GtrDxq;sm?*n<_oFsZRdaf436IvrBb9obfk#Il)s#86#L$3;u zh5+|-*ZO*md_n8*+_K2t{ItAb!nf9^HKaiAjz$34Ue2pZ z04n!aS!fot!(hO#R1Dw(8Kdd%Z4In&1z6XvC{D3t@u&p49c~9s`ftJ4e(jQkJ3Ju(Hh^G>M|Y4ZC{63WTwaO zcn9P3uBuqaRK1aB;fzb^?K%_Lq_yf(b=}NfmkfJco(I7^ciA~lT9Gh6NIy>3#OqZS zCoUb||F#iGV*u`l4T`j(d*Sb66q$zQgJD$ey<~l~s7}rDIFw{>_iJ;1RLxp=ksb1l zm!qH~v!kXXGikSvz9D1ePO+hngE8uV)hLf~EV?{KxA0YWB}f9sICsy*QH7ZwI)%a- z&(DQ>At)cvNc1ZMTAM@yBt29-_OOz4M%SL!M#J>k6r+?+uMqZ{d{uFBRIkPeX|i(*RHI;8kcYOkTjTYD!{7< z2U9qegu#+3UH|(OtB2U)VfTlVxa`M5vVU_UDjI1F72zieh2hJozhLD&!Va}v%(CMQ zH=AQl?zxy=^7yC@h~RKn6DW#j!Tc9Z*MzeB2irHrUi&J7Lf zdqmPGj#B6bPK|l+HtF&oq8f8*=wF1v!#7%7mI_I%q_*NG$@X_~rE%c)1x$oorEG8F zvlhN+NG;|6CL-M=O)ogO+z$!~@mJxWNG+RcHQ?`JHr2oROCFk%FI~4tZBK#m_TNQP zKnb{@jtc%Dw=ktGWgxW#+3)Os7ZYTq8HFyA+t>A9)H;1d6-jM?!bR!t;sEL!`(NqK zpY+jz`n{F`q(=M_dU5#=k+|jDbcIggmhNRg(BD_+|3`~e!sY=XLZQY_M+diRlnboIPcYLMkMJijNZfX zFB&=w52S3Dwxq&QhK>4(4#>?mGOLD0i7IwSiVkoe@G=chz_Y!ZMxsc3AgO!E(i^SC zi+Y^2 z*2^{NYmu((ut!y12eQ8**toneDBVsY;blB~bRk@zHBCX+PC;{9a%G+06X#mu`71m* z#^xQf@dOQZm0lrplOpt`3(74&C(WV1Cf@_u59Pp(z$6fkf~3G0ACzn+ov^F>kI}l8DfU#Bm+72Cix{+q z(eK|>!$#_tZ(~YGEH){8U?rUS!l!fmIDL1}vsdTYn2dJ9LuUF>Lmk4*26bU?gh`w= z+i{jx>vu+M^4KU9oQlY*o(gTUQ{IzCv#)CYa@V)fMYatWCSP`~(T zRQOJC;ptKTZZo83=c^}NHnA(ud1HaSc{ZY{5ykyipKmk#&Ac`f4Mbi`WKBvKl1*bZzQ(JrhriQ;51ul>Q_ zEjO6#JY`w{ij(&!>`_}bsqwm$q(AyA&K|j4DRbRn3h&at`y*E?ma`&q1CkUWiH?1p zrUTCsFdV}qy17>F|Dv9gp1(HN3|oF0>MXbYGOa`A+B#bH*}`*V|0!Y`(t(mLje%(q zS6#)ibJOxr8$V45Sfc$=*&zPtIU^t=q8JR@9(_RXy5)u~{BYP-(I`aC%j2?au6S8I?s5Jd*$Vh% zX$&2TM5 zd5=+gg+Q^Ynot>0ffe^riB*Y_LyILCo*>Rf=dZ?K2pBs^pjuV{{9Z zzUH%E{*!@p7NXwSP;kWgmka0ukmtSZ-(5JWXik?2#V!Ut;s%a;6;+G?UW%nuBjep` 
zW(9)!CdT_$nIX{e0na4CkJfPo9}iudiRyv!?nQ;1S3Y0)kQ$eeaqxs!^{N!qFfAl% z9<#5osL5bxZ6PO=jw0alP+N~hGZJuZ@ew9~P0KDp$#Ra}zQAK>;m~P|xfzRd)hf?Q zw~^q>Z-ijH8n!I|Ub!9(`!~xtEC&V}_NdS;#C?^$t7Agy3s_eY}=b6}fPOTwZV zfv!55^at9a_HFE~r4_cm-~vKrR_%ib&gU}?el2srEx1JWm+TIXMU|RRVR8F{Isd(w zOML5`v?O$xstSp870=91$w>9v^P>d1)*?v1fR(hX9!15eM<1Bxf8tF#@+S8XGxe3j zC3`h7UCWJSjg{}-hg8aR_#DS5;`R49sLZ}cf$VB*Q-*~B`7lEi88)1Io$skYw_S~_ zNhj&(p|j3yd)Q)SGP5=*PX(E7#c@6J)cJ{gxro!CQ@Gboq(Sxc4iTOG6&7n|e=d+;akB!!*Xv5&4AVJ&vix&yta(=c^agktSGlp+pCGCCExN;@g23^_n56mo-5qo;2r{s=0OH))$NbhG8wbU3PqF7S_7$jH_PJ-i+-D6@ToSo ztZXurono~}!L4|n_)m+jqO78>amRloTRJFZK#sVwK{3BDJM*?b1yrkVe`epyCP zSb1~dJu+Qn7fmppWsOB4jy{FPppE}!#R5T9&k<62UDk> z_yBx&nauy18@Uk$TQB zmVsH=bpb=vu0cwTs*t43(u3rSZBmAVcKYY`r50K+&@ zn3i5x8W-1Y@I)S?V6^Q`n?QQ>K}_6??Zywg^^b=M7cp69$>&=!gZcmpdW75~d}!G<|B589a=(?<3gW^91r@F)U z_bnc`s~{pIJDX2$SlNW`n7)m=YaD5^Y(bArhfLL-fjx#YTWP2fdW8dsfNnNTOJQF1 zU#*Wnb#Bi(c~H;RVqwZXk`V#;A6*Og$v7V&FRi{sU1E}(JN%@O0lJJ?hn#<7K4Sd3RC?$ZzCN_7gT5z~aw*JL)fW=NluHFn8l!Kob5u#f5eDsRp?IL++^$pe4h!pu}Cz+uaY1Ut<)`du(bX~5MJR_7; zQNgCVBdOH8-L6{v?4P3>yiFS9KL$z?JKhM-P>jAAj3Ax-a;O@ku1Ok8s1B?Ak9A25 zwp=GH_k$%t*4!0mONMKuJKoiC@QvnkvCaj~&Yo271i41!50A0~@4h0H9iQuIZSAFY zHRRBK7p9lT=Q-_P&jW)$Gw*xu)%_mHrDW$0a^KrcqCv#%nx;@UGd;7tY@2`Zyk*!6 zyk3Iru~D9k(_=@bj6Q|uOX5e0^KT#3&^yCSQ}su!`QDYnh(jMvz(->4j88U+#9Tb% zQ4yhhlZL;}luwqT=ozsJovnCW{FUxy66ju;tEQ=}apfH`FB5L9X93ooQ*I$ML$ol~Lo+Yi_bN7h&_x1IgKb`D+COa+!-T3Y^7#C| zNlv%Ok6~$3=JXA%eRdMwbvH26m@lI1{Y3ZxSr8;+vR#kn)y=x0-QLnOz){^tA(#6* zRiz{}G)ig9ninR$0ai749Z-s4C?++-sm9?g`Wvjkk6me+p?eVRE(&lPWQKQs6JC=Y=0q$RdHQ`vCa=s zYv)V0HpW}~IXx)sI*$xCqklyW!937O{BnDY-`A-{=~wT0U6T!2(Sv-A#MW-NLk3aW{aIJS6cp2b@tNvyi3ApaJD`SOIwY;r)SqQe5-N6hu-65 z@ElE<;$$e!a>?C@e{ckRV#HS?e%*%~96WoVB9*|}GnJ>!ZPg;S%qH(QTEBWj05+O-cp&PS6f z=^{0W23QZrYV*7Eh`>VQlvd?PFf}B9Kh7LDR}n`%)n`T5 zXX5il)iR5saENl^EBKAl9hxMeWx$~1y^!WE*PC_sVhStA>e_N|!^(}JZ)&n840N^` z4Zj@=(K09Lc^67+7rt*yee2I?E`(+^Yaf26);YjiPO5x)bl*GaWcFV8fr(zyR%4dB 
z{fxo+uKaO6f0qGcJKAWOLoZhYgYldP!ZXhjgXic6X&@6_WZfoyA>yIZO3zbjT$62G z_=bj^+$xzQ1Pj)a*Q+V>^S;{d%feG4)FZ=`4$;ZVbuYX^@oSfrZc`Ns-KO;JFKTG| zv$VtWVc+gpp2mTL>4(mtD0SmFoTUzKB&VZD!$-t$HUIt4@E;2ep!Qg|d`vV{C;WhO z_&0By$Pj+eU^AGkY@v;=%(PdAl)-H0(*+r>n;JEfe53Hh*E9{Fani5jiUpIYq_(L_7AEQ6B0NnQm zT$vieqX;zwe1#k|R6@#EAB9<$_{5W8V(kLc^F${(>0QjHxhqQ9hv-ujCCnCq!!Op* z08x$U221j9WoFCx2&Y1UUxlHHhWbxO*O2dm>V67|s-~v=bdV7AEZnJ&K>NvEWhia{ z*BthEUu8%=M|h2WZA$uD08-vT42rVzG3k)a-OEJ>PGJ}!PBnF zqI33$nimD0zlp5@P#cEe=9Ua}Lb`}L1Y7buBK`-w*aCK7-c|TnB9!>rRNea@;T})t zRZ)6ffFr{8cZV@H^0ml@s(qWZbIn;9HjBZknyxVYXBovJL$43=nMm%5cvaN3ymidm4! z?K~Q{YiKg_o}L}ezuM_GPq2Y!2jm1gguHeSUg%CNAC}5Mi##1kXj&&ElQjD_DbV$W zeoJr&;oa!)_Y)n;DvATltii8T=*YGECM~e^Ci>IhKDcLHN;$_P+?>jwz^bQq*CKbDgJqqH2KN!{v$h|7oqRy z0_r);PUw^ri^E2P& zV#X$wgsMK3e!kXxiSebu_9s$+d$OsY5R(3R^ehwCnRl_%%eS_#sg3@WmX0EM`IGJv zv<@HBRu{rBDTlNYp1hlrO~8`kK0SyHh3M(qXSM4|dWK{>qW+TvLi4Z`dceV@sgNb8 z=y0UTpw-<@9k%}}D8=~byZUhI-A#qg7T))pzh-r(1J3pfF1z!6B3!>%2)Y2J$h=&R zjIGLi!ZTGj8;P=fjpyi9|8UMZUl6mZvyor8U7v>igunf?l!Oj{cqGS>3)`~W0+V+J zbEJ>3sAA-FmhEDM*)2062{6XgF*djv*KL6j7XwsuaB4|?pTYWc4h3fa+1*`7fvyb$?O8$TC?HNVMILhB-d0t<&<(7(TY{O3c35YLnL+ z;T52uE){@@ty6Mj>|ej5VLP%kvOD$ww}G^8eVs_#?osDQD8%O2^_`tigm6;LUPt)~oSCEG;i(SW8=kczPrWM90V6Rp8lz)Xs?pVj>r`GH;Fz`B{i)KD z!bvY+5A$mI+=WatH~c%D2X_MV)SH(KTq5%6)z2kw;E&7A1b=74MTS$04q3&Sf(&8D zBly(g*StE3RkoUM39v^|MS036`&g*Eggf7F%im$r2^j(YksK+HYm{O%tVaL%jgRP$*XGJGw)P9+dhH)%mifEo>%ZAI^SxeUqiwdbMrKxN* z1GMuVQ;w6VrvBww%V%6|`5k6Y^6qa1Y1c|Wo+;TkeKwht)kT?i-_**IrrnYp&mmC0 z7AehT=#cG}tpyBIShC8_h}CSu$M%FA&+T_e^4~X8J$izfH3O9}G-umAhUXUT2RBnS zBzExuOFvSC-L6O@RNdt@tQ`z4a?wHlxijeT(4EgTq%rIvX*Q>>cf}K@iwD;YK$#k~ z4GUc}1|;M*zIi@P%?*>J1sbe6+dnP^Jp|2+>;KV~q&SQX%~>3{SMorU(6?i_qq5`g zpGLRPS8$sQ2rU?H(9|Ap74<~jWk}i-Gwy8tHGu2vt-4!B+3Vm@#JEDoycG?fws>2@ z8`zUXVg;_c>s2{$ITj_D-hAKu#Yq_ZVvO5BR%|{@6$N7~XqGce8RnmtjxAvRSPNYl zSuHX#O!>?4P-YGtv`o^^%q2e`7zD=TFD2^8h2! 
z|Ct1Ngzj4}sU2IadO;E29?!lMA^Kf+K4Sj*b?@zVx*uItujx+Z(T&-1^K{QJRY!sj z^++N1&F)UiXIOq1#l5l5qNPon(6f{G1QUJZ$?~|v*pNu&bY`oct~GJ9!$jF(9382W zPN`!_U?J?2+oYMaER#{)y`#9Re%rKp`7&nP5|wha<#YAy%>io4`FfS*@A!B1Z0L}& zn@P%?q#(%?(DZ`e#$Q(QHm8!u;0#S*j*f3fR2bgC7fb`s%y0dUuDL>jVSq!;JFi(_ zgn|3{@Fjv`!e~79OU{@TQYfwcFkc9@kn?6BQaO^_yevWri1=dTCG|}`qUeOy{rZQ* z8VZ4({idqcqr<9%3S4jsI=(i>&NavD@{3aLuqPu*I<(k2Cd0=l)9{z?MQJ75D@MIK zmaOCQxBCZsGCT57)Rns3Jx z{@!Z)Lv=Cw9XO?F=#Z`UOpd0Kao*+ri-Kdpr8%mQ`P2AOwP3otQlt0%hKqMW2f0FN z^xKsNz%JvjKe@h+J?RN=TuNpy4sO=VP1SsC3$rIh%bt$YBE8YyeiyNFNRMIj*FI!m z^_zHAVKdqkD0^XvE$upvH}W_mdzIU_yB_lDjzor?MM~$k^-ket#VTGIInO>GrQ3}| zO!vZ4VyzbF=7#>r6hEd`p-mt{aINtVA3bh)1^QfR?T$X{7Ew(N{^@{BIVu}9H*|4` z+FuP0du)*h@f<)dvu$eRrRW;x8S)n3C}iMrlZq_ncd2~99_|$!VPLqhJLL$EYl#$j zN^-@wyu2lvhPTuRZeh9_*>3MXXe&OM~?I@c;F~(qLjp%w0E+ae7zuTGp%st(_`qAcXm~IPuQR{UADQWXKhWj zm+A7|s`IQRr|a$juiCnYr>~tocur0{y^%+rer%7Hum@V&EeLPlu<{cosZ$Zz$^Hsq z`uMqw%Ltd5QTsBc;o2E9cKqPm@{taIRZ0K=2fEAblKz$m(!{P@~`>PuzmAuQ)H}~ z1%1zOu+g#!kpLlnjpXSc;&0#YGS&wN-;$o3oZOiY-0;R9tzDk)P;5rGRMGEHFkd-( zORX^lt#0}hK4s=vanEyPGG_Wu>+bE#Ggp)0C9jEMqI6iR*Y=g&hAGc}8I@ValR@(U zfhI{p^!utc`ws=aoUbNX3v&dE|KXUVAIYr`)I(03fF5E}mQ-BBj$eblmqD{um8nh) z8QaMfVgl4bF4ZA%caKf@yskwTRnSu0E8Cj>>4olq8uWq^dxo3zZ^1D$)Iq!RG~KW^ zjuh)ij@SaaGV^O$b?;E-_uKoJm$Z%VFI});(q6v2#)4wZbc*i{#9+Pq3bQ}@?XEHn z->O$+f%^6De5h!<``OdT4nql=3=&d5o}IK>h-vk;W}W&fV%)2LqTU1(F9!_;Dc$~% zB4N2>4is_yF}88t<*a&F{g3tPxO;?YxXYV@jOy|(SJ6@Vr*gLm60&Sx$Zjt4pM6?w zT*k5ul=meVCEd+;?a4Y2HcRVL0cym(t_P?ApLYjQ)GcJaeMReV$6S=G4tiiJpI>QU zr%)>q;A2@qRn)vSbIhIJ%_m_c3Gdtd7gH zt2N6Die#TLX$7wj;_+7(&we1Zeo8*V(zh5TU6N_BMbex|$Z}PGpfMr9v<{z^8DlK3 z+w9jeIHxP0F$0X9iI3Lik8n@ePu>vy~@216q@UJNrPNv3JgwCE>jsCpMhgM>ox3 z)gQkt|HuNs337}|DRG`}+b(h1Tt(~%F-s_W=hSh{iIa!zdfNvMl!;E!KgtxKs%6Lv z{;YN-h;L@Lixm%Rd9`a3G8+Aj*o5b(WGAHhZWy!Y8Jk{Ia%`^mXwJrh>>MmOXhX*)T(!d+<_(C>M(J7j(Yl}e^oqv zNTAIKONrGtoaKoxV_+G!N!l_EwiDZ$pr89H6Hqtn^uU`IDkDttjbRN>8QETsZ8lu% 
zZr)}ERAf(V-paQNRa_x`uP5-A*|_B$QzMJQ^${ww?V}esHBNw9b7aet>}3Uo*zQ+S)P8sWOY)73;Bym>(!au555)3IAcPuOL4wQQZ_@<#83ZeKMbPS`@Z(j zQ1JD)zPDlz{VJsP$J7A#?|&AbmiKhpeSvFZ&>B%Ijp012t@RccysD==DAV&>^LXdVW#R4G-a*3!r{4Lz7#h5x6 zHg!qN-F#E9efy8=9SSlF5?Z@02+|Q!JU|?`*S-fwxmSzqG{6$(m}m?geY+(Fp z-GuEc?p?!{+oe^%KGI8!l-S(Kafxh#!~9({nEqw2Y0(3^pPd=eHKU5PZ6o_d`IP31 z`a_z&axE?XboOj(I9fzM5lS79aI^AH+wzXR(Cx0OTP^iJyR7f*lNt8Hj|G)~(!r%V z+>cG7#K|3#98BNS=B9C|qe2Mz$mOf}UQ;6gA?vy&uUll|~m|8n|>HjOT! zt8}*Bn$KNcua5Ymj!!;ij`<@cv5(<4Jo`yf<`ctHetwSVOeRvuk2H@d9k6ZdRjTAD zd`u`W+mpb78io{Mej+a0mmf}4pLTd$;s`fiiz2%2`@{(=68H1Oa(mxO!`U0CriTB3d zwycnh|7FHcQXpK${r3CX+sEYXfb7Q3l&l^_d_dp0Ajjf33j9UWOGgb-oBACQAb-*U zxVg#4}n;fJk101Bo54~Q!u;3YSdoLcXF zT^)rATet6hiT|Z!Hpp7oJSr9X&m!HBaoYEPkYt@rua(fU|<{PYvD zzZIuSgS<};BO<0Q-fVs-Ur%~*?Qg}q|DV5HUWwQ6kPL0-#f@)JoghO`*s4tf!#QF;ydMRUPVsV_lgf8?u3?G12(!#4YEn@0*`2E zO5X5sSvKg6Qe`RExm?51GVa0oRvZmf*Ssa+ti392T07BCV%fARgWhc}vxZE2ZFtA&wWKi~Sn&fG`Q3!Zl%gDuw>|KEdUX z@`>*Kn^|sY?~Kfl!O~yo$8xaP8=a>gM=b#;hrxT6PR1Y8!y@?);LajH?)%71wJ6Zq zN2k)oqv=XWv(uA`k%K1K2IS4m4uLNv1UL4o?_q8UEu^36>s zI;sZrYC^`e;#)?`kz9pRU{OKrmj+W6BMXPiycujx;K3^_=`(*vD=-UN5%+sMSn~U0 zubri&Cf}~5cl}!a7Sjh`R>VGy&wMFg#`K4W_vNYy*MbesuOe~&dE?!hZMl2zmyYE> ztmpM!ue+AJ>96za1w)e!$p9Ok0nH$`ev*d|WaD;}_j{BZ1d`lA|J^t`Eg!G_THsup zzX94{Xp~`;X+Cd2Ka|lPYA(|*pv7Ha2x`86Q#McR)aJ@CK#4G|wv1Tmo3Ni~*ZNMk z8C&6vt;%8W5&3&$p0t(mGFILGE`Z^ZevumYm?X^1;u1om`NW_~f9uutf+win@@3=n zp?9yy9Sp_?ESs3|s&=yhBNoD%!FG@c(ED!FMI z{v)o&a?utyZr--xn~=o^9lgTXg-GyeGH8KQ$=eCYLEKXk<8X_x&D2g1D<8IVyu1w6 zPkWNecz=hS8nS=J)+^T9zMAKGb?N`G_ntvfb?eqB8IdR`IVn*xG&C6$5hMx*az>J5 zu#qMQB?vS@B-lhz0f|k{Ip-XjoDpb{oZ+ta4QKCj&)MfxeYXmi@D268&&m)BToZZu*M`91Wov0N|CPLQnA@?{7O0 znEA&Jpqs6F#!c#c+)G;MHD%6GfiBCp?dm5%iSu$Uava+Cn6ckSh?^kBzqYL>#K*;( zj+jspMdGK)P$!oJ_c_&W^d_52-+vLFXMt-6`)az)e>Xq%bTfX`lTfz!)#skx9%p4y z%quS@d}xF2n}o>#&;2~N455(XMI&&a%Z#PC;?}&%=pdrsGKKJu<$`{~<3=6To_pmq z?-ytG$ldlJG@Rr1ar>KXPEQp{ub|-(nFb(Vo`;%@P)V zP5r7D6OIv3J}CdSg(XA`(qzHx56L?B^n15>h>+9?d6*k)y1(Wq9C%bW*ri=VcJ~gO;$p-p< 
zoOPZ+M4o}{uZ#~V#XiJSj-5=O9!}rUNhMN)Ph!k{0uzqXioR-iY7Vt@<#Hek#vki{ zY;gR!Ky<8Pawlm(g3H^&G@DRSF~b>HNYT|JGfS0z)R%;SIP zYHIdc7EdGv|0&7zM}_@Ki}!2z<`T5N;Jr3B7zWS>iPkLmsmO)x`@Kl7)`xNoXb{ol zNq8yKAk&GwNZ;jk9sbRGnCCd$1_?D;UL769aVWgG2F|0T%*I$>MYVX&y8<}__LC_L zDsKAA=yMR~1-gf--5r1Ag+VuM_m-^!>$9sA&7Pk*d1Ip5r_F;RF8&>=&1n(q--D)G z0)Ncdb|j|hS{w8|l&ncZIU6nV{bICRq-_gk^qn(UtTqQF`&hltVZq#<3=RGUJ8CtX z;@cnXnrOK+wYRjMG4q=7;>f>5W+VDRl(IO}Vj%M8AkWv*q$x~l^*@+5n8R9^Bz|;&h{m%6lq_P|&M2~YJG~+ssRd+5+mLt2Ew_(okL4jSR47Nw33kwd=?AIOlNJ4uo zu?p*)cTNQa^~jd|XXxBtBfJdfW9(<;JI)SmqT)0S@ebav@wM^(Vt@U8$u z7>fy&M>3t;9!3X5*knmC#ckH&hs5iSNS9LsFFbj;4cSh6%&(^MZx5#MgjM3|dR`~;HF=3bo*0_fyoua&$9yl7zXlt{?>(2i3ktK0wN0XT*()c|!o=UzkNsWzPby6|& zuRE!u`RC?~T&w?POvb@jKNkm{X%pVS)KYlNjwKhTy1N>6SZe;`w4}|h>$&$LBxWuq z)8B<*#7r_nzmWEX_PEfDV5s2kgMu@xkqIV8)7hObj|esppB~yu<&d8H!T4VH;%MP} z@Y)ud!+(<}qqL7F#I;x5Sz2nfwAG~+3kCllmj=Bp2m6UV4rj~^fRqf%Y42Jp|m_e1RN&uK6` zEq7go`FPAa>~#!3GP+kRvFai6B8R=^I# zpi|Ejeb=Y_G_~l_V&N})_J*)Q+ob!)C3ni^up34-cVli33=LY-+n07PV;>+g02*ax z^E*iVN&=mw z%TlbGFE=x7EW$_bHog~Jrm)C^`g}?;mTr{QBvKxIm_GItUvH{!GgycsVZR|7`425F z=5h4r;?4BQ4g7FvqXeu#)Yn~Ox5wV)O?pY$%8Nq7F3^&$cPzi4+r^Cz8YEAwlw?`u z>;djNbV5ANY0X{kHYUdgr*R~%0*79FE4GHcJ=W_zt5;-iTPu1?qEF|d!h7+`>-4AZ z!3cL6+2z`!-A`$!^s3Guawwz{S3D)0sH{Ey4pUgNE;bYcbOX4B+jK_-QTgL0{x2BeD8z#e$k3mg`rg z3mfO>cSA@m*`x&gJ3Zpm-=_m{4xcFmF!qn(-aZ>B!Wx@TXJ$OPhx}KfmY+u-X0R;Lsd@5XS;Zb2`NnN0ji7WCd z&Q<1DGG=10bxUc%Y2{>&@aN0E&}WL4Om3#=%~TRqbV+RAd!y=}sa{ub+=bm4|Uu^LWylIfddaAgg|Cumsn1iNJ|rh1oH5v4wt{ zN{{%&ZP#f%N}w6J=HQv%UN5{?sY&yjlBo_d#p-5Q2hIZKtttW`qxECS`23^x`HVsbh8 zeba{bKU`lt>xAu%)qly_=Ko@b4N&kY#1*m+%Yr`S&rIO2*)e!YZgdrNGLzuuMy6GM zZ#&*En>sr}@nIBwh%fq%3VK6*1XL-=< z#^ny;;NnTc*>YCylnIZ~z{DCRfIzV~mzyH8^QcD27kIzj(g0i3lsN6z+@3KUVAQ^YweR0Nc{iK5FFgEZvM^3#4SfXKnsPAF!oxRYY}FX+T<_rpExZj$G9`blGM2M*716oqTC{(WXqVD z24Pg9i=}9qpzi}f6YN3ldU6WhC#Y9TkRDC;xc+CVU>W?;Z-TnZxahcBWujA^<}Lbk z%P8A;r{KAkpuq6{Yv#5mG2x<}3P>OB!igu-Km$`Lfb)rNF`VZAh3*=t$E)OIn2+zr zTR;Dpv&Q26=DPKvkx`(}rwkqd-s#W{+Su2)M~2aB*Sm%5Snb$9Ob>)eV@h9tsAJix 
z^|a@{%sF1{KPGCBb`UrwA0MeM|4kz3IzNYN((!{gEFOUoFfpnBdKiIDp~o#`bh2CM zldxM?g)33H1Qw%IPE_ zyf{t4)@{&~FfbW=Ie>0Ti19kS2K`XK5IVL2xTOD%RWPcrMo)d02IM#d{o>aEO%7X| zL~`yx!#}VzO*qWD|2<1XGZT=gX)C;K+8QI^;r+6c;X%aZF6HVapM)^MccMHXMNxI= zKC?c^fApcBLwPdyr2^5krV0Vf6H)FrLi|k|LVuAVNXx=oj=RK=Uk|R8$Uoti^9Dm`JssCVXLeqaUHV)N7n#&uvy+KP&1bIq$1o(c*A_tNtB|1%~pqCnPyu$Kd5pT|O z`B$IQ)QhYZN?APMMe|_Cnl|NZA7~}=B5r+Exn~7B!qwdNTrcNzuqpTXNSHv=zhSPF zo9B(ab#g1pe9Yx1-75=-R-GJbw%BEgY5dcnveTL;`7&nvw5OVJ?;oUz54gB=$}Cy= z0Yz}@f6bKM{!m)?F4;hSOkXocKoc^GN6iFMQ4S{D;uY(ve!+9Cq> zy<7Za2U!ajCn?Ylkezd7CKU_wDT25;=Kkiyw5Gc)HS(u;*0{EB{r69K+y?9#;}&TK z<+^#Q)g1%s=OMFgoeraiwI%ICnuB)82#QJ+3JqAWhL!@^AAP&1;<5JbY0 zim+aw?L*rP299Q9x6B2jHIS-urYsF4l2AWtlE2EPX;dUZ@=HFa2Moe%o>=aHW<>8_ z2aDWW-0F8~455xmJl`h2FY1@4mvS%kr<4wm=jF zYiCXro%lZ*y#B%KrSg|>7MQQKgU8LvA^<@F@|GmBTHQ5>ofiaaN0hAtO`KKD~ks}o7{>=e2V?cr=cRGvRYT+;HZ5TG6)dtQATf(C(~5^ zz$SS2YmEq4=9}swVBgQ0ga_V$Jz|V@ zNFp#u5g+MBKu-#$hL@bBC?n4dnhQP)CP@(%P=s;2HR@cD(MN*1CBk^6C|ntcWrkr} zhBH>Sd$}!DK}_7yPd3=XDfK%(>a4R|vTA;#(3DG#ig9~3PD|GbDp90MQmIV77NKqz zsL%}PmLkgQ5q&g?AvoBhG`xBYzX;+GIMReKviH*kvv%rJ`1~k{pBuotzTlqA1yRc6 znFZ^&_g+LNOoi(j??;J_^g$o$G3CBwV5E+Z=i0t&l@S4o7Sy`fJ+(dZGSw-~Y+rKc zVLYgeGk!dyZC3I6DI=UJvX!^m1klgG+7s6dr&Ja5pSt^4SYJ4Qz+Wj#H7IY31J%4Uc1iTvnTdcP(4fVa0zbfpj(;x;m6wc%U({Xb)2(8 z3gIu^r%uwtdZg`(_l<`RE`MZopG7IIwwmvnFqrQ`#jkpTOO1_`-;Wy0OHl*tk&p11 zs{UbwT>ml)8K70qmB&&(SKp+2=?NIR-0~2B$T!xtB>K!&2xjh7jidcGBOqf6BMU5h zP8OHHupV2vtfg`{Z#-A-OQ2NiTGbs$7 zC@9G($2R07xhq=a{Emrn|Y zQj8Td7EIgdtjaXk7;B8*_iCyIb7jdrVb$_VqsPuN0`;uGIPP{Zl9x0(FP>E?+>Rio z%Y8Xrr#w+Pz!un;cbs?_4=ZzLcXK$$Ll>%uq1Hxhl%z;pK-4NuZ$*EvQ8#ilvbm|C z#E?{W7<>`g-$JSgY%-eP0H;l*nv)XEveFl2j+{4ey@2i_F!(Ht-s?*)+gc9bAr%|A zZhQ|$3ch68ESZET+bzs7TG4R>-^Q{wqZ{}~Md9WY?s6jVB~McPa2vLvD)1!jhlisc z!s~^_u}gb-=5FdTx(SEjs4hOQdtpNN}8&=IE`ww%+?`xJ_f&kdUrkz zS?OFW6Y{#fT-`Q*1t1H_RAQDF<^H-B`p-3`%Pxg?@`&fatVyLF{>p*+j3a zRJVl$t@n3^nd%Sj>M~epDQ#TO;9Q~HP>0Hc$(GyXYB45qzTDa&;JrNVDTh~fmwCM2 zzYogv#13)uAHw^pRau2xb;#7Y54)7|hO5S>FP`i@dB^0rV(2^O+|(Ar6Ev{K6!EP$ 
zOZwqdKxv|K^$9uK%7@7?aX?%esUis-ycemG`%&A7wF1l4(>>)xb5wId(PVC|%4hV$ z!t(u)((dj$$}kpBV2AU1ZI)c)!rv!A$uX8<6VVrRu(F;gC5u=Ewg)8%0$U}P9k_N? zs`?9Pr$E6>$3)b(^AMgtg``uzg`|)EevEg-2`eMPx&8Oie52rr7jdFnFfn?d)#YT~Vn^gb6e~eH^61FR98!7G`Rw=% zTA|3es4`&>VnNZ*y#jOuz;Dj)BR^dFuyJ~0?bsr&l{e0!0p+B|8iR>!l|yM}Lu$5a z*17jrp~yTpkyC55^#v(z^qG2iv4kMMH9eWwh4X>ets7%PeKuHc8E=Q-#0AyWyPt{} z?kGFq3a>q(XM{G~N9}wwza2IsURKPmUl}S(do-zM9ye~!6UY9ZYjn!RT(<7}`D9{zCaqu{!KVRwN>iZtm@9|gQ!rwT6~ zl2Q?1c6@Gxw=b@>%G4`|2u{jS4L@aM00xQ@AyR;d6OBCq)=JX!wa(1 zPlx4TMJXINU=C+Yl3VWb=j+byFXuxA%>N zfM0y4_wpHFZAlVs96dAuq zo9?t_eP|Wm7gb84nYobsT2IY!uu`cZx3@gHeefg+OSj=Z`8PL|sGV&e&UmqDd~XR7 z@`{LWjOAqcZHR=-0CB=%)?(1@0 zwV+S~X#VPYT|@Y0Lg90)J)HPDDgnoCoN=iCp@HUvrUgqsGBLuS8A-oOc2d=g@na?{ z93xajzbCt#4HgJIr(JkL;d{kqhx(xvcQBc^41zBe7d=u4@;#sKF(=# zbq??zXX%Tu29fMu*?m-~$Y)E_$oyy+m2+L0UO1Z1W(;^$cxCkM`5*EhN&P~Xc8Rnf zGT;fz@%seteL*R>DL8adxw%9Xt~Wv+{*;bwr+c!mQ{Rc3RzGd*7pK`#Y##q`P>pf& zJj_L+s>~d|DK$|xmi10jaq)HsHcWT3$%x9NcCde_bQp#rTf-jnXin%Ynd64|t58gDF8Z%Eb`oI(Kz7xoZ3=|4T;M1b(qREkoT|LA1&m#Z@tg+U~4 zF61QomJ-V{ww-v_o1vSiWm*@7nCIwK^LdD*MZAkS5NNgVkfh>ciIw^=Hy_es95jC> zzPG9u5XvN_AbN(|?257I_@w+uC{jeDa$g-@$5KG8-+l8}U|MCpUtRv7mFLM{)ccQA z9A8)qO%#3-8zOPi?X4+9Xry7EY4HG%ALY533;jNVLk(D9p@35N12uvJPa4nd^pj5= z#v(|*?yaK(CGvAETuhXYDeMmI`7x#Mp}6@jjR0d^H`-n>Z93HOnt@41t?Sd~H=yH{~b4(X5#|A#t?jm65qTjD>~wm1|wx z>Fj1U&n)$%CPc80=jkyd^Py=tW*E1jX9YGD`#M;rgJ7)vy)_j3CyA=DeGhNn{#T07 zO=HaxkI}Q;jSF$|cKk8nU_AiQzGM=jt5g`@2?3ea_W~luk_{|oZtgi`cOZwe0a};V z76Rjy;#6iJK6_cVQ-0*owg`NJ@a)oICvT7+;$(!=o;|&6_Gt*Z0HuZ%A2+pCb33cc z!Cb!0{OaePNKj7E?`4!cUq$sn?Zj)s z2edlRd_11S;_Ut}&iQ|vJ|vwtmb)KdZEhbQV<@#);r~y1h(Gg7|0hA?KLlz1&R+cw ze*6#n3QPU(SW-Vf;JSr>%|!gWB2F`0IVxmk7uE7=7K~@~ue+G9kf|w;7I&|j7W?~; z$K(6UNt0k#cUmL!BHL!`ov0y)M>egH#`-LGXa8Yz|^_G-A5L9on2xlK4(5+`Z@oF;_qyUe`rk8zr+Z*5I*PAj|SkTSghzykrxnH*eS%JY_Jvkcs()OJJ= zP*AwQC&|VoMx5_&O6Uo!Tdf*)m2JvAqR5o91of6Gy1Zb^G1LE8qugP~I#o|6L_(hl zXk=^xx_KHtNQv>FewnP+Pt*kC>TNt@Ds?AhE}-WCg;$*9;jfI*0~*XGuV`uSKfna? 
z#4I_nJhtEur${$sslU3S{)al(F=hJZz^twQPvcrD-?|n-f5?o<7(WtGo>JFOU;-aZ zk$HU?infQ8OuYnjja}|fbikWsm$%vtZ6mseY z4Wbc4v4)_)l=CWptZ|5=au;v&Mm5RhuraGE1(WE0*7a410Pu@o+Qn2+xq(6|S&s|UvQmMp!JRChGq zvzeR_Uo6SY*k`Y(y4m_=9bc&U$o9h~e+*-i{z467YQy;07FIkO=`ZRTVANuKP+bgvB9-)|oKaDAh<8FQ}U%`ZL zZ(GQ7^Y~uXNi7G^#8>kk*d-hLg6n;&8gUsR_@H%b2uBzb`uTqp_}8`Eciz>Rjs{Lm zJ`*}M(#4=gwnB+60Xn$3L6C127@>M>Q~QYA%Fh-wi?q`5>WhG9?`%X?^lhnYbcwxA zI(qY_4@Vm!czY3R17B!mxlG--posfnBVL*r{rg1mcg4LLO>27mD|A}eVX6?oQ^9|* z$zJgRgAPOYy*fB>Hfy_ApGi8?%!hY|$0T6Wz)vlg(+=xs=_DT8w*1x|y1RW5=qboI z1(6$C7S5&ORfu><}0JLF$`#=u>pEy8`4! zU>h%_TqFSCIVB!={e0H&#(jx=-c)I$2l$Y!kY0jqR~I>eO_o7|&BsKMs*bgR$))55 z8A2qtGo=}lg&Wx#L2tb{b+5s<#%%>g>upLNHXEpz9AWPoV9=&u`+11u_ppc>NV=gl zmG(q#AZ21#UxI}0sVV*MA2(_rv%FokUlscX#{gEYW;A|ME4$Ft5hbXY1>;g#KWp-a zv%^+V;-DKb{2?Hb5*LNbI6qkiXAFF}^^{Wf==4)S4u40wuM`(h^xI$A?P{ljC{r;< zb&|&Fs4P=3Aq<#-K3Y;7+}G6J=q1ntWPvap{}hz%p3NNO_q^Wj7Sl^Qy4V1YWH)%; zRAhge{DQ4fHo}cxPHMXi@s7UPY%Rk{+J{-V)OOxI)#AsocVNu!yoSK({_@+cFr~&? zDKU5qe#WsM?ac9pV(M(44Mvb_OHw+E)b91L0vqi=8dnG@HZSOXF=aDihqt1esupKA z-{K7Ti{!W!q;xMPcC-l`#9zHw1UY+bl_zTPIE>;xpfumL(L{#(5%4`UR16T5wUy=| zl?HZH%rVW3vO4Igf~;cNR_<$UI?$Z7NmAPc*+A1bGqfe`boMa@=pnuerFIi%w9p6=?3o5mnyl(+$m-> z53?{<(qLe2e9sMU7#ffWi8t1dG$hFPTcr=JzDFwd&A#>pWsL=)C+eLTKDx$NA1-LL za^7h+(YB?3s82g-GcP?+u_=8kkde3&7UegQ!vBSF+S^0v#j7*DsTH9w{N6tkgT-ip zWk%=L4zTysk*)nZ1;;V2NYvW zt#=NGW5FIgyx8(ZD*4VZ8ET5PAv~ew3+`|iBK6PnLZsTCo5#FtG$9~JM!~Q#BmO&Ew>Xk&3w^hIRC+QLUc&}z9spWp0mPvG#Dw1ql3~q4e z@iE%^&`POxozX8v3!Esr%G0BynyTy6Ui`G})si*Xe5a*C5^`jXt(++OG&^1*+`mq& zUFwb87I*j_G5QSKqrp;PSx*onBPiw$v3$fEKq^o;XmzRD(1|p!F~Jy`Obo6IwqG*t zL!=G8&9M!q>K_FpkFY-^k0!6Ab5nXC(>ERmJ&&I$PmGdPT%7H|A{i4djz%hk@3H9+ zl$Y9%-ipM?{rN!?o@irbuCc)(*cQg$f`|r6mw7GKh}po>-EmEyxxrOXEvxF!P3W6b z9-t05OeAl#>v$OvwJ&nC=Oe3mYLSbx>^`Qp|DvDDE_xQ&I# zlWgeAz^j;f4{ze2w03QC-jFM!7DBPd(=(*X3*c6#*TMd!%amP%RBL%SR2yFzLJ%c(2agiNv!}N2zS05Q zU$`7hH*qA=kW>~ge{&P@R@MGgOBmhQscI7)i}}|*4h-FLjMQ_ob44DtKZF0F$6v=5 zM_JLy@)UpDoAWQ5XYkD{?)@@VCk6_goICF$!X4Yj*hTapEvJRumX^pw_sqM_ufjN% 
zz{=C4zejr|M z`B@n81}(WtLb(h!mv0|gol<`KKK``4IRT7`_xiLHN85{Cz~XZZ>u_fgQcCe>YYP6A zY{h^A{>|#aA5~itY-PV3+iFex%c^R~E9?U-y$Qxq+}`8j?;idsI{eZw;&91MB>+kp zjUJoBlJvn@4wu@ph63}N%)xaawMDN|;mW!VlbTO}@Yo@MG|&|Mb34mI!{Jph4P4FM zIYErC1|J?`R(sr@R?IT2xqA<%HhiKoQ?_kRu@&fX70%cT0SS-q(fnqGD$iIQ0%P2Z z)Z-7cYN5LP!lm!i07CPFvvfuj51SHhn-vxWb?cCtF1Cz1j36I}RmAQ)9Z7!R=leHOMRzZ7r=o^P`(0g!%RyX+*VA7ISH?;!hC8=VD4p8nxT~=^8AVz43Slz=9S5EvR+%_v=0h&)36o8u z@xaG%8lm>1^yG)LOrp?i*Xw8L2OkC13WxTKU0t!vAX1*Gx4N;FydBCeu|%^+bV;QP zPWt&jZE-qYa(~b|PJ!VZijyDXj&TR;DcaMu@qx}oHfMgY^pWP;!UW;jWy4NO#u#12 zGH@QCVFC*qvLv5AWGn<0VtR&MftK^ghHClqv#zCIZHtM1V!VYZ;(TObwr(7NN>-#$ zoo`y_B8l^jFm>2ef88$%8B$rUFTF29E&|o$^L9lGfG2wPLGv`|1yn4ZD#~xN5PWm7{ z11rNPZeE6v-=}!}l5r=dg$=;S=H80l4r{YMe={3A59&BIcUdqOa{Mp>+~X8#VNAd! z5GTn|;dF;W=aUjAgFEW8xBjmK@sJsFa_xtVPRdgr+47H0oIx7-{?OE9vUWgcL}R@+ z8UDevuu{Y=&3+gol%)~fMZqzmAE`TA@jjE9SkboRFGE#1e-;x#Lmmjae==rix-~16?M2G_;)!tZ$a%J@{r9@MQ;_=T+R-)&A2ys~3>=Lt7++{FH=5eu zuN@oP%{F84Lb4c(8{3(;>7}o60`kC>6eFWKnYn}7$BD0r?3YV- z7cY5Tz}{KAC~8FC>{Ngp{>$2zW6Vvn)#GJD^&b3JUZGRiwq=dEY9*SEd0wd$4clMx zyhhF_tl9u(raT83j*LY#^1~rhS0PJx%M%fYl&{VwumP%r<`rt4BNFx()4^O!lk0e7 zY8S`PLMh?bQqAgl^}`7fci}p9|DSmsVY)>z6p})bL~CCCE2*x`gPG|JU4A*M%262 z+5T%be{0{`Sz=hT2})sL_}(n~pgB*^ouB!uEl-?!M{egGf4%vT2~p9Yjvt!Y&`D^n z4!J@>0aqn<7D9HmskSDJQ8nQHBfwY;~z1)|^fL;OAaf&N%*jk53Cx)#GI zT>9js@WQfavhkPk+IH2JvMt$&Zp-g>+~qWOL;5vw?t%T^k`T)$9d-&CG%Rto+qz?K zm4*+k^(3 zac93nBh6eSibSYFZ)m{4U`9*zn*#9|EB$;zL_e zZVrEr=MPTggATp0_ElDM9m<9AHI;9B+q6qQ!xZiyx(RgOnqBONQR- z(%JSbrL~t&eoq(nHIspL2Gtm?Kf#Sni1L^DIT0&`Sd)nEF69X#tR=3cfjA%#FS5>z zTg8cuZh)ummEBQUtLb6|;48aVa|p9gmx4MOh){BrY=rDD>>=EZ@PNPwjfmD;mY1ow zpG(k&_CAI)XT)LLRS&&KIX?EbUlfC4_1LXLMjcHrd?nyJ5w6?*EGZM4OGPzqTl9#( zhWN&+N3`KN@lILREeDKe3M()+JQH$dTYY=hHWM=iH#oefG&VY|y%+*u{*qk36jNIh zK1WB##MvLV8r5$>?F%BRzt&Wyu>l+Em}IfOW4aI>=0JhL8uE-{t&I;(V?SOHi7c=)(Ez%aGji;rm#I3s7%cIUVb_c}-6 zZ+!!hwM%+Hns0N++3!6q?or;iqnE{TiIBP3te*=?&d@wH^c=iW4_&En1&;v_LX zp#5&$_9>!O<_O4iSUvIJVf--MV&f0#bq>{;4jl#(y%bDAXAHgiI~)M$_+44zxM_@- 
z7GH1z=es&xBY9MJ{PYwlb*5lhYbAxju^gfzl7JdIszaI;xat)OZ~pHQ1NsIw^nGin zmS&FUf9M@QvNH7J{5x({%ZT5ZtM~OeKjLMt81Va#76_1VAKCMNTUseD#mVKI?y0O# z`&V>cDVHnFIcW)eLF82oYh}?y2OA0N)mi)xm5crU`hvsHwo|G>yK`E}s|`}rOxurg z6}67WnKmWv*dl+ZY)wAbt?$%5%4|TVmG-|l0s>iOQ|mAD%s|VAAtGH}cw1O@d6#W2 zwe4pNVF^}RZb}Ry<`(V7T>I;%yijksIbB`bIr?c&_|Zrdz+#e^MJ#)D1?wLTXGwbU zY;R;OxDFpPT1=TJdCOOHw@6*X5N5BufU&4J#nH{pNpanAAJFGGW(fQb!v95 zb}-a^g}S|OX{aTEQdHIH@^U|Z?r`;Br{DORvP%IgW;4Nf`S(!CF|DdeuJT0(Zc|jz zaT)oI1tSdRP?*dG!}JO5XG5(gPPUd{bF()F&rjKua`IbyQsSsGH?ZHn4hGj> zoTh%Gzf^3y>(~aQdgbQVnr#&<-V__!v99?HlRyj&&vnr-fqhhrQWJZHk9!Ae%PpQp z)FvZNgnR7U3OAa0&uu0i&;K+>SO7_x{c5wGZ0c4*)aIlrKyIC;lkPAe{FjeWH@G{= zHV(+-)Yff3YNx3`NjHj|`_Zh!*(@d&?=rKh2jqE83p5_KuNBm<^_^|*_`V_)Q>nK2 zGQa4q@=L#KfRbj7dF`q5M9D%PNiMiS*nv;ha7rSEm0$T}(S!I}m^9;(7&$p#OiX>) zl^P~7sy&g2-?!vgbY{_?d-5u=jq1eV09$0X=XBi#EDtPWWOk07lZE4Jv z-5omX*j&`N+l-MC`mMb@{eC+}?TF66NMGs6!t?UH!LniVl8sk(td*>d@5Pk^ zP@lui>86Dn4@N7WB=Q6KL@S(P!j&DFFfEfMWrP&c{nI>6O$$l)bJ^`-u}(ncer2V1z`viDZeuSk@8O|uvpx3En**(n2Fp6zgWqol%_QJ2H5;xYRSgJpD~nU|s={H?t#wsA zb1HFjULTUvWw%U=>&q@<$`?y|a}N(o6)$wg-iF@V?0%FvrwJuySV8US=#HkguS(I^ zvpLF*Ti%NonOU5hpPeere9=0o-5 ztmOqv3J;i1AlfwMHIS&3qQO5drYNUuy!q-A+0tl?PM$;$%b5)`GL6LIL_RU48}0Zi zS95A4Sh{Y$lSC8Bx9Q}+sX&>yQ3P~aP^C9-Yt_}cwXrMhxx6EdU+HdEKK8$D++PN_ zKhRy-w0?eaOfKnjx<8jpTn{2`3$7k0r!n2UTlkV{ee&a@E!o-bikd=4{_W)54!CoD z@3O%-V_iM6YtL+9@?D`NNO7?h^0nJOH)&)}!HO{uSga|}&yPoDs|EMPYmeLIAMkQfGQZvvrNA`j`=-8#c^*xt@hihU5dTEn@v-C)ded^2SBE80|hl zKG9E9%tN7b^_`Y*sFDQ422y=_`aX+KFSg_aZdBzm0GE&AN zlkZcrE2By$Ek)hR&`-r@!zFvp(nFCoQV2`k$zNA96Q0E5hm5mRG+~iFKpNbdx6Wk$ z2*{X57S?f}K}`lEI(4VN7h?VRh9;yadii!&sj9R>j{0+? 
zznCITo5woNc#Qv3^{?G|`>nQ_GpYgIVq1IjFKHrKJHv#gIx^}bWV~K5>OfQ*AmMC_ z&9DN`>(s_|bmeFQnmDB8&q?dOXcd9y>(0n`xm)R^MR7;ljs~4@3fuGo zMbww-qv-D9U>K?SCG2s}OJm~~ioH5E2cm5C2+}BRjxqVZeF(B5f3sm$ze~_|3OV(# z1Gw%*7(ofuZ4I{X$(oGJ0~G}=4mXBWu3FT=JY=+MkM*S7x<#4x$h(#WL(mcM_n@o< zd`e=v8(%5=zD={Y9h5rYS3aA~!TztqPN$b-eb!c$`zDE$0>VMw&lHz=Xu>*mh58PZ z>hD?oryx`MxsL1o)2Y-xSsUh57;wvvRj1f62RghRk&wi;q?&rb?GpEww*qK@#jqC0 zIV>;p*nrUge*f`*_z{?(-#7C#b|3G|$<}+HQ&JdMKlrDdE5+4FCRrg%bk0>k#ceXD z8)kU_bx-V%^p5|fTIGKkzXkC8XQx-EALu;JpWqV8%sPE^Ooe6#PWYzH5J?F6*46ve z)m^J3fSvIvR$=(@K8x9alheZ02LRtXM|!yC}W2jRo?J@;jK-rHIxPWu|S9sg&+uJ;us?_Vc&Jt%1` z4lpnd1az(~1o`9rrnZ^@RGP z+p}PoXw26%>%p!ijg<~8YvX40&GWzj%lpgAy_&r}J-e}*h8!xLot&3jv*U$IS_Ac> z_W2q`?9*%uU()>`5SuK`d)|Sq!x$@?h2J(NhN)>(p`AF#PFHxG=`Z06!*RB0z|$5Z z%@~dFe%z~#@m!RPsV>%ju~bmcoMIdh(ojZF>ldi(&KR~UBo}c=o$z`-P@#0@-zueP z2|j`EkWPI^JgwTz7&>||fzqbcm2{n}>KoPUceI1Mt{+At9IqX6A;WKhfKyCByO^gl zRFb)LZ+!3_gVouTYeAc+eia+Zdm!cX33EEpT=-CDmQgZm+A?h=)+KQ+Z7Kc1ppb|~ z3N^O36Uixyx!0#kV_q>xz$S~GS5f1YmoKPs(AhG(8w(dpRilrclTXzLbx3GEnkV3Y z0}i2lL0{5F5CZq_aN1zKYX!RUyVj5+iw~x90w0Jz@AtvYtHQl=`YYaFt(HHv0#6TI z+|O88*S*Ry{aIuqHu;I_H#K?+opRcsv7RZ%W@LSPJfB8;u#}PfY3=0&#z_$jtH>=+ zOoG~6*D34|g8QKc;!suR44m(G0 zW|^&3GQ2m@a!sJNvRci8MAQZ_(>exX;}jj?@ZD19<2zDP2M_d!7XI7;q$-(}!S& z3y4n{@Yj zmy&zsS;YoJ_qv)$6etXwYlkaYQ}qCq`gRv+3#ZgX_#+Koo_7ZZMD-Dk%g)72d#@(c~$iKH9VR* zA1f+b`*(LkHUX_9tNXeA>b0_hUC#1dE%)=Q#$Of;kEx|c-ryXV`nSAPIDFgJReh3G3)=t z01&pwwRqVC`@{?sK|Wiv*zZoh_9XD|f$ZT>U4X@w&!>t8v-esTUZxE|bgQ{JDl}wn zhX^=)cC=0MlfwAnqge*R$3+h?{f5{etQ0kdNmnpwQ|cgh6*OdpUdivvm25_su8w?4tU}4FHD)bhn!k-cd;rpmc;R7;$)KU#sT4QK7#HwDaUuQfP7JO2WpMd@{B>f)nL1f-HUb{A=*Z3jEj!-q+QFzYV+a;s(#(dyRY z;H2^DhyttDo(=1>bMb@KSEzCfC9?6ylulogVMwD*|I%loJafBwxfD!sJ3yq69%*3) zY58NxT*nXbc@nsnD*mV%Ax39cAZ~}aZ#EhAF^eh@krxxZt2zg`l7SjD&plPFz^Ld7 zwFn>0YMyA<~V_vT8>J-QFz#_FG zH;|xt&-zl0ugg}6C%F(yFvx*$!_oeyEAQG+&jr`Q1wlR%T0=wvcer4~i6}pWt2?>J zX5>M9JW)9Qtow(T(j%wlwiQa>sKbY+7cchaUodbm|Be|YHb^MwI7CZzTfSk^-F{V< 
zMSc0@*Z)J^dxk~1WbMBa1Q8Sg1(7IGi2~XL2_gz2K~W?J$wGG%1<42~L6U$Zn;@c~ z7l zx`tZ5M0q+383-kUeh$xpRtof%Vx({I$eq$|kem8xU{DaSPY6vGvZCW1T!SYB#iYWP zonTY1tMzUPjNXn5g)wF|^xef~olNK4>tCl!m|%m6P~Q0$*s{^v!41=-n7iX_$l}fN zttO<{bFm|}8ffez0p9g$aU^ZA!(3^p7^|Ceb^ zfczk~yJXAGZ7%yi5biU;t6HHWu#>adH5}b>u2VUGFx%gETkR4g23a6Gh>r}n^f2@alJ1;E})vG=3 z3;u+()|avB1IklCVOv)-Y!pY(2Td!GOwH7;XS7r(6wKXpTJWz&zIFuH^I8(=_&i6c zr*SJ%kLQ3=2;(8MV!;EoGlvzdo0^<=3UxbEf@hglfXn4<$PLL46@GcqDdgpM)}9rZ zeine^&hX?+S$6U=)Y2`PY{wD{oe>;?A>h_U&c*to8U=QSnJZ&o4Wgeg=9=>{h>Fgw zgYwZqvRHOT**E^O7fp}t=(mc{a(yfz(>I%7=!HN>1lc!q+-r>?vT!^#U2UeJ&stJJ zduG^Oh<)iDV4a#{`E~dGfY3sQSPwxq(-1^)4iRW?WhXxWN)Paf=YNrI+J8TMU(EZt zT}%(g^+@=t_7@t>2V%{)aI>b=dk^yzxj#PEV0bEa8`5!=ZqZHv?m8YU+#)1q=QF>! zzQ{k!2T-lXyK|Mup~4G6hU_;U(BsCu1)5x9PDMhnw?@7|tBHdaaG2|+lx{C#PKV$0 z^dqMLxAWUcGaIiRsF-VD(cHvaC-CK9afm#)kVm$8C<$nV-ugLW(F2IibBE>k; z?YKS_CW77c9>a^-7k!3oB*qIbBo%IN#|4TJrb9%EtvzIn`8;oafQW1>Yh=4Hdl`Gs zCeg+>YoHI)9hXg9k>6%#mG|86?LN8D0n}_YU#jlET{koAh*bL4ef;wAc7~l3?C@N6 zxJ_XDkM#^t(O;oYpso?o*Az7%exn|Piaft;+;hK_ zXWuLRS!kJ93gEzR;(7UtLIxTBEPAg*=O?`t2bRq{O(>t%uL1$D>Q9Bf%d<6o_Z3JC zQg2NClsCR69b=Df9Ip!zxt_Yy92LzJ($}{O>O7H0!WT^x#$8~V(lHkr@ATqXPSXmL zN9{nctc8ek8eJLhF`GG*PLH2kh zESvNDTOonCkvx+W{es>*!9=^)Xc#c?Z9Gh1kW7HMedpNrH0k|`mxT4Dmh!0}#Y33m zWbKv}Kiq=FcJxs-5Sg69p@pTQ&uZ}Y78{&`hUS=RPN9kE>{P1#^tIgh7Rri71u7%@ zt+|K_huvb?LIc{&1+3}r==;^z@e6FB*+XAj#RuAAehfr{+c!-ew(Cn(FuG4IN$&PC zm_%Z~Z5v7z8T*9}&}kC?ZF!ie(-ifJvtsFm34Ga9$gM9&XIuI+9bG6(g0fd<0Jt}T z!3m*13hQA(TCVhp1sYQ?CY)HnK%aBJGe5D)v7Q3Dw{W8zG0aGvM4>J~too*+dUS+3wqtZ_@eRYSu- z12!$63dsCH1?y&Q_3LXTZ^TH|i9X?PQ&z%NK;HbQVXj*Gg zK$CPSv>J?;@eIf{<%(JO%)_oh7Wau{!zd|0_;H%i=Y{t_-ZG5QK-r=!Jgs=Aip~6Q zHaXUB5-)=*~*CPMwMf4Y2B4AWR#-#m+U*RWUx%hV{#eev- zoBO3HusOn39qsNaU1p^DaWL6=yt9~7>*aN{x3=9meDt+ttQ=nrlp1bTY6V_}G}552 zfHd;>crhu-%iQrJ9+uoBvW9`I1Ln!vvHQ_hQ4A=l;c(%Nlb#3{<|S;GbQN1?r|tj+ zxD1sHn*56HTn>0gW{ z@qU#h?Hp_nzqJ^?Y5cF@@$8JHxy2?vyqY~fxy+6Uv1<KB?UK3@HrsDZ63bu+lkSi{}V{|BfQ~|S`Vpg&G_)? 
z!yAi1uuoPQ^J~f}Y!lzxj1$$B#&4qe@;=Ch_q%ggLI7y}>31*%BIGT(>uuQ~oxfi~ zG@`eq(eQY0nrzPUq_X=s7=5}1%e5z!xLaR;l$*flz&;{zwn*^SW zw;Rkiuti={K%Ses;I>-wA!nb&MQbT9j+^~29-;W>XoJajPmv24aT~t3>%`NDULA11 zoI@V;(b2j&%i`eDRaJO^vS z(&Z5%-0%&o>O!(df_E8gs`;PauLz>69yf4Mqq#V7x6o<+@_8n0Mo=PT$T{e)(MsC~ z@&;~tb-t#E2SFyYcEG>_=k~q%MN1t@IQAoOUK3jX1atpnHT}ng;z3rr+y1bet6?qw zdQDxU#e?!oGJ|y+Y-gIrE|Ur9vgC-~zay(eazk&N*FH-kvhD7ctx7yu*ZE^*wJ_+^ zjXPrk8SetkpA};lNF$!q_o{WROO@KGX+p_bP@RHnFNip zLGCpGOg8_!dlnlq3RaNOQJ}H9dhB!41Es(@27NW5?@QZWB&VUA-%IbhXi?bcscqzG zwaRD%dL%l0K>5LS`UWZJABJonayQGBcVDaCD{^n($Q*E-=y#z*cxPt1Lj@5xUG&Zm zw9zgy) z{>A;kwz=r@8|?l&2E~XNc3}R^pvkuQ&aNoow~L05aksP=F7k8gc8V zd6V`}^Sv*j`2YOn&%HOHe6b^JifcTkkGcMho0}qHmPR7)5BQX9+&`GW{uNl2`UtEH z60+>c99E#6xPFY4k@4CckdZm=F=M}16(b{kJlYF<<1W+6?&ioC_t*MSiVne$w1~p9Kh;86c0}kurZ{lqpn(DGxQ;0UyfV2EW7T8+nc}-I{}+c1!)>08 zG!@2oCG8aT-kYQ4TYrpdvLKX=d&$CrFZ3mLytvezb_x>Nx0z7gM(d8=q_8*jK!72o zjRv=?C)G)IpaU*D6c#GHW62KXEAzy>Lw*x+)jqV7&MrN2emo@2lhxjnWcf_H+Y_A> zBr+gj%ZEPH8JtSG#`BfW%r_PU zq1?;Nblv8cTIreKj1?>ePd$bBs!+_0y_5Mr&|`MPv-J-$&8Rx@TGh1C;`ZR2;rqt* z-edmz!6{-B7Fm}WJes7j0omN+>C<5b2;Z44wK#yllB-)4)Bg_b$9S6vfulX;*thlC z9dXu++}hQWG{Ms5k#a2K%B9CRl4+!V-F3x?avoiV-5DE?Jg4i{2ed@aXUBR!rHeZ1 z3LMg5jO%9@n_H0u^!>NnJU?x{|Eh0Rfq+DA2hL}0hrJYVqgWh`<%7%T8_}Ui%cJ3o zS-P!Dbwd(Yijyi0DQ0T28@(NO`7&|A`n(O5{OV&a8LIt+CaXwsYmt>{J1=kjaG6Vt18lbiz2pw4m zKI94YhVXrq4pYwy0%9B+#hrZ3%G^ril=|Yk*pX%};}+%j_p1dU%S~Xh#Tc85^t&Sb zGq38NfuVxj-pu*>SG6QwrF0vRV}Xxf+Zb2oJ`Xs?k}MlA^G7Fx!Kf{JzN6!64|P{5 zV(a?NGrgn0_TI)BlkVA38GsJWEDaqad~ zz7Jhmr*Z^%iw>jgb%Ov=syFVmng0;!$NDV&m1T{`q0%y3PxEhu;YXBQNDup1TsR1M za%nhYwbDqWuBv?yjawkt@ArV%3v%Gc4bqs^4^mj##pD9F3B{b}XyZv;% z(%oMyBOv+CnkDMTp%W9vcA^AjDZ@lBSBUEWNo9=xit325V>hdgjufUi9=EwBRM^I5 zBKe}V3#^W(@GB2)F6(kL?uTUz{L9K}+b}7l|E}l5@-oLL@d!3>>AvG?;t;@C+ZefOE%3k(Vu=L!PzQJdR)CwD) zThrDGX~d?NUw!Xd-Nf(=cfML3*=>qykbiqs9Jo5%e8yL_3Vy8`R5KVHK6eW z(Z_va6z#4}A_~dT7`VT8l(^l0*>~g3!B*b{iVPe*4RRV^@=jOO&>^ex6kZE`=XC6F zmD5 
zeoj2KLz_RKF8|OjY5!LT`p%r00OYQCv(_L8foe|RkfY9zk2u`dZn>It#CEdPQx5pf+1wA-90SCP>lQ>zBRyIz%+5#i76C4rV zOO;kHUF8FZ3-*=3K{{K)9-&x$#c$2kn6oJE1^K*VPDfuPO=IYZZ4zKkRZK+bVR3zX z3MNaMl<4$yYlz`{@gL#l6M3IpScQN?Rb6i+%Jg}OgkU_ zh(qr7nY*a28iocZiu~|rCMY{-LGfE+fjai!LKB>%_1&w;TtAjeZUj9za?JL3jiCpO zw^{M5;Cl$QjR;M4_{+nTE;RGVmt)cxJDp1$7Y-(hWQ67`xaij~(Qz9G{KQQtdn!smI2-C>MDO>}A%N?pO z)aBCr;#48x7T7HVr58+8^;zMk!Ysc{3uzIvYzyX+uyZtJDo-Zq7ij1KvM z-*D|EOA%_pqGH8;i4zC3tKC$mq#CA4%5RKOjyoUybou`Ahe)E0o_M_x@;2?fy(Gi@ zv^MBqmZ!2H?h?%sRBOVOe3ZU0X1+p&~}b4 zN-rEdc}m_T3lZJ2MS{o2Yj4DO@Z9nmx1E99S&B}I7f@|!g*Vdf-(6ok9}=b5d$z8t z<14HVx~v_p0>>-^#?Z~E%U$WM&ll!DeVe;xTt3~Xa`3HG`e3Wp;l*oN2uo*BS3I zG6QyrOci17+fDjtdc#P06y(NXX$|K25`u>Zf?SpNM)TwodL+m9jYTAmd4%60-RvTEjLp-)Ztx8J?x zbAn#)6QLfLD9i3%7#P0YX6$HwGqO!MGZsp~kt?u8KbG@p$IU|>v<|T_wCf~Lk3LQr> zwZbdp-_Vvo`+|&f4Q?0unVk!)Q@VlP&o2pe#jT*>o750Jh}9(%EN^aNG5uP4@I^Bk z=v_TxjQJc?tQ11DXyu8(G4a!acBKwLx&Huas@H?%vPtd_pR!R!CN$l2I4j{sxz0EDx;Iw#HeFl3h>XvT*Zj{$`}6%X z_3O?joSL^Gl+bv21B}?%7S4sv=Ins~C7hT3j1T9hs$df|g>r)waqTb1PIZj`I__Fq z{5|SS!*jDWo0OeL(8)U)0#`##4iS}3Ho#^Exz9s_b1AIWa=)_hPUH^1MyXI!{Iqi9@0|uKekvB7=%wBaI-9a**zE*NtXR7bxg%|+u z%DTmyoj}4F<^e7=N2No(YJQtgMfFJ zI5nL=Dvp0C*1u3w5X<$X{bQaw+kI8X>0XU4vz4mxT-oo85P?{teS0uhw1oJtbXF`9_W3XqmF2)} zc-k}GvcFqOtvCF>ip7;@htFQp4p7$jE*W5SX~rU&V5wz(AZzra16h-T9#d8zOH;AE z-=J`MzTp{pp>S{3;<*h%+Mw^Ix5+DNNr<|g&K0DXxU8Ua+AMEXH?=q~p}OK~3$-uQ zUw*FSSgpyhRe2ZX8)7cK0FnjO!CAG22mVQ5yfHv?+V9FPU1;!`2ij{zXOcMQE4Ebs z^v>8YEvY4gf;WqWgdJ;kMHNOL(f{<7g}RBI66Kvt!$fj}VVM?-4`cTqBtIiH-4i}q z{KT9V2nC<4{9vCbswIn09-f?MKQJueC|7u%2wZ@>DyParnL$cX zfCw%UAii4DX*=*hVNAQu{GFJ^f@K2)V@`QJl@1=LO9Tlw;y7QhC4>4!r9A*LrT3HB zYkC;l4^LOKSaqHTYR<)B&dWhIZQ z8N|nh@D~wJ3B74PH0d2)acC!$$+q^SO`aCM4uyr+qz@)rkJ9FfE+C8*^^gUFosA4U|?69NB(*^44RoN~3q^Szho44Hwd*Dz&^A zOo&)@!}8<1A)!8P=JMlrV4B&Rzdu{SbkE)%IqUJs!x2xY^pEuQ+!^aj!=T+#<%Wq! 
z?`7x>Oh=zj?Qhzq8Zb#q>C{0c@d$3sgo`~bN^|>k6l^lG)Bi5|NJoMpG$W{Eup+aJ ztNv?vzcTck0@ge|p{h^;4Aw9~qUIUj6XJ_}^!wqZ4dF!%z3`q|2S*)A07lQ`2wG(<5Szx&9sqf+bR`aMHGH#B3r;t>ul&m*#bTd- zcM-4FbKM@5`og38gTa%Ej=>IMz3^l@Qxp)rBb40w2Lpj-@9GOQ!8s^MLo{Pp`gC8( zJeL-5b^UQP`U}c0uQ7L4iu~cRIbWrqADruMzej7L)uOO7Quqq9*-YZ-59xu_p|=4u5a z`8yIuB&gq=_F}A#+c_oM1~3)3+{?vfKR8RY^-;*4iph&p2$lVdi|pzpd_sX6STL`= ztS82vUVR_*~?s{Og)^;Ue($#C&-{97O&b}2Sg*gtqDDPsc{0riaOzm}ark?MG}v(wN3)UXva zJpWQ`=BwERt%8&^O>}HLDc>hTKuLX~B;a+v{)t|9Rwl zI2j{zod02*PN*QG+ZOLue9{$jk{()$1l=xX)*^xPbbc=lCaInBo}v>>#9kWNr4Z8z zdwLxoEE*Q-U2ipKYj*^_rw#UFUl``lEfYV)%yQs39zw^k znRnIX(b^Wz+BeWs*?Avd$i`QZLG}q>+>IM(i=cFoRh<+p!^Gr9CI~Mv)+MtC0Rsx# z@d_I&Z8OAt@h7&r?~yR0L^y}ynn9b7usbA(;LHcrkrOAc#NTyew=>&e>vni!p?@BY zxyVN?H-zYJdcs0f^4t>D$v@9o>?%q&`I1@>NJ<7!`W$+^;=nuNt@dMqh{32496mOb zBp0#o>&NB`I`ziTIpEi}Tixo1E4QJz%`YZC%$cIV$cl_@2GwO@n_9bKy6I5rs+FCy(Sq%&Ny!@(P6e@Xp`R}GdVXb)6 zp!0asAm#EC)1Z&|aFY>l8Wg96Q#LkYANkWX2y|YCbL`%{SH0Szm?w{;0QFhrq#mk& zEYsp^v!_A3qN2fDik5%Z(gm8l{%gTXx%=_a;rc+4jZn?MWd=t~JPUX|9u;XC@<03d z{MWr^fT@We0m%6NV;%T!CE;50TFSf{kdt8GDn2^gg?_BB1%=zIj>mh`(}LPVwT8z} z4!^c9AOC70JYKzxnT{2G z`{!X~8ppvk!KXr>EDisrq{**;tb3GHENHq<2}Bh|annZMQ|fnMYA$}Zb1Q=vH%s%@ z4@R3~rsMAI^$P`}np;T@d31;2i%J99*XBEcg3uXjz`fQx^T#C?Ev8(sk6ULKy)DTi;`X9X6z130~>+xqodu{W!H6b z_z`UjY`6++pqtw=(q6mkGP-eGrv2w|33ak!4$E|Gn6z}62u4=I4yOc}Z}P{YWCNb~ z&s5t5nA`22GRVd_(``}WdR>Y^1}3N^l-g)IdC)^By{G2Uy(0Zm;T63!&)wKWxK33t z$8w#AXOv7)qZ7*X@`79y5fJqycH5y-3WHLm5=5ev=f6Dxv_i>x+*=mjzb9iswh&IO z*JYPE&NFR4|Fq=SDLI6pS0|G&S6*IaUqBaElOd6js^|^%>EdewB%_2aYE@cxIKjE<1|kWANC}~%7Kx_D?^>WS(&TGQ`4!8Qd6tmXqjT09=LEz|V()N{s(YPzR-=p~ zPP<477Lm-uv}XieuQZxoqV1<%AFUz2EQo3#vv&ErU2RGZG7UdP$89PJ4~+;rFIU27 z&V+al?kjCpQHCqAaKI3FM(q){*ZO;1Qhfo+ zh}xxfStU;n?DV(6Nlo9ycVZcPfW{M3HWqB$DiEEgQ7)#y*Z(!;^ZD%-tpmRLJ>cyG z(qZ3@zq8TkCGSAUvhQ)||GZ}`ug&;<&Y4G?9lkL_n3@QHutTm{w)ngE-2N-V7E;#5?n_N+9RH?{tPs;d7( z0t7|Ww5kt=88h@;B^d+k;e*@Kulm~}g}TG1nj93IvK%cvU8aznGFxe3Oi}v>R)Glv zg2uhb5Zjt{m>J?OaG>3%lxL+0v@&kvwWk-~=CxQ$iZ>?ie}>)DPW 
zZp+P~+nFso+cBmEWkvMqLi}ywQL7s+xW0^ksjIT1S|74*+V=7b znTKQ2?fBCArK0i-;amatWC`I{`_J^&C*$mQGm^*FvFYRZ?CUh#0nssCZ_=zxIM&0?n$UD~ zLluFNL=b`G2#I$gyh~Gv9H)_rE6^2I*Wdi5o5zWK>-AGBK}nITxGCqxa8pRdv&9O0 zI^9L%SEqTQ(?DXf*VQ}BQFw7%?EC}d*Cc|WMc41|%o02&hFs{Ygb#s-&*Jn$^jN;V zX1wCU!0DO((Gp`a{0Rpvl8|{(<*tuReRwYpPci77d58&o91Y>ax~Q-WktnjK2~rEc zO`&bRMZ@~G1GxX{Di&Vy$bX~ZBE|Q#yCr8Pi|6)*Qg4Epb6s~^u?Vy zYq8pezVS}VW_;3uuNvZ-KI-3GnEir$gYQ~~aUE#&D;XqHc*KsBD*BM$`=rB#e0(~8 z;)U)|ZQE*rL+SJ_QbAF9QM(kg>>h{0^=I(*TN5_hT@l(b;cD344tCx=Y@rj;_rYNu z%{WTM^B0_=Vy?S&BOF>oReqxM6%rHvlJsSc!Joo>jvusk)XD@ZpTD!#?YwvQY2x;9 zQ%vSK)qF(EIrP|$l2)2NhiRXM%6)wQH@^}>u>kUo`0 zUn^$C#7wZ}tyg-0w6g6GUyUPcZP)I|0EZ6?%eAJQf2}UHfK9PfX-0@ss_tDnnJifd zsJpGilV#uGvb|zT_`dv9N%;RwZOT{_hgW<~I~;Y}{louPaMI|l^nakydyV1bbOr@o zaYf7n#8n{3nvIlT`>QVNpE}YfKT-jy0c&|4CW; zkb9L}h`fL&-!vL7xhZ-D4sRy1EG+~3SnM5_v% z|679kTZ4R#H)k|gwfRuwJ<>)=(*`?jTaW8MPMHC19pWo+aMX>KFy`YLsQ(ByRYM|~ zo?TrD8&vb6$#uD*6nrOvIa&71>$r6R)k7e(($0<>VQcnh`+er3oma}XEk6GtBUFKt z>?`9USL{(s_q`5f%Ot2A5M=7}AERW1ws$WjcJ;Jcx(f_skBYGQx=a367@mYLC(C}8 zGiRa{gT}~Q`F$twagZR(nE7%|lOCp*wq`gfD&bwS{&7p^*YMq{3pZkGxSIE=k4p3r zmvi?jd&vLx1oFV%r^eppdo2;S*SI^^6ojTJX3PiFEW5_H*zU`?y!;oxE03E2h1GH| zyougOyfv+%Sq{pi(P3SevDfg}FKvha@l2yKMDbTrzx?^(HsezeCyS@ncu(R#BbkSX zqTeRefOd%03!!fxV|b}&(N>_!RPRB84DG9aAU}R8k{@>mC>_cU(mz@IJAhI&7v|=H z78vuL!$+6mM$R^fD%n1Rq~AJK+m%nO$)5oyI+e`};)QYsDqGAvTeE(D)5#FiCbPgx zrbO!jTFE}Cbj;dj4Bv!NTt9^uViEkXnVu(o$)21$=BLNEZH3>G*fD8*d_^u$zW??30zI2SOS?LMn~xyGdf=J{6&R`iJ>#YdO4#Ab_SP|n@2%;T6>dB zNdHkF#o{~#Y=DAnX)PnOGU@q9(Jkhnzl4SOZ$Se?t@prsw6u3hjULp_-yi#C*cs?) 
zntB(qC`P<8DGb-G%G$H~<3^Lz4qGhrmpkIxplE}o?zI+7Pf1rrlpxFRz21SmEJEv-BcW>+I2TsLbJ|P7PaD5V5t1t{p&o zg_v(v_RbcaNVT`~=s(OuTE(}nRjM;3;LLZPFFV8-q71&h^U@)TbKi31_~$k8w=Eb1 zThyVkT!d2{rDCN=yzfxNF&vGw;#S=z!2Od_ZpXqP^+3a>iT}6*vSh&I;`FP*{7?54 z7d>l;uf>^4Vzi6DT79Up8h>MGkN z6a2N=Qjs}M#`B;l*28yx@^~L`#mXtDUY6TOWQPAsp}+7yxS(RM`uq<9SQ4;;nn=Lc zf1-hvxcB4VM}V!*|G;&4CynfR4aD6Sdb+1lOw;rE4yPIU7KXOd59ea^>SEkm( zZ85YXCp%`zs|)pL4=aHw^}KxB&E8n5Vr^;9b7E@*wQ5?Q*OS9pb#mcgy1Q)P@~VQ! zf6N80k{~r&)We-$)A398SE5!*CIxik%7R>slz3L?5$}w}CRy$jU5J(>TiyCV@Sb*c zX8-$Ppm^9=igFXOy-B-_q6ekt3(yZyLbP;zU={Q`u+xUsw`lMQ90;pT36~qb*EIU! zvNO6SwbK5ff}}+UxV(V|%D1t6+>w4Q&|t;VEknVzgKr z<`a?ovza}<71g7ZM@IH7{2mRpRzwdq{S*JA2U?&Rv*lJt7qbUwzqDGf;uyb7vQMK& zUHp7U(Rt}q-^LeJ8>iC2CY}A-xUXT|o{L1>J(E#|h2Va0J_zEPeV0Zref*2dvF5c% zR~Q;ke7KEcOv@uohtlDDJJ%Rcy4%%=NrN$V7YKtsmGfu3DX= zM;7gx#)s+&sIZR|$lIVU{~<(lfKxn6VhzHGqTi|V2#_1l$Rx;;Qj8m-DR7s(fxaAR ztcH|%|LQA>z7vv|5Em-H*jHcG_DVu{tA3AGs){p5XJazma<*Q9w41Y1XcDfqhUOnUps_ z1FH?*eR3(QB)fH?p)buk%`A8;e=;{5Wtb6KPPW~rYznV97Zm$?$H((}#{>7CEe@f# zs}r#W5Z}!_xBYyPZYp<9J*exR6}pFvzCtT(^PTXOBcOjj_T#4ENP}3lEi$C z7Bp!UEq3(~Y!(Zxp`+0=r%CV4dXq9^Kl#O$Cm}XqI{nv{Ao@i?dq>SfR$1ZHGD{t8 zS3NTICl)!c3KO~_osWJ=RJORGe< z*98d>S>U5NZw`5?J!~<}Ty@1oV5#9@XUrD}`AyIuJ!k~USymB|r+RgHAWy2=5-j-}bVW4_w-d0yf3M4lw z-Gd973jef?c#A8)rMvxDtg-@AWAZtR>8W8DX0+VVX6+P}_Uv+kLVC$S>F2Cu&7`RI znF1psHbgm7vR6*I8O^tO_{7f+6ry11_|l%4)8~P2_L5=&@fu7n45|alYrCU#jAoDC zm*B39J$yf2S#Y{VZf5U>4pM1ts|OO9b-g5SO0zb3Iytl*YwCUr^J=~+BX>>{r4@MV z?4(F@sy2cEuqKqi$B}l(vqFu`o7Q|D{vhMWI({YK5BYna&yaD%vp0XC(R&gmy#wP! 
zZpaA2sNk6tA5hd95pTMi#U6Eh1>wO6cd^cSHndH`eM3zqGJD4oX3`F(j@9Kinxy%Q zmx6?@z=KM4Si9D{dKRhWv(6q`0P)uB(BM7uINTg+ct zP2c|&mCKt`Zb@T)4hna3dY1ndV^b^dsU&X@BLbmmEf@QhEEzv~n(UB2i0-DD(!6g% zh}^_bz^zD!og2rqPZYD*!(_rq(tXdg0SrSf^FtDV)7D z@UYM$>WBAz&YDYSh&p^|AmNXr;V*9aE5REGUM4uP?%%!Mky+u^;D9^K;C7$B5O2n)qD5^C^25 z%R@N&BZIL3+i?!FR6}ycEyUAasvB~FKm~mu=(wEacv+u@|8{rC4Bv-aX5ZH^?vk#X z+WlV=t+pS41D`0T9LjEWj(k{YV?lmDYsq53{0I*F0y;#KfR@-A`?!%$r9R7i0QWIf z%VL|jiHDgvlCOd#Vw)YqmN-!1tyeO44LL<~Uk_eCMpc!!rn8p6R(%zm7xFVhyhX^h zpUC1|s%LmwqcQ)OxQSgCS^Ce}~n$KYazze7iO-Up#+ z_eodYpLeI<*GB&{5(X|cNlB|RZ^8GPHkRA|-jofV`UsyS#1qsC;!fg+|0kETzp>N` zIf|dK|5btpVuZXS`>%Rp9V3B*&p_++fI{ghV9RIv zE-$mXrgOU!8&lNT_b`cu*iP&N!Swh#|LM;$*zL}w?*+egvUuU~a~qykFJKP*@i0(~ z?;)aQ53n&r#4>?$E-5Kukvi#xs{o*?>&( zPS|7UZIKH#vEAhf0&cIzAq++PJtw6vFVPjB@Hzc0qZyjhdMrJ>r?jLP+BZAmF=nOslrJa zcQV&{dt>-@Ky%>*iv|@qJ|-WvcKI&sMrG$b0imgm86%7fWjurl%AVWzUp5uIgD%qO zX(7_1-zZ`?9yGWABn1Iddqf@xik7}cU$HH7&}EL`8wYr%T)$T8CH`kt%W9KDJSC)xPAAStbBd8qAST^YU~b@xNE z0ot~rP=8NJ-(ARrLWaW+o=UP!UttXN@eHp&Ut6qmx7(!NO%OSqS|vkv;aJk9=4|efUO98F*i}_x3B}(+U^T8l>Rz zTgh;l&mIW`aKzQu$_l*Chv(j=`z+9f<5=EHZY9>ZO4Cj9bvorNv$&Hwts1i#&7paOB}FEGF!2k};0 zzgv|M|B{`*c*aNU;@WG_?R}*xZj;6HkCpmI8|*LF%N(JEaFGFWq2%!0XY+5tSn2{eg$%e29c&ws9!*?>DV!@WzzUQ7D=)ec#42hzgsInx+7K($~ zO({L9F?bBrxN$zcB+UwEUtKDDY?*82lMju#<-cY`Dj$eD@_Q9@zR)2t*)j6oq`C=D z)!HY{DL0oPM2|fsnlE^bEcR1TV5;H)e6OTST1o$qT;Rhypo-M|do($Cm3fMtIbb3G zA7mx{CnWiV=!u}@|7EhK^?N|p1e^9DLvwJ>a`M*m9l+V#Tie<->Ll}tC3GLWkIey~ zOIM-VNUBzj%9|5K_9(Af>e+6d#ypsd~6|bBmV=qcv zSqaqOTSCu{is9Bq8?dzWdY!sNu9Px*m88Z)u((Wdz!v`%Bl9G#Q3vqFm7mV7UN?K; zFZ=^B{xwjD-+KR?z%u7#lh}Ye+1Q;G!j8iJP^m$5`+#&!sc>|=`24i=ihX`?+lShk8Y+usgB-6eS>^B;v~ zM`1f6LBadV4chI|y{P9+uddkTY12K98eTJcsxZh1?CaPbeU4OP?U-bE79H_A0$P9? zULn+7T$$Y@FiV`_CCk>>SNgf0VZu>dTff%+MEu)QB+k65Mw{imJ$IYP4<#qp3RU|x zsFa_B>V^7?=?pHwpk8$CrwNGDMJ9z*O@+Q_C=3E8D6Y?izr@!(2;S3uH4E!23~oTTo&oz?l0{Cp_}|nZx5`C~LG|w@Ul1N2Tf0@(^q? 
zj(c!;i@s4v?14|l_E^Lmd|;sz^}PihClCZ3Z$qt~@ww~=cu+c?QJh)c?+odLe+Uo{ zs9z9Wvy#-pp-&}*0J2m76d1eKJ2OMny^wVEM_SlHuIJnXqJ)rrf)(4!eu4$Uaht6! z#;{^~dj!($HUXgO^m~x?mG{n$ksdk^thE!+Ujpke%m_h0lz$prUC6=JCDZW!!SP~3 zyW@B$oF~Y{EVSGFS-cGvYq{P$Fk;mk zxwUa?O@gTv9o=)O+3~0R9(RbnK*cmW$D*!kSq)0%g&MRZYo0j``jSmI1=R?N6xO1)I35lqYv^j^gY5C$E>Jjw-0-_)WqzMQ?V?aU=3Q8|ZCqOs?0qG@xgeoGS^iF6ZT?j>b3n0=7M5&=k4TL7W zNKxL#?|$ET-@W(UH^v*|<&Qlw##(Fdx!0a+{^tCxwdPtF=WLKbi6S8Oe~oT$-R$>@ zKIxV?LKX`^N;GPkdAqXs#7rDkJiqgnSyY1%dn32d-+ig>skw1GF2s{(A4I%B4^+h7 zWxe- zi4!_CksadED3g+M?F(`gP4D2}`Y4lKWO9Vm@ic(N4`R^++b~NkjATQ=Nn5jeN$rkJ z@hur)Zkr}=q2(&DuozjJCH3rK>coV&pEJsi?L0Bn*)Fan|cz)R-{2e6unIngA9 z+xHf0_a17NPeq%w{QW)Pu3n$a2pUtzD#%zT|F=Ci!TmP=GVhYOX7%e4Ngix`#L4Lq zOmk}&@yZL|&TPj&7fC`}#8(kX!!6C4xykzBHKreJa~`!gDx%AW+MOmpKMm{XnNAI7 z@$=f4g&QK(4~%<5164*Xh(j*oAl1$NLUr-v>mt{>f}$t?Dg$WWmx14+D#vUPznWDNT>WRZcyQ>ycf&nejzAvEW*j`4JN$F@( zQTWpAZrdy4F+t`xsz%n(i~GR(zGmV|)W}h%*=*y+Q2ROkHb*O$^5s$gQhZ=ml<)o~ zzD(F^u1z*X#&To!h8M>j9kf|{C;BZlrM z0LkZ<>MA|A5Q!L&o~O2Y#X*JS3w!0GgmDL_GoK=xMN)69e^E=m$09;?7CLYN5@gs3 z#s*@S0$5svpq5Xxk;u~K^Hjn2;L(1*4fuQEEowbfLJT548)n!*{42vZNpKHn&lPiO z(fRpnAY&@0i$d;yF`H3aa#BduK1Z=K!C@D1yIuvsqT@M3QW zb_u)_9Y}Zg^;hWUxW0cY`MY{jqa26f$h6!=b{;iWpv~VU|LI0;Q`3j*sHG>ksdP+$ z{?q5%928`x-{+$@>+_?vm$_VRlrQIzn$ou>G!C`F`VuGn_&uUgoRbH2;#O|`n z6hDuqv^g|hw=QAZGmAyT9F5@N#4J^FJ^`8AOQprVEgqZuQQ@rknz%%LMAokl+g9#H zadDLy#P9|2y6`eAblWyHku*0WF=nTvyK-J8L^Bv(*5l~ZX8OeUSsInCORaLBpvcx* zN?Pp0OXoJF*z#aG0oAXKBdb^V*TNaVd@8%q7lSl zSR3kMnSR;FqSh`zRnq81uB*eCL79|)kuZ3@aa3Zd^DEMZry+6Gz(f}9`1-4xZc1mU zF>Ogsae~!wrp-;UF$UA9E?JGq%&+YYz1I(7HkC^TYV7BQ+tW(R(GR$Wf*fRfuk|oR z^0SL@vl5d1j&(-iDmL?~5SCXpSYy0EaSp$+c(u}(z5x&Ea6I^ti-a5iy9o+BMClTKkHpkw%6Rjm`%9gI7 z;yc#!I9`6tcCPYJvVEl@IU-4ScA!rTWhW2voQ#Lr{#N&Hv!Dt0_}%EfC|S=*{6g0T zqRUo3KXGocs-V4RxpWv5OB!xBXnY$lb8g72yY^QhoPEu{b^~Nwr}onzy zOB}VR?(lrTxBgpiD66%f4ZsDyTX|@F)?ko_5~-(8z`~sGu*u`a*Y$f24d=_nf*%!a zKeXLar?(tGcf)xL&@64TWJnn%UWbs#m0nC%w9`&YteQwQ*4_1V)$eUr6C9sa7^c|R 
zg-+Q003(mFpF@xOed1s=k18FT4g3l*=pI64(Ig_DvR=fdSe+P=q(1vrcz8ClN+^Fe zg$F5JMlBwz8Ql2M*g620aIr{Oanh$8opCM?R>SvO+oiP=y6UVuaR_>uEc!=A4b$gk zL=-7eQqlZ$9-eNOm)gQ?yh1@zP4Bf;I>Lmyi5qSQ<@pKv$i_Fn+VesKojV*Af3khk zD~IyLTwt#y3d$VQww*^Fe~c;BDA_bKt&3E!`03o4qI(&$T-n#{?CLd&Wkn1WyHNC* zI3_(_%&l6$*$-P>0_lo5zcIQy4+xfTjS-2X#PW{*nPrcKQynS5cG@=<3%Y187fG1L zC9!GRRC0~ciYrN+=mQH)^uI2V0+^FhW2*r0FRhI7%vA^puqCGgYgBhsq5o+>%FuM% zOaX=SHbp&$k8$BWG=h^sYY@OxvvEX_oCla**hkp3zm7q&b&9rS{#BI7=j4j)5^p29 z%e;iy`&f_{`p(1c$NMl+%aldqn9WV%c&WR&(yfE*W1Tl!B~KN2WDKy^#WpKhA$2Pw zDovM5;&0gB(na6+_E^%=({`*+0^w!5P@7-K$YZs$Mu4le?@55Rvez0V(~xQ~a| z%}|q$NNA69TYSti!6M*vh^eOGXeOmdj&rC>BSg?zQ&!M2%m7emA)USk#Ld_)ihKV zfB&g|ulsvCagNDb*E)n#A+nuH;-TW#t_vGF220+x&@ePrO|x%|k{tT1Ec;JcE(6%+(5CYq&gq$v!Up{Emyi=31l+itlX7q^$iSAne*TPogl!td zYS;RKIEz?WnJsEMJHpO4x`t|>TZAo*z}V&I*utGYZDMBBOcVEzLR)bruT?{fz_0vF zXbOye)Y{rFsV~$~DNgg@k`dnz6u0KKqBI@-gVK*P88w{ir@h0sRWL>^Ehq+H_z$C( zEc*h7N<0$HlB~2I&K!^uUn&{ zth?(UwrkR|$#%k(os<>oJwo&=r>IlomuE{P75|8%K(H8|xQ5p7GPiOe-TUZ3HF7I) z?wNTIrs>wosRYzp1>@AxLVyIiK`gR&f#;4ZN??$s8q{YXYjg8M_9vbARoWo2QYE@K zt{;vzVq$cV#ct%L(ySai89h?>10ku1GjJKI8c@#6o43UW>uO_3U3w$i^)ToC@t51P z>RQpuY>W))j_Si1y>~Ro21D_eydxEBHP%WymSBmWth>w=idg^R`7O+EWeJ}Dhq3@4 z1|KQChyAk68%+tnutHvc%q-$j8k(qI{cXCx^+IH&JdPexFiURJcf2t(H@kX7{ z+PNj{Ah9J>Lr#}s^6N5*aHvS$t!#Ur??v@Vy^A&v`$RQ1&e()@P?6T%i1zl9t*8B` z+UQN!{9Z=Jx4gOuLv$#4ri00LxK-!uhBuQ0*#fF~1hqAg)LfZ1F!BKZPFYki^ffSU3UyJTPu8ewj6P z23;uz*^;;UD(@P_?Nh zHMRINMR|!|D$i1Ri>b669D8C^4o-IB^G8a@WxLkU(I9+UbfPc>#QnJ?6 zd`)WgU`1k0JXcbcJCPky(M>fi6qjpEAgL1W=f2xxLv_<=ky4IlCM}?I5h*M*;iV-= zrYEswYO@53JF!xwn@U${KsQ5Z>^Qn+@B7vr6{FkwH@k$sT5@C{8cy=cy0=;6|)X=n`o37=zvOEt~RFK$n!-Ru|cML0sVV*PNm8js=1^K(~! z!a*JJ{-us>32eETK#BRyPIF6EVDDSUUSAOT0m^=u5a@{kJLU+@&vLgfry5@op;_`* z{Do_0f&AJV0s2n008`mVOpg;Dwn8INH;hVL9BaVeu}{rMwnlCYe=%})2acybT=ew| z0K2{UKEXXh7LTt6I!(~G$S){XV)f@Or(!I7g9fE2&Y?ygBXx<+7ti#--FkLJ? 
zhqy9;~ih7X!t8Bn9MD< z`I|~Sg$6^;m`B$0kqqdP%7}Pr)=>+m<=r*97d!-9_~i9J&ou(aZyp>x0$FWYmqcpR zHR~GDR-X17If%)n+9o+M!J3SRT6>dcI?ZyomHRbpSLH`7Ku71Qg|_f@PJAKQXS&E~ zNYuG&>2Yj}vbUCupVCskp9L9HEMS&@V7^AL6jlXu0Ima%d4oSoy@ij(b4n!YORQ24 z;S8i`Fr{8>;tFeU*s?*SXJO_B8d5CXB%7$o;&W9Ta0dGgjkJ>5v~aXk!~oIM)rSgO zljL1qloWwqqYE@^(O?pn;*F%X;tov`7yHnqbv&7-h%lFBmURhl^u@@06n|NtWzGk( z9ob;^i^iJ6@&rQ%`@(U0lg##=4T84vHDEM!uJlJ4Q4#;cD}ZG`e?iy3H@#gFwm^R1 z7deb}VOdjT7DISOVt zxart8EcBbGSn`1ZVf;ZQo%l6j_+9tHZ`ZHjgDvnsq{=&Jk;Ylc`sX!%=?s_F=BO5#;DOapO*7m> z0Y3cS81%Pae(2%TO3gyWb~cw5@Xh~)LI3XZ`!+N*_%t*&w$t6sfB7$Z?)BHC-ObHt zfg@^C$%zjaz<1orol8^vz-LB%O2>u5J{B85nsl$X@v1U`Wh!a1 zOe6&#-=BvaIF=pF=R8&0h08pzZ>q!)4#l^F6dXu+UX`Pu-K*OC^&0sHaD_3&VCBPa zQtQME>nq~tyHcUXxo*!uC!Q23=irFwFnM>4cK?Z9ot?gdVM&}WpUuIdli}AW&2i5k zJ2NqjyTp2Df0isx*5Fk(q;cn7UwV|PK$VfN)GP)j<`?W~G_C^76d%LDwhfA|Z{(VS zIf~!Cn(VF3PJC~LcHmGvsr$ld!mm7L9UsV2+Tg0O+$|Bk4==t`o0n7CJoKkolAX@0 zw!;}ejzC}J;MXYKH78F436+_jT86K)G^C9FTuM-q-)*8?uchqKDSpZ7T->Ea#- zikBQ7JWAT#S5E`pZSzJ1EB<;>84xQa_WNA*QJ>cmm3`&;jq-l@8Ne(h@EsGr?*!Z0VEnLK|4P=?^=CD90fZE3 zt*zbSU9w4RS}d?lZD`#I|BKt1d;E&0%w(}Cld#QJ1Lx~vp01`H#Fj+LRbIuk2|JAH z_ZvkX=}#0PZ$e#;;5&~ZOeb_DEguQ*3B&z9V^QYj?Sd%~1PZ}Ym0_;K?B%>>Ncm9l z;8AsKIO`XA-@2gf0__Cy;Uc11cX;PTPE*>2uTwM-g>rzjDf7t|P3F+!dB;Bff&7@@ zI4`41>RIIb6UyXkr!o~6v3yQ_(Kbsz?d@`_%9NLY+TDRaTc_J7jHDa>HOOvh!%bpjWb_^S0Bl*z?vH5ao$f01!^El#Lcei~G+K5Pt4xFTS zQBQ8i%E*Ysou(J?6#7?WrVcy1xb^DOH}V@$Ks;}k)Z$xL|5g7^6B-(!&nyyiL^L>Y z4PO{~ZWtG`FQo5@QpRb%K(`IH&znXgMD!)&_E6t|b`A~& z)KN$Yn&S1h`*`-iZ1QOpZdiQNz=-_&JBWSC_j%f2oF_km`sWq4bo9te`e@J`*nK%ri*5jhzF+SJZzBMZ~X zJm775UeS~$AFR^eBW2pd=n0H5oZ3paz1<003sH0N$r6?^eI}hh{ss}(T$Yf&kB3*K zxa4HmG<>}Y_rY(N-3Q}qN)Sk1`%>JsL+93Qn_4dWz2VQBi1kT5K90W4Kj*Ia*7Spn z%aX2ke7$Xd&oIYPf2bQl8tRV8Hp$Oh=Snf6OikPH%B>?bCq+jkq?AX^V^<}ULfe?- z7WuA3DwUYSacuGU9q-KfJKQn{swQI9Z5%fC*Gr>`L!Z%#yq_<=Vp zqDZ(1T){#RP?-r#{Lk6QBEwDoDPqmm()Nm8#`iD?G!0h zX`QjH9D}06raMQsnJ@7Gqy5&;wY2?(i!ype&V#7OQY;nGx9m|A-Jd(}1A*c5!dvEJ z)Jgu>VP|JAP9;gE_yB_|tampaZ|atQ`Aen0dJ-dMd3U2PGmU0UDW+;;_DJ(* 
zvtNP4_zx6ikXv-*1wed%#}V~eK~_}+8W8mcyR?Gvd98WE7OnpqqeXmPPjNr}uP9C9 zGT)rCq yhx@@o^{J8};0)IP|J}p+zmS^RdwnF*$(aZJv9lZL_bF7sPx-ONqhh(gUi~k None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "dataloader.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=True) + + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_no_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") - 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertFalse(checker.dataloader_issues) + + def test_found_slow_dataloader(self): + dataloader_duration = (self.rule.get("dataloader_duration_threshold") + 1) * 1000 + dataset = self._get_mock_dataset(dataloader_duration, is_empty_dataset=False) + checker = DataloaderChecker() + checker.check_slow_dataloader(dataset) + self.assertTrue(checker.dataloader_issues) + + desc = self.rule.get("problem").format(dataloader_duration=dataloader_duration / 1000, + dataloader_duration_threshold=self.rule.get( + "dataloader_duration_threshold")) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, dur, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["dataloader"] = [TimelineEvent({"dur": dur, "name": "dataloader"})] + return dataset + + +if __name__ == '__main__': + tester = TestDataloaderChecker() + tester.test_no_dataloader() + 
tester.test_no_slow_dataloader() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py new file mode 100644 index 0000000000..d1df810a0e --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_syncbn_checker.py @@ -0,0 +1,62 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.syncbn.syncbn_checker import SyncBNChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSyncBNChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "sync_batchnorm.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_syncbn(self): + dataset = self._get_mock_dataset(1, is_empty_dataset=True) + + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_syncbn_not_reach_threshold(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") - 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertFalse(checker.syncbn_issues) + + def test_found_slow_dataloader(self): + dataset = self._get_mock_dataset(self.rule.get("max_syncbn_num") + 1, is_empty_dataset=False) + checker = SyncBNChecker() + checker.check_syncbn(dataset) + self.assertTrue(checker.syncbn_issues) + + desc = self.rule.get("problem").format(syncbn_num=self.rule.get("max_syncbn_num") + 1) + + self.assertEqual(desc, checker.desc) + + def _get_mock_dataset(self, syncbn_num, is_empty_dataset=False): + 
dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["sync_batchnorm"] = [] + for _ in range(syncbn_num): + dataset["sync_batchnorm"].append(TimelineEvent({"name": "SyncBatchNorm"})) + return dataset + + +if __name__ == '__main__': + tester = TestSyncBNChecker() + tester.test_no_syncbn() + tester.test_syncbn_not_reach_threshold() + tester.test_found_slow_dataloader() diff --git a/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py new file mode 100644 index 0000000000..360363ce37 --- /dev/null +++ b/profiler/test/ut/advisor/advisor_backend/timeline_advice/test_synchronize_stream.py @@ -0,0 +1,55 @@ +import unittest +import os +import sys +import yaml + +from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.test.ut.advisor.advisor_backend.tools.tool import recover_env + + +class TestSynchronizeChecker(unittest.TestCase): + @classmethod + def tearDownClass(cls) -> None: + recover_env() + + def setUp(self) -> None: + rule_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))), + "advisor", "rules", "synchronize.yaml") + + with open(rule_path, "rb") as file: + self.rule = yaml.safe_load(file) + + def test_no_synchronize_stream(self): + dataset = self._get_mock_dataset(1, [], is_empty_dataset=True) + + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def test_max_synchronize_stream(self): + dataset = self._get_mock_dataset(100, [], is_empty_dataset=False) + checker = SynchronizeStreamChecker() + checker.check_synchronize(dataset) + self.assertFalse(checker.synchronize_issues) + + def _get_mock_dataset(self, 
total_count, slow_synchronize_stream, is_empty_dataset=False): + dataset = TimelineEvent() + if is_empty_dataset: + return dataset + + dataset["synchronize_stream"] = TimelineEvent( + dict( + total_count=total_count, + slow_synchronize_stream=slow_synchronize_stream, + rule=dict(max_synchronize_num=10, problem="", solutions=[]), + ) + ) + return dataset + + +if __name__ == '__main__': + tester = TestSynchronizeChecker() + tester.test_no_synchronize_stream() + tester.test_max_synchronize_stream() diff --git a/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py new file mode 100644 index 0000000000..51acf3b8e2 --- /dev/null +++ b/profiler/test/ut/advisor/compute_advice/test_frequency_advice.py @@ -0,0 +1,145 @@ +import os +import shutil +import stat +import json + +import unittest +from profiler.advisor.interface.interface import Interface +from profiler.advisor.common.analyzer_scopes import SupportedScopes + + +class TestFrequencyAdvice(unittest.TestCase): + TMP_DIR = "./ascend_pt" + OUTPUT_DIR = "./ascend_pt/ASCEND_PROFILER_OUTPUT" + DEVICE_DIR = "./ascend_pt/PROF_000001_20240415174447255_OAANHDOMMJMHGIFC/device_0" + interface = None + err_interface = None + + def tearDown(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + self.clear_htmls() + + def setUp(self): + if os.path.exists(TestFrequencyAdvice.TMP_DIR): + shutil.rmtree(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.TMP_DIR): + os.makedirs(TestFrequencyAdvice.TMP_DIR) + if not os.path.exists(TestFrequencyAdvice.OUTPUT_DIR): + os.makedirs(TestFrequencyAdvice.OUTPUT_DIR) + if not os.path.exists(TestFrequencyAdvice.DEVICE_DIR): + os.makedirs(TestFrequencyAdvice.DEVICE_DIR) + self.clear_htmls() + + @classmethod + def clear_htmls(cls): + current_path = os.path.dirname(os.path.abspath(__file__)) + for filename in os.listdir(current_path): + # 检查文件是否以“att”开头 
+ if filename.startswith("att"): + # 构建文件的完整路径 + file_path = os.path.join(current_path, filename) + # 删除文件 + os.remove(file_path) + + @classmethod + def get_basic_trace_view(cls): + # Python pid + py_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 1, "args": {"name": "Python"}} + # ascend pid + ascend_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 4, "args": {"name": "Ascend Hardware"}} + # ascend pid + cann_pid_data = {"ph": "M", "name": "process_name", "tid": 0, "pid": 5, "args": {"name": "CANN"}} + # ascend hardware ops + ah_event1 = {"ph": "X", "name": "Slice1", "ts": "1699529623106750", "dur": 100, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + ah_event2 = {"ph": "X", "name": "Slice2", "ts": "1699529623106888", "dur": 80, "tid": 3, "pid": 4, + "args": {"Task Type": "AI_CORE"}} + # flow event + flow_event_s = {"ph": "s", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "200", "args": {}} + flow_event_e = {"ph": "f", "name": "link1", "id": 1, "tid": 3, "pid": 1, "ts": "1699529623106750", "args": {}} + return [py_pid_data, ascend_pid_data, cann_pid_data, ah_event1, ah_event2, flow_event_s, flow_event_e] + + @classmethod + def create_info_json(cls): + info = { + "DeviceInfo": [ + { + "id": 7, + "env_type": 3, + "ctrl_cpu_id": "ARMv8_Cortex_A55", + "ctrl_cpu_core_num": 1, + "ctrl_cpu_endian_little": 1, + "ts_cpu_core_num": 0, + "ai_cpu_core_num": 6, + "ai_core_num": 25, + "ai_cpu_core_id": 2, + "ai_core_id": 0, + "aicpu_occupy_bitmap": 252, + "ctrl_cpu": "0", + "ai_cpu": "2,3,4,5,6", + "aiv_num": 50, + "hwts_frequency": "49.999001", + "aic_frequency": "1850", + "aiv_frequency": "1850" + } + ] + } + with os.fdopen(os.open(f"{TestFrequencyAdvice.DEVICE_DIR}/info.json.0", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(info)) + + @classmethod + def create_non_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"ph": "X", "cat": 
"python_function", "name": "aten::slice", "ts": "200", "dur": 100, "tid": 2, + "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + py_event2 = {"ph": "X", "cat": "python_function", "name": "slice", "ts": "199", "dur": 200, "tid": 2, "pid": 1, + "args": {"Call stack": "/root/test/slice.py(116);\r\n/root/torch/module.py"}} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + # with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/msprof_20240415174455.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + @classmethod + def create_910B_trace_view(cls): + basic_info = cls.get_basic_trace_view() + + # python ops + py_event1 = {"name": "AI Core Freq", "ts": "1699529623106000.061", "pid": 682820896, "tid": 0, + "args": {"MHz": 1850}, "ph": "C"} + py_event2 = {"name": "AI Core Freq", "ts": "1699529623106770.541", "pid": 682820896, "tid": 0, + "args": {"MHz": 800}, "ph": "C"} + raw_data = [ + *basic_info, py_event1, py_event2 + ] + + with os.fdopen(os.open(f"{TestFrequencyAdvice.OUTPUT_DIR}/trace_view.json", + os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp: + fp.write(json.dumps(raw_data)) + + def test_run_should_run_success_when_msprof_not_contain_frequency_data(self): + self.create_info_json() + self.create_non_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(0, len(result.data.get("AI Core Frequency", []))) + result.clear() + + def test_run_should_run_success_when_trace_view_contain_frequency_data(self): + self.create_info_json() + self.create_910B_trace_view() + interface = Interface(profiling_path=self.TMP_DIR) + dimension = "computation" + scope = 
SupportedScopes.FREQ_ANALYSIS + result = interface.get_result(dimension, scope, render_html=1, output_dict=False, profiling_path=self.TMP_DIR) + self.assertEqual(2, len(result.data.get("AI Core Frequency", dict).get("data", []))) + result.clear() diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py index 44d97b248e..8073463592 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py @@ -24,6 +24,11 @@ class ProfilingParser(BaseProfilingParser): self._enable_operator_compare = True self._enable_memory_compare = True self._enable_communication_compare = True + self._enable_kernel_compare = True + self._enable_api_compare = True + + def _update_kernel_details(self): + pass def _update_memory_list(self): pass -- Gitee From 46a6ec6b2c531e28dcc0e4809227c490ff04da25 Mon Sep 17 00:00:00 2001 From: wugengjun <451676383@qq.com> Date: Mon, 5 Aug 2024 21:14:08 +0800 Subject: [PATCH 040/141] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=8F=AF=E8=A7=86?= =?UTF-8?q?=E5=8C=96ut=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pytorch/visualization/graph/node_op.py | 2 +- .../builder/test_graph_builder.py | 52 +++++++++++++ .../builder/test_msprobe_adapter.py | 73 +++++++++++++++++++ .../compare/test_graph_comparator.py | 32 ++++++++ .../compare/test_mode_adapter.py | 61 ++++++++++++++++ .../visualization/graph/test_base_node.py | 64 ++++++++++++++++ .../visualization/graph/test_graph.py | 50 +++++++++++++ .../visualization/graph/test_node_op.py | 28 +++++++ 8 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py create mode 100644 
debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py create mode 100644 debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py diff --git a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py index ed06e0ef73..1629caabd1 100644 --- a/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py +++ b/debug/accuracy_tools/msprobe/pytorch/visualization/graph/node_op.py @@ -34,4 +34,4 @@ class NodeOp(Enum): pattern = op_patterns[index] if re.match(pattern, node_name): return op - raise Exception("Cannot parse node_name {node_name} into NodeOp") + raise Exception(f"Cannot parse node_name {node_name} into NodeOp") diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py new file mode 100644 index 0000000000..66eceea4b2 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_graph_builder.py @@ -0,0 +1,52 @@ +import unittest +from unittest.mock import MagicMock, patch +from msprobe.pytorch.visualization.builder.graph_builder import GraphBuilder, Graph + + +class TestGraphBuilder(unittest.TestCase): + + def setUp(self): + self.construct_path = "step/rank/construct.json" + self.data_path = "step/rank/dump.json" + self.model_name = "TestModel" + self.graph = Graph(self.model_name) + self.construct_dict = { + "Tensor1": "Module1", + "Module1": 
None + } + self.data_dict = { + "Module1": {"data": "data for Module1"}, + "Tensor1": {"data": "data for Tensor1"} + } + + @patch('msprobe.pytorch.visualization.builder.graph_builder.load_json_file') + @patch('msprobe.pytorch.visualization.builder.graph_builder.load_data_json_file') + def test_build(self, mock_load_data_json_file, mock_load_json_file): + mock_load_data_json_file.return_value = self.data_dict + mock_load_json_file.return_value = self.construct_dict + + graph = GraphBuilder.build(self.construct_path, self.data_path, self.model_name) + self.assertIsNotNone(graph) + self.assertIsInstance(graph, Graph) + self.assertEqual(len(graph.node_map), 3) + + @patch('msprobe.pytorch.visualization.builder.graph_builder.save_json_file') + def test_to_json(self, mock_save_json_file): + GraphBuilder.to_json("step/rank/output.vis", self.graph) + mock_save_json_file.assert_called_once() + + @patch('msprobe.pytorch.visualization.graph.node_op.NodeOp.get_node_op') + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.get_input_output', return_value=([], [])) + def test__init_nodes(self, mock_get_input_output, mock_get_node_op): + GraphBuilder._init_nodes(self.graph, self.construct_dict, self.data_dict) + mock_get_node_op.assert_any_call("Tensor1") + mock_get_node_op.assert_any_call("Module1") + self.assertIs(self.graph.root, self.graph.get_node("TestModel")) + + def test__create_or_get_node(self): + node_op = MagicMock() + data_dict = {"node1": {}} + node = GraphBuilder._create_or_get_node(self.graph, data_dict, node_op, "node1") + self.assertIn("node1", self.graph.node_map) + self.assertEqual(node.input_data, {}) + self.assertEqual(node.output_data, {}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py new file mode 100644 index 0000000000..12ae24279f --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/builder/test_msprobe_adapter.py @@ -0,0 +1,73 @@ +import unittest +from unittest.mock import patch +from msprobe.pytorch.visualization.builder.msprobe_adapter import ( + get_compare_mode, + run_real_data, + get_input_output, + compare_data, + format_node_data, + compare_node, + _format_decimal_string, + _format_data +) +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestMsprobeAdapter(unittest.TestCase): + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.task_dumppath_get', return_value=(True, False)) + def test_get_compare_mode_summary(self, mock_task_dumppath_get): + mode = get_compare_mode("dummy_param") + self.assertEqual(mode, GraphConst.SUMMARY_COMPARE) + + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter._do_multi_process') + def test_run_real_data(self, mock_do_multi_process): + run_real_data("dump_path", "csv_path") + mock_do_multi_process.assert_called_once_with("dump_path", "csv_path") + + def test_get_input_output(self): + node_data = { + 'input_args': [{'type': 'torch.Tensor', 'dtype': 'torch.int64', 'shape': [5], + 'Max': 2049.0, 'Min': 0.0, 'Mean': 410.20001220703125, 'Norm': 2049.0009765625, + 'requires_grad': False, 'full_op_name': 'Distributed.broadcast.0.forward_input.0'}, + {'type': 'int', 'value': 0}], + 'input_kwargs': {'group': None}, + 'output': [{'type': 'torch.Tensor', 'dtype': 'torch.int64', 'shape': [5], + 'Max': 2049.0, 'Min': 0.0, 'Mean': 410.20001220703125, 'Norm': 2049.0009765625, + 'requires_grad': False, 'full_op_name': 'Distributed.broadcast.0.forward_output.0'}, + {'type': 'int', 'value': 0}, None] + } + node_id = "Distributed.broadcast.0.forward" + input_data, output_data = get_input_output(node_data, node_id) + self.assertIn("Distributed.broadcast.0.forward_output.0", output_data) + self.assertIn("Distributed.broadcast.0.forward_input.0", input_data) + + def test_compare_data(self): + data_dict_list1 = {'key1': {'type': 
'Type1', 'dtype': 'DType1', 'shape': 'Shape1'}} + data_dict_list2 = {'key1': {'type': 'Type1', 'dtype': 'DType1', 'shape': 'Shape1'}} + self.assertTrue(compare_data(data_dict_list1, data_dict_list2)) + + def test_format_node_data(self): + data_dict = {'node1': {'data_name': 'data1', 'full_op_name': 'op1'}} + result = format_node_data(data_dict) + self.assertNotIn('data_name', result['node1']) + self.assertNotIn('requires_grad', result['node1']) + + @patch('msprobe.pytorch.visualization.builder.msprobe_adapter.get_accuracy') + def test_compare_node(self, mock_get_accuracy): + node_ids = ["node1", "node2"] + data_dicts = [{'node1': {"input_args": [], "input_kwargs": {}, "output": {}}}, + {'node2': {"input_args": [], "input_kwargs": {}, "output": {}}}] + stack_json_data = {} + result = compare_node(node_ids, data_dicts, stack_json_data, False, False) + mock_get_accuracy.assert_called_once() + self.assertIsInstance(result, list) + + def test__format_decimal_string(self): + s = "0.123456789%" + formatted_s = _format_decimal_string(s) + self.assertIn("0.123457%", formatted_s) + + def test__format_data(self): + data_dict = {'value': 0.123456789} + _format_data(data_dict) + self.assertEqual(data_dict['value'], '0.123457') \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py new file mode 100644 index 0000000000..bece5380f0 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_graph_comparator.py @@ -0,0 +1,32 @@ +import unittest +from unittest.mock import patch +from msprobe.pytorch.visualization.compare.graph_comparator import GraphComparator +from msprobe.pytorch.visualization.graph.graph import Graph +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestGraphComparator(unittest.TestCase): + + def setUp(self): + self.graphs = 
[Graph("model1"), Graph("model2")] + self.data_paths = ["step1/rank/dump.json", "step2/rank/dump.json"] + self.stack_path = "step1/rank/stack.json" + self.output_path = "output/output.vis" + + @patch('msprobe.pytorch.visualization.compare.graph_comparator.get_compare_mode') + @patch('msprobe.pytorch.visualization.compare.graph_comparator.load_json_file') + @patch('msprobe.pytorch.visualization.compare.graph_comparator.load_data_json_file') + def test__parse_param(self, mock_load_data_json_file, mock_load_json_file, mock_get_compare_mode): + mock_load_data_json_file.return_value = "data_dict" + mock_load_json_file.return_value = "construct_dict" + mock_get_compare_mode.return_value = GraphConst.SUMMARY_COMPARE + self.comparator = GraphComparator(self.graphs, self.data_paths, self.stack_path, self.output_path) + self.comparator._parse_param(self.data_paths, self.stack_path, self.output_path) + + self.assertEqual(self.comparator.dump_path_param, { + 'npu_json_path': self.data_paths[0], + 'bench_json_path': self.data_paths[1], + 'stack_json_path': self.stack_path, + 'is_print_compare_log': True + }) + self.assertEqual(self.comparator.output_path, self.output_path) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py new file mode 100644 index 0000000000..7883a09a34 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/compare/test_mode_adapter.py @@ -0,0 +1,61 @@ +import unittest +from unittest.mock import patch, MagicMock +from msprobe.pytorch.visualization.compare.mode_adapter import ModeAdapter +from msprobe.pytorch.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.pytorch.visualization.utils import GraphConst, ToolTip +from msprobe.core.common.const import CompareConst + + +class TestModeAdapter(unittest.TestCase): + + def setUp(self): + self.node_op = NodeOp.module + self.node_id = 
"node_1" + self.node = BaseNode(self.node_op, self.node_id) + self.compare_mode = GraphConst.REAL_DATA_COMPARE + self.adapter = ModeAdapter(self.compare_mode) + self.compare_data_dict = [{}, {}] + + def test_add_md5_compare_data(self): + node_data = {'md5_key': 'some_md5_value'} + compare_data_dict = {'md5_key': 'expected_md5_value'} + precision_status = ModeAdapter._add_md5_compare_data(node_data, compare_data_dict) + self.assertTrue(precision_status) + + @patch('msprobe.pytorch.visualization.compare.mode_adapter.ModeAdapter') + def test_parse_result(self, mock_mode_adapter): + mock_mode_adapter._add_summary_compare_data.return_value = (True, 0.5) + self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE + precision_status, precision_index, other_dict = self.adapter.parse_result( + self.node, self.compare_data_dict) + self.assertEqual(precision_status, True) + self.assertEqual(precision_index, 0.5) + self.assertEqual(other_dict, {}) + + def test_prepare_real_data(self): + self.adapter.is_real_data_compare = MagicMock(return_value=True) + result = self.adapter.prepare_real_data(self.node) + self.assertTrue(result) + + def test_compare_mode_methods(self): + self.adapter.compare_mode = GraphConst.SUMMARY_COMPARE + self.assertTrue(self.adapter.is_summary_compare()) + self.assertFalse(self.adapter.is_md5_compare()) + self.assertFalse(self.adapter.is_real_data_compare()) + + def test_add_csv_data(self): + compare_result_list = ['result1', 'result2'] + self.adapter.add_csv_data(compare_result_list) + self.assertEqual(self.adapter.csv_data, compare_result_list) + + def test_add_error_key(self): + node_data = {'key': {}} + self.adapter.compare_mode = GraphConst.REAL_DATA_COMPARE + self.adapter.add_error_key(node_data) + self.assertEqual(node_data['key'][GraphConst.ERROR_KEY], + [CompareConst.ONE_THOUSANDTH_ERR_RATIO, CompareConst.FIVE_THOUSANDTHS_ERR_RATIO]) + + def test_get_tool_tip(self): + self.adapter.compare_mode = GraphConst.MD5_COMPARE + tips = 
self.adapter.get_tool_tip() + self.assertEqual(tips, {'md5': ToolTip.MD5}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py new file mode 100644 index 0000000000..544950f358 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_base_node.py @@ -0,0 +1,64 @@ +import unittest +from msprobe.pytorch.visualization.graph.base_node import BaseNode, NodeOp +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestBaseNode(unittest.TestCase): + + def setUp(self): + self.node_op = NodeOp.module + self.node_id = "node_1" + self.up_node = BaseNode(self.node_op, "up_node_1") + self.node = BaseNode(self.node_op, self.node_id, self.up_node) + + def test_init_and_str(self): + self.assertEqual(self.node.op, self.node_op) + self.assertEqual(self.node.id, self.node_id) + self.assertEqual(str(self.node), 'id:\tnode_1') + + def test_eq(self): + other_node = BaseNode(self.node_op, self.node_id, self.up_node) + self.assertEqual(self.node, other_node) + + def test_get_suggestions(self): + self.node.get_suggestions() + self.assertIn(GraphConst.SUGGEST_KEY, self.node.suggestions) + + def test_set_input_output(self): + input_data = {'input1': 'value1'} + output_data = {'output1': 'value2'} + self.node.set_input_output(input_data, output_data) + self.assertEqual(self.node.input_data, input_data) + self.assertEqual(self.node.output_data, output_data) + + def test_add_upnode(self): + self.node = BaseNode(self.node_op, self.node_id) + new_up_node = BaseNode(self.node_op, "new_up_node_1") + self.node.add_upnode(new_up_node) + self.assertEqual(self.node.upnode, new_up_node) + self.assertIn(self.node, new_up_node.subnodes) + + def test_add_link(self): + other_node = BaseNode(self.node_op, "other_node_1") + ancestors = ['a1', 'a2'] + self.node.add_link(other_node, ancestors) + 
self.assertEqual(self.node.matched_node_link, ancestors) + self.assertEqual(other_node.matched_node_link, ancestors) + + def test_to_dict(self): + expected_result = { + 'id': self.node_id, + 'node_type': self.node_op.value, + 'data': {}, + 'output_data': {}, + 'input_data': {}, + 'upnode': self.up_node.id, + 'subnodes': [], + 'matched_node_link': [], + 'suggestions': {} + } + self.assertEqual(self.node.to_dict(), expected_result) + + def test_get_ancestors(self): + expected_ancestors = ['up_node_1'] + self.assertEqual(self.node.get_ancestors(), expected_ancestors) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py new file mode 100644 index 0000000000..19d0987434 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_graph.py @@ -0,0 +1,50 @@ +import unittest +from msprobe.pytorch.visualization.graph.graph import Graph, NodeOp +from msprobe.pytorch.visualization.graph.base_node import BaseNode +from msprobe.pytorch.visualization.utils import GraphConst + + +class TestGraph(unittest.TestCase): + + def setUp(self): + self.graph = Graph("model_name") + self.node_id = "node_id" + self.node_op = NodeOp.module + + def test_add_node_and_get_node(self): + self.graph.add_node(self.node_op, self.node_id) + node = self.graph.get_node(self.node_id) + self.assertIsNotNone(node) + self.assertIn(self.node_id, self.graph.node_map) + + def test_to_dict(self): + self.graph.add_node(self.node_op, self.node_id) + result = self.graph.to_dict() + self.assertEqual(result[GraphConst.JSON_ROOT_KEY], "model_name") + self.assertIn(self.node_id, result[GraphConst.JSON_NODE_KEY]) + + def test_str(self): + self.graph.add_node(self.node_op, self.node_id) + expected_str = f'{self.node_id}' + self.assertIn(expected_str, str(self.graph)) + + def test_match(self): + graph_a = Graph("model_name_a") + graph_b = Graph("model_name_b") + node_a = 
BaseNode(self.node_op, self.node_id) + graph_a.add_node(NodeOp.module, "node_id_a") + graph_b.add_node(NodeOp.module, "node_id_b") + matched_node, ancestors = Graph.match(graph_a, node_a, graph_b) + self.assertIsNone(matched_node) + self.assertEqual(ancestors, []) + + def test_dfs(self): + graph = Graph("model_name") + graph.add_node(NodeOp.module, "node_a") + graph.add_node(NodeOp.module, "node_b") + node_a = BaseNode(self.node_op, self.node_id) + result = {} + graph.dfs(node_a, result) + self.assertEqual(result, {'node_id': {'id': 'node_id', 'node_type': 0, 'data': {}, + 'output_data': {}, 'input_data': {}, 'upnode': 'None', 'subnodes': [], + 'matched_node_link': [], 'suggestions': {}}}) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py new file mode 100644 index 0000000000..1a340ac8b3 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/visualization/graph/test_node_op.py @@ -0,0 +1,28 @@ +import unittest +from msprobe.pytorch.visualization.graph.node_op import NodeOp + + +class TestNodeOp(unittest.TestCase): + + def test_get_node_op_valid(self): + node_name = "ModuleTest" + self.assertEqual(NodeOp.get_node_op(node_name), NodeOp.module) + + def test_get_node_op_invalid(self): + node_name = "InvalidNodeName" + with self.assertRaises(Exception): + NodeOp.get_node_op(node_name) + + def test_get_node_op_all(self): + test_cases = [ + ("ModuleTest", NodeOp.module), + ("TensorTest", NodeOp.function_api), + ("TorchTest", NodeOp.function_api), + ("FunctionalTest", NodeOp.function_api), + ("NPUTest", NodeOp.function_api), + ("VFTest", NodeOp.function_api), + ("DistributedTest", NodeOp.function_api), + ("AtenTest", NodeOp.function_api) + ] + for node_name, expected_op in test_cases: + self.assertEqual(NodeOp.get_node_op(node_name), expected_op) -- Gitee From d4be82829454a900b49c92d55aacc010bf6734bd Mon Sep 17 00:00:00 2001 From: 
zhouxianqi <13165993773@163.com> Date: Fri, 9 Aug 2024 15:08:34 +0800 Subject: [PATCH 041/141] update_precision_index --- .../module_visualization/graph/prof_node.py | 19 +++++++++++++++++++ .../graph_build/prof_graph_builder.py | 4 +++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index cfcdabbb99..7d96a49691 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -23,6 +23,7 @@ class ProfNode(BaseNode): def __init__(self, event: TraceEventBean, parent_node=None): super().__init__(event, parent_node) self._kernel_total_list = [] + self._precision_index = 1 @property def node_id(self): @@ -66,6 +67,7 @@ class ProfNode(BaseNode): @property def data(self): return {"Input Data": self.input_data, + "precision_index": self.precision_index, "Host Self Duration(us)": round(self.host_self_dur, 2), "Host Total Duration(us)": round(self.host_total_dur, 2), "Device Self Duration(us)": round(self.device_self_dur, 2), @@ -83,8 +85,25 @@ class ProfNode(BaseNode): def is_root_node(self): return self.node_id == Constant.NPU_ROOT_ID + @property + def precision_index(self): + return self._precision_index + + @precision_index.setter + def precision_index(self, precision_index): + self._precision_index = precision_index + def update_child_nodes(self, node): self._child_nodes.append(node) def update_kernel_total_list(self, kernel_list: list): self._kernel_total_list.extend(kernel_list) + + def update_child_precision_index(self): + if not self.child_nodes: + return + max_dur = max((node.device_total_dur for node in self.child_nodes)) + min_dur = min((node.device_total_dur for node in self.child_nodes)) + diff_dur = max_dur - min_dur + for node in self.child_nodes: + node.precision_index = 1- (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 diff --git 
a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index 83331b6250..a1bd6ba000 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -29,7 +29,7 @@ class ProfGraphBuilder: def _create_event_bean_from_ops(cls, op_list: list, name: str) -> TraceEventBean: min_start = min((op.start_time for op in iter(op_list))) max_end = max((op.end_time for op in iter(op_list))) - # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了+1 +2处理 + # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-1 +2处理 return TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) @classmethod @@ -69,6 +69,8 @@ class ProfGraphBuilder: matched_node = matched_node.binary_search(start_time) all_data = root_node.find_all_child_nodes() all_data.append(root_node) + for node in all_data: + node.update_child_precision_index() return all_data def find_bwd_module(self) -> list: -- Gitee From 553dccdf77570cbc2af339a040f063a456a7bdd1 Mon Sep 17 00:00:00 2001 From: qianggee Date: Wed, 21 Aug 2024 08:22:47 +0000 Subject: [PATCH 042/141] merge from poc --- debug/accuracy_tools/kj600/README.md | 128 +++++- .../kj600/kj600/anomaly_inform.py | 1 - debug/accuracy_tools/kj600/kj600/const.py | 4 + .../kj600/distributed/wrap_distributed.py | 32 +- debug/accuracy_tools/kj600/kj600/features.py | 4 + .../accuracy_tools/kj600/kj600/module_hook.py | 382 +++++++++++++----- .../kj600/kj600/module_metric.py | 71 +++- .../kj600/kj600/module_spec_verifier.py | 7 - .../kj600/kj600/optimizer_collect.py | 83 +++- .../kj600/kj600/unittest/test_monitor.py | 145 +++++++ debug/accuracy_tools/kj600/kj600/utils.py | 27 +- debug/accuracy_tools/kj600/pyproject.toml | 6 +- 12 files changed, 725 insertions(+), 165 deletions(-) create mode 100644 debug/accuracy_tools/kj600/kj600/const.py create mode 100644 
debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py diff --git a/debug/accuracy_tools/kj600/README.md b/debug/accuracy_tools/kj600/README.md index 1782e58bec..6ffb45274e 100644 --- a/debug/accuracy_tools/kj600/README.md +++ b/debug/accuracy_tools/kj600/README.md @@ -20,21 +20,122 @@ ### 2. 安装 kj600 -方式一:从 git 直接安装 +方式一:下载源码安装 ``` -pip install git+https://gitee.com/xiangsen2/kj600.git +git clone -b poc https://gitee.com/ascend/mstt.git +cd mstt/debug/accuracy_tools/kj600 +pip install . ``` -方式二:下载源码安装 +## 快速上手 +### 梯度监控 +模型训练状态的异常通常会反映在loss和梯度上,通过对模型各个模块梯度的监控,可以帮助快速定位异常的第一现场。 + +1. 输出目录 +监控结果写入tensorboard的event文件/csv中,设置输出路径(默认为`kj600_output`,通过环境变量配置) +```bash +export KJ600_OUTPUT_DIR=/xxx/output_dir ``` -git clone https://gitee.com/xiangsen2/kj600.git -cd kj600 -pip install . + +2. 在训练脚本中使能工具(Megatron-LM) + +``` +from kj600.module_hook import TrainerMon +hooker = TrainerMon("./monitor_config.json", process_group=None, params_have_main_grad=True) + +model, optimizer, opt_param_scheduler = setup_model_and_optimizer( + model_provider, model_type) +# 模型、优化器初始化后使能工具 + +hooker.monitor_gnorm_with_ad( + model, grad_acc_steps=args.global_batch_size//args.data_parallel_size//args.micro_batch_size, optimizer=optimizer, dp_group=mpu.get_data_parallel_group(), tp_group=mpu.get_tensor_model_parallel_group()) + + +# 可以在任意位置获取当前的梯度统计量, 不同调用位置不能保证reduce已完成 +reduced, unreduced = hooker.generate_wgrad_metrics() +``` + + +| 字段名字 | 是否必选 | 解释 | +| ------------------------------------------------------------ | -------- | -------- | +|"grad_acc_steps"| 必选 |梯度累积的步数,当micro step=grad acc steps时,会触发反向hook获取模型梯度| +|"optimizer"| 可选 |各种并行域reduce后的梯度在opt.step前获取,数据写入在step后进行。默认patch pytorch的优化器,传入其他优化器(如MegatronOptimizer)可以调整工具行为,如clip_grad发生在megatron的优化器中,pytorch的优化器之前。| +|"dp_group"| 可选 |训练过程中的dp_group。dp域通信后,group内所有rank的梯度相同,落盘数据冗余。提供dp_group后,工具仅保留每个dp_group的第一个rank的梯度| +|"tp_group"| 可选 
|训练过程中的tp_group。tp域通信后,group内部分参数所有rank的梯度相同,落盘数据冗余。提供tp_group后,工具仅保留每个tp_group中冗余参数在第一个rank的梯度。当前适配Megatron core_v0.6.0, 通过权重属性`tensor_model_parallel`判断是否冗余| + +3. 在json文件中配置工具 +``` +{ + "targets": { + "module": {}, + "module.module.language_model.encoder.layers.0": {"input_grad":"tuple[1]:0", "output_grad":"tuple[2]:0"} + }, + "print_struct": false, # 若不了解模型结构,可以打开print_struct打印模型结构 + "module_ranks": [0,1,2,3], # 需要监控的rank + "wg_distribution": true, + "format": "csv", # 如果不需要落盘文件,设置为 "api" + "ops": ["norm", "min", "max", "mean"], + "eps": 1e-8, + "ndigits": 6 +} +``` + +4. 结果验证 +训练日志中通常会打屏一个训练步的grad norm。提供了脚本校验落盘数据和打屏信息的一致性。 +```bash +python kj600/unittest/test_monitor.py -m kj600_output/Aug13_02-27-5 -l logs/train_gpt3_TP2_PP1_CP1_monitor.log -d 2 -t 2 +``` +`-m`指定落盘csv的路径前缀。`-l`指定训练日志。脚本通过关键词`grad norm: `匹配训练日志中的grad norm,根据实际情况修改。从落盘数据计算的grad norm和日志中的grad norm相对偏差超过1%,会有警告。`-d`、`--dp_size`声明data parallel size,`-t`、`--tp_size`声明tensor parallel size。 +示例输出: +```txt +rank 2 is duplicated in dp group +rank 3 is duplicated in dp group +grad norm in consiste between training log and reduced gradients monitored +grad mean is in consisten between unreduced grad and reduced grad monitord. +``` +需要提供并行相关参数,具体参见: +```bash +python kj600/unittest/test_monitor.py -h +``` +### 梯度异常时序判断 +0. 训练前配置相关参数 +工具支持自动判断训练过程中的梯度异常,需要在配置文件中设置alert相关字段。`AnomalyTurbulence`会将当前数值与历史均值比较,如果相对偏差超过阈值,会在打屏信息中提示用户。如果打开`dump`选项,则会将异常梯度相关信息落盘,用于后续时序判断。 +```json + "alert": { + "rules": [{"rule_name": "AnomalyTurbulence", "args": {"threshold": 0.5}}], + "dump": true + }, +``` +1. 实例化工具时传入流水线并行group +```python +hooker = TrainerMon("./monitor_config.json", process_group=mpu.get_pipeline_model_parallel_group(), params_have_main_grad=True) +``` +照常开始训练 + +2. 
进入工具路径启动异常分析脚本: +```shell +cd kj600/ +python3 anomaly_analyse.py -d $KJ600_OUTPUT_DIR/anomaly_detected +``` +支持以下参数配置 +| 字段名字 | 解释 | 是否必选 | +| ------ | -------- | -------- | +|-d 或 --data_path| 指定梯度异常落盘文件夹,梯度监控功能输出,一般为$KJ600_OUTPUT_DIR/anomaly_detected。|是 | +|-o 或 --out_path| 排序后的异常落盘文件地址,默认在--data_path路径下落盘一个anomaly_analyse.json文件| 否 | +|-k 或 --topk| 指定保留前topk个异常,默认为8| 否 | +|-s 或 --step_list| 指定分析的step范围,默认为[]| 否 | + +## 已知问题 +- Megatron中使用流水线并行时,完成当前stage的计算并将output传递到下一个stage后,会调用`deallocate_output_tensor`释放output。当工具使能后,部分功能会给一些module注册反向hook,hook功能可能为output创建一个view副本,导致output内存无法释放。如果工具使能后出现如下报错,则需要跳过deallocate的步骤。在较新的megatron代码中,可以在`megatron/training/arguments.py`中将`kw_args['deallocate_pipeline_outputs']`设为False,或在`megatron/core/pipeline_parallel/schedules.py`中跳过`deallocate_output_tensor`的调用 +```bash +File "~/Megatron-LM/megatron/core/pipeline_parallel/schedules.py", line 117, in deallocate_output_tensor + assert out._base is None, "counter-productive to free a view of another tensor." +AssertionError: counter-productive to free a view of another tensor. ``` -# 快速上手 +## 详细配置 下面以Ascend/ModelLink训练框架为例,给出kj600工具的使用方法。 @@ -54,8 +155,10 @@ pip install . "cc_distribution": {"enable":true, "cc_codeline":[]}, "alert": { "rules": [{"rule_name": "AnomalyTurbulence", "args": {"threshold": 0.5}}], - "inform": {"recipient": "database", "connection_str": "mysql+pymysql://username:password@host:port/database"} + "inform": {"recipient": "database", "connection_str": "mysql+pymysql://username:password@host:port/database"}, + "dump": true }, + "format": "tensorboard", "ops": ["min", "max", "norm", "zeros", "id"], "eps": 1e-8 } @@ -80,6 +183,7 @@ pip install . |"wg_distribution"| 可选 | 若为true则会监控指定模块的参数梯度, 默认为false。 | |"alert"| 必选 | · "rules": 指定自动报警的异常检测机制及其相应的阈值。目前实现的异常检测是AnomalyTurbulence。 如果统计标量超出历史均值的指定浮动范围(threshold指定, 0.5意味着上浮或者下浮50%)则在控制台打印报警信息。
· "inform": 自动报警需要的配置,若想关闭自动报警删掉inform的配置即可。其中"recipient"指定自动报警的通知方式,可选值为"database"或"email",默认为"database"。
- 若"recipient"为"database",则需要指定"connection_str"字段,即数据库的连接URL,默认为{"recipient":"database", "connection_str": "mysql+pymysql://username:password@host:port/database"},若有特殊字符需要转义。
- 若"recipient"为"email",则需要指定"send_email_address"-发送方邮箱地址,"receive_email_address"-接收方邮箱地址,"send_email_username"-发送方邮箱用户名,"send_email_password"-发送方邮箱密码,"smtp_server"-发送方邮箱对应的SMTP服务器,"smtp_port"-发送方邮箱对应的SMTP端口号。默认为:
{"recipient":"email", send_email_address": "sender@huawei.com", "receive_email_address": "receiver@huawei.com", "send_email_username": "username", "send_email_password": "******", "smtp_server": "smtpscn.huawei.com", "smtp_port": "587"}| |"cc_distribution"| 可选 | 其中"enable"字段控制通信监控模块的开关;需要监控通信算子时,务必尽量早地实例化`TrainerMon`, 因为监控通过劫持原始func后挂hook实现,部分加速库初始化时会保存原始function,避免监控失效。"cc_codeline"字段指定监控的代码行,如:`train.py\\[23\\]`,默认为空列表,不特别指定;"cc_pre_hook"字段控制是否监控通信前的数据; 模块会在第二个optimize.step之前打印通信日志,包括通信api的调用栈、输入dtype、通信group。 "cc_log_only"为true时,仅打印日志,不监控通信的输入输出,并在打印后中断训练。可以根据通信日志设置"cc_codeline",规避与训练过程不相关的通信,比如一些时间、metrics的同步。| +|"format"| 可选 | 数据落盘格式,默认为tensorboard,支持可选 "csv"。 | |"ops"| 可选 |与ur_distribution、xy_distribution、mv_distribution、wg_distribution、mg_direction、cc_distribution配合,监控所选张量的min、max、norm、zeros值。其中,zeros代表监控所选张量的元素小于eps的比例,id代表监控所选的非张量本身,默认为[]。| |"eps"| 可选 |若ops里包含"zeros"则需要配置,默认为1e-8。| @@ -115,14 +219,17 @@ pip install . ``` from kj600.module_hook import TrainerMon - hooker = TrainerMon("./llama2_config.json", params_have_main_grad=True, opt_ty="Megatron_DistributedOptimizer") # or opt_ty=Megatron_Float16OptimizerWithFloat16Params + hooker = TrainerMon("./llama2_config.json", process_group=None, params_have_main_grad=True, opt_ty="Megatron_DistributedOptimizer") # or opt_ty=Megatron_Float16OptimizerWithFloat16Params hooker.hook_modules(model=model, grad_acc_steps=args.global_batch_size//args.data_parallel_size//args.micro_batch_size) ``` params_have_main_grad: 若为True则参数权重梯度为main_grad,否则为grad,默认为True。 如果不是Megatron-LM的训练框架, 可以设置对应的梯度累积步数grad_acc_steps。 - 如果要监控混合精度优化器的动量和方差, 需要在混合精度优化器构造后加入如下代码。 目前只支持Megatron_DistributedOptimizer, 使用bf16或者fp16混合精度时开启分布式优化器。 或者Megatron_Float16OptimizerWithFloat16Params, 使用bf16或者fp16混合精度选项并且不开启分布式优化器。 + 如果要监控优化器的动量和方差,需要在优化器构造后加入如下代码。 目前支持Megatron实现的优化器: + - Megatron_FP32OptimizerMon,普通优化器。 + - Megatron_Float16OptimizerWithFloat16Params, 使用bf16或者fp16混合精度选项并且不开启分布式优化器。 + - Megatron_DistributedOptimizer, 使用bf16或者fp16混合精度时开启分布式优化器。 
``` model, optimizer, opt_param_scheduler = setup_model_and_optimizer( @@ -171,6 +278,7 @@ TrainerMon.__init__(config_file_path, params_have_main_grad=True, opt_ty=None) - | 参数 | 说明 | 是否必选 | | ----- | -------------------- | -------- | | config_file_path |自己写的json配置文件路径。 | 是 | +| process_group | 传入ProcessGroup对象,用以确定pipeline并行不同rank异常间时序,megatron下通过core.parallel_state.get_pipeline_model_parallel_group()获得 | 否 | | params_have_main_grad |权重是否使用main_grad,是就为True,否则为False。默认为True。 | 否 | | opt_ty |优化器类型,有两个选项,Megatron_DistributedOptimizer:使用bf16或者fp16混合精度时开启分布式优化器;Megatron_Float16OptimizerWithFloat16Params:使用bf16或者fp16混合精度选项并且不开启分布式优化器,也适用于常规的adam优化器。如果使用的不是adam优化器,使用None。默认为None。 | 否 | diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_inform.py b/debug/accuracy_tools/kj600/kj600/anomaly_inform.py index 301ac76921..485c06d4d2 100644 --- a/debug/accuracy_tools/kj600/kj600/anomaly_inform.py +++ b/debug/accuracy_tools/kj600/kj600/anomaly_inform.py @@ -1,6 +1,5 @@ import smtplib from email.mime.text import MIMEText -import sqlite3 from datetime import datetime, timedelta from kj600.database import Database, ExceptionMessage diff --git a/debug/accuracy_tools/kj600/kj600/const.py b/debug/accuracy_tools/kj600/kj600/const.py new file mode 100644 index 0000000000..e4198a9942 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/const.py @@ -0,0 +1,4 @@ + +class Const: + vpp = "vpp" + vpp_sep = ':' \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py b/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py index 4e2d5e175e..49e81ec5a5 100644 --- a/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py +++ b/debug/accuracy_tools/kj600/kj600/distributed/wrap_distributed.py @@ -106,6 +106,13 @@ class ApiRegistry: dist.Work.wait = wrapped_wait(dist.Work) +def get_process_group(process_group): + return ( + process_group + if isinstance(process_group, dist.ProcessGroup) + else dist.GroupMember.WORLD + ) + def 
stack_filter(stack): for pattern in StackBlackList: @@ -180,33 +187,12 @@ def create_async_callback_func(context, ops, args, prefix): catch_data(context, ops, args, prefix) return store_data -def get_tensor_dtype(args): - dtypes = [] - for arg in args: - if isinstance(arg, torch.Tensor): - dtypes.append(arg.dtype) - else: - dtypes.append(None) - return dtypes - -def get_group_members(args): - group = None - for arg in args: - if isinstance(arg, dist.ProcessGroup): - group = arg - if group is None: - group = dist.GroupMember.WORLD - return dist.get_process_group_ranks(group) - def create_hooks(context, monitor): def cc_log_hook(module, args, kwargs): - all_args = args + tuple(kwargs.values()) - dtypes = '|'.join([str(i) if i else '' for i in get_tensor_dtype(all_args)]) stack = ';'.join(get_callstack()) - group_members = '|'.join([str(i) for i in get_group_members(all_args)]) - monitor.cc_logged_stack[module.op_name_].add(';'.join([dtypes, group_members, stack])) + monitor.cc_logged_stack[module.op_name_].add(stack) return def cc_pre_hook(module, args, kwargs): @@ -235,8 +221,8 @@ def create_hooks(context, monitor): if (dist.is_initialized() and dist.get_rank() not in monitor.module_rank_list and monitor.module_rank_list != []): return [pre_hooks, hooks] - pre_hooks.append(cc_log_hook) if monitor.cc_log_only: + pre_hooks.append(cc_log_hook) return [pre_hooks, hooks] if monitor.cc_pre_hook: diff --git a/debug/accuracy_tools/kj600/kj600/features.py b/debug/accuracy_tools/kj600/kj600/features.py index 7810188f7d..09b48cffda 100644 --- a/debug/accuracy_tools/kj600/kj600/features.py +++ b/debug/accuracy_tools/kj600/kj600/features.py @@ -11,6 +11,10 @@ def square_sum(x: torch.tensor): def get_min(x: torch.tensor): return torch.min(x) +@torch.no_grad() +def get_mean(x: torch.tensor): + return torch.mean(x) + @torch.no_grad() def get_norm(x: torch.tensor): return torch.norm(x, p=2) diff --git a/debug/accuracy_tools/kj600/kj600/module_hook.py 
b/debug/accuracy_tools/kj600/kj600/module_hook.py index 3b600b2b7f..21c326ac01 100644 --- a/debug/accuracy_tools/kj600/kj600/module_hook.py +++ b/debug/accuracy_tools/kj600/kj600/module_hook.py @@ -2,21 +2,40 @@ import os import uuid import json from collections import defaultdict +from functools import partial from datetime import datetime import torch import torch.distributed as dist +from torch import Stream from torch.optim.optimizer import register_optimizer_step_pre_hook, register_optimizer_step_post_hook -from kj600.module_spec_verifier import get_config, validate_config_spec -from kj600.optimizer_collect import MixPrecsionOptimizerMon, print_rank_0, OptimizerMonFactory, MegatronDistributedOptimizerMon +from kj600.module_spec_verifier import validate_config_spec +from kj600.optimizer_collect import OptimizerMon, print_rank_0, OptimizerMonFactory from kj600.features import eff_rank, get_sign_matches from kj600.visualizer import HeatmapVisualizer -from kj600.anomaly_detect import AnomalyScanner, SummaryWriterWithAD +from kj600.anomaly_detect import AnomalyScanner, AnomalyDataFactory, SummaryWriterWithAD, CSVWriterWithAD, BaseWriterWithAD from kj600.anomaly_inform import AnomalyInformFactory -from kj600.module_metric import get_metrics, write_metrics_tensorboard, get_summary_writer_tag_name, TensorMetrics -from kj600.distributed.wrap_distributed import api_register, create_hooks, op_aggregate -from kj600.utils import print_warn_log, print_info_log, get_param_struct +from kj600.anomaly_analyse import AnomalyDataWriter +from kj600.module_metric import get_metrics, write_metrics_tensorboard, write_metrics_csv, get_summary_writer_tag_name, TensorMetrics, squash_param_name +from kj600.distributed.wrap_distributed import api_register, create_hooks, op_aggregate, get_process_group +from kj600.utils import print_warn_log, print_info_log, print_error_log, get_param_struct +from kj600.const import Const +from kj600.file_check import FileOpen +try: + import torch_npu 
+except ImportError: + pass + + +def param_is_not_tensor_parallel_duplicate(param, tp_group): + return (hasattr(param, 'tensor_model_parallel') and param.tensor_model_parallel) or ( + torch.distributed.get_rank(group=tp_group) == 0 + ) + +def param_is_data_parallel_duplicate(dp_group): + return torch.distributed.get_rank(group=dp_group) != 0 + class ModuleHookContext: def __init__(self, module_name) -> None: self.step = 0 @@ -35,9 +54,6 @@ class ModuleHookContext: self.format_by_arg[key_name] = target_config[self.module_name][key_name] elif key_name in ['input', 'input_grad']: self.ignore_in = True - else: - raise KeyError(f"Missing key: {key_name} of {self.module_name} in config.json") - class OptimizerContext: def __init__(self) -> None: @@ -71,30 +87,50 @@ class CommunicationContext: def aggregate(self): self.data = self._agg(self.data) +class GradContext: + def __init__(self) -> None: + self.pre = [] + self.post = [] + self.acc_metric = [] + self.acc = {} + self.actv = defaultdict(dict) + + def reset(self): + self.pre.clear() + self.post.clear() + self.acc_metric.clear() + self.acc.clear() + self.actv.clear() + class TrainerMon: tensor_metrics = TensorMetrics() # opt_ty: "Megatron_Float16OptimizerWithFloat16Params" or "Megatron_DistributedOptimizer" - def __init__(self, config_file_path, params_have_main_grad=True, opt_ty=None) -> None: + def __init__(self, config_file_path, process_group=None, params_have_main_grad=True, opt_ty=None) -> None: self.module_fwd_hook_context_by_module = defaultdict(ModuleHookContext) self.module_bwd_hook_context_by_module = defaultdict(ModuleHookContext) self.optimizer_context = defaultdict(OptimizerContext) self.cc_context = defaultdict(CommunicationContext) + self.grad_context = GradContext() + self.process_group = get_process_group(process_group) self.params_have_main_grad = params_have_main_grad - self.config = get_config(config_file_path) + with FileOpen(config_file_path, 'r') as f: + self.config = json.load(f) 
self.module_rank_list = self.config.get("module_ranks", []) + self.format = self.config.get('format', 'tensorboard') self.eps = self.config.get('eps', 1e-8) self.ops = self.config.get('ops', []) + self.ndigits = self.config.get('ndigits', 6) self.xy_distribution = self.config.get('xy_distribution', False) if not self.xy_distribution: print_rank_0("> module input/output input_grad/output_grad is not monitored. ") - # backward hook cause megatron-lm pipeline parallel schedule assert exception. # TBD: backward hook cause output tensor is view of some base tensor. root cause invesigation pending. self.forward_only = self.config.get('forward_only', False) if self.forward_only: print_rank_0("> only module forward is monitored. ") + self.backward_only = self.config.get('backward_only', False) self.ur_distribution = self.config.get('ur_distribution', False) if not self.ur_distribution: @@ -121,27 +157,69 @@ class TrainerMon: api_register.redirect_api() alert_setting = self.config.get('alert', {"rules":[]}) - self.alert_rules = AnomalyScanner.load_rules(alert_setting["rules"]) - + self.alert_rules = AnomalyScanner.load_rules(alert_setting["rules"]) anomaly_inform = AnomalyInformFactory.create_informer(**alert_setting["inform"]) if "inform" in alert_setting else None - self.optimizer_hooked = False output_base_dir = os.getenv('KJ600_OUTPUT_DIR', './kj600_output') cur_time = datetime.now().strftime('%b%d_%H-%M-%S') unique_id = str(uuid.uuid4())[:8] + if dist.is_initialized(): - if (dist.get_rank() in self.module_rank_list) or len(self.module_rank_list) == 0: - self.summary_writer = SummaryWriterWithAD( - os.path.join(output_base_dir, f"{cur_time}-rank{dist.get_rank()}-{unique_id}"), self.alert_rules, unique_id, anomaly_inform) + rank = dist.get_rank() + tensorboard_dir = os.path.join(output_base_dir, f"{cur_time}-rank{rank}-{unique_id}") + pp_stage = dist.get_group_rank(self.process_group, rank) + group_mates = dist.get_process_group_ranks(self.process_group) else: - 
self.summary_writer = SummaryWriterWithAD(os.path.join(output_base_dir, f"{cur_time}-{unique_id}"), self.alert_rules, unique_id, anomaly_inform) + rank = 0 + tensorboard_dir = os.path.join(output_base_dir, f"{cur_time}-{unique_id}") + pp_stage = 0 + group_mates = [0] + self.rank = rank + + # 初始化AnomalyData工厂 + self.anomaly_data_factory = AnomalyDataFactory(rank, pp_stage, group_mates) if alert_setting.get('dump', False) else None + + if self.format == 'tensorboard': + writer = SummaryWriterWithAD + self.write_metrics = write_metrics_tensorboard + elif self.format == 'csv': + writer = CSVWriterWithAD + self.write_metrics = write_metrics_csv + elif self.format == 'api': + writer = BaseWriterWithAD + self.write_metrics = write_metrics_tensorboard + + if (rank in self.module_rank_list) or len(self.module_rank_list) == 0: + + self.summary_writer = writer( + tensorboard_dir, + self.alert_rules, + unique_id, + anomaly_inform, + self.anomaly_data_factory, + self.ndigits + ) + # 初始化anomaly deteted文件目录 + if self.anomaly_data_factory: + self.anomaly_data_writer = AnomalyDataWriter( + os.path.join(output_base_dir, "anomaly_detected"), rank) + self.anomaly_data_writer.init_detected_json() + # A HeatmapVisualizer instance is associated with an image self.update_heatmap_visualizer = defaultdict(HeatmapVisualizer) self.ratio_heatmap_visualizer = defaultdict(HeatmapVisualizer) - self.micro_batch_number = 0 + self.micro_batch_number = 1 + + self.weight_hooked = False + self.optimizer_hooked = False + self.param_registered = False + self.vpp = False + self.dp_group = None + self.tp_group = None - self.param_name_list = [] self.param2name = defaultdict(str) + self.param_name_call_id = {} + self.call_id = 0 self.mix_precision_optimizer_mon = OptimizerMonFactory.create_optimizer_mon(opt_ty) if opt_ty is None: @@ -149,9 +227,13 @@ class TrainerMon: raise Exception("ur_distribution cannot be enabled with unknown optimizer.") if self.mv_distribution: raise Exception("mv_distribution cannot 
be enabled with unknown optimizer.") + self.verbose = False self.print_struct = self.config.get("print_struct", False) + if self.print_struct: + self.verbose = True self.struct_printed = False self.module_struct = {} + return def __del__(self): @@ -160,7 +242,7 @@ class TrainerMon: @staticmethod def set_wrapped_optimizer(_wrapped_optimizer): - MixPrecsionOptimizerMon.set_wrapped_optimizer(_wrapped_optimizer) + OptimizerMon.set_wrapped_optimizer(_wrapped_optimizer) @staticmethod def adhoc_check(target_tensor:torch.tensor, module_name:str, tensor_name:str, rank_list, ops_list): @@ -172,32 +254,67 @@ class TrainerMon: TrainerMon.tensor_metrics.stat_insert(target_tensor, ops_list, module_name, tensor_name, rank) def hook_modules(self, model:torch.nn.Module, grad_acc_steps): - # fwd=0, bkd=1 - # targets is module name list like ["xx.xxx1", "xxx.xxx2"] which can be obtained when first run. - print_rank_0("> module names:") - for name, _ in model.named_modules(): - print_rank_0(f"\t{name}") - self.micro_batch_number = grad_acc_steps + if self.module_rank_list and (self.rank not in self.module_rank_list): + return + + if not isinstance(model, list): + model = [model] + + self._register_param_name(model) - if not self.module_rank_list or (dist.is_initialized() and dist.get_rank() in self.module_rank_list): - targets = [x for x, _ in model.named_modules()] if self.print_struct else self.config['targets'].keys() - hooked_count = self._hook_module(targets, model, fwd_or_bkd=0) + self.micro_batch_number = grad_acc_steps + for vpp_stage, model_chunk in enumerate(model): + vpp_stage = f'{vpp_stage}{Const.vpp_sep}' if self.vpp else '' + targets = [x for x, _ in model_chunk.named_modules()] if self.print_struct else self.config['targets'].keys() + hooked_count = self._hook_module(targets, model_chunk, vpp_stage) print_rank_0(f"> {hooked_count} out of {len(self.config['targets'])} are monitored.") - else: - return if not self.optimizer_hooked: - self.optimizer_hooked = True - 
print_rank_0("> parameter names:") - for name, param in model.named_parameters(): - print_rank_0(f"\t{name}") - for target_module, _ in self.config['targets'].items(): - if name.startswith(target_module): # name : language_model.encoder.layers.0.mlp.weight, target_module:language_model.encoder.layers.0 - self.param_name_list.append(name) - self.param2name[param] = name self.hook_optimizer() return + def generate_wgrad_metrics(self): + if not self.wg_distribution: + return {}, {} + + unreduced = {} + if self.weight_hooked: + for metric_name in self.ops: + unreduced[metric_name] = get_metrics(metric_name, self.grad_context.acc, self.eps) + self.grad_context.acc_metric = [unreduced] + + grad_dict = {} + for param, name in self.param2name.items(): + if self.tp_group and not param_is_not_tensor_parallel_duplicate(param, self.tp_group): + continue + if self.dp_group and param_is_data_parallel_duplicate(self.dp_group): + continue + grad = param.main_grad if self.params_have_main_grad else param.grad + if grad is None: + print_warn_log(f"grad is None: {name}, maybe something wrong happened.") + continue + key = get_summary_writer_tag_name(name, 'post_grad', self.rank) + grad_dict[key] = grad + + reduced = {op:get_metrics(op, grad_dict, self.eps) for op in self.ops} + self.grad_context.post = [reduced] + + return reduced, unreduced + + + def monitor_gnorm_with_ad(self, model, grad_acc_steps=1, optimizer=None, tp_group=None, dp_group=None): + print_info_log(f'grad acc steps {grad_acc_steps}') + self.hook_optimizer(optimizer) + self.micro_batch_number = grad_acc_steps + self.backward_only = True + + self.dp_group = dp_group + self.tp_group = tp_group + + self._register_param_name(model) + self._hook_weights() + self.hook_modules(model, grad_acc_steps) + def build_tbtag_tensor_map(self, module_name, tag, tensor): metrics = {} rank = dist.get_rank() if dist.is_initialized() else None @@ -233,27 +350,29 @@ class TrainerMon: if not self.xy_distribution: return for _, fwd_context 
in self.module_fwd_hook_context_by_module.items(): + if len(fwd_context.actv) == 0: + continue if not len(fwd_context.actv) == self.micro_batch_number: print_warn_log(f"fwd_context.actv not equal to micro_batch_number: {len(fwd_context.actv)}, {self.micro_batch_number}") - for metric_name in self.ops: - write_metrics_tensorboard(metric_name, self.summary_writer, fwd_context.actv, step) + self.write_metrics(self.ops, self.summary_writer, fwd_context.actv, step, 'actv') fwd_context.actv.clear() - for _, bwd_context in self.module_bwd_hook_context_by_module.items(): - if not len(bwd_context.actvgrad) == self.micro_batch_number: - print_warn_log(f"bwd_context.actvgrad not equal to micro_batch_number: {len(bwd_context.actvgrad)}, {self.micro_batch_number}") - for metric_name in self.ops: - write_metrics_tensorboard(metric_name, self.summary_writer, bwd_context.actvgrad, step) - bwd_context.actvgrad.clear() + self.write_metrics(self.ops, self.summary_writer, [self.grad_context.actv], step, 'grad_actv') - def hook_optimizer(self): + def write_grad_tb(self, step): + if not self.wg_distribution: + return + + self.write_metrics(self.ops, self.summary_writer, self.grad_context.post, step, 'grad_reduced') + self.write_metrics(self.ops, self.summary_writer, self.grad_context.acc_metric, step, 'grad_unreduced') + + def hook_optimizer(self, optimizer=None): # in DDP by default use params_have_main_grad def optimizer_pre_step_hook(optimizer, args, kwargs): context = self.optimizer_context[optimizer] if self.print_struct and not all(value == {} for value in self.module_struct.values()) and not self.struct_printed: self._smallest_rank_print("> module struct:") self._smallest_rank_print(json.dumps(self.module_struct, indent=4)) - self.struct_printed = True if not self.cc_log_only: raise Exception("exit after first step when print model struct") if self.cc_log_only and context.step > 0: @@ -261,10 +380,15 @@ class TrainerMon: self._smallest_rank_print(json.dumps({k:[i.split(';') for i 
in v] for k,v in self.cc_logged_stack.items()}, indent=4)) raise Exception("exit after first step when print cc stack") - - context.param_exp_avg, context.param_exp_avg_sq, context.param_adam_update, context.param_adam_ratio = self.mix_precision_optimizer_mon.fetch_mv(self, - optimizer, self.param2name) + self.generate_wgrad_metrics() + + mv_result = self.mix_precision_optimizer_mon.fetch_mv(self, optimizer, self.param2name) + context.param_exp_avg = mv_result.exp_avg + context.param_exp_avg_sq = mv_result.exp_avg_sq + context.param_adam_update = mv_result.update + context.param_adam_ratio = mv_result.ratio + for param, name in self.param2name.items(): if "params_effrank" in self.config and name in self.config["params_effrank"]: context.param_effective_rank[name] = eff_rank(param.detach()) @@ -272,9 +396,8 @@ class TrainerMon: if grad is None: print_warn_log(f"grad is None: {name}, maybe something wrong happened.") continue - if self.wg_distribution: - context.param_weight_grad[name] = grad - if self.mg_direction: + + if self.mg_direction: if context.step == 0: same_direction_ratio = torch.tensor(1.) 
else: @@ -282,15 +405,11 @@ class TrainerMon: context.param_mg_direction[name] = same_direction_ratio tbtag_tensor_map = {} - if self.wg_distribution: - tbtag_tensor_map.update(self.generate_param_metrics('weight_grad', context.param_weight_grad)) if self.mv_distribution: tbtag_tensor_map.update(self.generate_param_metrics('exp_avg', context.param_exp_avg)) tbtag_tensor_map.update(self.generate_param_metrics('exp_avg_sq', context.param_exp_avg_sq)) if self.mg_direction: tbtag_tensor_map.update(self.generate_param_metrics('mg_direction', context.param_mg_direction)) - # if not tbtag_tensor_map: - # return metric_dict = {} for metric_name in self.ops: metric_dict[metric_name] = get_metrics(metric_name, tbtag_tensor_map, self.eps) @@ -299,6 +418,7 @@ class TrainerMon: cc_metrics = self.generate_cc_metrics(k, c) for op, m in cc_metrics.items(): metric_dict[op].update(m) + if not metric_dict: return context.metric_list.append(metric_dict) @@ -308,7 +428,10 @@ class TrainerMon: context = self.optimizer_context[optimizer] rank = dist.get_rank() if dist.is_initialized() else None + if self.anomaly_data_factory: + self.anomaly_data_factory.set_call_id(self.param_name_call_id) self.write_xy_tb(context.step) + self.write_grad_tb(context.step) self.write_adhoc_check(context.step) if self.ur_distribution: @@ -317,20 +440,43 @@ class TrainerMon: for param_name, _ in context.param_adam_ratio.items(): self.ratio_heatmap_visualizer[param_name].visualize(get_summary_writer_tag_name(param_name, 'adam_ratio', rank), context.step, self.summary_writer) - for metric_name in self.ops: - if not context.metric_list: - break - write_metrics_tensorboard(metric_name, self.summary_writer, context.metric_list, context.step) + if context.metric_list: + self.write_metrics(self.ops, self.summary_writer, context.metric_list, context.step, 'other') context.metric_list.clear() context.step += 1 + self.grad_context.reset() + if self.anomaly_data_factory: + 
self.anomaly_data_writer.write_detected_json(self.summary_writer.get_anomalies()) + self.summary_writer.clear_anomalies() + self.call_id = 0 + self.param_name_call_id.clear() + return + + def patch_step(func, optimizer): + def wrapper(*args, **kwargs): + optimizer_pre_step_hook(optimizer, args, kwargs) + out = func(*args, **kwargs) + optimizer_post_step_hook(optimizer, args, kwargs) + return out + return wrapper + + if self.optimizer_hooked: return - if not self.module_rank_list or (dist.is_initialized() and dist.get_rank() in self.module_rank_list): - register_optimizer_step_pre_hook(optimizer_pre_step_hook) - register_optimizer_step_post_hook(optimizer_post_step_hook) + + if optimizer: + optimizer.__class__.step = patch_step(optimizer.__class__.step, optimizer) + + else: + if not self.module_rank_list or (dist.is_initialized() and dist.get_rank() in self.module_rank_list): + register_optimizer_step_pre_hook(optimizer_pre_step_hook) + register_optimizer_step_post_hook(optimizer_post_step_hook) + self.optimizer_hooked = True return def _smallest_rank_print(self, msg): + if not self.verbose: + return if dist.is_initialized(): if self.module_rank_list: if dist.get_rank() == min(self.module_rank_list): @@ -341,7 +487,35 @@ class TrainerMon: else: print_info_log(msg) - def _hook_module(self, target_names, module: torch.nn.Module, fwd_or_bkd): + def _register_param_name(self, model): + if self.param_registered: + return + if not isinstance(model, list): + model = [model] + + if len(model) > 1: + self.vpp = True + self._smallest_rank_print('vpp enabled') + + for vpp_stage, model_chunk in enumerate(model): + prefix = f'{Const.vpp}{vpp_stage}{Const.vpp_sep}' if self.vpp else '' + for param_name, param in model_chunk.named_parameters(): + name = prefix + squash_param_name(param_name) + for target in self.config['targets'].keys(): + if param_name.startswith(target) and param.requires_grad: + self._smallest_rank_print(f'>> monitoring: {name}') + setattr(param, 
"zero_out_wgrad", True) + if name in self.param2name.values() or name == '': + print_error_log(f'same name {name} for different param. Current param is {param_name}. \ + May be error of squash_param_name') + raise Exception("param with same name will be overwriten.") + self.param2name[param] = name + break + + self.param_registered = True + + + def _hook_module(self, target_names, module: torch.nn.Module, vpp_stage=''): if '_modules' not in module.__dict__: # nothing to hook return 0 @@ -352,8 +526,6 @@ class TrainerMon: self.module_struct[context.module_name].update( {"input": f"{get_param_struct(module_input)}", "output": f"{get_param_struct(module_output)}"}) return - if not self.xy_distribution: - return if not context.format_by_arg: context.set_format_by_arg('input', self.config['targets']) context.set_format_by_arg('output', self.config['targets']) @@ -390,11 +562,11 @@ class TrainerMon: self.module_struct[context.module_name].update( {"input_grad": f"{get_param_struct(input_grad)}", "output_grad": f"{get_param_struct(output_grad)}"}) return - if not self.xy_distribution: - return if not context.format_by_arg: context.set_format_by_arg('input_grad', self.config['targets']) context.set_format_by_arg('output_grad', self.config['targets']) + if not context.format_by_arg: + return if not context.verified: if not context.ignore_in: context.focused_in_col = validate_config_spec(context.format_by_arg['input_grad'], input_grad, context.module_name, 'input_grad') @@ -404,32 +576,62 @@ class TrainerMon: tbtag_tensor_map = {} if not context.ignore_in: cared_input_grad = input_grad if context.focused_in_col is None else input_grad[context.focused_in_col] - tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name, 'input_grad', cared_input_grad)) + tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name+f'_{context.micro_step}', f'input_grad', cared_input_grad)) cared_output_grad = output_grad if context.focused_out_col is None else 
output_grad[context.focused_out_col] - tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name, 'output_grad', cared_output_grad)) - metric_dict = {} - for metric_name in self.ops: - metric_dict[metric_name] = get_metrics(metric_name, tbtag_tensor_map, self.eps) + tbtag_tensor_map.update(self.build_tbtag_tensor_map(context.module_name+f'_{context.micro_step}', f'output_grad', cared_output_grad)) + if context.micro_step == 0 and context.actvgrad: print_warn_log(f"actvgrad context of {context.module_name} is not empty when first micro_step, maybe something wrong happened. Now clear it.") context.actvgrad.clear() - context.actvgrad.append(metric_dict) + for metric_name in self.ops: + self.grad_context.actv[metric_name].update(get_metrics(metric_name, tbtag_tensor_map, self.eps)) + context.micro_step += 1 if context.micro_step == self.micro_batch_number: context.micro_step = 0 context.step += 1 return - hooked_count = 0 - for name, submodule in module.named_modules(): - self.module_struct[name] = {} - if name in target_names: - submodule.register_forward_hook(fwd_hook_fun) - self.module_fwd_hook_context_by_module[submodule] = ModuleHookContext(name) - if not self.forward_only: - submodule.register_full_backward_hook(bwd_hook_fun) - self.module_bwd_hook_context_by_module[submodule] = ModuleHookContext(name) - print_rank_0(f"> {name} is monitored successfully") - hooked_count += 1 + if self.backward_only and self.forward_only: + print_warn_log('not enable backward_only and forward_only simultaneously') + + hooked_count = 0 + if self.xy_distribution or self.print_struct: + for module_name, submodule in module.named_modules(): + name = vpp_stage + module_name + self.module_struct[name] = {} + if name in target_names or module_name in target_names: + if not self.backward_only: + submodule.register_forward_hook(fwd_hook_fun) + self.module_fwd_hook_context_by_module[submodule] = ModuleHookContext(name) + if not self.forward_only: + 
submodule.register_full_backward_hook(bwd_hook_fun) + self.module_bwd_hook_context_by_module[submodule] = ModuleHookContext(name) + print_rank_0(f"> {name} is monitored successfully") + hooked_count += 1 return hooked_count + + def _hook_weights(self): + context = self.grad_context + + @torch.no_grad + def param_hook(*args, context_dict, param, key, name): + param.micro_step += 1 + self.param_name_call_id[name] = self.call_id + self.call_id += 1 + if param.micro_step == self.micro_batch_number: + param.micro_step = 0 + if self.params_have_main_grad: + context_dict[key] = param.main_grad.clone() + else: + context_dict[key] = param.grad.clone() + + for param, name in self.param2name.items(): + key = get_summary_writer_tag_name(name, 'acc_grad', self.rank) + setattr(param, 'micro_step', 0) + param_tmp = param.expand_as(param) + grad_acc = param_tmp.grad_fn.next_functions[0][0] + grad_acc.register_hook(partial(param_hook, context_dict=context.acc, param=param, key=key, name=name)) + + self.weight_hooked = True diff --git a/debug/accuracy_tools/kj600/kj600/module_metric.py b/debug/accuracy_tools/kj600/kj600/module_metric.py index e09536b072..b85e82c482 100644 --- a/debug/accuracy_tools/kj600/kj600/module_metric.py +++ b/debug/accuracy_tools/kj600/kj600/module_metric.py @@ -1,15 +1,26 @@ import math +import re import statistics -from kj600.features import square_sum, get_max, get_min, get_zeros, get_nans, get_norm +from kj600.features import square_sum, get_max, get_min, get_zeros, get_nans, get_norm, get_mean def get_summary_writer_tag_name(module_or_param_name:str, tag:str, rank): if rank is None: return f"{module_or_param_name}/{tag}" else: - return f"{module_or_param_name}/{rank}/{tag}" - + return f"{module_or_param_name}/rank{rank}/{tag}" + +def squash_param_name(param_name): + name = '' + for pattern in ['(?<=layers\.)[\d]*.*', 'embeddings?\.(.*)', 'final.*', 'output.*','norm.*']: + match = re.findall(pattern, param_name) + if match: + name += match[0] + break + if 
name == '': + name = param_name + return name # 用于存储所有metric实现类的注册表 config_metric_registry = {} @@ -28,7 +39,7 @@ class TensorMetrics: self.metrics = {} #tensor_tag --> [] self.cur_idx = {} - fun_map = {"norm": get_norm, "max": get_max, "min": get_min} + fun_map = {"norm": get_norm, "max": get_max, "min": get_min, "mean": get_mean} #get stats and insert into metrics dictionary def stat_insert(self, tensor, stat_ops, module_name, tensor_name, rank, eps=1e-8): prefix = get_summary_writer_tag_name(module_name, tensor_name, rank) @@ -75,6 +86,19 @@ class MinMetric(Metric): summary_writer.add_scalar(f'{key}_min', min_value, step) +@register_config_metric("mean") +class MeanMetric(Metric): + @staticmethod + def get_metric_value(tensor, eps): + return get_mean(tensor) + + @staticmethod + def metric_tensorboard(metric_name, summary_writer, metric_value, step): + for key in metric_value[0][metric_name].keys(): + mean_value = sum([item[metric_name][key].item() for item in metric_value]) / len(metric_value) + summary_writer.add_scalar(f'{key}_mean', mean_value, step) + + @register_config_metric("max") class MaxMetric(Metric): @staticmethod @@ -134,12 +158,12 @@ class IdentMetric(Metric): return tensor @staticmethod - def metric_tensorboard(metric_name, summary_writer, metric_value, step): #metric_value is a dict, key is parameter name and value is a list of scalar tensor + def metric_tensorboard(metric_name, summary_writer, metric_value, context): #metric_value is a dict, key is parameter name and value is a list of scalar tensor if len(metric_value) == 1: for key, value in metric_value[0][metric_name].items(): if not value: continue - summary_writer.add_scalar(f'{key}_identical', value.item(), step) + summary_writer.add_scalar(f'{key}_identical', value.item(), context) def get_metrics(metric_name, tag2tensor, eps): @@ -150,9 +174,32 @@ def get_metrics(metric_name, tag2tensor, eps): raise ValueError(f"Not supported this metric, expected metric: 
{config_metric_registry.keys()}, actual metric: {metric_name}") from e -def write_metrics_tensorboard(metric_name, summary_writer, metric_value, step): - try: - fun_metric = config_metric_registry[metric_name] - return fun_metric.metric_tensorboard(metric_name, summary_writer, metric_value, step) - except KeyError as e: - raise ValueError(f"Not supported this metric, expected metric: {config_metric_registry.keys()}, actual metric: {metric_name}") from e +def write_metrics_tensorboard(ops, summary_writer, metric_value, step, prefix=''): + for metric_name in ops: + try: + fun_metric = config_metric_registry[metric_name] + fun_metric.metric_tensorboard(metric_name, summary_writer, metric_value, step) + except KeyError as e: + raise ValueError(f"Not supported this metric, expected metric: {config_metric_registry.keys()}, actual metric: {metric_name}") from e + +def write_metrics_csv(ops, summary_writer, metric_value, step, prefix=''): + for metric_name in ops: + try: + fun_metric = config_metric_registry[metric_name] + fun_metric.metric_tensorboard(metric_name, summary_writer, metric_value, step) + + except KeyError as e: + raise ValueError(f"Not supported this metric, expected metric: {config_metric_registry.keys()}, actual metric: {metric_name}") from e + + if not summary_writer.header: + if prefix in ['actv', 'grad_actv']: + summary_writer.header = ['param_name'] + ['input_'+op for op in ops] + ['output_'+op for op in ops] + else: + summary_writer.header = ['param_name'] + ops + + for key in metric_value[0][ops[0]].keys(): + if 'vpp' in key: + summary_writer.header.insert(0, 'vpp_stage') + break + summary_writer.write_csv(prefix, step) + summary_writer.header = [] diff --git a/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py b/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py index 395aa82f17..66ea280590 100644 --- a/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py +++ b/debug/accuracy_tools/kj600/kj600/module_spec_verifier.py @@ -2,15 +2,8 @@ 
import json import re import abc import torch -from kj600.utils import check_file_valid_readable -def get_config(file_path='config.json'): - check_file_valid_readable(file_path) - with open(file_path, 'r') as file: - config = json.load(file) - return config - # 用于存储所有validator实现类的注册表 config_validator_registry = {} diff --git a/debug/accuracy_tools/kj600/kj600/optimizer_collect.py b/debug/accuracy_tools/kj600/kj600/optimizer_collect.py index 285f17ca6d..61ae9de64a 100644 --- a/debug/accuracy_tools/kj600/kj600/optimizer_collect.py +++ b/debug/accuracy_tools/kj600/kj600/optimizer_collect.py @@ -1,10 +1,12 @@ -from collections import defaultdict +from abc import ABC, abstractmethod +from collections import defaultdict, namedtuple import torch import torch.distributed as dist -from kj600.visualizer import HeatmapVisualizer +from kj600.utils import print_warn_log -def print_rank_0(message, debug=False, force=False): + +def print_rank_0(message): if dist.is_initialized(): if dist.get_rank() == 0: print(message) @@ -12,20 +14,29 @@ def print_rank_0(message, debug=False, force=False): print(message) -class MixPrecsionOptimizerMon: +MVResult = namedtuple('MVResult', ("exp_avg", "exp_avg_sq", "update", "ratio")) + + +class OptimizerMon(ABC): wrapped_optimizer = None + @classmethod + def set_wrapped_optimizer(cls, wrapped_optimizer): + cls.wrapped_optimizer = wrapped_optimizer + + @abstractmethod + def fetch_mv(self, monitor, torch_opt, params2name): + pass + + +class MixPrecisionOptimizerMon(OptimizerMon): def __init__(self) -> None: self.fp16_to_fp32_param = {} - @staticmethod - def set_wrapped_optimizer(_wrapped_optimizer): - MixPrecsionOptimizerMon.wrapped_optimizer = _wrapped_optimizer - # parameter tensors we want to monitor and their names are in params2name_dict # base_optimizer is pytorch optimizer, wrapped_optimizer is a normal object with base_optimizer def fetch_mv(self, monitor, torch_opt, params2name): - mix_prec_opt = MixPrecsionOptimizerMon.wrapped_optimizer + 
mix_prec_opt = self.wrapped_optimizer if not self.fp16_to_fp32_param and mix_prec_opt is not None: for fp16_group, fp32_group in zip(mix_prec_opt.float16_groups, mix_prec_opt.fp32_from_float16_groups): @@ -44,8 +55,12 @@ class MixPrecsionOptimizerMon: param = self.fp16_to_fp32_param[param] if param in torch_opt.state: - exp_avg = torch_opt.state[param]["exp_avg"] - exp_avg_sq = torch_opt.state[param]["exp_avg_sq"] + state_param = torch_opt.state.get(param, None) + exp_avg = state_param.get("exp_avg", None) + exp_avg_sq = state_param.get("exp_avg_sq", None) + if exp_avg is None or exp_avg_sq is None: + print_warn_log(f"exp_avg or exp_avg_sq of {name} is None, maybe something wrong happened.") + continue if monitor.mv_distribution: exp_avg_dict[name] = exp_avg exp_avg_sq_dict[name] = exp_avg_sq @@ -53,15 +68,15 @@ class MixPrecsionOptimizerMon: exp_avg_dict[name] = exp_avg if monitor.ur_distribution: update_dict[name] = exp_avg / (torch.sqrt(exp_avg_sq) + torch_opt.defaults['eps']) - ratio_dict[name] = exp_avg / torch.sqrt(exp_avg_sq) + ratio_dict[name] = (exp_avg / torch.sqrt(exp_avg_sq)).nan_to_num(0) monitor.update_heatmap_visualizer[name].pre_cal(update_dict[name]) monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) - return exp_avg_dict, exp_avg_sq_dict, update_dict, ratio_dict + return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) -class MegatronDistributedOptimizerMon(MixPrecsionOptimizerMon): +class MegatronDistributedOptimizerMon(MixPrecisionOptimizerMon): def fetch_mv(self, monitor, torch_opt, params2name): - mix_prec_opt = MixPrecsionOptimizerMon.wrapped_optimizer + mix_prec_opt = self.wrapped_optimizer if not (hasattr(mix_prec_opt, "model_float16_groups") and hasattr(mix_prec_opt, "shard_fp32_from_float16_groups")): raise Exception("megatron distributed optimizer should have model_float16_groups and shard_fp32_from_float16_groups, \ if not, please check megatron-lm version") @@ -73,18 +88,48 
@@ class MegatronDistributedOptimizerMon(MixPrecsionOptimizerMon): return self._fetch_mv_in_adam(params2name, torch_opt, monitor) -class DummyOptimizerMon(MixPrecsionOptimizerMon): +class MegatronFP32OptimizerMon(OptimizerMon): + def fetch_mv(self, monitor, torch_opt, params2name): + exp_avg_dict = defaultdict(float) + exp_avg_sq_dict = defaultdict(float) + update_dict = defaultdict() + ratio_dict = defaultdict() + + for param, name in params2name.items(): + if param in torch_opt.state: + state_param = torch_opt.state.get(param, None) + exp_avg = state_param.get("exp_avg", None) + exp_avg_sq = state_param.get("exp_avg_sq", None) + if exp_avg is None or exp_avg_sq is None: + print_warn_log(f"exp_avg or exp_avg_sq of {name} is None, maybe something wrong happened.") + continue + if monitor.mv_distribution: + exp_avg_dict[name] = exp_avg + exp_avg_sq_dict[name] = exp_avg_sq + if monitor.mg_direction: + exp_avg_dict[name] = exp_avg + if monitor.ur_distribution: + update_dict[name] = exp_avg / (torch.sqrt(exp_avg_sq) + torch_opt.defaults['eps']) + ratio_dict[name] = (exp_avg / torch.sqrt(exp_avg_sq)).nan_to_num(0) + monitor.update_heatmap_visualizer[name].pre_cal(update_dict[name]) + monitor.ratio_heatmap_visualizer[name].pre_cal(ratio_dict[name]) + return MVResult(exp_avg=exp_avg_dict, exp_avg_sq=exp_avg_sq_dict, update=update_dict, ratio=ratio_dict) + + +class DummyOptimizerMon(OptimizerMon): def fetch_mv(self, monitor, torch_opt, params2name): - return None, None, None, None + return MVResult(exp_avg=None, exp_avg_sq=None, update=None, ratio=None) class OptimizerMonFactory: @staticmethod - def create_optimizer_mon(opt_ty:str): + def create_optimizer_mon(opt_ty: str): if opt_ty == "Megatron_Float16OptimizerWithFloat16Params": - return MixPrecsionOptimizerMon() + return MixPrecisionOptimizerMon() if opt_ty == "Megatron_DistributedOptimizer": return MegatronDistributedOptimizerMon() + if opt_ty == "Megatron_FP32Optimizer": + return MegatronFP32OptimizerMon() if opt_ty 
is None or opt_ty == "unknown": return DummyOptimizerMon() raise Exception("opt_ty should be Megatron_Float16OptimizerWithFloat16Params or Megatron_DistributedOptimizer or None or unknown") diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py new file mode 100644 index 0000000000..ddea3244f5 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py @@ -0,0 +1,145 @@ +import sys +import os +import re +import argparse +import pandas as pd +from glob import glob +from collections import defaultdict + + +def parse_logfile(logfile): + grad_norm = [] + step = [] + with open(logfile) as f: + for line in f.readlines(): + if 'consumed samples' in line: + grad_norm.append(float(re.findall('(?<=grad norm\: )[\d\.]*', line)[0])) + # step = int(re.findall('(?<=iteration)[ \d]*', line)[0]) + return grad_norm + + +def parse_monitor_output(output_dir): + reduced = {} + unreduced = {} + for dir in glob(output_dir+'*'): + rank = int(re.findall('(?<=rank)[\d]*', dir)[0]) + unreduced[rank] = [] + reduced[rank] = [] + for file in os.listdir(dir): + # step = int(re.search("(?<=reduced\_)[\d]*", file)[0]) + # if step != 0: + # continue + df = pd.read_csv(os.path.join(dir, file)) + if '_unreduced_' in file: + unreduced[rank].append(df) + pass + elif '_reduced_' in file: + reduced[rank].append(df) + else: + print(f'unexpected file {file} in {dir}') + return reduced, unreduced + +def valid_reduce(reduced, unreduced, tp_size, dp_size, sequence_parallel): + steps = len(reduced[0]) + world_size = len(reduced) + errors = [] + for index, row in unreduced[0][0].iterrows(): + param = row['param_name'] + is_tp_duplicate = False + for step in range(2): + # sum reduced + reduced_mean = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + continue + df = reduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + if step == 0: + is_tp_duplicate = True + continue + reduced_mean += value[0] + + # sum unreduced + unreduced_mean = 0. + for rank in range(world_size): + df = unreduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + continue + unreduced_mean += list(df[df['param_name'] == param]['mean'])[0] + + unreduced_mean /= dp_size + if is_tp_duplicate and (not sequence_parallel or 'embedding' in param): + unreduced_mean /= tp_size + try: + assert_equal(unreduced_mean, reduced_mean) + except AssertionError as e: + errors.append([param, step, e, is_tp_duplicate]) + if errors: + print(errors) + else: + print(f'grad mean is in consist between unreduced grad and reduced grad monitord.') + + + +def assert_equal(a, b): + if b == 0 or a == 0: + return + if b == 0: + rel_diff = a + elif a == 0: + rel_diff = b + else: + rel_diff = abs(a/b-1) + assert rel_diff<0.01, f'{a}, {b}, {rel_diff}' + + +def valid_total_norm(total_norm, reduced, duplicate_embedding): + steps = len(total_norm) + world_size = len(reduced) + errors = [] + for step in range(steps): + calculated_norm = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + if step == 0: + print(f'rank {rank} is duplicated in dp group') + continue + for index, row in reduced[rank][step].iterrows(): + if duplicate_embedding and 'word_embedding' in row['param_name']: + continue + calculated_norm += row['norm']**2 + try: + assert_equal(calculated_norm**0.5, total_norm[step]) + except AssertionError as e: + errors.append([step, e]) + if errors: + print('total norm errors: ', errors) + else: + print('grad norm in consist between training log and reduced gradients monitored') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--monitor_output', '-m', type=str, required=True, help='path prefix to the output of monitor e.g. kj600_output/Aug12_07-16') + parser.add_argument('--logfile', '-l', type=str, required=True, help='path to the training log file') + parser.add_argument('--tp_size', '-t', type=int, required=True, help='tp parallel size') + parser.add_argument('--dp_size', '-d', type=int, required=True, help='dp parallel size') + parser.add_argument('--pp_size', '-p', type=int, required=True, help='pp parallel size') + parser.add_argument('--untie_embeddings_and_output_weights', '-u', action="store_true", default=False, help='whether untie_embeddings_and_output_weights in pp parallel') + parser.add_argument('--sequence_parallel', '-s', action="store_true", default=False, help='whether sequence parallel is enabled. 
Add -s to store true') + + args = parser.parse_args() + + assert args.tp_size > 0, 'if tp not enabled, set tp_size = 1' + assert args.dp_size > 0, 'if tp not enabled, set dp_size = 1' + assert args.pp_size > 0, 'if tp not enabled, set pp_size = 1' + + total_norm = parse_logfile(args.logfile) + reduced, unreduced = parse_monitor_output(args.monitor_output) + + duplicate_embedding = not args.untie_embeddings_and_output_weights and args.pp_size > 1 + + valid_total_norm(total_norm, reduced, duplicate_embedding) + valid_reduce(reduced, unreduced, args.tp_size, args.dp_size, args.sequence_parallel) \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/utils.py b/debug/accuracy_tools/kj600/kj600/utils.py index 53d47d9988..3aed6911c4 100644 --- a/debug/accuracy_tools/kj600/kj600/utils.py +++ b/debug/accuracy_tools/kj600/kj600/utils.py @@ -107,4 +107,29 @@ def check_file_valid_readable(path): def check_file_valid_writable(path): check_file_valid(path) check_path_writability(path) - \ No newline at end of file + + +def make_file_safety(file_path: str, permission=0o640): + if os.path.islink(file_path): + raise RuntimeError(f"Invalid soft link path: {file_path}") + file_real_path = os.path.realpath(file_path) + if os.path.exists(file_real_path): + return + parent_path = os.path.dirname(file_real_path) + if not os.path.exists(parent_path): + os.makedirs(parent_path, mode=0o750, exist_ok=True) + if not os.access(parent_path, os.W_OK): + raise PermissionError(f"The path {parent_path} is not writable!") + try: + os.close(os.open(file_real_path, os.O_WRONLY | os.O_CREAT, permission)) + except OSError as e: + raise RuntimeError("Can't create file: " + file_real_path) from e + os.chmod(file_real_path, permission) + + +def create_directory(dir_path): + dir_path = os.path.realpath(dir_path) + try: + os.makedirs(dir_path, mode=0o750, exist_ok=True) + except OSError as ex: + raise RuntimeError("Failed to create directory. 
Please check the path permission or disk space.") from ex \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/pyproject.toml b/debug/accuracy_tools/kj600/pyproject.toml index 5df9685633..dd5faebc38 100644 --- a/debug/accuracy_tools/kj600/pyproject.toml +++ b/debug/accuracy_tools/kj600/pyproject.toml @@ -7,7 +7,6 @@ name = "kj600" version = "0.0.1" dependencies = [ "torch", - "torch_npu", "torchvision", "tensorboard", "matplotlib", @@ -16,4 +15,7 @@ dependencies = [ ] [tool.setuptools.packages] -find = {} # Scan the project directory with the default parameters \ No newline at end of file +find = {} # Scan the project directory with the default parameters + +[tool.setuptools.package-data] +kj600 = ["distributed/*.yaml"] \ No newline at end of file -- Gitee From 823d12ea7524208de02eb6f35ab91b8e402bf0db Mon Sep 17 00:00:00 2001 From: qianggee Date: Wed, 21 Aug 2024 08:31:19 +0000 Subject: [PATCH 043/141] merge from poc --- .../kj600/kj600/anomaly_analyse.py | 248 ++++++++++++++ .../kj600/kj600/anomaly_detect.py | 169 ++++++++- .../accuracy_tools/kj600/kj600/file_check.py | 324 ++++++++++++++++++ 3 files changed, 729 insertions(+), 12 deletions(-) create mode 100644 debug/accuracy_tools/kj600/kj600/anomaly_analyse.py create mode 100644 debug/accuracy_tools/kj600/kj600/file_check.py diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py new file mode 100644 index 0000000000..f6069db6fb --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import ast +import fcntl +import heapq +import json +import os +from pathlib import Path +import sys + +from kj600.utils import print_info_log, print_warn_log +from kj600.anomaly_detect import GradAnomalyData +from kj600.file_check import ( + change_mode, + check_link, + FileCheckConst, + check_path_before_create, + FileChecker, + FileOpen, +) + +ANOMALY_JSON = "anomaly.json" +ANALYSE_JSON = "anomaly_analyse.json" + +class AnomalyDataWriter: + """ + 异常数据写入类,负责将异常数据写入到JSON文件中。 + """ + + def __init__(self, dump_path, rank) -> None: + self.dump_path = dump_path + self.dump_rank_dir = os.path.join(self.dump_path, f"rank{rank}") + self.json_path = os.path.join(self.dump_rank_dir, ANOMALY_JSON) + + @staticmethod + def get_anomaly_dict(anomalies): + """将GradAnomalyData列表转换为json""" + anomalies_json = {} + for anomaly in anomalies: + anomalies_json.update({anomaly.get_key(): anomaly.to_dict()}) + return anomalies_json + + @staticmethod + def update_data_in_single_json(json_path, anomalies_data): + with FileOpen(json_path, "w+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + json.dump(anomalies_data, f, indent=1) + fcntl.flock(f, fcntl.LOCK_UN) + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def init_detected_json(self): + """初始化落盘文件""" + check_path_before_create(self.dump_path) + if not os.path.exists(self.dump_path): + Path(self.dump_path).mkdir( + mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True + ) + file_check = FileChecker(self.dump_path, FileCheckConst.DIR) + file_check.common_check() + + if not 
os.path.exists(self.dump_rank_dir): + Path(self.dump_rank_dir).mkdir( + FileCheckConst.DATA_DIR_AUTHORITY, parents=True, exist_ok=True + ) + + if os.path.exists(self.json_path): + file_check = FileChecker( + self.json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {self.json_path}.") + os.remove(self.json_path) + Path(self.json_path).touch() + change_mode(self.json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def write_detected_json(self, anomalies): + """ + 落盘异常数据 + Args: + anomalies: GradAnomalyData对象列表 + """ + anomalies_json = self.get_anomaly_dict(anomalies) + print_info_log(f"{ANOMALY_JSON} is at {self.dump_rank_dir}.") + if Path(self.json_path).exists() and os.path.getsize(self.json_path) > 0: + with FileOpen(self.json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_to_write = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + else: + data_to_write = {} + data_to_write.update(anomalies_json) + self.update_data_in_single_json(self.json_path, data_to_write) + + +class AnomalyDataLoader: + def __init__(self, data_path) -> None: + self.data_path = data_path + + @staticmethod + def create_instances_from_dict(anomalies_dict: dict): + instances = [] + for values in anomalies_dict.values(): + try: + instances.append(GradAnomalyData(**values)) + except KeyError as e: + print_warn_log(f"Missing key in anomaly data: {e}") + except ValueError as e: + print_warn_log( + f"Value error when creating a GradAnomalyData instance: {e}" + ) + return instances + + def get_anomalies_from_jsons(self): + """遍历文件夹,从rankK/anomaly.json中读取异常数据 + return: anomalies: GradAnomalyData对象列表 + """ + anomalies = [] + check_link(self.data_path) + for rank_dir in os.listdir(self.data_path): + rank_path = os.path.join(self.data_path, rank_dir) + if not os.path.isdir(rank_path): + continue + json_path = os.path.join(rank_path, ANOMALY_JSON) + if not os.path.exists(json_path): + continue + with 
FileOpen(json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_anomalies = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + instances = self.create_instances_from_dict(data_anomalies) + anomalies.extend(instances) + return anomalies + + +class AnomalyAnalyse: + def __init__(self) -> None: + self.sorted_anomalies = [] + + def get_range_top_K(self, topk, step_list, anomalies): + """ + 获取前topk个step_list范围内的异常。 + """ + if not step_list: + filtered_anomalies = anomalies + else: + filtered_anomalies = [ + anomaly for anomaly in anomalies if anomaly.step in step_list + ] + if topk >= len(filtered_anomalies): + self.sorted_anomalies = sorted(filtered_anomalies) + else: + self.sorted_anomalies = list(heapq.nsmallest(topk, filtered_anomalies)) + return self.sorted_anomalies + + def rewrite_sorted_anomalies(self, output_path): + """ + 将排序后的异常数据重新落盘 + """ + file_check = FileChecker( + output_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + + sorted_data = AnomalyDataWriter.get_anomaly_dict(self.sorted_anomalies) + print_info_log(f"{ANALYSE_JSON} is at {output_path}.") + json_path = os.path.join(output_path, ANALYSE_JSON) + if os.path.exists(json_path): + file_check = FileChecker( + json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {json_path}.") + os.remove(json_path) + Path(json_path).touch() + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + AnomalyDataWriter.update_data_in_single_json(json_path, sorted_data) + + +def _get_parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--data_path", dest="data_path_dir", default="./", type=str, + help=" The anomaly detect result dictionary: generate from kj600 tool.", + required=True, + ) + parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, + help=" The analyse task result out path.", + required=False, + ) + parser.add_argument("-k", 
"--topk", dest="top_k_number", default=8, type=int, + help=" Top K number of earliest anomalies.", + required=False, + ) + parser.add_argument("-s", "--step", dest="step_list", default=[], type=str, + help=" Analyse which steps.", + required=False, + ) + return parser.parse_args(sys.argv[1:]) + +def _get_step_and_stop(args): + try: + step_list = ast.literal_eval(args.step_list) + if not isinstance(step_list, list): + raise ValueError(f"{args.step_list} is not a list") + except (ValueError, SyntaxError, RecursionError) as e: + raise Exception( + f"The step list must be a resolvable list type" + ) from e + if args.top_k_number <= 0: + raise Exception("The top k number must be greater than 0.") + return step_list, args.top_k_number + +def _anomaly_analyse(): + args = _get_parse_args() + step_list, top_k_number = _get_step_and_stop(args) + loader = AnomalyDataLoader(args.data_path_dir) + anomalies = loader.get_anomalies_from_jsons() + analyser = AnomalyAnalyse() + top_anomalies = analyser.get_range_top_K( + top_k_number, step_list, anomalies + ) + analyser.rewrite_sorted_anomalies( + args.out_path if args.out_path else args.data_path_dir + ) + + print_info_log(f"Top {top_k_number} anomalies are listed as follows:") + for index, anomaly in enumerate(top_anomalies): + print_info_log(f"{index}: {anomaly.message}") + + +if __name__ == "__main__": + _anomaly_analyse() + print_info_log("Analyse task completed.") diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_detect.py b/debug/accuracy_tools/kj600/kj600/anomaly_detect.py index cbd7b6daa2..46b6f32f18 100644 --- a/debug/accuracy_tools/kj600/kj600/anomaly_detect.py +++ b/debug/accuracy_tools/kj600/kj600/anomaly_detect.py @@ -1,10 +1,16 @@ +import os +import sys import statistics as st from abc import ABC from typing import List -import sys -from torch.utils.tensorboard import SummaryWriter from collections import defaultdict -from kj600.utils import print_info_log +from dataclasses import dataclass, field +import pandas 
as pd +from torch.utils.tensorboard import SummaryWriter +from kj600.utils import print_info_log, check_file_valid_writable, make_file_safety, create_directory +from kj600.const import Const +from kj600.file_check import change_mode, FileCheckConst + class ScanRule(ABC): def apply(self, history, cur): @@ -59,15 +65,101 @@ class bcolors: BOLD = '\033[1m' UNDERLINE = '\033[4m' -class SummaryWriterWithAD(SummaryWriter): - def __init__(self, path, ad_rules, job_id, anomaly_inform=False): - super().__init__(path) +class AnomalyDataFactory(ABC): + def __init__(self, rank, pp_stage, group_mates): + super().__init__() + self.rank = rank + self.pp_stage = pp_stage + self.group_mates = group_mates + self.micro_step = 0 + self.vpp_stage = 0 + self.name2callid = {} + + def set_call_id(self, name2callid): + """根据当前GradContext信息更新call_id vpp_stage等信息 + """ + self.name2callid = name2callid + + def create(self, tag_name, message, step): + """如果检查出异常, 调用当前接口生成GradAnomalyData实例 + """ + param_name = tag_name.split('/')[0] + call_id = self.name2callid.get(param_name,-1) + if Const.vpp in param_name: + vpp_stage = int(param_name.lstrip(Const.vpp).split(Const.vpp_sep)[0]) + else: + vpp_stage = 0 + + return GradAnomalyData( + self.rank, + step, + self.micro_step, + self.pp_stage, + self.vpp_stage, + call_id, + tag_name, + message, + self.group_mates + ) + +@dataclass(eq=True) +class GradAnomalyData: + rank: int = 0 + step: int = 0 + micro_step: int = 0 + pp_stage: int = 0 + vpp_stage: int = 0 + call_id: int = 0 + tag_name: str = field(default=None, compare=False) + message: str = field(default="", compare=False) + group_mates: list = field(default=None, compare=False) + + def __lt__(self, other): + if not isinstance(other, GradAnomalyData): + return NotImplemented + if self.step != other.step: + return self.step < other.step + if self.micro_step != other.micro_step: + return self.micro_step < other.micro_step + if self.pp_stage != other.pp_stage: + return self.pp_stage > other.pp_stage + 
if self.vpp_stage != other.vpp_stage: + return self.vpp_stage > other.vpp_stage + if self.call_id != other.call_id: + return self.call_id < other.call_id + return False + + def __le__(self, other): + if not isinstance(other, GradAnomalyData): + return NotImplemented + return self == other or self < other + + def to_dict(self): + return self.__dict__ + + def get_key(self): + return ''.join( + (str(self.tag_name), "_step_", str(self.step), "_call_" , str(self.call_id))) + +class BaseWriterWithAD: + def __init__(self, path, ad_rules, job_id, anomaly_inform=False, anomaly_factory=None, ndigits=6): self.tag2scalars = defaultdict(list) self.ad_rules = ad_rules self.job_id = job_id self.anomaly_inform = anomaly_inform - - def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False, double_precision=False): + self.anomaly_factory = anomaly_factory + self.anomalies = [] + self.ndigits = ndigits + + def get_anomalies(self): + """返回已检测到的异常列表 + """ + return self.anomalies + + def clear_anomalies(self): + self.anomalies.clear() + + def add_scalar(self, tag, scalar_value, global_step=None): new_avg = avg = scalar_value if tag in self.tag2scalars: N = len(self.tag2scalars[tag]) @@ -76,11 +168,64 @@ class SummaryWriterWithAD(SummaryWriter): self.tag2scalars[tag].append((scalar_value, new_avg)) detected, rule_name = self._ad(scalar_value, history=avg) if detected: - print_info_log(f"{bcolors.WARNING}> Rule {rule_name} reports anomaly signal in {tag} at step {global_step}.{bcolors.ENDC}") - exception_message = f"{bcolors.WARNING}> Rule {rule_name} reports anomaly signal in {tag} at step {global_step}.{bcolors.ENDC}" + exception_message = f"Rule {rule_name} reports anomaly signal in {tag} at step {global_step}." 
+ print_info_log(f"{bcolors.WARNING}> {exception_message}{bcolors.ENDC}") if self.anomaly_inform: self.anomaly_inform.run(exception_message, self.job_id) - return super().add_scalar(tag, scalar_value, global_step, walltime, new_style, double_precision) - + + if self.anomaly_factory: + self.anomalies.append(self.anomaly_factory.create(tag, exception_message, global_step)) + def _ad(self, scalar_value, history): return AnomalyScanner.scan(self.ad_rules, history, cur=scalar_value) + + +class CSVWriterWithAD(BaseWriterWithAD): + def __init__(self, path, ad_rules, job_id, anomaly_inform=False, anomaly_factory=None, ndigits=6): + super().__init__(path, ad_rules, job_id, anomaly_inform, anomaly_factory, ndigits) + + self.log_dir = path + create_directory(path) + self.context_dict = defaultdict(list) + self.header = [] + + def write_csv(self, prefix, step): + if len(self.context_dict) == 0: + return + filepath = os.path.join(self.log_dir, f'{prefix}_{step}.csv') + if not os.path.exists(filepath): + make_file_safety(filepath) + data_frame = pd.DataFrame(columns=self.header) + data_frame.to_csv(filepath, index=False) + change_mode(filepath, FileCheckConst.DATA_FILE_AUTHORITY) + + check_file_valid_writable(filepath) + new_data = [] + for name, metric_value in self.context_dict.items(): + if Const.vpp not in name: + new_data.append([name]+metric_value) + else: + new_data.append(name.lstrip(Const.vpp).split(Const.vpp_sep)+metric_value) + new_data = pd.DataFrame(new_data) + new_data.to_csv(filepath, mode='a+', header=False, index=False) + self.context_dict = defaultdict(list) + + def add_scalar(self, tag, scalar_value, global_step): + super().add_scalar(tag, scalar_value, global_step) + + name = tag.split('/')[0] + self.context_dict[name].append(round(scalar_value, self.ndigits)) + + def close(self): + pass + +class SummaryWriterWithAD(SummaryWriter, BaseWriterWithAD): + def __init__(self, path, ad_rules, job_id, anomaly_inform=False, anomaly_factory=None, ndigits=6): + 
super(SummaryWriter, self).__init__(path, ad_rules, job_id, anomaly_inform, anomaly_factory, ndigits) + super().__init__(path) + change_mode(path, FileCheckConst.DATA_DIR_AUTHORITY) + + def add_scalar(self, tag, scalar_value, global_step): + super(SummaryWriter, self).add_scalar(tag, scalar_value, global_step) + return super().add_scalar(tag, scalar_value, global_step) + \ No newline at end of file diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py new file mode 100644 index 0000000000..21f9e351a2 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/file_check.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +import os +import re + +from kj600.utils import print_info_log + + +class CodedException(Exception): + def __init__(self, code, error_info=""): + super().__init__() + self.code = code + self.error_info = self.err_strs.get(code) + error_info + + def __str__(self): + return self.error_info + + +class FileCheckException(CodedException): + INVALID_FILE_ERROR = 0 + FILE_PERMISSION_ERROR = 1 + SOFT_LINK_ERROR = 2 + ILLEGAL_PATH_ERROR = 3 + ILLEGAL_PARAM_ERROR = 4 + FILE_TOO_LARGE_ERROR = 5 + + err_strs = { + SOFT_LINK_ERROR: "[kj600] 检测到软链接: ", + FILE_PERMISSION_ERROR: "[kj600] 文件权限错误: ", + INVALID_FILE_ERROR: "[kj600] 无效文件: ", + ILLEGAL_PATH_ERROR: "[kj600] 非法文件路径: ", + ILLEGAL_PARAM_ERROR: "[kj600] 非法打开方式: ", + FILE_TOO_LARGE_ERROR: "[kj600] 文件过大: ", + } + + +class FileCheckConst: + """ + Class for file check const + """ + + READ_ABLE = "read" + WRITE_ABLE = "write" + READ_WRITE_ABLE = "read and write" + DIRECTORY_LENGTH = 4096 + FILE_NAME_LENGTH = 255 + FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" + FILE_PATTERN = r"^[a-zA-Z0-9_./-]+$" + JSON_SUFFIX = ".json" + MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 + DIR = "dir" + FILE = "file" + DATA_DIR_AUTHORITY = 0o750 + DATA_FILE_AUTHORITY = 0o640 + FILE_SIZE_DICT = { + JSON_SUFFIX: MAX_JSON_SIZE, + } + + +class FileChecker: + """ + The class for check file. + + Attributes: + file_path: The file or dictionary path to be verified. 
+ path_type: file or dictionary + ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability + file_type(str): The correct file type for file + """ + + def __init__( + self, file_path, path_type, ability=None, file_type=None, is_script=True + ): + self.file_path = file_path + self.path_type = self._check_path_type(path_type) + self.ability = ability + self.file_type = file_type + self.is_script = is_script + + @staticmethod + def _check_path_type(path_type): + if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: + print_info_log( + f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." + ) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + return path_type + + def common_check(self): + """ + 功能:用户校验基本文件权限:软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符 + 注意:文件后缀的合法性,非通用操作,可使用其他独立接口实现 + """ + check_path_exists(self.file_path) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + check_path_type(self.file_path, self.path_type) + self.check_path_ability() + if self.is_script: + check_path_owner_consistent(self.file_path) + check_path_pattern_vaild(self.file_path) + check_common_file_size(self.file_path) + check_file_suffix(self.file_path, self.file_type) + return self.file_path + + def check_path_ability(self): + if self.ability == FileCheckConst.WRITE_ABLE: + check_path_writability(self.file_path) + if self.ability == FileCheckConst.READ_ABLE: + check_path_readability(self.file_path) + if self.ability == FileCheckConst.READ_WRITE_ABLE: + check_path_readability(self.file_path) + check_path_writability(self.file_path) + + +class FileOpen: + """ + The class for open file by a safe way. + + Attributes: + file_path: The file or dictionary path to be opened. 
+ mode(str): The file open mode + """ + + SUPPORT_READ_MODE = ["r", "rb"] + SUPPORT_WRITE_MODE = ["w", "wb", "a", "ab"] + SUPPORT_READ_WRITE_MODE = ["r+", "rb+", "w+", "wb+", "a+", "ab+"] + + def __init__(self, file_path, mode, encoding="utf-8"): + self.file_path = file_path + self.mode = mode + self.encoding = encoding + self._handle = None + + def __enter__(self): + self.check_file_path() + binary_mode = "b" + if binary_mode not in self.mode: + self._handle = open(self.file_path, self.mode, encoding=self.encoding) + else: + self._handle = open(self.file_path, self.mode) + return self._handle + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._handle: + self._handle.close() + + def check_file_path(self): + support_mode = ( + self.SUPPORT_READ_MODE + + self.SUPPORT_WRITE_MODE + + self.SUPPORT_READ_WRITE_MODE + ) + if self.mode not in support_mode: + print_info_log("File open not support %s mode" % self.mode) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + self.check_ability_and_owner() + check_path_pattern_vaild(self.file_path) + if os.path.exists(self.file_path): + check_common_file_size(self.file_path) + + def check_ability_and_owner(self): + if self.mode in self.SUPPORT_READ_MODE: + check_path_exists(self.file_path) + check_path_readability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_WRITE_MODE and os.path.exists(self.file_path): + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_READ_WRITE_MODE and os.path.exists(self.file_path): + check_path_readability(self.file_path) + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + + +def check_link(path): + abs_path = os.path.abspath(path) + if os.path.islink(abs_path): + print_info_log("The file path {} is a soft 
link.".format(path)) + raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) + + +def check_path_length(path, name_length=None): + file_max_name_length = ( + name_length if name_length else FileCheckConst.FILE_NAME_LENGTH + ) + if ( + len(path) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(path)) > file_max_name_length + ): + print_info_log("The file path length exceeds limit.") + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_exists(path): + if not os.path.exists(path): + print_info_log("The file path %s does not exist." % path) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_readability(path): + if not os.access(path, os.R_OK): + print_info_log("The file path %s is not readable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_writability(path): + if not os.access(path, os.W_OK): + print_info_log("The file path %s is not writable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_executable(path): + if not os.access(path, os.X_OK): + print_info_log("The file path %s is not executable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_other_user_writable(path): + st = os.stat(path) + if st.st_mode & 0o002: + print_info_log( + "The file path %s may be insecure because other users have write permissions. " + % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_owner_consistent(path): + file_owner = os.stat(path).st_uid + if file_owner != os.getuid(): + print_info_log( + "The file path %s may be insecure because is does not belong to you." % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_pattern_vaild(path): + if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): + print_info_log("The file path %s contains special characters." 
% (path)) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_file_size(file_path, max_size): + file_size = os.path.getsize(file_path) + if file_size >= max_size: + print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") + raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) + + +def check_common_file_size(file_path): + if os.path.isfile(file_path): + for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): + if file_path.endswith(suffix): + check_file_size(file_path, max_size) + break + + +def check_file_suffix(file_path, file_suffix): + if file_suffix: + if not file_path.endswith(file_suffix): + print_info_log(f"The {file_path} should be a {file_suffix} file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_type(file_path, file_type): + if file_type == FileCheckConst.FILE: + if not os.path.isfile(file_path): + print_info_log(f"The {file_path} should be a file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + if file_type == FileCheckConst.DIR: + if not os.path.isdir(file_path): + print_info_log(f"The {file_path} should be a dictionary!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_before_create(path): + if path_len_exceeds_limit(path): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, "The file path length exceeds limit." + ) + + if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, + "The file path {} contains special characters.".format(path), + ) + + +def change_mode(path, mode): + if not os.path.exists(path) or os.path.islink(path): + return + try: + os.chmod(path, mode) + except PermissionError as ex: + raise FileCheckException( + FileCheckException.FILE_PERMISSION_ERROR, + "Failed to change {} authority. 
{}".format(path, str(ex)), + ) from ex + + +def path_len_exceeds_limit(file_path): + return ( + len(os.path.realpath(file_path)) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(file_path)) > FileCheckConst.FILE_NAME_LENGTH + ) -- Gitee From b040901cd019d2d8a52a77a12ee204bc8509b4dd Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Mon, 19 Aug 2024 19:48:59 +0800 Subject: [PATCH 044/141] add_kernels_and_communications --- .../module_visualization/graph/prof_node.py | 46 +++++++-- .../graph_build/prof_graph_builder.py | 71 ++++++++++--- .../prof_parse/prof_data_pre_process.py | 99 ++++++++++++------- profiler/prof_common/base_node.py | 4 + profiler/prof_common/constant.py | 6 ++ profiler/prof_common/file_reader.py | 27 +++++ profiler/prof_common/kernel_bean.py | 43 ++++++++ profiler/prof_common/trace_event_bean.py | 24 ++++- profiler/prof_common/tree_builder.py | 6 +- 9 files changed, 268 insertions(+), 58 deletions(-) create mode 100644 profiler/prof_common/kernel_bean.py diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index 7d96a49691..3588a8b81b 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -18,39 +18,49 @@ from profiler.prof_common.trace_event_bean import TraceEventBean class ProfNode(BaseNode): - MODULE_TYPE = 1 def __init__(self, event: TraceEventBean, parent_node=None): super().__init__(event, parent_node) self._kernel_total_list = [] + self._communication_total_list = [] self._precision_index = 1 @property def node_id(self): return self._event.unique_id + @property + def node_type(self): + if self._event.event_type is None: + return Constant.VIRTUAL_TYPE + return self._event.event_type + @property def total_kernels(self): return self._kernel_total_list @property def host_total_dur(self): - if self.is_root_node: + if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: return 
sum((node.host_total_dur for node in self.child_nodes)) return self._event.dur @property def host_self_dur(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return 0 return self.host_total_dur - sum((node.host_total_dur for node in self.child_nodes)) @property def device_total_dur(self): - if self.is_root_node: + if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: return sum((node.device_total_dur for node in self.child_nodes)) return sum((kernel.dur for kernel in self._kernel_total_list)) @property def device_self_dur(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return 0 return self.device_total_dur - sum((node.device_total_dur for node in self.child_nodes)) @property @@ -64,6 +74,22 @@ class ProfNode(BaseNode): data["Input type"] = input_type return data + @property + def kernel_data(self) -> list: + if self.node_type == Constant.VIRTUAL_TYPE: + return [kernel for node in self.child_nodes for kernel in node.kernel_data] + return [kernel.kernel_info for kernel in self.total_kernels] + + @property + def communication_data(self) -> list: + if self.node_type == Constant.VIRTUAL_TYPE: + return [comm for node in self.child_nodes for comm in node.communication_data] + return [[comm.name, comm.dur] for comm in self._communication_total_list] + + @property + def overall_data(self): + return {"Computing Time(ms)": 1, "Uncovered Communication Time(ms)": 1, "Free Time(ms)": 1} + @property def data(self): return {"Input Data": self.input_data, @@ -71,12 +97,14 @@ class ProfNode(BaseNode): "Host Self Duration(us)": round(self.host_self_dur, 2), "Host Total Duration(us)": round(self.host_total_dur, 2), "Device Self Duration(us)": round(self.device_self_dur, 2), - "Device Total Duration(us)": round(self.device_total_dur, 2)} + "Device Total Duration(us)": round(self.device_total_dur, 2), + "kernels": self.kernel_data, + "communications": self.communication_data} @property def info(self): return {"id": self.node_id, - "node_type": self.MODULE_TYPE, + 
"node_type": self.node_type, "data": self.data, "upnode": self.parent_node.node_id if self.parent_node else "None", "subnodes": [node.node_id for node in iter(self.child_nodes)]} @@ -96,9 +124,15 @@ class ProfNode(BaseNode): def update_child_nodes(self, node): self._child_nodes.append(node) + def reset_child_nodes(self, nodes): + self._child_nodes = nodes + def update_kernel_total_list(self, kernel_list: list): self._kernel_total_list.extend(kernel_list) + def update_communication_total_list(self, communication_list: list): + self._communication_total_list.extend(communication_list) + def update_child_precision_index(self): if not self.child_nodes: return @@ -106,4 +140,4 @@ class ProfNode(BaseNode): min_dur = min((node.device_total_dur for node in self.child_nodes)) diff_dur = max_dur - min_dur for node in self.child_nodes: - node.precision_index = 1- (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 + node.precision_index = 1 - (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 diff --git a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index a1bd6ba000..331e0cb050 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -30,7 +30,9 @@ class ProfGraphBuilder: min_start = min((op.start_time for op in iter(op_list))) max_end = max((op.end_time for op in iter(op_list))) # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-1 +2处理 - return TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) + event = TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) + event.event_type = Constant.MODULE_TYPE + return event @classmethod def _trans_flow_to_dict(cls, flow_events: dict, end_events: list) -> dict: @@ -48,6 +50,31 @@ class ProfGraphBuilder: result_data.setdefault(start_point.start_time, []).append(end_event) 
return result_data + @classmethod + def _create_virtual_node(cls, root_node: ProfNode): + virtual_nodes = [] + first_level_nodes = root_node.child_nodes + root_node.reset_child_nodes([]) + merged_nodes = [] + order_id = 1 + for node in first_level_nodes: + if node.node_type == Constant.OPERATOR_TYPE: + merged_nodes.append(node) + continue + if len(merged_nodes) >= 2: + virtual_node = ProfNode(TraceEventBean({}, f"Operators_Between_Modules_{order_id}"), root_node) + root_node.update_child_nodes(virtual_node) + order_id += 1 + for op_node in merged_nodes: + op_node.parent_node = virtual_node + virtual_node.update_child_nodes(op_node) + virtual_nodes.append(virtual_node) + elif len(merged_nodes) == 1: + root_node.update_child_nodes(merged_nodes[0]) + root_node.update_child_nodes(node) + merged_nodes = [] + return virtual_nodes + def build_graph(self): self._prof_data = ProfDataPreProcess(self._prof_data_path).run() all_data = [*self._prof_data.get(Constant.MODULE_EVENT, []), @@ -59,19 +86,17 @@ class ProfGraphBuilder: order_id = name_dict.get(event.name, 0) event.set_id(f"{event.name}_{order_id}") name_dict[event.name] = order_id + 1 - root_node = TreeBuilder.build_tree(all_data, ProfNode, TraceEventBean({}, Constant.NPU_ROOT_ID)) - kernel_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), - self._prof_data.get(Constant.KERNEL_EVENT, [])) - for start_time, kernels in kernel_flow_dict.items(): - matched_node = root_node.binary_search(start_time) - while matched_node != Constant.INVALID_RETURN: - matched_node.update_kernel_total_list(kernels) - matched_node = matched_node.binary_search(start_time) - all_data = root_node.find_all_child_nodes() - all_data.append(root_node) - for node in all_data: + all_nodes = TreeBuilder.build_tree(all_data, ProfNode, TraceEventBean({}, Constant.NPU_ROOT_ID)) + if len(all_nodes) < 2: + msg = "Failed to build graph." 
+ raise RuntimeError(msg) + self._update_kernel_details(all_nodes[0]) + self._update_communication_details(all_nodes[0]) + virtual_nodes = self._create_virtual_node(all_nodes[0]) + all_nodes.extend(virtual_nodes) + for node in all_nodes: node.update_child_precision_index() - return all_data + return all_nodes def find_bwd_module(self) -> list: bwd_module_list = [] @@ -102,7 +127,7 @@ class ProfGraphBuilder: pre_status = Constant.FWD_OR_OPT # 通过连线匹配正向module,构建出反向的整体module关系 - root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({})) + root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({}))[0] fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, cpu_op_list) for start_time, end_events in fwdbwd_flow_dict.items(): matched_node = root_node.binary_search(start_time) @@ -115,3 +140,21 @@ class ProfGraphBuilder: bwd_module_list.append( self._create_event_bean_from_ops(module_node.bwd_op_list, f"{module_node.name} [BACKWARD]")) return bwd_module_list + + def _update_kernel_details(self, root_node): + kernel_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), + self._prof_data.get(Constant.KERNEL_EVENT, [])) + for start_time, kernels in kernel_flow_dict.items(): + matched_node = root_node.binary_search(start_time) + while matched_node != Constant.INVALID_RETURN: + matched_node.update_kernel_total_list(kernels) + matched_node = matched_node.binary_search(start_time) + + def _update_communication_details(self, root_node): + communication_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), + self._prof_data.get(Constant.HCCL_EVENT, [])) + for start_time, communications in communication_flow_dict.items(): + matched_node = root_node.binary_search(start_time) + while matched_node != Constant.INVALID_RETURN: + matched_node.update_communication_total_list(communications) + matched_node = matched_node.binary_search(start_time) diff --git 
a/profiler/module_visualization/prof_parse/prof_data_pre_process.py b/profiler/module_visualization/prof_parse/prof_data_pre_process.py index 9dc820e4ca..c16daaecd7 100644 --- a/profiler/module_visualization/prof_parse/prof_data_pre_process.py +++ b/profiler/module_visualization/prof_parse/prof_data_pre_process.py @@ -12,10 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging import os from profiler.prof_common.file_reader import FileReader from profiler.prof_common.constant import Constant +from profiler.prof_common.kernel_bean import KernelBean from profiler.prof_common.trace_event_bean import TraceEventBean @@ -23,13 +25,23 @@ class ProfDataPreProcess: def __init__(self, prof_data_path: str): self._prof_data_path = prof_data_path self._trace_path = "" + self._kernel_details_path = "" self._kernel_pid = None + self._hccl_pid = None self._result_data = {Constant.CPU_OP_EVENT: [], Constant.MODULE_EVENT: [], Constant.KERNEL_EVENT: [], - Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}} + Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: []} + + @staticmethod + def _check_trace_data(trace_data): + if not isinstance(trace_data, list): + msg = f"Invalid profiling data path, this feature only supports performance data " \ + f"collected by Ascend PyTorch Profiler." + raise RuntimeError(msg) def run(self) -> dict: self._check_trace_path() self._parse_trace_events() + self._parse_kernel_details() self._check_result_data() return self._result_data @@ -50,53 +62,68 @@ class ProfDataPreProcess: msg = f"Invalid profiling path: {self._prof_data_path}. The data path should be the " \ f"folder that ends with the ascend_pt collected by the Ascend PyTorch Profiler." 
raise RuntimeError(msg) + kernel_path = os.path.join(profiler_output, "kernel_details.csv") + if os.path.isfile(kernel_path): + self._kernel_details_path = kernel_path self._trace_path = json_path def _parse_trace_events(self): trace_data = FileReader.read_json_file(self._trace_path) self._check_trace_data(trace_data) - iter_trace_data = iter(trace_data) - for event in iter_trace_data: - bean = TraceEventBean(event) - if bean.is_optimizer(): - self._result_data[Constant.MODULE_EVENT].append(bean) - elif bean.is_cpu_op(): - if not bean.is_step(): - self._result_data[Constant.CPU_OP_EVENT].append(bean) - elif bean.is_nn_module(): - self._result_data[Constant.MODULE_EVENT].append(bean) - elif bean.is_torch_to_npu(): - if bean.is_flow_start(): - self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(bean.id, {})["start"] = bean - else: - self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(bean.id, {})["end"] = bean - elif bean.is_fwd_bwd_flow(): - if bean.is_flow_start(): - self._result_data[Constant.FWD_BWD_FLOW].setdefault(bean.id, {})["start"] = bean - else: - self._result_data[Constant.FWD_BWD_FLOW].setdefault(bean.id, {})["end"] = bean - elif bean.is_kernel_event(self._kernel_pid): - self._result_data[Constant.KERNEL_EVENT].append(bean) - - def _check_trace_data(self, trace_data): - if not isinstance(trace_data, list): - msg = f"Invalid profiling data path, this feature only supports performance data " \ - f"collected by Ascend PyTorch Profiler." 
- raise RuntimeError(msg) - iter_trace_data = iter(trace_data) + iter_trace_data = [TraceEventBean(data) for data in trace_data] for event in iter_trace_data: - bean = TraceEventBean(event) - if bean.is_npu_process(): - self._kernel_pid = bean.pid + if self._kernel_pid is not None and self._hccl_pid is not None: break + if not event.is_meta(): + continue + if event.is_npu_process(): + self._kernel_pid = event.pid + elif event.is_hccl_process(): + self._hccl_pid = event.pid if self._kernel_pid is None: - msg = f"There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." + msg = "There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." raise RuntimeError(msg) + for event in iter_trace_data: + if event.is_optimizer(): + event.event_type = Constant.MODULE_TYPE + self._result_data[Constant.MODULE_EVENT].append(event) + elif event.is_cpu_op(): + if not event.is_step(): + event.event_type = Constant.OPERATOR_TYPE + self._result_data[Constant.CPU_OP_EVENT].append(event) + elif event.is_nn_module(): + event.event_type = Constant.MODULE_TYPE + self._result_data[Constant.MODULE_EVENT].append(event) + elif event.is_torch_to_npu(): + if event.is_flow_start(): + self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(event.id, {})["start"] = event + else: + self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(event.id, {})["end"] = event + elif event.is_fwd_bwd_flow(): + if event.is_flow_start(): + self._result_data[Constant.FWD_BWD_FLOW].setdefault(event.id, {})["start"] = event + else: + self._result_data[Constant.FWD_BWD_FLOW].setdefault(event.id, {})["end"] = event + elif event.is_kernel_event(self._kernel_pid): + self._result_data[Constant.KERNEL_EVENT].append(event) + elif event.is_hccl_event(self._hccl_pid): + self._result_data[Constant.HCCL_EVENT].append(event) + + def _parse_kernel_details(self): + if not self._kernel_details_path: + return + try: + all_kernels = 
FileReader.read_csv_file(self._kernel_details_path, KernelBean) + except Exception as e: + logging.error(e) + kernels = list(filter(lambda x: x.is_computing_op, all_kernels)) + if kernels: + self._result_data[Constant.KERNEL_EVENT] = kernels def _check_result_data(self): if not self._result_data.get(Constant.CPU_OP_EVENT): - msg = f"This data does not have any aten operator, please make sure to enable the CPU switch." + msg = "This data does not have any aten operator, please make sure to enable the CPU switch." raise RuntimeError(msg) if not self._result_data.get(Constant.MODULE_EVENT): - msg = f"This data does not collect any modules, please make sure to turn on the with_stack switch." + msg = "This data does not collect any modules, please make sure to enable the with_stack or with_modules." raise RuntimeError(msg) diff --git a/profiler/prof_common/base_node.py b/profiler/prof_common/base_node.py index b7cd678000..1e12294328 100644 --- a/profiler/prof_common/base_node.py +++ b/profiler/prof_common/base_node.py @@ -47,6 +47,10 @@ class BaseNode: def end_time(self) -> Decimal: return self._event.end_time + @parent_node.setter + def parent_node(self, parent_node): + self._parent_node = parent_node + def update_child_nodes(self, node): self._child_nodes.append(node) diff --git a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 87bc51b56b..90ec6d006e 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -23,9 +23,15 @@ class Constant(object): CPU_OP_EVENT = "op_event" TORCH_TO_NPU_FLOW = "torch_to_device" KERNEL_EVENT = "kernel_event" + HCCL_EVENT = "hccl_event" FWD_BWD_FLOW = "fwd_to_bwd" NPU_ROOT_ID = "NPU" FWD_OR_OPT = 0 BACKWARD = 1 INVALID_RETURN = -1 + + # node type + MODULE_TYPE = 0 + OPERATOR_TYPE = 1 + VIRTUAL_TYPE = 9 diff --git a/profiler/prof_common/file_reader.py b/profiler/prof_common/file_reader.py index d8a9c8fb4d..9a225131f9 100644 --- a/profiler/prof_common/file_reader.py +++ 
b/profiler/prof_common/file_reader.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import csv import json import logging import os @@ -57,3 +58,29 @@ class FileReader: file.write(json.dumps(data, indent=indent)) except Exception as e: raise RuntimeError(f"Can't create the file: {output_path}") from e + + @classmethod + def read_csv_file(cls, file_path: str, bean_class: any = None) -> any: + PathManager.check_path_readable(file_path) + if not os.path.isfile(file_path): + raise FileNotFoundError("File not exists.") + file_size = os.path.getsize(file_path) + if file_size <= 0: + return [] + if file_size > Constant.MAX_FILE_SIZE_5_GB: + check_msg = input( + f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") + if check_msg.lower() != "y": + logging.warning(f"The user choose not to read the file: %s", file_path) + return [] + result_data = [] + try: + with open(file_path, newline="") as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + row_data = bean_class(row) if bean_class else row + result_data.append(row_data) + except Exception as e: + msg = f"Failed to read the file: {file_path}" + raise RuntimeError(msg) from e + return result_data diff --git a/profiler/prof_common/kernel_bean.py b/profiler/prof_common/kernel_bean.py new file mode 100644 index 0000000000..cbfa10c0a9 --- /dev/null +++ b/profiler/prof_common/kernel_bean.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024 Huawei Technologies Co., Ltd +# All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from profiler.prof_common.utils import convert_to_decimal + + +class KernelBean: + def __init__(self, data: dict): + self._name = data.get("Name", "") + self._op_type = data.get("Type", "") + self._core_type = data.get("Accelerator Core", "") + self._input_shape = data.get("Input Shapes", "").replace("\"", "") + self._input_type = data.get("Input Data Types", "") + self._input_format = data.get("Input Formats", "") + self._duration = data.get("Duration(us)", 0) + self._ts = data.get("Start Time(us)", "") + + @property + def start_time(self): + return convert_to_decimal(self._ts) + + @property + def is_computing_op(self): + return self._core_type != "HCCL" + + @property + def dur(self): + return float(self._duration) + + @property + def kernel_info(self): + return [self._name, self._op_type, self._core_type, self._input_shape, self._input_type, self.dur] diff --git a/profiler/prof_common/trace_event_bean.py b/profiler/prof_common/trace_event_bean.py index 2d4b96e4f6..0aee79907b 100644 --- a/profiler/prof_common/trace_event_bean.py +++ b/profiler/prof_common/trace_event_bean.py @@ -19,9 +19,10 @@ from profiler.prof_common.analyze_dict import AnalyzeDict class TraceEventBean(AnalyzeDict): - def __init__(self, data: dict, unique_id: int = None): + def __init__(self, data: dict, unique_id: str = None): super().__init__(data) self._id = unique_id + self._type = None @property def unique_id(self): @@ -35,6 +36,18 @@ class TraceEventBean(AnalyzeDict): def end_time(self) -> Decimal: return self.start_time + convert_to_decimal(self.dur) + @property + def 
kernel_info(self): + return [self.name, self.args.get("Task Type", ""), self.dur] + + @property + def event_type(self): + return self._type + + @event_type.setter + def event_type(self, event_type): + self._type = event_type + def set_id(self, name_id): self._id = name_id @@ -62,8 +75,17 @@ class TraceEventBean(AnalyzeDict): def is_flow_end(self): return self.ph == "f" + def is_meta(self): + return self.ph == "M" + def is_kernel_event(self, kernel_pid): return self.ph == "X" and self.pid == kernel_pid + def is_hccl_event(self, hccl_pid): + return self.ph == "X" and self.pid == hccl_pid and self.name.startswith("hcom_") + def is_npu_process(self): return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "Ascend Hardware" + + def is_hccl_process(self): + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "HCCL" diff --git a/profiler/prof_common/tree_builder.py b/profiler/prof_common/tree_builder.py index b7d3e1baf6..b6311c1a93 100644 --- a/profiler/prof_common/tree_builder.py +++ b/profiler/prof_common/tree_builder.py @@ -19,8 +19,10 @@ class TreeBuilder: @staticmethod def build_tree(event_list: list, node_class: any, root_bean: any): root_node = node_class(root_bean) + all_nodes = [root_node] + [None] * len(event_list) event_list.sort(key=lambda x: x.start_time) last_node = root_node + index = 1 for event in event_list: while last_node: if last_node != root_node and event.start_time > last_node.end_time: @@ -28,6 +30,8 @@ class TreeBuilder: continue tree_node = node_class(event, last_node) last_node.update_child_nodes(tree_node) + all_nodes[index] = tree_node last_node = tree_node + index += 1 break - return root_node + return all_nodes -- Gitee From 8ca203d1ea616b13784a91c0895ca8727b2a029e Mon Sep 17 00:00:00 2001 From: qianggee Date: Thu, 22 Aug 2024 08:56:57 +0000 Subject: [PATCH 045/141] reduce circular complexity --- .../accuracy_tools/kj600/kj600/module_hook.py | 36 +++++++++++-------- 1 file 
changed, 22 insertions(+), 14 deletions(-) diff --git a/debug/accuracy_tools/kj600/kj600/module_hook.py b/debug/accuracy_tools/kj600/kj600/module_hook.py index 21c326ac01..1b9f752cf1 100644 --- a/debug/accuracy_tools/kj600/kj600/module_hook.py +++ b/debug/accuracy_tools/kj600/kj600/module_hook.py @@ -486,7 +486,26 @@ class TrainerMon: print_info_log(msg) else: print_info_log(msg) - + + def _is_target_param(self, param_name, param): + for target in self.config['targets'].keys(): + if param_name.startswith(target) and param.requires_grad: + self._smallest_rank_print(f'>> monitoring: {param_name}') + setattr(param, "zero_out_wgrad", True) + return True + + return False + + def _register_chunk(self, model_chunk, prefix): + for param_name, param in model_chunk.named_parameters(): + name = prefix + squash_param_name(param_name) + if self._is_target_param(param_name, param): + if name in self.param2name.values() or name == '': + print_error_log(f'same name {name} for different param. Current param is {param_name}. \ + May be error of squash_param_name') + raise Exception("param with same name will be overwritten.") + self.param2name[param] = name + def _register_param_name(self, model): if self.param_registered: return @@ -499,19 +518,8 @@ class TrainerMon: for vpp_stage, model_chunk in enumerate(model): prefix = f'{Const.vpp}{vpp_stage}{Const.vpp_sep}' if self.vpp else '' - for param_name, param in model_chunk.named_parameters(): - name = prefix + squash_param_name(param_name) - for target in self.config['targets'].keys(): - if param_name.startswith(target) and param.requires_grad: - self._smallest_rank_print(f'>> monitoring: {name}') - setattr(param, "zero_out_wgrad", True) - if name in self.param2name.values() or name == '': - print_error_log(f'same name {name} for different param. Current param is {param_name}. 
\ - May be error of squash_param_name') - raise Exception("param with same name will be overwriten.") - self.param2name[param] = name - break - + self._register_chunk(model_chunk, prefix) + self.param_registered = True -- Gitee From d0e1854e796f5acb49c8db60ff0d87bc02b31904 Mon Sep 17 00:00:00 2001 From: qianggee Date: Thu, 22 Aug 2024 09:06:28 +0000 Subject: [PATCH 046/141] split PR --- .../kj600/kj600/anomaly_analyse.py | 248 -------------- .../accuracy_tools/kj600/kj600/file_check.py | 324 ------------------ .../kj600/kj600/unittest/test_monitor.py | 145 -------- 3 files changed, 717 deletions(-) delete mode 100644 debug/accuracy_tools/kj600/kj600/anomaly_analyse.py delete mode 100644 debug/accuracy_tools/kj600/kj600/file_check.py delete mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py deleted file mode 100644 index f6069db6fb..0000000000 --- a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" - -import argparse -import ast -import fcntl -import heapq -import json -import os -from pathlib import Path -import sys - -from kj600.utils import print_info_log, print_warn_log -from kj600.anomaly_detect import GradAnomalyData -from kj600.file_check import ( - change_mode, - check_link, - FileCheckConst, - check_path_before_create, - FileChecker, - FileOpen, -) - -ANOMALY_JSON = "anomaly.json" -ANALYSE_JSON = "anomaly_analyse.json" - -class AnomalyDataWriter: - """ - 异常数据写入类,负责将异常数据写入到JSON文件中。 - """ - - def __init__(self, dump_path, rank) -> None: - self.dump_path = dump_path - self.dump_rank_dir = os.path.join(self.dump_path, f"rank{rank}") - self.json_path = os.path.join(self.dump_rank_dir, ANOMALY_JSON) - - @staticmethod - def get_anomaly_dict(anomalies): - """将GradAnomalyData列表转换为json""" - anomalies_json = {} - for anomaly in anomalies: - anomalies_json.update({anomaly.get_key(): anomaly.to_dict()}) - return anomalies_json - - @staticmethod - def update_data_in_single_json(json_path, anomalies_data): - with FileOpen(json_path, "w+") as f: - fcntl.flock(f, fcntl.LOCK_EX) - json.dump(anomalies_data, f, indent=1) - fcntl.flock(f, fcntl.LOCK_UN) - change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) - - def init_detected_json(self): - """初始化落盘文件""" - check_path_before_create(self.dump_path) - if not os.path.exists(self.dump_path): - Path(self.dump_path).mkdir( - mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True - ) - file_check = FileChecker(self.dump_path, FileCheckConst.DIR) - file_check.common_check() - - if not os.path.exists(self.dump_rank_dir): - Path(self.dump_rank_dir).mkdir( - FileCheckConst.DATA_DIR_AUTHORITY, parents=True, exist_ok=True - ) - - if os.path.exists(self.json_path): - file_check = FileChecker( - self.json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE - ) - file_check.common_check() - print_warn_log(f"The existing file will be deleted: {self.json_path}.") - os.remove(self.json_path) - Path(self.json_path).touch() - 
change_mode(self.json_path, FileCheckConst.DATA_FILE_AUTHORITY) - - def write_detected_json(self, anomalies): - """ - 落盘异常数据 - Args: - anomalies: GradAnomalyData对象列表 - """ - anomalies_json = self.get_anomaly_dict(anomalies) - print_info_log(f"{ANOMALY_JSON} is at {self.dump_rank_dir}.") - if Path(self.json_path).exists() and os.path.getsize(self.json_path) > 0: - with FileOpen(self.json_path, "r+") as f: - fcntl.flock(f, fcntl.LOCK_EX) - data_to_write = json.load(f) - fcntl.flock(f, fcntl.LOCK_UN) - else: - data_to_write = {} - data_to_write.update(anomalies_json) - self.update_data_in_single_json(self.json_path, data_to_write) - - -class AnomalyDataLoader: - def __init__(self, data_path) -> None: - self.data_path = data_path - - @staticmethod - def create_instances_from_dict(anomalies_dict: dict): - instances = [] - for values in anomalies_dict.values(): - try: - instances.append(GradAnomalyData(**values)) - except KeyError as e: - print_warn_log(f"Missing key in anomaly data: {e}") - except ValueError as e: - print_warn_log( - f"Value error when creating a GradAnomalyData instance: {e}" - ) - return instances - - def get_anomalies_from_jsons(self): - """遍历文件夹,从rankK/anomaly.json中读取异常数据 - return: anomalies: GradAnomalyData对象列表 - """ - anomalies = [] - check_link(self.data_path) - for rank_dir in os.listdir(self.data_path): - rank_path = os.path.join(self.data_path, rank_dir) - if not os.path.isdir(rank_path): - continue - json_path = os.path.join(rank_path, ANOMALY_JSON) - if not os.path.exists(json_path): - continue - with FileOpen(json_path, "r+") as f: - fcntl.flock(f, fcntl.LOCK_EX) - data_anomalies = json.load(f) - fcntl.flock(f, fcntl.LOCK_UN) - instances = self.create_instances_from_dict(data_anomalies) - anomalies.extend(instances) - return anomalies - - -class AnomalyAnalyse: - def __init__(self) -> None: - self.sorted_anomalies = [] - - def get_range_top_K(self, topk, step_list, anomalies): - """ - 获取前topk个step_list范围内的异常。 - """ - if not step_list: - 
filtered_anomalies = anomalies - else: - filtered_anomalies = [ - anomaly for anomaly in anomalies if anomaly.step in step_list - ] - if topk >= len(filtered_anomalies): - self.sorted_anomalies = sorted(filtered_anomalies) - else: - self.sorted_anomalies = list(heapq.nsmallest(topk, filtered_anomalies)) - return self.sorted_anomalies - - def rewrite_sorted_anomalies(self, output_path): - """ - 将排序后的异常数据重新落盘 - """ - file_check = FileChecker( - output_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE - ) - file_check.common_check() - - sorted_data = AnomalyDataWriter.get_anomaly_dict(self.sorted_anomalies) - print_info_log(f"{ANALYSE_JSON} is at {output_path}.") - json_path = os.path.join(output_path, ANALYSE_JSON) - if os.path.exists(json_path): - file_check = FileChecker( - json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE - ) - file_check.common_check() - print_warn_log(f"The existing file will be deleted: {json_path}.") - os.remove(json_path) - Path(json_path).touch() - change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) - AnomalyDataWriter.update_data_in_single_json(json_path, sorted_data) - - -def _get_parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("-d", "--data_path", dest="data_path_dir", default="./", type=str, - help=" The anomaly detect result dictionary: generate from kj600 tool.", - required=True, - ) - parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, - help=" The analyse task result out path.", - required=False, - ) - parser.add_argument("-k", "--topk", dest="top_k_number", default=8, type=int, - help=" Top K number of earliest anomalies.", - required=False, - ) - parser.add_argument("-s", "--step", dest="step_list", default=[], type=str, - help=" Analyse which steps.", - required=False, - ) - return parser.parse_args(sys.argv[1:]) - -def _get_step_and_stop(args): - try: - step_list = ast.literal_eval(args.step_list) - if not isinstance(step_list, list): - raise 
ValueError(f"{args.step_list} is not a list") - except (ValueError, SyntaxError, RecursionError) as e: - raise Exception( - f"The step list must be a resolvable list type" - ) from e - if args.top_k_number <= 0: - raise Exception("The top k number must be greater than 0.") - return step_list, args.top_k_number - -def _anomaly_analyse(): - args = _get_parse_args() - step_list, top_k_number = _get_step_and_stop(args) - loader = AnomalyDataLoader(args.data_path_dir) - anomalies = loader.get_anomalies_from_jsons() - analyser = AnomalyAnalyse() - top_anomalies = analyser.get_range_top_K( - top_k_number, step_list, anomalies - ) - analyser.rewrite_sorted_anomalies( - args.out_path if args.out_path else args.data_path_dir - ) - - print_info_log(f"Top {top_k_number} anomalies are listed as follows:") - for index, anomaly in enumerate(top_anomalies): - print_info_log(f"{index}: {anomaly.message}") - - -if __name__ == "__main__": - _anomaly_analyse() - print_info_log("Analyse task completed.") diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py deleted file mode 100644 index 21f9e351a2..0000000000 --- a/debug/accuracy_tools/kj600/kj600/file_check.py +++ /dev/null @@ -1,324 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -import os -import re - -from kj600.utils import print_info_log - - -class CodedException(Exception): - def __init__(self, code, error_info=""): - super().__init__() - self.code = code - self.error_info = self.err_strs.get(code) + error_info - - def __str__(self): - return self.error_info - - -class FileCheckException(CodedException): - INVALID_FILE_ERROR = 0 - FILE_PERMISSION_ERROR = 1 - SOFT_LINK_ERROR = 2 - ILLEGAL_PATH_ERROR = 3 - ILLEGAL_PARAM_ERROR = 4 - FILE_TOO_LARGE_ERROR = 5 - - err_strs = { - SOFT_LINK_ERROR: "[kj600] 检测到软链接: ", - FILE_PERMISSION_ERROR: "[kj600] 文件权限错误: ", - INVALID_FILE_ERROR: "[kj600] 无效文件: ", - ILLEGAL_PATH_ERROR: "[kj600] 非法文件路径: ", - ILLEGAL_PARAM_ERROR: "[kj600] 非法打开方式: ", - FILE_TOO_LARGE_ERROR: "[kj600] 文件过大: ", - } - - -class FileCheckConst: - """ - Class for file check const - """ - - READ_ABLE = "read" - WRITE_ABLE = "write" - READ_WRITE_ABLE = "read and write" - DIRECTORY_LENGTH = 4096 - FILE_NAME_LENGTH = 255 - FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" - FILE_PATTERN = r"^[a-zA-Z0-9_./-]+$" - JSON_SUFFIX = ".json" - MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 - DIR = "dir" - FILE = "file" - DATA_DIR_AUTHORITY = 0o750 - DATA_FILE_AUTHORITY = 0o640 - FILE_SIZE_DICT = { - JSON_SUFFIX: MAX_JSON_SIZE, - } - - -class FileChecker: - """ - The class for check file. - - Attributes: - file_path: The file or dictionary path to be verified. 
- path_type: file or dictionary - ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability - file_type(str): The correct file type for file - """ - - def __init__( - self, file_path, path_type, ability=None, file_type=None, is_script=True - ): - self.file_path = file_path - self.path_type = self._check_path_type(path_type) - self.ability = ability - self.file_type = file_type - self.is_script = is_script - - @staticmethod - def _check_path_type(path_type): - if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: - print_info_log( - f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." - ) - raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) - return path_type - - def common_check(self): - """ - 功能:用户校验基本文件权限:软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符 - 注意:文件后缀的合法性,非通用操作,可使用其他独立接口实现 - """ - check_path_exists(self.file_path) - check_link(self.file_path) - self.file_path = os.path.realpath(self.file_path) - check_path_length(self.file_path) - check_path_type(self.file_path, self.path_type) - self.check_path_ability() - if self.is_script: - check_path_owner_consistent(self.file_path) - check_path_pattern_vaild(self.file_path) - check_common_file_size(self.file_path) - check_file_suffix(self.file_path, self.file_type) - return self.file_path - - def check_path_ability(self): - if self.ability == FileCheckConst.WRITE_ABLE: - check_path_writability(self.file_path) - if self.ability == FileCheckConst.READ_ABLE: - check_path_readability(self.file_path) - if self.ability == FileCheckConst.READ_WRITE_ABLE: - check_path_readability(self.file_path) - check_path_writability(self.file_path) - - -class FileOpen: - """ - The class for open file by a safe way. - - Attributes: - file_path: The file or dictionary path to be opened. 
- mode(str): The file open mode - """ - - SUPPORT_READ_MODE = ["r", "rb"] - SUPPORT_WRITE_MODE = ["w", "wb", "a", "ab"] - SUPPORT_READ_WRITE_MODE = ["r+", "rb+", "w+", "wb+", "a+", "ab+"] - - def __init__(self, file_path, mode, encoding="utf-8"): - self.file_path = file_path - self.mode = mode - self.encoding = encoding - self._handle = None - - def __enter__(self): - self.check_file_path() - binary_mode = "b" - if binary_mode not in self.mode: - self._handle = open(self.file_path, self.mode, encoding=self.encoding) - else: - self._handle = open(self.file_path, self.mode) - return self._handle - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._handle: - self._handle.close() - - def check_file_path(self): - support_mode = ( - self.SUPPORT_READ_MODE - + self.SUPPORT_WRITE_MODE - + self.SUPPORT_READ_WRITE_MODE - ) - if self.mode not in support_mode: - print_info_log("File open not support %s mode" % self.mode) - raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) - check_link(self.file_path) - self.file_path = os.path.realpath(self.file_path) - check_path_length(self.file_path) - self.check_ability_and_owner() - check_path_pattern_vaild(self.file_path) - if os.path.exists(self.file_path): - check_common_file_size(self.file_path) - - def check_ability_and_owner(self): - if self.mode in self.SUPPORT_READ_MODE: - check_path_exists(self.file_path) - check_path_readability(self.file_path) - check_path_owner_consistent(self.file_path) - if self.mode in self.SUPPORT_WRITE_MODE and os.path.exists(self.file_path): - check_path_writability(self.file_path) - check_path_owner_consistent(self.file_path) - if self.mode in self.SUPPORT_READ_WRITE_MODE and os.path.exists(self.file_path): - check_path_readability(self.file_path) - check_path_writability(self.file_path) - check_path_owner_consistent(self.file_path) - - -def check_link(path): - abs_path = os.path.abspath(path) - if os.path.islink(abs_path): - print_info_log("The file path {} is a soft 
link.".format(path)) - raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) - - -def check_path_length(path, name_length=None): - file_max_name_length = ( - name_length if name_length else FileCheckConst.FILE_NAME_LENGTH - ) - if ( - len(path) > FileCheckConst.DIRECTORY_LENGTH - or len(os.path.basename(path)) > file_max_name_length - ): - print_info_log("The file path length exceeds limit.") - raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) - - -def check_path_exists(path): - if not os.path.exists(path): - print_info_log("The file path %s does not exist." % path) - raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) - - -def check_path_readability(path): - if not os.access(path, os.R_OK): - print_info_log("The file path %s is not readable." % path) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_writability(path): - if not os.access(path, os.W_OK): - print_info_log("The file path %s is not writable." % path) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_executable(path): - if not os.access(path, os.X_OK): - print_info_log("The file path %s is not executable." % path) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_other_user_writable(path): - st = os.stat(path) - if st.st_mode & 0o002: - print_info_log( - "The file path %s may be insecure because other users have write permissions. " - % path - ) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_owner_consistent(path): - file_owner = os.stat(path).st_uid - if file_owner != os.getuid(): - print_info_log( - "The file path %s may be insecure because is does not belong to you." % path - ) - raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) - - -def check_path_pattern_vaild(path): - if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): - print_info_log("The file path %s contains special characters." 
% (path)) - raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) - - -def check_file_size(file_path, max_size): - file_size = os.path.getsize(file_path) - if file_size >= max_size: - print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") - raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) - - -def check_common_file_size(file_path): - if os.path.isfile(file_path): - for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): - if file_path.endswith(suffix): - check_file_size(file_path, max_size) - break - - -def check_file_suffix(file_path, file_suffix): - if file_suffix: - if not file_path.endswith(file_suffix): - print_info_log(f"The {file_path} should be a {file_suffix} file!") - raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) - - -def check_path_type(file_path, file_type): - if file_type == FileCheckConst.FILE: - if not os.path.isfile(file_path): - print_info_log(f"The {file_path} should be a file!") - raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) - if file_type == FileCheckConst.DIR: - if not os.path.isdir(file_path): - print_info_log(f"The {file_path} should be a dictionary!") - raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) - - -def check_path_before_create(path): - if path_len_exceeds_limit(path): - raise FileCheckException( - FileCheckException.ILLEGAL_PATH_ERROR, "The file path length exceeds limit." - ) - - if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): - raise FileCheckException( - FileCheckException.ILLEGAL_PATH_ERROR, - "The file path {} contains special characters.".format(path), - ) - - -def change_mode(path, mode): - if not os.path.exists(path) or os.path.islink(path): - return - try: - os.chmod(path, mode) - except PermissionError as ex: - raise FileCheckException( - FileCheckException.FILE_PERMISSION_ERROR, - "Failed to change {} authority. 
{}".format(path, str(ex)), - ) from ex - - -def path_len_exceeds_limit(file_path): - return ( - len(os.path.realpath(file_path)) > FileCheckConst.DIRECTORY_LENGTH - or len(os.path.basename(file_path)) > FileCheckConst.FILE_NAME_LENGTH - ) diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py deleted file mode 100644 index ddea3244f5..0000000000 --- a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py +++ /dev/null @@ -1,145 +0,0 @@ -import sys -import os -import re -import argparse -import pandas as pd -from glob import glob -from collections import defaultdict - - -def parse_logfile(logfile): - grad_norm = [] - step = [] - with open(logfile) as f: - for line in f.readlines(): - if 'consumed samples' in line: - grad_norm.append(float(re.findall('(?<=grad norm\: )[\d\.]*', line)[0])) - # step = int(re.findall('(?<=iteration)[ \d]*', line)[0]) - return grad_norm - - -def parse_monitor_output(output_dir): - reduced = {} - unreduced = {} - for dir in glob(output_dir+'*'): - rank = int(re.findall('(?<=rank)[\d]*', dir)[0]) - unreduced[rank] = [] - reduced[rank] = [] - for file in os.listdir(dir): - # step = int(re.search("(?<=reduced\_)[\d]*", file)[0]) - # if step != 0: - # continue - df = pd.read_csv(os.path.join(dir, file)) - if '_unreduced_' in file: - unreduced[rank].append(df) - pass - elif '_reduced_' in file: - reduced[rank].append(df) - else: - print(f'unexpected file {file} in {dir}') - return reduced, unreduced - -def valid_reduce(reduced, unreduced, tp_size, dp_size, sequence_parallel): - steps = len(reduced[0]) - world_size = len(reduced) - errors = [] - for index, row in unreduced[0][0].iterrows(): - param = row['param_name'] - is_tp_duplicate = False - for step in range(2): - # sum reduced - reduced_mean = 0. 
- for rank in range(world_size): - if len(reduced[rank]) == 0: - continue - df = reduced[rank][step] - value = list(df[df['param_name'] == param]['mean']) - if value == []: - if step == 0: - is_tp_duplicate = True - continue - reduced_mean += value[0] - - # sum unreduced - unreduced_mean = 0. - for rank in range(world_size): - df = unreduced[rank][step] - value = list(df[df['param_name'] == param]['mean']) - if value == []: - continue - unreduced_mean += list(df[df['param_name'] == param]['mean'])[0] - - unreduced_mean /= dp_size - if is_tp_duplicate and (not sequence_parallel or 'embedding' in param): - unreduced_mean /= tp_size - try: - assert_equal(unreduced_mean, reduced_mean) - except AssertionError as e: - errors.append([param, step, e, is_tp_duplicate]) - if errors: - print(errors) - else: - print(f'grad mean is in consist between unreduced grad and reduced grad monitord.') - - - -def assert_equal(a, b): - if b == 0 or a == 0: - return - if b == 0: - rel_diff = a - elif a == 0: - rel_diff = b - else: - rel_diff = abs(a/b-1) - assert rel_diff<0.01, f'{a}, {b}, {rel_diff}' - - -def valid_total_norm(total_norm, reduced, duplicate_embedding): - steps = len(total_norm) - world_size = len(reduced) - errors = [] - for step in range(steps): - calculated_norm = 0. 
- for rank in range(world_size): - if len(reduced[rank]) == 0: - if step == 0: - print(f'rank {rank} is duplicated in dp group') - continue - for index, row in reduced[rank][step].iterrows(): - if duplicate_embedding and 'word_embedding' in row['param_name']: - continue - calculated_norm += row['norm']**2 - try: - assert_equal(calculated_norm**0.5, total_norm[step]) - except AssertionError as e: - errors.append([step, e]) - if errors: - print('total norm errors: ', errors) - else: - print('grad norm in consist between training log and reduced gradients monitored') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--monitor_output', '-m', type=str, required=True, help='path prefix to the output of monitor e.g. kj600_output/Aug12_07-16') - parser.add_argument('--logfile', '-l', type=str, required=True, help='path to the training log file') - parser.add_argument('--tp_size', '-t', type=int, required=True, help='tp parallel size') - parser.add_argument('--dp_size', '-d', type=int, required=True, help='dp parallel size') - parser.add_argument('--pp_size', '-p', type=int, required=True, help='pp parallel size') - parser.add_argument('--untie_embeddings_and_output_weights', '-u', action="store_true", default=False, help='whether untie_embeddings_and_output_weights in pp parallel') - parser.add_argument('--sequence_parallel', '-s', action="store_true", default=False, help='whether sequence parallel is enabled. 
Add -s to store true') - - args = parser.parse_args() - - assert args.tp_size > 0, 'if tp not enabled, set tp_size = 1' - assert args.dp_size > 0, 'if tp not enabled, set dp_size = 1' - assert args.pp_size > 0, 'if tp not enabled, set pp_size = 1' - - total_norm = parse_logfile(args.logfile) - reduced, unreduced = parse_monitor_output(args.monitor_output) - - duplicate_embedding = not args.untie_embeddings_and_output_weights and args.pp_size > 1 - - valid_total_norm(total_norm, reduced, duplicate_embedding) - valid_reduce(reduced, unreduced, args.tp_size, args.dp_size, args.sequence_parallel) \ No newline at end of file -- Gitee From 9d6f5c935c274de263e2332ae0d10da0bf2d9934 Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Thu, 22 Aug 2024 15:50:40 +0800 Subject: [PATCH 047/141] add_overall_metrics --- .../module_visualization/graph/prof_node.py | 69 ++++++++++++++----- .../graph_build/prof_graph_builder.py | 33 +++++++-- .../prof_parse/prof_data_pre_process.py | 12 +++- profiler/prof_common/constant.py | 11 +++ profiler/prof_common/kernel_bean.py | 4 ++ profiler/prof_common/trace_event_bean.py | 11 ++- 6 files changed, 114 insertions(+), 26 deletions(-) diff --git a/profiler/module_visualization/graph/prof_node.py b/profiler/module_visualization/graph/prof_node.py index 3588a8b81b..df77d325df 100644 --- a/profiler/module_visualization/graph/prof_node.py +++ b/profiler/module_visualization/graph/prof_node.py @@ -24,6 +24,9 @@ class ProfNode(BaseNode): self._kernel_total_list = [] self._communication_total_list = [] self._precision_index = 1 + self._computing_time = 0 + self._uncovered_comm_time = 0 + self._free_time = 0 @property def node_id(self): @@ -37,11 +40,19 @@ class ProfNode(BaseNode): @property def total_kernels(self): + if self.node_type == Constant.VIRTUAL_TYPE: + return [kernel for node in self.child_nodes for kernel in node.total_kernels] return self._kernel_total_list + @property + def total_communications(self): + if 
self.node_type == Constant.VIRTUAL_TYPE: + return [comm for node in self.child_nodes for comm in node.total_communications] + return self._communication_total_list + @property def host_total_dur(self): - if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: + if self.node_type == Constant.VIRTUAL_TYPE: return sum((node.host_total_dur for node in self.child_nodes)) return self._event.dur @@ -53,9 +64,7 @@ class ProfNode(BaseNode): @property def device_total_dur(self): - if self.is_root_node or self.node_type == Constant.VIRTUAL_TYPE: - return sum((node.device_total_dur for node in self.child_nodes)) - return sum((kernel.dur for kernel in self._kernel_total_list)) + return sum((kernel.dur for kernel in self.total_kernels)) @property def device_self_dur(self): @@ -82,24 +91,27 @@ class ProfNode(BaseNode): @property def communication_data(self) -> list: - if self.node_type == Constant.VIRTUAL_TYPE: - return [comm for node in self.child_nodes for comm in node.communication_data] - return [[comm.name, comm.dur] for comm in self._communication_total_list] + return [[comm.name, comm.dur] for comm in self.total_communications] @property def overall_data(self): - return {"Computing Time(ms)": 1, "Uncovered Communication Time(ms)": 1, "Free Time(ms)": 1} + return {"Computing Time(us)": round(self._computing_time, 3), + "Uncovered Communication Time(us)": round(self._uncovered_comm_time, 3), + "Free Time(us)": round(self._free_time, 3)} @property def data(self): - return {"Input Data": self.input_data, - "precision_index": self.precision_index, - "Host Self Duration(us)": round(self.host_self_dur, 2), - "Host Total Duration(us)": round(self.host_total_dur, 2), - "Device Self Duration(us)": round(self.device_self_dur, 2), - "Device Total Duration(us)": round(self.device_total_dur, 2), - "kernels": self.kernel_data, - "communications": self.communication_data} + data = { + "Overall Metrics": self.overall_data} if self.node_type != Constant.OPERATOR_TYPE else {} + 
data.update({"Input Data": self.input_data, + "precision_index": self.precision_index, + "Host Self Duration(us)": round(self.host_self_dur, 3), + "Host Total Duration(us)": round(self.host_total_dur, 3), + "Device Self Duration(us)": round(self.device_self_dur, 3), + "Device Total Duration(us)": round(self.device_total_dur, 3), + "kernels": self.kernel_data, + "Communications": self.communication_data}) + return data @property def info(self): @@ -141,3 +153,28 @@ class ProfNode(BaseNode): diff_dur = max_dur - min_dur for node in self.child_nodes: node.precision_index = 1 - (node.device_total_dur - min_dur) / diff_dur if diff_dur else 1 + + def update_overall_metrics(self, overlap_analysis_event): + if not self.total_kernels and not self.total_communications: + return + kernel_start = min((kernel.start_time for kernel in self.total_kernels)) if self.total_kernels else float("inf") + kernel_end = max((kernel.end_time for kernel in self.total_kernels)) if self.total_kernels else float("-inf") + comm_start = min((comm.start_time for comm in self.total_communications)) \ + if self.total_communications else float("inf") + comm_end = max((comm.end_time for comm in self.total_communications)) \ + if self.total_communications else float("-inf") + device_start = min(kernel_start, comm_start) + device_end = max(kernel_end, comm_end) + for event in overlap_analysis_event: + if event.start_time >= device_end: + continue + if event.end_time <= device_start: + continue + duration_us = float( + min(device_end, event.end_time) - max(device_start, event.start_time)) + if event.name == Constant.COMPUTING_EVENT: + self._computing_time += duration_us + elif event.name == Constant.FREE_EVENT: + self._free_time += duration_us + elif event.name == Constant.UNCOVERED_COMMUNICATION_EVENT: + self._uncovered_comm_time += duration_us diff --git a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py index 
331e0cb050..9606193acd 100644 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ b/profiler/module_visualization/graph_build/prof_graph_builder.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from decimal import Decimal + from profiler.module_visualization.graph.prof_node import ProfNode from profiler.module_visualization.graph_build.fwd_module_node import FwdModuleNode from profiler.prof_common.tree_builder import TreeBuilder @@ -29,8 +31,9 @@ class ProfGraphBuilder: def _create_event_bean_from_ops(cls, op_list: list, name: str) -> TraceEventBean: min_start = min((op.start_time for op in iter(op_list))) max_end = max((op.end_time for op in iter(op_list))) - # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-1 +2处理 - event = TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) + # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了-0.0001 +0.0001处理 + event = TraceEventBean( + {"ts": min_start - Decimal("0.0001"), "dur": float(max_end - min_start + Decimal("0.0001")), "name": name}) event.event_type = Constant.MODULE_TYPE return event @@ -73,6 +76,15 @@ class ProfGraphBuilder: root_node.update_child_nodes(merged_nodes[0]) root_node.update_child_nodes(node) merged_nodes = [] + if len(merged_nodes) >= 2: + virtual_node = ProfNode(TraceEventBean({}, f"Operators_Between_Modules_{order_id}"), root_node) + root_node.update_child_nodes(virtual_node) + for op_node in merged_nodes: + op_node.parent_node = virtual_node + virtual_node.update_child_nodes(op_node) + virtual_nodes.append(virtual_node) + elif len(merged_nodes) == 1: + root_node.update_child_nodes(merged_nodes[0]) return virtual_nodes def build_graph(self): @@ -96,6 +108,8 @@ class ProfGraphBuilder: all_nodes.extend(virtual_nodes) for node in all_nodes: node.update_child_precision_index() + if node.node_type != 
Constant.OPERATOR_TYPE: + node.update_overall_metrics(self._prof_data.get(Constant.OVERLAP_ANALYSIS_EVENT, [])) return all_nodes def find_bwd_module(self) -> list: @@ -121,20 +135,29 @@ class ProfGraphBuilder: if op.tid == bwd_tid: bwd_op_list.append(op) pre_status = Constant.BACKWARD + continue elif pre_status == Constant.BACKWARD: bwd_module_list.append(self._create_event_bean_from_ops(bwd_op_list, "nn.Module: BACKWARD")) + bwd_module_list.extend(self._match_fwd_module(module_list, fwdbwd_flow, bwd_op_list)) bwd_op_list.clear() pre_status = Constant.FWD_OR_OPT + if bwd_op_list: + bwd_module_list.append(self._create_event_bean_from_ops(bwd_op_list, "nn.Module: BACKWARD")) + bwd_module_list.extend(self._match_fwd_module(module_list, fwdbwd_flow, bwd_op_list)) + bwd_op_list.clear() + return bwd_module_list + def _match_fwd_module(self, module_list, fwdbwd_flow, bwd_op_list): # 通过连线匹配正向module,构建出反向的整体module关系 - root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({}))[0] - fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, cpu_op_list) + bwd_module_list = [] + all_nodes = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({})) + root_node = all_nodes[0] + fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, bwd_op_list) for start_time, end_events in fwdbwd_flow_dict.items(): matched_node = root_node.binary_search(start_time) while matched_node != Constant.INVALID_RETURN: matched_node.update_bwd_op(end_events) matched_node = matched_node.binary_search(start_time) - all_nodes = root_node.find_all_child_nodes() for module_node in all_nodes: if module_node.bwd_op_list: bwd_module_list.append( diff --git a/profiler/module_visualization/prof_parse/prof_data_pre_process.py b/profiler/module_visualization/prof_parse/prof_data_pre_process.py index c16daaecd7..2b5291ea3e 100644 --- a/profiler/module_visualization/prof_parse/prof_data_pre_process.py +++ b/profiler/module_visualization/prof_parse/prof_data_pre_process.py @@ -28,8 
+28,10 @@ class ProfDataPreProcess: self._kernel_details_path = "" self._kernel_pid = None self._hccl_pid = None + self._overlap_analysis_pid = None self._result_data = {Constant.CPU_OP_EVENT: [], Constant.MODULE_EVENT: [], Constant.KERNEL_EVENT: [], - Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: []} + Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}, Constant.HCCL_EVENT: [], + Constant.OVERLAP_ANALYSIS_EVENT: []} @staticmethod def _check_trace_data(trace_data): @@ -72,7 +74,7 @@ class ProfDataPreProcess: self._check_trace_data(trace_data) iter_trace_data = [TraceEventBean(data) for data in trace_data] for event in iter_trace_data: - if self._kernel_pid is not None and self._hccl_pid is not None: + if self._kernel_pid is not None and self._hccl_pid is not None and self._overlap_analysis_pid is not None: break if not event.is_meta(): continue @@ -80,6 +82,8 @@ class ProfDataPreProcess: self._kernel_pid = event.pid elif event.is_hccl_process(): self._hccl_pid = event.pid + elif event.is_overlap_analysis_process(): + self._overlap_analysis_pid = event.pid if self._kernel_pid is None: msg = "There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." raise RuntimeError(msg) @@ -108,6 +112,8 @@ class ProfDataPreProcess: self._result_data[Constant.KERNEL_EVENT].append(event) elif event.is_hccl_event(self._hccl_pid): self._result_data[Constant.HCCL_EVENT].append(event) + elif event.is_overlap_analysis_event(self._overlap_analysis_pid): + self._result_data[Constant.OVERLAP_ANALYSIS_EVENT].append(event) def _parse_kernel_details(self): if not self._kernel_details_path: @@ -124,6 +130,6 @@ class ProfDataPreProcess: if not self._result_data.get(Constant.CPU_OP_EVENT): msg = "This data does not have any aten operator, please make sure to enable the CPU switch." 
raise RuntimeError(msg) - if not self._result_data.get(Constant.MODULE_EVENT): + if not [event for event in self._result_data.get(Constant.MODULE_EVENT) if event.is_nn_module()]: msg = "This data does not collect any modules, please make sure to enable the with_stack or with_modules." raise RuntimeError(msg) diff --git a/profiler/prof_common/constant.py b/profiler/prof_common/constant.py index 90ec6d006e..b0c8877f56 100644 --- a/profiler/prof_common/constant.py +++ b/profiler/prof_common/constant.py @@ -24,6 +24,7 @@ class Constant(object): TORCH_TO_NPU_FLOW = "torch_to_device" KERNEL_EVENT = "kernel_event" HCCL_EVENT = "hccl_event" + OVERLAP_ANALYSIS_EVENT = "overlap_event" FWD_BWD_FLOW = "fwd_to_bwd" NPU_ROOT_ID = "NPU" @@ -35,3 +36,13 @@ class Constant(object): MODULE_TYPE = 0 OPERATOR_TYPE = 1 VIRTUAL_TYPE = 9 + + # trace bar + NPU_BAR = "Ascend Hardware" + HCCL_BAR = "HCCL" + OVERLAP_BAR = "Overlap Analysis" + + # overlap_analysis event + COMPUTING_EVENT = "Computing" + FREE_EVENT = "Free" + UNCOVERED_COMMUNICATION_EVENT = "Communication(Not Overlapped)" diff --git a/profiler/prof_common/kernel_bean.py b/profiler/prof_common/kernel_bean.py index cbfa10c0a9..4d60a69080 100644 --- a/profiler/prof_common/kernel_bean.py +++ b/profiler/prof_common/kernel_bean.py @@ -30,6 +30,10 @@ class KernelBean: def start_time(self): return convert_to_decimal(self._ts) + @property + def end_time(self): + return self.start_time + convert_to_decimal(self.dur) + @property def is_computing_op(self): return self._core_type != "HCCL" diff --git a/profiler/prof_common/trace_event_bean.py b/profiler/prof_common/trace_event_bean.py index 0aee79907b..f1ba62e69b 100644 --- a/profiler/prof_common/trace_event_bean.py +++ b/profiler/prof_common/trace_event_bean.py @@ -14,6 +14,7 @@ # limitations under the License. 
from decimal import Decimal +from profiler.prof_common.constant import Constant from profiler.prof_common.utils import convert_to_decimal from profiler.prof_common.analyze_dict import AnalyzeDict @@ -84,8 +85,14 @@ class TraceEventBean(AnalyzeDict): def is_hccl_event(self, hccl_pid): return self.ph == "X" and self.pid == hccl_pid and self.name.startswith("hcom_") + def is_overlap_analysis_event(self, overlap_analysis_pid): + return self.ph == "X" and self.pid == overlap_analysis_pid + def is_npu_process(self): - return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "Ascend Hardware" + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == Constant.NPU_BAR def is_hccl_process(self): - return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == "HCCL" + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == Constant.HCCL_BAR + + def is_overlap_analysis_process(self): + return self.ph == "M" and self.name == "process_name" and self.args.get("name", "") == Constant.OVERLAP_BAR -- Gitee From 7b579615949c1f99792f07bb19311152b76953b0 Mon Sep 17 00:00:00 2001 From: litian_drinksnow <1063185601@qq.com> Date: Tue, 27 Aug 2024 18:52:12 +0800 Subject: [PATCH 048/141] merge kj600 --- .../kj600/kj600/anomaly_analyse.py | 248 ++++++++++++++ .../accuracy_tools/kj600/kj600/file_check.py | 324 ++++++++++++++++++ .../kj600/kj600/unittest/test_monitor.py | 145 ++++++++ 3 files changed, 717 insertions(+) create mode 100644 debug/accuracy_tools/kj600/kj600/anomaly_analyse.py create mode 100644 debug/accuracy_tools/kj600/kj600/file_check.py create mode 100644 debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py diff --git a/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py new file mode 100644 index 0000000000..f6069db6fb --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/anomaly_analyse.py @@ -0,0 
+1,248 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import argparse +import ast +import fcntl +import heapq +import json +import os +from pathlib import Path +import sys + +from kj600.utils import print_info_log, print_warn_log +from kj600.anomaly_detect import GradAnomalyData +from kj600.file_check import ( + change_mode, + check_link, + FileCheckConst, + check_path_before_create, + FileChecker, + FileOpen, +) + +ANOMALY_JSON = "anomaly.json" +ANALYSE_JSON = "anomaly_analyse.json" + +class AnomalyDataWriter: + """ + 异常数据写入类,负责将异常数据写入到JSON文件中。 + """ + + def __init__(self, dump_path, rank) -> None: + self.dump_path = dump_path + self.dump_rank_dir = os.path.join(self.dump_path, f"rank{rank}") + self.json_path = os.path.join(self.dump_rank_dir, ANOMALY_JSON) + + @staticmethod + def get_anomaly_dict(anomalies): + """将GradAnomalyData列表转换为json""" + anomalies_json = {} + for anomaly in anomalies: + anomalies_json.update({anomaly.get_key(): anomaly.to_dict()}) + return anomalies_json + + @staticmethod + def update_data_in_single_json(json_path, anomalies_data): + with FileOpen(json_path, "w+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + json.dump(anomalies_data, f, indent=1) + fcntl.flock(f, fcntl.LOCK_UN) + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def init_detected_json(self): + """初始化落盘文件""" + 
check_path_before_create(self.dump_path) + if not os.path.exists(self.dump_path): + Path(self.dump_path).mkdir( + mode=FileCheckConst.DATA_DIR_AUTHORITY, exist_ok=True + ) + file_check = FileChecker(self.dump_path, FileCheckConst.DIR) + file_check.common_check() + + if not os.path.exists(self.dump_rank_dir): + Path(self.dump_rank_dir).mkdir( + FileCheckConst.DATA_DIR_AUTHORITY, parents=True, exist_ok=True + ) + + if os.path.exists(self.json_path): + file_check = FileChecker( + self.json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {self.json_path}.") + os.remove(self.json_path) + Path(self.json_path).touch() + change_mode(self.json_path, FileCheckConst.DATA_FILE_AUTHORITY) + + def write_detected_json(self, anomalies): + """ + 落盘异常数据 + Args: + anomalies: GradAnomalyData对象列表 + """ + anomalies_json = self.get_anomaly_dict(anomalies) + print_info_log(f"{ANOMALY_JSON} is at {self.dump_rank_dir}.") + if Path(self.json_path).exists() and os.path.getsize(self.json_path) > 0: + with FileOpen(self.json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_to_write = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + else: + data_to_write = {} + data_to_write.update(anomalies_json) + self.update_data_in_single_json(self.json_path, data_to_write) + + +class AnomalyDataLoader: + def __init__(self, data_path) -> None: + self.data_path = data_path + + @staticmethod + def create_instances_from_dict(anomalies_dict: dict): + instances = [] + for values in anomalies_dict.values(): + try: + instances.append(GradAnomalyData(**values)) + except KeyError as e: + print_warn_log(f"Missing key in anomaly data: {e}") + except ValueError as e: + print_warn_log( + f"Value error when creating a GradAnomalyData instance: {e}" + ) + return instances + + def get_anomalies_from_jsons(self): + """遍历文件夹,从rankK/anomaly.json中读取异常数据 + return: anomalies: GradAnomalyData对象列表 + """ + anomalies = [] + 
check_link(self.data_path) + for rank_dir in os.listdir(self.data_path): + rank_path = os.path.join(self.data_path, rank_dir) + if not os.path.isdir(rank_path): + continue + json_path = os.path.join(rank_path, ANOMALY_JSON) + if not os.path.exists(json_path): + continue + with FileOpen(json_path, "r+") as f: + fcntl.flock(f, fcntl.LOCK_EX) + data_anomalies = json.load(f) + fcntl.flock(f, fcntl.LOCK_UN) + instances = self.create_instances_from_dict(data_anomalies) + anomalies.extend(instances) + return anomalies + + +class AnomalyAnalyse: + def __init__(self) -> None: + self.sorted_anomalies = [] + + def get_range_top_K(self, topk, step_list, anomalies): + """ + 获取前topk个step_list范围内的异常。 + """ + if not step_list: + filtered_anomalies = anomalies + else: + filtered_anomalies = [ + anomaly for anomaly in anomalies if anomaly.step in step_list + ] + if topk >= len(filtered_anomalies): + self.sorted_anomalies = sorted(filtered_anomalies) + else: + self.sorted_anomalies = list(heapq.nsmallest(topk, filtered_anomalies)) + return self.sorted_anomalies + + def rewrite_sorted_anomalies(self, output_path): + """ + 将排序后的异常数据重新落盘 + """ + file_check = FileChecker( + output_path, FileCheckConst.DIR, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + + sorted_data = AnomalyDataWriter.get_anomaly_dict(self.sorted_anomalies) + print_info_log(f"{ANALYSE_JSON} is at {output_path}.") + json_path = os.path.join(output_path, ANALYSE_JSON) + if os.path.exists(json_path): + file_check = FileChecker( + json_path, FileCheckConst.FILE, FileCheckConst.WRITE_ABLE + ) + file_check.common_check() + print_warn_log(f"The existing file will be deleted: {json_path}.") + os.remove(json_path) + Path(json_path).touch() + change_mode(json_path, FileCheckConst.DATA_FILE_AUTHORITY) + AnomalyDataWriter.update_data_in_single_json(json_path, sorted_data) + + +def _get_parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--data_path", dest="data_path_dir", default="./", 
type=str, + help=" The anomaly detect result dictionary: generate from kj600 tool.", + required=True, + ) + parser.add_argument("-o", "--out_path", dest="out_path", default="", type=str, + help=" The analyse task result out path.", + required=False, + ) + parser.add_argument("-k", "--topk", dest="top_k_number", default=8, type=int, + help=" Top K number of earliest anomalies.", + required=False, + ) + parser.add_argument("-s", "--step", dest="step_list", default=[], type=str, + help=" Analyse which steps.", + required=False, + ) + return parser.parse_args(sys.argv[1:]) + +def _get_step_and_stop(args): + try: + step_list = ast.literal_eval(args.step_list) + if not isinstance(step_list, list): + raise ValueError(f"{args.step_list} is not a list") + except (ValueError, SyntaxError, RecursionError) as e: + raise Exception( + f"The step list must be a resolvable list type" + ) from e + if args.top_k_number <= 0: + raise Exception("The top k number must be greater than 0.") + return step_list, args.top_k_number + +def _anomaly_analyse(): + args = _get_parse_args() + step_list, top_k_number = _get_step_and_stop(args) + loader = AnomalyDataLoader(args.data_path_dir) + anomalies = loader.get_anomalies_from_jsons() + analyser = AnomalyAnalyse() + top_anomalies = analyser.get_range_top_K( + top_k_number, step_list, anomalies + ) + analyser.rewrite_sorted_anomalies( + args.out_path if args.out_path else args.data_path_dir + ) + + print_info_log(f"Top {top_k_number} anomalies are listed as follows:") + for index, anomaly in enumerate(top_anomalies): + print_info_log(f"{index}: {anomaly.message}") + + +if __name__ == "__main__": + _anomaly_analyse() + print_info_log("Analyse task completed.") diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py new file mode 100644 index 0000000000..21f9e351a2 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/file_check.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 
-*- +""" +# Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +import os +import re + +from kj600.utils import print_info_log + + +class CodedException(Exception): + def __init__(self, code, error_info=""): + super().__init__() + self.code = code + self.error_info = self.err_strs.get(code) + error_info + + def __str__(self): + return self.error_info + + +class FileCheckException(CodedException): + INVALID_FILE_ERROR = 0 + FILE_PERMISSION_ERROR = 1 + SOFT_LINK_ERROR = 2 + ILLEGAL_PATH_ERROR = 3 + ILLEGAL_PARAM_ERROR = 4 + FILE_TOO_LARGE_ERROR = 5 + + err_strs = { + SOFT_LINK_ERROR: "[kj600] 检测到软链接: ", + FILE_PERMISSION_ERROR: "[kj600] 文件权限错误: ", + INVALID_FILE_ERROR: "[kj600] 无效文件: ", + ILLEGAL_PATH_ERROR: "[kj600] 非法文件路径: ", + ILLEGAL_PARAM_ERROR: "[kj600] 非法打开方式: ", + FILE_TOO_LARGE_ERROR: "[kj600] 文件过大: ", + } + + +class FileCheckConst: + """ + Class for file check const + """ + + READ_ABLE = "read" + WRITE_ABLE = "write" + READ_WRITE_ABLE = "read and write" + DIRECTORY_LENGTH = 4096 + FILE_NAME_LENGTH = 255 + FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" + FILE_PATTERN = r"^[a-zA-Z0-9_./-]+$" + JSON_SUFFIX = ".json" + MAX_JSON_SIZE = 1073741824 # 1 * 1024 * 1024 * 1024 + DIR = "dir" + FILE = "file" + DATA_DIR_AUTHORITY = 0o750 + DATA_FILE_AUTHORITY = 0o640 + FILE_SIZE_DICT = { + JSON_SUFFIX: MAX_JSON_SIZE, + } + + +class FileChecker: + """ + The class for check file. 
+ + Attributes: + file_path: The file or dictionary path to be verified. + path_type: file or dictionary + ability(str): FileCheckConst.WRITE_ABLE or FileCheckConst.READ_ABLE to set file has writability or readability + file_type(str): The correct file type for file + """ + + def __init__( + self, file_path, path_type, ability=None, file_type=None, is_script=True + ): + self.file_path = file_path + self.path_type = self._check_path_type(path_type) + self.ability = ability + self.file_type = file_type + self.is_script = is_script + + @staticmethod + def _check_path_type(path_type): + if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: + print_info_log( + f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." + ) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + return path_type + + def common_check(self): + """ + 功能:用户校验基本文件权限:软连接、文件长度、是否存在、读写权限、文件属组、文件特殊字符 + 注意:文件后缀的合法性,非通用操作,可使用其他独立接口实现 + """ + check_path_exists(self.file_path) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + check_path_type(self.file_path, self.path_type) + self.check_path_ability() + if self.is_script: + check_path_owner_consistent(self.file_path) + check_path_pattern_vaild(self.file_path) + check_common_file_size(self.file_path) + check_file_suffix(self.file_path, self.file_type) + return self.file_path + + def check_path_ability(self): + if self.ability == FileCheckConst.WRITE_ABLE: + check_path_writability(self.file_path) + if self.ability == FileCheckConst.READ_ABLE: + check_path_readability(self.file_path) + if self.ability == FileCheckConst.READ_WRITE_ABLE: + check_path_readability(self.file_path) + check_path_writability(self.file_path) + + +class FileOpen: + """ + The class for open file by a safe way. + + Attributes: + file_path: The file or dictionary path to be opened. 
+ mode(str): The file open mode + """ + + SUPPORT_READ_MODE = ["r", "rb"] + SUPPORT_WRITE_MODE = ["w", "wb", "a", "ab"] + SUPPORT_READ_WRITE_MODE = ["r+", "rb+", "w+", "wb+", "a+", "ab+"] + + def __init__(self, file_path, mode, encoding="utf-8"): + self.file_path = file_path + self.mode = mode + self.encoding = encoding + self._handle = None + + def __enter__(self): + self.check_file_path() + binary_mode = "b" + if binary_mode not in self.mode: + self._handle = open(self.file_path, self.mode, encoding=self.encoding) + else: + self._handle = open(self.file_path, self.mode) + return self._handle + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._handle: + self._handle.close() + + def check_file_path(self): + support_mode = ( + self.SUPPORT_READ_MODE + + self.SUPPORT_WRITE_MODE + + self.SUPPORT_READ_WRITE_MODE + ) + if self.mode not in support_mode: + print_info_log("File open not support %s mode" % self.mode) + raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) + check_link(self.file_path) + self.file_path = os.path.realpath(self.file_path) + check_path_length(self.file_path) + self.check_ability_and_owner() + check_path_pattern_vaild(self.file_path) + if os.path.exists(self.file_path): + check_common_file_size(self.file_path) + + def check_ability_and_owner(self): + if self.mode in self.SUPPORT_READ_MODE: + check_path_exists(self.file_path) + check_path_readability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_WRITE_MODE and os.path.exists(self.file_path): + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + if self.mode in self.SUPPORT_READ_WRITE_MODE and os.path.exists(self.file_path): + check_path_readability(self.file_path) + check_path_writability(self.file_path) + check_path_owner_consistent(self.file_path) + + +def check_link(path): + abs_path = os.path.abspath(path) + if os.path.islink(abs_path): + print_info_log("The file path {} is a soft 
link.".format(path)) + raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) + + +def check_path_length(path, name_length=None): + file_max_name_length = ( + name_length if name_length else FileCheckConst.FILE_NAME_LENGTH + ) + if ( + len(path) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(path)) > file_max_name_length + ): + print_info_log("The file path length exceeds limit.") + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_exists(path): + if not os.path.exists(path): + print_info_log("The file path %s does not exist." % path) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_path_readability(path): + if not os.access(path, os.R_OK): + print_info_log("The file path %s is not readable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_writability(path): + if not os.access(path, os.W_OK): + print_info_log("The file path %s is not writable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_executable(path): + if not os.access(path, os.X_OK): + print_info_log("The file path %s is not executable." % path) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_other_user_writable(path): + st = os.stat(path) + if st.st_mode & 0o002: + print_info_log( + "The file path %s may be insecure because other users have write permissions. " + % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_owner_consistent(path): + file_owner = os.stat(path).st_uid + if file_owner != os.getuid(): + print_info_log( + "The file path %s may be insecure because is does not belong to you." % path + ) + raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) + + +def check_path_pattern_vaild(path): + if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): + print_info_log("The file path %s contains special characters." 
% (path)) + raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) + + +def check_file_size(file_path, max_size): + file_size = os.path.getsize(file_path) + if file_size >= max_size: + print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") + raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) + + +def check_common_file_size(file_path): + if os.path.isfile(file_path): + for suffix, max_size in FileCheckConst.FILE_SIZE_DICT.items(): + if file_path.endswith(suffix): + check_file_size(file_path, max_size) + break + + +def check_file_suffix(file_path, file_suffix): + if file_suffix: + if not file_path.endswith(file_suffix): + print_info_log(f"The {file_path} should be a {file_suffix} file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_type(file_path, file_type): + if file_type == FileCheckConst.FILE: + if not os.path.isfile(file_path): + print_info_log(f"The {file_path} should be a file!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + if file_type == FileCheckConst.DIR: + if not os.path.isdir(file_path): + print_info_log(f"The {file_path} should be a dictionary!") + raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) + + +def check_path_before_create(path): + if path_len_exceeds_limit(path): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, "The file path length exceeds limit." + ) + + if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): + raise FileCheckException( + FileCheckException.ILLEGAL_PATH_ERROR, + "The file path {} contains special characters.".format(path), + ) + + +def change_mode(path, mode): + if not os.path.exists(path) or os.path.islink(path): + return + try: + os.chmod(path, mode) + except PermissionError as ex: + raise FileCheckException( + FileCheckException.FILE_PERMISSION_ERROR, + "Failed to change {} authority. 
{}".format(path, str(ex)), + ) from ex + + +def path_len_exceeds_limit(file_path): + return ( + len(os.path.realpath(file_path)) > FileCheckConst.DIRECTORY_LENGTH + or len(os.path.basename(file_path)) > FileCheckConst.FILE_NAME_LENGTH + ) diff --git a/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py new file mode 100644 index 0000000000..ddea3244f5 --- /dev/null +++ b/debug/accuracy_tools/kj600/kj600/unittest/test_monitor.py @@ -0,0 +1,145 @@ +import sys +import os +import re +import argparse +import pandas as pd +from glob import glob +from collections import defaultdict + + +def parse_logfile(logfile): + grad_norm = [] + step = [] + with open(logfile) as f: + for line in f.readlines(): + if 'consumed samples' in line: + grad_norm.append(float(re.findall('(?<=grad norm\: )[\d\.]*', line)[0])) + # step = int(re.findall('(?<=iteration)[ \d]*', line)[0]) + return grad_norm + + +def parse_monitor_output(output_dir): + reduced = {} + unreduced = {} + for dir in glob(output_dir+'*'): + rank = int(re.findall('(?<=rank)[\d]*', dir)[0]) + unreduced[rank] = [] + reduced[rank] = [] + for file in os.listdir(dir): + # step = int(re.search("(?<=reduced\_)[\d]*", file)[0]) + # if step != 0: + # continue + df = pd.read_csv(os.path.join(dir, file)) + if '_unreduced_' in file: + unreduced[rank].append(df) + pass + elif '_reduced_' in file: + reduced[rank].append(df) + else: + print(f'unexpected file {file} in {dir}') + return reduced, unreduced + +def valid_reduce(reduced, unreduced, tp_size, dp_size, sequence_parallel): + steps = len(reduced[0]) + world_size = len(reduced) + errors = [] + for index, row in unreduced[0][0].iterrows(): + param = row['param_name'] + is_tp_duplicate = False + for step in range(2): + # sum reduced + reduced_mean = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + continue + df = reduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + if step == 0: + is_tp_duplicate = True + continue + reduced_mean += value[0] + + # sum unreduced + unreduced_mean = 0. + for rank in range(world_size): + df = unreduced[rank][step] + value = list(df[df['param_name'] == param]['mean']) + if value == []: + continue + unreduced_mean += list(df[df['param_name'] == param]['mean'])[0] + + unreduced_mean /= dp_size + if is_tp_duplicate and (not sequence_parallel or 'embedding' in param): + unreduced_mean /= tp_size + try: + assert_equal(unreduced_mean, reduced_mean) + except AssertionError as e: + errors.append([param, step, e, is_tp_duplicate]) + if errors: + print(errors) + else: + print(f'grad mean is in consist between unreduced grad and reduced grad monitord.') + + + +def assert_equal(a, b): + if b == 0 or a == 0: + return + if b == 0: + rel_diff = a + elif a == 0: + rel_diff = b + else: + rel_diff = abs(a/b-1) + assert rel_diff<0.01, f'{a}, {b}, {rel_diff}' + + +def valid_total_norm(total_norm, reduced, duplicate_embedding): + steps = len(total_norm) + world_size = len(reduced) + errors = [] + for step in range(steps): + calculated_norm = 0. 
+ for rank in range(world_size): + if len(reduced[rank]) == 0: + if step == 0: + print(f'rank {rank} is duplicated in dp group') + continue + for index, row in reduced[rank][step].iterrows(): + if duplicate_embedding and 'word_embedding' in row['param_name']: + continue + calculated_norm += row['norm']**2 + try: + assert_equal(calculated_norm**0.5, total_norm[step]) + except AssertionError as e: + errors.append([step, e]) + if errors: + print('total norm errors: ', errors) + else: + print('grad norm in consist between training log and reduced gradients monitored') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--monitor_output', '-m', type=str, required=True, help='path prefix to the output of monitor e.g. kj600_output/Aug12_07-16') + parser.add_argument('--logfile', '-l', type=str, required=True, help='path to the training log file') + parser.add_argument('--tp_size', '-t', type=int, required=True, help='tp parallel size') + parser.add_argument('--dp_size', '-d', type=int, required=True, help='dp parallel size') + parser.add_argument('--pp_size', '-p', type=int, required=True, help='pp parallel size') + parser.add_argument('--untie_embeddings_and_output_weights', '-u', action="store_true", default=False, help='whether untie_embeddings_and_output_weights in pp parallel') + parser.add_argument('--sequence_parallel', '-s', action="store_true", default=False, help='whether sequence parallel is enabled. 
Add -s to store true') + + args = parser.parse_args() + + assert args.tp_size > 0, 'if tp not enabled, set tp_size = 1' + assert args.dp_size > 0, 'if tp not enabled, set dp_size = 1' + assert args.pp_size > 0, 'if tp not enabled, set pp_size = 1' + + total_norm = parse_logfile(args.logfile) + reduced, unreduced = parse_monitor_output(args.monitor_output) + + duplicate_embedding = not args.untie_embeddings_and_output_weights and args.pp_size > 1 + + valid_total_norm(total_norm, reduced, duplicate_embedding) + valid_reduce(reduced, unreduced, args.tp_size, args.dp_size, args.sequence_parallel) \ No newline at end of file -- Gitee From 82c6f7155c96748daf92d06a0ba115f805cc0fe7 Mon Sep 17 00:00:00 2001 From: litian_drinksnow <1063185601@qq.com> Date: Wed, 28 Aug 2024 15:01:14 +0800 Subject: [PATCH 049/141] fix reviews --- .../accuracy_tools/kj600/kj600/file_check.py | 51 ++++++++----------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/debug/accuracy_tools/kj600/kj600/file_check.py b/debug/accuracy_tools/kj600/kj600/file_check.py index 21f9e351a2..80f456a628 100644 --- a/debug/accuracy_tools/kj600/kj600/file_check.py +++ b/debug/accuracy_tools/kj600/kj600/file_check.py @@ -17,7 +17,7 @@ import os import re -from kj600.utils import print_info_log +from kj600.utils import print_error_log class CodedException(Exception): @@ -94,7 +94,7 @@ class FileChecker: @staticmethod def _check_path_type(path_type): if path_type not in [FileCheckConst.DIR, FileCheckConst.FILE]: - print_info_log( + print_error_log( f"The path_type must be {FileCheckConst.DIR} or {FileCheckConst.FILE}." 
) raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) @@ -167,7 +167,7 @@ class FileOpen: + self.SUPPORT_READ_WRITE_MODE ) if self.mode not in support_mode: - print_info_log("File open not support %s mode" % self.mode) + print_error_log(f"File open not support {self.mode} mode") raise FileCheckException(FileCheckException.ILLEGAL_PARAM_ERROR) check_link(self.file_path) self.file_path = os.path.realpath(self.file_path) @@ -194,52 +194,45 @@ class FileOpen: def check_link(path): abs_path = os.path.abspath(path) if os.path.islink(abs_path): - print_info_log("The file path {} is a soft link.".format(path)) + print_error_log(f"The file path {path} is a soft link.") raise FileCheckException(FileCheckException.SOFT_LINK_ERROR) -def check_path_length(path, name_length=None): - file_max_name_length = ( - name_length if name_length else FileCheckConst.FILE_NAME_LENGTH - ) - if ( - len(path) > FileCheckConst.DIRECTORY_LENGTH - or len(os.path.basename(path)) > file_max_name_length - ): - print_info_log("The file path length exceeds limit.") +def check_path_length(path): + if path_len_exceeds_limit(path): + print_error_log("The file path length exceeds limit.") raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) def check_path_exists(path): if not os.path.exists(path): - print_info_log("The file path %s does not exist." % path) + print_error_log(f"The file path {path} does not exist.") raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) def check_path_readability(path): if not os.access(path, os.R_OK): - print_info_log("The file path %s is not readable." % path) + print_error_log(f"The file path {path} is not readable.") raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_path_writability(path): if not os.access(path, os.W_OK): - print_info_log("The file path %s is not writable." 
% path) + print_error_log(f"The file path {path} is not writable.") raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_path_executable(path): if not os.access(path, os.X_OK): - print_info_log("The file path %s is not executable." % path) + print_error_log(f"The file path {path} is not executable.") raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_other_user_writable(path): st = os.stat(path) if st.st_mode & 0o002: - print_info_log( - "The file path %s may be insecure because other users have write permissions. " - % path + print_error_log( + f"The file path {path} may be insecure because other users have write permissions. " ) raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) @@ -247,22 +240,22 @@ def check_other_user_writable(path): def check_path_owner_consistent(path): file_owner = os.stat(path).st_uid if file_owner != os.getuid(): - print_info_log( - "The file path %s may be insecure because is does not belong to you." % path + print_error_log( + f"The file path {path} may be insecure because is does not belong to you." ) raise FileCheckException(FileCheckException.FILE_PERMISSION_ERROR) def check_path_pattern_vaild(path): if not re.match(FileCheckConst.FILE_VALID_PATTERN, path): - print_info_log("The file path %s contains special characters." 
% (path)) + print_error_log(f"The file path {path} contains special characters.") raise FileCheckException(FileCheckException.ILLEGAL_PATH_ERROR) def check_file_size(file_path, max_size): file_size = os.path.getsize(file_path) if file_size >= max_size: - print_info_log(f"The size of file path {file_path} exceeds {max_size} bytes.") + print_error_log(f"The size of file path {file_path} exceeds {max_size} bytes.") raise FileCheckException(FileCheckException.FILE_TOO_LARGE_ERROR) @@ -277,18 +270,18 @@ def check_common_file_size(file_path): def check_file_suffix(file_path, file_suffix): if file_suffix: if not file_path.endswith(file_suffix): - print_info_log(f"The {file_path} should be a {file_suffix} file!") + print_error_log(f"The {file_path} should be a {file_suffix} file!") raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) def check_path_type(file_path, file_type): if file_type == FileCheckConst.FILE: if not os.path.isfile(file_path): - print_info_log(f"The {file_path} should be a file!") + print_error_log(f"The {file_path} should be a file!") raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) if file_type == FileCheckConst.DIR: if not os.path.isdir(file_path): - print_info_log(f"The {file_path} should be a dictionary!") + print_error_log(f"The {file_path} should be a dictionary!") raise FileCheckException(FileCheckException.INVALID_FILE_ERROR) @@ -301,7 +294,7 @@ def check_path_before_create(path): if not re.match(FileCheckConst.FILE_PATTERN, os.path.realpath(path)): raise FileCheckException( FileCheckException.ILLEGAL_PATH_ERROR, - "The file path {} contains special characters.".format(path), + f"The file path {path} contains special characters." ) @@ -313,7 +306,7 @@ def change_mode(path, mode): except PermissionError as ex: raise FileCheckException( FileCheckException.FILE_PERMISSION_ERROR, - "Failed to change {} authority. {}".format(path, str(ex)), + f"Failed to change {path} authority. 
{str(ex)}", ) from ex -- Gitee From 241df4110ae49fe4a123c3002b96912edbf6925a Mon Sep 17 00:00:00 2001 From: heweidong7 <511650494@qq.com> Date: Sat, 31 Aug 2024 11:12:38 +0800 Subject: [PATCH 050/141] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=99=A8update=E5=92=8Cratio=E5=90=91=E9=87=8F=E7=9A=84?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/workspace.xml | 67 ++++++++++--------- .../kj600/kj600/optimizer_collect.py | 14 ++-- 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/.idea/workspace.xml b/.idea/workspace.xml index a364b7d06a..c483749097 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -4,7 +4,9 @@