From c437aad8f4a615476f9d10bb998bf2b285742921 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 25 Jul 2024 17:32:26 +0800 Subject: [PATCH 01/67] =?UTF-8?q?=E6=8F=90=E7=82=BC=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mindspore/dump/hook_cell/api_registry.py | 93 +++++++++++++++++++ .../msprobe/mindspore/service.py | 2 + 2 files changed, 95 insertions(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 5508416fde0..2f032d93d7c 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -13,12 +13,18 @@ # limitations under the License. # ============================================================================ +import os +import functools import mindspore as ms +from mindspore.common.tensor import Tensor +from msprobe.core.common.utils import Const +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor from msprobe.core.common.utils import Const +PRIMITIVE_PREFIX = "Primitive" class ApiRegistry: def __init__(self): @@ -35,6 +41,7 @@ class ApiRegistry: self.norm_inner_ops_hook_attr = {} self.norm_inner_ops = ["norm", "square", "sqrt", "is_complex"] + self.primitive_counters = {} @staticmethod def store_ori_attr(ori_api_group, api_list, api_ori_attr): @@ -100,5 +107,91 @@ class ApiRegistry: if attr_name.startswith(Const.ATTR_NAME_PREFIX): self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, 
attr_name) + def wrap_primitive(self, origin_func, primitive_name, service_instance): + primitive_instance = self + def func(self, *args, **kwargs): + if primitive_name not in primitive_instance.primitive_counters: + primitive_instance.primitive_counters[primitive_name] = 0 + else: + primitive_instance.primitive_counters[primitive_name] += 1 + + current_count = primitive_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] + captured_grads_output = [] + + def input_backward_hook(grad): + print(f"Grad input length: {len(grad)}") + print("Captured input grad:", grad) + captured_grads_input.append(grad) + backward_primitive_name = updated_primitive_name + Const.BACKWARD + new_module_input_output = ModuleBackwardInputsOutputs( + grad_input=tuple(captured_grads_input), + grad_output=tuple(captured_grads_output) if captured_grads_output else None + ) + service_instance.data_collector.backward_data_collect( + backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + ) +#1未考虑多输出场景 +# 如果时多grad呢 +# 3 输出的序号问题 + def output_backward_hook(grad): + captured_grads_output.append(grad) + backward_primitive_name = primitive_name + Const.BACKWARD + new_module_input_output = ModuleBackwardInputsOutputs( + grad_input=None, + grad_output=tuple(captured_grads_output) + ) + service_instance.data_collector.backward_data_collect( + backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + ) + + if not service_instance.switch: + return origin_func(*args, **kwargs) + + print(f"Entering {updated_primitive_name} hook, number of args: {len(args)}, name: {self.name}") + hooked_inputs = [] + + # for idx, arg in enumerate(args): + # if isinstance(arg, Tensor): + # arg_hooked = ops.HookBackward(input_backward_hook)(arg) + # hooked_inputs.append(arg_hooked) + # else: + # hooked_inputs.append(arg) + + out = origin_func(*arg, **kwargs) + 
forward_primitive_name = updated_primitive_name + Const.FORWARD + + if service_instance.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) + service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + if service_instance.data_collector.if_return_forward_new_output(): + out = service_instance.data_collector.get_forward_new_output() + + if isinstance(out, Tensor): + out = ops.HookBackward(output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + out = tuple(hooked_outputs) + + return out + + return func + + def register_hooks(self, service_instance): + primitive_set = set() + for name, cell in self.model.cells_and_names(): + for pname, primitive in cell._primitives.items(): + primitive_set.add((pname, primitive)) + + for pname, primitive in primitive_set: + print("primitive name is", pname) + NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) + primitive.__class__ = NewPrimitive api_register = ApiRegistry() diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index e8aa34dc4fe..8d802e14d06 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -136,3 +136,5 @@ class Service: if self.config.level == "L1": api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() + if self.model: + api_register.register_hooks(self) -- Gitee From e39e7175accd5ff882432572c889f1336a84cb8e Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 25 Jul 2024 20:28:17 +0800 Subject: [PATCH 
02/67] =?UTF-8?q?=E8=B7=91=E9=80=9Aprimitive?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mindspore/debugger/precision_debugger.py | 4 +-- .../mindspore/dump/hook_cell/api_registry.py | 35 ++++--------------- 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 30f7162ff5c..28161c66855 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -27,12 +27,12 @@ class PrecisionDebugger: self.service = Service(self.config) @classmethod - def start(cls): + def start(cls, target=None): instance = cls._instance if not instance: raise Exception("No instance of PrecisionDebugger found.") if ms.get_context("mode") == ms.PYNATIVE_MODE and instance.config.level_ori == "L1": - instance.service.start() + instance.service.start(target) else: handler = TaskHandlerFactory.create(instance.config) handler.handle() diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 2f032d93d7c..03fd47e8fb1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -16,6 +16,7 @@ import os import functools import mindspore as ms +from mindspore import ops from mindspore.common.tensor import Tensor from msprobe.core.common.utils import Const from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs @@ -117,27 +118,11 @@ class ApiRegistry: current_count = primitive_instance.primitive_counters[primitive_name] updated_primitive_name = f"{PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] 
captured_grads_output = [] - def input_backward_hook(grad): - print(f"Grad input length: {len(grad)}") - print("Captured input grad:", grad) - captured_grads_input.append(grad) - backward_primitive_name = updated_primitive_name + Const.BACKWARD - new_module_input_output = ModuleBackwardInputsOutputs( - grad_input=tuple(captured_grads_input), - grad_output=tuple(captured_grads_output) if captured_grads_output else None - ) - service_instance.data_collector.backward_data_collect( - backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output - ) -#1未考虑多输出场景 -# 如果时多grad呢 -# 3 输出的序号问题 def output_backward_hook(grad): captured_grads_output.append(grad) - backward_primitive_name = primitive_name + Const.BACKWARD + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputsOutputs( grad_input=None, grad_output=tuple(captured_grads_output) @@ -149,21 +134,13 @@ class ApiRegistry: if not service_instance.switch: return origin_func(*args, **kwargs) - print(f"Entering {updated_primitive_name} hook, number of args: {len(args)}, name: {self.name}") hooked_inputs = [] - # for idx, arg in enumerate(args): - # if isinstance(arg, Tensor): - # arg_hooked = ops.HookBackward(input_backward_hook)(arg) - # hooked_inputs.append(arg_hooked) - # else: - # hooked_inputs.append(arg) - - out = origin_func(*arg, **kwargs) - forward_primitive_name = updated_primitive_name + Const.FORWARD + out = origin_func(*args, **kwargs) + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" if service_instance.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) + module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) if service_instance.data_collector.if_return_forward_new_output(): out = 
service_instance.data_collector.get_forward_new_output() @@ -185,7 +162,7 @@ class ApiRegistry: def register_hooks(self, service_instance): primitive_set = set() - for name, cell in self.model.cells_and_names(): + for name, cell in service_instance.model.cells_and_names(): for pname, primitive in cell._primitives.items(): primitive_set.add((pname, primitive)) -- Gitee From 6aaed48a6b8c22220b2a2e1e1878b8e2346aa185 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 11:11:06 +0800 Subject: [PATCH 03/67] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=BE=93=E5=85=A5?= =?UTF-8?q?=E6=A2=AF=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/base.py | 24 +++++++++++++++++++ .../mindspore/dump/hook_cell/api_registry.py | 14 +++++++++++ 2 files changed, 38 insertions(+) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 5d901291973..4bcf6418197 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -237,6 +237,30 @@ class BaseDataProcessor: return api_info_struct + def analyze_backward_input(self, name, module, module_input_output: ModuleBackwardInputsOutputs): + """ + Analyze and save backward input gradients. + """ + api_info_struct = {} + if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): + api_info_struct[name] = {} + self.api_data_category = Const.INPUT + output_info_list = self.analyze_element(module_input_output.grad_output_tuple) + api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list + return api_info_struct + + def analyze_backward_output(self, name, module, module_input_output: ModuleBackwardInputsOutputs): + """ + Analyze and save backward output gradients. 
+ """ + api_info_struct = {} + if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): + api_info_struct[name] = {} + self.api_data_category = Const.OUTPUT + input_info_list = self.analyze_element(module_input_output.grad_input_tuple) + api_info_struct[name][Const.GRAD_INPUT] = input_info_list + return api_info_struct + def get_save_file_path(self, suffix): file_format = Const.PT_SUFFIX if self.config.framework == Const.PT_FRAMEWORK else Const.NUMPY_SUFFIX dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP + diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 03fd47e8fb1..77e740011f1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -118,8 +118,22 @@ class ApiRegistry: current_count = primitive_instance.primitive_counters[primitive_name] updated_primitive_name = f"{PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] captured_grads_output = [] + def input_backward_hook(grad): + print(f"Grad input length: {len(grad)}") + print("Captured input grad:", grad) + captured_grads_input.append(grad) + backward_primitive_name = updated_primitive_name + Const.BACKWARD + new_module_input_output = ModuleBackwardInputsOutputs( + grad_input=tuple(captured_grads_input), + grad_output=tuple(captured_grads_output) if captured_grads_output else None + ) + service_instance.data_collector.backward_data_collect( + backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + ) + def output_backward_hook(grad): captured_grads_output.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" -- Gitee From da8338f1f906f677fb6d79d0167300fdaef43593 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 11:31:03 +0800 
Subject: [PATCH 04/67] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=BE=93=E5=85=A5hook?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 77e740011f1..b28a07d94c8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -149,8 +149,14 @@ class ApiRegistry: return origin_func(*args, **kwargs) hooked_inputs = [] - - out = origin_func(*args, **kwargs) + for idx, arg in enumerate(args): + if isinstance(arg, Tensor): + arg_hooked = ops.HookBackward(input_backward_hook)(arg) + hooked_inputs.append(arg_hooked) + else: + hooked_inputs.append(arg) + + out = origin_func(*hooked_inputs, **kwargs) forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" if service_instance.data_collector: -- Gitee From 085085d7ae18c8a0ce6060558754868cab0a404c Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 14:40:15 +0800 Subject: [PATCH 05/67] =?UTF-8?q?=E6=8B=86=E5=88=86=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/data_dump/data_collector.py | 16 ++++++++++++++++ .../mindspore/dump/hook_cell/api_registry.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py index 800a2b81c2f..a537fa3d06a 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py @@ -100,6 +100,22 @@ class DataCollector: data_info = 
self.data_processor.analyze_backward(name, module, module_input_output) self.handle_data(name, data_info) + def backward_input_data_collect(self, name, module, pid, module_input_output): + self.update_construct(name) + if not self.check_scope_and_pid(self.scope, name, pid): + return + + data_info = self.data_processor.analyze_backward_input(name, module, module_input_output) + self.handle_data(name, data_info) + + def backward_output_data_collect(self, name, module, pid, module_input_output): + self.update_construct(name) + if not self.check_scope_and_pid(self.scope, name, pid): + return + + data_info = self.data_processor.analyze_backward_output(name, module, module_input_output) + self.handle_data(name, data_info) + def update_construct(self, name): if self.config.level not in DataCollector.level_without_construct: self.data_writer.update_construct({name: self.module_processor.api_parent_node}) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index b28a07d94c8..4a790a5cbb8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -130,7 +130,7 @@ class ApiRegistry: grad_input=tuple(captured_grads_input), grad_output=tuple(captured_grads_output) if captured_grads_output else None ) - service_instance.data_collector.backward_data_collect( + service_instance.data_collector.backward_input_data_collect( backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output ) @@ -141,7 +141,7 @@ class ApiRegistry: grad_input=None, grad_output=tuple(captured_grads_output) ) - service_instance.data_collector.backward_data_collect( + service_instance.data_collector.backward_output_data_collect( backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output ) -- Gitee From 88ce243a0e483a165e42e010856a88afbbb15220 Mon Sep 17 
00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 14:43:28 +0800 Subject: [PATCH 06/67] =?UTF-8?q?=E6=94=B9=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 4a790a5cbb8..e50502cc991 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -125,7 +125,7 @@ class ApiRegistry: print(f"Grad input length: {len(grad)}") print("Captured input grad:", grad) captured_grads_input.append(grad) - backward_primitive_name = updated_primitive_name + Const.BACKWARD + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputsOutputs( grad_input=tuple(captured_grads_input), grad_output=tuple(captured_grads_output) if captured_grads_output else None -- Gitee From b4ea092df299862c74880a870d432e028b56b4b0 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 14:58:46 +0800 Subject: [PATCH 07/67] =?UTF-8?q?=E5=B0=9D=E8=AF=95=E5=8F=AA=E8=BE=93?= =?UTF-8?q?=E5=85=A5/=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/base.py | 15 +++++++++++++++ .../mindspore/dump/hook_cell/api_registry.py | 16 ++++++---------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 4bcf6418197..75238663f04 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ 
b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -39,7 +39,22 @@ class ModuleBackwardInputsOutputs: @property def grad_output_tuple(self): return convert_tuple(self.grad_output) + +@dataclass +class ModuleBackwardInputs: + grad_input: Optional[Tuple] + + @property + def grad_input_tuple(self): + return convert_tuple(self.grad_input) + +@dataclass +class ModuleBackwardOutputs: + grad_output: Optional[Tuple] + @property + def grad_output_tuple(self): + return convert_tuple(self.grad_output) class TensorStatInfo: def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None): diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index e50502cc991..a9f01ef5fac 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -122,27 +122,23 @@ class ApiRegistry: captured_grads_output = [] def input_backward_hook(grad): - print(f"Grad input length: {len(grad)}") - print("Captured input grad:", grad) + # print(f"Grad input length: {len(grad)}") + # print("Captured input grad:", grad) captured_grads_input.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardInputsOutputs( - grad_input=tuple(captured_grads_input), - grad_output=tuple(captured_grads_output) if captured_grads_output else None - ) + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) service_instance.data_collector.backward_input_data_collect( - backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + backward_primitive_name, self, os.getpid(), new_module_input_output ) def output_backward_hook(grad): captured_grads_output.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = 
ModuleBackwardInputsOutputs( - grad_input=None, + new_module_input_output = ModuleBackwardOutputs( grad_output=tuple(captured_grads_output) ) service_instance.data_collector.backward_output_data_collect( - backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + backward_primitive_name, self, os.getpid(), new_module_input_output ) if not service_instance.switch: -- Gitee From 477d07441c832f0cd376ba1e65ca54c83c86c5a7 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 15:00:26 +0800 Subject: [PATCH 08/67] Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index a9f01ef5fac..16528f76d70 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -19,7 +19,7 @@ import mindspore as ms from mindspore import ops from mindspore.common.tensor import Tensor from msprobe.core.common.utils import Const -from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, ModuleForwardInputs, ModuleForwardOutputs from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor -- Gitee From 149f349cc60ad3caf096bbd1a2c297f458e8771a Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 15:01:49 +0800 Subject: [PATCH 09/67] Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 16528f76d70..a3f9ec35d76 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -19,7 +19,7 @@ import mindspore as ms from mindspore import ops from mindspore.common.tensor import Tensor from msprobe.core.common.utils import Const -from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, ModuleForwardInputs, ModuleForwardOutputs +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, ModuleBackwardInputs, ModuleBackwardOutputs from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor -- Gitee From 4dead5fbefa7f68b436b0df1f00e4fd778ed971c Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 15:07:22 +0800 Subject: [PATCH 10/67] Update base.py --- .../msprobe/core/data_dump/data_processor/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 75238663f04..d96d107ac5d 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -260,8 +260,8 @@ class BaseDataProcessor: if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): api_info_struct[name] = {} self.api_data_category = Const.INPUT - output_info_list = 
self.analyze_element(module_input_output.grad_output_tuple) - api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list + output_info_list = self.analyze_element(module_input_output.grad_input_tuple) + api_info_struct[name][Const.GRAD_INPUT] = output_info_list return api_info_struct def analyze_backward_output(self, name, module, module_input_output: ModuleBackwardInputsOutputs): @@ -272,8 +272,8 @@ class BaseDataProcessor: if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): api_info_struct[name] = {} self.api_data_category = Const.OUTPUT - input_info_list = self.analyze_element(module_input_output.grad_input_tuple) - api_info_struct[name][Const.GRAD_INPUT] = input_info_list + output_info_list = self.analyze_element(module_input_output.grad_output_tuple) + api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list return api_info_struct def get_save_file_path(self, suffix): -- Gitee From d8fce96489f22adab19c9c7c93a5c15f3a05a40b Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 16:15:37 +0800 Subject: [PATCH 11/67] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=AD=A3=E5=90=91?= =?UTF-8?q?=E5=8F=8D=E5=90=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/core/data_dump/data_processor/base.py | 6 +++--- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index d96d107ac5d..e725d362e8c 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -261,7 +261,7 @@ class BaseDataProcessor: api_info_struct[name] = {} self.api_data_category = Const.INPUT output_info_list = self.analyze_element(module_input_output.grad_input_tuple) - api_info_struct[name][Const.GRAD_INPUT] = 
output_info_list + api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list return api_info_struct def analyze_backward_output(self, name, module, module_input_output: ModuleBackwardInputsOutputs): @@ -272,8 +272,8 @@ class BaseDataProcessor: if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): api_info_struct[name] = {} self.api_data_category = Const.OUTPUT - output_info_list = self.analyze_element(module_input_output.grad_output_tuple) - api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list + input_info_list = self.analyze_element(module_input_output.grad_output_tuple) + api_info_struct[name][Const.GRAD_INPUT] = input_info_list return api_info_struct def get_save_file_path(self, suffix): diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index a3f9ec35d76..97ab5e3285c 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -127,7 +127,7 @@ class ApiRegistry: captured_grads_input.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_input_data_collect( + service_instance.data_collector.backward_output_data_collect( backward_primitive_name, self, os.getpid(), new_module_input_output ) @@ -137,7 +137,7 @@ class ApiRegistry: new_module_input_output = ModuleBackwardOutputs( grad_output=tuple(captured_grads_output) ) - service_instance.data_collector.backward_output_data_collect( + service_instance.data_collector.backward_input_data_collect( backward_primitive_name, self, os.getpid(), new_module_input_output ) -- Gitee From c7cb33af18ed7c88a18d7dfc8905c0a4c9185c38 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 16:23:26 +0800 Subject: [PATCH 12/67] 
Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 97ab5e3285c..a3f9ec35d76 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -127,7 +127,7 @@ class ApiRegistry: captured_grads_input.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_output_data_collect( + service_instance.data_collector.backward_input_data_collect( backward_primitive_name, self, os.getpid(), new_module_input_output ) @@ -137,7 +137,7 @@ class ApiRegistry: new_module_input_output = ModuleBackwardOutputs( grad_output=tuple(captured_grads_output) ) - service_instance.data_collector.backward_input_data_collect( + service_instance.data_collector.backward_output_data_collect( backward_primitive_name, self, os.getpid(), new_module_input_output ) -- Gitee From f1a616bbb1c5f9ab892adfce80cb5961d37975c3 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 16:33:20 +0800 Subject: [PATCH 13/67] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=B8=B8=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/core/common/const.py | 1 + .../msprobe/mindspore/dump/hook_cell/api_registry.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index df82455a676..81e21e3d2ac 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -16,6 
+16,7 @@ class Const: OFF = 'OFF' BACKWARD = 'backward' FORWARD = 'forward' + PRIMITIVE_PREFIX = 'Primitive' DEFAULT_LIST = [] DEFAULT_PATH = './' WHITE_LIST = 'white_list' diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index a3f9ec35d76..01bce6b5259 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -117,13 +117,11 @@ class ApiRegistry: primitive_instance.primitive_counters[primitive_name] += 1 current_count = primitive_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" captured_grads_input = [] captured_grads_output = [] def input_backward_hook(grad): - # print(f"Grad input length: {len(grad)}") - # print("Captured input grad:", grad) captured_grads_input.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) -- Gitee From 1bbc79438375ee7a53942a983a1434c9b6834962 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 16:37:34 +0800 Subject: [PATCH 14/67] Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 01bce6b5259..3ee7d9e0e46 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -109,14 +109,13 @@ class ApiRegistry: self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = 
getattr(HOOKMintNNFunctionalOP, attr_name) def wrap_primitive(self, origin_func, primitive_name, service_instance): - primitive_instance = self def func(self, *args, **kwargs): - if primitive_name not in primitive_instance.primitive_counters: - primitive_instance.primitive_counters[primitive_name] = 0 + if primitive_name not in self.primitive_counters: + self.primitive_counters[primitive_name] = 0 else: - primitive_instance.primitive_counters[primitive_name] += 1 + self.primitive_counters[primitive_name] += 1 - current_count = primitive_instance.primitive_counters[primitive_name] + current_count = self.primitive_counters[primitive_name] updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" captured_grads_input = [] captured_grads_output = [] -- Gitee From 7df3f2edf624c06cb9f9ae1e8000347467aecb60 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 16:39:41 +0800 Subject: [PATCH 15/67] Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 3ee7d9e0e46..01bce6b5259 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -109,13 +109,14 @@ class ApiRegistry: self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) def wrap_primitive(self, origin_func, primitive_name, service_instance): + primitive_instance = self def func(self, *args, **kwargs): - if primitive_name not in self.primitive_counters: - self.primitive_counters[primitive_name] = 0 + if primitive_name not in primitive_instance.primitive_counters: + primitive_instance.primitive_counters[primitive_name] = 0 else: - 
self.primitive_counters[primitive_name] += 1 + primitive_instance.primitive_counters[primitive_name] += 1 - current_count = self.primitive_counters[primitive_name] + current_count = primitive_instance.primitive_counters[primitive_name] updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" captured_grads_input = [] captured_grads_output = [] -- Gitee From 95f07448ddd07ccbb18f724b104bc78b0e996e96 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 17:20:06 +0800 Subject: [PATCH 16/67] =?UTF-8?q?=E8=BF=81=E7=A7=BB=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../mindspore/dump/hook_cell/api_registry.py | 2 - .../msprobe/mindspore/service.py | 78 ++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 01bce6b5259..63a02cefef2 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -25,7 +25,6 @@ from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor from msprobe.core.common.utils import Const -PRIMITIVE_PREFIX = "Primitive" class ApiRegistry: def __init__(self): @@ -181,7 +180,6 @@ class ApiRegistry: primitive_set.add((pname, primitive)) for pname, primitive in primitive_set: - print("primitive name is", pname) NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) primitive.__class__ = NewPrimitive diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 8d802e14d06..e7039a3bdb3 
100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -39,6 +39,7 @@ class Service: self.first_start = True self.current_rank = None self.dump_iter_dir = None + self.primitive_counters = {} def build_hook(self, module_type, name): def forward_hook(api_or_module_name, module, input, output): @@ -137,4 +138,79 @@ class Service: api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() if self.model: - api_register.register_hooks(self) + register_hooks(self) + + def register_hooks(self): + primitive_set = set() + for name, cell in self.model.cells_and_names(): + for pname, primitive in cell._primitives.items(): + primitive_set.add((pname, primitive)) + + for pname, primitive in primitive_set: + NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname)}) + primitive.__class__ = NewPrimitive + + def wrap_primitive(self, origin_func, primitive_name): + def func(self, *args, **kwargs): + if primitive_name not in self.primitive_counters: + self.primitive_counters[primitive_name] = 0 + else: + self.primitive_counters[primitive_name] += 1 + + current_count = self.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] + captured_grads_output = [] + + def input_backward_hook(grad): + captured_grads_input.append(grad) + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + self.data_collector.backward_input_data_collect( + backward_primitive_name, self, os.getpid(), new_module_input_output + ) + + def output_backward_hook(grad): + captured_grads_output.append(grad) + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = 
ModuleBackwardOutputs( + grad_output=tuple(captured_grads_output) + ) + self.data_collector.backward_output_data_collect( + backward_primitive_name, self, os.getpid(), new_module_input_output + ) + + if not self.switch: + return origin_func(*args, **kwargs) + + hooked_inputs = [] + for idx, arg in enumerate(args): + if isinstance(arg, Tensor): + arg_hooked = ops.HookBackward(input_backward_hook)(arg) + hooked_inputs.append(arg_hooked) + else: + hooked_inputs.append(arg) + + out = origin_func(*hooked_inputs, **kwargs) + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" + + if self.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) + self.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + if self.data_collector.if_return_forward_new_output(): + out = self.data_collector.get_forward_new_output() + + if isinstance(out, Tensor): + out = ops.HookBackward(output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + out = tuple(hooked_outputs) + + return out + + return func -- Gitee From fac1c0cc2fe68b7d40c67cf4a2305feffb5a3243 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 17:21:39 +0800 Subject: [PATCH 17/67] Update api_registry.py --- .../mindspore/dump/hook_cell/api_registry.py | 150 +++++++++--------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 63a02cefef2..4c2f81cb905 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -107,80 +107,80 @@ class 
ApiRegistry: if attr_name.startswith(Const.ATTR_NAME_PREFIX): self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) - def wrap_primitive(self, origin_func, primitive_name, service_instance): - primitive_instance = self - def func(self, *args, **kwargs): - if primitive_name not in primitive_instance.primitive_counters: - primitive_instance.primitive_counters[primitive_name] = 0 - else: - primitive_instance.primitive_counters[primitive_name] += 1 - - current_count = primitive_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] - captured_grads_output = [] - - def input_backward_hook(grad): - captured_grads_input.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output - ) - - def output_backward_hook(grad): - captured_grads_output.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads_output) - ) - service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output - ) - - if not service_instance.switch: - return origin_func(*args, **kwargs) - - hooked_inputs = [] - for idx, arg in enumerate(args): - if isinstance(arg, Tensor): - arg_hooked = ops.HookBackward(input_backward_hook)(arg) - hooked_inputs.append(arg_hooked) - else: - hooked_inputs.append(arg) - - out = origin_func(*hooked_inputs, **kwargs) - forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - - if service_instance.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=args, 
kwargs=kwargs, output=out) - service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) - if service_instance.data_collector.if_return_forward_new_output(): - out = service_instance.data_collector.get_forward_new_output() - - if isinstance(out, Tensor): - out = ops.HookBackward(output_backward_hook)(out) - elif isinstance(out, tuple): - hooked_outputs = [] - for tensor in out: - if isinstance(tensor, Tensor): - hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) - else: - hooked_outputs.append(tensor) - out = tuple(hooked_outputs) - - return out - - return func - - def register_hooks(self, service_instance): - primitive_set = set() - for name, cell in service_instance.model.cells_and_names(): - for pname, primitive in cell._primitives.items(): - primitive_set.add((pname, primitive)) - - for pname, primitive in primitive_set: - NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) - primitive.__class__ = NewPrimitive + # def wrap_primitive(self, origin_func, primitive_name, service_instance): + # primitive_instance = self + # def func(self, *args, **kwargs): + # if primitive_name not in primitive_instance.primitive_counters: + # primitive_instance.primitive_counters[primitive_name] = 0 + # else: + # primitive_instance.primitive_counters[primitive_name] += 1 + + # current_count = primitive_instance.primitive_counters[primitive_name] + # updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + # captured_grads_input = [] + # captured_grads_output = [] + + # def input_backward_hook(grad): + # captured_grads_input.append(grad) + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + # service_instance.data_collector.backward_input_data_collect( + # 
backward_primitive_name, self, os.getpid(), new_module_input_output + # ) + + # def output_backward_hook(grad): + # captured_grads_output.append(grad) + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # new_module_input_output = ModuleBackwardOutputs( + # grad_output=tuple(captured_grads_output) + # ) + # service_instance.data_collector.backward_output_data_collect( + # backward_primitive_name, self, os.getpid(), new_module_input_output + # ) + + # if not service_instance.switch: + # return origin_func(*args, **kwargs) + + # hooked_inputs = [] + # for idx, arg in enumerate(args): + # if isinstance(arg, Tensor): + # arg_hooked = ops.HookBackward(input_backward_hook)(arg) + # hooked_inputs.append(arg_hooked) + # else: + # hooked_inputs.append(arg) + + # out = origin_func(*hooked_inputs, **kwargs) + # forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" + + # if service_instance.data_collector: + # module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) + # service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + # if service_instance.data_collector.if_return_forward_new_output(): + # out = service_instance.data_collector.get_forward_new_output() + + # if isinstance(out, Tensor): + # out = ops.HookBackward(output_backward_hook)(out) + # elif isinstance(out, tuple): + # hooked_outputs = [] + # for tensor in out: + # if isinstance(tensor, Tensor): + # hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + # else: + # hooked_outputs.append(tensor) + # out = tuple(hooked_outputs) + + # return out + + # return func + + # def register_hooks(self, service_instance): + # primitive_set = set() + # for name, cell in service_instance.model.cells_and_names(): + # for pname, primitive in cell._primitives.items(): + # primitive_set.add((pname, primitive)) + + # for pname, primitive in primitive_set: + # NewPrimitive = 
type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) + # primitive.__class__ = NewPrimitive api_register = ApiRegistry() -- Gitee From 36916dd494f20c97f2ec5f9926df833d7830ffb6 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 17:24:00 +0800 Subject: [PATCH 18/67] Update service.py --- .../msprobe/mindspore/service.py | 149 +++++++++--------- 1 file changed, 76 insertions(+), 73 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index e7039a3bdb3..b3f0f3885df 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -74,6 +74,81 @@ class Service: return backward_hook(*args, **kwargs) return wrap_forward_hook, wrap_backward_hook + + def wrap_primitive(self, origin_func, primitive_name): + def func(self, *args, **kwargs): + if primitive_name not in self.primitive_counters: + self.primitive_counters[primitive_name] = 0 + else: + self.primitive_counters[primitive_name] += 1 + + current_count = self.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] + captured_grads_output = [] + + def input_backward_hook(grad): + captured_grads_input.append(grad) + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + self.data_collector.backward_input_data_collect( + backward_primitive_name, self, os.getpid(), new_module_input_output + ) + + def output_backward_hook(grad): + captured_grads_output.append(grad) + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardOutputs( + grad_output=tuple(captured_grads_output) + ) + self.data_collector.backward_output_data_collect( + 
backward_primitive_name, self, os.getpid(), new_module_input_output + ) + + if not self.switch: + return origin_func(*args, **kwargs) + + hooked_inputs = [] + for idx, arg in enumerate(args): + if isinstance(arg, Tensor): + arg_hooked = ops.HookBackward(input_backward_hook)(arg) + hooked_inputs.append(arg_hooked) + else: + hooked_inputs.append(arg) + + out = origin_func(*hooked_inputs, **kwargs) + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" + + if self.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) + self.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + if self.data_collector.if_return_forward_new_output(): + out = self.data_collector.get_forward_new_output() + + if isinstance(out, Tensor): + out = ops.HookBackward(output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + out = tuple(hooked_outputs) + + return out + + return func + + def register_hooks(self): + primitive_set = set() + for name, cell in self.model.cells_and_names(): + for pname, primitive in cell._primitives.items(): + primitive_set.add((pname, primitive)) + + for pname, primitive in primitive_set: + NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname)}) + primitive.__class__ = NewPrimitive def step(self): self.current_iter += 1 @@ -140,77 +215,5 @@ class Service: if self.model: register_hooks(self) - def register_hooks(self): - primitive_set = set() - for name, cell in self.model.cells_and_names(): - for pname, primitive in cell._primitives.items(): - primitive_set.add((pname, primitive)) - - for pname, primitive in primitive_set: - NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': 
self.wrap_primitive(primitive.__call__, pname)}) - primitive.__class__ = NewPrimitive - - def wrap_primitive(self, origin_func, primitive_name): - def func(self, *args, **kwargs): - if primitive_name not in self.primitive_counters: - self.primitive_counters[primitive_name] = 0 - else: - self.primitive_counters[primitive_name] += 1 - - current_count = self.primitive_counters[primitive_name] - updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] - captured_grads_output = [] - - def input_backward_hook(grad): - captured_grads_input.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - self.data_collector.backward_input_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output - ) - - def output_backward_hook(grad): - captured_grads_output.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads_output) - ) - self.data_collector.backward_output_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output - ) - - if not self.switch: - return origin_func(*args, **kwargs) - - hooked_inputs = [] - for idx, arg in enumerate(args): - if isinstance(arg, Tensor): - arg_hooked = ops.HookBackward(input_backward_hook)(arg) - hooked_inputs.append(arg_hooked) - else: - hooked_inputs.append(arg) - - out = origin_func(*hooked_inputs, **kwargs) - forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - - if self.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) - self.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) - if self.data_collector.if_return_forward_new_output(): - out = self.data_collector.get_forward_new_output() - 
- if isinstance(out, Tensor): - out = ops.HookBackward(output_backward_hook)(out) - elif isinstance(out, tuple): - hooked_outputs = [] - for tensor in out: - if isinstance(tensor, Tensor): - hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) - else: - hooked_outputs.append(tensor) - out = tuple(hooked_outputs) - - return out + - return func -- Gitee From da2f5ee3c3017c44b39e3133bc9d978c8d2a89f8 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 17:31:13 +0800 Subject: [PATCH 19/67] Update service.py --- .../msprobe/mindspore/service.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index b3f0f3885df..24ca7d4609b 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -16,6 +16,7 @@ import os from pathlib import Path import functools +from mindspore.common.tensor import Tensor from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import BaseScope @@ -25,7 +26,7 @@ from msprobe.mindspore.common.log import logger from msprobe.core.common.utils import Const from msprobe.core.common.exceptions import DistributedNotInitializedError from msprobe.mindspore.dump.hook_cell.api_registry import api_register -from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, ModuleBackwardInputs, ModuleBackwardOutputs class Service: @@ -74,15 +75,16 @@ class Service: return backward_hook(*args, **kwargs) return wrap_forward_hook, wrap_backward_hook - + def wrap_primitive(self, origin_func, primitive_name): + service_instance = self def func(self, *args, **kwargs): - if primitive_name not in self.primitive_counters: - 
self.primitive_counters[primitive_name] = 0 + if primitive_name not in service_instance.primitive_counters: + service_instance.primitive_counters[primitive_name] = 0 else: - self.primitive_counters[primitive_name] += 1 + service_instance.primitive_counters[primitive_name] += 1 - current_count = self.primitive_counters[primitive_name] + current_count = service_instance.primitive_counters[primitive_name] updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" captured_grads_input = [] captured_grads_output = [] @@ -119,11 +121,11 @@ class Service: out = origin_func(*hooked_inputs, **kwargs) forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - if self.data_collector: + if service_instance.data_collector: module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) - self.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) - if self.data_collector.if_return_forward_new_output(): - out = self.data_collector.get_forward_new_output() + service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + if service_instance.data_collector.if_return_forward_new_output(): + out = service_instance.data_collector.get_forward_new_output() if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) @@ -213,7 +215,7 @@ class Service: api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() if self.model: - register_hooks(self) + self.register_hooks() -- Gitee From f8735e2f3abb6321279cd019fe0d5d93a83cd7c0 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 17:32:14 +0800 Subject: [PATCH 20/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py 
b/debug/accuracy_tools/msprobe/mindspore/service.py index 24ca7d4609b..9435c48068d 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -17,6 +17,7 @@ import os from pathlib import Path import functools from mindspore.common.tensor import Tensor +from mindspore import ops from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import BaseScope @@ -107,7 +108,7 @@ class Service: backward_primitive_name, self, os.getpid(), new_module_input_output ) - if not self.switch: + if not service_instance.switch: return origin_func(*args, **kwargs) hooked_inputs = [] -- Gitee From 71eea504ba61a5bb02e19bbdc97e5451f13e7c1e Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 26 Jul 2024 17:35:44 +0800 Subject: [PATCH 21/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 9435c48068d..fddd27718f4 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -94,7 +94,7 @@ class Service: captured_grads_input.append(grad) backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - self.data_collector.backward_input_data_collect( + service_instance.data_collector.backward_input_data_collect( backward_primitive_name, self, os.getpid(), new_module_input_output ) @@ -104,7 +104,7 @@ class Service: new_module_input_output = ModuleBackwardOutputs( grad_output=tuple(captured_grads_output) ) - self.data_collector.backward_output_data_collect( + service_instance.data_collector.backward_output_data_collect( backward_primitive_name, self, os.getpid(), new_module_input_output ) -- Gitee From 
41d45586ddd38c9ab246124e7c08eb3e7ce37103 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 27 Jul 2024 10:32:00 +0800 Subject: [PATCH 22/67] Update api_registry.py --- .../mindspore/dump/hook_cell/api_registry.py | 76 ------------------- 1 file changed, 76 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 4c2f81cb905..3e9425c6fd8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -107,80 +107,4 @@ class ApiRegistry: if attr_name.startswith(Const.ATTR_NAME_PREFIX): self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) - # def wrap_primitive(self, origin_func, primitive_name, service_instance): - # primitive_instance = self - # def func(self, *args, **kwargs): - # if primitive_name not in primitive_instance.primitive_counters: - # primitive_instance.primitive_counters[primitive_name] = 0 - # else: - # primitive_instance.primitive_counters[primitive_name] += 1 - - # current_count = primitive_instance.primitive_counters[primitive_name] - # updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - # captured_grads_input = [] - # captured_grads_output = [] - - # def input_backward_hook(grad): - # captured_grads_input.append(grad) - # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - # service_instance.data_collector.backward_input_data_collect( - # backward_primitive_name, self, os.getpid(), new_module_input_output - # ) - - # def output_backward_hook(grad): - # captured_grads_output.append(grad) - # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # new_module_input_output = 
ModuleBackwardOutputs( - # grad_output=tuple(captured_grads_output) - # ) - # service_instance.data_collector.backward_output_data_collect( - # backward_primitive_name, self, os.getpid(), new_module_input_output - # ) - - # if not service_instance.switch: - # return origin_func(*args, **kwargs) - - # hooked_inputs = [] - # for idx, arg in enumerate(args): - # if isinstance(arg, Tensor): - # arg_hooked = ops.HookBackward(input_backward_hook)(arg) - # hooked_inputs.append(arg_hooked) - # else: - # hooked_inputs.append(arg) - - # out = origin_func(*hooked_inputs, **kwargs) - # forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - - # if service_instance.data_collector: - # module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) - # service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) - # if service_instance.data_collector.if_return_forward_new_output(): - # out = service_instance.data_collector.get_forward_new_output() - - # if isinstance(out, Tensor): - # out = ops.HookBackward(output_backward_hook)(out) - # elif isinstance(out, tuple): - # hooked_outputs = [] - # for tensor in out: - # if isinstance(tensor, Tensor): - # hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) - # else: - # hooked_outputs.append(tensor) - # out = tuple(hooked_outputs) - - # return out - - # return func - - # def register_hooks(self, service_instance): - # primitive_set = set() - # for name, cell in service_instance.model.cells_and_names(): - # for pname, primitive in cell._primitives.items(): - # primitive_set.add((pname, primitive)) - - # for pname, primitive in primitive_set: - # NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) - # primitive.__class__ = NewPrimitive - api_register = ApiRegistry() -- Gitee From 5d69b20f2a7db6576522d38462f6d57b81933854 Mon Sep 17 00:00:00 
2001 From: yangxinxian <947098055@qq.com> Date: Sat, 27 Jul 2024 15:51:41 +0800 Subject: [PATCH 23/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index fddd27718f4..e9ff2ab7520 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -40,8 +40,8 @@ class Service: self.current_iter = 0 self.first_start = True self.current_rank = None - self.dump_iter_dir = None self.primitive_counters = {} + self.dump_iter_dir = None def build_hook(self, module_type, name): def forward_hook(api_or_module_name, module, input, output): -- Gitee From 95339b740327abfdb91d7df7928b473cde1b1e2c Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 27 Jul 2024 15:54:59 +0800 Subject: [PATCH 24/67] Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index 3e9425c6fd8..b30505f2d4f 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -13,13 +13,8 @@ # limitations under the License. 
# ============================================================================ -import os -import functools import mindspore as ms from mindspore import ops -from mindspore.common.tensor import Tensor -from msprobe.core.common.utils import Const -from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, ModuleBackwardInputs, ModuleBackwardOutputs from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor @@ -41,7 +36,6 @@ class ApiRegistry: self.norm_inner_ops_hook_attr = {} self.norm_inner_ops = ["norm", "square", "sqrt", "is_complex"] - self.primitive_counters = {} @staticmethod def store_ori_attr(ori_api_group, api_list, api_ori_attr): -- Gitee From f06b73be7c40b5a7828e23c47459caa9ece5c30f Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 27 Jul 2024 15:55:36 +0800 Subject: [PATCH 25/67] Update api_registry.py --- .../msprobe/mindspore/dump/hook_cell/api_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py index b30505f2d4f..5508416fde0 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry.py @@ -14,7 +14,6 @@ # ============================================================================ import mindspore as ms -from mindspore import ops from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor @@ -101,4 +100,5 @@ class 
ApiRegistry: if attr_name.startswith(Const.ATTR_NAME_PREFIX): self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) + api_register = ApiRegistry() -- Gitee From b8f4d421fe9010a01163b8cfac20935569ff423f Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 09:59:49 +0800 Subject: [PATCH 26/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index e9ff2ab7520..ba4eca957e2 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -80,6 +80,7 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): service_instance = self def func(self, *args, **kwargs): + service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) if primitive_name not in service_instance.primitive_counters: service_instance.primitive_counters[primitive_name] = 0 else: -- Gitee From 4ce94697456e2ffe08a5adf594c36c33e7c84fa3 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 11:30:18 +0800 Subject: [PATCH 27/67] =?UTF-8?q?=E5=B0=9D=E8=AF=95=E6=97=A8=E5=9C=A8?= =?UTF-8?q?=E6=9C=80=E5=90=8E=E4=B8=80=E5=9D=97=E6=94=B6=E9=9B=86=E6=95=B0?= =?UTF-8?q?=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../msprobe/mindspore/service.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index ba4eca957e2..f7dfa044e8e 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -80,24 +80,15 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): 
service_instance = self def func(self, *args, **kwargs): - service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) - if primitive_name not in service_instance.primitive_counters: - service_instance.primitive_counters[primitive_name] = 0 - else: - service_instance.primitive_counters[primitive_name] += 1 - - current_count = service_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] - captured_grads_output = [] - def input_backward_hook(grad): captured_grads_input.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output - ) + if len(captured_grads_input) == num_tensors: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, self, os.getpid(), new_module_input_output + ) +# 等所有加入后在收集 def output_backward_hook(grad): captured_grads_output.append(grad) @@ -109,10 +100,22 @@ class Service: backward_primitive_name, self, os.getpid(), new_module_input_output ) + service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) + if primitive_name not in service_instance.primitive_counters: + service_instance.primitive_counters[primitive_name] = 0 + else: + service_instance.primitive_counters[primitive_name] += 1 + + current_count = service_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] + captured_grads_output = [] + if not service_instance.switch: return origin_func(*args, 
**kwargs) hooked_inputs = [] + num_tensors = sum(isinstance(arg, Tensor) for arg in args) for idx, arg in enumerate(args): if isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) -- Gitee From 9348c9a4df412c22050c2697279f5cd030474031 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 11:31:14 +0800 Subject: [PATCH 28/67] =?UTF-8?q?=E6=89=93=E5=8D=B0tensor=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- debug/accuracy_tools/msprobe/mindspore/service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index f7dfa044e8e..9ec251353df 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -116,6 +116,7 @@ class Service: hooked_inputs = [] num_tensors = sum(isinstance(arg, Tensor) for arg in args) + print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 for idx, arg in enumerate(args): if isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) -- Gitee From 61659c62dfc2a3e7f8ecf2b9bfe2e78f34a82b23 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 14:53:34 +0800 Subject: [PATCH 29/67] Update base.py --- .../msprobe/core/data_dump/data_processor/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index e725d362e8c..a18efccbc6a 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -240,13 +240,16 @@ class BaseDataProcessor: api_info_struct = {} if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): api_info_struct[name] = {} - 
self.api_data_category = Const.OUTPUT + # self.api_data_category = Const.OUTPUT + self.api_data_category = Const.INPUT + input_info_list = self.analyze_element(module_input_output.grad_input_tuple) api_info_struct[name][Const.GRAD_INPUT] = input_info_list if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): api_info_struct[name] = api_info_struct.get(name, {}) - self.api_data_category = Const.INPUT + self.api_data_category = Const.OUTPUT + # self.api_data_category = Const.INPUT output_info_list = self.analyze_element(module_input_output.grad_output_tuple) api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list -- Gitee From f0818349a58c98888ca393b27d662488aef5ecd9 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 15:03:17 +0800 Subject: [PATCH 30/67] Update base.py --- .../msprobe/core/data_dump/data_processor/base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index a18efccbc6a..c55c5079e25 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -262,7 +262,8 @@ class BaseDataProcessor: api_info_struct = {} if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): api_info_struct[name] = {} - self.api_data_category = Const.INPUT + self.api_data_category = Const.OUTPUT + # self.api_data_category = Const.INPUT output_info_list = self.analyze_element(module_input_output.grad_input_tuple) api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list return api_info_struct @@ -274,7 +275,8 @@ class BaseDataProcessor: api_info_struct = {} if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): api_info_struct[name] = {} - self.api_data_category = Const.OUTPUT + self.api_data_category = Const.INPUT + # self.api_data_category = Const.OUTPUT input_info_list = 
self.analyze_element(module_input_output.grad_output_tuple) api_info_struct[name][Const.GRAD_INPUT] = input_info_list return api_info_struct -- Gitee From 7c2eec3adecd2a55791c0fd6a0adfba86fbf1f2c Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 15:41:36 +0800 Subject: [PATCH 31/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 9ec251353df..2d4419b370c 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -117,7 +117,9 @@ class Service: hooked_inputs = [] num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 + print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 for idx, arg in enumerate(args): + print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) hooked_inputs.append(arg_hooked) @@ -133,6 +135,7 @@ class Service: if service_instance.data_collector.if_return_forward_new_output(): out = service_instance.data_collector.get_forward_new_output() + # num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) elif isinstance(out, tuple): -- Gitee From b0f4bfca7869145e067b3465e52ab351c8c8ee07 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 15:54:18 +0800 Subject: [PATCH 32/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 2d4419b370c..06535109463 100644 
--- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -136,6 +136,7 @@ class Service: out = service_instance.data_collector.get_forward_new_output() # num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 + print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) elif isinstance(out, tuple): -- Gitee From dc1f5a437661e8deb02d628f6b0a5b5cce4ff2ce Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 29 Jul 2024 16:01:48 +0800 Subject: [PATCH 33/67] Update service.py --- .../msprobe/mindspore/service.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 06535109463..54d34861dc6 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -92,13 +92,14 @@ class Service: def output_backward_hook(grad): captured_grads_output.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads_output) - ) - service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output - ) + if len(captured_grads_output) == num_output_tensors: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardOutputs( + grad_output=tuple(captured_grads_output) + ) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, self, os.getpid(), new_module_input_output + ) service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) if primitive_name not in service_instance.primitive_counters: 
@@ -134,8 +135,8 @@ class Service: service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) if service_instance.data_collector.if_return_forward_new_output(): out = service_instance.data_collector.get_forward_new_output() - - # num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 + + num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) -- Gitee From 574e684341d5e16a2d3c8983faf507f7ee218233 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 30 Jul 2024 20:16:31 +0800 Subject: [PATCH 34/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 54d34861dc6..6c681a12422 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -223,8 +223,8 @@ class Service: def register_hook_new(self): logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": - api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) - api_register.api_set_hook_func() + # api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + # api_register.api_set_hook_func() if self.model: self.register_hooks() -- Gitee From 5b91d4839615c8572b6a478f42f31dd946cdce0c Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 11:28:42 +0800 Subject: [PATCH 35/67] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/data_dump/data_processor/mindspore_processor.py | 5 +++-- debug/accuracy_tools/msprobe/mindspore/service.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 7533e2ee0de..db02f26f607 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -57,8 +57,9 @@ class MindsporeDataProcessor(BaseDataProcessor): if data.numel() == 0: return tensor_stat elif data.dtype == ms.bool_: - tensor_stat.max = self.mint_ops_func["max"](data).item() - tensor_stat.min = self.mint_ops_func["min"](data).item() + data_np = data.asnumpy() + tensor_stat.max = bool(np.max(data_np)) + tensor_stat.min = bool(np.min(data_np)) elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() elif data.dtype == ms.complex64 or data.dtype == ms.complex128: diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 6c681a12422..46a054275b1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -131,7 +131,7 @@ class Service: forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" if service_instance.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) + module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) if service_instance.data_collector.if_return_forward_new_output(): out = 
service_instance.data_collector.get_forward_new_output() -- Gitee From 74297ab27573157dc4050f162c30e8f846c987e2 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 17:07:56 +0800 Subject: [PATCH 36/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 46a054275b1..42ab8b31aa8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -77,19 +77,21 @@ class Service: return wrap_forward_hook, wrap_backward_hook + def wrap_primitive(self, origin_func, primitive_name): service_instance = self - def func(self, *args, **kwargs): + def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): def input_backward_hook(grad): captured_grads_input.append(grad) if len(captured_grads_input) == num_tensors: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output + backward_primitive_name, service_instance, os.getpid(), new_module_input_output ) -# 等所有加入后在收集 + return input_backward_hook + def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): def output_backward_hook(grad): captured_grads_output.append(grad) if len(captured_grads_output) == num_output_tensors: @@ -98,9 +100,11 @@ class Service: grad_output=tuple(captured_grads_output) ) service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, self, os.getpid(), new_module_input_output + backward_primitive_name, service_instance, os.getpid(), new_module_input_output ) + return 
output_backward_hook + def func(self, *args, **kwargs): service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) if primitive_name not in service_instance.primitive_counters: service_instance.primitive_counters[primitive_name] = 0 @@ -119,6 +123,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 + input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) for idx, arg in enumerate(args): print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): @@ -138,6 +143,8 @@ class Service: num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 + output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) + if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) elif isinstance(out, tuple): -- Gitee From 19330452a66810b9a7eacf23121ce1b406b47828 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 17:16:59 +0800 Subject: [PATCH 37/67] Update service.py --- .../msprobe/mindspore/service.py | 64 ++++++++++++------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 42ab8b31aa8..fea947dfd90 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -81,28 +81,46 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): service_instance = self def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): - def input_backward_hook(grad): - 
captured_grads_input.append(grad) - if len(captured_grads_input) == num_tensors: - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - return input_backward_hook - - def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): - def output_backward_hook(grad): - captured_grads_output.append(grad) - if len(captured_grads_output) == num_output_tensors: + # def input_backward_hook(grad): + # captured_grads_input.append(grad) + # if len(captured_grads_input) == num_tensors: + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + # service_instance.data_collector.backward_input_data_collect( + # backward_primitive_name, service_instance, os.getpid(), new_module_input_output + # ) + # return input_backward_hook + + # def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): + # def output_backward_hook(grad): + # captured_grads_output.append(grad) + # if len(captured_grads_output) == num_output_tensors: + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # new_module_input_output = ModuleBackwardOutputs( + # grad_output=tuple(captured_grads_output) + # ) + # service_instance.data_collector.backward_output_data_collect( + # backward_primitive_name, service_instance, os.getpid(), new_module_input_output + # ) + # return output_backward_hook + def create_backward_hook(captured_grads, num_grads, updated_primitive_name, is_input): + def backward_hook(grad): + captured_grads.append(grad) + if len(captured_grads) == num_grads: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - 
new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads_output) - ) - service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - return output_backward_hook + if is_input: + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + else: + new_module_input_output = ModuleBackwardOutputs( + grad_output=tuple(captured_grads) + ) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + return backward_hook def func(self, *args, **kwargs): service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) @@ -123,7 +141,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 - input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) + input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) for idx, arg in enumerate(args): print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): @@ -143,7 +161,7 @@ class Service: num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) + output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) if isinstance(out, Tensor): out 
= ops.HookBackward(output_backward_hook)(out) -- Gitee From 9418a12bde6e93059a89adc2f2bde84df536731a Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 17:20:49 +0800 Subject: [PATCH 38/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index fea947dfd90..326bc75d1b9 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -80,7 +80,7 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): service_instance = self - def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): + # def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): # def input_backward_hook(grad): # captured_grads_input.append(grad) # if len(captured_grads_input) == num_tensors: @@ -141,7 +141,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 - input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) + input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) for idx, arg in enumerate(args): print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): @@ -161,7 +161,7 @@ class Service: num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) + 
output_backward_hook = create_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) -- Gitee From d44838b7fd782fd3441ccd8bdf0d47da94ae265d Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 17:28:58 +0800 Subject: [PATCH 39/67] Update service.py --- .../msprobe/mindspore/service.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 326bc75d1b9..42b8df62695 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -77,7 +77,24 @@ class Service: return wrap_forward_hook, wrap_backward_hook - + def create_backward_hook(captured_grads, num_grads, updated_primitive_name, is_input): + def backward_hook(grad): + captured_grads.append(grad) + if len(captured_grads) == num_grads: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + if is_input: + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + else: + new_module_input_output = ModuleBackwardOutputs( + grad_output=tuple(captured_grads) + ) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + return backward_hook def wrap_primitive(self, origin_func, primitive_name): service_instance = self # def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): @@ -103,24 +120,7 @@ class Service: # backward_primitive_name, service_instance, os.getpid(), new_module_input_output # ) # return output_backward_hook - def 
create_backward_hook(captured_grads, num_grads, updated_primitive_name, is_input): - def backward_hook(grad): - captured_grads.append(grad) - if len(captured_grads) == num_grads: - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - if is_input: - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - else: - new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads) - ) - service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - return backward_hook + def func(self, *args, **kwargs): service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) -- Gitee From ae8a1f63bd9ef480a413d8443f3b9c33ce618967 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 17:33:52 +0800 Subject: [PATCH 40/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 42b8df62695..108c4df94fc 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -141,7 +141,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 - input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) + input_backward_hook = self.create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) for idx, arg in enumerate(args): print(f"Argument {idx}: 
type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): @@ -161,7 +161,7 @@ class Service: num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = create_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) + output_backward_hook = self.create_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) -- Gitee From 9a97bcf905b4686e746aff63dfff3cf1099f6645 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 20:29:57 +0800 Subject: [PATCH 41/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 108c4df94fc..7868d0897cb 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -141,7 +141,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 - input_backward_hook = self.create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) + input_backward_hook = service_instance.create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) for idx, arg in enumerate(args): print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): @@ -161,7 +161,7 @@ class Service: num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): 
type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = self.create_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) + output_backward_hook = service_instance.create_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) -- Gitee From 2713c9d15fd71ccb4c51b9487a8f013ad69438d4 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Thu, 1 Aug 2024 20:37:07 +0800 Subject: [PATCH 42/67] Update service.py --- .../msprobe/mindspore/service.py | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 7868d0897cb..fed54b08d33 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -77,49 +77,49 @@ class Service: return wrap_forward_hook, wrap_backward_hook - def create_backward_hook(captured_grads, num_grads, updated_primitive_name, is_input): - def backward_hook(grad): - captured_grads.append(grad) - if len(captured_grads) == num_grads: - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - if is_input: - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + # def create_backward_hook(captured_grads, num_grads, updated_primitive_name, is_input): + # def backward_hook(grad): + # captured_grads.append(grad) + # if len(captured_grads) == num_grads: + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # if is_input: + # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + # service_instance.data_collector.backward_input_data_collect( + # backward_primitive_name, service_instance, os.getpid(), new_module_input_output + # ) + # else: + # new_module_input_output = ModuleBackwardOutputs( + # 
grad_output=tuple(captured_grads) + # ) + # service_instance.data_collector.backward_output_data_collect( + # backward_primitive_name, service_instance, os.getpid(), new_module_input_output + # ) + # return backward_hook + def wrap_primitive(self, origin_func, primitive_name): + service_instance = self + def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): + def input_backward_hook(grad): + captured_grads_input.append(grad) + if len(captured_grads_input) == num_tensors: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) service_instance.data_collector.backward_input_data_collect( backward_primitive_name, service_instance, os.getpid(), new_module_input_output ) - else: + return input_backward_hook + + def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): + def output_backward_hook(grad): + captured_grads_output.append(grad) + if len(captured_grads_output) == num_output_tensors: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads) + grad_output=tuple(captured_grads_output) ) service_instance.data_collector.backward_output_data_collect( backward_primitive_name, service_instance, os.getpid(), new_module_input_output ) - return backward_hook - def wrap_primitive(self, origin_func, primitive_name): - service_instance = self - # def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): - # def input_backward_hook(grad): - # captured_grads_input.append(grad) - # if len(captured_grads_input) == num_tensors: - # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - # service_instance.data_collector.backward_input_data_collect( - # 
backward_primitive_name, service_instance, os.getpid(), new_module_input_output - # ) - # return input_backward_hook - - # def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): - # def output_backward_hook(grad): - # captured_grads_output.append(grad) - # if len(captured_grads_output) == num_output_tensors: - # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # new_module_input_output = ModuleBackwardOutputs( - # grad_output=tuple(captured_grads_output) - # ) - # service_instance.data_collector.backward_output_data_collect( - # backward_primitive_name, service_instance, os.getpid(), new_module_input_output - # ) - # return output_backward_hook + return output_backward_hook def func(self, *args, **kwargs): @@ -141,7 +141,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 - input_backward_hook = service_instance.create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, is_input=True) + input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) for idx, arg in enumerate(args): print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): @@ -161,7 +161,7 @@ class Service: num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = service_instance.create_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name, is_input=False) + output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) -- Gitee From 65b228b154ca1fa14f679951a97a92fc62b81d2c 
Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 2 Aug 2024 11:16:58 +0800 Subject: [PATCH 43/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index fed54b08d33..1c4e98450c2 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -248,8 +248,8 @@ class Service: def register_hook_new(self): logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": - # api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) - # api_register.api_set_hook_func() + api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + api_register.api_set_hook_func() if self.model: self.register_hooks() -- Gitee From 980a7537b5e76418f77de886e1ca0443d6ce98c3 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Fri, 2 Aug 2024 15:52:20 +0800 Subject: [PATCH 44/67] Update service.py --- .../msprobe/mindspore/service.py | 150 ++++++++++-------- 1 file changed, 87 insertions(+), 63 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 1c4e98450c2..1d2439b14c2 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -99,83 +99,107 @@ class Service: service_instance = self def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): def input_backward_hook(grad): - captured_grads_input.append(grad) - if len(captured_grads_input) == num_tensors: - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = 
ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) + try: + captured_grads_input.append(grad) + if len(captured_grads_input) == num_tensors: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + captured_grads_input.clear() + + except Exception as e: + print(f"Error occurred in input_backward_hook: {e}") + print(f"Captured grads input: {captured_grads_input}") + print(f"Num tensors: {num_tensors}") + print(f"Updated primitive name: {updated_primitive_name}") + raise # 重新引发异常 return input_backward_hook def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): def output_backward_hook(grad): - captured_grads_output.append(grad) - if len(captured_grads_output) == num_output_tensors: - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads_output) - ) - service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) + try: + captured_grads_output.append(grad) + if len(captured_grads_output) == num_output_tensors: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardOutputs( + grad_output=tuple(captured_grads_output) + ) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + # 释放 captured_grads_output 列表 + captured_grads_output.clear() + except Exception as e: + print(f"Error 
occurred in output_backward_hook: {e}") + print(f"Captured grads output: {captured_grads_output}") + print(f"Num output tensors: {num_output_tensors}") + print(f"Updated primitive name: {updated_primitive_name}") + raise # 重新引发异常 return output_backward_hook def func(self, *args, **kwargs): - service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) - if primitive_name not in service_instance.primitive_counters: - service_instance.primitive_counters[primitive_name] = 0 - else: - service_instance.primitive_counters[primitive_name] += 1 - - current_count = service_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] - captured_grads_output = [] - - if not service_instance.switch: - return origin_func(*args, **kwargs) - - hooked_inputs = [] - num_tensors = sum(isinstance(arg, Tensor) for arg in args) - print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 - print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 - input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) - for idx, arg in enumerate(args): - print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 - if isinstance(arg, Tensor): - arg_hooked = ops.HookBackward(input_backward_hook)(arg) - hooked_inputs.append(arg_hooked) + try: + service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) + if primitive_name not in service_instance.primitive_counters: + service_instance.primitive_counters[primitive_name] = 0 else: - hooked_inputs.append(arg) + service_instance.primitive_counters[primitive_name] += 1 + + current_count = service_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] + captured_grads_output = [] - out = origin_func(*hooked_inputs, **kwargs) - forward_primitive_name = 
f"{updated_primitive_name}.{Const.FORWARD}" + if not service_instance.switch: + return origin_func(*args, **kwargs) - if service_instance.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) - service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) - if service_instance.data_collector.if_return_forward_new_output(): - out = service_instance.data_collector.get_forward_new_output() - - num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 - print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) - - if isinstance(out, Tensor): - out = ops.HookBackward(output_backward_hook)(out) - elif isinstance(out, tuple): - hooked_outputs = [] - for tensor in out: - if isinstance(tensor, Tensor): - hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + hooked_inputs = [] + num_tensors = sum(isinstance(arg, Tensor) for arg in args) + print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 + print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 + input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) + for idx, arg in enumerate(args): + print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 + if isinstance(arg, Tensor): + arg_hooked = ops.HookBackward(input_backward_hook)(arg) + hooked_inputs.append(arg_hooked) else: - hooked_outputs.append(tensor) - out = tuple(hooked_outputs) + hooked_inputs.append(arg) + + out = origin_func(*hooked_inputs, **kwargs) + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - return out + if service_instance.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) + 
service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + if service_instance.data_collector.if_return_forward_new_output(): + out = service_instance.data_collector.get_forward_new_output() + + num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 + print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 + output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) + + if isinstance(out, Tensor): + out = ops.HookBackward(output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + out = tuple(hooked_outputs) + return out + except Exception as e: + print(f"Error occurred in wrap_primitive: {e}") + print(f"Arguments(args): {args}") + print(f"Arguments(kwargs): {kwargs}") + print(f"Current primitive name: {primitive_name}") + raise Exception("This is a primitive op dump error") return func def register_hooks(self): -- Gitee From b3c234dedec9c4d63660461aabd1aaf96f99420f Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 3 Aug 2024 15:33:06 +0800 Subject: [PATCH 45/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 1d2439b14c2..2b27e84b7b9 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -114,7 +114,7 @@ class Service: print(f"Captured grads input: {captured_grads_input}") print(f"Num tensors: {num_tensors}") print(f"Updated primitive name: {updated_primitive_name}") - raise # 重新引发异常 + raise # 重新引发异常 
return input_backward_hook def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): -- Gitee From 02e759b5866555068f308c2de6d2445317a56adb Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 3 Aug 2024 15:33:55 +0800 Subject: [PATCH 46/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 2b27e84b7b9..66e5d71e664 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -114,7 +114,7 @@ class Service: print(f"Captured grads input: {captured_grads_input}") print(f"Num tensors: {num_tensors}") print(f"Updated primitive name: {updated_primitive_name}") - raise # 重新引发异常 + raise Exception("This is a primitive op input_backward dump error") # 重新引发异常 return input_backward_hook def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): @@ -136,7 +136,7 @@ class Service: print(f"Captured grads output: {captured_grads_output}") print(f"Num output tensors: {num_output_tensors}") print(f"Updated primitive name: {updated_primitive_name}") - raise # 重新引发异常 + raise Exception("This is a primitive op output_backward dump error")# 重新引发异常 return output_backward_hook -- Gitee From 712981c40d3153a5ef7cf27f1ba4d013edea1eb7 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 3 Aug 2024 16:28:42 +0800 Subject: [PATCH 47/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 66e5d71e664..b8561a2d67a 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ 
b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -272,8 +272,8 @@ class Service: def register_hook_new(self): logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": - api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) - api_register.api_set_hook_func() + # api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + # api_register.api_set_hook_func() if self.model: self.register_hooks() -- Gitee From de6b85dd48facbf8b44d5a9c79dff50f624a5e5e Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 3 Aug 2024 17:47:58 +0800 Subject: [PATCH 48/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index b8561a2d67a..7c26f54ec34 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -142,7 +142,7 @@ class Service: def func(self, *args, **kwargs): try: - service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) + # service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) if primitive_name not in service_instance.primitive_counters: service_instance.primitive_counters[primitive_name] = 0 else: @@ -171,7 +171,7 @@ class Service: out = origin_func(*hooked_inputs, **kwargs) forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - + service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) if service_instance.data_collector: module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) -- Gitee From 
9a781c25756b57a9ab325d86803b5b727253219a Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Sat, 3 Aug 2024 17:51:57 +0800 Subject: [PATCH 49/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 7c26f54ec34..ffdbc1e56ad 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -103,6 +103,7 @@ class Service: captured_grads_input.append(grad) if len(captured_grads_input) == num_tensors: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) service_instance.data_collector.backward_input_data_collect( backward_primitive_name, service_instance, os.getpid(), new_module_input_output @@ -123,6 +124,7 @@ class Service: captured_grads_output.append(grad) if len(captured_grads_output) == num_output_tensors: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + service_instance.data_collector.visit_and_clear_overflow_status(updated_primitive_name) new_module_input_output = ModuleBackwardOutputs( grad_output=tuple(captured_grads_output) ) @@ -171,7 +173,7 @@ class Service: out = origin_func(*hooked_inputs, **kwargs) forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) + service_instance.data_collector.visit_and_clear_overflow_status(forward_primitive_name) if service_instance.data_collector: module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), 
module_input_output) -- Gitee From faf5b9f00c0f4661dc9695216ea890a8bd1f3826 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 11:22:21 +0800 Subject: [PATCH 50/67] Update service.py --- .../msprobe/mindspore/service.py | 78 +++++++++++-------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index ffdbc1e56ad..12c5ea98c0e 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -97,6 +97,7 @@ class Service: # return backward_hook def wrap_primitive(self, origin_func, primitive_name): service_instance = self + def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): def input_backward_hook(grad): try: @@ -110,12 +111,9 @@ class Service: ) captured_grads_input.clear() - except Exception as e: - print(f"Error occurred in input_backward_hook: {e}") - print(f"Captured grads input: {captured_grads_input}") - print(f"Num tensors: {num_tensors}") - print(f"Updated primitive name: {updated_primitive_name}") - raise Exception("This is a primitive op input_backward dump error") # 重新引发异常 + except Exception as exception: + raise Exception(f"This is a primitive op input_backward dump error: {exception}" + f", updated_primitive_name: {updated_primitive_name}") return input_backward_hook def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): @@ -124,7 +122,7 @@ class Service: captured_grads_output.append(grad) if len(captured_grads_output) == num_output_tensors: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - service_instance.data_collector.visit_and_clear_overflow_status(updated_primitive_name) + service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) new_module_input_output = ModuleBackwardOutputs( 
grad_output=tuple(captured_grads_output) ) @@ -133,38 +131,51 @@ class Service: ) # 释放 captured_grads_output 列表 captured_grads_output.clear() - except Exception as e: - print(f"Error occurred in output_backward_hook: {e}") - print(f"Captured grads output: {captured_grads_output}") - print(f"Num output tensors: {num_output_tensors}") - print(f"Updated primitive name: {updated_primitive_name}") - raise Exception("This is a primitive op output_backward dump error")# 重新引发异常 + except Exception as exception: + raise Exception(f"This is a primitive op output_backward dump error: {exception}" + f", updated_primitive_name: {updated_primitive_name}") return output_backward_hook + def hook_primitive_inputs(args, captured_grads_input, updated_primitive_name): + hooked_inputs = [] + num_tensors = sum(isinstance(arg, Tensor) for arg in args) + input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) + for idx, arg in enumerate(args): + if isinstance(arg, Tensor): + arg_hooked = ops.HookBackward(input_backward_hook)(arg) + hooked_inputs.append(arg_hooked) + else: + hooked_inputs.append(arg) + return hooked_inputs + + + def hook_primitive_outputs(out, create_output_backward_hook, output_backward_hook): + if isinstance(out, Tensor): + return ops.HookBackward(create_output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(create_output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + return tuple(hooked_outputs) + return out def func(self, *args, **kwargs): try: - # service_instance.data_collector.visit_and_clear_overflow_status(primitive_name) - if primitive_name not in service_instance.primitive_counters: - service_instance.primitive_counters[primitive_name] = 0 - else: - service_instance.primitive_counters[primitive_name] += 1 - + service_instance._update_primitive_counters(primitive_name) 
current_count = service_instance.primitive_counters[primitive_name] updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] - captured_grads_output = [] + captured_grads_input, captured_grads_output = [], [] if not service_instance.switch: return origin_func(*args, **kwargs) hooked_inputs = [] num_tensors = sum(isinstance(arg, Tensor) for arg in args) - print(f"Number of tensor arguments: {num_tensors}") # 打印 num_tensors 的值 - print(f"Arguments(args): type={type(args)}") # 打印每个 arg 的类型 input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) for idx, arg in enumerate(args): - print(f"Argument {idx}: type={type(arg)}") # 打印每个 arg 的类型 if isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) hooked_inputs.append(arg_hooked) @@ -179,11 +190,11 @@ class Service: service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) if service_instance.data_collector.if_return_forward_new_output(): out = service_instance.data_collector.get_forward_new_output() - + num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) - + if isinstance(out, Tensor): out = ops.HookBackward(output_backward_hook)(out) elif isinstance(out, tuple): @@ -196,14 +207,17 @@ class Service: out = tuple(hooked_outputs) return out - except Exception as e: - print(f"Error occurred in wrap_primitive: {e}") - print(f"Arguments(args): {args}") - print(f"Arguments(kwargs): {kwargs}") - print(f"Current primitive name: {primitive_name}") - raise Exception("This is a primitive op dump error") + except Exception as exception: + raise Exception(f"This is a primitive op dump error: 
{exception}" + f", primitive_name: {primitive_name}") return func + def _update_primitive_counters(self, primitive_name): + if primitive_name not in self.primitive_counters: + self.primitive_counters[primitive_name] = 0 + else: + self.primitive_counters[primitive_name] += 1 + def register_hooks(self): primitive_set = set() for name, cell in self.model.cells_and_names(): -- Gitee From bbac45931076a68b1e87308862e359550140ddf4 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 11:40:43 +0800 Subject: [PATCH 51/67] Update service.py --- .../msprobe/mindspore/service.py | 169 ++++++++++-------- 1 file changed, 92 insertions(+), 77 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 12c5ea98c0e..4cc60821606 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -77,69 +77,78 @@ class Service: return wrap_forward_hook, wrap_backward_hook - # def create_backward_hook(captured_grads, num_grads, updated_primitive_name, is_input): - # def backward_hook(grad): - # captured_grads.append(grad) - # if len(captured_grads) == num_grads: - # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # if is_input: - # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) - # service_instance.data_collector.backward_input_data_collect( - # backward_primitive_name, service_instance, os.getpid(), new_module_input_output - # ) - # else: - # new_module_input_output = ModuleBackwardOutputs( - # grad_output=tuple(captured_grads) - # ) - # service_instance.data_collector.backward_output_data_collect( - # backward_primitive_name, service_instance, os.getpid(), new_module_input_output - # ) - # return backward_hook def wrap_primitive(self, origin_func, primitive_name): service_instance = self - def create_input_backward_hook(captured_grads_input, num_tensors, 
updated_primitive_name): - def input_backward_hook(grad): + # def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): + # def input_backward_hook(grad): + # try: + # captured_grads_input.append(grad) + # if len(captured_grads_input) == num_tensors: + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) + # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) + # service_instance.data_collector.backward_input_data_collect( + # backward_primitive_name, service_instance, os.getpid(), new_module_input_output + # ) + # captured_grads_input.clear() + # + # except Exception as exception: + # raise Exception(f"This is a primitive op input_backward dump error: {exception}" + # f", updated_primitive_name: {updated_primitive_name}") + # return input_backward_hook + # + # def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): + # def output_backward_hook(grad): + # try: + # captured_grads_output.append(grad) + # if len(captured_grads_output) == num_output_tensors: + # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + # service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) + # new_module_input_output = ModuleBackwardOutputs( + # grad_output=tuple(captured_grads_output) + # ) + # service_instance.data_collector.backward_output_data_collect( + # backward_primitive_name, service_instance, os.getpid(), new_module_input_output + # ) + # # 释放 captured_grads_output 列表 + # captured_grads_output.clear() + # except Exception as exception: + # raise Exception(f"This is a primitive op output_backward dump error: {exception}" + # f", updated_primitive_name: {updated_primitive_name}") + # return output_backward_hook + + def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type): + 
def backward_hook(grad): try: - captured_grads_input.append(grad) - if len(captured_grads_input) == num_tensors: + captured_grads.append(grad) + if len(captured_grads) == num_tensors: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - captured_grads_input.clear() + if hook_type == 'input': + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + elif hook_type == 'output': + new_module_input_output = ModuleBackwardOutputs(grad_output=tuple(captured_grads)) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + + captured_grads.clear() except Exception as exception: - raise Exception(f"This is a primitive op input_backward dump error: {exception}" + raise Exception(f"This is a primitive op {hook_type}_backward dump error: {exception}" f", updated_primitive_name: {updated_primitive_name}") - return input_backward_hook - def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): - def output_backward_hook(grad): - try: - captured_grads_output.append(grad) - if len(captured_grads_output) == num_output_tensors: - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) - new_module_input_output = ModuleBackwardOutputs( - grad_output=tuple(captured_grads_output) - ) - 
service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - # 释放 captured_grads_output 列表 - captured_grads_output.clear() - except Exception as exception: - raise Exception(f"This is a primitive op output_backward dump error: {exception}" - f", updated_primitive_name: {updated_primitive_name}") - return output_backward_hook + return backward_hook def hook_primitive_inputs(args, captured_grads_input, updated_primitive_name): hooked_inputs = [] num_tensors = sum(isinstance(arg, Tensor) for arg in args) - input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) + input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, + 'input') for idx, arg in enumerate(args): if isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) @@ -148,15 +157,19 @@ class Service: hooked_inputs.append(arg) return hooked_inputs + def hook_primitive_outputs(out, captured_grads_output, updated_primitive_name): + num_output_tensors = sum( + isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 + output_backward_hook = create_backward_hook(captured_grads_output, num_output_tensors, + updated_primitive_name, 'output') - def hook_primitive_outputs(out, create_output_backward_hook, output_backward_hook): if isinstance(out, Tensor): - return ops.HookBackward(create_output_backward_hook)(out) + return ops.HookBackward(output_backward_hook)(out) elif isinstance(out, tuple): hooked_outputs = [] for tensor in out: if isinstance(tensor, Tensor): - hooked_outputs.append(ops.HookBackward(create_output_backward_hook)(tensor)) + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) else: hooked_outputs.append(tensor) return tuple(hooked_outputs) @@ -172,39 +185,41 @@ class Service: if not service_instance.switch: return 
origin_func(*args, **kwargs) - hooked_inputs = [] - num_tensors = sum(isinstance(arg, Tensor) for arg in args) - input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) - for idx, arg in enumerate(args): - if isinstance(arg, Tensor): - arg_hooked = ops.HookBackward(input_backward_hook)(arg) - hooked_inputs.append(arg_hooked) - else: - hooked_inputs.append(arg) + # hooked_inputs = [] + # num_tensors = sum(isinstance(arg, Tensor) for arg in args) + # input_backward_hook = create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) + # for idx, arg in enumerate(args): + # if isinstance(arg, Tensor): + # arg_hooked = ops.HookBackward(input_backward_hook)(arg) + # hooked_inputs.append(arg_hooked) + # else: + # hooked_inputs.append(arg) + hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name) out = origin_func(*hooked_inputs, **kwargs) forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" service_instance.data_collector.visit_and_clear_overflow_status(forward_primitive_name) if service_instance.data_collector: module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) - service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + service_instance.data_collector.forward_data_collect(forward_primitive_name, self, + os.getpid(), module_input_output) if service_instance.data_collector.if_return_forward_new_output(): out = service_instance.data_collector.get_forward_new_output() - - num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 - print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) - - if isinstance(out, Tensor): - out = 
ops.HookBackward(output_backward_hook)(out) - elif isinstance(out, tuple): - hooked_outputs = [] - for tensor in out: - if isinstance(tensor, Tensor): - hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) - else: - hooked_outputs.append(tensor) - out = tuple(hooked_outputs) + out = hook_primitive_outputs(out, captured_grads_output, updated_primitive_name) + # num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 + # print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 + # output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) + # + # if isinstance(out, Tensor): + # out = ops.HookBackward(output_backward_hook)(out) + # elif isinstance(out, tuple): + # hooked_outputs = [] + # for tensor in out: + # if isinstance(tensor, Tensor): + # hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + # else: + # hooked_outputs.append(tensor) + # out = tuple(hooked_outputs) return out except Exception as exception: -- Gitee From 51a52c0117bb1b244c6c5c010638a3da2f33b883 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 11:47:52 +0800 Subject: [PATCH 52/67] Update service.py --- .../msprobe/mindspore/service.py | 61 ------------------- 1 file changed, 61 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 4cc60821606..200f7759bf8 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -80,44 +80,6 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): service_instance = self - # def create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name): - # def input_backward_hook(grad): - # try: - # captured_grads_input.append(grad) - # if len(captured_grads_input) == num_tensors: - # 
backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) - # new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads_input)) - # service_instance.data_collector.backward_input_data_collect( - # backward_primitive_name, service_instance, os.getpid(), new_module_input_output - # ) - # captured_grads_input.clear() - # - # except Exception as exception: - # raise Exception(f"This is a primitive op input_backward dump error: {exception}" - # f", updated_primitive_name: {updated_primitive_name}") - # return input_backward_hook - # - # def create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name): - # def output_backward_hook(grad): - # try: - # captured_grads_output.append(grad) - # if len(captured_grads_output) == num_output_tensors: - # backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - # service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) - # new_module_input_output = ModuleBackwardOutputs( - # grad_output=tuple(captured_grads_output) - # ) - # service_instance.data_collector.backward_output_data_collect( - # backward_primitive_name, service_instance, os.getpid(), new_module_input_output - # ) - # # 释放 captured_grads_output 列表 - # captured_grads_output.clear() - # except Exception as exception: - # raise Exception(f"This is a primitive op output_backward dump error: {exception}" - # f", updated_primitive_name: {updated_primitive_name}") - # return output_backward_hook - def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type): def backward_hook(grad): try: @@ -185,15 +147,6 @@ class Service: if not service_instance.switch: return origin_func(*args, **kwargs) - # hooked_inputs = [] - # num_tensors = sum(isinstance(arg, Tensor) for arg in args) - # input_backward_hook = 
create_input_backward_hook(captured_grads_input, num_tensors, updated_primitive_name) - # for idx, arg in enumerate(args): - # if isinstance(arg, Tensor): - # arg_hooked = ops.HookBackward(input_backward_hook)(arg) - # hooked_inputs.append(arg_hooked) - # else: - # hooked_inputs.append(arg) hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name) out = origin_func(*hooked_inputs, **kwargs) @@ -206,20 +159,6 @@ class Service: if service_instance.data_collector.if_return_forward_new_output(): out = service_instance.data_collector.get_forward_new_output() out = hook_primitive_outputs(out, captured_grads_output, updated_primitive_name) - # num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 - # print(f"Arguments(out): type={type(out)}") # 打印每个 arg 的类型 - # output_backward_hook = create_output_backward_hook(captured_grads_output, num_output_tensors, updated_primitive_name) - # - # if isinstance(out, Tensor): - # out = ops.HookBackward(output_backward_hook)(out) - # elif isinstance(out, tuple): - # hooked_outputs = [] - # for tensor in out: - # if isinstance(tensor, Tensor): - # hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) - # else: - # hooked_outputs.append(tensor) - # out = tuple(hooked_outputs) return out except Exception as exception: -- Gitee From b62bcb43e3f7ea0a726187c947437413d5a2f2a4 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 17:25:24 +0800 Subject: [PATCH 53/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 617f30ca1fa..a6c570f55f1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -142,16 +142,16 @@ class 
Service: return tuple(hooked_outputs) return out - def func(self, *args, **kwargs): + def func(instance_self, *args, **kwargs): try: + if not service_instance.switch: + return origin_func(*args, **kwargs) + service_instance._update_primitive_counters(primitive_name) current_count = service_instance.primitive_counters[primitive_name] updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" captured_grads_input, captured_grads_output = [], [] - if not service_instance.switch: - return origin_func(*args, **kwargs) - hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name) out = origin_func(*hooked_inputs, **kwargs) @@ -159,7 +159,7 @@ class Service: service_instance.data_collector.visit_and_clear_overflow_status(forward_primitive_name) if service_instance.data_collector: module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) - service_instance.data_collector.forward_data_collect(forward_primitive_name, self, + service_instance.data_collector.forward_data_collect(forward_primitive_name, instance_self, os.getpid(), module_input_output) if service_instance.data_collector.if_return_forward_new_output(): out = service_instance.data_collector.get_forward_new_output() @@ -191,6 +191,8 @@ class Service: self.current_iter += 1 self.data_collector.update_iter(self.current_iter) HOOKCell.cell_count = defaultdict(int) + self.primitive_counters.clear() + def start(self, model=None): self.model = model -- Gitee From d3fdd9c3d7cc466dac0d3c42976a50bd2c7348f0 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 20:27:10 +0800 Subject: [PATCH 54/67] Update service.py --- .../msprobe/mindspore/service.py | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index a6c570f55f1..0829465bcc9 100644 --- 
a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -17,10 +17,10 @@ import os import copy from pathlib import Path import functools -from mindspore.common.tensor import Tensor -from mindspore import ops from collections import defaultdict +from mindspore.common.tensor import Tensor +from mindspore import ops from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import BaseScope from msprobe.mindspore.common.utils import get_rank_if_initialized @@ -29,7 +29,8 @@ from msprobe.mindspore.common.log import logger from msprobe.core.common.utils import Const from msprobe.core.common.exceptions import DistributedNotInitializedError from msprobe.mindspore.dump.hook_cell.api_registry import api_register -from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs, ModuleBackwardInputs, ModuleBackwardOutputs +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs,\ + ModuleBackwardInputs, ModuleBackwardOutputs from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell @@ -106,8 +107,13 @@ class Service: captured_grads.clear() except Exception as exception: - raise Exception(f"This is a primitive op {hook_type}_backward dump error: {exception}" - f", updated_primitive_name: {updated_primitive_name}") + raise Exception( + "This is a primitive op {hook_type}_backward dump error: {exception}," + " updated_primitive_name: {updated_primitive_name}".format( + hook_type=hook_type, exception=exception, updated_primitive_name=updated_primitive_name + ) + ) +#改为.format() return backward_hook @@ -116,7 +122,7 @@ class Service: num_tensors = sum(isinstance(arg, Tensor) for arg in args) input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, 'input') - for idx, arg in enumerate(args): + for _, arg in enumerate(args): if 
isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) hooked_inputs.append(arg_hooked) @@ -144,14 +150,14 @@ class Service: def func(instance_self, *args, **kwargs): try: + service_instance.update_primitive_counters(primitive_name) + current_count = service_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + if not service_instance.switch: return origin_func(*args, **kwargs) - service_instance._update_primitive_counters(primitive_name) - current_count = service_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" captured_grads_input, captured_grads_output = [], [] - hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name) out = origin_func(*hooked_inputs, **kwargs) @@ -167,25 +173,27 @@ class Service: return out except Exception as exception: - raise Exception(f"This is a primitive op dump error: {exception}" - f", primitive_name: {primitive_name}") + raise Exception("This is a primitive op dump error: {}," + " primitive_name: {}".format(exception, primitive_name)) + return func - def _update_primitive_counters(self, primitive_name): + def update_primitive_counters(self, primitive_name): if primitive_name not in self.primitive_counters: self.primitive_counters[primitive_name] = 0 else: self.primitive_counters[primitive_name] += 1 def register_hooks(self): - primitive_set = set() - for name, cell in self.model.cells_and_names(): - for pname, primitive in cell._primitives.items(): - primitive_set.add((pname, primitive)) - - for pname, primitive in primitive_set: - NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname)}) - primitive.__class__ = NewPrimitive + primitive_set = set() + for _, cell in self.model.cells_and_names(): + for pname, primitive in cell._primitives.items(): + 
primitive_set.add((pname, primitive)) + + for pname, primitive in primitive_set: + NewPrimitive = type('NewPrimitive', (primitive.__class__,), + {'__call__': self.wrap_primitive(primitive.__call__, pname)}) + primitive.__class__ = NewPrimitive def step(self): self.current_iter += 1 -- Gitee From 2e4e86c099cd49b30d4f8bd4b93ee2c86fb67d10 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 20:33:18 +0800 Subject: [PATCH 55/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 0829465bcc9..8f3e4c3472e 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -112,8 +112,8 @@ class Service: " updated_primitive_name: {updated_primitive_name}".format( hook_type=hook_type, exception=exception, updated_primitive_name=updated_primitive_name ) - ) -#改为.format() + ) from exception + return backward_hook @@ -174,7 +174,7 @@ class Service: return out except Exception as exception: raise Exception("This is a primitive op dump error: {}," - " primitive_name: {}".format(exception, primitive_name)) + " primitive_name: {}".format(exception, primitive_name)) from exception return func -- Gitee From 07bb2eb7e97b96f9c6c095b3bf1e44f3ee3b11fc Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Mon, 5 Aug 2024 20:35:57 +0800 Subject: [PATCH 56/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 8f3e4c3472e..bc81eb7bc56 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -266,8 +266,8 @@ class Service: def 
register_hook_new(self): logger.info("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": - # api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) - # api_register.api_set_hook_func() + api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) + api_register.api_set_hook_func() if self.model: self.register_hooks() -- Gitee From 66504478621bb1cf3396fb640983f60c83a91c3d Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 11:16:01 +0800 Subject: [PATCH 57/67] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A3=80=E8=A7=86?= =?UTF-8?q?=E6=84=8F=E8=A7=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_processor/mindspore_processor.py | 4 +- .../msprobe/mindspore/service.py | 51 ++++++++++--------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index e7718504791..b28817e4aa7 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -75,8 +75,8 @@ class MindsporeDataProcessor(BaseDataProcessor): return tensor_stat elif data.dtype == ms.bool_: data_np = data.asnumpy() - tensor_stat.max = bool(np.max(data_np)) - tensor_stat.min = bool(np.min(data_np)) + tensor_stat.max = np.max(data_np) + tensor_stat.min = np.min(data_np) elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() elif data.dtype == ms.complex64 or data.dtype == ms.complex128: diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index bc81eb7bc56..6ff31664156 100644 --- 
a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -88,24 +88,25 @@ class Service: def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type): def backward_hook(grad): + captured_grads.append(grad) try: - captured_grads.append(grad) - if len(captured_grads) == num_tensors: + if len(captured_grads) == num_tensors and hook_type == Const.INPUT: backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) - - if hook_type == 'input': - new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) - service_instance.data_collector.backward_input_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - elif hook_type == 'output': - new_module_input_output = ModuleBackwardOutputs(grad_output=tuple(captured_grads)) - service_instance.data_collector.backward_output_data_collect( - backward_primitive_name, service_instance, os.getpid(), new_module_input_output - ) - + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) captured_grads.clear() + elif len(captured_grads) == num_tensors and hook_type == Const.OUTPUT: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) + new_module_input_output = ModuleBackwardOutputs(grad_output=tuple(captured_grads)) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + captured_grads.clear() + except Exception as exception: raise Exception( "This is a primitive op {hook_type}_backward dump error: {exception}," @@ 
-114,14 +115,13 @@ class Service: ) ) from exception - return backward_hook def hook_primitive_inputs(args, captured_grads_input, updated_primitive_name): hooked_inputs = [] num_tensors = sum(isinstance(arg, Tensor) for arg in args) input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, - 'input') + Const.INPUT) for _, arg in enumerate(args): if isinstance(arg, Tensor): arg_hooked = ops.HookBackward(input_backward_hook)(arg) @@ -131,10 +131,12 @@ class Service: return hooked_inputs def hook_primitive_outputs(out, captured_grads_output, updated_primitive_name): - num_output_tensors = sum( - isinstance(tensor, Tensor) for tensor in out if isinstance(out, tuple)) if isinstance(out, tuple) else 1 + if isinstance(out, tuple): + num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out) + else: + num_output_tensors = 1 output_backward_hook = create_backward_hook(captured_grads_output, num_output_tensors, - updated_primitive_name, 'output') + updated_primitive_name, Const.OUTPUT) if isinstance(out, Tensor): return ops.HookBackward(output_backward_hook)(out) @@ -148,7 +150,7 @@ class Service: return tuple(hooked_outputs) return out - def func(instance_self, *args, **kwargs): + def wrapped_primitive_call(instance_self, *args, **kwargs): try: service_instance.update_primitive_counters(primitive_name) current_count = service_instance.primitive_counters[primitive_name] @@ -176,7 +178,7 @@ class Service: raise Exception("This is a primitive op dump error: {}," " primitive_name: {}".format(exception, primitive_name)) from exception - return func + return wrapped_primitive_call def update_primitive_counters(self, primitive_name): if primitive_name not in self.primitive_counters: @@ -201,7 +203,6 @@ class Service: HOOKCell.cell_count = defaultdict(int) self.primitive_counters.clear() - def start(self, model=None): self.model = model self.start_call = True @@ -269,7 +270,7 @@ class Service: 
api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() if self.model: + print(f"Type of self.model: {type(self.model)}") # 使用 print + # 或者使用 logger + logger.info(f"Type of self.model: {type(self.model)}") self.register_hooks() - - - -- Gitee From 57b22c4e8a42630c224aeecfef3fc4e2e45075dd Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 11:24:16 +0800 Subject: [PATCH 58/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 6ff31664156..147dfde8b79 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -265,7 +265,7 @@ class Service: dump_file_path, stack_file_path, construct_file_path, dump_data_dir, None) def register_hook_new(self): - logger.info("The {} hook function is successfully mounted to the model.".format(self.config.task)) + logger.info("The {} hook function1111 is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() -- Gitee From e9eac8eab43cec60586f062b98669f6a7884a0a3 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 12:43:16 +0800 Subject: [PATCH 59/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 147dfde8b79..9b9da9ec3fe 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -21,6 +21,7 @@ from collections import 
defaultdict from mindspore.common.tensor import Tensor from mindspore import ops +from mindspore import nn from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import BaseScope from msprobe.mindspore.common.utils import get_rank_if_initialized @@ -31,6 +32,7 @@ from msprobe.core.common.exceptions import DistributedNotInitializedError from msprobe.mindspore.dump.hook_cell.api_registry import api_register from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs,\ ModuleBackwardInputs, ModuleBackwardOutputs +from msprobe.core.common.exceptions import MsprobeException from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell @@ -203,8 +205,16 @@ class Service: HOOKCell.cell_count = defaultdict(int) self.primitive_counters.clear() + def check_model_valid(model): + if not model or isinstance(model, nn.Cell): + return model + raise MsprobeException( + MsprobeException.INVALID_PARAM_ERROR, "model 参数必须是 mindspore.nn.Cell 类型。" + ) + + def start(self, model=None): - self.model = model + self.model = self.check_model_valid(model) self.start_call = True logger.info("msprobe: debugger.start() is set successfully") if self.config.step and self.current_iter > max(self.config.step): -- Gitee From deb692050d5252047d5a826dbc4bb2f8b1511bb2 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 12:47:34 +0800 Subject: [PATCH 60/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 9b9da9ec3fe..9f001fd8799 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -205,6 +205,7 @@ class Service: HOOKCell.cell_count = defaultdict(int) self.primitive_counters.clear() + @staticmethod def check_model_valid(model): 
if not model or isinstance(model, nn.Cell): return model -- Gitee From 9ea3eb706d0fb414ed6149c7016ed5dc5afe767b Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 12:57:13 +0800 Subject: [PATCH 61/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 9f001fd8799..912cb9608ef 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -87,7 +87,7 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): service_instance = self - + def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type): def backward_hook(grad): captured_grads.append(grad) @@ -206,7 +206,7 @@ class Service: self.primitive_counters.clear() @staticmethod - def check_model_valid(model): + def check_model_valid(self, model): if not model or isinstance(model, nn.Cell): return model raise MsprobeException( -- Gitee From 89d57719f029081eaa936abeebfd1f0092706e78 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 13:01:59 +0800 Subject: [PATCH 62/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 912cb9608ef..9a240c379db 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -87,7 +87,7 @@ class Service: def wrap_primitive(self, origin_func, primitive_name): service_instance = self - + def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type): def backward_hook(grad): captured_grads.append(grad) @@ -206,7 +206,7 @@ class Service: 
self.primitive_counters.clear() @staticmethod - def check_model_valid(self, model): + def check_model_valid(model): if not model or isinstance(model, nn.Cell): return model raise MsprobeException( @@ -215,7 +215,7 @@ class Service: def start(self, model=None): - self.model = self.check_model_valid(model) + self.model = Service.check_model_valid(model) self.start_call = True logger.info("msprobe: debugger.start() is set successfully") if self.config.step and self.current_iter > max(self.config.step): -- Gitee From 1525a443446b9de08412448887c9691948f20811 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 14:15:10 +0800 Subject: [PATCH 63/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 9a240c379db..8b71e988c8d 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -205,7 +205,7 @@ class Service: HOOKCell.cell_count = defaultdict(int) self.primitive_counters.clear() - @staticmethod +# @staticmethod def check_model_valid(model): if not model or isinstance(model, nn.Cell): return model @@ -213,7 +213,6 @@ class Service: MsprobeException.INVALID_PARAM_ERROR, "model 参数必须是 mindspore.nn.Cell 类型。" ) - def start(self, model=None): self.model = Service.check_model_valid(model) self.start_call = True -- Gitee From f7ef581ea6ed7bba6dc7b01c4000004efa93e5a3 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 14:32:52 +0800 Subject: [PATCH 64/67] Update service.py --- .../msprobe/mindspore/service.py | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index 8b71e988c8d..af0bf466152 100644 --- 
a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -153,33 +153,51 @@ class Service: return out def wrapped_primitive_call(instance_self, *args, **kwargs): - try: - service_instance.update_primitive_counters(primitive_name) - current_count = service_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - if not service_instance.switch: - return origin_func(*args, **kwargs) + service_instance.update_primitive_counters(primitive_name) + current_count = service_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + + if not service_instance.switch: + return origin_func(*args, **kwargs) + + captured_grads_input, captured_grads_output = [], [] - captured_grads_input, captured_grads_output = [], [] + try: hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name) + except Exception as exception: + raise Exception("This is a primitive op dump error during input hooking: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception + try: out = origin_func(*hooked_inputs, **kwargs) - forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - service_instance.data_collector.visit_and_clear_overflow_status(forward_primitive_name) - if service_instance.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) + except Exception as exception: + raise Exception("This is a primitive op dump error during function call: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception + + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" + service_instance.data_collector.visit_and_clear_overflow_status(forward_primitive_name) + if service_instance.data_collector: + module_input_output = 
ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) + try: service_instance.data_collector.forward_data_collect(forward_primitive_name, instance_self, os.getpid(), module_input_output) - if service_instance.data_collector.if_return_forward_new_output(): - out = service_instance.data_collector.get_forward_new_output() - out = hook_primitive_outputs(out, captured_grads_output, updated_primitive_name) + except Exception as exception: + raise Exception("This is a primitive op dump error during forward data collection: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception - return out + if service_instance.data_collector.if_return_forward_new_output(): + out = service_instance.data_collector.get_forward_new_output() + + try: + out = hook_primitive_outputs(out, captured_grads_output, updated_primitive_name) except Exception as exception: - raise Exception("This is a primitive op dump error: {}," + raise Exception("This is a primitive op dump error during output hooking: {}," " primitive_name: {}".format(exception, primitive_name)) from exception + return out + + return wrapped_primitive_call def update_primitive_counters(self, primitive_name): @@ -205,7 +223,7 @@ class Service: HOOKCell.cell_count = defaultdict(int) self.primitive_counters.clear() -# @staticmethod + @staticmethod def check_model_valid(model): if not model or isinstance(model, nn.Cell): return model -- Gitee From f32090296817bace40c8e971cae1c785866b6d9b Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 14:39:23 +0800 Subject: [PATCH 65/67] Update service.py --- debug/accuracy_tools/msprobe/mindspore/service.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index af0bf466152..b795ec10342 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py 
@@ -293,12 +293,9 @@ class Service: dump_file_path, stack_file_path, construct_file_path, dump_data_dir, None) def register_hook_new(self): - logger.info("The {} hook function1111 is successfully mounted to the model.".format(self.config.task)) + logger.info("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() if self.model: - print(f"Type of self.model: {type(self.model)}") # 使用 print - # 或者使用 logger - logger.info(f"Type of self.model: {type(self.model)}") self.register_hooks() -- Gitee From 554232ab08dc914c322e11528019b664068f55c6 Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 15:04:40 +0800 Subject: [PATCH 66/67] primitive op dump --- .gitignore | 9 +- .../bench_functions/npu_fusion_attention.py | 56 +- .../api_accuracy_checker/common/utils.py | 6 +- .../run_ut/data_generate.py | 8 +- .../run_ut/multi_run_ut.py | 11 +- .../tensor_transport_layer/attl.py | 9 +- debug/accuracy_tools/grad_tool/README.md | 34 +- .../grad_tool/common/base_comparator.py | 20 +- .../grad_tool/common/constant.py | 4 +- .../accuracy_tools/grad_tool/common/utils.py | 11 +- .../grad_tool/grad_ms/global_context.py | 30 +- .../grad_tool/grad_ms/grad_analyzer.py | 26 +- .../grad_tool/grad_ms/grad_comparator.py | 13 +- .../accuracy_tools/grad_tool/grad_ms/utils.py | 19 +- .../grad_tool/grad_pt/grad_comparator.py | 15 +- .../grad_tool/grad_pt/grad_monitor.py | 7 +- debug/accuracy_tools/msprobe/README.md | 45 +- debug/accuracy_tools/msprobe/config/README.md | 60 +- .../msprobe/core/common/const.py | 22 +- .../msprobe/core/common/utils.py | 11 +- .../msprobe/core/common_config.py | 41 +- .../msprobe/core/data_dump/data_collector.py | 45 +- .../core/data_dump/data_processor/base.py | 105 ++- .../core/data_dump/data_processor/factory.py | 12 +- 
.../data_processor/mindspore_processor.py | 90 +- .../data_processor/pytorch_processor.py | 75 +- .../msprobe/core/data_dump/json_writer.py | 22 +- .../msprobe/mindspore/common/utils.py | 13 + .../mindspore/debugger/debugger_config.py | 15 +- .../mindspore/debugger/precision_debugger.py | 42 +- .../dump/hook_cell/api_registry copy.py | 198 +++++ .../mindspore/dump/hook_cell/hook_cell.py | 18 +- .../msprobe/mindspore/ms_config.py | 30 +- .../msprobe/mindspore/service.py | 183 +++- .../run_ut/data_generate.py | 12 +- .../run_ut/multi_run_ut.py | 19 +- .../api_accuracy_checker/run_ut/run_ut.py | 18 +- .../pytorch/bench_functions/__init__.py | 15 + .../pytorch/bench_functions/apply_adam_w.py | 28 + .../bench_functions/confusion_transpose.py | 19 + .../pytorch/bench_functions/fast_gelu.py | 55 ++ .../bench_functions/layer_norm_eval.py | 6 + .../msprobe/pytorch/bench_functions/linear.py | 12 + .../bench_functions/matmul_backward.py | 48 + .../bench_functions/npu_fusion_attention.py | 421 +++++++++ .../pytorch/bench_functions/rms_norm.py | 15 + .../pytorch/bench_functions/rotary_mul.py | 52 ++ .../bench_functions/scaled_mask_softmax.py | 26 + .../msprobe/pytorch/bench_functions/swiglu.py | 55 ++ .../msprobe/pytorch/common/parse_json.py | 4 +- .../msprobe/pytorch/common/utils.py | 35 + .../msprobe/pytorch/compare/acc_compare.py | 29 +- .../pytorch/compare/distributed_compare.py | 16 +- .../pytorch/debugger/debugger_config.py | 2 +- .../pytorch/debugger/precision_debugger.py | 1 + .../pytorch/doc/api_accuracy_checker.md | 68 +- .../msprobe/pytorch/doc/dump.md | 87 +- .../pytorch/free_benchmark/common/constant.py | 3 + .../pytorch/free_benchmark/common/utils.py | 4 + .../result_handlers/base_handler.py | 72 +- .../msprobe/pytorch/function_factory.py | 75 ++ .../pytorch/hook_module/hook_module.py | 6 + .../pytorch/hook_module/support_wrap_ops.yaml | 3 +- .../msprobe/pytorch/hook_module/wrap_aten.py | 21 +- .../pytorch/hook_module/wrap_npu_custom.py | 20 +- 
.../msprobe/pytorch/module_processer.py | 29 +- .../msprobe/pytorch/pt_config.py | 4 +- .../accuracy_tools/msprobe/pytorch/service.py | 15 +- .../test/core_ut/test_common_config.py | 2 +- .../test/mindspore_ut/test_ms_config.py | 4 +- .../run_ut/test_multi_run_ut.py | 4 +- .../msprobe/test/pytorch_ut/test_pt_config.py | 2 +- debug/accuracy_tools/setup.py | 2 +- .../.github/workflows/libkineto_ci.yml | 56 -- .../workflows/tb_plugin_build_pip_package.yml | 19 - .../.github/workflows/tb_plugin_ci.yml | 57 -- plugins/tensorboard-plugins/.gitignore | 3 - plugins/tensorboard-plugins/.gitmodules | 6 - .../tensorboard-plugins/CODE_OF_CONDUCT.md | 77 -- plugins/tensorboard-plugins/CONTRIBUTING.md | 34 - plugins/tensorboard-plugins/LICENSE | 33 - plugins/tensorboard-plugins/README.md | 38 - .../libkineto/CMakeLists.txt | 198 ----- .../tensorboard-plugins/libkineto/README.md | 65 -- .../libkineto/include/AbstractConfig.h | 113 --- .../include/ActivityProfilerInterface.h | 91 -- .../include/ActivityTraceInterface.h | 21 - .../libkineto/include/ActivityType.h | 34 - .../libkineto/include/ClientInterface.h | 16 - .../libkineto/include/Config.h | 433 --------- .../libkineto/include/GenericTraceActivity.h | 125 --- .../libkineto/include/IActivityProfiler.h | 104 --- .../libkineto/include/ILoggerObserver.h | 50 -- .../libkineto/include/ITraceActivity.h | 53 -- .../libkineto/include/ThreadUtil.h | 22 - .../libkineto/include/TraceSpan.h | 36 - .../libkineto/include/libkineto.h | 138 --- .../libkineto/include/time_since_epoch.h | 16 - .../libkineto/libkineto_defs.bzl | 77 -- .../sample_programs/kineto_playground.cpp | 38 - .../sample_programs/kineto_playground.cu | 60 -- .../sample_programs/kineto_playground.cuh | 18 - .../libkineto/src/AbstractConfig.cpp | 188 ---- .../libkineto/src/ActivityBuffers.h | 29 - .../libkineto/src/ActivityLoggerFactory.h | 60 -- .../src/ActivityProfilerController.cpp | 246 ----- .../src/ActivityProfilerController.h | 84 -- 
.../libkineto/src/ActivityProfilerProxy.cpp | 119 --- .../libkineto/src/ActivityProfilerProxy.h | 73 -- .../libkineto/src/ActivityTrace.h | 45 - .../libkineto/src/ActivityType.cpp | 58 -- .../libkineto/src/Config.cpp | 473 ---------- .../libkineto/src/ConfigLoader.cpp | 300 ------- .../libkineto/src/ConfigLoader.h | 147 --- .../libkineto/src/CudaDeviceProperties.cpp | 130 --- .../libkineto/src/CudaDeviceProperties.h | 31 - .../libkineto/src/CuptiActivity.h | 114 --- .../libkineto/src/CuptiActivity.tpp | 111 --- .../libkineto/src/CuptiActivityApi.cpp | 343 ------- .../libkineto/src/CuptiActivityApi.h | 100 --- .../libkineto/src/CuptiActivityBuffer.h | 51 -- .../libkineto/src/CuptiActivityPlatform.cpp | 31 - .../libkineto/src/CuptiActivityPlatform.h | 12 - .../libkineto/src/CuptiActivityProfiler.cpp | 841 ------------------ .../libkineto/src/CuptiActivityProfiler.h | 364 -------- .../libkineto/src/CuptiCallbackApi.cpp | 260 ------ .../libkineto/src/CuptiCallbackApi.h | 130 --- .../libkineto/src/CuptiCallbackApiMock.h | 32 - .../libkineto/src/CuptiEventApi.cpp | 112 --- .../libkineto/src/CuptiEventApi.h | 49 - .../libkineto/src/CuptiMetricApi.cpp | 107 --- .../libkineto/src/CuptiMetricApi.h | 38 - .../libkineto/src/CuptiNvPerfMetric.cpp | 504 ----------- .../libkineto/src/CuptiNvPerfMetric.h | 71 -- .../libkineto/src/CuptiRangeProfilerApi.cpp | 751 ---------------- .../libkineto/src/CuptiRangeProfilerApi.h | 220 ----- .../src/CuptiRangeProfilerConfig.cpp | 68 -- .../libkineto/src/CuptiRangeProfilerConfig.h | 86 -- .../libkineto/src/DaemonConfigLoader.h | 27 - .../libkineto/src/Demangle.cpp | 49 - .../libkineto/src/Demangle.h | 12 - .../libkineto/src/EventProfiler.cpp | 635 ------------- .../libkineto/src/EventProfiler.h | 341 ------- .../libkineto/src/EventProfilerController.cpp | 423 --------- .../libkineto/src/EventProfilerController.h | 63 -- .../libkineto/src/GenericTraceActivity.cpp | 10 - .../libkineto/src/ILoggerObserver.cpp | 54 -- .../libkineto/src/Logger.cpp 
| 136 --- .../libkineto/src/Logger.h | 244 ----- .../libkineto/src/LoggerCollector.h | 70 -- .../libkineto/src/RoctracerActivityApi.cpp | 569 ------------ .../libkineto/src/RoctracerActivityApi.h | 171 ---- .../libkineto/src/RoctracerActivityBuffer.h | 30 - .../libkineto/src/SampleListener.h | 146 --- .../libkineto/src/ScopeExit.h | 29 - .../libkineto/src/ThreadUtil.cpp | 203 ----- .../libkineto/src/WeakSymbols.cpp | 12 - .../libkineto/src/cupti_call.h | 33 - .../libkineto/src/cupti_strings.cpp | 502 ----------- .../libkineto/src/cupti_strings.h | 14 - .../libkineto/src/init.cpp | 139 --- .../libkineto/src/libkineto_api.cpp | 41 - .../libkineto/src/output_base.h | 104 --- .../libkineto/src/output_csv.cpp | 88 -- .../libkineto/src/output_csv.h | 39 - .../libkineto/src/output_json.cpp | 583 ------------ .../libkineto/src/output_json.h | 91 -- .../libkineto/src/output_membuf.h | 130 --- .../libkineto/test/CMakeLists.txt | 3 - .../libkineto/test/ConfigTest.cpp | 315 ------- .../test/CuptiActivityProfilerTest.cpp | 629 ------------- .../libkineto/test/CuptiCallbackApiTest.cpp | 239 ----- .../libkineto/test/CuptiProfilerApiTest.cu | 353 -------- .../test/CuptiRangeProfilerApiTest.cpp | 113 --- .../test/CuptiRangeProfilerConfigTest.cpp | 67 -- .../test/CuptiRangeProfilerTestUtil.h | 96 -- .../libkineto/test/CuptiStringsTest.cpp | 29 - .../libkineto/test/EventProfilerTest.cpp | 578 ------------ .../libkineto/test/LoggerObserverTest.cpp | 96 -- .../test/MockActivitySubProfiler.cpp | 49 - .../libkineto/test/MockActivitySubProfiler.h | 72 -- .../libkineto/test/PidInfoTest.cpp | 27 - profiler/README.md | 1 + profiler/advisor/README.md | 5 +- profiler/advisor/common/profiling/ge_info.py | 3 +- profiler/advisor/common/profiling/msprof.py | 3 +- .../advisor/common/profiling/op_summary.py | 4 +- profiler/advisor/common/profiling/tasktime.py | 4 +- .../config/profiling_data_version_config.yaml | 17 +- .../dataset/profiling/profiling_dataset.py | 11 +- 
.../dataset/profiling/profiling_parser.py | 27 +- profiler/advisor/img/overall.png | Bin 64492 -> 49616 bytes profiler/advisor/img/overall_0.png | Bin 0 -> 56377 bytes profiler/advisor/utils/utils.py | 12 +- profiler/affinity_cpu_bind/README.md | 40 - profiler/affinity_cpu_bind/bind_core.py | 213 ----- profiler/cli/cluster_cli.py | 4 +- profiler/cli/compare_cli.py | 2 + profiler/cluster_analyse/README.md | 39 +- .../analysis/analysis_facade.py | 12 - .../cluster_analyse/analysis/base_analysis.py | 153 ---- .../analysis/cann_api_sum/__init__.py | 14 - .../analysis/cann_api_sum/cann_api_sum.py | 108 --- .../analysis/cann_api_sum/stats.ipynb | 86 -- .../analysis/cluster_display.py | 239 ----- .../analysis/compute_op_sum/__init__.py | 14 - .../analysis/compute_op_sum/compute_op_sum.py | 103 --- .../analysis/compute_op_sum/stats.ipynb | 164 ---- .../analysis/hccl_sum/__init__.py | 14 - .../analysis/hccl_sum/hccl_sum.py | 133 --- .../analysis/hccl_sum/stats.ipynb | 162 ---- .../analysis/mstx_sum/__init__.py | 14 - .../analysis/mstx_sum/mstx_sum.py | 204 ----- .../analysis/mstx_sum/stats.ipynb | 180 ---- profiler/cluster_analyse/cluster_analysis.py | 75 +- .../cluster_statistics_export/__init__.py | 14 - .../cann_api_sum_export.py | 65 -- .../compute_op_sum_export.py | 49 - .../hccl_sum_export.py | 39 - .../mstx_mark_export.py | 57 -- .../mstx_step_export.py | 35 - .../cluster_statistics_export/stats_export.py | 40 - .../common_func/analysis_loader.py | 38 - .../cluster_analyse/common_func/constant.py | 10 - .../cluster_analyse/common_func/context.py | 85 -- .../cluster_analyse/common_func/db_manager.py | 7 - .../common_func/sql_extention_func.py | 73 -- profiler/cluster_analyse/common_func/utils.py | 73 -- profiler/compare_tools/README.md | 82 +- .../comparator/api_compare_comparator.py | 32 + .../comparator/kernel_compare_comparator.py | 35 + .../compare_bean/api_compare_bean.py | 47 + .../compare_bean/kernel_compare_bean.py | 75 ++ 
.../origin_data_bean/kernel_details_bean.py | 6 + .../data_prepare/operator_data_prepare.py | 17 + .../generator/detail_performance_generator.py | 22 +- .../profiling_parser/base_profiling_parser.py | 19 +- .../profiling_parser/gpu_profiling_parser.py | 5 + .../profiling_parser/npu_profiling_parser.py | 24 + .../compare_backend/utils/args_manager.py | 13 +- .../compare_backend/utils/compare_args.py | 4 + .../compare_backend/utils/constant.py | 7 +- .../compare_backend/utils/excel_config.py | 48 +- .../compare_backend/utils/torch_op_node.py | 8 + .../compare_backend/utils/tree_builder.py | 3 +- .../view/work_sheet_creator.py | 12 +- profiler/compare_tools/img/OverallMetrics.png | Bin 0 -> 66941 bytes profiler/compare_tools/performance_compare.py | 2 + profiler/merge_profiling_timeline/README.md | 115 --- profiler/merge_profiling_timeline/main.py | 233 ----- ...\345\257\274\346\210\252\345\233\2761.png" | Bin 53047 -> 0 bytes ...\345\257\274\346\210\252\345\233\2762.png" | Bin 64432 -> 0 bytes profiler/module_visualization/__init__.py | 0 .../module_visualization/graph/__init__.py | 0 .../module_visualization/graph/prof_node.py | 90 -- .../graph_build/__init__.py | 0 .../graph_build/fwd_module_node.py | 29 - .../graph_build/prof_graph_builder.py | 115 --- .../module_visualization/prof_graph_export.py | 39 - .../prof_parse/__init__.py | 0 .../prof_parse/prof_data_pre_process.py | 102 --- .../test_base_profiling_parser.py | 5 + 262 files changed, 2720 insertions(+), 19791 deletions(-) create mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py create mode 100644 
debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/scaled_mask_softmax.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/bench_functions/swiglu.py create mode 100644 debug/accuracy_tools/msprobe/pytorch/function_factory.py delete mode 100644 plugins/tensorboard-plugins/.github/workflows/libkineto_ci.yml delete mode 100644 plugins/tensorboard-plugins/.github/workflows/tb_plugin_build_pip_package.yml delete mode 100644 plugins/tensorboard-plugins/.github/workflows/tb_plugin_ci.yml delete mode 100644 plugins/tensorboard-plugins/.gitignore delete mode 100644 plugins/tensorboard-plugins/.gitmodules delete mode 100644 plugins/tensorboard-plugins/CODE_OF_CONDUCT.md delete mode 100644 plugins/tensorboard-plugins/CONTRIBUTING.md delete mode 100644 plugins/tensorboard-plugins/LICENSE delete mode 100644 plugins/tensorboard-plugins/README.md delete mode 100644 plugins/tensorboard-plugins/libkineto/CMakeLists.txt delete mode 100644 plugins/tensorboard-plugins/libkineto/README.md delete mode 100644 plugins/tensorboard-plugins/libkineto/include/AbstractConfig.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ActivityProfilerInterface.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ActivityTraceInterface.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ActivityType.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ClientInterface.h delete mode 100644 
plugins/tensorboard-plugins/libkineto/include/Config.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/GenericTraceActivity.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/IActivityProfiler.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ILoggerObserver.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ITraceActivity.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/ThreadUtil.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/TraceSpan.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/libkineto.h delete mode 100644 plugins/tensorboard-plugins/libkineto/include/time_since_epoch.h delete mode 100644 plugins/tensorboard-plugins/libkineto/libkineto_defs.bzl delete mode 100644 plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cu delete mode 100644 plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cuh delete mode 100644 plugins/tensorboard-plugins/libkineto/src/AbstractConfig.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityBuffers.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityLoggerFactory.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityTrace.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ActivityType.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/Config.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ConfigLoader.cpp delete mode 100644 
plugins/tensorboard-plugins/libkineto/src/ConfigLoader.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivity.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivity.tpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityBuffer.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApiMock.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.cpp delete mode 100644 
plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/DaemonConfigLoader.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/Demangle.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/Demangle.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/EventProfiler.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/EventProfiler.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/EventProfilerController.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/EventProfilerController.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/GenericTraceActivity.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ILoggerObserver.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/Logger.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/Logger.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/LoggerCollector.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/RoctracerActivityBuffer.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/SampleListener.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ScopeExit.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/ThreadUtil.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/WeakSymbols.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/cupti_call.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/cupti_strings.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/cupti_strings.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/init.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/libkineto_api.cpp delete mode 100644 
plugins/tensorboard-plugins/libkineto/src/output_base.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/output_csv.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/output_csv.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/output_json.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/src/output_json.h delete mode 100644 plugins/tensorboard-plugins/libkineto/src/output_membuf.h delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CMakeLists.txt delete mode 100644 plugins/tensorboard-plugins/libkineto/test/ConfigTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiActivityProfilerTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiCallbackApiTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiProfilerApiTest.cu delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerApiTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerConfigTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerTestUtil.h delete mode 100644 plugins/tensorboard-plugins/libkineto/test/CuptiStringsTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/EventProfilerTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/LoggerObserverTest.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.cpp delete mode 100644 plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.h delete mode 100644 plugins/tensorboard-plugins/libkineto/test/PidInfoTest.cpp create mode 100644 profiler/advisor/img/overall_0.png delete mode 100644 profiler/affinity_cpu_bind/README.md delete mode 100644 profiler/affinity_cpu_bind/bind_core.py delete mode 100644 profiler/cluster_analyse/analysis/cann_api_sum/__init__.py delete mode 100644 profiler/cluster_analyse/analysis/cann_api_sum/cann_api_sum.py delete mode 100644 
profiler/cluster_analyse/analysis/cann_api_sum/stats.ipynb delete mode 100644 profiler/cluster_analyse/analysis/cluster_display.py delete mode 100644 profiler/cluster_analyse/analysis/compute_op_sum/__init__.py delete mode 100644 profiler/cluster_analyse/analysis/compute_op_sum/compute_op_sum.py delete mode 100644 profiler/cluster_analyse/analysis/compute_op_sum/stats.ipynb delete mode 100644 profiler/cluster_analyse/analysis/hccl_sum/__init__.py delete mode 100644 profiler/cluster_analyse/analysis/hccl_sum/hccl_sum.py delete mode 100644 profiler/cluster_analyse/analysis/hccl_sum/stats.ipynb delete mode 100644 profiler/cluster_analyse/analysis/mstx_sum/__init__.py delete mode 100644 profiler/cluster_analyse/analysis/mstx_sum/mstx_sum.py delete mode 100644 profiler/cluster_analyse/analysis/mstx_sum/stats.ipynb delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/__init__.py delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/cann_api_sum_export.py delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/compute_op_sum_export.py delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/hccl_sum_export.py delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/mstx_mark_export.py delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/mstx_step_export.py delete mode 100644 profiler/cluster_analyse/cluster_statistics_export/stats_export.py delete mode 100644 profiler/cluster_analyse/common_func/analysis_loader.py delete mode 100644 profiler/cluster_analyse/common_func/context.py delete mode 100644 profiler/cluster_analyse/common_func/sql_extention_func.py delete mode 100644 profiler/cluster_analyse/common_func/utils.py create mode 100644 profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py create mode 100644 profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py create mode 100644 
profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py create mode 100644 profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py create mode 100644 profiler/compare_tools/img/OverallMetrics.png delete mode 100644 profiler/merge_profiling_timeline/README.md delete mode 100644 profiler/merge_profiling_timeline/main.py delete mode 100644 "profiler/merge_profiling_timeline/perfetto\344\275\277\347\224\250\346\214\207\345\257\274\346\210\252\345\233\2761.png" delete mode 100644 "profiler/merge_profiling_timeline/perfetto\344\275\277\347\224\250\346\214\207\345\257\274\346\210\252\345\233\2762.png" delete mode 100644 profiler/module_visualization/__init__.py delete mode 100644 profiler/module_visualization/graph/__init__.py delete mode 100644 profiler/module_visualization/graph/prof_node.py delete mode 100644 profiler/module_visualization/graph_build/__init__.py delete mode 100644 profiler/module_visualization/graph_build/fwd_module_node.py delete mode 100644 profiler/module_visualization/graph_build/prof_graph_builder.py delete mode 100644 profiler/module_visualization/prof_graph_export.py delete mode 100644 profiler/module_visualization/prof_parse/__init__.py delete mode 100644 profiler/module_visualization/prof_parse/prof_data_pre_process.py diff --git a/.gitignore b/.gitignore index c70c40e0f52..01a2222429c 100644 --- a/.gitignore +++ b/.gitignore @@ -142,4 +142,11 @@ cython_debug/ att_advisor*.html *.xlsx operator_tuning_file*.cfg -.ipynb_checkpoints/ \ No newline at end of file +.ipynb_checkpoints/ +.idea/vcs.xml +.idea/inspectionProfiles/profiles_settings.xml +.idea/misc.xml +.idea/modules.xml +.idea/mstt_primitive.iml +.idea/.gitignore +.gitignore diff --git a/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py b/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py index 4c230c17c04..d5a91ce3b5f 100644 --- 
a/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py +++ b/debug/accuracy_tools/api_accuracy_checker/bench_functions/npu_fusion_attention.py @@ -8,7 +8,6 @@ from api_accuracy_checker.common.utils import logger gtype = torch.float64 # arm host必须选择float64,x86环境选择float32即可,64也行。arm计算很慢,s=8k的场景建议使用x86 softmax_build_mode = "QKV" # "MAX_SUM" - """ # 前向函数声明对比 标杆实现:fusion_attention_forward: q, k, v, drop_mask, atten_mask, pse, scale, keep_prob @@ -45,6 +44,9 @@ def softmax_grad(dp, softmax_res): def broadcast_kv(num_heads, num_kv_heads, kv_tensor, dtype): + if num_kv_heads == 0 or num_kv_heads < num_heads: + raise ValueError(f"num_kv_heads must be non-zero and less than num_heads.") + factor = num_heads // num_kv_heads kv_shape = kv_tensor.shape B = kv_shape[0] @@ -102,28 +104,34 @@ def parse_bsnd_args(query, key, head_num, input_layout): if not isinstance(input_layout, str) or input_layout not in supported_input_layout: raise ValueError(f"Invalid input_layout arg which must be one of {supported_input_layout}.") - if input_layout == "BSH": - B, S1, H1 = query.shape - _, S2, H2 = key.shape - D = H1 // N1 - N2 = H2 // D - elif input_layout == "SBH": - S1, B, H1 = query.shape - S2, _, H2 = key.shape - D = H1 // N1 - N2 = H2 // D - elif input_layout == "BSND": - B, S1, N1, D = query.shape - _, S2, N2, _ = key.shape - H1 = N1 * D - H2 = N2 * D - elif input_layout == "BNSD": - B, N1, S1, D = query.shape - _, N2, S2, _ = key.shape - H1 = N1 * D - H2 = N2 * D - elif input_layout == "TND": + if input_layout == "TND": raise ValueError(f"input_layout {input_layout} does not supported for now.") + try: + if input_layout == "BSH": + B, S1, H1 = query.shape + _, S2, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "SBH": + S1, B, H1 = query.shape + S2, _, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "BSND": + B, S1, N1, D = query.shape + _, S2, N2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + elif input_layout == 
"BNSD": + B, N1, S1, D = query.shape + _, N2, S2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + except Exception as e: + raise ValueError(f"query.shape: {query.shape}, key.shape: {key.shape}, parse_bsnd_args error: {e}") from e + + if D == 0: + raise ValueError(f"Value D must be non-zero.") DTYPE = query.dtype return B, S1, S2, N1, N2, D, H1, H2, DTYPE @@ -251,6 +259,8 @@ def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softma """ print(f"Using softmax_max and softmax_sum to rebuild original softmax") qk = calculate_qk(q, k, atten_mask, pse, scale) + if softmax_max.shape[-1] == 0: + raise ValueError(f"softmax_max.shape[-1] must be non-zero, softmax_max.shape: {softmax_max.shape}") repeat_dim = qk.shape[-1] // softmax_max.shape[-1] softmax_res = torch.exp(qk.sub(softmax_max.repeat(1, 1, 1, repeat_dim))).div( softmax_sum.repeat(1, 1, 1, repeat_dim)) @@ -394,6 +404,8 @@ def npu_fusion_attention_grad(*args, **kwargs): # N不等长适配by cdy if not (N1 == N2): + if N2 == 0: + raise ValueError("dims_kwargs.N2 must be non-zero.") G = int(N1 / N2) dk = torch.sum(dk.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) dv = torch.sum(dv.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) diff --git a/debug/accuracy_tools/api_accuracy_checker/common/utils.py b/debug/accuracy_tools/api_accuracy_checker/common/utils.py index 76d117afb49..83b73e90f97 100644 --- a/debug/accuracy_tools/api_accuracy_checker/common/utils.py +++ b/debug/accuracy_tools/api_accuracy_checker/common/utils.py @@ -634,7 +634,11 @@ def initialize_save_path(save_path, dir_name): def write_pt(file_path, tensor): if os.path.exists(file_path): raise ValueError(f"File {file_path} already exists") - torch.save(tensor, file_path) + try: + torch.save(tensor, file_path) + except Exception as e: + error_message = "An unexpected error occurred: %s when saving tensor to %s" % (str(e), file_path) + print_error_log(error_message) full_path = os.path.realpath(file_path) 
file_check_util.change_mode(full_path, FileCheckConst.DATA_FILE_AUTHORITY) return full_path diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py index 67dc5ad2532..57811648391 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/data_generate.py @@ -20,9 +20,10 @@ import math import torch import numpy -from api_accuracy_checker.common.utils import Const, check_file_or_directory_path, check_object_type, print_warn_log, \ - print_error_log, get_full_data_path, CompareException +from api_accuracy_checker.common.utils import Const, check_object_type, print_warn_log, print_error_log, \ + get_full_data_path, CompareException from api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] @@ -83,7 +84,8 @@ def gen_real_tensor(data_path, convert_type): convert_type: convert ori_type to dist_type flag. """ data_path = os.path.realpath(data_path) - check_file_or_directory_path(data_path) + data_path_checker = FileChecker(data_path, FileCheckConst.FILE, ability=FileCheckConst.READ_ABLE) + data_path = data_path_checker.common_check() if not data_path.endswith('.pt') and not data_path.endswith('.npy'): error_info = f"The file: {data_path} is not a pt or numpy file." 
raise CompareException(CompareException.INVALID_FILE_ERROR, error_info) diff --git a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py index df6c99a567c..0ab8073937f 100644 --- a/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/api_accuracy_checker/run_ut/multi_run_ut.py @@ -88,14 +88,9 @@ def run_parallel_ut(config): def update_progress_bar(progress_bar, result_csv_path): while any(process.poll() is None for process in processes): - try: - with open(result_csv_path, 'r') as result_file: - completed_items = len(result_file.readlines()) - 1 - progress_bar.update(completed_items - progress_bar.n) - except FileNotFoundError: - print_warn_log(f"Result CSV file not found: {result_csv_path}.") - except Exception as e: - print_error_log(f"An unexpected error occurred while reading result CSV: {e}") + with FileOpen(result_csv_path, 'r') as result_file: + completed_items = len(result_file.readlines()) - 1 + progress_bar.update(completed_items - progress_bar.n) time.sleep(1) for fwd, bwd in zip(config.forward_files, config.backward_files): diff --git a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py index 0b91d2bbc82..5fb63779fbb 100644 --- a/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py +++ b/debug/accuracy_tools/api_accuracy_checker/tensor_transport_layer/attl.py @@ -13,6 +13,7 @@ import torch from api_accuracy_checker.tensor_transport_layer.client import TCPClient from api_accuracy_checker.tensor_transport_layer.server import TCPServer from api_accuracy_checker.common.utils import logger +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker from ptdbg_ascend.src.python.ptdbg_ascend.common.utils import remove_path @@ -138,8 +139,10 @@ class ATTL: file_path = 
os.path.join(self.session_config.nfs_path, buffer.name + ".pt") else: file_path = os.path.join(self.session_config.nfs_path, buffer + f"_{int(time.time())}") - - torch.save(buffer, file_path) + try: + torch.save(buffer, file_path) + except Exception as e: + self.logger.error("there is something error. please check it. %s", e) def download(self): for file_type in ("start*", "*.pt", "end*"): @@ -150,6 +153,8 @@ class ATTL: if cur_file is None: return None else: + cur_file_checker = FileChecker(cur_file, FileCheckConst.FILE, ability=FileCheckConst.READ_ABLE) + cur_file = cur_file_checker.common_check() buffer = torch.load(cur_file) remove_path(cur_file) return buffer diff --git a/debug/accuracy_tools/grad_tool/README.md b/debug/accuracy_tools/grad_tool/README.md index a7929ca8187..1d35f03e479 100644 --- a/debug/accuracy_tools/grad_tool/README.md +++ b/debug/accuracy_tools/grad_tool/README.md @@ -28,7 +28,7 @@ ### 梯度数据导出 -1. 创建配置文件config.yaml,PyTorch框架样例代码如下: +1. 创建配置文件config.yaml,样例如下: ```python level: L1 @@ -38,40 +38,30 @@ bounds: output_path: your_output_dir ``` - > 在MindSpore框架下,当前不支持rank和step配置,默认所有rank和所有step都进行采集, - > MindSpore中step指的是优化器被调用的次数(并非模型跑的step,某些step,例如loss为nan时,不会调用优化器) + > step指的是优化器被调用的次数(并非模型跑的step,某些step,例如loss为nan时,不会调用优化器) **参数说明** - | 参数 | 说明 | 是否必选 | - |--------------------------------|----------------------------------------------------|----------| - | level | Level级别,PyTorch可取值:L0、L1、L2,MindSpore可取值:L0, L1, L2, L3。决定导出数据的详细程度,级别越大导出数据越详细。数据类型:str。 | PyTorch是(MindSpore否,默认为L0) | - | param_list | 填写需要监控的权重名称。不指定或列表为空就表示监控所有权重。数据类型:List[str]。 | 否 | - | rank | 在多卡场景下,填写需要导出梯度数据的卡的Rank ID,不指定或列表为空就表示导出所有Rank的数据。单卡场景无需关注该参数。数据类型:List[int]。(MindSpore当前不支持指定rank) | 否 | - | step | 指定需要导出数据的step。对于PyTorch不指定或列表为空就表示导出所有step的数据,对于MindSpore不指定表示导出所有step,指定时要求传入range列表,例如[1, 2],否则无效。数据类型:List[int]。(MindSpore当前不支持指定step) | 否 | - | bounds | 用来划分区间以统计值分布。需要保证由数据小到大排列。不传则使用默认值[-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10](mindspore为[-0.1, 0., 
1.0]),数据类型:List。 | 否 | - | output_path | 输出目录。如果不存在就会创建一个新目录。数据类型:str。 | PyTorch是(MindSpore否,默认为./grad_stat | + | 参数 | 说明 | 输入类型 | 是否必选 | + |--------------------------------|-----------------------------------|-----------------|----------| + | level | 输出级别。决定导出数据的详细程度,级别越大导出数据越详细。可取值:L0, L1, L2|str | 是 | + | param_list | 权重名称列表,表示需要监控的权重。不指定或列表为空就表示监控所有权重。 | List[str] | 否 | + | rank | rank id列表,在多卡场景下,表示需要导出梯度数据的进程的rank id。不指定或列表为空就表示导出所有rank的数据。单卡场景无需关注该参数。 (MindSpore静态图模式下,当前暂不支持指定rank功能) | List[int] | 否 | + | step | step列表,表示需要导出数据的step列表。不指定或列表为空就表示导出所有step的数据。(MindSpore静态图模式下,当前暂不支持指定step功能) | List[int] | 否 | + | bounds | 区间列表,用来划分区间以统计数值的分布。需要保证由数据小到大排列。不指定则使用默认值[-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10] | List[float] | 否 | + | output_path | 输出目录。如果不存在就会创建一个新目录。 | str | 是 | **不同级别的level的导出数据** -- PyTorch/MindSpore动态图不同level数据 | 级别 | 特征数据表头 | 是否有方向数据 | | ---- | ------------------------------------------------------------ | -------------- | | L0 | ("param_name", "MD5", "max", "min", "norm", "shape") | 否 | | L1 | ("param_name", "max", "min", "norm", "shape") | 是 | | L2 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | - -- MindSpore静态图不同level数据 - - | 级别 | 特征数据表头 | 是否有方向数据 | - | ---- | ------------------------------------------------------------ | -------------- | - | L0 | ("param_name", "max", "min", "norm", "shape") | 否 | - | L1 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 否 | - | L2 | ("param_name", "max", "min", "norm", "shape") | 是 | - | L3 | ("param_name", *intervals, "=0", "max", "min", "norm", "shape") | 是 | intervals就是根据值分布bounds划分出的区间。 + MindSpore静态图模式下,L0级别中暂不支持"MD5" **方向数据解释** @@ -98,7 +88,7 @@ gm = GradientMonitor("config_path", framework="MindSpore") gm.monitor(optimizer) ``` -3. 结束监控(MindSpore需要) +3. 
结束监控(MindSpore静态图模式下需要) 在训练结束之后,调用stop接口 diff --git a/debug/accuracy_tools/grad_tool/common/base_comparator.py b/debug/accuracy_tools/grad_tool/common/base_comparator.py index d3254ae71f9..03f74a21e47 100644 --- a/debug/accuracy_tools/grad_tool/common/base_comparator.py +++ b/debug/accuracy_tools/grad_tool/common/base_comparator.py @@ -7,7 +7,10 @@ import pandas as pd import matplotlib.pyplot as plt from grad_tool.common.constant import GradConst -from grad_tool.common.utils import write_csv, check_file_or_directory_path, print_info_log, create_directory +from grad_tool.common.utils import write_csv, check_file_or_directory_path, print_info_log, create_directory, print_error_log + +from ptdbg_ascend.src.python.ptdbg_ascend.common import file_check_util +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, check_path_pattern_valid, check_path_length class BaseComparator(ABC): @@ -85,8 +88,19 @@ class BaseComparator(ABC): picture_dir = os.path.join(output_dir, "similarities_picture") if not os.path.isdir(picture_dir): create_directory(picture_dir) - plt.savefig(os.path.join(picture_dir, f"{key}_similarities.png")) - plt.close() + file_path= os.path.join(picture_dir, f"{key}_similarities.png") + if os.path.exists(file_path): + raise ValueError(f"File {file_path} already exists") + check_path_length(file_path) + check_path_pattern_valid(file_path) + try: + plt.savefig(file_path) + plt.close() + except Exception as e: + error_message = "An unexpected error occurred: %s when savfig to %s" % (str(e), file_path) + print_error_log(error_message) + full_path = os.path.realpath(file_path) + file_check_util.change_mode(full_path, FileCheckConst.DATA_FILE_AUTHORITY) head_tuple = tuple(['step'] + [str(step) for step in steps]) write_csv(os.path.join(output_dir, "similarities.csv"), [[key] + value], head_tuple) diff --git a/debug/accuracy_tools/grad_tool/common/constant.py b/debug/accuracy_tools/grad_tool/common/constant.py index 
d569d47c16d..38d33e98864 100644 --- a/debug/accuracy_tools/grad_tool/common/constant.py +++ b/debug/accuracy_tools/grad_tool/common/constant.py @@ -23,8 +23,7 @@ class GradConst: LEVEL0 = "L0" LEVEL1 = "L1" LEVEL2 = "L2" - LEVEL3 = "L3" - SUPPORTED_LEVEL = {"L0", "L1", "L2", "L3"} + SUPPORTED_LEVEL = {"L0", "L1", "L2"} # numpy coding STEP_IDX = 0 @@ -40,6 +39,7 @@ class GradConst: DIRECTORY_LENGTH = 4096 FILE_NAME_LENGTH = 255 FILE_VALID_PATTERN = r"^[a-zA-Z0-9_.:/-]+$" + PARAM_VALID_PATTERN = r"^[a-zA-Z0-9.]+$" DIR = "dir" FILE = "file" diff --git a/debug/accuracy_tools/grad_tool/common/utils.py b/debug/accuracy_tools/grad_tool/common/utils.py index cdce3fda7e3..fceda8ce0f2 100644 --- a/debug/accuracy_tools/grad_tool/common/utils.py +++ b/debug/accuracy_tools/grad_tool/common/utils.py @@ -7,6 +7,7 @@ import yaml import pandas as pd from grad_tool.common.constant import GradConst +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileOpen def _print_log(level, msg, end='\n'): @@ -114,7 +115,7 @@ class ListCache(list): def get_config(filepath): - with open(filepath, 'r') as file: + with FileOpen(filepath, 'r') as file: config = yaml.safe_load(file) return config @@ -220,3 +221,11 @@ def change_mode(path, mode): except PermissionError as ex: print_error_log(f'Failed to change {path} authority. 
{str(ex)}') raise ex + +def check_param(param_name): + if not re.match(GradConst.PARAM_VALID_PATTERN, param_name): + raise RuntimeError("The parameter name contains special characters.") + +def check_str(string, variable_name): + if not isinstance(string, str): + raise ValueError(f'The variable: "{variable_name}" is not a string.') \ No newline at end of file diff --git a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py index d44bea52c78..424f16aedd3 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/global_context.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/global_context.py @@ -4,7 +4,7 @@ from typing import Dict, List, Union from grad_tool.common.utils import print_warn_log from grad_tool.common.constant import GradConst -from grad_tool.common.utils import path_valid_check, create_directory +from grad_tool.common.utils import path_valid_check, create_directory, check_str class GlobalContext: @@ -12,13 +12,13 @@ class GlobalContext: _instance = None _instance_lock = threading.Lock() _setting = { - GradConst.LEVEL: GradConst.LEVEL0, + GradConst.LEVEL: None, GradConst.PARAM_LIST: None, GradConst.STEP: None, GradConst.RANK: None, GradConst.CURRENT_STEP: 0, - GradConst.BOUNDS: [-1., 0., 1.], - GradConst.OUTPUT_PATH: "./grad_stat" + GradConst.BOUNDS: [-10, -1, -0.1, -0.01, -0.001, 0, 0.001, 0.01, 0.1, 1, 10], + GradConst.OUTPUT_PATH: None } def __new__(cls, *args, **kwargs): @@ -29,23 +29,25 @@ class GlobalContext: return cls._instance def init_context(self, config_dict: Dict): - if config_dict.get(GradConst.LEVEL, None) in GradConst.SUPPORTED_LEVEL: + level = config_dict.get(GradConst.LEVEL) + check_str(level, variable_name = "level in yaml") + if level in GradConst.SUPPORTED_LEVEL: self._setting[GradConst.LEVEL] = config_dict.get(GradConst.LEVEL) else: - print_warn_log("Invalid level set in config yaml file, use L0 instead.") + raise ValueError("Invalid level set in config yaml file, level 
option: L0, L1, L2") + self._set_input_list(config_dict, GradConst.PARAM_LIST, str) self._set_input_list(config_dict, GradConst.BOUNDS, float) self._set_input_list(config_dict, GradConst.STEP, int) self._set_input_list(config_dict, GradConst.RANK, int) + output_path = config_dict.get(GradConst.OUTPUT_PATH) - if output_path: - try: - path_valid_check(output_path) - except RuntimeError as err: - print_warn_log(f"Invalid output_path, use default output_path. The error message is {err}.") - output_path = None - if output_path: - self._setting[GradConst.OUTPUT_PATH] = output_path + check_str(output_path, variable_name = "output_path in yaml") + try: + path_valid_check(output_path) + except RuntimeError as err: + raise ValueError(f"Invalid output_path: {output_path}. The error message is {err}.") from err + self._setting[GradConst.OUTPUT_PATH] = output_path if not os.path.isdir(self._setting.get(GradConst.OUTPUT_PATH)): create_directory(self._setting.get(GradConst.OUTPUT_PATH)) else: diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py index 75280b31944..c843df3884e 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_analyzer.py @@ -16,6 +16,7 @@ from grad_tool.common.utils import ListCache, print_warn_log from grad_tool.common.utils import create_directory, check_file_or_directory_path, write_csv from grad_tool.grad_ms.global_context import grad_context from grad_tool.grad_ms.global_context import GlobalContext +from ptdbg_ascend.src.python.ptdbg_ascend.common.file_check_util import FileCheckConst, FileChecker def get_rank_id(): @@ -31,11 +32,10 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, ''' Dump gradient statistic data. 
level0: [step, max, min, norm, shape_dim, shape] - level1: [step, max, min, norm, shape_dim, shape, dist_dim, dist] - level2: [step, max, min, norm, shape_dim, shape] + grad_bool_data - level3: [step, max, min, norm, shape_dim, shape, dist_dim, dist] + grad_bool_data + level1: [step, max, min, norm, shape_dim, shape] + grad_bool_data + level2: [step, max, min, norm, shape_dim, shape, dist_dim, dist] + grad_bool_data ''' - dump_path = dump_dir + g_name + dump_path = os.path.join(dump_dir, g_name) dump_dir_path = dump_path + "_dir" save_op = ms.ops.TensorDump() @@ -51,7 +51,7 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, level0_stat = ms.ops.concat((extrem_stat, shape_stat), axis=0) level_stat = level0_stat - if level == "L1" or level == "L3": + if level == GradConst.LEVEL2: zero_grad = (grad == 0).sum() dist_dim = ms.Tensor([len(bounds) + 2]).float() bucket_result = ms.ops.bucketize(grad.float(), bounds) @@ -60,11 +60,11 @@ def grad_dump(dump_dir: str, g_name: str, dump_step: Parameter, grad: ms.Tensor, dist_stat.append(zero_grad) dist_stat.append(ms.Tensor(1, dtype=ms.int64)) # make sure dist_stat is not empty dist_stat = ms.ops.stack(dist_stat, axis=0).float() - level1_stat = ms.ops.concat((level0_stat, dist_dim, dist_stat), axis=0) - level_stat = level1_stat + level2_stat = ms.ops.concat((level0_stat, dist_dim, dist_stat), axis=0) + level_stat = level2_stat save_op(dump_path, level_stat) - if level == "L2" or level == "L3": + if level == GradConst.LEVEL1 or level == GradConst.LEVEL2: grad_direction = grad > 0 save_op(dump_dir_path, grad_direction) @@ -155,7 +155,7 @@ class CSVGenerator(Process): level = grad_context.get_context(GradConst.LEVEL) try: shape_dim = int(stat_data[GradConst.SHAPE_DIM_IDX]) - if level in [GradConst.LEVEL1, GradConst.LEVEL3]: + if level == GradConst.LEVEL2: dist_dim = int(stat_data[shape_dim + GradConst.SHAPE_DIM_IDX + 1]) length = shape_dim + dist_dim + 7 else: @@ -170,6 +170,8 @@ class 
CSVGenerator(Process): stat_data = None max_try = 10 while max_try: + file_path_checker = FileChecker(file_path, FileCheckConst.DIR,FileCheckConst.READ_ABLE) + file_path = file_path_checker.common_check() try: stat_data = np.load(file_path) return stat_data @@ -178,7 +180,7 @@ class CSVGenerator(Process): max_try -= 1 time.sleep(0.1) return stat_data - + def gen_csv_line(self, file_path: str, stat_data) -> None: shape_dim = int(stat_data[GradConst.SHAPE_DIM_IDX]) file_name = os.path.basename(file_path) @@ -187,7 +189,7 @@ class CSVGenerator(Process): if not param_name: raise RuntimeError("Invalid gradient statistic file name.") csv_line = [param_name] - if self.level == GradConst.LEVEL1 or self.level == GradConst.LEVEL3: + if self.level == GradConst.LEVEL2: csv_line.extend(self.get_dist_data(shape_dim, stat_data)) csv_line.extend(self.get_extrem_data(shape_dim, stat_data)) self.cache_list.append(csv_line) @@ -208,7 +210,7 @@ class CSVGenerator(Process): def create_csv_file(self): headers = ["Param_name"] - if self.level == GradConst.LEVEL1 or self.level == GradConst.LEVEL3: + if self.level == GradConst.LEVEL2: headers.extend(self.get_dist_header()) headers.extend(self.get_extrem_headers()) output_path = f"{self.save_dir}/grad_summary_{self.current_step}.csv" diff --git a/debug/accuracy_tools/grad_tool/grad_ms/grad_comparator.py b/debug/accuracy_tools/grad_tool/grad_ms/grad_comparator.py index 2bfeda4387e..3b930d4e283 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/grad_comparator.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/grad_comparator.py @@ -9,10 +9,19 @@ class MsGradComparator(BaseComparator): @classmethod def _load_grad_files(cls, grad_file1: str, grad_file2: str): + if not os.path.exists(grad_file1): + raise ValueError(f"file {grad_file1} not exists, please check the file path.") + if not os.path.exists(grad_file2): + raise ValueError(f"file {grad_file2} not exists, please check the file path.") + grad1_suffix = grad_file1.split(".")[-1] grad2_suffix 
= grad_file2.split(".")[-1] - grad1 = torch.load(grad_file1).numpy() if grad1_suffix == "pt" else np.load(grad_file1) - grad2 = torch.load(grad_file2).numpy() if grad2_suffix == "pt" else np.load(grad_file2) + + try: + grad1 = torch.load(grad_file1).numpy() if grad1_suffix == "pt" else np.load(grad_file1) + grad2 = torch.load(grad_file2).numpy() if grad2_suffix == "pt" else np.load(grad_file2) + except Exception as e: + raise RuntimeError(f"An unexpected error occurred: {e} when loading grad_file.") from e if grad1.shape != grad2.shape: raise RuntimeError(f"numpy shape is not equal: {grad_file1}, {grad_file2}") diff --git a/debug/accuracy_tools/grad_tool/grad_ms/utils.py b/debug/accuracy_tools/grad_tool/grad_ms/utils.py index 23703f28208..c8ee1fd1d45 100644 --- a/debug/accuracy_tools/grad_tool/grad_ms/utils.py +++ b/debug/accuracy_tools/grad_tool/grad_ms/utils.py @@ -3,7 +3,8 @@ import os import numpy as np import mindspore from grad_tool.common.constant import GradConst -from grad_tool.common.utils import print_warn_log, create_directory, change_mode, check_file_or_directory_path +from grad_tool.common.utils import (print_warn_log, create_directory, change_mode, check_file_or_directory_path, + path_valid_check, check_param) level_adp = { "L0": { @@ -20,23 +21,27 @@ level_adp = { }, } + def save_grad_direction(param_name, grad, save_path): if not os.path.exists(save_path): create_directory(save_path) + check_file_or_directory_path(save_path, file_type=GradConst.DIR) + check_param(param_name) save_filepath = os.path.join(save_path, f"{param_name}.npy") - check_file_or_directory_path(save_filepath) + path_valid_check(save_filepath) if grad.dtype == mindspore.bfloat16: grad = grad.to(mindspore.float32) grad_direction_tensor = grad > 0 grad_direction_ndarray = grad_direction_tensor.numpy() - np.save(save_filepath, grad_direction_ndarray) + try: + np.save(save_filepath, grad_direction_ndarray) + except Exception as e: + raise RuntimeError(f"An unexpected error occurred: 
{e} when saving numpy to {save_filepath}") from e change_mode(save_filepath, 0o640) + def get_adapted_level(level: str): - if level == GradConst.LEVEL3: - print_warn_log(f"In mindpsore pynative mode, only 'L0', 'L1' and 'L2' are supported, use L0 instead") - level = GradConst.LEVEL0 level_adapted = level_adp.get(level) - return level_adapted \ No newline at end of file + return level_adapted diff --git a/debug/accuracy_tools/grad_tool/grad_pt/grad_comparator.py b/debug/accuracy_tools/grad_tool/grad_pt/grad_comparator.py index d1229b93de7..38f0e32153e 100644 --- a/debug/accuracy_tools/grad_tool/grad_pt/grad_comparator.py +++ b/debug/accuracy_tools/grad_tool/grad_pt/grad_comparator.py @@ -1,3 +1,5 @@ +import os + import torch from grad_tool.common.base_comparator import BaseComparator @@ -7,8 +9,17 @@ class PtGradComparator(BaseComparator): @classmethod def _load_grad_files(cls, grad_file1: str, grad_file2: str): - tensor1 = torch.load(grad_file1, map_location=torch.device("cpu")) - tensor2 = torch.load(grad_file2, map_location=torch.device("cpu")) + if not os.path.exists(grad_file1): + raise ValueError(f"file {grad_file1} not exists, please check the file path.") + if not os.path.exists(grad_file2): + raise ValueError(f"file {grad_file2} not exists, please check the file path.") + + try: + tensor1 = torch.load(grad_file1, map_location=torch.device("cpu")) + tensor2 = torch.load(grad_file2, map_location=torch.device("cpu")) + except Exception as e: + raise RuntimeError(f"An unexpected error occurred: {e} when loading tensor.") from e + if tensor1.shape != tensor2.shape: raise RuntimeError(f"tensor shape is not equal: {grad_file1}, {grad_file2}") if tensor1.dtype != torch.bool: diff --git a/debug/accuracy_tools/grad_tool/grad_pt/grad_monitor.py b/debug/accuracy_tools/grad_tool/grad_pt/grad_monitor.py index f3079e622c2..2e1abde0d1a 100644 --- a/debug/accuracy_tools/grad_tool/grad_pt/grad_monitor.py +++ b/debug/accuracy_tools/grad_tool/grad_pt/grad_monitor.py @@ -61,7 
+61,10 @@ class PtGradientMonitor(BaseMonitor): param_grad = grad.clone().detach() is_positive = param_grad > 0 save_filepath = os.path.join(save_path, f"{param_name}.pt") - torch.save(is_positive, save_filepath) + try: + torch.save(is_positive, save_filepath) + except Exception as e: + raise RuntimeError(f"An unexpected error occurred: {e} when saving tensor to {save_filepath}") from e change_mode(save_filepath, 0o640) def monitor(self, model): @@ -96,7 +99,7 @@ class PtGradientMonitor(BaseMonitor): output_lines.append(grad_info) if self._level_adp["have_grad_direction"]: PtGradientMonitor.save_grad_direction(param_name, grad, - f'{self._output_path}/rank{self._rank}/step{self._step}') + f'{self._output_path}/rank{self._rank}/step{self._step}') output_path = os.path.join(self._output_path, f"rank{getattr(self, '_rank')}", f"grad_summary_{self._step}.csv") write_csv(output_path, output_lines, diff --git a/debug/accuracy_tools/msprobe/README.md b/debug/accuracy_tools/msprobe/README.md index 1e8c1a1f08d..42743c50781 100644 --- a/debug/accuracy_tools/msprobe/README.md +++ b/debug/accuracy_tools/msprobe/README.md @@ -10,10 +10,15 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud ```shell pip install mindstudio-probe ``` - 说明 - 1. 使用`pip install mindstudio-probe==版本号`可安装指定版本的包 - 2. pip命令会自动安装包及其依赖 - 3. 安装成功后,日志会显示`Successfully installed mindstudio-probe-版本号` +使用`pip install mindstudio-probe==版本号`可安装指定版本的包。 + +pip命令会自动安装最新的包及其配套依赖。 + +提示如下信息则表示安装成功。 + +```bash +Successfully installed mindstudio_probe-{version} +``` ### 下载whl包安装 1. 
使用pip命令安装numpy、openpyxl、pandas、PyYAML、rich、torch、tqdm依赖。 @@ -26,6 +31,7 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud | 版本 | 发布日期 | 支持PyTorch版本 | 下载链接 | 校验码 | | ----- | ---------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | + | 1.0.1 | 2024-07-25 | 2.0/2.1/2.2 | [mindstudio_probe-1.0.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/msprobe/1.0/mindstudio_probe-1.0.1-py3-none-any.whl) | b699e224e4d4e3bcf9412c54fa858a1ee370f0d7a2bc69cb3f1273ac14a6dc82 | | 1.0 | 2024-07-09 | 2.0/2.1/2.2 | [ascend_training_accuracy_tools-1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/att/1.0/ascend_training_accuracy_tools-1.0-py3-none-any.whl) | 5016dfe886c5d340ec6f60a959673355855f313c91f100680da814efb49f8e81 | | 0.0.3 | 2024-06-11 | 2.0/2.1/2.2 | [ascend_training_accuracy_tools-0.0.3-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/att/0.0/ascend_training_accuracy_tools-0.0.3-py3-none-any.whl) | f46d9714704859e2d67861a65bbb3c76b0a250cf6e238b978b5b959ab1fe125a | | 0.0.2 | 2024-05-23 | 2.0/2.1/2.2 | [ascend_training_accuracy_tools-0.0.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/att/0.0/ascend_training_accuracy_tools-0.0.2-py3-none-any.whl) | 2e35809bde559e9c4d2f16a02ccde779ed9e436bb65fded0b7ebaf6ac2c88d93 | @@ -92,6 +98,37 @@ MindStudio精度调试工具(MindStudio Probe),简称msprobe,是MindStud Finished processing dependencies for mindstudio-probe=={version} ``` +### 查看msprobe工具信息 + +执行如下命令查看msprobe工具信息。 + +```bash +pip show mindstudio-probe +``` + +输出结果如下示例: + +```bash +Name: mindstudio-probe +Version: 1.0 +Summary: This is a pytorch precision comparison tools +Home-page: +Author: +Author-email: +License: +Location: /home/xx/anaconda3/envs/pt21py38/lib/python3.8/site-packages +Requires: numpy, openpyxl, pandas, pyyaml, rich, tqdm, wheel +Required-by: +``` + +关键字段含义: + +- Name:工具名称。 +- Version:工具版本号。 +- Summary:工具概述。 +- Location:工具安装路径。 +- Requires:工具依赖。 
+ ## 工具使用 安装msprobe工具后,可以按照如下思路选择合适的子工具进行精度调试: diff --git a/debug/accuracy_tools/msprobe/config/README.md b/debug/accuracy_tools/msprobe/config/README.md index 7b91bd26f16..7d11a365253 100644 --- a/debug/accuracy_tools/msprobe/config/README.md +++ b/debug/accuracy_tools/msprobe/config/README.md @@ -2,13 +2,38 @@ 当前配置文件主要为PrecisionDebugger接口执行dump或无标杆比对操作时调用的配置,当PrecisionDebugger接口未指定该配置文件时,使用该文件的默认配置。配置文件详见[config.json](./config.json)。 +当在环境上安装msprobe工具后,config.json文件位置可通过如下方式查找: + +查找msprobe工具安装路径。 + +``` +pip show mindstudio-probe +``` + +输出结果如下示例: + +``` +Name: mindstudio-probe +Version: 1.0 +Summary: This is a pytorch precision comparison tools +Home-page: +Author: +Author-email: +License: +Location: /home/xx/anaconda3/envs/pt21py38/lib/python3.8/site-packages +Requires: numpy, openpyxl, pandas, pyyaml, rich, tqdm, wheel +Required-by: +``` + +Location字段为msprobe工具的安装路径,那么config.json文件位置为/home/xx/anaconda3/envs/pt21py38/lib/python3.8/site-packages/msprobe/config + ## 参数说明 ### **通用配置参数** | 参数名 | 说明 | 是否必选 | | ----------------- | ------------------------------------------------------------ | -------- | -| task | dump的任务类型,str类型。可取值"free_benchmark"(无标杆比对,仅PyTorch场景支持)、"statistics"(仅dump API统计信息,默认值)、"tensor"(dump API统计信息和完全复刻整网的API运行情况的真实数据)、"overflow_check"(溢出检测)。配置示例:"task": "tensor"。根据task参数取值的不同,可以配置不同场景参数,详见:“**task配置为free_benchmark**”,“**task配置为statistics**”,“**task配置为tensor**”,“**task配置为overflow_check**”。 | 否 | +| task | dump的任务类型,str类型。可取值:
"free_benchmark"(无标杆比对,仅PyTorch场景支持)。
"statistics"(仅dump API统计信息,默认值)。
"tensor"(dump API统计信息和完全复刻整网的API运行情况的真实数据)。
"overflow_check"(溢出检测,仅PyTorch和MindSpore静态图场景支持)。
"run_ut"(精度预检配置,仅PyTorch场景支持)。
配置示例:"task": "tensor"。
根据task参数取值的不同,可以配置不同场景参数,详见:“**task配置为free_benchmark**”,“**task配置为statistics**”,“**task配置为tensor**”,“**task配置为overflow_check**”,“**task配置为run_ut**”。 | 否 | | dump_path | 设置dump数据目录路径,str类型。配置示例:"dump_path": "./dump_path"。MindSpore场景仅支持绝对路径。 | 是 | | rank | 指定对某张卡上的数据进行dump,list[int]类型,默认未配置(表示dump所有卡的数据),应配置为大于等于0的整数,且须配置实际可用的Rank ID。配置示例:"rank": [1]。
对于PyTorch场景,Rank ID从0开始计数,最大取值为所有节点可用卡总数-1,若所配置的值大于实际训练所运行的卡的Rank ID,则dump数据为空,比如当前环境Rank ID为0到7,实际训练运行0到3卡,此时若配置Rank ID为4或不存在的10等其他值,此时dump数据为空。
对于MindSpore场景,所有节点的Rank ID均从0开始计数,最大取值为每个节点可用卡总数-1,config.json配置一次rank参数对所有节点同时生效。 | 否 | | step | 指定dump某个step的数据,list[int]类型。默认未配置,表示dump所有step数据。dump特定step时,须指定为训练脚本中存在的step。step为list格式,可配置逐个step,例如:"step": [0,1,2]。 | 否 | @@ -85,6 +110,18 @@ task配置为free_benchmark时,开启**无标杆比对**,在NPU环境下通 | overflow_nums | 控制溢出次数,int类型,仅PyTorch场景支持,表示第N次溢出时,停止训练,过程中检测到溢出API对应kernel数据均dump。配置示例:"overflow_nums": 3。默认为1,即检测到1次溢出,训练停止,配置为-1时,表示持续检测溢出直到训练结束。 | 否 | | check_mode | MindSpore场景kernel级别的溢出检测,str类型,可取值"aicore"(开启AI Core的溢出检测)、"atomic"(开启Atomic的溢出检测)、"all"(开启AI Core和Atomic的溢出检测,默认值)。配置示例"check_mode": "aicore"。 | 否 | +### task配置为run_ut + +仅PyTorch场景支持。 + +| 参数名称 | 说明 | 是否必选 | +| --------------- | ------------------------------------------------------------ | -------- | +| white_list | API dump白名单,仅对指定的API进行dump。配置示例:"white_list": ["conv1d", "conv2d"]。默认未配置白名单,即dump全量API数据。 | 否 | +| black_list | API dump黑名单,被指定的API不进行dump。配置示例:"black_list": ["conv1d", "conv2d"]。默认未配置黑名单,即dump全量API数据。 | 否 | +| error_data_path | 配置保存精度未达标的API输入输出数据路径,默认为当前路径。配置示例"error_data_path": "./"。 | 否 | + +说明:white_list和black_list同时配置时,二者配置的API名单若无交集,则白名单生效,若API名单存在交集,则白名单排除的部分以及交集的API不进行dump。 + ## 配置示例 以下示例包含当前支持的所有场景可配置的完整参数。 @@ -180,6 +217,27 @@ task配置为free_benchmark时,开启**无标杆比对**,在NPU环境下通 } ``` +### PyTorch场景task配置为run_ut + +```json +{ + "task": "run_ut", + "dump_path": "/home/data_dump", + "rank": [], + "step": [], + "level": "L1", + "seed": 1234, + "is_deterministic": false, + "enable_dataloader": false, + + "run_ut": { + "white_list": [], + "black_list": [], + "error_data_path": "./" + } +} +``` + ### MindSpore场景task配置为statistics ```json diff --git a/debug/accuracy_tools/msprobe/core/common/const.py b/debug/accuracy_tools/msprobe/core/common/const.py index df82455a676..eff7b8be8ad 100644 --- a/debug/accuracy_tools/msprobe/core/common/const.py +++ b/debug/accuracy_tools/msprobe/core/common/const.py @@ -16,6 +16,7 @@ class Const: OFF = 'OFF' BACKWARD = 'backward' FORWARD = 'forward' + PRIMITIVE_PREFIX = 
'Primitive' DEFAULT_LIST = [] DEFAULT_PATH = './' WHITE_LIST = 'white_list' @@ -45,6 +46,7 @@ class Const: PT_SUFFIX = ".pt" ONE_GB = 1073741824 # 1 * 1024 * 1024 * 1024 TEN_GB = 10737418240 # 10 * 1024 * 1024 * 1024 + ONE_MB = 1048576 # 1 * 1024 * 1024 FILE_PATTERN = r'^[a-zA-Z0-9_./-]+$' DISTRIBUTED_PREFIX_LENGTH = 60 # env dump path @@ -80,12 +82,12 @@ class Const: INT_TYPE = [np.int32, np.int64] NPU = 'NPU' DISTRIBUTED = 'Distributed' - + INPLACE_LIST = [ "broadcast", "all_reduce", "reduce", "all_gather", "gather", "scatter", "reduce_scatter", - "_reduce_scatter_base", "_all_gather_base", "send", "recv", "irecv", "isend", "all_to_all_single" + "_reduce_scatter_base", "_all_gather_base", "send", "recv", "irecv", "isend", "all_to_all_single", "all_to_all" ] - + CONVERT = { "int32_to_int64": ["torch.int32", "torch.int64"], } @@ -252,3 +254,17 @@ class OverflowConst: OVERFLOW_DEBUG_MODE_ENABLE = "OVERFLOW_DEBUG_MODE_ENABLE" OVERFLOW_ORIGINAL_MODE = 0 OVERFLOW_DEBUG_MODE = 1 + + +class MsConst: + CELL = "cell" + API = "api" + KERNEL = "kernel" + TOOL_LEVEL_DICT = { + "L0": CELL, + "L1": API, + "L2": KERNEL + } + PYNATIVE_MODE = "pynative" + GRAPH_GE_MODE = "graph_ge" + GRAPH_KBYK_MODE = "graph_kbyk" diff --git a/debug/accuracy_tools/msprobe/core/common/utils.py b/debug/accuracy_tools/msprobe/core/common/utils.py index 32aba8d8af4..6e901deb9eb 100644 --- a/debug/accuracy_tools/msprobe/core/common/utils.py +++ b/debug/accuracy_tools/msprobe/core/common/utils.py @@ -148,7 +148,7 @@ def check_summary_only_valid(summary_only): return summary_only -def check_compare_param(input_parma, output_path, stack_mode=False, summary_compare=False, md5_compare=False): +def check_compare_param(input_parma, output_path, summary_compare=False, md5_compare=False): if not (isinstance(input_parma, dict) and isinstance(output_path, str)): logger.error("Invalid input parameters") raise CompareException(CompareException.INVALID_PARAM_ERROR) @@ -318,15 +318,6 @@ def execute_command(cmd): 
raise CompareException(CompareException.INVALID_DATA_ERROR) -def save_numpy_data(file_path, data): - """ - save_numpy_data - """ - if not os.path.exists(os.path.dirname(file_path)): - os.makedirs(os.path.dirname(file_path)) - np.save(file_path, data) - - def parse_value_by_comma(value): """ parse value by comma, like '1,2,4,8' diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py index ed38eba008b..d6c15e101e7 100644 --- a/debug/accuracy_tools/msprobe/core/common_config.py +++ b/debug/accuracy_tools/msprobe/core/common_config.py @@ -18,24 +18,27 @@ class CommonConfig: def _check_config(self): if self.task and self.task not in Const.TASK_LIST: - logger.error_log_with_exp( - "task is invalid, it should be one of {}".format(Const.TASK_LIST), MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("task is invalid, it should be one of {}".format(Const.TASK_LIST), + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.rank is not None and not isinstance(self.rank, list): - logger.error_log_with_exp("rank is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("rank is invalid, it should be a list", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.step is not None and not isinstance(self.step, list): - logger.error_log_with_exp("step is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("step is invalid, it should be a list", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.level and self.level not in Const.LEVEL_LIST: - logger.error_log_with_exp( - "level is invalid, it should be one of {}".format(Const.LEVEL_LIST), MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("level is invalid, it should be one of {}".format(Const.LEVEL_LIST), + 
MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.seed is not None and not isinstance(self.seed, int): - logger.error_log_with_exp("seed is invalid, it should be an integer", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("seed is invalid, it should be an integer", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if not isinstance(self.is_deterministic, bool): - logger.error_log_with_exp( - "is_deterministic is invalid, it should be a boolean", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("is_deterministic is invalid, it should be a boolean", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if not isinstance(self.enable_dataloader, bool): - logger.error_log_with_exp( - "enable_dataloader is invalid, it should be a boolean", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) - + logger.error_log_with_exp("enable_dataloader is invalid, it should be a boolean", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + class BaseConfig: def __init__(self, json_config): @@ -44,15 +47,17 @@ class BaseConfig: self.data_mode = json_config.get('data_mode') self.backward_input = json_config.get("backward_input") self.file_format = json_config.get("file_format") - self.summary_mode = json_config.get("summary_mode") - self.overflow_num = json_config.get("overflow_num") + self.summary_mode = json_config.get("summary_mode") + self.overflow_nums = json_config.get("overflow_nums") self.check_mode = json_config.get("check_mode") def check_config(self): if self.scope is not None and not isinstance(self.scope, list): - logger.error_log_with_exp("scope is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("scope is invalid, it should be a list", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.list is not None and not isinstance(self.list, list): - logger.error_log_with_exp("list is invalid, 
it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) + logger.error_log_with_exp("list is invalid, it should be a list", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) if self.data_mode is not None and not isinstance(self.data_mode, list): - logger.error_log_with_exp("data_mode is invalid, it should be a list", MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) - + logger.error_log_with_exp("data_mode is invalid, it should be a list", + MsprobeException(MsprobeException.INVALID_PARAM_ERROR)) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py index 800a2b81c2f..de2b93c206d 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_collector.py @@ -1,7 +1,6 @@ - import os -from msprobe.core.data_dump.scope import build_scope, ListScope +from msprobe.core.data_dump.scope import build_scope, ListScope from msprobe.core.data_dump.json_writer import DataWriter from msprobe.core.common.log import logger from msprobe.core.common.const import Const @@ -21,7 +20,8 @@ class DataCollector: self.config = config self.data_writer = DataWriter() self.data_processor = DataProcessorFactory.create_processor(self.config, self.data_writer) - self.module_processor = DataProcessorFactory.get_module_processor(self.config.framework) if self.config.framework == Const.PT_FRAMEWORK else None + self.module_processor = DataProcessorFactory.get_module_processor(self.config.framework) \ + if self.config.framework == Const.PT_FRAMEWORK else None self.module_count = {} if self.config.task == Const.FREE_BENCHMARK: self.scope = build_scope(ListScope, self.config.scope, self.config.list) @@ -35,7 +35,7 @@ class DataCollector: @property def dump_file_path(self): return self.data_writer.dump_file_path - + @staticmethod def check_scope_and_pid(scope, name, pid): return (not scope or scope.check(name)) and 
pid == os.getpid() @@ -43,10 +43,10 @@ class DataCollector: @staticmethod def is_inplace(module): return getattr(module, "op_is_inplace", False) - + def if_return_forward_new_output(self): return self.data_processor.if_return_forward_new_output() - + def get_forward_new_output(self): return self.data_processor.get_forward_new_output() @@ -88,8 +88,11 @@ class DataCollector: else: data_info = self.data_processor.analyze_forward_inplace(name, module_input_output) if self.config.level == "L2": - return + return self.data_writer.update_stack(self.data_processor.analyze_api_call_stack(name)) + if self.data_processor.stop_run(): + self.handle_data(name, data_info, use_buffer=False) + raise Exception("[msprobe] exit") self.handle_data(name, data_info) def backward_data_collect(self, name, module, pid, module_input_output): @@ -98,6 +101,25 @@ class DataCollector: return data_info = self.data_processor.analyze_backward(name, module, module_input_output) + if self.data_processor.stop_run(): + self.handle_data(name, data_info, use_buffer=False) + raise Exception("[msprobe] exit") + self.handle_data(name, data_info) + + def backward_input_data_collect(self, name, module, pid, module_input_output): + self.update_construct(name) + if not self.check_scope_and_pid(self.scope, name, pid): + return + + data_info = self.data_processor.analyze_backward_input(name, module, module_input_output) + self.handle_data(name, data_info) + + def backward_output_data_collect(self, name, module, pid, module_input_output): + self.update_construct(name) + if not self.check_scope_and_pid(self.scope, name, pid): + return + + data_info = self.data_processor.analyze_backward_output(name, module, module_input_output) self.handle_data(name, data_info) def update_construct(self, name): @@ -105,12 +127,15 @@ class DataCollector: self.data_writer.update_construct({name: self.module_processor.api_parent_node}) self.data_writer.update_construct(self.module_processor.module_node) - def handle_data(self, name, 
data_info): + def handle_data(self, name, data_info, use_buffer=True): msg = f"msProbe is collecting data on {name}. " if data_info: msg = self.update_data(data_info, msg) logger.info(msg) - self.data_writer.flush_data_when_buffer_is_full() + if use_buffer: + self.data_writer.flush_data_when_buffer_is_full() + else: + self.write_json() def module_count_func(self, name, name_template): module_name = name.split(Const.SEP)[-3] @@ -135,6 +160,6 @@ class DataCollector: def update_dump_paths(self, *args): self.data_writer.update_dump_paths(*args) self.data_writer.initialize_json_file(task=self.config.task, level=self.config.level) - + def update_iter(self, current_iter): self.data_processor.update_iter(current_iter) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py index 5d901291973..13134d61980 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/base.py @@ -35,11 +35,26 @@ class ModuleBackwardInputsOutputs: @property def grad_input_tuple(self): return convert_tuple(self.grad_input) - + @property def grad_output_tuple(self): - return convert_tuple(self.grad_output) + return convert_tuple(self.grad_output) + +@dataclass +class ModuleBackwardInputs: + grad_input: Optional[Tuple] + + @property + def grad_input_tuple(self): + return convert_tuple(self.grad_input) + +@dataclass +class ModuleBackwardOutputs: + grad_output: Optional[Tuple] + @property + def grad_output_tuple(self): + return convert_tuple(self.grad_output) class TensorStatInfo: def __init__(self, max_val=None, min_val=None, mean_val=None, norm_val=None): @@ -53,7 +68,7 @@ class BaseDataProcessor: _recursive_key_stack = [] special_type = (np.integer, np.floating, np.bool_, np.complexfloating, np.str_, np.byte, np.unicode_, bool, int, float, str, slice) - + def __init__(self, config, data_writer): self.data_writer = 
data_writer self.config = config @@ -65,11 +80,11 @@ class BaseDataProcessor: self.current_iter = 0 self._return_forward_new_output = False self._forward_new_output = None - + @property def data_path(self): return self.data_writer.dump_tensor_data_dir - + @staticmethod def analyze_api_call_stack(name): stack_str = [] @@ -87,7 +102,7 @@ class BaseDataProcessor: stack_str.append(stack_line) stack_info_struct = {name: stack_str} return stack_info_struct - + @staticmethod def _convert_numpy_to_builtin(arg): type_mapping = { @@ -103,26 +118,15 @@ class BaseDataProcessor: if isinstance(arg, numpy_type): return builtin_type(arg), type(arg).__name__ return arg, '' - + @staticmethod def _analyze_numpy(value, numpy_type): return {"type": numpy_type, "value": value} - - @staticmethod - def _analyze_builtin(arg): - single_arg = {} - if isinstance(arg, slice): - single_arg.update({"type": "slice"}) - single_arg.update({"value": [arg.start, arg.stop, arg.step]}) - else: - single_arg.update({"type": type(arg).__name__}) - single_arg.update({"value": arg}) - return single_arg - + @classmethod def get_special_types(cls): return cls.special_type - + @classmethod def recursive_apply_transform(cls, args, transform): if isinstance(args, cls.get_special_types()): @@ -177,13 +181,14 @@ class BaseDataProcessor: return (Const.ALL in self.config.data_mode or forward_backward in self.config.data_mode or input_output in self.config.data_mode) - - def analyze_pre_forward(self, name, module,module_input_output: ModuleForwardInputsOutputs): + + def analyze_pre_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs): pass - + def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs): api_info_struct = {} - if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT): # check whether data_mode contains forward or input + # check whether data_mode contains forward or input + if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT): 
api_info_struct[name] = {} self.api_data_category = Const.INPUT args_info_list = self.analyze_element(module_input_output.args_tuple) @@ -192,13 +197,14 @@ class BaseDataProcessor: kwargs_info_list = self.analyze_element(module_input_output.kwargs) api_info_struct[name][Const.INPUT_KWARGS] = kwargs_info_list - if self.is_dump_for_data_mode(Const.FORWARD, Const.OUTPUT): # check whether data_mode contains forward or output + # check whether data_mode contains forward or output + if self.is_dump_for_data_mode(Const.FORWARD, Const.OUTPUT): api_info_struct[name] = api_info_struct.get(name, {}) self.api_data_category = Const.OUTPUT output_info_list = self.analyze_element(module_input_output.output_tuple) api_info_struct[name][Const.OUTPUT] = output_info_list return api_info_struct - + def analyze_pre_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs): api_info_struct = {} if self.is_dump_for_data_mode(Const.FORWARD, Const.INPUT): @@ -210,7 +216,7 @@ class BaseDataProcessor: kwargs_info_list = self.analyze_element(module_input_output.kwargs) api_info_struct[name][Const.INPUT_KWARGS] = kwargs_info_list return api_info_struct - + def analyze_forward_inplace(self, name, module_input_output: ModuleForwardInputsOutputs): concat_args = module_input_output.concat_args_and_kwargs() api_info_struct = {} @@ -220,26 +226,55 @@ class BaseDataProcessor: output_info_list = self.analyze_element(concat_args) api_info_struct[name][Const.OUTPUT] = output_info_list return api_info_struct - + def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs): api_info_struct = {} - if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): + if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): api_info_struct[name] = {} - self.api_data_category = Const.OUTPUT + self.api_data_category = Const.INPUT input_info_list = self.analyze_element(module_input_output.grad_input_tuple) - api_info_struct[name][Const.GRAD_INPUT] = input_info_list + 
api_info_struct[name][Const.INPUT] = input_info_list - if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): + if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): api_info_struct[name] = api_info_struct.get(name, {}) - self.api_data_category = Const.INPUT + self.api_data_category = Const.OUTPUT output_info_list = self.analyze_element(module_input_output.grad_output_tuple) + api_info_struct[name][Const.OUTPUT] = output_info_list + + return api_info_struct + + def analyze_backward_input(self, name, module, module_input_output: ModuleBackwardInputsOutputs): + """ + Analyze and save backward input gradients. + """ + api_info_struct = {} + if self.is_dump_for_data_mode(Const.BACKWARD, Const.INPUT): + api_info_struct[name] = {} + self.api_data_category = Const.OUTPUT + # self.api_data_category = Const.INPUT + output_info_list = self.analyze_element(module_input_output.grad_input_tuple) api_info_struct[name][Const.GRAD_OUTPUT] = output_info_list + return api_info_struct + def analyze_backward_output(self, name, module, module_input_output: ModuleBackwardInputsOutputs): + """ + Analyze and save backward output gradients. 
+ """ + api_info_struct = {} + if self.is_dump_for_data_mode(Const.BACKWARD, Const.OUTPUT): + api_info_struct[name] = {} + self.api_data_category = Const.INPUT + # self.api_data_category = Const.OUTPUT + input_info_list = self.analyze_element(module_input_output.grad_output_tuple) + api_info_struct[name][Const.GRAD_INPUT] = input_info_list return api_info_struct def get_save_file_path(self, suffix): - file_format = Const.PT_SUFFIX if self.config.framework == Const.PT_FRAMEWORK else Const.NUMPY_SUFFIX + file_format = Const.PT_SUFFIX if self.config.framework == Const.PT_FRAMEWORK else Const.NUMPY_SUFFIX dump_data_name = (self.current_api_or_module_name + Const.SEP + self.api_data_category + Const.SEP + suffix + file_format) file_path = os.path.join(self.data_writer.dump_tensor_data_dir, dump_data_name) - return dump_data_name, file_path \ No newline at end of file + return dump_data_name, file_path + + def stop_run(self): + return False diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py index 86ef2115fb2..ad74acdeeba 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/factory.py @@ -4,7 +4,7 @@ from msprobe.core.common.const import Const class DataProcessorFactory: _data_processor = {} _module_processor = {} - + @classmethod def register_processor(cls, framework, task, processor_class): key = (framework, task) @@ -13,7 +13,7 @@ class DataProcessorFactory: @classmethod def register_module_processor(cls, framework, processor_class): cls._module_processor[framework] = processor_class - + @classmethod def get_module_processor(cls, framework): processor_class = cls._module_processor.get(framework) @@ -39,7 +39,7 @@ class DataProcessorFactory: TensorDataProcessor as PytorchTensorDataProcessor, OverflowCheckDataProcessor as PytorchOverflowCheckDataProcessor, 
FreeBenchmarkDataProcessor as PytorchFreeBenchmarkDataProcessor, - KernelDumpDataProcessor as PytorchKernelDumpDataProcessor + KernelDumpDataProcessor as PytorchKernelDumpDataProcessor ) from ....pytorch.module_processer import ModuleProcesser cls.register_processor(Const.PT_FRAMEWORK, Const.STATISTICS, PytorchStatisticsDataProcessor) @@ -47,11 +47,13 @@ class DataProcessorFactory: cls.register_processor(Const.PT_FRAMEWORK, Const.OVERFLOW_CHECK, PytorchOverflowCheckDataProcessor) cls.register_processor(Const.PT_FRAMEWORK, Const.FREE_BENCHMARK, PytorchFreeBenchmarkDataProcessor) cls.register_processor(Const.PT_FRAMEWORK, Const.KERNEL_DUMP, PytorchKernelDumpDataProcessor) - cls.register_module_processor(Const.PT_FRAMEWORK, ModuleProcesser) + cls.register_module_processor(Const.PT_FRAMEWORK, ModuleProcesser) elif framework == Const.MS_FRAMEWORK: from .mindspore_processor import ( StatisticsDataProcessor as MindsporeStatisticsDataProcessor, - TensorDataProcessor as MindsporeTensorDataProcessor + TensorDataProcessor as MindsporeTensorDataProcessor, + OverflowCheckDataProcessor as MindsporeOverflowCheckDataProcessor ) cls.register_processor(Const.MS_FRAMEWORK, Const.STATISTICS, MindsporeStatisticsDataProcessor) cls.register_processor(Const.MS_FRAMEWORK, Const.TENSOR, MindsporeTensorDataProcessor) + cls.register_processor(Const.MS_FRAMEWORK, Const.OVERFLOW_CHECK, MindsporeOverflowCheckDataProcessor) diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py index 7533e2ee0de..b28817e4aa7 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/mindspore_processor.py @@ -19,7 +19,8 @@ from mindspore import ops import numpy as np from msprobe.core.common.const import Const -from msprobe.core.data_dump.data_processor.base import BaseDataProcessor, 
TensorStatInfo +from msprobe.core.data_dump.data_processor.base import (BaseDataProcessor, TensorStatInfo, + ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs) from msprobe.core.common.file_check import path_len_exceeds_limit, change_mode, FileCheckConst from msprobe.mindspore.dump.hook_cell.wrap_functional import load_ops_functions from msprobe.mindspore.common.utils import convert_bf16_to_fp32 @@ -30,7 +31,7 @@ from msprobe.mindspore.dump.hook_cell.api_registry import api_register class MindsporeDataProcessor(BaseDataProcessor): mindspore_special_type = tuple([ms.Tensor]) ops_func, mint_ops_func, _ = load_ops_functions() - + def __init__(self, config, data_writer): super().__init__(config, data_writer) self.mindspore_object_key = { @@ -47,18 +48,35 @@ class MindsporeDataProcessor(BaseDataProcessor): @staticmethod def analyze_dtype_in_kwargs(element): return {"type": "mindspore.dtype", "value": str(element)} - + + @staticmethod + def _analyze_builtin(arg): + single_arg = {} + if isinstance(arg, slice): + single_arg.update({"type": "slice"}) + # slice参数中可能存在tensor类型,json序列化,需要转换为python数值类型 + values = [ + value if not isinstance(value, ms.Tensor) else value.item() + for value in [arg.start, arg.stop, arg.step] + ] + single_arg.update({"value": values}) + else: + single_arg.update({"type": type(arg).__name__}) + single_arg.update({"value": arg}) + return single_arg + @classmethod def get_special_types(cls): return super().get_special_types() + cls.mindspore_special_type - + def get_stat_info(self, data): tensor_stat = TensorStatInfo() if data.numel() == 0: return tensor_stat elif data.dtype == ms.bool_: - tensor_stat.max = self.mint_ops_func["max"](data).item() - tensor_stat.min = self.mint_ops_func["min"](data).item() + data_np = data.asnumpy() + tensor_stat.max = np.max(data_np) + tensor_stat.min = np.min(data_np) elif not data.shape: tensor_stat.max = tensor_stat.min = tensor_stat.mean = tensor_stat.norm = data.item() elif data.dtype == ms.complex64 or 
data.dtype == ms.complex128: @@ -90,7 +108,7 @@ class MindsporeDataProcessor(BaseDataProcessor): if isinstance(element, (bool, int, float, str, slice)): return self._analyze_builtin(element) - return None + return {} def analyze_element(self, element): return self.recursive_apply_transform(element, self.analyze_single_element) @@ -129,3 +147,61 @@ class TensorDataProcessor(MindsporeDataProcessor): else: logger.warning(f'The file path {file_path} length exceeds limit.') return single_arg + + +class OverflowCheckDataProcessor(MindsporeDataProcessor): + __slots__ = ["cached_tensors_and_file_paths"] + + def __init__(self, config, data_writer): + super().__init__(config, data_writer) + self.cached_tensors_and_file_paths = {} + self.real_overflow_dump_times = 0 + self.overflow_nums = config.overflow_nums + + def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs): + self.has_overflow = False + api_info_struct = super().analyze_forward(name, module, module_input_output) + self.maybe_save_overflow_data() + return api_info_struct if self.has_overflow else None + + def analyze_backward(self, name, module, module_input_output: ModuleBackwardInputsOutputs): + self.has_overflow = False + api_info_struct = super().analyze_backward(name, module, module_input_output) + self.maybe_save_overflow_data() + return api_info_struct if self.has_overflow else None + + def maybe_save_overflow_data(self): + if self.has_overflow: + for file_path, tensor in self.cached_tensors_and_file_paths.items(): + tensor = convert_bf16_to_fp32(tensor) + np.save(file_path, tensor.asnumpy()) + change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) + self.real_overflow_dump_times += 1 + self.cached_tensors_and_file_paths = {} + + def stop_run(self): + if self.overflow_nums == -1: + return False + if self.real_overflow_dump_times >= self.overflow_nums: + logger.warning(f"[msprobe] 超过预设溢出次数 当前溢出次数: {self.real_overflow_dump_times}") + return True + return False + + def 
_analyze_maybe_overflow_tensor(self, tensor_json): + if tensor_json['Max'] is None: + return + if np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']): + self.has_overflow = True + if np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min']): + self.has_overflow = True + + def _analyze_tensor(self, tensor, suffix): + dump_data_name, file_path = self.get_save_file_path(suffix) + if not path_len_exceeds_limit(file_path): + self.cached_tensors_and_file_paths.update({file_path: tensor}) + else: + logger.warning(f'The file path {file_path} length exceeds limit.') + single_arg = super()._analyze_tensor(tensor, suffix) + self._analyze_maybe_overflow_tensor(single_arg) + single_arg.update({"data_name": dump_data_name}) + return single_arg diff --git a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py index f307909a416..007fec80964 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/data_processor/pytorch_processor.py @@ -15,8 +15,9 @@ from msprobe.pytorch.free_benchmark import FreeBenchmarkCheck, UnequalRow try: import torch_npu + is_gpu = False except ImportError: - pass + is_gpu = True class PytorchDataProcessor(BaseDataProcessor): @@ -77,6 +78,38 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_stat.norm = torch._C._VariableFunctionsClass.norm(data_clone).item() return tensor_stat + @staticmethod + def handle_tensor_extremum_nan_inf(tensor, operator): + data_clone = tensor.detach() + data_nan = torch._C._VariableFunctionsClass.isnan(data_clone) + if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel(): + return float('nan') + finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone) + if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0: + finite_values = data_clone[finite_mask] + return 
torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(finite_values).item() + else: + data_no_nan = data_clone[~data_nan] + return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \ + torch._C._VariableFunctionsClass.min(data_no_nan).item() + + @staticmethod + def _analyze_builtin(arg): + single_arg = {} + if isinstance(arg, slice): + single_arg.update({"type": "slice"}) + # slice参数中可能存在tensor类型,json序列化,需要转换为python数值类型 + values = [ + value if not isinstance(value, torch.Tensor) else value.item() + for value in [arg.start, arg.stop, arg.step] + ] + single_arg.update({"value": values}) + else: + single_arg.update({"type": type(arg).__name__}) + single_arg.update({"value": arg}) + return single_arg + @staticmethod def _analyze_torch_size(arg): return {"type": "torch.Size", "value": list(arg)} @@ -97,7 +130,7 @@ class PytorchDataProcessor(BaseDataProcessor): return self._analyze_tensor(element, Const.SEP.join(suffix_stack)) if isinstance(element, (bool, int, float, str, slice)): return self._analyze_builtin(element) - return None + return {} def analyze_element(self, element): return self.recursive_apply_transform(element, self.analyze_single_element) @@ -113,9 +146,17 @@ class PytorchDataProcessor(BaseDataProcessor): tensor_json.update({"Mean": tensor_stat.mean}) tensor_json.update({"Norm": tensor_stat.norm}) tensor_json.update({"requires_grad": tensor.requires_grad}) - if self.config.summary_mode == "md5": + + if tensor_stat.max is not None: + if np.isinf(tensor_stat.max) or np.isnan(tensor_stat.max): + tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "max") + if tensor_stat.min is not None: + if np.isinf(tensor_stat.min) or np.isnan(tensor_stat.min): + tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(tensor, "min") + + if self.config.summary_mode == Const.MD5: tensor_md5 = self.get_md5_for_tensor(tensor) - 
tensor_json.update({"md5": tensor_md5}) + tensor_json.update({Const.MD5: tensor_md5}) return tensor_json @@ -143,7 +184,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): super().__init__(config, data_writer) self.cached_tensors_and_file_paths = {} self.real_overflow_dump_times = 0 - self.overflow_nums = config.overflow_num + self.overflow_nums = config.overflow_nums self.bits_for_overflow = 8 @staticmethod @@ -151,21 +192,6 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): overflow_mode = os.getenv(OverflowConst.OVERFLOW_DEBUG_MODE_ENABLE, Const.ENV_DISABLE) return overflow_mode == Const.ENV_ENABLE - @staticmethod - def handle_tensor_extremum_nan_inf(data_clone, operator): - data_nan = torch._C._VariableFunctionsClass.isnan(data_clone) - if int(torch._C._VariableFunctionsClass.sum(data_nan)) == data_clone.numel(): - return float('nan') - finite_mask = torch._C._VariableFunctionsClass.isfinite(data_clone) - if int(torch._C._VariableFunctionsClass.sum(finite_mask)) > 0: - finite_values = data_clone[finite_mask] - return torch._C._VariableFunctionsClass.max(finite_values).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(finite_values).item() - else: - data_no_nan = data_clone[~data_nan] - return torch._C._VariableFunctionsClass.max(data_no_nan).item() if operator == 'max' else \ - torch._C._VariableFunctionsClass.min(data_no_nan).item() - def analyze_forward(self, name, module, module_input_output: ModuleForwardInputsOutputs): self.has_overflow = False api_info_struct = super().analyze_forward(name, module, module_input_output) @@ -211,16 +237,13 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): else: torch_npu._C._clear_overflow_npu() - def _analyze_maybe_overflow_tensor(self, tensor_json, tensor): - data_clone = tensor.detach() - if hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan(): + def _analyze_maybe_overflow_tensor(self, tensor_json): + if is_gpu or 
(hasattr(torch_npu._C, '_npu_is_support_inf_nan') and torch_npu._C._npu_is_support_inf_nan()): if tensor_json['Max'] is None: return if np.isinf(tensor_json['Max']) or np.isnan(tensor_json['Max']): - tensor_json['Max_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(data_clone, "max") self.has_overflow = True if np.isinf(tensor_json['Min']) or np.isnan(tensor_json['Min']): - tensor_json['Min_except_inf_nan'] = self.handle_tensor_extremum_nan_inf(data_clone, "min") self.has_overflow = True else: self.has_overflow = self.check_overflow_npu() @@ -234,7 +257,7 @@ class OverflowCheckDataProcessor(PytorchDataProcessor): else: logger.warning(f'The file path {file_path} length exceeds limit.') single_arg = super()._analyze_tensor(tensor, suffix) - self._analyze_maybe_overflow_tensor(single_arg, tensor) + self._analyze_maybe_overflow_tensor(single_arg) single_arg.update({"data_name": dump_data_name}) return single_arg diff --git a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py index c4b7fc11ec4..112e45171ef 100644 --- a/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py +++ b/debug/accuracy_tools/msprobe/core/data_dump/json_writer.py @@ -4,7 +4,7 @@ import fcntl import json from pathlib import Path -from msprobe.core.common.file_check import change_mode +from msprobe.core.common.file_check import change_mode, FileOpen from msprobe.core.common.log import logger from msprobe.core.common.const import Const, FileCheckConst @@ -30,20 +30,20 @@ class DataWriter: return is_exists = os.path.exists(file_path) append = "a+" if is_exists else "w+" - with os.fdopen( - os.open(file_path, Const.WRITE_FLAGS, FileCheckConst.DATA_FILE_AUTHORITY), append, newline="" - ) as csv_file: + with FileOpen(file_path, append) as csv_file: spawn_writer = csv.writer(csv_file) if not is_exists: spawn_writer.writerow(result_header) spawn_writer.writerows([result,]) + is_new_file = not is_exists + if is_new_file: + 
change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) def initialize_json_file(self, **kwargs): kwargs.update({"dump_data_dir": self.dump_tensor_data_dir, Const.DATA: {}}) - with os.fdopen( - os.open(self.dump_file_path, Const.OVERWRITE_FLAGS, FileCheckConst.DATA_FILE_AUTHORITY), 'w' - ) as f: + with FileOpen(self.dump_file_path, 'w') as f: json.dump(kwargs, f) + change_mode(self.dump_file_path, FileCheckConst.DATA_FILE_AUTHORITY) if os.path.exists(self.stack_file_path): os.remove(self.stack_file_path) @@ -83,7 +83,7 @@ class DataWriter: def write_data_json(self, file_path): logger.info(f"dump.json is at {os.path.dirname(os.path.dirname(file_path))}. ") if Path(file_path).exists() and os.path.getsize(file_path) > 0: - with open(file_path, "r+") as f: + with FileOpen(file_path, "r+") as f: fcntl.flock(f, fcntl.LOCK_EX) data_to_write = json.load(f) fcntl.flock(f, fcntl.LOCK_UN) @@ -91,7 +91,7 @@ class DataWriter: self.init_json['data_path'] = self.dump_tensor_data_dir data_to_write = self.init_json data_to_write[Const.DATA].update(self.cache_data[Const.DATA]) - with open(file_path, 'w+') as f: + with FileOpen(file_path, 'w+') as f: fcntl.flock(f, fcntl.LOCK_EX) json.dump(data_to_write, f, indent=1) fcntl.flock(f, fcntl.LOCK_UN) @@ -99,13 +99,13 @@ class DataWriter: self.cache_data[Const.DATA].clear() def write_stack_info_json(self, file_path): - with open(file_path, 'w+') as f: + with FileOpen(file_path, 'w+') as f: fcntl.flock(f, fcntl.LOCK_EX) json.dump(self.cache_stack, f, indent=1) fcntl.flock(f, fcntl.LOCK_UN) def write_construct_info_json(self, file_path): - with open(file_path, 'w+') as f: + with FileOpen(file_path, 'w+') as f: fcntl.flock(f, fcntl.LOCK_EX) json.dump(self.cache_construct, f, indent=1) fcntl.flock(f, fcntl.LOCK_UN) diff --git a/debug/accuracy_tools/msprobe/mindspore/common/utils.py b/debug/accuracy_tools/msprobe/mindspore/common/utils.py index d02f3819537..6abf0a1ee88 100644 --- a/debug/accuracy_tools/msprobe/mindspore/common/utils.py +++ 
b/debug/accuracy_tools/msprobe/mindspore/common/utils.py @@ -29,3 +29,16 @@ def convert_bf16_to_fp32(tensor): tensor = tensor.to(ms.float32) return tensor + +class MsprobeStep(ms.train.Callback): + + def __init__(self, debugger): + super(MsprobeStep, self).__init__() + self.debugger = debugger + + def on_train_step_begin(self, run_context): + self.debugger.start() + + def on_train_step_end(self, run_context): + self.debugger.stop() + self.debugger.step() diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py index 04d66d6a26e..23cb7294b8d 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/debugger_config.py @@ -1,14 +1,10 @@ import os + from msprobe.core.common.utils import Const +from msprobe.core.common.const import MsConst class DebuggerConfig: - convert_map = { - "L0": "cell", - "L1": "api", - "L2": 'kernel' - } - def __init__(self, common_config, task_config): self.dump_path = common_config.dump_path self.task = common_config.task @@ -16,12 +12,13 @@ class DebuggerConfig: self.step = [] if not common_config.step else common_config.step if not common_config.level: common_config.level = "L1" - self.level = DebuggerConfig.convert_map[common_config.level] + self.level = MsConst.TOOL_LEVEL_DICT.get(common_config.level, MsConst.API) self.level_ori = common_config.level self.list = [] if not task_config.list else task_config.list - self.scope =[] if not task_config.scope else task_config.scope - self.data_mode = [] if not task_config.data_mode else task_config.data_mode + self.scope = [] if not task_config.scope else task_config.scope + self.data_mode = [] if not task_config.data_mode else task_config.data_mode self.file_format = task_config.file_format + self.overflow_nums = 1 if not task_config.overflow_nums else task_config.overflow_nums self.check_mode = task_config.check_mode 
self.framework = Const.MS_FRAMEWORK self.summary_mode = task_config.summary_mode diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index 30f7162ff5c..40b44c57ec9 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -1,9 +1,12 @@ import os + import mindspore as ms + from msprobe.mindspore.service import Service from msprobe.mindspore.ms_config import parse_json_config from msprobe.mindspore.debugger.debugger_config import DebuggerConfig from msprobe.mindspore.task_handler_factory import TaskHandlerFactory +from msprobe.core.common.const import MsConst class PrecisionDebugger: @@ -14,6 +17,8 @@ class PrecisionDebugger: cls._instance = super().__new__(cls) cls._instance.initialized = False cls._instance.config = None + cls.service = None + cls.first_start = False return cls._instance def __init__(self, config_path=None): @@ -24,28 +29,47 @@ class PrecisionDebugger: common_config, task_config = parse_json_config(config_path) self.config = DebuggerConfig(common_config, task_config) self.initialized = True - self.service = Service(self.config) + + @staticmethod + def _get_execution_mode(): + if ms.get_context("mode") == ms.GRAPH_MODE: + if ms.context.get_jit_config().get("jit_level") == "O2" or ms.get_context("jit_level") == "O2": + return MsConst.GRAPH_GE_MODE + else: + return MsConst.GRAPH_KBYK_MODE + else: + return MsConst.PYNATIVE_MODE @classmethod - def start(cls): + def start(cls, target=None): instance = cls._instance if not instance: raise Exception("No instance of PrecisionDebugger found.") - if ms.get_context("mode") == ms.PYNATIVE_MODE and instance.config.level_ori == "L1": - instance.service.start() + + instance.config.execution_mode = instance._get_execution_mode() + if instance.config.execution_mode == MsConst.PYNATIVE_MODE and 
instance.config.level == MsConst.API: + if not instance.service: + instance.service = Service(instance.config) + instance.service.start(target) else: - handler = TaskHandlerFactory.create(instance.config) - handler.handle() + if not instance.first_start: + handler = TaskHandlerFactory.create(instance.config) + handler.handle() + + instance.first_start = True @classmethod def stop(cls): instance = cls._instance if not instance: raise Exception("PrecisionDebugger instance is not created.") - instance.service.stop() + if instance.service: + instance.service.stop() @classmethod def step(cls): - if not cls._instance: + instance = cls._instance + if not instance: raise Exception("PrecisionDebugger instance is not created.") - cls._instance.service.step() \ No newline at end of file + if instance.service: + instance.service.step() diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py new file mode 100644 index 00000000000..ad73bcd9119 --- /dev/null +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py @@ -0,0 +1,198 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +import functools +import mindspore as ms +from mindspore import ops +from mindspore.common.tensor import Tensor +from msprobe.core.common.utils import Const +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs +from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ + HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP +from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor +from msprobe.core.common.utils import Const + +PRIMITIVE_PREFIX = "Primitive" + +class ApiRegistry: + def __init__(self): + self.tensor_ori_attr = {} + self.functional_ori_attr = {} + self.mint_ops_ori_attr = {} + self.mint_func_ops_ori_attr = {} + self.norm_inner_ops_ori_attr = {} + + self.tensor_hook_attr = {} + self.functional_hook_attr = {} + self.mint_ops_hook_attr = {} + self.mint_func_ops_hook_attr = {} + self.norm_inner_ops_hook_attr = {} + + self.norm_inner_ops = ["norm", "square", "sqrt", "is_complex"] + self.primitive_counters = {} + + @staticmethod + def store_ori_attr(ori_api_group, api_list, api_ori_attr): + for api in api_list: + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(ori_api_group, sub_module_name) + api_ori_attr[api] = getattr(sub_module, sub_op) + else: + api_ori_attr[api] = getattr(ori_api_group, api) + + @staticmethod + def set_api_attr(api_group, attr_dict): + for api, api_attr in attr_dict.items(): + if Const.SEP in api: + sub_module_name, sub_op = api.rsplit(Const.SEP, 1) + sub_module = getattr(api_group, sub_module_name, None) + if sub_module is not None: + setattr(sub_module, sub_op, api_attr) + else: + setattr(api_group, api, api_attr) + + def norm_inner_op_set_hook_func(self): + self.set_api_attr(ms.ops, self.norm_inner_ops_hook_attr) + + def 
norm_inner_op_set_ori_func(self): + self.set_api_attr(ms.ops, self.norm_inner_ops_ori_attr) + + def api_set_hook_func(self): + self.set_api_attr(ms.Tensor, self.tensor_hook_attr) + self.set_api_attr(ms.ops, self.functional_hook_attr) + self.set_api_attr(ms.mint, self.mint_ops_hook_attr) + self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_hook_attr) + + def api_set_ori_func(self): + self.set_api_attr(ms.Tensor, self.tensor_ori_attr) + self.set_api_attr(ms.ops, self.functional_ori_attr) + self.set_api_attr(ms.mint, self.mint_ops_ori_attr) + self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_ori_attr) + + def initialize_hook(self, hook): + self.store_ori_attr(ms.Tensor, get_tensor_ops(), self.tensor_ori_attr) + wrap_tensor_ops_and_bind(hook) + for attr_name in dir(HOOKTensor): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.tensor_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKTensor, attr_name) + + functional_ops, mint_ops, mint_func_ops = get_functional_ops() + self.store_ori_attr(ms.ops, self.norm_inner_ops, self.norm_inner_ops_ori_attr) + self.store_ori_attr(ms.ops, functional_ops, self.functional_ori_attr) + self.store_ori_attr(ms.mint, mint_ops, self.mint_ops_ori_attr) + self.store_ori_attr(ms.mint.nn.functional, mint_func_ops, self.mint_func_ops_ori_attr) + setup_hooks(hook) + for attr_name in dir(HOOKFunctionalOP): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.functional_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKFunctionalOP, attr_name) + if attr_name[Const.ATTR_NAME_PREFIX_LEN:] in self.norm_inner_ops: + self.norm_inner_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKFunctionalOP, attr_name) + for attr_name in dir(HOOKMintOP): + if attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.mint_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintOP, attr_name) + for attr_name in dir(HOOKMintNNFunctionalOP): + if 
attr_name.startswith(Const.ATTR_NAME_PREFIX): + self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) + + def wrap_primitive(self, origin_func, primitive_name, service_instance): + primitive_instance = self + def func(self, *args, **kwargs): + if primitive_name not in primitive_instance.primitive_counters: + primitive_instance.primitive_counters[primitive_name] = 0 + else: + primitive_instance.primitive_counters[primitive_name] += 1 + + current_count = primitive_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + captured_grads_input = [] + captured_grads_output = [] + + def input_backward_hook(grad): + print(f"Grad input length: {len(grad)}") + print("Captured input grad:", grad) + captured_grads_input.append(grad) + backward_primitive_name = updated_primitive_name + Const.BACKWARD + new_module_input_output = ModuleBackwardInputsOutputs( + grad_input=tuple(captured_grads_input), + grad_output=tuple(captured_grads_output) if captured_grads_output else None + ) + service_instance.data_collector.backward_data_collect( + backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + ) +#1未考虑多输出场景 +# 如果时多grad呢 +# 3 输出的序号问题 + def output_backward_hook(grad): + captured_grads_output.append(grad) + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + new_module_input_output = ModuleBackwardInputsOutputs( + grad_input=None, + grad_output=tuple(captured_grads_output) + ) + service_instance.data_collector.backward_data_collect( + backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output + ) + + if not service_instance.switch: + return origin_func(*args, **kwargs) + + print(f"Entering {updated_primitive_name} hook, number of args: {len(args)}, name: {self.name}") + hooked_inputs = [] + + # for idx, arg in enumerate(args): + # if isinstance(arg, Tensor): + # arg_hooked = 
ops.HookBackward(input_backward_hook)(arg) + # hooked_inputs.append(arg_hooked) + # else: + # hooked_inputs.append(arg) + + out = origin_func(*args, **kwargs) + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" + + if service_instance.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) + service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) + if service_instance.data_collector.if_return_forward_new_output(): + out = service_instance.data_collector.get_forward_new_output() + + if isinstance(out, Tensor): + out = ops.HookBackward(output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + out = tuple(hooked_outputs) + + return out + + return func + + def register_hooks(self, service_instance): + primitive_set = set() + for name, cell in service_instance.model.cells_and_names(): + for pname, primitive in cell._primitives.items(): + primitive_set.add((pname, primitive)) + + for pname, primitive in primitive_set: + print("primitive name is", pname) + NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) + primitive.__class__ = NewPrimitive + +api_register = ApiRegistry() diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py index bcb80dd2266..57ed44111ca 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/hook_cell.py @@ -18,26 +18,23 @@ from mindspore import nn from msprobe.core.common.const import Const -cell_count = defaultdict(int) -g_stop_hook = False - - class HOOKCell(nn.Cell): + 
cell_count = defaultdict(int) + g_stop_hook = False def __init__(self, build_hook) -> None: super(HOOKCell, self).__init__() self.changed_status = False self.input_kwargs = {} self.prefix = "" - global g_stop_hook - if not g_stop_hook: - g_stop_hook = True + if not HOOKCell.g_stop_hook: + HOOKCell.g_stop_hook = True self.changed_status = True if hasattr(self, "prefix_op_name_"): self.prefix = self.prefix_op_name_ - cell_count[self.prefix] += 1 - self.prefix = self.prefix + str(cell_count[self.prefix] - 1) + Const.SEP + HOOKCell.cell_count[self.prefix] += 1 + self.prefix = self.prefix + str(HOOKCell.cell_count[self.prefix] - 1) + Const.SEP forward_hook, backward_hook = build_hook(self.prefix) self.register_forward_hook(forward_hook) self.register_backward_hook(backward_hook) @@ -52,6 +49,5 @@ class HOOKCell(nn.Cell): finally: if self.changed_status: self.changed_status = False - global g_stop_hook - g_stop_hook = False + HOOKCell.g_stop_hook = False return out diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index 49ce4cf2c09..c0ef6bb6c00 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -36,37 +36,39 @@ class StatisticsConfig(BaseConfig): raise Exception("summary_mode is invalid") -class OverflowCheck(BaseConfig): +class OverflowCheckConfig(BaseConfig): def __init__(self, json_config): super().__init__(json_config) - self.file_format = None - self.check_mode = json_config.get("check_mode") + self.data_mode = ["all"] self._check_config() def _check_config(self): - if self.data_mode is not None and len(self.data_mode) > 0: - if len(self.data_mode) > 1 or self.data_mode[0] not in ["all", "input", "output"]: - raise Exception("data_mode must be all, input or output") + if self.overflow_nums is not None and not isinstance(self.overflow_nums, int): + raise Exception("overflow_nums is invalid, it should be an integer") + if 
self.overflow_nums is not None and self.overflow_nums != -1 and self.overflow_nums <= 0: + raise Exception("overflow_nums should be -1 or positive integer") if self.check_mode and self.check_mode not in ["all", "aicore", "atomic"]: raise Exception("check_mode is invalid") +TaskDict = { + Const.TENSOR: TensorConfig, + Const.STATISTICS: StatisticsConfig, + Const.OVERFLOW_CHECK: OverflowCheckConfig, +} + + def parse_common_config(json_config): return CommonConfig(json_config) def parse_task_config(task, json_config): - task_map = json_config[task] + task_map = json_config.get(task) if not task_map: task_map = dict() - if task == Const.TENSOR: - return TensorConfig(task_map) - elif task == Const.STATISTICS: - return StatisticsConfig(task_map) - elif task == Const.OVERFLOW_CHECK: - return OverflowCheck(task_map) - else: + if task not in TaskDict: raise Exception("task is invalid.") + return TaskDict.get(task)(task_map) def parse_json_config(json_file_path): diff --git a/debug/accuracy_tools/msprobe/mindspore/service.py b/debug/accuracy_tools/msprobe/mindspore/service.py index e8aa34dc4fe..b795ec10342 100644 --- a/debug/accuracy_tools/msprobe/mindspore/service.py +++ b/debug/accuracy_tools/msprobe/mindspore/service.py @@ -14,9 +14,14 @@ # ============================================================================ import os +import copy from pathlib import Path import functools +from collections import defaultdict +from mindspore.common.tensor import Tensor +from mindspore import ops +from mindspore import nn from msprobe.core.data_dump.data_collector import build_data_collector from msprobe.core.data_dump.scope import BaseScope from msprobe.mindspore.common.utils import get_rank_if_initialized @@ -25,20 +30,25 @@ from msprobe.mindspore.common.log import logger from msprobe.core.common.utils import Const from msprobe.core.common.exceptions import DistributedNotInitializedError from msprobe.mindspore.dump.hook_cell.api_registry import api_register -from 
msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs +from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs,\ + ModuleBackwardInputs, ModuleBackwardOutputs +from msprobe.core.common.exceptions import MsprobeException +from msprobe.mindspore.dump.hook_cell.hook_cell import HOOKCell class Service: def __init__(self, config): self.model = None - self.config = config + self.config = copy.deepcopy(config) self.config.level = self.config.level_ori - self.data_collector = build_data_collector(config) + self.data_collector = build_data_collector(self.config) self.switch = False self.current_iter = 0 self.first_start = True self.current_rank = None + self.primitive_counters = {} self.dump_iter_dir = None + self.start_call = False def build_hook(self, module_type, name): def forward_hook(api_or_module_name, module, input, output): @@ -50,6 +60,7 @@ class Service: self.data_collector.forward_data_collect(api_or_module_name, module, pid, module_input_output) if self.data_collector.if_return_forward_new_output(): return self.data_collector.get_forward_new_output() + del module.input_kwargs return output def backward_hook(api_or_module_name, module, grad_input, grad_output): @@ -68,18 +79,162 @@ class Service: def wrap_forward_hook(*args, **kwargs): return forward_hook(*args, **kwargs) - + def wrap_backward_hook(*args, **kwargs): return backward_hook(*args, **kwargs) - + return wrap_forward_hook, wrap_backward_hook + def wrap_primitive(self, origin_func, primitive_name): + service_instance = self + + def create_backward_hook(captured_grads, num_tensors, updated_primitive_name, hook_type): + def backward_hook(grad): + captured_grads.append(grad) + try: + if len(captured_grads) == num_tensors and hook_type == Const.INPUT: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + 
service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) + new_module_input_output = ModuleBackwardInputs(grad_input=tuple(captured_grads)) + service_instance.data_collector.backward_input_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + captured_grads.clear() + elif len(captured_grads) == num_tensors and hook_type == Const.OUTPUT: + backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" + service_instance.data_collector.visit_and_clear_overflow_status(backward_primitive_name) + new_module_input_output = ModuleBackwardOutputs(grad_output=tuple(captured_grads)) + service_instance.data_collector.backward_output_data_collect( + backward_primitive_name, service_instance, os.getpid(), new_module_input_output + ) + captured_grads.clear() + + except Exception as exception: + raise Exception( + "This is a primitive op {hook_type}_backward dump error: {exception}," + " updated_primitive_name: {updated_primitive_name}".format( + hook_type=hook_type, exception=exception, updated_primitive_name=updated_primitive_name + ) + ) from exception + + return backward_hook + + def hook_primitive_inputs(args, captured_grads_input, updated_primitive_name): + hooked_inputs = [] + num_tensors = sum(isinstance(arg, Tensor) for arg in args) + input_backward_hook = create_backward_hook(captured_grads_input, num_tensors, updated_primitive_name, + Const.INPUT) + for _, arg in enumerate(args): + if isinstance(arg, Tensor): + arg_hooked = ops.HookBackward(input_backward_hook)(arg) + hooked_inputs.append(arg_hooked) + else: + hooked_inputs.append(arg) + return hooked_inputs + + def hook_primitive_outputs(out, captured_grads_output, updated_primitive_name): + if isinstance(out, tuple): + num_output_tensors = sum(isinstance(tensor, Tensor) for tensor in out) + else: + num_output_tensors = 1 + output_backward_hook = create_backward_hook(captured_grads_output, num_output_tensors, + 
updated_primitive_name, Const.OUTPUT) + + if isinstance(out, Tensor): + return ops.HookBackward(output_backward_hook)(out) + elif isinstance(out, tuple): + hooked_outputs = [] + for tensor in out: + if isinstance(tensor, Tensor): + hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) + else: + hooked_outputs.append(tensor) + return tuple(hooked_outputs) + return out + + def wrapped_primitive_call(instance_self, *args, **kwargs): + + service_instance.update_primitive_counters(primitive_name) + current_count = service_instance.primitive_counters[primitive_name] + updated_primitive_name = f"{Const.PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" + + if not service_instance.switch: + return origin_func(*args, **kwargs) + + captured_grads_input, captured_grads_output = [], [] + + try: + hooked_inputs = hook_primitive_inputs(args, captured_grads_input, updated_primitive_name) + except Exception as exception: + raise Exception("This is a primitive op dump error during input hooking: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception + + try: + out = origin_func(*hooked_inputs, **kwargs) + except Exception as exception: + raise Exception("This is a primitive op dump error during function call: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception + + forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" + service_instance.data_collector.visit_and_clear_overflow_status(forward_primitive_name) + if service_instance.data_collector: + module_input_output = ModuleForwardInputsOutputs(args=hooked_inputs, kwargs=kwargs, output=out) + try: + service_instance.data_collector.forward_data_collect(forward_primitive_name, instance_self, + os.getpid(), module_input_output) + except Exception as exception: + raise Exception("This is a primitive op dump error during forward data collection: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception + + if 
service_instance.data_collector.if_return_forward_new_output(): + out = service_instance.data_collector.get_forward_new_output() + + try: + out = hook_primitive_outputs(out, captured_grads_output, updated_primitive_name) + except Exception as exception: + raise Exception("This is a primitive op dump error during output hooking: {}," + " primitive_name: {}".format(exception, primitive_name)) from exception + + return out + + + return wrapped_primitive_call + + def update_primitive_counters(self, primitive_name): + if primitive_name not in self.primitive_counters: + self.primitive_counters[primitive_name] = 0 + else: + self.primitive_counters[primitive_name] += 1 + + def register_hooks(self): + primitive_set = set() + for _, cell in self.model.cells_and_names(): + for pname, primitive in cell._primitives.items(): + primitive_set.add((pname, primitive)) + + for pname, primitive in primitive_set: + NewPrimitive = type('NewPrimitive', (primitive.__class__,), + {'__call__': self.wrap_primitive(primitive.__call__, pname)}) + primitive.__class__ = NewPrimitive + def step(self): self.current_iter += 1 self.data_collector.update_iter(self.current_iter) + HOOKCell.cell_count = defaultdict(int) + self.primitive_counters.clear() + + @staticmethod + def check_model_valid(model): + if not model or isinstance(model, nn.Cell): + return model + raise MsprobeException( + MsprobeException.INVALID_PARAM_ERROR, "model 参数必须是 mindspore.nn.Cell 类型。" + ) def start(self, model=None): - self.model = model + self.model = Service.check_model_valid(model) + self.start_call = True + logger.info("msprobe: debugger.start() is set successfully") if self.config.step and self.current_iter > max(self.config.step): self.stop() raise Exception("msprobe: exit after iteration {}".format(max(self.config.step))) @@ -96,16 +251,22 @@ class Service: self.register_hook_new() self.first_start = False self.switch = True - logger.info_on_rank_0(f"Dump switch is turned on at step {self.current_iter}. 
") + logger.info(f"Dump switch is turned on at step {self.current_iter}. ") self.create_dirs() - logger.info_on_rank_0(f"Dump data will be saved in {self.dump_iter_dir}.") + logger.info(f"Dump data will be saved in {self.dump_iter_dir}.") def stop(self): + logger.info("msprobe: debugger.stop() is set successfully. " + "Please set debugger.start() to turn on the dump switch again. ") + if not self.start_call: + logger.error("msprobe: debugger.start() is not set in the current scope.") + raise Exception("debugger.start() is not set in the current scope.") if self.config.step and self.current_iter not in self.config.step: return if self.config.rank and self.current_rank not in self.config.rank: return self.switch = False + self.start_call = False self.data_collector.write_json() def create_dirs(self): @@ -130,9 +291,11 @@ class Service: construct_file_path = os.path.join(dump_dir, "construct.json") self.data_collector.update_dump_paths( dump_file_path, stack_file_path, construct_file_path, dump_data_dir, None) - + def register_hook_new(self): - logger.info_on_rank_0("The {} hook function is successfully mounted to the model.".format(self.config.task)) + logger.info("The {} hook function is successfully mounted to the model.".format(self.config.task)) if self.config.level == "L1": api_register.initialize_hook(functools.partial(self.build_hook, BaseScope.Module_Type_API)) api_register.api_set_hook_func() + if self.model: + self.register_hooks() diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py index f495cd673d7..b2eec691af0 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py @@ -21,10 +21,11 @@ import torch import numpy from msprobe.pytorch.api_accuracy_checker.run_ut.run_ut_utils import hf_32_standard_api -from 
msprobe.pytorch.api_accuracy_checker.common.utils import check_file_or_directory_path, check_object_type, \ - get_full_data_path, CompareException +from msprobe.pytorch.api_accuracy_checker.common.utils import check_object_type, get_full_data_path, \ + CompareException +from msprobe.core.common.file_check import FileChecker from msprobe.pytorch.common.log import logger -from msprobe.core.common.const import Const +from msprobe.core.common.const import Const, FileCheckConst TORCH_TYPE = ["torch.device", "torch.dtype"] TENSOR_DATA_LIST = ["torch.Tensor", "torch.nn.parameter.Parameter"] @@ -87,12 +88,13 @@ def gen_real_tensor(data_path, convert_type): convert_type: convert ori_type to dist_type flag. """ data_path = os.path.realpath(data_path) - check_file_or_directory_path(data_path) + data_path_checker = FileChecker(data_path, FileCheckConst.FILE, ability=FileCheckConst.READ_ABLE) + data_path = data_path_checker.common_check() if not data_path.endswith('.pt') and not data_path.endswith('.npy'): error_info = f"The file: {data_path} is not a pt or numpy file." 
raise CompareException(CompareException.INVALID_FILE_ERROR, error_info) if data_path.endswith('.pt'): - data = torch.load(data_path).cpu() + data = torch.load(data_path, map_location=torch.device('cpu')) else: data_np = numpy.load(data_path) data = torch.from_numpy(data_np) diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py index 9c96a52d8bd..9acb5ee6498 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py @@ -68,7 +68,7 @@ signal.signal(signal.SIGTERM, signal_handler) ParallelUTConfig = namedtuple('ParallelUTConfig', ['api_files', 'out_path', 'num_splits', 'save_error_data_flag', 'jit_compile_flag', 'device_id', - 'result_csv_path', 'total_items', 'real_data_path']) + 'result_csv_path', 'total_items', 'config_path']) def run_parallel_ut(config): @@ -90,7 +90,7 @@ def run_parallel_ut(config): *(['-j'] if config.jit_compile_flag else []), *(['-save_error_data'] if config.save_error_data_flag else []), '-csv_path', config.result_csv_path, - *(['-real_data_path', config.real_data_path] if config.real_data_path else []) + *(['-config', config.config_path] if config.config_path else []) ] return cmd @@ -110,14 +110,9 @@ def run_parallel_ut(config): def update_progress_bar(progress_bar, result_csv_path): while any(process.poll() is None for process in processes): - try: - with open(result_csv_path, 'r') as result_file: - completed_items = len(result_file.readlines()) - 1 - progress_bar.update(completed_items - progress_bar.n) - except FileNotFoundError: - logger.warning(f"Result CSV file not found: {result_csv_path}.") - except Exception as e: - logger.error(f"An unexpected error occurred while reading result CSV: {e}") + with FileOpen(result_csv_path, 'r') as result_file: + completed_items = len(result_file.readlines()) 
- 1 + progress_bar.update(completed_items - progress_bar.n) time.sleep(1) for api_info in config.api_files: @@ -175,7 +170,7 @@ def prepare_config(args): out_path_checker = FileChecker(out_path, FileCheckConst.DIR, ability=FileCheckConst.WRITE_ABLE) out_path = out_path_checker.common_check() split_files, total_items = split_json_file(api_info, args.num_splits, args.filter_api) - + config_path = os.path.realpath(args.config_path) if args.config_path else None result_csv_path = args.result_csv_path or os.path.join(out_path, f"accuracy_checking_result_{time.strftime('%Y%m%d%H%M%S')}.csv") if not args.result_csv_path: details_csv_path = os.path.join(out_path, f"accuracy_checking_details_{time.strftime('%Y%m%d%H%M%S')}.csv") @@ -187,7 +182,7 @@ def prepare_config(args): logger.info(f"UT task details will be saved in {details_csv_path}") return ParallelUTConfig(split_files, out_path, args.num_splits, args.save_error_data, args.jit_compile, args.device_id, result_csv_path, - total_items, args.real_data_path) + total_items, config_path) def main(): diff --git a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py index 30994f70944..559dfdc0f14 100644 --- a/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +++ b/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py @@ -27,6 +27,8 @@ from msprobe.pytorch.api_accuracy_checker.compare.compare_column import CompareC from msprobe.pytorch.hook_module.wrap_tensor import TensorOPTemplate from msprobe.pytorch.hook_module.wrap_functional import FunctionalOPTemplate from msprobe.pytorch.hook_module.wrap_torch import TorchOPTemplate +from msprobe.pytorch.hook_module.wrap_npu_custom import NpuOPTemplate +from msprobe.pytorch.hook_module.wrap_aten import AtenOPTemplate from msprobe.pytorch.api_accuracy_checker.common.config import msCheckerConfig from msprobe.pytorch.common.parse_json import 
parse_json_info_forward_backward from msprobe.core.common.file_check import FileOpen, FileChecker, \ @@ -78,6 +80,12 @@ def exec_api(api_type, api_name, args, kwargs): if api_type == "Torch": torch_api = TorchOPTemplate(api_name, str, False) out = torch_api.forward(*args, **kwargs) + if api_type == "Aten": + torch_api = AtenOPTemplate(api_name, None, False) + out = torch_api.forward(*args, **kwargs) + if api_type == "NPU": + torch_api = NpuOPTemplate(api_name, None, False) + out = torch_api.forward(*args, **kwargs) return out @@ -274,7 +282,7 @@ def run_torch_api(api_full_name, real_data_path, backward_content, api_info_dict if need_backward: if need_to_backward(grad_index, out): - backward_args = backward_content[api_full_name].get("grad_output") + backward_args = backward_content[api_full_name].get("input") grad = gen_args(backward_args, api_name, real_data_path=real_data_path)[0] bench_grad, _ = generate_cpu_params(grad, {}, False, api_name) bench_grad_out = run_backward(cpu_args, bench_grad, grad_index, out) @@ -379,10 +387,6 @@ def _run_ut_parser(parser): help=" The path of accuracy_checking_result_{timestamp}.csv, " "when run ut is interrupted, enter the file path to continue run ut.", required=False) - parser.add_argument("-real_data_path", dest="real_data_path", nargs="?", const="", default="", type=str, - help=" In real data mode, the root directory for storing real data " - "must be configured.", - required=False) parser.add_argument("-f", "--filter_api", dest="filter_api", action="store_true", help=" Whether to filter the api in the api_info_file.", required=False) parser.add_argument("-config", "--config_path", dest="config_path", default="", type=str, @@ -400,9 +404,9 @@ def preprocess_forward_content(forward_content): if key not in arg_cache: filtered_new_args = [ {k: v for k, v in arg.items() if k not in ['Max', 'Min']} - for arg in value['args'] if isinstance(arg, dict) + for arg in value['input_args'] if isinstance(arg, dict) ] - arg_cache[key] = 
(filtered_new_args, value['kwargs']) + arg_cache[key] = (filtered_new_args, value['input_kwargs']) filtered_new_args, new_kwargs = arg_cache[key] diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py new file mode 100644 index 00000000000..eb06867371c --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/__init__.py @@ -0,0 +1,15 @@ +import os +from pkgutil import iter_modules +from importlib import import_module + +""" +gpu and cpu not implement benchmark function, supplementary benchmarking function implementation +""" + +package_path = os.path.dirname(os.path.realpath(__file__)) +for _, module_name, _ in iter_modules([package_path]): + module = import_module(f"{__name__}.{module_name}") + for attr_name in dir(module): + attr = getattr(module, attr_name) + if callable(attr) and "npu_custom" not in attr_name: + globals()[attr_name] = attr diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py new file mode 100644 index 00000000000..caf21a604c6 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/apply_adam_w.py @@ -0,0 +1,28 @@ +import torch + + +def npu_apply_adam_w(beta1_power, beta2_power, lr, weight_decay, + beta1, beta2, eps, grad, max_grad_norm, amsgrad, maximize, out): + var, m, v = out + if amsgrad: + max_grad_norm = (torch.rand(var.shape) * 10.0 - 5.0).to(var.dtype) + beta1_power_out = beta1_power * beta1 + beta2_power_out = beta2_power * beta2 + var_t = var * (1 + (-lr * weight_decay)) + gt = -grad if maximize else grad + m_out = m * beta1 - (beta1 + (-1)) * gt + v_out = v * beta2 - (beta2 + (-1)) * gt * gt + + if amsgrad: + max_grad_norm_out = torch.max(max_grad_norm, v_out) + if (1 - beta2_power_out) == 0: + beta2_power_out -= eps + denom = torch.sqrt(torch.div(max_grad_norm_out, (1 - beta2_power_out))) + eps + else: + vraintain 
= torch.div(v_out, (1 - beta2_power_out)) + denom = torch.sqrt(vraintain) + eps + + if (1 - beta1_power_out) == 0: + beta1_power_out -= eps + var_out = var_t + torch.div(-lr * m_out, (1 - beta1_power_out)).div(denom) + return var_out.cpu(), m_out.cpu(), v_out.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py new file mode 100644 index 00000000000..627bf11b64f --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/confusion_transpose.py @@ -0,0 +1,19 @@ +def npu_confusion_transpose(data, perm, shape, transpose_first): + if transpose_first: + output = data.permute(*perm).contiguous().view(shape) + else: + output = data.view(shape).permute(*perm) + return output.cpu() + + +def npu_confusion_transpose_backward(grad, perm, shape, transpose_first): + shape_cal = shape if transpose_first else [shape[perm_dim] for perm_dim in perm] + perm_cal = [0] * len(perm) + for i, perm_dim in enumerate(perm): + perm_cal[perm_dim] = i + + if transpose_first: + result = grad.permute(*perm_cal).reshape(shape_cal) + else: + result = grad.reshape(shape_cal).permute(*perm_cal) + return result.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py new file mode 100644 index 00000000000..a1a9ca08085 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/fast_gelu.py @@ -0,0 +1,55 @@ +import torch + + +def fast_gelu(input0): + attr = 1.702 + const_0 = 0 - attr + const_1 = 1 + const_2 = attr / 2 + + abs_x = torch.abs(input0) + mul_abs_x = abs_x * const_0 + exp_abs_x = torch.exp(mul_abs_x) + div_down = exp_abs_x + const_1 + + pn_x = input0 - abs_x + mul_pn_x = pn_x * const_2 + exp_pn_x = torch.exp(mul_pn_x) + div_up = input0 * exp_pn_x + div_down_rec = torch.reciprocal(div_down) + result = div_up * div_down_rec + + return result.cpu() + + +def 
npu_fast_gelu_backward(grad, input_x): + const_2 = 1.702 + const_3 = 1.0 + const_1 = 0.0 - const_2 + + # e^(-1.702x) + abs_x = torch.abs(input_x) + mul_abs_x = abs_x * const_1 + exp_x = torch.exp(mul_abs_x) + + # 1.702xe^(-1.702x) + add_2 = input_x * exp_x + add_2 = add_2 * const_2 + + # e^(1.702(x-|x|)) + pn_x = input_x - abs_x + mul_pn_x = pn_x * const_2 + exp_pn_x = torch.exp(mul_pn_x) + + # e^(-1.702x) + 1.702xe^(-1.702x) + e^(1.702(x-|x|)) + div_up = exp_x + add_2 + div_up = div_up + exp_pn_x + + # (e^(-1.702x)+1)^2 + div_down_i = exp_x + const_3 + div_down = div_down_i * div_down_i + div_down_rec = torch.reciprocal(div_down) + result_temp = div_up * div_down_rec + result = grad * result_temp + + return result.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py new file mode 100644 index 00000000000..f6949c079e2 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/layer_norm_eval.py @@ -0,0 +1,6 @@ +import torch + + +def npu_layer_norm_eval(data, normalized_shape): + result = torch.nn.functional.layer_norm(data, normalized_shape) + return result.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py new file mode 100644 index 00000000000..95db875edf6 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/linear.py @@ -0,0 +1,12 @@ +import torch + + +def npu_linear(x, weight, bias): + output = torch.nn.functional.linear(x, weight, bias) + return output.cpu() + + +def npu_linear_backward(grad, input_data, weight): + input_grad = torch.matmul(grad, weight) + weight_grad = torch.matmul(grad.t(), input_data) + return input_grad.cpu(), weight_grad.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py new file mode 
100644 index 00000000000..ed1c746ec16 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/matmul_backward.py @@ -0,0 +1,48 @@ +import torch + + +def matmul_backward(grad, self, other, mask): + grad_self, grad_other = None, None + dim_self = self.dim() + dim_other = other.dim() + + size_grad = list(grad.size()) + size_self = list(self.size()) + size_other = list(other.size()) + if dim_self == 1 and dim_other == 1: + grad_self = other.mul(grad) if mask[0] else grad_self + grad_other = self.mul(grad) if mask[1] else grad_other + elif dim_self == 2 and dim_other == 1: + grad_self = grad.unsqueeze(1).mm(other.unsqueeze(0)) if mask[0] else grad_self + grad_other = self.transpose(-1, -2).mm(grad.unsqueeze(1)).squeeze_(1) if mask[1] else grad_other + elif dim_self == 1 and dim_other == 2: + grad_self = grad.unsqueeze(0).mm(other.transpose(-1, -2)).squeeze_(0) if mask[0] else grad_self + grad_other = self.unsqueeze(1).mm(grad.unsqueeze(0)) if mask[1] else grad_other + elif dim_self >= 3 and (dim_other == 1 or dim_other == 2): + view_size = 1 if dim_other == 1 else size_grad[-1] + unfolded_grad = (grad.unsqueeze(-1) if dim_other == 1 else grad).contiguous().view(-1, view_size) + if mask[0]: + grad_self = unfolded_grad.mm(other.unsqueeze(0) if dim_other == 1 else other.transpose(-1, -2)) \ + .view(size_self) + if mask[1]: + unfolded_self = self.contiguous().view([-1, size_self[-1]]) + grad_other = unfolded_self.transpose(-1, -2).mm(unfolded_grad).view(size_other) + elif (dim_self == 1 or dim_self == 2) and dim_other >= 3: + view_size = 1 if dim_self == 1 else size_grad[-2] + unfolded_grad_T = grad.view([-1, view_size]) \ + if dim_self == 1 else grad.transpose(-1, -2).contiguous().view([-1, view_size]) + if mask[0]: + # create a 2D-matrix from other + unfolded_other_T = \ + other.transpose(-1, -2).contiguous().view([-1, size_other[-2]]).transpose(-1, -2) + grad_self = unfolded_other_T.mm(unfolded_grad_T).transpose(-1, -2).view(size_self) + if mask[1]: + 
size_other_T = size_other[:-2] + size_other_T.extend(size_other[::-1][:2]) + grad_other = \ + unfolded_grad_T.mm(self.unsqueeze(0) if dim_self == 1 else self).view(size_other_T).transpose(-1, -2) + else: + grad_self = torch.matmul(grad, other.transpose(-1, -2)) if mask[0] else grad_self + grad_other = torch.matmul(self.transpose(-1, -2), grad) if mask[1] else grad_other + + return grad_self.cpu(), grad_other.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py new file mode 100644 index 00000000000..63f1fa2a3b6 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/npu_fusion_attention.py @@ -0,0 +1,421 @@ +import torch +import numpy as np +from einops import rearrange + +from msprobe.pytorch.common.utils import logger + +gtype = torch.float64 # arm host必须选择float64,x86环境选择float32即可,64也行。arm计算很慢,s=8k的场景建议使用x86 +softmax_build_mode = "QKV" # "MAX_SUM" + +""" +# 前向函数声明对比 +标杆实现:fusion_attention_forward: q, k, v, drop_mask, atten_mask, pse, scale, keep_prob +融合算子:npu_fusion_attention_forward: query, key, value, head_num, input_layout, *, pse=None, padding_mask=None, + atten_mask=None, scale=1.0, keep_prob=1.0, pre_tockens=2147483647, + next_tockens=2147483647, inner_precise=0, prefix=None, sparse_mode=0, + gen_mask_parallel=True, sync=False + +# 反向函数声明对比 +标杆实现:fusion_attention_backward: dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob +融合算子:npu_fusion_attention_backward: query, key, value, dy, head_num, input_layout, *, pse=None, padding_mask=None, + atten_mask=None, softmax_max=None, softmax_sum=None, softmax_in=None, + attention_in=None, scale_value=1.0, keep_prob=1.0, pre_tockens=2147483647, + next_tockens=2147483647, inner_precise=0, seed=0, offset=0, + numels=0, prefix=None, sparse_mode=0, gen_mask_parallel=True, sync=False +""" + + +def softmax_forward(x): + x_max = torch.max(x, dim=-1, keepdims=True)[0] + x_sub = 
x.sub(x_max) + y = torch.exp(x_sub) + x_sum = y.sum(dim=-1, keepdims=True) + res = y.div(x_sum) + return res, x_max, x_sum + + +def softmax_grad(dp, softmax_res): + muls = dp * softmax_res + muls_r = muls.sum(dim=-1, keepdims=True) + sub_r = dp - muls_r + res = sub_r * softmax_res + return res + + +def broadcast_kv(num_heads, num_kv_heads, kv_tensor, dtype): + if num_kv_heads == 0 or num_kv_heads < num_heads: + raise ValueError(f"num_kv_heads must be non-zero and less than num_heads.") + + factor = num_heads // num_kv_heads + kv_shape = kv_tensor.shape + B = kv_shape[0] + S = kv_shape[2] + D = kv_shape[3] + kv_res = torch.zeros([B, num_heads, S, D]).to(dtype) + for i in range(num_heads): + j = i // factor + kv_res[:, i:i + 1, :, :] = kv_tensor[:, j:j + 1, :, :] + return kv_res + + +def calculate_qk(q, k, atten_mask, pse, scale): + if pse is None or len(pse.shape) == 0: + qk = torch.matmul(q, k.permute(0, 1, 3, 2)).mul(scale) + else: + qk = (torch.matmul(q, k.permute(0, 1, 3, 2)) + pse).mul(scale) + if atten_mask is None or len(atten_mask.shape) == 0: + return qk + else: + qk = qk + atten_mask.bool() * (-40000.0) # -10000 + return qk + + +def fusion_attention_forward(q, k, v, drop_mask, atten_mask, pse, scale, keep_prob): + qk = calculate_qk(q, k, atten_mask, pse, scale) + softmax_res, softmax_max, softmax_sum = softmax_forward(qk) + if drop_mask is None or len(drop_mask.shape) == 0: + drop_res = softmax_res + else: + drop_res = softmax_res * drop_mask * (1.0 / keep_prob) + y = torch.matmul(drop_res, v) + return y, softmax_max, softmax_sum + + +def fusion_attention_backward(dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob): + dp = torch.matmul(dx, v.permute(0, 1, 3, 2)) + if drop_mask is None or len(drop_mask.shape) == 0: + drop_res = softmax_res.permute(0, 1, 3, 2) + dp_drop = dp + else: + drop_res = softmax_res.mul(drop_mask).mul(1.0 / keep_prob).permute(0, 1, 3, 2) + dp_drop = dp * drop_mask * (1.0 / keep_prob) + dv = torch.matmul(drop_res, dx) + 
softmax_grad_res = (softmax_grad(dp_drop, softmax_res) * scale) + dq = torch.matmul(softmax_grad_res, k) + dk = torch.matmul(softmax_grad_res.permute(0, 1, 3, 2), q) + return dq, dk, dv + + +def parse_bsnd_args(query, key, head_num, input_layout): + supported_input_layout = ["BSH", "SBH", "BSND", "BNSD", "TND"] + B, S1, S2, N1, N2, D, H1, H2 = None, None, None, head_num, None, None, None, None + + if not isinstance(input_layout, str) or input_layout not in supported_input_layout: + raise ValueError(f"Invalid input_layout arg which must be one of {supported_input_layout}.") + + if input_layout == "TND": + raise ValueError(f"input_layout {input_layout} does not supported for now.") + try: + if input_layout == "BSH": + B, S1, H1 = query.shape + _, S2, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "SBH": + S1, B, H1 = query.shape + S2, _, H2 = key.shape + D = H1 // N1 + N2 = H2 // D + elif input_layout == "BSND": + B, S1, N1, D = query.shape + _, S2, N2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + elif input_layout == "BNSD": + B, N1, S1, D = query.shape + _, N2, S2, _ = key.shape + H1 = N1 * D + H2 = N2 * D + except Exception as e: + raise ValueError(f"query.shape: {query.shape}, key.shape: {key.shape}, parse_bsnd_args error: {e}") from e + + if D == 0: + raise ValueError(f"Value D must be non-zero.") + DTYPE = query.dtype + return B, S1, S2, N1, N2, D, H1, H2, DTYPE + + +def convert_from_bnsd(_input, input_layout): + if input_layout == "BSH": + # (B,N,S,D)=>(B,S,N*D) + out = rearrange(_input, 'b n s d -> b s (n d)').contiguous() + elif input_layout == "SBH": + # (B,N,S,D)=>(S,B,N*D) + out = rearrange(_input, 'b n s d -> s b (n d)').contiguous() + elif input_layout == "BSND": + # (B,N,S,D)=>(B,S,N,D) + out = rearrange(_input, 'b n s d -> b s n d').contiguous() + elif input_layout == "TND": + raise ValueError(f"input_layout {input_layout} does not supported for now.") + else: + out = _input + return out + + +def convert_to_bnsd(_input, n, 
input_layout): + # 默认"BNSD"无需处理 + if input_layout == "BSH": + # (B,S,N*D)=>(B,N,S,D) + out = rearrange(_input, 'b s (n d) -> b n s d', n=n) + elif input_layout == "SBH": + # (S,B,N*D)=>(B,N,S,D) + out = rearrange(_input, 's b (n d) -> b n s d', n=n) + elif input_layout == "BSND": + # (B,S,N,D)=>(B,N,S,D) + out = rearrange(_input, 'b s n d -> b n s d', n=n) + elif input_layout == "TND": + raise ValueError(f"input_layout {input_layout} does not supported for now.") + else: + out = _input + if out.dim() != 4: + raise ValueError(f"convert qkv format failed with input_layout {input_layout}.") + return out.to(gtype) + + +def generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tocken, next_tocken, dtype): + """ + # 当sparse_mode=2、3、4时小算子到融合算子会走这个优化,反过来看就要拆解回原来的基本实现 + ===> atten_mask = torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(dtype) + """ + shape = [S1, S2] + + if atten_mask is not None: + # 当FA的输入已经包含atten_mask时,可以认为已经是转换之后的mask矩阵了,有三种特殊场景,即稀疏矩阵场景,需要进行逆向还原 + if sparse_mode == 2 or sparse_mode == 3 or sparse_mode == 4: + logger.info(f"S1: {S1}, S2:{S2}, atten_mask.shape:{atten_mask.shape}, atten_mask.dtype:{atten_mask.dtype}") + + if atten_mask.dim() == 2 and atten_mask.shape[0] == 2048 and atten_mask.shape[1] == 2048: + if atten_mask.equal(torch.from_numpy(np.triu(np.ones([2048, 2048]), k=1)).to(atten_mask.dtype)): + if sparse_mode == 2: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=1)) + elif sparse_mode == 3: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=S2 - S1 + 1)) + elif sparse_mode == 4: + atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1)) + atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1)) + atten_mask = atten_mask_u + atten_mask_l + logger.debug(f"反向转换atten_mask {atten_mask.shape}") + return atten_mask.to(dtype) + + return atten_mask.to(dtype) + + if atten_mask is not None: + if atten_mask.dim() == 2: + if atten_mask.shape[0] != S1 or atten_mask.shape[1] != S2: 
+ raise ValueError(f"Invalid atten_mask shape `SS` {atten_mask.shape}") + shape = [S1, S2] + elif atten_mask.dim() == 4: + if atten_mask.shape[1] == 1: + shape = [B, 1, S1, S2] if B != 1 else [1, 1, S1, S2] + else: + shape = [B, N1, S1, S2] if B != 1 else [1, N1, S1, S2] + + if sparse_mode == 0: + atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1)) + atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1)) + atten_mask = atten_mask_u + atten_mask_l + elif sparse_mode == 1: # no sparse + atten_mask = torch.from_numpy(np.zeros(shape)) + elif sparse_mode == 2: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=1)) + elif sparse_mode == 3: + atten_mask = torch.from_numpy(np.triu(np.ones(shape), k=S2 - S1 + 1)) + elif sparse_mode == 4: + atten_mask_u = torch.from_numpy(np.triu(np.ones(shape), k=next_tocken + 1)) + atten_mask_l = torch.from_numpy(np.tril(np.ones(shape), k=-pre_tocken - 1)) + atten_mask = atten_mask_u + atten_mask_l + # 注:不会出现sparse_mode=5的情况,该情况要求必须要传入atten_mask,且atten_mask矩阵数据格式须为BNSS或B1SS, + # 因此可以认为FA的输入已经是正确的atten_mask了 + return atten_mask.to(dtype) + + +def generate_kv(key, value, N1, N2): + # N不等长适配by cdy + if not (N1 == N2): + k_new = broadcast_kv(N1, N2, key, key.dtype) + v_new = broadcast_kv(N1, N2, value, value.dtype) + else: + k_new = key + v_new = value + return k_new, v_new + + +def rebuid_softmax_by_qkv(q, k, atten_mask, pse, scale): + """ + attention = softmax(QK^T/sqrt(d))V + softmax(x_i) = e^(x_i - x_max) / sum(e^(x_i - x_max)) + """ + logger.info("Using QKV to rebuild original softmax") + qk = calculate_qk(q, k, atten_mask, pse, scale) + softmax_res, x_max, x_sum = softmax_forward(qk) + return softmax_res + + +def rebuild_softmax_by_max_sum(q, k, atten_mask, pse, scale, softmax_max, softmax_sum): + """ + attention = softmax(QK^T/sqrt(d))V + softmax(x_i) = e^(x_i - x_max_i) / x_sum_i) + """ + logger.info("Using softmax_max and softmax_sum to rebuild original softmax") + qk = 
calculate_qk(q, k, atten_mask, pse, scale) + if softmax_max.shape[-1] == 0: + raise ValueError(f"softmax_max.shape[-1] must be non-zero, softmax_max.shape: {softmax_max.shape}") + repeat_dim = qk.shape[-1] // softmax_max.shape[-1] + softmax_res = torch.exp(qk.sub(softmax_max.repeat(1, 1, 1, repeat_dim))).div( + softmax_sum.repeat(1, 1, 1, repeat_dim)) + return softmax_res + + +def npu_fusion_attention_forward_patch(*args, **kwargs): + # query, key, value, head_num, input_layout + if len(args) != 5: + raise ValueError(f"Unsupported npu_fusion_attention args {args}.") + + B, S1, S2, N1, N2, D, H1, H2, DTYPE = parse_bsnd_args(args[0], args[1], args[3], args[4]) + if N1 == N2 and S1 == S2: + logger.debug(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + else: + logger.debug(f"running case: BNSD = {B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + if not (N1 % N2 == 0 and N1 >= N2): + raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.") + + dims_kwargs = {"B": B, "S1": S1, "S2": S2, "N1": N1, "N2": N2, + "D": D, "H1": H1, "H2": H2, "DTYPE": DTYPE} + + new_kwargs = {"keep_prob": 1, + "scale": kwargs.get("scale", 1 / (D ** 0.5)), + "sparse_mode": kwargs.get("sparse_mode", 0), + "prefix": kwargs.get("prefix"), + "pre_tockens": kwargs.get("pre_tockens", 2147483647), + "next_tockens": kwargs.get("next_tockens", 2147483647), + "pse": kwargs.get("pse"), + "padding_mask": kwargs.get("padding_mask"), + "atten_mask": kwargs.get("atten_mask")} + + return args, dims_kwargs, new_kwargs + + +def npu_fusion_attention_backward_patch(*args, **kwargs): + if len(args) != 6: + raise ValueError(f"Unsupported npu_fusion_attention_grad args {args}.") + + B, S1, S2, N1, N2, D, H1, H2, DTYPE = parse_bsnd_args(args[0], args[1], args[4], args[5]) + if N1 == N2 and S1 == S2: + logger.info(f"running case : BNSD = {B}_{N1}_{S1}_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + else: + logger.info(f"running case: BNSD = 
{B}_{N1}({N2})_{S1}({S2})_{D}, sparse = {kwargs.get('sparse_mode', 0)}") + if not (N1 % N2 == 0 and N1 >= N2): + raise ValueError(f"N1与N2不匹配,请检查: N1 = {N1}, N2 = {N2}.") + + dims_kwargs = {"B": B, "S1": S1, "S2": S2, "N1": N1, "N2": N2, + "D": D, "H1": H1, "H2": H2, "DTYPE": DTYPE} + + new_kwargs = {"keep_prob": 1, + "scale_value": kwargs.get("scale_value", 1 / (D ** 0.5)), + "sparse_mode": kwargs.get("sparse_mode", 0), + "prefix": kwargs.get("prefix"), + "pre_tockens": kwargs.get("pre_tockens", 2147483647), + "next_tockens": kwargs.get("next_tockens", 2147483647), + "pse": kwargs.get("pse"), + "padding_mask": kwargs.get("padding_mask"), + "softmax_max": kwargs.get("softmax_max"), + "softmax_sum": kwargs.get("softmax_sum"), + "softmax_in": kwargs.get("softmax_in"), + "attention_in": kwargs.get("attention_in"), + "seed": kwargs.get("seed", 0), + "offset": kwargs.get("offset", 0), + "numels": kwargs.get("numels", 0), + "atten_mask": kwargs.get("atten_mask")} + + return args, dims_kwargs, new_kwargs + + +def npu_fusion_attention(*args, **kwargs): + new_args, dims_kwargs, new_kwargs = npu_fusion_attention_forward_patch(*args, **kwargs) + query, key, value, input_layout = new_args[0], new_args[1], new_args[2], new_args[4] + N1 = dims_kwargs.get("N1") + N2 = dims_kwargs.get("N2") + S1 = dims_kwargs.get("S1") + S2 = dims_kwargs.get("S2") + B = dims_kwargs.get("B") + DTYPE = dims_kwargs.get("DTYPE") + atten_mask = new_kwargs.get("atten_mask") + keep_prob = new_kwargs.get("keep_prob") + sparse_mode = new_kwargs.get("sparse_mode") + pre_tockens = new_kwargs.get("pre_tockens") + next_tockens = new_kwargs.get("next_tockens") + pse = new_kwargs.get("pse") + scale = new_kwargs.get("scale") + + atten_mask = generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tockens, next_tockens, DTYPE) + query = convert_to_bnsd(query, N1, input_layout) + key = convert_to_bnsd(key, N2, input_layout) + value = convert_to_bnsd(value, N2, input_layout) + k_new, v_new = generate_kv(key, 
value, N1, N2) + out_golden, softmax_max, softmax_sum = fusion_attention_forward(q=query, k=k_new, v=v_new, + drop_mask=None, atten_mask=atten_mask, + pse=pse, scale=scale, + keep_prob=keep_prob) + if out_golden.dim() == 5: + out_golden = out_golden.reshape(out_golden.size(0), out_golden.size(1) * out_golden.size(2), out_golden.size(3), + out_golden.size(4)) + out_golden = convert_from_bnsd(out_golden, input_layout) + + return out_golden.cpu(), softmax_max.repeat(1, 1, 1, 8).cpu(), softmax_sum.repeat(1, 1, 1, 8).cpu() + + +def npu_fusion_attention_grad(*args, **kwargs): + # dx, q, k, v, softmax_res, drop_mask, pse, scale, keep_prob + new_args, dims_kwargs, new_kwargs = npu_fusion_attention_backward_patch(*args, **kwargs) + query, key, value, dx, input_layout = new_args[0], new_args[1], new_args[2], new_args[3], new_args[5] + N1 = dims_kwargs.get("N1") + N2 = dims_kwargs.get("N2") + S1 = dims_kwargs.get("S1") + S2 = dims_kwargs.get("S2") + B = dims_kwargs.get("B") + D = dims_kwargs.get("D") + DTYPE = dims_kwargs.get("DTYPE") + atten_mask = new_kwargs.get("atten_mask") + keep_prob = new_kwargs.get("keep_prob") + sparse_mode = new_kwargs.get("sparse_mode") + pre_tockens = new_kwargs.get("pre_tockens") + next_tockens = new_kwargs.get("next_tockens") + pse = new_kwargs.get("pse") + softmax_max = new_kwargs.get("softmax_max") + softmax_sum = new_kwargs.get("softmax_sum") + scale_value = new_kwargs.get("scale_value") + + atten_mask = generate_atten_mask(sparse_mode, atten_mask, B, N1, S1, S2, pre_tockens, next_tockens, DTYPE) + query = convert_to_bnsd(query, N1, input_layout) + dx = convert_to_bnsd(dx, N1, input_layout) + key = convert_to_bnsd(key, N2, input_layout) + value = convert_to_bnsd(value, N2, input_layout) + k_new, v_new = generate_kv(key, value, N1, N2) + + if softmax_build_mode == "QKV": + softmax_res = rebuid_softmax_by_qkv(query, k_new, atten_mask, pse, scale_value) + else: + softmax_res = rebuild_softmax_by_max_sum(query, k_new, atten_mask, pse, 
scale_value, softmax_max, softmax_sum) + + dq, dk, dv = fusion_attention_backward(dx, query, k_new, v_new, softmax_res, None, pse, scale_value, keep_prob) + + # N不等长适配by cdy + if not (N1 == N2): + if N2 == 0: + raise ValueError("dims_kwargs.N2 must be non-zero.") + G = int(N1 / N2) + dk = torch.sum(dk.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) + dv = torch.sum(dv.reshape(B, N2, G, S2, D), dim=2, keepdim=True).reshape(B, N2, S2, D) + + if dq.dim() == 5: + dq = dq.reshape(dq.size(0), dq.size(1) * dq.size(2), dq.size(3), dq.size(4)) + if dk.dim() == 5: + dk = dk.reshape(dk.size(0), dk.size(1) * dk.size(2), dk.size(3), dk.size(4)) + if dv.dim() == 5: + dv = dv.reshape(dv.size(0), dv.size(1) * dv.size(2), dv.size(3), dv.size(4)) + + dq = convert_from_bnsd(dq, input_layout) + dk = convert_from_bnsd(dk, input_layout) + dv = convert_from_bnsd(dv, input_layout) + + return dq.cpu(), dk.cpu(), dv.cpu() diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py new file mode 100644 index 00000000000..e647312fdb2 --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rms_norm.py @@ -0,0 +1,15 @@ +import torch + + +def npu_rms_norm(x, gamma, epsilon=1e-5): + rstd = torch.rsqrt(torch.mean(torch.pow(x, 2), axis=-1, keepdim=True) + epsilon) + res = x * rstd * gamma + return res.cpu(), rstd.float().cpu() + + +def npu_rms_norm_backward(grad, x, gamma, rstd): + mean_gy = (grad * x * gamma * rstd).mean(dim=-1, keepdim=True) + grad_x = (grad * gamma - x * rstd * mean_gy) * rstd + grad_gamma = x * grad * rstd + return grad_x.cpu(), grad_gamma.cpu() + diff --git a/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py new file mode 100644 index 00000000000..0e0fda5f73f --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/bench_functions/rotary_mul.py @@ -0,0 +1,52 @@ +import 
import torch


def npu_rotary_mul(x, r1, r2):
    """CPU reference ("golden") implementation of the NPU rotary embedding op.

    Computes r1 * x + r2 * rotate_half(x), where rotate_half splits the last
    dimension in two, negates the second half and swaps the halves.
    Returns the result moved to CPU.
    """
    x1, x2 = torch.chunk(x, 2, -1)
    x_new = torch.cat((-x2, x1), dim=-1)
    output = r1 * x + r2 * x_new
    return output.cpu()


def npu_rotary_mul_backward(dy_tensor, x, r1, r2):
    """CPU reference backward for npu_rotary_mul.

    x.grad is obtained via autograd on the golden forward; r1/r2 gradients are
    accumulated manually for three supported broadcast layouts of r1/r2
    relative to x (singleton dims 0&2, 0&1, or 1&2).
    NOTE(review): if none of the three layout conditions match, r1_grad and
    r2_grad are returned as zeros — presumably callers only pass these
    layouts; confirm against the NPU op's broadcasting contract.
    Returns (x_grad, r1_grad, r2_grad) on CPU.
    """
    x.requires_grad = True
    r1.requires_grad = True
    r2.requires_grad = True
    # golden: autograd through the forward gives the exact x gradient
    x1, x2 = torch.chunk(x, 2, -1)
    x_new = torch.cat((-x2, x1), dim=-1)
    golden_tensor = r1 * x + r2 * x_new
    golden_tensor.backward(dy_tensor)
    r1_shape = r1.shape
    r1_grad = torch.zeros(r1_shape).type(torch.float32)
    r2_grad = torch.zeros(r1_shape).type(torch.float32)
    x1, x2 = torch.chunk(x.float(), 2, -1)
    x_new2 = torch.cat((-x2, x1), dim=-1)
    x_shape = x.shape
    h = x.float()
    grad = dy_tensor.float()
    # r1/r2 broadcast along dims 0 and 2 of x
    condition_1 = (((r1_shape[0] == 1 and x_shape[0] != 1) or (r1_shape[0] == 1 and x_shape[0] == 1)) and
                   ((r1_shape[2] == 1 and x_shape[2] != 1) or (r1_shape[2] == 1 and x_shape[2] == 1)) and
                   (r1_shape[1] == x_shape[1]) and (r1_shape[3] == x_shape[3]))
    # r1/r2 broadcast along dims 0 and 1 of x
    condition_2 = (((r1_shape[0] == 1 and x_shape[0] != 1) or (r1_shape[0] == 1 and x_shape[0] == 1)) and
                   ((r1_shape[1] == 1 and x_shape[1] != 1) or (r1_shape[1] == 1 and x_shape[1] == 1)) and
                   (r1_shape[2] == x_shape[2]) and (r1_shape[3] == x_shape[3]))
    # r1/r2 broadcast along dims 1 and 2 of x
    condition_3 = (((r1_shape[2] == 1 and x_shape[2] != 1) or (r1_shape[2] == 1 and x_shape[2] == 1)) and
                   ((r1_shape[1] == 1 and x_shape[1] != 1) or (r1_shape[1] == 1 and x_shape[1] == 1)) and
                   (r1_shape[0] == x_shape[0]) and (r1_shape[3] == x_shape[3]))
    if condition_1:
        for i in range(x_shape[0]):
            for j in range(x_shape[2]):
                r2_grad[0, :, 0, :] += (x_new2[i, :, j, :] * grad[i, :, j, :])
                r1_grad[0, :, 0, :] += (h[i, :, j, :] * grad[i, :, j, :])
    elif condition_2:
        for i in range(x_shape[0]):
            for j in range(x_shape[1]):
                r2_grad[0, 0, :, :] += (x_new2[i, j, :, :] * grad[i, j, :, :])
                r1_grad[0, 0, :, :] += (h[i, j, :, :] * grad[i, j, :, :])
    elif condition_3:
        for i in range(x_shape[1]):
            for j in range(x_shape[2]):
                r2_grad[:, 0, 0, :] += (x_new2[:, i, j, :] * grad[:, i, j, :])
                r1_grad[:, 0, 0, :] += (h[:, i, j, :] * grad[:, i, j, :])
    return x.grad.cpu(), r1_grad.cpu(), r2_grad.cpu()


def npu_scaled_masked_softmax(x, mask, scale, fixed_triu_mask):
    """CPU reference of the NPU scaled-masked-softmax forward.

    Scales x, fills masked positions with -10000 and applies a numerically
    stable softmax over the last dimension in float32, casting the result
    back to x's dtype. When fixed_triu_mask is true the supplied mask is
    replaced by a strictly-upper-triangular mask of the same shape.
    """
    if fixed_triu_mask:
        # Bug fix: torch.triu takes `diagonal=`, not NumPy's `k=`;
        # `k=1` raised TypeError at runtime.
        mask = (torch.triu(torch.ones(mask.shape), diagonal=1)).bool().to(mask.device)
    dtype = x.dtype
    x = (x * scale).masked_fill(mask, value=-10000)
    # subtract the row max for numerical stability before exponentiating
    x = x - torch.max(x, dim=-1, keepdim=True)[0]
    x = torch.exp(x.float())
    y = torch.div(x, torch.sum(x, dim=-1, keepdim=True))
    return y.to(dtype).cpu()


def npu_scaled_masked_softmax_backward(y_grad, y, mask, scale, fixed_triu_mask):
    """CPU reference of the NPU scaled-masked-softmax backward.

    Standard softmax backward (y * (dy - sum(dy * y))) scaled by `scale`,
    with masked positions zeroed; computed in float32 and cast back to
    y_grad's dtype.
    """
    if fixed_triu_mask:
        # Bug fix: `diagonal=` is the correct torch.triu keyword (was `k=`).
        mask = (torch.triu(torch.ones(mask.shape), diagonal=1)).bool().to(mask.device)
    dtype = y_grad.dtype
    y_grad = y_grad.float()
    y = y.float()
    x_grad = y_grad * y
    x_grad = y_grad - torch.sum(x_grad, dim=-1, keepdim=True)
    x_grad = x_grad * y
    x_grad = x_grad * scale
    x_grad = x_grad.masked_fill(mask, value=0)
    return x_grad.to(dtype).cpu()


def npu_swiglu(x, dim=-1):
    """CPU reference of the NPU SwiGLU forward.

    Splits x into (a, b) along `dim` and returns silu(a) * b. For non-fp32
    dtypes the intermediate is round-tripped through the low-precision dtype
    to mimic NPU rounding behaviour.
    """
    tensor_dtype = x.dtype

    in_tensors = torch.chunk(x, 2, dim=dim)
    if tensor_dtype == torch.float32:
        tensor_scalar = torch.sigmoid(torch.mul(in_tensors[0], 1.0))
        output_data = torch.mul(torch.mul(tensor_scalar, in_tensors[0]), in_tensors[1])
    else:
        tensor_self_float = in_tensors[0].type(torch.float)
        tensor_other_float = in_tensors[1].type(torch.float)
        # cast to the low-precision dtype and back to emulate NPU rounding
        tensor_out_float = torch.nn.functional.silu(tensor_self_float).type(tensor_dtype).type(
            torch.float32) * tensor_other_float
        output_data = tensor_out_float.type(tensor_dtype)
    return output_data.cpu()


def npu_swiglu_backward(grad, x, dim=-1):
    """CPU reference of the NPU SwiGLU backward.

    Returns cat(b * swish'(a) * dy, swish(a) * dy) along `dim`, with the
    fp16/bf16 branches inserting low-precision round-trips to mimic NPU
    rounding.
    """
    tensor_dtype = grad.dtype
    in_tensors = torch.chunk(x, 2, dim=dim)
    tensor_grad_out = grad

    if tensor_dtype == torch.float16:
        tensor_out1 = torch.mul(
            torch.mul(in_tensors[1].type(torch.float32), swish_grad(1, in_tensors[0].type(torch.float32))),
            tensor_grad_out.type(torch.float32)).type(torch.float16)
        tensor_out2 = torch.mul(tensor_grad_out.type(torch.float32),
                                swish(1, in_tensors[0].type(torch.float32))).type(torch.float16)
        output = torch.cat((tensor_out1, tensor_out2), dim)
    elif tensor_dtype == torch.bfloat16:
        tensor_self_float = in_tensors[0].type(torch.float)
        tensor_other_float = in_tensors[1].type(torch.float)
        tensor_gradout_float = tensor_grad_out.type(torch.float)

        tensor_out1 = torch.mul(tensor_gradout_float, swish_grad(1.0, tensor_self_float)).type(torch.bfloat16).type(
            torch.float32) * tensor_other_float
        tensor_out2 = swish(1.0, tensor_self_float).type(torch.bfloat16).type(torch.float32) * tensor_gradout_float
        tensor_out_float = torch.cat((tensor_out1, tensor_out2), dim=dim)
        output = tensor_out_float.type(torch.bfloat16)
    else:
        tensor_out1 = torch.mul(torch.mul(in_tensors[1], swish_grad(1.0, in_tensors[0])), tensor_grad_out)
        tensor_out2 = torch.mul(tensor_grad_out, swish(1.0, in_tensors[0]))
        output = torch.cat((tensor_out1, tensor_out2), dim)
    return output.cpu()


def swish_grad(beta, x):
    """Derivative of swish(beta, x) with respect to x."""
    return torch.sigmoid(beta * x) + x * (1 - torch.sigmoid(beta * x)) * torch.sigmoid(beta * x) * beta


def swish(beta, x):
    """Swish activation: x * sigmoid(beta * x)."""
    return x * torch.sigmoid(beta * x)
b/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py index 22f79879867..ccad903724c 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/parse_json.py @@ -1,5 +1,7 @@ import json + from msprobe.core.common.exceptions import ParseJsonException +from msprobe.core.common.file_check import FileOpen def parse_json_info_forward_backward(json_path): @@ -11,7 +13,7 @@ def parse_json_info_forward_backward(json_path): api_name = '.'.join(name_struct[:-1]) return api_name - with open(json_path, 'r') as f: + with FileOpen(json_path, 'r') as f: dump_json = json.load(f) real_data_path = dump_json.get("dump_data_dir") diff --git a/debug/accuracy_tools/msprobe/pytorch/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/common/utils.py index acc1de10514..181491488f9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/common/utils.py @@ -14,10 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" +import logging import os import random import stat import torch +import torch.distributed as dist import numpy as np from functools import wraps from msprobe.core.common.exceptions import DistributedNotInitializedError @@ -221,3 +223,36 @@ class Const: CONVERT_API = { "int32_to_int64": ["cross_entropy"] } + + +def get_tensor_rank(in_feat, out_feat): + if dist.is_initialized(): + return dist.get_rank() + + def get_tensor_rank_single(x): + if isinstance(x, (list, tuple)): + if len(x) > 0: + return get_tensor_rank_single(x[0]) + elif isinstance(x, torch.Tensor): + device = x.device + if device.type != 'cpu': + return device.index + return None + + in_rank = get_tensor_rank_single(in_feat) + out_rank = get_tensor_rank_single(out_feat) + tensor_rank = in_rank if in_rank else out_rank + return tensor_rank + + +def _create_logger(level=logging.INFO): + logger_ = logging.getLogger() + logger_.setLevel(level) + ch = logging.StreamHandler() + ch.setLevel(level) + logger_.addHandler(ch) + return logger_ + + +log_level = logging.DEBUG if os.environ.get("API_ACCURACY_CHECK_LOG_LEVEL") == "1" else logging.INFO +logger = _create_logger(log_level) diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py index e214910566e..2a68c756ed3 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/acc_compare.py @@ -492,7 +492,7 @@ def compare_by_op(op_name, op_name_mapping_dict, input_parma): error_file = error.filename n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True - except FileCheckerException: + except FileCheckException: error_file = data_name n_value, b_value = CompareConst.READ_NONE, CompareConst.READ_NONE error_flag = True @@ -645,7 +645,11 @@ def highlight_rows_xlsx(result_df, highlight_dict, file_path): elif (i - 2) in highlight_dict['yellow_rows']: ws.cell(row=i, column=j).fill = 
PatternFill(start_color=CompareConst.YELLOW, end_color=CompareConst.YELLOW, fill_type="solid") - wb.save(file_path) + try: + wb.save(file_path) + except Exception as e: + logger.error('Save result file failed') + raise CompareException(CompareException.WRITE_FILE_ERROR) from e change_mode(file_path, FileCheckConst.DATA_FILE_AUTHORITY) @@ -655,8 +659,8 @@ def compare(input_parma, output_path, stack_mode=False, auto_analyze=True, summary_compare, md5_compare = task_dumppath_get(input_parma) check_configuration_param(stack_mode, auto_analyze, fuzzy_match) create_directory(output_path) - check_compare_param(input_parma, output_path, stack_mode, summary_compare, md5_compare) - except CompareException as error: + check_compare_param(input_parma, output_path, summary_compare, md5_compare) + except (CompareException, FileCheckException) as error: logger.error('Compare failed. Please check the arguments and do it again!') sys.exit(error.code) compare_core(input_parma, output_path, stack_mode=stack_mode, @@ -764,9 +768,14 @@ def op_item_parse(item, op_name, index, item_list=None, top_bool=True): else: full_op_name = op_name else: - full_op_name = op_name + '.' 
+ str(index) + full_op_name = op_name + Const.SEP + str(index) if isinstance(item, dict): - if 'dtype' in item: + if 'type' not in item: + for kwarg in item: + kwarg_parsed_list = op_item_parse(item[kwarg], op_name + Const.SEP + kwarg, None) + item_list += kwarg_parsed_list + kwarg_parsed_list.clear() + elif 'dtype' in item: parsed_item = item parsed_item['full_op_name'] = full_op_name item_list.append(parsed_item) @@ -869,13 +878,13 @@ def read_op(op_data, op_name): op_parsed_list += output_parsed_list output_parsed_list.clear() if 'backward' in op_name: - if 'grad_input' in op_data: - input_item = op_data['grad_input'] + if 'input' in op_data: + input_item = op_data['input'] input_parsed_list = op_item_parse(input_item, op_name + '_input', None) op_parsed_list = input_parsed_list.copy() input_parsed_list.clear() - if 'grad_output' in op_data: - output_item = op_data['grad_output'] + if 'output' in op_data: + output_item = op_data['output'] output_parsed_list = op_item_parse(output_item, op_name + '_output', None) op_parsed_list += output_parsed_list output_parsed_list.clear() diff --git a/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py index 0298eca9e7e..caac1395807 100644 --- a/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py +++ b/debug/accuracy_tools/msprobe/pytorch/compare/distributed_compare.py @@ -21,6 +21,7 @@ from msprobe.core.common.utils import CompareException, check_compare_param, \ check_configuration_param, task_dumppath_get, check_file_or_directory_path, check_regex_prefix_format_valid from msprobe.pytorch.compare.acc_compare import compare_core from msprobe.core.common.file_check import create_directory +from msprobe.core.common.exceptions import FileCheckException from msprobe.pytorch.common.log import logger @@ -86,12 +87,11 @@ def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs): 'or use compare() api and manually 
match the ranks.') raise CompareException(CompareException.INVALID_PATH_ERROR) for nr, br in zip(npu_ranks, bench_ranks): - n_dir = os.path.join(npu_dump_dir, nr) - b_dir = os.path.join(bench_dump_dir, br) - s_dir = b_dir - npu_json_path = extract_json(n_dir, stack_json=False) - bench_json_path = extract_json(b_dir, stack_json=False) - stack_json_path = extract_json(s_dir, stack_json=True) + npu_data_dir = os.path.join(npu_dump_dir, nr) + bench_data_dir = os.path.join(bench_dump_dir, br) + npu_json_path = extract_json(npu_data_dir, stack_json=False) + bench_json_path = extract_json(bench_data_dir, stack_json=False) + stack_json_path = extract_json(npu_data_dir, stack_json=True) dump_result_param = { 'npu_json_path': npu_json_path, @@ -103,8 +103,8 @@ def compare_distributed(npu_dump_dir, bench_dump_dir, output_path, **kwargs): summary_compare, md5_compare = task_dumppath_get(dump_result_param) check_configuration_param(stack_mode, auto_analyze, fuzzy_match) create_directory(output_path) - check_compare_param(dump_result_param, output_path, stack_mode=stack_mode, summary_compare=summary_compare) - except CompareException as error: + check_compare_param(dump_result_param, output_path, summary_compare=summary_compare, md5_compare=md5_compare) + except (CompareException, FileCheckException) as error: logger.error('Compare failed. 
Please check the arguments and do it again!') sys.exit(error.code) compare_core(dump_result_param, output_path, suffix=f'_{nr}-{br}', summary_compare=summary_compare, diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py index cfc588e1e97..f1289e9b013 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/debugger_config.py @@ -21,7 +21,7 @@ class DebuggerConfig: self.acl_config = common_config.acl_config if common_config.acl_config else "" self.is_forward_acl_dump = True self.summary_mode = task_config.summary_mode if task_config.summary_mode else Const.STATISTICS - self.overflow_num = task_config.overflow_num if task_config.overflow_num else 1 + self.overflow_nums = task_config.overflow_nums if task_config.overflow_nums else 1 self.framework = Const.PT_FRAMEWORK if self.task == Const.FREE_BENCHMARK: diff --git a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py index 137c51895d0..6119bbd1d4f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/pytorch/debugger/precision_debugger.py @@ -27,6 +27,7 @@ class PrecisionDebugger: step=None, ): if not hasattr(self, "initialized"): + self.api_origin = False self.initialized = True self.model = self.check_model_valid(model) common_config, task_config = parse_json_config(config_path, task) diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md b/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md index b3ed4a9e24c..41b97098ae9 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/api_accuracy_checker.md @@ -21,7 +21,7 @@ 精度预检操作流程如下: 1. 
在NPU和GPU环境下分别安装msprobe工具。详见《[MindStudio精度调试工具](../../README.md)》的“工具安装”章节。 -2. 在NPU训练脚本内添加msprobe工具dump接口PrecisionDebugger采集待预检数据。详见《[精度数据采集](./dump.md)》。 +2. 在NPU训练脚本内添加msprobe工具dump接口PrecisionDebugger,采集待预检数据。详见《[精度数据采集](./dump.md)》,注意需要配置level="L1"。 3. 将NPU环境下dump的预检数据拷贝至GPU环境。 4. 在NPU和GPU环境下分别执行run_ut,生成结果用于最终api_precision_compare操作的输入。详见“**run_ut预检操作**”。 5. 将NPU和GPU执行run_ut生成的`accuracy_checking_details_{timestamp}.csv`结果文件拷贝至同一环境下。 @@ -51,10 +51,12 @@ run_ut预检操作包括如下场景: | -api_info或--api_info_file | 指定API信息文件dump.json。 | 是 | | -save_error_data | 保存精度未达标的API输入输出数据。 | 否 | | -o或--out_path | 指定run_ut执行结果存盘路径,默认“./”(相对于run_ut的路径)。 | 否 | + | | | | | -j或--jit_compile | 开启jit编译。 | 否 | | -d或--device | 指定Device ID,选择UT代码运行所在的卡,默认值为0。 | 否 | | -csv_path或--result_csv_path | 指定本次运行中断时生成的`accuracy_checking_result_{timestamp}.csv`文件路径,执行run_ut中断时,若想从中断处继续执行,配置此参数即可。需要指定为上次中断的`accuracy_checking_result_{timestamp}.csv`文件。详见“**断点续检**”。 | run_ut操作中断后继续执行场景下必选 | | -f或--filter_api | 过滤模型中除最大值和最小值以外其他参数和结构相同的API。适用于模型较大且重复API较多的场景。 | 否 | + | -config或--config_path | 指定预检操作过程中的额外配置(包括黑名单、白名单等)的[config.json](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe/config)文件,默认未配置。config.json文件的配置可参考《[配置文件说明](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/config/README.md#pytorch场景task配置为run_ut)》。 | 否 | run_ut执行结果包括`accuracy_checking_result_{timestamp}.csv`和`accuracy_checking_details_{timestamp}.csv`两个文件。`accuracy_checking_result_{timestamp}.csv`是API粒度的,标明每个API是否通过测试。建议用户先查看`accuracy_checking_result_{timestamp}.csv`文件,对于其中没有通过测试的或者特定感兴趣的API,根据其API name字段在`accuracy_checking_details_{timestamp}.csv`中查询其各个输出的达标情况以及比较指标。详细介绍请参见“**预检结果**”。 @@ -64,7 +66,7 @@ run_ut预检操作包括如下场景: msprobe -f pytorch run_ut -api_info ./dump.json -save_error_data ``` - 数据默认会存盘到'./ut_error_data{timestamp}'路径下(相对于启动run_ut的路径),有需要的话,用户可以通过修改mstt/debug/accuracy_tools/api_accuracy_checker目录下,config.yaml文件的error_data_path参数来配置保存路径,详见“config.yaml文件说明”。 + 
数据默认会存盘到'./ut_error_data{timestamp}'路径下(相对于启动run_ut的路径),有需要的话,用户可以通过error_data_path参数来配置保存路径,error_data_path参数在[config.json](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe/config)文件或config.yaml文件配置,config.json文件需要在run_ut操作时通过-config参数指定,config.yaml文件详见“**config.yaml文件说明**”。 #### 使用multi_run_ut.py执行多线程预检 @@ -99,23 +101,65 @@ msprobe -f pytorch multi_run_ut -api_info ./dump.json -n 32 -d 0 1 2 3 msprobe -f pytorch run_ut -api_info ./dump.json -csv_path /home/xxx/ut/accuracy_checking_result_{timestamp}.csv ``` -#### API预检白名单 +#### API预检黑名单和白名单 -run_ut过程支持API预检白名单,操作方式如下: +run_ut过程支持API预检黑名单和白名单,通过如下文件配置black_list(黑名单)或white_list(白名单)参数来指定不需要或需要预检的API名称: -修改mstt/debug/accuracy_tools/api_accuracy_checker目录下config.yaml文件的white_list参数,配置需要预检的API名称,详见“config.yaml文件说明”。 +- 配置[config.json](https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe/config)文件,config.json文件需要在run_ut操作时通过-config参数指定。 +- 配置config.yaml文件,详见“**config.yaml文件说明**”。 + +config.json文件的优先级高于config.yaml文件,即执行config.json文件时,config.yaml文件的配置不生效。 ### config.yaml文件说明 -config.yaml文件可以通过配置参数来控制dump和run_ut操作的白名单等功能。 +config.yaml文件可以通过配置参数来控制dump和run_ut操作的白名单、黑名单等功能。操作步骤如下: + +1. 查找msprobe工具安装路径。 + + ```bash + pip show mindstudio-probe + ``` + + 输出结果如下示例: + + ```bash + Name: mindstudio-probe + Version: 1.0 + Summary: This is a pytorch precision comparison tools + Home-page: + Author: + Author-email: + License: + Location: /home/xx/anaconda3/envs/pt21py38/lib/python3.8/site-packages + Requires: numpy, openpyxl, pandas, pyyaml, rich, tqdm, wheel + Required-by: + ``` + + Location字段为msprobe工具的安装路径,那么config.yaml文件位置为/home/xx/anaconda3/envs/pt21py38/lib/python3.8/site-packages/msprobe/pytorch/api_accuracy_checker/config.yaml + +2. 进入config.yaml文件 + + ```bash + vi /home/xx/anaconda3/envs/pt21py38/lib/python3.8/site-packages/msprobe/pytorch/api_accuracy_checker/config.yaml + ``` + +3. 
修改config.yaml文件参数。 + + ```yaml + white_list: [] + black_list: [] + error_data_path: './' + precision: 14 + ``` -文件路径为:mstt/debug/accuracy_tools/msprobe/pytorch/api_accuracy_checker/config.yaml + | 参数名称 | 说明 | 是否必选 | + | --------------- | ------------------------------------------------------------ | -------- | + | white_list | API dump白名单,仅对指定的API进行dump。参数示例:white_list=["conv1d", "conv2d"]。默认未配置白名单,即dump全量API数据。 | 否 | + | black_list | API dump黑名单,被指定的API不进行dump。参数示例:black_list=["conv1d", "conv2d"]。默认未配置黑名单,即dump全量API数据。 | 否 | + | error_data_path | 配置保存精度未达标的API输入输出数据路径。参数示例"error_data_path": "./"。默认为当前路径。 | 否 | + | precision | 浮点数表示位数,默认取小数点后14位。 | 否 | -| 参数名称 | 说明 | 是否必选 | -| --------------- | ------------------------------------------------------------ | -------- | -| white_list | API dump白名单,指定dump具体API数据,也可以直接配置预检的API白名单,详细请参见“**API预检白名单**”。参数示例:white_list=["conv1d", "conv2d"]。默认未配置白名单,即dump全量API数据。 | 否 | -| error_data_path | 配置保存精度未达标的API输入输出数据路径。 | 否 | -| precision | 浮点数表示位数,默认取小数点后14位。 | 否 | + 说明:white_list和black_list同时配置时,二者配置的API名单若无交集,则白名单生效,若API名单存在交集,则白名单排除的部分以及交集的API不进行dump。 ## 预检结果 diff --git a/debug/accuracy_tools/msprobe/pytorch/doc/dump.md b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md index 7d0763b6848..7e393cd1026 100644 --- a/debug/accuracy_tools/msprobe/pytorch/doc/dump.md +++ b/debug/accuracy_tools/msprobe/pytorch/doc/dump.md @@ -12,7 +12,7 @@ msprobe工具主要通过在训练脚本内添加dump接口并启动训练的方 通过加载dump配置文件的方式来确定dump操作的详细配置。 -可以在from msprobe.pytorch import PrecisionDebugger和模型初始化之间的任意位置添加该接口。 +PrecisionDebugger接口可以在from msprobe.pytorch import PrecisionDebugger之后的位置添加。详细使用可参考“**示例代码**”或“**model配置代码示例**”。 **原型** @@ -20,7 +20,7 @@ msprobe工具主要通过在训练脚本内添加dump接口并启动训练的方 PrecisionDebugger(config_path=None, task=None, dump_path=None, level=None, model=None, step=None) ``` -说明:上述参数除config_path和model外,其他参数均在[config.json](../../config)文件中可配,此处的参数优先级高于[config.json](../../config)文件中的配置,而config.json文件可以配置更多参数,若需要进行更多场景的精度数据dump,建议配置[config.json](../../config)文件。 
+说明:上述参数除config_path和model外,其他参数均在[config.json](../../config)文件中可配,此处的参数优先级高于[config.json](../../config)文件中的配置,而config.json文件可以配置更多参数,若需要进行更多场景的精度数据dump,建议配置[config.json](../../config)文件。config.json文件的配置可参考《[配置文件说明](https://gitee.com/ascend/mstt/blob/master/debug/accuracy_tools/msprobe/config/README.md)》。 **参数说明** @@ -77,9 +77,9 @@ if __name__ == "__main__" **功能说明** -启动函数。 +dump启动函数。 -在模型初始化之后的任意位置添加。 +在模型初始化之后的位置添加。需要与stop函数一起添加在for循环内。 **原型** @@ -93,9 +93,9 @@ debugger.start() **功能说明** -停止函数。 +dump停止函数。 -在**start**函数之后的任意位置添加。 +在**start**函数之后的任意位置添加。若需要dump反向数据,则需要添加在反向计算代码(如loss.backward)之后。 **原型** @@ -105,13 +105,33 @@ debugger.stop() 该函数为类函数,可以使用debugger.stop()也可以使用PrecisionDebugger.stop()。 +### forward_backward_dump_end函数 + +**功能说明** + +dump停止函数。用于dump指定代码的前反向数据。 + +在**start**函数之后,反向计算代码(如loss.backward)之前的任意位置添加,可以dump **start**函数和该函数之间的前反向数据,可以通过调整**start**函数与该函数的位置,来指定需要dump的代码块。 + +要求**stop**函数添加在反向计算代码(如loss.backward)之后,此时该函数与**stop**函数之间的代码不会被dump。 + +使用示例参见“**示例代码 > 扩展示例**”。 + +**原型** + +```Python +forward_backward_dump_end() +``` + +该函数为类函数,可以使用debugger.forward_backward_dump_end()也可以使用PrecisionDebugger.forward_backward_dump_end()。 + ### step函数 **功能说明** 结束标识。 -在最后一个**stop**函数后或一个step结束的位置添加。 +在最后一个**stop**函数后或一个step结束的位置添加。需要与start函数一起添加在for循环内。 **原型** @@ -123,24 +143,57 @@ debugger.step() ## 示例代码 +### 基础操作 + +如下示例可dump完整代码的前反向数据。 + ```Python from msprobe.pytorch import PrecisionDebugger + +# 请勿将PrecisionDebugger的初始化流程插入到循环代码中 debugger = PrecisionDebugger(config_path="./config.json", dump_path="./dump_path") -# 请勿将以上初始化流程插入到循环代码中 -# 模型初始化 -# 下面代码也可以用PrecisionDebugger.start()和PrecisionDebugger.stop() -debugger.start() +# 模型、损失函数的定义及初始化等操作 +# ... -# 需要dump的代码片段1 +# 数据集迭代的位置一般为模型训练开始的位置 +for data, label in data_loader: + debugger.start() # 开启数据dump -debugger.stop() -debugger.start() + # 如下是模型每个step执行的逻辑 + output = model(data) + #... 
+ loss.backward() + + debugger.stop() # 关闭数据dump + debugger.step() # 结束一个step的dump +``` -# 需要dump的代码片段2 +### 扩展示例 -debugger.stop() -debugger.step() +如下示例dump指定代码块前反向数据。 + +```Python +from msprobe.pytorch import PrecisionDebugger + +# 请勿将PrecisionDebugger的初始化流程插入到循环代码中 +debugger = PrecisionDebugger(config_path="./config.json", dump_path="./dump_path") + +# 模型、损失函数的定义及初始化等操作 +# ... + +# 数据集迭代的位置一般为模型训练开始的位置 +for data, label in data_loader: + debugger.start() # 开启数据dump + + # 如下是模型每个step执行的逻辑 + output = model(data) + debugger.forward_backward_dump_end() # 插入该函数到start函数之后,只dump start函数到该函数之间代码的前反向数据,本函数到stop函数之间的数据则不dump + #... + loss.backward() + + debugger.stop() # 关闭数据dump + debugger.step() # 结束一个step的dump ``` ## dump结果文件介绍 diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py index e737e7b2179..c5e93be138d 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/constant.py @@ -52,6 +52,7 @@ class ThresholdConfig: DTYPE_PER_THD = { torch.float16: 1.002, + torch.bfloat16: 1.004, torch.float32: 1.0002, } BENCHMARK_THD_DICT = { @@ -60,6 +61,8 @@ class ThresholdConfig: torch.bfloat16: BenchmarkThd(2**-8, 1.0, 2**-8, 1e-4), } + TENSOR_SPLIT_MAX_CHUNK = 128 + class PreheatConfig: IF_PREHEAT = "if_preheat" diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py index ddcbd9d0f5c..631beeb85cb 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/common/utils.py @@ -96,3 +96,7 @@ class TorchC: add = torch._C._VariableFunctionsClass.add bitwise_xor = torch._C._VariableFunctionsClass.bitwise_xor clone = torch._C._VariableFunctionsClass.clone + clamp = torch._C._VariableFunctionsClass.clamp + tensor_split = 
torch._C._VariableFunctionsClass.tensor_split + stack = torch._C._VariableFunctionsClass.stack + reshape = torch._C._VariableFunctionsClass.reshape diff --git a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py index 1728b096f5b..e36f5867355 100644 --- a/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +++ b/debug/accuracy_tools/msprobe/pytorch/free_benchmark/result_handlers/base_handler.py @@ -1,6 +1,7 @@ import math from abc import ABC, abstractmethod from typing import Any, Optional, Tuple +import numpy as np import torch from msprobe.core.common.const import Const @@ -34,15 +35,36 @@ class FuzzHandler(ABC): origin_ouput = origin_ouput.values perturbed_output = perturbed_output.values if hasattr(perturbed_output, "dtype"): - abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(perturbed_output.dtype) + abs_tol = ThresholdConfig.ABS_TOL_VALUE_DICT.get(perturbed_output.dtype, FuzzThreshold.F32_THD) else: - abs_tol = FuzzThreshold.F32_THD.value + abs_tol = FuzzThreshold.F32_THD return ( origin_ouput.to(perturbed_output.dtype).to(perturbed_output.device), perturbed_output, abs_tol, ) + @staticmethod + def tensor_split_for_error_calculate(origin_output, perturbed_output): + """ + 对将投入误差值计算的扰动前后输出张量进行分块 + :param origin_output: 原始输出 + :param perturbed_output: 扰动后输出 + :return origin_output_chunks: 切块后原始输出列表 + :return perturbed_output_chunks: 切块后扰动后输出列表 + """ + single_output_mem = origin_output.element_size() * origin_output.nelement() / Const.ONE_MB + if single_output_mem == 0 or origin_output.ndim == 0: + return [origin_output], [perturbed_output] + # 张量大小和批数之间的关系:chunks_exp=math.log(M,2)-4, chunks=2**chunks_exp (M为对比张量数据大小[Mb]) + chunks_exp = int(math.log(single_output_mem, 2)) - 4 + chunks = 2 ** chunks_exp + chunks = max(chunks, 1) + chunks = min(chunks, ThresholdConfig.TENSOR_SPLIT_MAX_CHUNK) + origin_output_chunks = 
TorchC.tensor_split(TorchC.reshape(origin_output, (-1,)), chunks) + perturbed_output_chunks = TorchC.tensor_split(TorchC.reshape(perturbed_output, (-1,)), chunks) + return origin_output_chunks, perturbed_output_chunks + @staticmethod def convert_overflow_ratio_to_consistent(ratio): if math.isnan(ratio) or math.isinf(ratio): @@ -61,36 +83,28 @@ class FuzzHandler(ABC): self, origin_output, perturbed_output, norm_type, abs_tol ): if norm_type == NormType.ENDLESS_NORM: - return self.get_endless_norm(origin_output, perturbed_output, abs_tol) + return self.calculate_error(origin_output, perturbed_output, abs_tol) return ThresholdConfig.COMP_CONSISTENT - def get_endless_norm(self, origin_output, perturbed_output, abs_tol): - ratio_tensor1 = TorchC.where( - TorchC.gt(TorchC.abs(perturbed_output), abs_tol), - TorchC.div( - TorchC.abs(origin_output), - TorchC.add(TorchC.abs(perturbed_output), abs_tol), - ), - 1, - ) - ratio_tensor2 = TorchC.where( - TorchC.gt(TorchC.abs(origin_output), abs_tol), - TorchC.div( - TorchC.abs(perturbed_output), - TorchC.add(TorchC.abs(origin_output), abs_tol), - ), - 1, - ) + def calculate_error(self, origin_output, perturbed_output, abs_tol): + origin_output_chunks, perturbed_output_chunks = self.tensor_split_for_error_calculate(origin_output, perturbed_output) + norm1 = -np.inf + norm2 = -np.inf + norm3 = np.inf + for i, chunk_origin in enumerate(origin_output_chunks): + if chunk_origin.nelement() == 0: + break + chunk_perturbed = perturbed_output_chunks[i] + ratio_tensor1 = TorchC.where(TorchC.abs(chunk_perturbed) > abs_tol, + TorchC.div(TorchC.clamp(chunk_origin, min=abs_tol), TorchC.clamp(chunk_perturbed, min=abs_tol)), 1) + ratio_tensor2 = TorchC.where(TorchC.abs(chunk_origin) > abs_tol, + TorchC.div(TorchC.clamp(chunk_perturbed, min=abs_tol), TorchC.clamp(chunk_origin, min=abs_tol)), 1) + norm_values = TorchC.stack([TorchC.max(ratio_tensor1), TorchC.max(ratio_tensor2)]) + max_ratio1, max_ratio2 = norm_values.tolist() + norm1 = max(norm1, 
self.convert_overflow_ratio_to_consistent(max_ratio1)) + norm2 = max(norm2, self.convert_overflow_ratio_to_consistent(max_ratio2)) + norm3 = min(norm3, self.convert_overflow_ratio_to_consistent(max_ratio1)) - norm1 = self.convert_overflow_ratio_to_consistent( - TorchC.max(ratio_tensor1).item() - ) - norm2 = self.convert_overflow_ratio_to_consistent( - TorchC.max(ratio_tensor2).item() - ) - norm3 = self.convert_overflow_ratio_to_consistent( - TorchC.min(ratio_tensor1).item() - ) if norm3 < 0: ratio = ThresholdConfig.SYMBOL_FLIPPING else: diff --git a/debug/accuracy_tools/msprobe/pytorch/function_factory.py b/debug/accuracy_tools/msprobe/pytorch/function_factory.py new file mode 100644 index 00000000000..c2fd8bfd0cb --- /dev/null +++ b/debug/accuracy_tools/msprobe/pytorch/function_factory.py @@ -0,0 +1,75 @@ +from msprobe.pytorch.common.utils import logger +from msprobe.pytorch.bench_functions.apply_adam_w import npu_apply_adam_w +from msprobe.pytorch.bench_functions.confusion_transpose import npu_confusion_transpose, \ + npu_confusion_transpose_backward +from msprobe.pytorch.bench_functions.fast_gelu import fast_gelu, npu_fast_gelu_backward +from msprobe.pytorch.bench_functions.layer_norm_eval import npu_layer_norm_eval +from msprobe.pytorch.bench_functions.linear import npu_linear, npu_linear_backward +from msprobe.pytorch.bench_functions.matmul_backward import matmul_backward +from msprobe.pytorch.bench_functions.npu_fusion_attention import npu_fusion_attention, npu_fusion_attention_grad +from msprobe.pytorch.bench_functions.rms_norm import npu_rms_norm, npu_rms_norm_backward +from msprobe.pytorch.bench_functions.rotary_mul import npu_rotary_mul, npu_rotary_mul_backward +from msprobe.pytorch.bench_functions.scaled_mask_softmax import npu_scaled_masked_softmax, \ + npu_scaled_masked_softmax_backward +from msprobe.pytorch.bench_functions.swiglu import npu_swiglu, npu_swiglu_backward, swish_grad, swish + + +class Register(dict): + def __init__(self, *args, **kwargs): 
+ super(Register, self).__init__(*args, **kwargs) + self._dict = {} + + def __call__(self, target_func_list): + for target in target_func_list: + self.register(target) + return + + def __setitem__(self, key, value): + self._dict[key] = value + + def __getitem__(self, key): + return self._dict[key] + + def __contains__(self, key): + return key in self._dict + + def __str__(self): + return str(self._dict) + + def keys(self): + return self._dict.keys() + + def values(self): + return self._dict.values() + + def items(self): + return self._dict.items() + + def register(self, target): + + def add_register_item(key, value): + if key in self._dict: + logger.warning(f"{value.__name__} has been registered before, so we will overriden it.") + self[key] = value + return value + + if callable(target): + return add_register_item(target.__name__, target) + else: + raise Exception(f"The func {target} is not callable.") + + +# register for npu custom bench functions +npu_custom_functions = Register() +npu_custom_functions([ + npu_apply_adam_w, npu_confusion_transpose, fast_gelu, npu_layer_norm_eval, npu_linear, npu_fusion_attention, + npu_rms_norm, npu_rotary_mul, npu_scaled_masked_softmax, npu_swiglu +]) + +# register for npu custom backward bench functions +npu_custom_grad_functions = Register() +npu_custom_grad_functions([ + npu_confusion_transpose_backward, npu_fast_gelu_backward, npu_linear_backward, matmul_backward, + npu_fusion_attention_grad, npu_rms_norm_backward, npu_rotary_mul_backward, npu_scaled_masked_softmax_backward, + npu_swiglu_backward +]) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py index 6693a09d028..ff6427e51e5 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/hook_module.py @@ -17,9 +17,11 @@ import functools import threading + import torch import torch.nn as nn import 
torch.utils.hooks as full_hooks + from msprobe.core.common.const import Const @@ -61,6 +63,10 @@ class HOOKModule(nn.Module): HOOKModule.inner_stop_hook[self.current_thread] = False return result + @classmethod + def reset_module_stats(cls): + cls.module_count = {} + def _call_func(self, *input, **kwargs): full_backward_hooks, non_full_backward_hooks = [], [] if len(self._backward_hooks) > 0: diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml index d64c577ff38..f68708e945e 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/support_wrap_ops.yaml @@ -1873,4 +1873,5 @@ distributed: - reduce_scatter - _reduce_scatter_base - _all_gather_base - - all_to_all_single \ No newline at end of file + - all_to_all_single + - all_to_all \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py index 4617e4854fc..a02abbe5f4b 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_aten.py @@ -24,12 +24,14 @@ from msprobe.pytorch.hook_module.hook_module import HOOKModule from msprobe.pytorch.common.utils import torch_device_guard from msprobe.core.common.const import Const from msprobe.core.common.file_check import FileOpen - +from msprobe.pytorch.function_factory import npu_custom_grad_functions cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") with FileOpen(yaml_path, 'r') as f: - WrapAtenOps = yaml.safe_load(f).get('aten') + Ops = yaml.safe_load(f) + WrapAtenOps = Ops.get('aten') + WhiteAtenOps = Ops.get('white_aten_ops', []) aten_func = {} @@ -48,7 +50,7 @@ class HOOKAtenOP(object): class AtenOPTemplate(HOOKModule): - def __init__(self, op, 
hook): + def __init__(self, op, hook, need_hook=True): if isinstance(op, torch._ops.OpOverloadPacket): op_name_ = op._qualified_op_name.split("::")[-1] else: @@ -58,10 +60,21 @@ class AtenOPTemplate(HOOKModule): op_name_ = op_name_ + '.' + overload_name self.op = op self.prefix_op_name_ = "Aten" + Const.SEP + str(op_name_) + Const.SEP - super().__init__(hook) + self.need_hook = need_hook + if self.need_hook: + super().__init__(hook) @torch_device_guard def forward(self, *args, **kwargs): + if isinstance(self.op, str): + if self.op in npu_custom_grad_functions: + return npu_custom_grad_functions[self.op](*args, **kwargs) + if self.op in WhiteAtenOps: + return eval(f"torch.ops.aten.{self.op}")(*args, **kwargs) + if self.op not in aten_func: + raise Exception(f"Skip op[{self.op}] accuracy check, because the op is not " + f"in dir(torch.ops.aten) and support yaml.") + return aten_func[self.op](*args, **kwargs) return self.op(*args, **kwargs) diff --git a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py index 992713bce57..8a67ed94290 100644 --- a/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py +++ b/debug/accuracy_tools/msprobe/pytorch/hook_module/wrap_npu_custom.py @@ -17,19 +17,26 @@ import os import torch -import torch_npu import yaml from msprobe.pytorch.hook_module.hook_module import HOOKModule from msprobe.pytorch.common.utils import torch_device_guard, torch_without_guard_version from msprobe.core.common.const import Const from msprobe.core.common.file_check import FileOpen +from msprobe.pytorch.function_factory import npu_custom_functions cur_path = os.path.dirname(os.path.realpath(__file__)) yaml_path = os.path.join(cur_path, "support_wrap_ops.yaml") with FileOpen(yaml_path, 'r') as f: WrapNpuOps = yaml.safe_load(f).get('torch_npu') +try: + import torch_npu +except ImportError: + is_gpu = True +else: + is_gpu = False + def get_npu_ops(): global 
WrapNpuOps @@ -46,13 +53,19 @@ class HOOKNpuOP(object): class NpuOPTemplate(HOOKModule): - def __init__(self, op_name, hook): + def __init__(self, op_name, hook, need_hook=True): self.op_name_ = op_name self.prefix_op_name_ = "NPU" + Const.SEP + str(op_name) + Const.SEP - super().__init__(hook) + self.need_hook = need_hook + if need_hook: + super().__init__(hook) @torch_device_guard def forward(self, *args, **kwargs): + if not self.need_hook: + if self.op_name_ not in npu_custom_functions: + raise Exception(f'There is not bench function {self.op_name_}') + return npu_custom_functions[self.op_name_](*args, **kwargs) if torch_without_guard_version: return getattr(torch.ops.npu, str(self.op_name_))(*args, **kwargs) else: @@ -60,7 +73,6 @@ class NpuOPTemplate(HOOKModule): def wrap_npu_op(op_name, hook): - def npu_op_template(*args, **kwargs): return NpuOPTemplate(op_name, hook)(*args, **kwargs) diff --git a/debug/accuracy_tools/msprobe/pytorch/module_processer.py b/debug/accuracy_tools/msprobe/pytorch/module_processer.py index 422d36d6ac7..f9368a08745 100644 --- a/debug/accuracy_tools/msprobe/pytorch/module_processer.py +++ b/debug/accuracy_tools/msprobe/pytorch/module_processer.py @@ -1,15 +1,17 @@ from functools import wraps + import torch from torch.utils.hooks import BackwardHook + from msprobe.core.common.const import Const from msprobe.core.data_dump.scope import ModuleRangeScope class ModuleProcesser: + module_count = {} module_stack = [] api_parent_node = "" module_node = {} - current_module_name = "" def __init__(self, scope): if isinstance(scope, ModuleRangeScope): @@ -19,7 +21,6 @@ class ModuleProcesser: BackwardHook.setup_input_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_input_hook) BackwardHook.setup_output_hook = ModuleProcesser.clone_return_value(BackwardHook.setup_output_hook) BackwardHook.setup_output_hook = ModuleProcesser.filter_tensor_and_tuple(BackwardHook.setup_output_hook) - self.module_count = {} @staticmethod def 
filter_tensor_and_tuple(func): @@ -55,11 +56,26 @@ class ModuleProcesser: else: return result + @staticmethod + def module_count_func(module_name): + if module_name not in ModuleProcesser.module_count: + ModuleProcesser.module_count[module_name] = 0 + else: + ModuleProcesser.module_count[module_name] += 1 + return ModuleProcesser.module_count[module_name] + + @classmethod + def reset_module_stats(cls): + cls.module_count = {} + cls.module_stack = [] + cls.api_parent_node = "" + cls.module_node = {} + def node_hook(self, name_prefix, start_or_stop, **kwargs): def pre_hook(module, input, output=None): try: - index = self.module_count_func(name_prefix) + index = ModuleProcesser.module_count_func(name_prefix) except IndexError as e: index = None pass @@ -89,10 +105,3 @@ class ModuleProcesser: return pre_hook else: return end_hook - - def module_count_func(self, module_name): - if module_name not in self.module_count: - self.module_count[module_name] = 0 - else: - self.module_count[module_name] += 1 - return self.module_count[module_name] diff --git a/debug/accuracy_tools/msprobe/pytorch/pt_config.py b/debug/accuracy_tools/msprobe/pytorch/pt_config.py index a3d765f3a4d..ceec92a633a 100644 --- a/debug/accuracy_tools/msprobe/pytorch/pt_config.py +++ b/debug/accuracy_tools/msprobe/pytorch/pt_config.py @@ -32,12 +32,12 @@ class StatisticsConfig(BaseConfig): class OverflowCheckConfig(BaseConfig): def __init__(self, json_config): super().__init__(json_config) - self.overflow_num = json_config.get("overflow_nums") + self.overflow_nums = json_config.get("overflow_nums") self.check_mode = json_config.get("check_mode") self.check_overflow_config() def check_overflow_config(self): - if self.overflow_num is not None and not isinstance(self.overflow_num, int): + if self.overflow_nums is not None and not isinstance(self.overflow_nums, int): raise Exception("overflow_num is invalid") if self.check_mode is not None and self.check_mode not in ["all", "aicore", "atomic"]: raise 
Exception("check_mode is invalid") diff --git a/debug/accuracy_tools/msprobe/pytorch/service.py b/debug/accuracy_tools/msprobe/pytorch/service.py index daeda889879..6b8d67abc9f 100644 --- a/debug/accuracy_tools/msprobe/pytorch/service.py +++ b/debug/accuracy_tools/msprobe/pytorch/service.py @@ -2,17 +2,18 @@ import functools import os from pathlib import Path -from msprobe.pytorch.common.log import logger -from msprobe.core.common.file_check import FileChecker, check_path_before_create from msprobe.core.common.const import Const, FileCheckConst from msprobe.core.common.exceptions import DistributedNotInitializedError, MsprobeException +from msprobe.core.common.file_check import FileChecker, check_path_before_create from msprobe.core.data_dump.data_collector import build_data_collector -from msprobe.core.data_dump.scope import BaseScope from msprobe.core.data_dump.data_processor.base import ModuleForwardInputsOutputs, ModuleBackwardInputsOutputs +from msprobe.core.data_dump.scope import BaseScope +from msprobe.pytorch.common.log import logger from msprobe.pytorch.common.utils import get_rank_if_initialized -from msprobe.pytorch.module_processer import ModuleProcesser from msprobe.pytorch.hook_module import remove_dropout from msprobe.pytorch.hook_module.api_registry import api_register +from msprobe.pytorch.hook_module.hook_module import HOOKModule +from msprobe.pytorch.module_processer import ModuleProcesser class Service: @@ -67,7 +68,8 @@ class Service: if not self.switch: return if self.data_collector: - module_input_output = ModuleBackwardInputsOutputs(grad_input=grad_input, grad_output=grad_output) + # 此处获取到的grad_input实际为反向过程的输出数据,grad_output为反向过程的输入数据,因此传入时调换顺序 + module_input_output = ModuleBackwardInputsOutputs(grad_input=grad_output, grad_output=grad_input) self.data_collector.backward_data_collect(api_or_module_name, module, pid, module_input_output) pid = os.getpid() @@ -82,6 +84,9 @@ class Service: self.current_iter += 1 
self.data_collector.update_iter(self.current_iter) + ModuleProcesser.reset_module_stats() + HOOKModule.reset_module_stats() + def start(self, model, api_origin=False): self.model = model if self.config.step and self.current_iter > max(self.config.step): diff --git a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py index 06c7378ed36..8b2138a485b 100644 --- a/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py +++ b/debug/accuracy_tools/msprobe/test/core_ut/test_common_config.py @@ -121,7 +121,7 @@ class TestCommonConfig(TestCase): self.assertIsNone(base_config.backward_input) self.assertIsNone(base_config.file_format) self.assertIsNone(base_config.summary_mode) - self.assertIsNone(base_config.overflow_num) + self.assertIsNone(base_config.overflow_nums) self.assertIsNone(base_config.check_mode) json_config.update({"scope": "Tensor_Add"}) diff --git a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py index 673386afb5d..30212d95e62 100644 --- a/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py +++ b/debug/accuracy_tools/msprobe/test/mindspore_ut/test_ms_config.py @@ -19,7 +19,7 @@ from unittest.mock import patch, mock_open from msprobe.core.common.const import Const from msprobe.mindspore.ms_config import (parse_json_config, parse_task_config, - TensorConfig, StatisticsConfig, OverflowCheck) + TensorConfig, StatisticsConfig, OverflowCheckConfig) class TestMsConfig(TestCase): @@ -62,7 +62,7 @@ class TestMsConfig(TestCase): self.assertTrue(isinstance(task_config, StatisticsConfig)) task_config = parse_task_config("overflow_check", mock_json_config) - self.assertTrue(isinstance(task_config, OverflowCheck)) + self.assertTrue(isinstance(task_config, OverflowCheckConfig)) with self.assertRaises(Exception) as context: parse_task_config("free_benchmark", mock_json_config) diff --git 
a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py index 771e0423804..27126cdddda 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/api_accuracy_checker/run_ut/test_multi_run_ut.py @@ -48,7 +48,7 @@ class TestMultiRunUT(unittest.TestCase): device_id=[0, 1], result_csv_path='result.csv', total_items=2, - real_data_path=None + config_path=None ) mock_file.side_effect = [ @@ -81,7 +81,7 @@ class TestMultiRunUT(unittest.TestCase): args.jit_compile = False args.device_id = [0, 1] args.result_csv_path = None - args.real_data_path = None + args.config_path = None config = prepare_config(args) diff --git a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py index c344f0b66b0..470390d77b2 100644 --- a/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py +++ b/debug/accuracy_tools/msprobe/test/pytorch_ut/test_pt_config.py @@ -45,7 +45,7 @@ class TestPtConfig(TestCase): } } result = parse_task_config(Const.OVERFLOW_CHECK, overflow_check_config) - self.assertEqual(result.overflow_num, 1) + self.assertEqual(result.overflow_nums, 1) self.assertEqual(result.check_mode, "all") free_benchmark_config = { diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 4e0eaa1f375..afbf8feb3a0 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -14,7 +14,7 @@ import setuptools -__version__ = '1.0.0' +__version__ = '1.0.1' INSTALL_REQUIRED = [ "wheel", diff --git a/plugins/tensorboard-plugins/.github/workflows/libkineto_ci.yml b/plugins/tensorboard-plugins/.github/workflows/libkineto_ci.yml deleted file mode 100644 index 3133d6400fb..00000000000 --- 
a/plugins/tensorboard-plugins/.github/workflows/libkineto_ci.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: LIBKINETOCI - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - name: Checkout submodules - shell: bash - run: | - auth_header="$(git config --local --get http.https://github.com/.extraheader)" - git submodule sync --recursive - git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 - - - name: Get env vars - run: | - echo GITHUB_WORKFLOW = $GITHUB_WORKFLOW - echo HOME = $HOME - echo GITHUB_ACTION = $GITHUB_ACTION - echo GITHUB_ACTIONS = $GITHUB_ACTIONS - echo GITHUB_REPOSITORY = $GITHUB_REPOSITORY - echo GITHUB_EVENT_NAME = $GITHUB_EVENT_NAME - echo GITHUB_EVENT_PATH = $GITHUB_EVENT_PATH - echo GITHUB_WORKSPACE = $GITHUB_WORKSPACE - echo GITHUB_SHA = $GITHUB_SHA - echo GITHUB_REF = $GITHUB_REF - c++ --verbose - - # TODO: Figure out how to install cupti headers T84637671 - - name: Build static lib - run: | - set -e - mkdir build_static - cd build_static - cmake -DKINETO_LIBRARY_TYPE=static ../libkineto/ - make -j - - - name: Build shared lib - run: | - set -e - mkdir build_shared - cd build_shared - cmake -DKINETO_LIBRARY_TYPE=shared ../libkineto/ - make -j diff --git a/plugins/tensorboard-plugins/.github/workflows/tb_plugin_build_pip_package.yml b/plugins/tensorboard-plugins/.github/workflows/tb_plugin_build_pip_package.yml deleted file mode 100644 index 9bdafcc4426..00000000000 --- a/plugins/tensorboard-plugins/.github/workflows/tb_plugin_build_pip_package.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Build torch-tb-profiler Pip Package - -on: - # TODO: Add an on_release trigger to build on tags - workflow_dispatch: - -jobs: - build-package: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: build pip package - run: | - set -e - cd 
tb_plugin - python setup.py sdist bdist_wheel - cd dist/ - pip install *.whl - python -c "import torch_tb_profiler;print(torch_tb_profiler.__version__)" diff --git a/plugins/tensorboard-plugins/.github/workflows/tb_plugin_ci.yml b/plugins/tensorboard-plugins/.github/workflows/tb_plugin_ci.yml deleted file mode 100644 index 1b59a7bf90a..00000000000 --- a/plugins/tensorboard-plugins/.github/workflows/tb_plugin_ci.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: TB_Plugin_CI - -on: - push: - branches: - - main - - release/** - - plugin/** - - pull_request: - branches: - - main - - release/** - - plugin/** - -jobs: - generate-matrix: - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - steps: - - id: set-matrix - run: | - echo $GITHUB_BASE_REF - if [ $GITHUB_BASE_REF == "plugin/vnext" ] - then - echo "::set-output name=matrix::{\"python-version\":[3.7, 3.8, 3.9], \"cuda-version\":[\"cpu\"], \"pytorch-version\":[\"nightly\"]}" - else - echo "::set-output name=matrix::{\"python-version\":[3.7, 3.8, 3.9], \"cuda-version\":[\"cpu\"], \"pytorch-version\":[\"nightly\", \"1.11rc\", \"stable\"]}" - fi - - build: - needs: generate-matrix - runs-on: ubuntu-latest - strategy: - matrix: ${{fromJSON(needs.generate-matrix.outputs.matrix)}} - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - architecture: 'x64' - - name: Test - env: - CUDA_VERSION: ${{ matrix.cuda-version }} - PYTORCH_VERSION: ${{ matrix.pytorch-version }} - TORCH_PROFILER_LOG_LEVEL: DEBUG - GRPC_VERBOSITY: DEBUG - GRPC_ENABLE_FORK_SUPPORT: 'False' - run: | - set -e - cd tb_plugin - sh ./ci_scripts/install_env.sh - pip install .[gs] - cd test - pytest diff --git a/plugins/tensorboard-plugins/.gitignore b/plugins/tensorboard-plugins/.gitignore deleted file mode 100644 index ce186381c0b..00000000000 --- a/plugins/tensorboard-plugins/.gitignore +++ /dev/null @@ -1,3 
+0,0 @@ -# ignore common items -.idea -.vscode diff --git a/plugins/tensorboard-plugins/.gitmodules b/plugins/tensorboard-plugins/.gitmodules deleted file mode 100644 index 4660ee8bc9e..00000000000 --- a/plugins/tensorboard-plugins/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "libkineto/third_party/googletest"] - path = libkineto/third_party/googletest - url = https://github.com/google/googletest.git -[submodule "libkineto/third_party/fmt"] - path = libkineto/third_party/fmt - url = https://github.com/fmtlib/fmt.git diff --git a/plugins/tensorboard-plugins/CODE_OF_CONDUCT.md b/plugins/tensorboard-plugins/CODE_OF_CONDUCT.md deleted file mode 100644 index a0cbeaab765..00000000000 --- a/plugins/tensorboard-plugins/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,77 +0,0 @@ -# Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to make participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, sex characteristics, gender identity and expression, -level of experience, education, socio-economic status, nationality, personal -appearance, race, religion, or sexual identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or - advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies within all project spaces, and it also applies when -an individual is representing the project or its community in public spaces. -Examples of representing a project or community include using an official -project e-mail address, posting via an official social media account, or acting -as an appointed representative at an online or offline event. Representation of -a project may be further defined and clarified by project maintainers. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at . All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq - diff --git a/plugins/tensorboard-plugins/CONTRIBUTING.md b/plugins/tensorboard-plugins/CONTRIBUTING.md deleted file mode 100644 index a2e931bb6f0..00000000000 --- a/plugins/tensorboard-plugins/CONTRIBUTING.md +++ /dev/null @@ -1,34 +0,0 @@ -# Contributing to Kineto -We want to make contributing to this project as easy and transparent as -possible. - -## Code of Conduct -The code of conduct is described in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md). - -## Pull Requests -We actively welcome your pull requests. - -1. Fork the repo and create your branch from `main`. -2. If you've added code that should be tested, add tests. -3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. -5. Make sure your code lints. -6. If you haven't already, complete the Contributor License Agreement ("CLA"). - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. 
You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## Issues -We use GitHub issues to track public bugs. Please ensure your description is -clear and has sufficient instructions to be able to reproduce the issue. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## License -By contributing to Kineto, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. diff --git a/plugins/tensorboard-plugins/LICENSE b/plugins/tensorboard-plugins/LICENSE deleted file mode 100644 index edb179715b5..00000000000 --- a/plugins/tensorboard-plugins/LICENSE +++ /dev/null @@ -1,33 +0,0 @@ -BSD License - -For Kineto software - -Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. - -All contributions by Microsoft: -Copyright (c) Microsoft Corporation. (The Azure AI Platform team) - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name Facebook nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/plugins/tensorboard-plugins/README.md b/plugins/tensorboard-plugins/README.md deleted file mode 100644 index 3a18f4c6239..00000000000 --- a/plugins/tensorboard-plugins/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Kineto - -Kineto is part of the PyTorch Profiler. - -The Kineto project was started to help enable -- **performance observability and diagnostics** across common ML bottleneck components -- **actionable recommendations** for common issues -- integration of external system-level profiling tools -- integration with popular visualization platforms and analysis pipelines - -A central component is libkineto, a profiling library with special focus on low-overhead GPU timeline tracing. - -The PyTorch Profiler TensorBoard plugin provides powerful and intuitive visualizations of profiling results, as well as actionable recommendations, and is the best way to experience the new PyTorch Profiler. - -## Libkineto -Libkineto is an in-process profiling library integrated with the PyTorch Profiler. Please refer to the [README](libkineto/README.md) file in the `libkineto` folder as well as documentation on the [new PyTorch Profiler API](https://pytorch.org/docs/master/profiler.html). 
- -## PyTorch TensorBoard Profiler NPU Plugin -The goal of the PyTorch TensorBoard Profiler is to provide a seamless and intuitive end-to-end profiling experience, including straightforward collection from PyTorch and insightful visualizations and recommendations in the TensorBoard UI. -Please refer to the [README](tb_plugin/README.md) file in the `tb_plugin` folder. - -## Future Development Direction: -Some areas we're currently working on: -- Support for tracing distributed workloads -- Trace processing, analysis and recommendation engine -- System-level activities, multiple tracing sources -- Profiling and monitoring daemon for larger scale deployments - -## Releases and Contributing -We will follow the PyTorch release schedule which roughly happens on a 3 month basis. - -We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. - -If you plan to contribute new features, please first open an issue and discuss the feature with us. Sending a PR without discussion might end up resulting in a rejected PR because we might be taking the infrastructure in a different direction than you might be aware of. We expect the architecture to keep evolving. - -## License -Kineto has a BSD-style license, as found in the [LICENSE](LICENSE) file. 
- diff --git a/plugins/tensorboard-plugins/libkineto/CMakeLists.txt b/plugins/tensorboard-plugins/libkineto/CMakeLists.txt deleted file mode 100644 index 63966de803a..00000000000 --- a/plugins/tensorboard-plugins/libkineto/CMakeLists.txt +++ /dev/null @@ -1,198 +0,0 @@ -cmake_minimum_required(VERSION 3.5 FATAL_ERROR) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") - -#install libraries into correct locations on all platforms -include(GNUInstallDirs) - -# function to extract filelists from libkineto_defs.bzl file -find_package(PythonInterp) -function(get_filelist name outputvar) - execute_process( - COMMAND "${PYTHON_EXECUTABLE}" -c - "exec(open('libkineto_defs.bzl').read());print(';'.join(${name}))" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - OUTPUT_VARIABLE _tempvar) - string(REPLACE "\n" "" _tempvar "${_tempvar}") - set(${outputvar} ${_tempvar} PARENT_SCOPE) -endfunction() - -project(kineto VERSION 0.1 LANGUAGES CXX C) - -set(KINETO_LIBRARY_TYPE "default" CACHE STRING - "Type of library (default, static or shared) to build") -set_property(CACHE KINETO_LIBRARY_TYPE PROPERTY STRINGS default shared) -option(KINETO_BUILD_TESTS "Build kineto unit tests" ON) - -set(LIBKINETO_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") -set(LIBKINETO_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") -set(LIBKINETO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -set(LIBKINETO_THIRDPARTY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party") -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -#We should default to a Release build -if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) -endif() - -if (NOT CUDA_SOURCE_DIR) - set(CUDA_SOURCE_DIR "$ENV{CUDA_SOURCE_DIR}") - message(INFO " CUDA_SOURCE_DIR = ${CUDA_SOURCE_DIR}") -endif() - -if (NOT ROCM_SOURCE_DIR) - set(ROCM_SOURCE_DIR "$ENV{ROCM_SOURCE_DIR}") - message(INFO " ROCM_SOURCE_DIR = ${ROCM_SOURCE_DIR}") -endif() - -# Set LIBKINETO_NOCUPTI to explicitly 
disable CUPTI -# Otherwise, CUPTI is disabled if not found -IF (NOT CUDA_SOURCE_DIR OR NOT CUPTI_INCLUDE_DIR OR NOT CUDA_cupti_LIBRARY) - set(LIBKINETO_NOCUPTI ON CACHE BOOL "" FORCE) -endif() - -IF (NOT ROCM_SOURCE_DIR AND NOT ROCTRACER_INCLUDE_DIR) - set(LIBKINETO_NOROCTRACER ON CACHE BOOL "" FORCE) -endif() - -# Define file lists -if (LIBKINETO_NOCUPTI AND LIBKINETO_NOROCTRACER) - get_filelist("get_libkineto_cpu_only_srcs(with_api=False)" LIBKINETO_SRCS) - message(INFO " CUPTI unavailable or disabled - not building GPU profilers") -elseif(NOT LIBKINETO_NOROCTRACER) - get_filelist("get_libkineto_roctracer_srcs()" LIBKINETO_SRCS) - message(INFO " Building with roctracer") -else() - get_filelist("get_libkineto_cupti_srcs(with_api=False)" LIBKINETO_SRCS) -endif() -get_filelist("get_libkineto_public_headers()" LIBKINETO_PUBLIC_HEADERS) -get_filelist("get_libkineto_api_srcs()" LIBKINETO_API_SRCS) - -add_library(kineto_base OBJECT ${LIBKINETO_SRCS}) -add_library(kineto_api OBJECT ${LIBKINETO_API_SRCS}) - -# Make libraries depend on libkineto_defs.bzl -add_custom_target(libkineto_defs.bzl DEPENDS libkineto_defs.bzl) -add_dependencies(kineto_base libkineto_defs.bzl) - -set_target_properties(kineto_base kineto_api PROPERTIES - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED YES - CXX_EXTENSIONS NO - CXX_VISIBILITY_PRESET hidden) - -set(KINETO_COMPILE_OPTIONS "-DKINETO_NAMESPACE=libkineto") -list(APPEND KINETO_COMPILE_OPTIONS "-DFMT_HEADER_ONLY") -if(NOT MSVC) - list(APPEND KINETO_COMPILE_OPTIONS "-std=c++14") -else() - list(APPEND KINETO_COMPILE_OPTIONS "/std:c++14") - list(APPEND KINETO_COMPILE_OPTIONS "-DWIN32_LEAN_AND_MEAN") - list(APPEND KINETO_COMPILE_OPTIONS "-DNOGDI") -endif() -if (NOT LIBKINETO_NOCUPTI) - list(APPEND KINETO_COMPILE_OPTIONS "-DHAS_CUPTI") -endif() -if (NOT LIBKINETO_NOROCTRACER) - target_compile_options(kineto_base PRIVATE "-DHAS_ROCTRACER") - target_compile_options(kineto_base PRIVATE "-D__HIP_PLATFORM_HCC__") - target_compile_options(kineto_base PRIVATE 
"-D__HIP_PLATFORM_AMD__") -endif() - -target_compile_options(kineto_base PRIVATE "${KINETO_COMPILE_OPTIONS}") -target_compile_options(kineto_api PRIVATE "${KINETO_COMPILE_OPTIONS}") - -if(NOT TARGET fmt) - if(NOT FMT_SOURCE_DIR) - set(FMT_SOURCE_DIR "${LIBKINETO_THIRDPARTY_DIR}/fmt" - CACHE STRING "fmt source directory from submodules") - endif() - - # Build FMT. - # FMT and some other libraries use BUILD_SHARED_LIBS to control - # the library type. - # Save and restore the value after configuring FMT - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) - set(FMT_LIBRARY_TYPE static CACHE STRING "Set lib type to static") - add_subdirectory("${FMT_SOURCE_DIR}" "${LIBKINETO_BINARY_DIR}/fmt") - set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) -endif() - -set(FMT_INCLUDE_DIR "${FMT_SOURCE_DIR}/include") -message(STATUS "Kineto: FMT_SOURCE_DIR = ${FMT_SOURCE_DIR}") -message(STATUS "Kineto: FMT_INCLUDE_DIR = ${FMT_INCLUDE_DIR}") -if (NOT CUPTI_INCLUDE_DIR) - set(CUPTI_INCLUDE_DIR "${CUDA_SOURCE_DIR}/extras/CUPTI/include") -endif() -if (NOT CUDA_INCLUDE_DIRS) - set(CUDA_INCLUDE_DIRS "${CUDA_SOURCE_DIR}/include") -endif() -if (NOT ROCTRACER_INCLUDE_DIR) - set(ROCTRACER_INCLUDE_DIR "${ROCM_SOURCE_DIR}/roctracer/include") -endif() -if (NOT ROCM_INCLUDE_DIRS) - set(ROCM_INCLUDE_DIRS "${ROCM_SOURCE_DIR}/include") -endif() - -message(INFO " CUPTI_INCLUDE_DIR = ${CUPTI_INCLUDE_DIR}") -message(INFO " ROCTRACER_INCLUDE_DIR = ${ROCTRACER_INCLUDE_DIR}") - -target_include_directories(kineto_base PUBLIC - $ - $ - $ - $ - $ - $ - $) - -target_include_directories(kineto_api PUBLIC - $ - $) - -if(KINETO_LIBRARY_TYPE STREQUAL "default") - add_library(kineto - $ - $) -elseif(KINETO_LIBRARY_TYPE STREQUAL "static") - add_library(kineto STATIC - $ - $) -elseif(KINETO_LIBRARY_TYPE STREQUAL "shared") - add_library(kineto SHARED - 
$) - set_property(TARGET kineto_base PROPERTY POSITION_INDEPENDENT_CODE ON) - set_target_properties(kineto PROPERTIES - CXX_VISIBILITY_PRESET hidden) -else() - message(FATAL_ERROR "Unsupported library type ${KINETO_LIBRARY_TYPE}") -endif() - -if(NOT LIBKINETO_NOROCTRACER) - find_library(ROCTRACER_LIBRARY NAMES libroctracer64.so HINTS /opt/rocm/roctracer/lib) - target_link_libraries(kineto "${ROCTRACER_LIBRARY}") - find_library(KINETO_HIP_LIBRARY NAMES libamdhip64.so HINTS /opt/rocm/lib) - target_link_libraries(kineto "${KINETO_HIP_LIBRARY}") -endif() - -if(NOT LIBKINETO_NOCUPTI) - target_link_libraries(kineto "${CUDA_cupti_LIBRARY}") -endif() -target_link_libraries(kineto $) -add_dependencies(kineto fmt::fmt-header-only) - -install(TARGETS kineto EXPORT kinetoLibraryConfig - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) - -install(FILES ${LIBKINETO_PUBLIC_HEADERS} - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/kineto") - -install(EXPORT kinetoLibraryConfig DESTINATION share/cmake/kineto - FILE kinetoLibraryConfig.cmake) - -if(KINETO_BUILD_TESTS) - add_subdirectory(test) -endif() diff --git a/plugins/tensorboard-plugins/libkineto/README.md b/plugins/tensorboard-plugins/libkineto/README.md deleted file mode 100644 index 37127ca5aa8..00000000000 --- a/plugins/tensorboard-plugins/libkineto/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# Libkineto - -Libkineto is an in-process profiling library, part of the Kineto performance -tools project. - -The library provides a way to collect GPU traces and metrics from the host -process, either via the library public API or by sending a signal, if enabled. - -Currently only NVIDIA GPUs are supported. - -## Build Notes -Libkineto uses the standard CMAKE-based build flow. - -### Dependencies -Libkineto requires gcc 5+ and: - -- NVIDIA CUPTI: used to collect traces and metrics from NVIDIA GPUs. -- fmt: used for its convenient and lightweight string formatting functionality. 
-- googletest: required to build and run Kineto's tests. - - **googletest is not required** if you don't want to run Kineto tests. -By default, building of tests is **on**. Turn it off by setting `KINETO_BUILD_TESTS` to **off**. - -You can download [NVIDIA CUPTI][1], [fmt][2], [googletest][3] and set -`CUDA_SOURCE_DIR`, `FMT_SOURCE_DIR`, `GOOGLETEST_SOURCE_DIR` respectively for -cmake to find these libraries. If the fmt and googletest variables are not set, cmake will -build the git submodules found in the `third_party` directory. -If `CUDA_SOURCE_DIR` is not set, libkineto will fail to build. - -### Building Libkineto - -``` -# Check out repo and sub modules -git clone --recursive https://github.com/pytorch/kineto.git -# Build libkineto with cmake -cd kineto/libkineto -mkdir build && cd build -cmake .. -make -``` - -To run the tests after building libkineto (if tests are built), use the following -command: -``` -make test -``` - -### Installing Libkineto -``` -make install -``` - -## How Libkineto works -We will provide a high-level overview, design philosophy and brief descriptions of various -parts of Libkineto in upcoming blogs. - -## Full documentation -We strive to keep our source files readable. The best and up-to-date -documentation is available in the source files. - -## License -Libkineto is BSD licensed, as detailed in the [LICENSE](../LICENSE) file. - -[1]:https://developer.nvidia.com/CUPTI-CTK10_2 -[2]:https://github.com/fmt -[3]:https://github.com/google/googletest diff --git a/plugins/tensorboard-plugins/libkineto/include/AbstractConfig.h b/plugins/tensorboard-plugins/libkineto/include/AbstractConfig.h deleted file mode 100644 index 1cadf4906c1..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/AbstractConfig.h +++ /dev/null @@ -1,113 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include - -namespace KINETO_NAMESPACE { - -class AbstractConfig { - public: - AbstractConfig& operator=(const AbstractConfig&) = delete; - AbstractConfig(AbstractConfig&&) = delete; - AbstractConfig& operator=(AbstractConfig&&) = delete; - - virtual ~AbstractConfig() { - for (const auto& p : featureConfigs_) { - delete p.second; - } - } - - // Return a copy of the full derived class - virtual AbstractConfig* cloneDerived(AbstractConfig& parent) const = 0; - - // Returns true if successfully parsed the config string - bool parse(const std::string& conf); - - // Default setup for signal-triggered profiling - virtual void setSignalDefaults() { - for (auto& p : featureConfigs_) { - p.second->setSignalDefaults(); - } - } - - // Default setup for client-triggered profiling - virtual void setClientDefaults() { - for (auto& p : featureConfigs_) { - p.second->setClientDefaults(); - } - } - - // Time config was created / updated - std::chrono::time_point timestamp() const { - return timestamp_; - } - - // Source config string that this was parsed from - const std::string& source() const { - return source_; - } - - AbstractConfig& feature(std::string name) const { - const auto& pos = featureConfigs_.find(name); - return *pos->second; - } - - // Transfers ownership of cfg arg - void addFeature(const std::string& name, AbstractConfig* cfg) { - featureConfigs_[name] = cfg; - } - - protected: - AbstractConfig() {} - AbstractConfig(const AbstractConfig& other) = default; - - // Return true if the option was recognized and successfully parsed. - // Throw std::invalid_argument if val is invalid. - virtual bool handleOption(const std::string& name, std::string& val); - - // Perform post-validation checks, typically conditons involving - // multiple options. - // Throw std::invalid_argument if automatic correction can not be made. 
- // - // @param fallbackProfileStartTime Specify a fallback profile start timestamp in case it was never specified by the client - virtual void validate(const std::chrono::time_point& fallbackProfileStartTime) = 0; - - // TODO: Separate out each profiler type into features? - virtual void printActivityProfilerConfig(std::ostream& s) const; - - // Helpers for use in handleOption - // Split a string by delimiter and remove external white space - std::vector splitAndTrim(const std::string& s, char delim) const; - // Lowercase for case-insensitive comparisons - std::string toLower(std::string& s) const; - // Does string end with suffix - bool endsWith(const std::string& s, const std::string& suffix) const; - // Conversions - int64_t toIntRange(const std::string& val, int64_t min, int64_t max) const; - int32_t toInt32(const std::string& val) const; - int64_t toInt64(const std::string& val) const; - bool toBool(std::string& val) const; - - void cloneFeaturesInto(AbstractConfig& cfg) const { - for (const auto& feature : featureConfigs_) { - cfg.featureConfigs_[feature.first] = feature.second->cloneDerived(cfg); - } - } - - private: - // Time config was created / updated - std::chrono::time_point timestamp_{}; - - // Original configuration string, used for comparison - std::string source_{""}; - - // Configuration objects for optional features - std::map featureConfigs_{}; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/include/ActivityProfilerInterface.h b/plugins/tensorboard-plugins/libkineto/include/ActivityProfilerInterface.h deleted file mode 100644 index 29871e47ab8..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ActivityProfilerInterface.h +++ /dev/null @@ -1,91 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include - -#include "ActivityType.h" -#include "ActivityTraceInterface.h" -#include "IActivityProfiler.h" - -namespace libkineto { - -class ActivityProfilerController; -struct CpuTraceBuffer; -class Config; - -class ActivityProfilerInterface { - - public: - virtual ~ActivityProfilerInterface() {}; - - virtual void init() {} - virtual bool isInitialized() { - return false; - } - virtual bool isActive(){ - return false; - } - - // *** Asynchronous API *** - // Instead of starting and stopping the trace manually, provide a start time - // and duration and / or iteration stop criterion. - // Tracing terminates when either condition is met. - virtual void scheduleTrace(const std::string& configStr) {} - - // *** Synchronous API *** - // These must be called in order: - // prepareTrace -> startTrace -> stopTrace. - - // Many tracing structures are lazily initialized during trace collection, - // with potentially high overhead. - // Call prepareTrace to enable tracing, then run the region to trace - // at least once (and ideally run the same code that is to be traced) to - // allow tracing structures to be initialized. - virtual void prepareTrace( - const std::set& activityTypes, - const std::string& configStr = "") {} - - // Start recording, potentially reusing any buffers allocated since - // prepareTrace was called. - virtual void startTrace() {} - - // Stop and process trace, producing an in-memory list of trace records. - // The processing will be done synchronously (using the calling thread.) - virtual std::unique_ptr stopTrace() { - return nullptr; - } - - // Re-evaluate internal state to allow for triggering operations based - // on number of iteration. each implicitly increments the iteration count - virtual void step() {} - - // *** TraceActivity API *** - // FIXME: Pass activityProfiler interface into clientInterface? 
- virtual void pushCorrelationId(uint64_t id){} - virtual void popCorrelationId(){} - virtual void transferCpuTrace( - std::unique_ptr traceBuffer){} - - // Correlation ids for user defined spans - virtual void pushUserCorrelationId(uint64_t){} - virtual void popUserCorrelationId(){} - - // Saves information for the current thread to be used in profiler output - // Client must record any new kernel thread where the activity has occured. - virtual void recordThreadInfo() {} - - // Record trace metadata, currently supporting only string key and values, - // values with the same key are overwritten - virtual void addMetadata(const std::string& key, const std::string& value) = 0; - - // Add a child activity profiler, this enables frameworks in the application - // to enable custom framework events. - virtual void addChildActivityProfiler( - std::unique_ptr profiler) {} -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/ActivityTraceInterface.h b/plugins/tensorboard-plugins/libkineto/include/ActivityTraceInterface.h deleted file mode 100644 index 23d4edab00c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ActivityTraceInterface.h +++ /dev/null @@ -1,21 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include - -namespace libkineto { - -struct ITraceActivity; - -class ActivityTraceInterface { - public: - virtual ~ActivityTraceInterface() {} - virtual const std::vector* activities() { - return nullptr; - } - virtual void save(const std::string& path) {} -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/ActivityType.h b/plugins/tensorboard-plugins/libkineto/include/ActivityType.h deleted file mode 100644 index 74c6a2531d6..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ActivityType.h +++ /dev/null @@ -1,34 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include - -namespace libkineto { - -enum class ActivityType { - CPU_OP = 0, // cpu side ops - USER_ANNOTATION, - GPU_USER_ANNOTATION, - GPU_MEMCPY, - GPU_MEMSET, - CONCURRENT_KERNEL, // on-device kernels - EXTERNAL_CORRELATION, - CUDA_RUNTIME, // host side cuda runtime events - CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics - GLOW_RUNTIME, // host side glow runtime events - CPU_INSTANT_EVENT, // host side point-like events - PYTHON_FUNCTION, - OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. - ENUM_COUNT // This is to add buffer and not used for any profiling logic. Add your new type before it. -}; - -const char* toString(ActivityType t); -ActivityType toActivityType(const std::string& str); - -// Return an array of all activity types except COUNT -constexpr int activityTypeCount = (int)ActivityType::ENUM_COUNT; -const std::array activityTypes(); - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/ClientInterface.h b/plugins/tensorboard-plugins/libkineto/include/ClientInterface.h deleted file mode 100644 index 06dc0758381..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ClientInterface.h +++ /dev/null @@ -1,16 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -namespace libkineto { - -class ClientInterface { - public: - virtual ~ClientInterface() {} - virtual void init() = 0; - virtual void warmup(bool setupOpInputsCollection) = 0; - virtual void start() = 0; - virtual void stop() = 0; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/Config.h b/plugins/tensorboard-plugins/libkineto/include/Config.h deleted file mode 100644 index 040e96c9f75..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/Config.h +++ /dev/null @@ -1,433 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include "AbstractConfig.h" -#include "ActivityType.h" - -#include -#include -#include -#include -#include -#include - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -class Config : public AbstractConfig { - public: - Config(); - Config& operator=(const Config&) = delete; - Config(Config&&) = delete; - Config& operator=(Config&&) = delete; - - // Return a full copy including feature config object - std::unique_ptr clone() const { - auto cfg = std::unique_ptr(new Config(*this)); - cloneFeaturesInto(*cfg); - return cfg; - } - - bool handleOption(const std::string& name, std::string& val) override; - - void setClientDefaults() override; - - // Log events to this file - const std::string& eventLogFile() const { - return eventLogFile_; - } - - bool activityProfilerEnabled() const { - return activityProfilerEnabled_ || - activitiesOnDemandTimestamp_.time_since_epoch().count() > 0; - } - - // Log activitiy trace to this file - const std::string& activitiesLogFile() const { - return activitiesLogFile_; - } - - // Log activitiy trace to this url - const std::string& activitiesLogUrl() const { - return activitiesLogUrl_; - } - - void setActivitiesLogUrl(const std::string& url) { - activitiesLogUrl_ = url; - } - - bool activitiesLogToMemory() const { - return activitiesLogToMemory_; - } - - // Is profiling enabled for the given device? - bool eventProfilerEnabledForDevice(uint32_t dev) const { - return 0 != (eventProfilerDeviceMask_ & (1 << dev)); - } - - // Take a sample (read hardware counters) at this frequency. - // This controls how often counters are read - if all counters cannot - // be collected simultaneously then multiple samples are needed to - // collect all requested counters - see multiplex period. 
- std::chrono::milliseconds samplePeriod() const { - return samplePeriod_; - } - - void setSamplePeriod(std::chrono::milliseconds period) { - samplePeriod_ = period; - } - - // When all requested counters cannot be collected simultaneously, - // counters will be multiplexed at this frequency. - // Multiplexing can have a large performance impact if done frequently. - // To avoid a perf impact, keep this at 1s or above. - std::chrono::milliseconds multiplexPeriod() const { - return multiplexPeriod_; - } - - void setMultiplexPeriod(std::chrono::milliseconds period) { - multiplexPeriod_ = period; - } - - // Report counters at this frequency. Note that several samples can - // be reported each time, see samplesPerReport. - std::chrono::milliseconds reportPeriod() const { - return reportPeriod_; - } - - void setReportPeriod(std::chrono::milliseconds msecs); - - // Number of samples dispatched each report period. - // Must be in the range [1, report period / sample period]. - // In other words, aggregation is supported but not interpolation. 
- int samplesPerReport() const { - return samplesPerReport_; - } - - void setSamplesPerReport(int count) { - samplesPerReport_ = count; - } - - // The names of events to collect - const std::set& eventNames() const { - return eventNames_; - } - - // Add additional events to be profiled - void addEvents(const std::set& names) { - eventNames_.insert(names.begin(), names.end()); - } - - // The names of metrics to collect - const std::set& metricNames() const { - return metricNames_; - } - - // Add additional metrics to be profiled - void addMetrics(const std::set& names) { - metricNames_.insert(names.begin(), names.end()); - } - - const std::vector& percentiles() const { - return eventReportPercentiles_; - } - - // Profile for this long, then revert to base config - std::chrono::seconds eventProfilerOnDemandDuration() const { - return eventProfilerOnDemandDuration_; - } - - void setEventProfilerOnDemandDuration(std::chrono::seconds duration) { - eventProfilerOnDemandDuration_ = duration; - } - - // Too many event profilers on a single system can overload the driver. - // At some point, latencies shoot through the roof and collection of samples - // becomes impossible. To avoid this situation we have a limit of profilers - // per GPU. - // NOTE: Communication with a daemon is needed for this feature. - // Library must be built with an active DaemonConfigLoader. - int maxEventProfilersPerGpu() const { - return eventProfilerMaxInstancesPerGpu_; - } - - // On Cuda11 we've seen occasional hangs when reprogramming counters - // Monitor profiling threads and report when a thread is not responding - // for a given number of seconds. - // A period of 0 means disable. 
- std::chrono::seconds eventProfilerHeartbeatMonitorPeriod() const { - return eventProfilerHeartbeatMonitorPeriod_; - } - - // The types of activities selected in the configuration file - const std::set& selectedActivityTypes() const { - return selectedActivityTypes_; - } - - void setSelectedActivityTypes(const std::set& types) { - selectedActivityTypes_ = types; - } - - bool isOpInputsCollectionEnabled() const { - return enableOpInputsCollection_; - } - - // Trace for this long - std::chrono::milliseconds activitiesDuration() const { - return activitiesDuration_; - } - - // Trace for this many iterations, determined by external API - int activitiesRunIterations() const { - return activitiesRunIterations_; - } - - std::chrono::milliseconds activitiesDurationDefault() const; - - void setActivitiesDuration(std::chrono::milliseconds duration) { - activitiesDuration_ = duration; - } - - int activitiesMaxGpuBufferSize() const { - return activitiesMaxGpuBufferSize_; - } - - std::chrono::seconds activitiesWarmupDuration() const { - return activitiesWarmupDuration_; - } - - int activitiesWarmupIterations() const { - return activitiesWarmupIterations_; - } - - // Timestamp at which the profiling to start, requested by the user. 
- const std::chrono::time_point requestTimestamp() - const { - if (profileStartTime_.time_since_epoch().count()) { - return profileStartTime_; - } - - // TODO(T94634890): Deperecate requestTimestamp - return requestTimestamp_ + maxRequestAge() + activitiesWarmupDuration(); - } - - bool hasProfileStartTime() const { - return requestTimestamp_.time_since_epoch().count() > 0 || - profileStartTime_.time_since_epoch().count() > 0; - } - - int profileStartIteration() const { - return profileStartIteration_; - } - - bool hasProfileStartIteration() const { - return profileStartIteration_ >= 0 && activitiesRunIterations_ > 0; - } - - void setProfileStartIteration(int iter) { - profileStartIteration_ = iter; - } - - int profileStartIterationRoundUp() const { - return profileStartIterationRoundUp_; - } - - // calculate the start iteration accounting for warmup - int startIterationIncludingWarmup() const { - if (!hasProfileStartIteration()) { - return -1; - } - return profileStartIteration_ - activitiesWarmupIterations_; - } - - const std::chrono::seconds maxRequestAge() const; - - // All VLOG* macros will log if the verbose log level is >= - // the verbosity specified for the verbose log message. - // Default value is -1, so messages with log level 0 will log by default. - int verboseLogLevel() const { - return verboseLogLevel_; - } - - // Modules for which verbose logging is enabled. - // If empty, logging is enabled for all modules. 
- const std::vector& verboseLogModules() const { - return verboseLogModules_; - } - - bool sigUsr2Enabled() const { - return enableSigUsr2_; - } - - bool ipcFabricEnabled() const { - return enableIpcFabric_; - } - - static std::chrono::milliseconds alignUp( - std::chrono::milliseconds duration, - std::chrono::milliseconds alignment) { - duration += alignment; - return duration - (duration % alignment); - } - - std::chrono::time_point - eventProfilerOnDemandStartTime() const { - return eventProfilerOnDemandTimestamp_; - } - - std::chrono::time_point - eventProfilerOnDemandEndTime() const { - return eventProfilerOnDemandTimestamp_ + eventProfilerOnDemandDuration_; - } - - std::chrono::time_point - activityProfilerRequestReceivedTime() const { - return activitiesOnDemandTimestamp_; - } - - // Users may request and set trace id and group trace id. - const std::string& requestTraceID() const { - return requestTraceID_; - } - - void setRequestTraceID(const std::string& tid) { - requestTraceID_ = tid; - } - - const std::string& requestGroupTraceID() const { - return requestGroupTraceID_; - } - - void setRequestGroupTraceID(const std::string& gtid) { - requestGroupTraceID_ = gtid; - } - - void updateActivityProfilerRequestReceivedTime(); - - void printActivityProfilerConfig(std::ostream& s) const override; - - void validate( - const std::chrono::time_point& fallbackProfileStartTime) override; - - static void addConfigFactory( - std::string name, - std::function factory); - - void print(std::ostream& s) const; - - private: - explicit Config(const Config& other) = default; - - AbstractConfig* cloneDerived(AbstractConfig& parent) const override { - // Clone from AbstractConfig not supported - assert(false); - return nullptr; - } - - uint8_t createDeviceMask(const std::string& val); - - // Adds valid activity types from the user defined string list in the - // configuration file - void setActivityTypes(const std::vector& selected_activities); - - // Sets the default activity 
types to be traced - void selectDefaultActivityTypes() { - // If the user has not specified an activity list, add all types - for (ActivityType t : activityTypes()) { - // Do no enable this by default - // TODO: introduce optional types - if (t != ActivityType::OVERHEAD) { - selectedActivityTypes_.insert(t); - } - } - } - - int verboseLogLevel_; - std::vector verboseLogModules_; - - // Event profiler - // These settings are also supported in on-demand mode - std::chrono::milliseconds samplePeriod_; - std::chrono::milliseconds reportPeriod_; - int samplesPerReport_; - std::set eventNames_; - std::set metricNames_; - - // On-demand duration - std::chrono::seconds eventProfilerOnDemandDuration_; - // Last on-demand request - std::chrono::time_point - eventProfilerOnDemandTimestamp_; - - int eventProfilerMaxInstancesPerGpu_; - - // Monitor whether event profiler threads are stuck - // at this frequency - std::chrono::seconds eventProfilerHeartbeatMonitorPeriod_; - - // These settings can not be changed on-demand - std::string eventLogFile_; - std::vector eventReportPercentiles_ = {5, 25, 50, 75, 95}; - uint8_t eventProfilerDeviceMask_ = ~0; - std::chrono::milliseconds multiplexPeriod_; - - // Activity profiler - bool activityProfilerEnabled_; - std::set selectedActivityTypes_; - - // The activity profiler settings are all on-demand - std::string activitiesLogFile_; - - std::string activitiesLogUrl_; - - // Log activities to memory buffer - bool activitiesLogToMemory_{false}; - - int activitiesMaxGpuBufferSize_; - std::chrono::seconds activitiesWarmupDuration_; - int activitiesWarmupIterations_; - - // Client Interface - // Enable inputs collection when tracing ops - bool enableOpInputsCollection_{true}; - - // Profile for specified iterations and duration - std::chrono::milliseconds activitiesDuration_; - int activitiesRunIterations_; - - // Below are not used - // Use this net name for iteration count - std::string activitiesExternalAPIIterationsTarget_; - // Only 
profile nets that includes this in the name - std::vector activitiesExternalAPIFilter_; - // Only profile nets with at least this many operators - int activitiesExternalAPINetSizeThreshold_; - // Only profile nets with at least this many GPU operators - int activitiesExternalAPIGpuOpCountThreshold_; - // Last activity profiler request - std::chrono::time_point - activitiesOnDemandTimestamp_; - - // Synchronized start timestamp - std::chrono::time_point profileStartTime_; - // or start iteration - int profileStartIteration_; - int profileStartIterationRoundUp_; - - // DEPRECATED - std::chrono::time_point requestTimestamp_; - - // Enable profiling via SIGUSR2 - bool enableSigUsr2_; - - // Enable IPC Fabric instead of thrift communication - bool enableIpcFabric_; - - // Logger Metadata - std::string requestTraceID_; - std::string requestGroupTraceID_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/include/GenericTraceActivity.h b/plugins/tensorboard-plugins/libkineto/include/GenericTraceActivity.h deleted file mode 100644 index 4272cf1efa4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/GenericTraceActivity.h +++ /dev/null @@ -1,125 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include - -#include "ThreadUtil.h" -#include "ITraceActivity.h" -#include "TraceSpan.h" - -namespace libkineto { - -// Link type, used in GenericTraceActivity.flow.type -constexpr unsigned int kLinkFwdBwd = 1; -constexpr unsigned int kLinkAsyncCpuGpu = 2; - -// @lint-ignore-every CLANGTIDY cppcoreguidelines-non-private-member-variables-in-classes -// @lint-ignore-every CLANGTIDY cppcoreguidelines-pro-type-member-init -class GenericTraceActivity : public ITraceActivity { - - public: - GenericTraceActivity() : activityType(ActivityType::ENUM_COUNT), traceSpan_(NULL) {} - - GenericTraceActivity( - const TraceSpan& trace, ActivityType type, const std::string& name) - : activityType(type), activityName(name), traceSpan_(&trace) { - } - - int64_t deviceId() const override { - return device; - } - - int64_t resourceId() const override { - return resource; - } - - int32_t getThreadId() const override { - return threadId; - } - - int64_t timestamp() const override { - return startTime; - } - - int64_t duration() const override { - return endTime - startTime; - } - - int64_t correlationId() const override { - return id; - } - - ActivityType type() const override { - return activityType; - } - - const ITraceActivity* linkedActivity() const override { - return nullptr; - } - - int flowType() const override { - return flow.type; - } - - int flowId() const override { - return flow.id; - } - - bool flowStart() const override { - return flow.start; - } - - const std::string name() const override { - return activityName; - } - - const TraceSpan* traceSpan() const override { - return traceSpan_; - } - - void log(ActivityLogger& logger) const override; - - //Encode client side metadata as a key/value - template - void addMetadata(const std::string& key, const ValType& value) { - metadata_.push_back(fmt::format("\"{}\": {}", key, value)); - } - - void addMetadataQuoted(const std::string& key, const std::string& value) { - 
metadata_.push_back(fmt::format("\"{}\": \"{}\"", key, value)); - } - - const std::string metadataJson() const override { - return fmt::format("{}", fmt::join(metadata_, ", ")); - } - - virtual ~GenericTraceActivity() {}; - - int64_t startTime{0}; - int64_t endTime{0}; - int32_t id{0}; - int32_t device{0}; - int32_t resource{0}; - int32_t threadId{0}; - ActivityType activityType; - std::string activityName; - struct Flow { - Flow(): id(0), type(0), start(0) {} - // Ids must be unique within each type - uint32_t id : 27; - // Type will be used to connect flows between profilers, as - // well as look up flow information (name etc) - uint32_t type : 4; - uint32_t start : 1; - } flow; - - private: - const TraceSpan* traceSpan_; - std::vector metadata_; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/IActivityProfiler.h b/plugins/tensorboard-plugins/libkineto/include/IActivityProfiler.h deleted file mode 100644 index f5d4b3fb828..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/IActivityProfiler.h +++ /dev/null @@ -1,104 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include - -#include "Config.h" -#include "GenericTraceActivity.h" - -/* This file includes an abstract base class for an activity profiler - * that can be implemented by multiple tracing agents in the application. - * The high level Kineto profiler can co-ordinate start and end of tracing - * and combine together events from multiple such activity profilers. 
- */ - -namespace libkineto { - -using namespace KINETO_NAMESPACE; - -#ifdef _MSC_VER -// workaround for the predefined ERROR macro on Windows -#undef ERROR -#endif // _MSC_VER - -enum class TraceStatus { - READY, // Accepting trace requests - WARMUP, // Performing trace warmup - RECORDING, // Actively collecting activities - PROCESSING, // Recording is complete, preparing results - ERROR, // One or more errors (and possibly also warnings) occurred. - WARNING, // One or more warnings occurred. -}; - -/* IActivityProfilerSession: - * an opaque object that can be used by a high level profiler to - * start/stop and return trace events. - */ -class IActivityProfilerSession { - - public: - virtual ~IActivityProfilerSession() {} - - // start the trace collection synchronously - virtual void start() = 0; - - // stop the trace collection synchronously - virtual void stop() = 0; - - TraceStatus status() { - return status_; - } - - // returns list of Trace Activities - virtual std::vector& activities() = 0; - - // returns errors with this trace - virtual std::vector errors() = 0; - - // processes trace activities using logger - virtual void processTrace(ActivityLogger& logger) = 0; - - // XXX define trace formats - // virtual save(string name, TraceFormat format) - - protected: - TraceStatus status_ = TraceStatus::READY; -}; - - -/* Activity Profiler Plugins: - * These allow other frameworks to integrate into Kineto's primariy - * activity profiler. While the primary activity profiler handles - * timing the trace collections and correlating events the plugins - * can become source of new trace activity types. - */ -class IActivityProfiler { - - public: - - virtual ~IActivityProfiler() {} - - // name of profiler - virtual const std::string& name() const = 0; - - // returns activity types this profiler supports - virtual const std::set& availableActivities() const = 0; - - // Calls prepare() on registered tracer providers passing in the relevant - // activity types. 
Returns a profiler session handle - virtual std::unique_ptr configure( - const std::set& activity_types, - const Config& config) = 0; - - // asynchronous version of the above with future timestamp and duration. - virtual std::unique_ptr configure( - int64_t ts_ms, - int64_t duration_ms, - const std::set& activity_types, - const Config& config) = 0; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/ILoggerObserver.h b/plugins/tensorboard-plugins/libkineto/include/ILoggerObserver.h deleted file mode 100644 index 4fce7851b96..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ILoggerObserver.h +++ /dev/null @@ -1,50 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -// Stages in libkineto used when pushing logs to UST Logger. -constexpr char kWarmUpStage[] = "Warm Up"; -constexpr char kCollectionStage[] = "Collection"; -constexpr char kPostProcessingStage[] = "Post Processing"; - -#if !USE_GOOGLE_LOG - -#include -#include - -namespace libkineto { - -enum LoggerOutputType { - VERBOSE = 0, - INFO = 1, - WARNING = 2, - ERROR = 3, - STAGE = 4, - ENUM_COUNT = 5 -}; - -const char* toString(LoggerOutputType t); -LoggerOutputType toLoggerOutputType(const std::string& str); - -constexpr int LoggerTypeCount = (int) LoggerOutputType::ENUM_COUNT; - -class ILoggerObserver { - public: - virtual ~ILoggerObserver() = default; - virtual void write(const std::string& message, LoggerOutputType ot) = 0; - virtual const std::map> extractCollectorMetadata() = 0; - virtual void reset() = 0; - virtual void addDevice(const int64_t device) = 0; - virtual void setTraceDurationMS(const int64_t duration) = 0; - virtual void addEventCount(const int64_t count) = 0; - virtual void setTraceID(const std::string&) {} - virtual void setGroupTraceID(const std::string&) {} - virtual void addDestination(const std::string& dest) = 0; - -}; - -} // namespace libkineto - -#endif // 
!USE_GOOGLE_LOG diff --git a/plugins/tensorboard-plugins/libkineto/include/ITraceActivity.h b/plugins/tensorboard-plugins/libkineto/include/ITraceActivity.h deleted file mode 100644 index a477ed81466..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ITraceActivity.h +++ /dev/null @@ -1,53 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -#include "ActivityType.h" - -namespace libkineto { - -class ActivityLogger; -struct TraceSpan; - -// Generic activity interface is borrowed from tensorboard protobuf format. -struct ITraceActivity { - virtual ~ITraceActivity() {} - // Device is a physical or logical entity, e.g. CPU, GPU or process - virtual int64_t deviceId() const = 0; - // A resource is something on the device, h/w thread, - // functional units etc. - virtual int64_t resourceId() const = 0; - // s/w thread - virtual int32_t getThreadId() const = 0; - // Start timestamp in mucrosecond - virtual int64_t timestamp() const = 0; - // Duration in microseconds - virtual int64_t duration() const = 0; - // Used to link up async activities - virtual int64_t correlationId() const = 0; - // Part of a flow, identified by flow id and type - virtual int flowType() const = 0; - virtual int flowId() const = 0; - virtual bool flowStart() const = 0; - virtual ActivityType type() const = 0; - virtual const std::string name() const = 0; - // Optional linked activity - virtual const ITraceActivity* linkedActivity() const = 0; - // Optional containing trace object - virtual const TraceSpan* traceSpan() const = 0; - // Log activity - virtual void log(ActivityLogger& logger) const = 0; - // Return json formatted metadata - // FIXME: Return iterator to dynamic type map here instead - virtual const std::string metadataJson() const = 0; - - static int64_t nsToUs(int64_t ns) { - // It's important that this conversion is the same everywhere. - // No rounding! 
- return ns / 1000; - } -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/ThreadUtil.h b/plugins/tensorboard-plugins/libkineto/include/ThreadUtil.h deleted file mode 100644 index d1dc80ad2ab..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/ThreadUtil.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace libkineto { - -int32_t systemThreadId(); -int32_t threadId(); -bool setThreadName(const std::string& name); -std::string getThreadName(); - -int32_t processId(); -std::string processName(int32_t pid); - -// Return a list of pids and process names for the current process -// and its parents. -std::vector> pidCommandPairsOfAncestors(); - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/TraceSpan.h b/plugins/tensorboard-plugins/libkineto/include/TraceSpan.h deleted file mode 100644 index af9a9d5ee55..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/TraceSpan.h +++ /dev/null @@ -1,36 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include - -namespace libkineto { - -struct TraceSpan { - TraceSpan() = delete; - TraceSpan( - int64_t startTime, int64_t endTime, std::string name) - : startTime(startTime), endTime(endTime), name(std::move(name)) { - } - TraceSpan( - int opCount, int it, std::string name, std::string prefix) - : opCount(opCount), - iteration(it), - name(std::move(name)), - prefix(std::move(prefix)) { - } - - // FIXME: change to duration? 
- int64_t startTime{0}; - int64_t endTime{0}; - int opCount{0}; - int iteration{-1}; - // Name is used to identify timeline - std::string name; - // Prefix used to distinguish trace spans on the same timeline - std::string prefix; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/libkineto.h b/plugins/tensorboard-plugins/libkineto/include/libkineto.h deleted file mode 100644 index 87c3d64f638..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/libkineto.h +++ /dev/null @@ -1,138 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -// Mediator for initialization and profiler control - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ActivityProfilerInterface.h" -#include "ActivityType.h" -#include "ClientInterface.h" -#include "GenericTraceActivity.h" -#include "TraceSpan.h" -#include "IActivityProfiler.h" -#include "ActivityTraceInterface.h" - -#include "ThreadUtil.h" - -extern "C" { - void suppressLibkinetoLogMessages(); - int InitializeInjection(void); - bool libkineto_init(bool cpuOnly, bool logOnError); -} - -namespace libkineto { - -class Config; -class ConfigLoader; - -struct CpuTraceBuffer { - TraceSpan span{0, 0, "none"}; - int gpuOpCount; - std::deque activities; -}; - -using ChildActivityProfilerFactory = - std::function()>; - -class LibkinetoApi { - public: - - explicit LibkinetoApi(ConfigLoader& configLoader) - : configLoader_(configLoader) { - } - - // Called by client that supports tracing API. - // libkineto can still function without this. 
- void registerClient(ClientInterface* client); - - // Called by libkineto on init - void registerProfiler(std::unique_ptr profiler) { - activityProfiler_ = std::move(profiler); - initClientIfRegistered(); - } - - ActivityProfilerInterface& activityProfiler() { - return *activityProfiler_; - } - - ClientInterface* client() { - return client_; - } - - void initProfilerIfRegistered() { - static std::once_flag once; - if (activityProfiler_) { - std::call_once(once, [this] { - if (!activityProfiler_->isInitialized()) { - activityProfiler_->init(); - initChildActivityProfilers(); - } - }); - } - } - - bool isProfilerInitialized() const { - return activityProfiler_ && activityProfiler_->isInitialized(); - } - - bool isProfilerRegistered() const { - return activityProfiler_ != nullptr; - } - - void suppressLogMessages() { - suppressLibkinetoLogMessages(); - } - - // Provides access to profier configuration manaegement - ConfigLoader& configLoader() { - return configLoader_; - } - - void registerProfilerFactory( - ChildActivityProfilerFactory factory) { - if (isProfilerInitialized()) { - activityProfiler_->addChildActivityProfiler(factory()); - } else { - childProfilerFactories_.push_back(factory); - } - } - - private: - - void initChildActivityProfilers() { - if (!isProfilerInitialized()) { - return; - } - for (const auto& factory : childProfilerFactories_) { - activityProfiler_->addChildActivityProfiler(factory()); - } - childProfilerFactories_.clear(); - } - - // Client is initialized once both it and libkineto has registered - void initClientIfRegistered(); - - ConfigLoader& configLoader_; - std::unique_ptr activityProfiler_{}; - ClientInterface* client_{}; - int32_t clientRegisterThread_{0}; - - bool isLoaded_{false}; - std::vector childProfilerFactories_; -}; - -// Singleton -LibkinetoApi& api(); - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/include/time_since_epoch.h b/plugins/tensorboard-plugins/libkineto/include/time_since_epoch.h 
deleted file mode 100644 index caa6b4d9276..00000000000 --- a/plugins/tensorboard-plugins/libkineto/include/time_since_epoch.h +++ /dev/null @@ -1,16 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -namespace libkineto { - -inline int64_t timeSinceEpoch( - const std::chrono::time_point& t) { - return std::chrono::duration_cast( - t.time_since_epoch()) - .count(); -} - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/libkineto_defs.bzl b/plugins/tensorboard-plugins/libkineto/libkineto_defs.bzl deleted file mode 100644 index 330c54a22df..00000000000 --- a/plugins/tensorboard-plugins/libkineto/libkineto_defs.bzl +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -def get_libkineto_api_srcs(): - return [ - "src/ThreadUtil.cpp", - "src/libkineto_api.cpp", - ] - -def get_libkineto_cupti_srcs(with_api = True): - return [ - "src/CudaDeviceProperties.cpp", - "src/CuptiActivityApi.cpp", - "src/CuptiActivityPlatform.cpp", - "src/CuptiCallbackApi.cpp", - "src/CuptiEventApi.cpp", - "src/CuptiMetricApi.cpp", - "src/CuptiRangeProfilerApi.cpp", - "src/Demangle.cpp", - "src/EventProfiler.cpp", - "src/EventProfilerController.cpp", - "src/WeakSymbols.cpp", - "src/cupti_strings.cpp", - ] + (get_libkineto_cpu_only_srcs(with_api)) - -def get_libkineto_roctracer_srcs(with_api = True): - return [ - "src/RoctracerActivityApi.cpp", - ] + (get_libkineto_cpu_only_srcs(with_api)) - -def get_libkineto_cpu_only_srcs(with_api = True): - return [ - "src/AbstractConfig.cpp", - "src/CuptiActivityProfiler.cpp", - "src/ActivityProfilerController.cpp", - "src/ActivityProfilerProxy.cpp", - "src/ActivityType.cpp", - "src/Config.cpp", - "src/ConfigLoader.cpp", - "src/CuptiActivityApi.cpp", - "src/Demangle.cpp", - 
"src/GenericTraceActivity.cpp", - "src/ILoggerObserver.cpp", - "src/Logger.cpp", - "src/init.cpp", - "src/output_csv.cpp", - "src/output_json.cpp", - ] + (get_libkineto_api_srcs() if with_api else []) - -def get_libkineto_public_headers(): - return [ - "include/AbstractConfig.h", - "include/ActivityProfilerInterface.h", - "include/ActivityType.h", - "include/Config.h", - "include/ClientInterface.h", - "include/GenericTraceActivity.h", - "include/GenericTraceActivity.h", - "include/IActivityProfiler.h", - "include/ILoggerObserver.h", - "include/ITraceActivity.h", - "include/TraceSpan.h", - "include/ThreadUtil.h", - "include/libkineto.h", - "include/time_since_epoch.h", - ] - -# kineto code should be updated to not have to -# suppress these warnings. -KINETO_COMPILER_FLAGS = [ - "-fexceptions", - "-Wno-deprecated-declarations", - "-Wno-unused-function", - "-Wno-unused-private-field", -] diff --git a/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cpp b/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cpp deleted file mode 100644 index 780047912ed..00000000000 --- a/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include -#include -#include - -#include -#include - -#include "kineto/libkineto/sample_programs/kineto_playground.cuh" - -using namespace kineto; - -static const std::string kFileName = "/tmp/kineto_playground_trace.json"; - -int main() { - warmup(); - - // Kineto config - - // Empty types set defaults to all types - std::set types; - - auto& profiler = libkineto::api().activityProfiler(); - libkineto::api().initProfilerIfRegistered(); - profiler.prepareTrace(types); - - // Good to warm up after prepareTrace to get cupti initialization to settle - warmup(); - profiler.startTrace(); - playground(); - - auto trace = profiler.stopTrace(); - LOG(INFO) << "Stopped and processed trace. Got " << trace->activities()->size() << " activities."; - trace->save(kFileName); - return 0; -} - diff --git a/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cu b/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cu deleted file mode 100644 index 54c6f82ff4b..00000000000 --- a/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cu +++ /dev/null @@ -1,60 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include - -#include "kineto_playground.cuh" - - -namespace kineto { - -void warmup(void) { - // Inititalizing CUDA can take a while which we normally do not want to see in Kineto traces. - // This is done in various ways that take Kineto as dependency. This is our way of doing warmup - // for kineto_playground - size_t bytes = 1000; - float* mem = NULL; - auto error = cudaMalloc(&mem, bytes); - if (error != cudaSuccess) { - printf("cudaMalloc failed during kineto_playground warmup. 
error code: %d", error); - return; - } - - cudaFree(mem); -} - -void basicMemcpyMemset(void) { - size_t size = (1 << 8) * sizeof(float); - float *hostMemSrc, *deviceMem, *hostMemDst; - cudaError_t err; - - hostMemSrc = (float*)malloc(size); - hostMemDst = (float*)malloc(size); - err = cudaMalloc(&deviceMem, size); - if (err != cudaSuccess) { - printf("cudaMalloc failed during %s", __func__); - return; - } - - memset(hostMemSrc, 1, size); - cudaMemcpy(deviceMem, hostMemSrc, size, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - printf("cudaMemcpy failed during %s", __func__); - return; - } - - cudaMemcpy(hostMemDst, deviceMem, size, cudaMemcpyDeviceToHost); - if (err != cudaSuccess) { - printf("cudaMemcpy failed during %s", __func__); - return; - } - - free(hostMemSrc); - free(hostMemDst); - cudaFree(deviceMem); -} - -void playground(void) { - // Add your experimental CUDA implementation here. -} - -} diff --git a/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cuh b/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cuh deleted file mode 100644 index 54e1ee59ada..00000000000 --- a/plugins/tensorboard-plugins/libkineto/sample_programs/kineto_playground.cuh +++ /dev/null @@ -1,18 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -namespace kineto { - -// Warms up CUDA before the tracing starts -void warmup(void); - -// Basic usage of cudaMemcpy and cudaMemset -void basicMemcpyMemset(void); - -// Your experimental code goes in here! -void playground(void); - -} diff --git a/plugins/tensorboard-plugins/libkineto/src/AbstractConfig.cpp b/plugins/tensorboard-plugins/libkineto/src/AbstractConfig.cpp deleted file mode 100644 index d60ab43c9a3..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/AbstractConfig.cpp +++ /dev/null @@ -1,188 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "AbstractConfig.h" - -#include -#include -#include - -#include "Logger.h" - -using namespace std::chrono; - -using std::string; -using std::vector; - -namespace KINETO_NAMESPACE { - -constexpr char kWhitespace[] = "\t\n "; - -static bool isWhitespace(string& s) { - return s.find_first_not_of(kWhitespace) == string::npos; -} - -// Remove whitespace from both end of string -static inline string trim(string& s) { - if (s.empty()) { - return s; - } else if (isWhitespace(s)) { - return ""; - } - auto start = s.find_first_not_of(kWhitespace); - auto end = s.find_last_not_of(kWhitespace); - return s.substr(start, end - start + 1); -} - -// Helper function for split. -// Return the index of char d in string s. -// If not found, returns the length of the string. -static int find(const char* s, char delim) { - int i; - for (i = 0; s[i]; i++) { - if (s[i] == delim) { - break; - } - } - return i; -} - -// Split a string by delimiter -static vector split(const string& s, char delim) { - vector res; - const char* cs = s.c_str(); - for (int i = find(cs, delim); cs[i]; cs += i + 1, i = find(cs, delim)) { - res.emplace_back(cs, i); - } - res.emplace_back(cs); - return res; -} - -// Remove a trailing comment. 
-static inline string stripComment(const string& s) { - std::size_t pos = s.find("#"); - return s.substr(0, pos); -} - -string AbstractConfig::toLower(string& s) const { - string res = s; - for (int i = 0; i < res.size(); i++) { - if (res[i] >= 'A' && res[i] <= 'Z') { - res[i] += ('a' - 'A'); - } - } - return res; -} - -bool AbstractConfig::endsWith(const string& s, const string& suffix) const { - if (suffix.size() > s.size()) { - return false; - } - return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; -} - -vector AbstractConfig::splitAndTrim(const string& s, char delim) const { - auto res = split(s, delim); - for (string& x : res) { - x = trim(x); - } - return res; -} - -int64_t AbstractConfig::toIntRange(const string& val, int64_t min, int64_t max) - const { - char* invalid; - int64_t res = strtoll(val.c_str(), &invalid, 10); - if (val.empty() || *invalid) { - throw std::invalid_argument(fmt::format("Invalid integer: {}", val)); - } else if (res < min || res > max) { - throw std::invalid_argument(fmt::format( - "Invalid argument: {} - expected range [{}, {}]", res, min, max)); - } - return res; -} - -int32_t AbstractConfig::toInt32(const string& val) const { - return toIntRange(val, 0, ~0u / 2); -} - -int64_t AbstractConfig::toInt64(const string& val) const { - return toIntRange(val, 0, ~0ul / 2); -} - -bool AbstractConfig::toBool(string& val) const { - const std::array bool_vals{ - "n", "y", "no", "yes", "f", "t", "false", "true"}; - const string lower_val = toLower(val); - for (int i = 0; i < bool_vals.size(); i++) { - if (lower_val == bool_vals[i]) { - return i % 2; - } - } - throw std::invalid_argument(fmt::format("Invalid bool argument: {}", val)); - return false; -} - -bool AbstractConfig::parse(const string& conf) { - std::istringstream iss(conf); - string line; - - timestamp_ = system_clock::now(); - - // Read the string stream 1 line at a time to parse. 
- while (std::getline(iss, line)) { - line = stripComment(line); - if (isWhitespace(line)) { - continue; - } - vector key_val = splitAndTrim(line, '='); - if (key_val.size() != 2) { - LOG(ERROR) << "Invalid config line: " << line; - return false; - } else { - bool handled = false; - try { - handled = handleOption(key_val[0], key_val[1]); - if (!handled) { - for (auto& feature_cfg : featureConfigs_) { - if (feature_cfg.second->handleOption(key_val[0], key_val[1])) { - handled = true; - break; - } - } - } - } catch (const std::exception& e) { - LOG(ERROR) << "Failed to parse config line: " << line; - LOG(ERROR) << e.what(); - return false; - } - if (!handled) { - // This might be due to using a newer config option on an - // older binary where it is not supported. In this case, - // print a warning message - but it is expected to work! - LOG(WARNING) << "Unrecognized config line: " << line; - } - } - } - - validate(timestamp_); - - // Store original text, used to detect updates - source_ = conf; - timestamp_ = system_clock::now(); - return true; -} - -bool AbstractConfig::handleOption( - const std::string& /* unused */, - std::string& /* unused */) { - LOG(ERROR) << "handleOption unimplemented"; - return false; -} - -void AbstractConfig::printActivityProfilerConfig(std::ostream& s) const { - for (const auto& feature_cfg : featureConfigs_) { - feature_cfg.second->printActivityProfilerConfig(s); - } -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityBuffers.h b/plugins/tensorboard-plugins/libkineto/src/ActivityBuffers.h deleted file mode 100644 index 157af879379..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityBuffers.h +++ /dev/null @@ -1,29 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - - -#include -#include - -#include "libkineto.h" -#include "CuptiActivityBuffer.h" - -namespace KINETO_NAMESPACE { - -struct ActivityBuffers { - std::list> cpu; - std::unique_ptr gpu; - - // Add a wrapper object to the underlying struct stored in the buffer - template - const ITraceActivity& addActivityWrapper(const T& act) { - wrappers_.push_back(std::make_unique(act)); - return *wrappers_.back().get(); - } - - private: - std::vector> wrappers_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityLoggerFactory.h b/plugins/tensorboard-plugins/libkineto/src/ActivityLoggerFactory.h deleted file mode 100644 index 0d1bf642cd6..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityLoggerFactory.h +++ /dev/null @@ -1,60 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace KINETO_NAMESPACE { - -class ActivityLogger; - -class ActivityLoggerFactory { - - public: - using FactoryFunc = - std::function(const std::string& url)>; - - // Add logger factory for a protocol prefix - void addProtocol(const std::string& protocol, FactoryFunc f) { - factories_[tolower(protocol)] = f; - } - - // Create a logger, invoking the factory for the protocol specified in url - std::unique_ptr makeLogger(const std::string& url) const { - std::string protocol = extractProtocol(url); - auto it = factories_.find(tolower(protocol)); - if (it != factories_.end()) { - return it->second(stripProtocol(url)); - } - throw std::invalid_argument(fmt::format( - "No logger registered for the {} protocol prefix", - protocol)); - return nullptr; - } - - private: - static std::string tolower(std::string s) { - std::transform(s.begin(), s.end(), s.begin(), - [](unsigned char c) { return std::tolower(c); } - ); - return s; - } - - static std::string extractProtocol(std::string url) { - return url.substr(0, 
url.find("://")); - } - - static std::string stripProtocol(std::string url) { - size_t pos = url.find("://"); - return pos == url.npos ? url : url.substr(pos + 3); - } - - std::map factories_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.cpp b/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.cpp deleted file mode 100644 index c85d41ed73f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.cpp +++ /dev/null @@ -1,246 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "ActivityProfilerController.h" - -#include -#include - -#include "ActivityLoggerFactory.h" -#include "ActivityTrace.h" -#include "CuptiActivityApi.h" -#ifdef HAS_ROCTRACER -#include "RoctracerActivityApi.h" -#endif -#include "ThreadUtil.h" -#include "output_json.h" -#include "output_membuf.h" - -#include "Logger.h" - -using namespace std::chrono; - -namespace KINETO_NAMESPACE { - -constexpr milliseconds kProfilerIntervalMsecs(1000); - -ActivityProfilerController::ActivityProfilerController( - ConfigLoader& configLoader, bool cpuOnly) - : configLoader_(configLoader) { -#ifdef HAS_ROCTRACER - profiler_ = std::make_unique( - RoctracerActivityApi::singleton(), cpuOnly); -#else - profiler_ = std::make_unique( - CuptiActivityApi::singleton(), cpuOnly); -#endif - configLoader_.addHandler(ConfigLoader::ConfigKind::ActivityProfiler, this); -} - -ActivityProfilerController::~ActivityProfilerController() { - configLoader_.removeHandler( - ConfigLoader::ConfigKind::ActivityProfiler, this); - if (profilerThread_) { - // signaling termination of the profiler loop - stopRunloop_ = true; - profilerThread_->join(); - delete profilerThread_; - profilerThread_ = nullptr; - } -} - -static ActivityLoggerFactory initLoggerFactory() { - ActivityLoggerFactory factory; - factory.addProtocol("file", [](const std::string& url) { - return std::unique_ptr(new 
ChromeTraceLogger(url)); - }); - return factory; -} - -static ActivityLoggerFactory& loggerFactory() { - static ActivityLoggerFactory factory = initLoggerFactory(); - return factory; -} - -void ActivityProfilerController::addLoggerFactory( - const std::string& protocol, ActivityLoggerFactory::FactoryFunc factory) { - loggerFactory().addProtocol(protocol, factory); -} - -static std::unique_ptr makeLogger(const Config& config) { - if (config.activitiesLogToMemory()) { - return std::make_unique(config); - } - return loggerFactory().makeLogger(config.activitiesLogUrl()); -} - -bool ActivityProfilerController::canAcceptConfig() { - return !profiler_->isActive(); -} - -void ActivityProfilerController::acceptConfig(const Config& config) { - VLOG(1) << "acceptConfig"; - if (config.activityProfilerEnabled()) { - scheduleTrace(config); - } -} - -void ActivityProfilerController::profilerLoop() { - setThreadName("Kineto Activity Profiler"); - VLOG(0) << "Entering activity profiler loop"; - - auto now = system_clock::now(); - auto next_wakeup_time = now + kProfilerIntervalMsecs; - - while (!stopRunloop_) { - now = system_clock::now(); - - while (now < next_wakeup_time) { - /* sleep override */ - std::this_thread::sleep_for(next_wakeup_time - now); - now = system_clock::now(); - } - - if (!profiler_->isActive()) { - std::lock_guard lock(asyncConfigLock_); - if (asyncRequestConfig_ - && !asyncRequestConfig_->hasProfileStartIteration()) { - // Note on now + kProfilerIntervalMsecs - // Profiler interval does not align perfectly upto startTime - warmup. Waiting until the next tick - // won't allow sufficient time for the profiler to warm up. 
So check if we are very close to the warmup time and trigger warmup - if (now + kProfilerIntervalMsecs - >= (asyncRequestConfig_->requestTimestamp() - asyncRequestConfig_->activitiesWarmupDuration())) { - LOG(INFO) << "Received on-demand activity trace request by " - << " profile timestamp = " - << asyncRequestConfig_-> - requestTimestamp().time_since_epoch().count(); - activateConfig(now); - } - } - } - - while (next_wakeup_time < now) { - next_wakeup_time += kProfilerIntervalMsecs; - } - - if (profiler_->isActive()) { - next_wakeup_time = profiler_->performRunLoopStep(now, next_wakeup_time); - VLOG(1) << "Profiler loop: " - << duration_cast(system_clock::now() - now).count() - << "ms"; - } - } - - VLOG(0) << "Exited activity profiling loop"; -} - -void ActivityProfilerController::step() { - int64_t currentIter = ++iterationCount_; - VLOG(0) << "Step called , iteration = " << currentIter; - - // optimization to not take the lock unless necessary - if (asyncRequestConfig_ && !profiler_->isActive()) { - std::lock_guard lock(asyncConfigLock_); - auto startIter = asyncRequestConfig_->startIterationIncludingWarmup(); - - if (asyncRequestConfig_->hasProfileStartIteration() - && currentIter >= startIter) { - LOG(INFO) << "Received on-demand activity trace request by profile" - << " start iteration = " - << asyncRequestConfig_->profileStartIteration() - << " current iteration = " << currentIter; - - if (currentIter > startIter) { - // adjust the start iteration if it is in the past - auto newProfileStart = currentIter + - asyncRequestConfig_->activitiesWarmupIterations(); - LOG(INFO) << "Start iteration updated to " << newProfileStart; - asyncRequestConfig_->setProfileStartIteration(newProfileStart); - } - activateConfig(system_clock::now()); - } - } - - if (profiler_->isActive()) { - auto now = system_clock::now(); - auto next_wakeup_time = now + kProfilerIntervalMsecs; - profiler_->performRunLoopStep(now, next_wakeup_time, currentIter); - } -} - -void 
ActivityProfilerController::activateConfig( - std::chrono::time_point now) { - logger_ = makeLogger(*asyncRequestConfig_); - profiler_->setLogger(logger_.get()); - profiler_->configure(*asyncRequestConfig_, now); - asyncRequestConfig_ = nullptr; -} - -void ActivityProfilerController::scheduleTrace(const Config& config) { - VLOG(1) << "scheduleTrace"; - if (profiler_->isActive()) { - LOG(ERROR) << "Ignored request - profiler busy"; - return; - } - int64_t currentIter = iterationCount_; - if (config.hasProfileStartIteration() && currentIter < 0) { - LOG(ERROR) << "Ignored profile iteration count based request as " - << "application is not updating iteration count"; - return; - } - std::lock_guard lock(asyncConfigLock_); - asyncRequestConfig_ = config.clone(); - - auto startIter = asyncRequestConfig_->startIterationIncludingWarmup(); - - if (asyncRequestConfig_->hasProfileStartIteration() - && (currentIter > startIter) - && asyncRequestConfig_->profileStartIterationRoundUp() > 0) { - auto newProfileStart - = currentIter + asyncRequestConfig_->activitiesWarmupIterations(); - // round up to nearest multiple - auto divisor = asyncRequestConfig_->profileStartIterationRoundUp(); - auto rem = newProfileStart % divisor; - newProfileStart += ((rem == 0) ? 0 : divisor - rem); - LOG(INFO) << "Rounding up profiler start iteration to : " << newProfileStart; - asyncRequestConfig_->setProfileStartIteration(newProfileStart); - } - - // start a profilerLoop() thread to handle request - if (!profilerThread_) { - profilerThread_ = - new std::thread(&ActivityProfilerController::profilerLoop, this); - } -} - -void ActivityProfilerController::prepareTrace(const Config& config) { - // Requests from ActivityProfilerApi have higher priority than - // requests from other sources (signal, daemon). - // Cancel any ongoing request and refuse new ones. 
- auto now = system_clock::now(); - if (profiler_->isActive()) { - LOG(WARNING) << "Cancelling current trace request in order to start " - << "higher priority synchronous request"; - if (libkineto::api().client()) { - libkineto::api().client()->stop(); - } - profiler_->stopTrace(now); - profiler_->reset(); - } - - profiler_->configure(config, now); -} - -std::unique_ptr ActivityProfilerController::stopTrace() { - profiler_->stopTrace(std::chrono::system_clock::now()); - auto logger = std::make_unique(profiler_->config()); - profiler_->processTrace(*logger); - profiler_->reset(); - return std::make_unique(std::move(logger), loggerFactory()); -} - -void ActivityProfilerController::addMetadata( - const std::string& key, const std::string& value) { - profiler_->addMetadata(key, value); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.h b/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.h deleted file mode 100644 index 415f107cbed..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerController.h +++ /dev/null @@ -1,84 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include - -#include "ActivityLoggerFactory.h" -#include "CuptiActivityProfiler.h" -#include "ActivityProfilerInterface.h" -#include "ActivityTraceInterface.h" -#include "ConfigLoader.h" -#include "CuptiActivityApi.h" - -namespace KINETO_NAMESPACE { - -class Config; - -class ActivityProfilerController : public ConfigLoader::ConfigHandler { - public: - explicit ActivityProfilerController(ConfigLoader& configLoader, bool cpuOnly); - ActivityProfilerController(const ActivityProfilerController&) = delete; - ActivityProfilerController& operator=(const ActivityProfilerController&) = - delete; - - ~ActivityProfilerController(); - - static void addLoggerFactory( - const std::string& protocol, - ActivityLoggerFactory::FactoryFunc factory); - - bool canAcceptConfig() override; - void acceptConfig(const Config& config) override; - - void scheduleTrace(const Config& config); - - void prepareTrace(const Config& config); - - void startTrace() { - profiler_->startTrace(std::chrono::system_clock::now()); - } - - void step(); - - std::unique_ptr stopTrace(); - - bool isActive() { - return profiler_->isActive(); - } - - void transferCpuTrace( - std::unique_ptr cpuTrace) { - return profiler_->transferCpuTrace(std::move(cpuTrace)); - } - - void recordThreadInfo() { - profiler_->recordThreadInfo(); - } - - void addChildActivityProfiler( - std::unique_ptr profiler) { - profiler_->addChildActivityProfiler(std::move(profiler)); - } - - void addMetadata(const std::string& key, const std::string& value); - - private: - void profilerLoop(); - void activateConfig(std::chrono::time_point now); - - std::unique_ptr asyncRequestConfig_; - std::mutex asyncConfigLock_; - std::unique_ptr profiler_; - std::unique_ptr logger_; - std::thread* profilerThread_{nullptr}; - std::atomic_bool stopRunloop_{false}; - std::atomic iterationCount_{-1}; - ConfigLoader& configLoader_; -}; - -} // namespace KINETO_NAMESPACE diff --git 
a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.cpp b/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.cpp deleted file mode 100644 index b2d36b7b3ab..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "ActivityProfilerProxy.h" - -#include "ActivityProfilerController.h" -#include "Config.h" -#include "CuptiActivityApi.h" -#include "Logger.h" -#include - -namespace KINETO_NAMESPACE { - -ActivityProfilerProxy::ActivityProfilerProxy( - bool cpuOnly, ConfigLoader& configLoader) - : cpuOnly_(cpuOnly), configLoader_(configLoader) { -} - -ActivityProfilerProxy::~ActivityProfilerProxy() { - delete controller_; -}; - -void ActivityProfilerProxy::init() { - if (!controller_) { - controller_ = new ActivityProfilerController(configLoader_, cpuOnly_); - } -} - -void ActivityProfilerProxy::scheduleTrace(const std::string& configStr) { - Config config; - config.parse(configStr); - controller_->scheduleTrace(config); -} - -void ActivityProfilerProxy::scheduleTrace(const Config& config) { - controller_->scheduleTrace(config); -} - -void ActivityProfilerProxy::prepareTrace( - const std::set& activityTypes, - const std::string& configStr) { - Config config; - bool validate_required = true; - - // allow user provided config to override default options - if (!configStr.empty()) { - if (!config.parse(configStr)) { - LOG(WARNING) << "Failed to parse config : " << configStr; - } - // parse also runs validate - validate_required = false; - } - - config.setClientDefaults(); - config.setSelectedActivityTypes(activityTypes); - - if (validate_required) { - config.validate(std::chrono::system_clock::now()); - } - - controller_->prepareTrace(config); -} - -void ActivityProfilerProxy::startTrace() { - controller_->startTrace(); -} - -std::unique_ptr -ActivityProfilerProxy::stopTrace() { - return 
controller_->stopTrace(); -} - -void ActivityProfilerProxy::step() { - controller_->step(); -} - -bool ActivityProfilerProxy::isActive() { - return controller_->isActive(); -} - -void ActivityProfilerProxy::pushCorrelationId(uint64_t id) { - CuptiActivityApi::pushCorrelationID(id, - CuptiActivityApi::CorrelationFlowType::Default); -} - -void ActivityProfilerProxy::popCorrelationId() { - CuptiActivityApi::popCorrelationID( - CuptiActivityApi::CorrelationFlowType::Default); -} - -void ActivityProfilerProxy::pushUserCorrelationId(uint64_t id) { - CuptiActivityApi::pushCorrelationID(id, - CuptiActivityApi::CorrelationFlowType::User); -} - -void ActivityProfilerProxy::popUserCorrelationId() { - CuptiActivityApi::popCorrelationID( - CuptiActivityApi::CorrelationFlowType::User); -} - -void ActivityProfilerProxy::transferCpuTrace( - std::unique_ptr traceBuffer) { - controller_->transferCpuTrace(std::move(traceBuffer)); -} - -void ActivityProfilerProxy::addMetadata( - const std::string& key, const std::string& value) { - controller_->addMetadata(key, value); -} - -void ActivityProfilerProxy::recordThreadInfo() { - controller_->recordThreadInfo(); -} - -void ActivityProfilerProxy::addChildActivityProfiler( - std::unique_ptr profiler) { - controller_->addChildActivityProfiler(std::move(profiler)); -} - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.h b/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.h deleted file mode 100644 index b5cf84b2f1d..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityProfilerProxy.h +++ /dev/null @@ -1,73 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include "ActivityProfilerInterface.h" - -#include -#include -#include - -#include "ActivityType.h" -#include "ITraceActivity.h" - -namespace libkineto { - // previous declaration is struct so this one must be too. 
- struct CpuTraceBuffer; -} - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -class ActivityProfilerController; -class Config; -class ConfigLoader; - -class ActivityProfilerProxy : public ActivityProfilerInterface { - - public: - ActivityProfilerProxy(bool cpuOnly, ConfigLoader& configLoader); - ~ActivityProfilerProxy() override; - - void init() override; - bool isInitialized() override { - return controller_ != nullptr; - } - - bool isActive() override; - - void recordThreadInfo() override; - - void scheduleTrace(const std::string& configStr) override; - void scheduleTrace(const Config& config); - - void prepareTrace( - const std::set& activityTypes, - const std::string& configStr = "") override; - - void startTrace() override; - void step() override; - std::unique_ptr stopTrace() override; - - void pushCorrelationId(uint64_t id) override; - void popCorrelationId() override; - - void pushUserCorrelationId(uint64_t id) override; - void popUserCorrelationId() override; - - void transferCpuTrace( - std::unique_ptr traceBuffer) override; - - void addMetadata(const std::string& key, const std::string& value) override; - - virtual void addChildActivityProfiler( - std::unique_ptr profiler) override; - - private: - bool cpuOnly_{true}; - ConfigLoader& configLoader_; - ActivityProfilerController* controller_{nullptr}; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityTrace.h b/plugins/tensorboard-plugins/libkineto/src/ActivityTrace.h deleted file mode 100644 index 0be76af08e4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityTrace.h +++ /dev/null @@ -1,45 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include - -#include "ActivityLoggerFactory.h" -#include "ActivityTraceInterface.h" -#include "output_json.h" -#include "output_membuf.h" - -namespace libkineto { - -class ActivityTrace : public ActivityTraceInterface { - public: - ActivityTrace( - std::unique_ptr tmpLogger, - const ActivityLoggerFactory& factory) - : memLogger_(std::move(tmpLogger)), - loggerFactory_(factory) { - } - - const std::vector* activities() override { - return memLogger_->traceActivities(); - }; - - void save(const std::string& url) override { - std::string prefix; - // if no protocol is specified, default to file - if (url.find("://") == url.npos) { - prefix = "file://"; - } - memLogger_->log(*loggerFactory_.makeLogger(prefix + url)); - }; - - private: - // Activities are logged into a buffer - std::unique_ptr memLogger_; - - // Alternative logger used by save() if protocol prefix is specified - const ActivityLoggerFactory& loggerFactory_; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/ActivityType.cpp b/plugins/tensorboard-plugins/libkineto/src/ActivityType.cpp deleted file mode 100644 index 18856b72370..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ActivityType.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "ActivityType.h" - -#include - -namespace libkineto { - -struct ActivityTypeName { - const char* name; - ActivityType type; -}; - -static constexpr std::array map{{ - {"cpu_op", ActivityType::CPU_OP}, - {"user_annotation", ActivityType::USER_ANNOTATION}, - {"gpu_user_Annotation", ActivityType::GPU_USER_ANNOTATION}, - {"gpu_memcpy", ActivityType::GPU_MEMCPY}, - {"gpu_memset", ActivityType::GPU_MEMSET}, - {"kernel", ActivityType::CONCURRENT_KERNEL}, - {"external_correlation", ActivityType::EXTERNAL_CORRELATION}, - {"cuda_runtime", ActivityType::CUDA_RUNTIME}, - {"cuda_profiler_range", ActivityType::CUDA_PROFILER_RANGE}, - {"glow_runtime", ActivityType::GLOW_RUNTIME}, - {"cpu_instant_event", ActivityType::CPU_INSTANT_EVENT}, - {"python_function", ActivityType::PYTHON_FUNCTION}, - {"overhead", ActivityType::OVERHEAD}, - {"ENUM_COUNT", ActivityType::ENUM_COUNT} -}}; - -static constexpr bool matchingOrder(int idx = 0) { - return map[idx].type == ActivityType::ENUM_COUNT || - ((idx == (int) map[idx].type) && matchingOrder(idx + 1)); -} -static_assert(matchingOrder(), "ActivityTypeName map is out of order"); - -const char* toString(ActivityType t) { - return map[(int)t].name; -} - -ActivityType toActivityType(const std::string& str) { - for (int i = 0; i < activityTypeCount; i++) { - if (str == map[i].name) { - return map[i].type; - } - } - throw std::invalid_argument(fmt::format("Invalid activity type: {}", str)); -} - -const std::array activityTypes() { - std::array res; - for (int i = 0; i < activityTypeCount; i++) { - res[i] = map[i].type; - } - return res; -} - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/Config.cpp b/plugins/tensorboard-plugins/libkineto/src/Config.cpp deleted file mode 100644 index 95538840f37..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/Config.cpp +++ /dev/null @@ -1,473 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "Config.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Logger.h" -#include "ThreadUtil.h" - -using namespace std::chrono; - -using std::string; -using std::vector; - -namespace KINETO_NAMESPACE { - -constexpr milliseconds kDefaultSamplePeriodMsecs(1000); -constexpr milliseconds kDefaultMultiplexPeriodMsecs(1000); -constexpr milliseconds kDefaultActivitiesProfileDurationMSecs(500); -constexpr int kDefaultActivitiesMaxGpuBufferSize(128 * 1024 * 1024); -constexpr seconds kDefaultActivitiesWarmupDurationSecs(5); -constexpr seconds kDefaultBufferUntilWarmup(10); -constexpr seconds kDefaultReportPeriodSecs(1); -constexpr int kDefaultSamplesPerReport(1); -constexpr int kDefaultMaxEventProfilersPerGpu(1); -constexpr int kDefaultEventProfilerHearbeatMonitorPeriod(0); -constexpr seconds kMaxRequestAge(10); - -// Event Profiler -constexpr char kEventsKey[] = "EVENTS"; -constexpr char kMetricsKey[] = "METRICS"; -constexpr char kSamplePeriodKey[] = "SAMPLE_PERIOD_MSECS"; -constexpr char kMultiplexPeriodKey[] = "MULTIPLEX_PERIOD_MSECS"; -constexpr char kReportPeriodKey[] = "REPORT_PERIOD_SECS"; -constexpr char kSamplesPerReportKey[] = "SAMPLES_PER_REPORT"; -constexpr char kEventsLogFileKey[] = "EVENTS_LOG_FILE"; -constexpr char kEventsEnabledDevicesKey[] = "EVENTS_ENABLED_DEVICES"; -constexpr char kOnDemandDurationKey[] = "EVENTS_DURATION_SECS"; -constexpr char kMaxEventProfilersPerGpuKey[] = "MAX_EVENT_PROFILERS_PER_GPU"; -constexpr char kHeartbeatMonitorPeriodKey[] = - "EVENTS_HEARTBEAT_MONITOR_PERIOD_SECS"; - -// Activity Profiler -constexpr char kActivitiesEnabledKey[] = "ACTIVITIES_ENABLED"; -constexpr char kActivityTypesKey[] = "ACTIVITY_TYPES"; -constexpr char kActivitiesLogFileKey[] = "ACTIVITIES_LOG_FILE"; -constexpr char kActivitiesDurationKey[] = "ACTIVITIES_DURATION_SECS"; -constexpr char kActivitiesDurationMsecsKey[] = "ACTIVITIES_DURATION_MSECS"; -constexpr char 
kActivitiesWarmupDurationSecsKey[] = "ACTIVITIES_WARMUP_PERIOD_SECS"; -constexpr char kActivitiesMaxGpuBufferSizeKey[] = - "ACTIVITIES_MAX_GPU_BUFFER_SIZE_MB"; - -// Client Interface -constexpr char kClientInterfaceEnableOpInputsCollection[] = "CLIENT_INTERFACE_ENABLE_OP_INPUTS_COLLECTION"; - -constexpr char kActivitiesWarmupIterationsKey[] = "ACTIVITIES_WARMUP_ITERATIONS"; -constexpr char kActivitiesIterationsKey[] = "ACTIVITIES_ITERATIONS"; -// Common - -// Client-side timestamp used for synchronized start across hosts for -// distributed workloads. -// Specified in milliseconds Unix time (milliseconds since epoch). -// To use, compute a future timestamp as follows: -// * C++: + duration_cast( -// system_clock::now().time_since_epoch()).count() -// * Python: + int(time.time() * 1000) -// * Bash: $(( + $(date +%s%3N))) -// If used for a tracing request, timestamp must be far enough in the future -// to accommodate ACTIVITIES_WARMUP_PERIOD_SECS as well as any delays in -// propagating the request to the profiler. -// If the request can not be honored, it is up to the profilers to report -// an error somehow - no checks are done at config parse time. -// Note PROFILE_START_ITERATION has higher precedence -constexpr char kProfileStartTimeKey[] = "PROFILE_START_TIME"; -// DEPRECATED - USE PROFILE_START_TIME instead -constexpr char kRequestTimestampKey[] = "REQUEST_TIMESTAMP"; - -// Alternatively if the application supports reporting iterations -// start the profile at specific iteration. If the iteration count -// is >= this value the profile is started immediately. -// A value >= 0 is valid for this config option to take effect. -// Note PROFILE_START_ITERATION will take precedence over PROFILE_START_TIME. -constexpr char kProfileStartIterationKey[] = "PROFILE_START_ITERATION"; - -// Users can also start the profile on an integer multiple of the config -// value PROFILE_START_ITERATION_ROUNDUP. 
This knob behaves similar to -// PROFILE_START_ITERATION but instead of saying : "start collection trace on -// iteration 500", one can configure it to "start collecting trace on the next -// 100th iteration". -// -// For example, -// PROFILE_START_ITERATION_ROUNDUP = 1000, and the current iteration is 2010 -// The profile will then be collected on the next multiple of 1000 ie. 3000 -// Note PROFILE_START_ITERATION_ROUNDUP will also take precedence over -// PROFILE_START_TIME. -constexpr char kProfileStartIterationRoundUpKey[] - = "PROFILE_START_ITERATION_ROUNDUP"; - -// Enable on-demand trigger via kill -USR2 -// When triggered in this way, /tmp/libkineto.conf will be used as config. -constexpr char kEnableSigUsr2Key[] = "ENABLE_SIGUSR2"; - -// Enable communication through IPC Fabric -// and disable thrift communication with dynolog daemon -constexpr char kEnableIpcFabricKey[] = "ENABLE_IPC_FABRIC"; - -// Verbose log level -// The actual glog is not used and --v and --vmodule has no effect. -// Instead set the verbose level and modules in the config file. -constexpr char kLogVerboseLevelKey[] = "VERBOSE_LOG_LEVEL"; -// By default, all modules will log verbose messages >= verboseLogLevel. -// But to reduce noise we can specify one or more modules of interest. -// A module is a C/C++ object file (source file name), -// Example argument: ActivityProfiler.cpp,output_json.cpp -constexpr char kLogVerboseModulesKey[] = "VERBOSE_LOG_MODULES"; - -// Max devices supported on any system -constexpr uint8_t kMaxDevices = 8; - -namespace { - -struct FactoryMap { - - void addFactory( - std::string name, - std::function factory) { - std::lock_guard lock(lock_); - factories_[name] = factory; - } - - void addFeatureConfigs(Config& cfg) { - std::lock_guard lock(lock_); - for (const auto& p : factories_) { - cfg.addFeature(p.first, p.second(cfg)); - } - } - -// Config factories are shared between objects and since -// config objects can be created by multiple threads, we need a lock. 
- std::mutex lock_; - std::map> factories_; -}; - -std::shared_ptr configFactories() { - // Ensure this is safe to call during shutdown, even as static - // destructors are invoked. Once factories destructor has been - // invoked, weak_ptr.lock() will return nullptr. - // But calls before that point will have a valid shared_ptr, - // delaying destruction of the underlying FactoryMap. - static auto factories = std::make_shared(); - static std::weak_ptr weak_ptr = factories; - return weak_ptr.lock(); -} - -} // namespace - -void Config::addConfigFactory( - std::string name, - std::function factory) { - auto factories = configFactories(); - if (factories) { - factories->addFactory(name, factory); - } -} - -static string defaultTraceFileName() { - return fmt::format("/tmp/libkineto_activities_{}.json", processId()); -} - -Config::Config() - : verboseLogLevel_(-1), - samplePeriod_(kDefaultSamplePeriodMsecs), - reportPeriod_(duration_cast(kDefaultReportPeriodSecs)), - samplesPerReport_(kDefaultSamplesPerReport), - eventProfilerOnDemandDuration_(seconds(0)), - eventProfilerMaxInstancesPerGpu_(kDefaultMaxEventProfilersPerGpu), - eventProfilerHeartbeatMonitorPeriod_( - kDefaultEventProfilerHearbeatMonitorPeriod), - multiplexPeriod_(kDefaultMultiplexPeriodMsecs), - activityProfilerEnabled_(true), - activitiesLogFile_(defaultTraceFileName()), - activitiesLogUrl_(fmt::format("file://{}", activitiesLogFile_)), - activitiesMaxGpuBufferSize_(kDefaultActivitiesMaxGpuBufferSize), - activitiesWarmupDuration_(kDefaultActivitiesWarmupDurationSecs), - activitiesWarmupIterations_(0), - activitiesDuration_(kDefaultActivitiesProfileDurationMSecs), - activitiesRunIterations_(0), - activitiesOnDemandTimestamp_(milliseconds(0)), - profileStartTime_(milliseconds(0)), - profileStartIteration_(-1), - profileStartIterationRoundUp_(-1), - requestTimestamp_(milliseconds(0)), - enableSigUsr2_(false), - enableIpcFabric_(false) { - auto factories = configFactories(); - if (factories) { - 
factories->addFeatureConfigs(*this); - } -} - -uint8_t Config::createDeviceMask(const string& val) { - uint8_t res = 0; - for (const auto& d : splitAndTrim(val, ',')) { - res |= 1 << toIntRange(d, 0, kMaxDevices - 1); - } - return res; -} - -const seconds Config::maxRequestAge() const { - return kMaxRequestAge; -} - -static std::string getTimeStr(time_point t) { - std::time_t t_c = system_clock::to_time_t(t); - return fmt::format("{:%H:%M:%S}", fmt::localtime(t_c)); -} - -static time_point handleRequestTimestamp(int64_t ms) { - auto t = time_point(milliseconds(ms)); - auto now = system_clock::now(); - if (t > now) { - throw std::invalid_argument(fmt::format( - "Invalid {}: {} - time is in future", - kRequestTimestampKey, - getTimeStr(t))); - } else if ((now - t) > kMaxRequestAge) { - throw std::invalid_argument(fmt::format( - "Invalid {}: {} - time is more than {}s in the past", - kRequestTimestampKey, - getTimeStr(t), - kMaxRequestAge.count())); - } - return t; -} - -void Config::setActivityTypes( - const std::vector& selected_activities) { - selectedActivityTypes_.clear(); - if (selected_activities.size() > 0) { - for (const auto& activity : selected_activities) { - if (activity == "") { - continue; - } - selectedActivityTypes_.insert(toActivityType(activity)); - } - } -} - -bool Config::handleOption(const std::string& name, std::string& val) { - // Event Profiler - if (!name.compare(kEventsKey)) { - vector event_names = splitAndTrim(val, ','); - eventNames_.insert(event_names.begin(), event_names.end()); - } else if (!name.compare(kMetricsKey)) { - vector metric_names = splitAndTrim(val, ','); - metricNames_.insert(metric_names.begin(), metric_names.end()); - } else if (!name.compare(kSamplePeriodKey)) { - samplePeriod_ = milliseconds(toInt32(val)); - } else if (!name.compare(kMultiplexPeriodKey)) { - multiplexPeriod_ = milliseconds(toInt32(val)); - } else if (!name.compare(kReportPeriodKey)) { - setReportPeriod(seconds(toInt32(val))); - } else if 
(!name.compare(kSamplesPerReportKey)) { - samplesPerReport_ = toInt32(val); - } else if (!name.compare(kEventsLogFileKey)) { - eventLogFile_ = val; - } else if (!name.compare(kEventsEnabledDevicesKey)) { - eventProfilerDeviceMask_ = createDeviceMask(val); - } else if (!name.compare(kOnDemandDurationKey)) { - eventProfilerOnDemandDuration_ = seconds(toInt32(val)); - eventProfilerOnDemandTimestamp_ = timestamp(); - } else if (!name.compare(kMaxEventProfilersPerGpuKey)) { - eventProfilerMaxInstancesPerGpu_ = toInt32(val); - } else if (!name.compare(kHeartbeatMonitorPeriodKey)) { - eventProfilerHeartbeatMonitorPeriod_ = seconds(toInt32(val)); - } - - // Activity Profiler - else if (!name.compare(kActivitiesDurationKey)) { - activitiesDuration_ = - duration_cast(seconds(toInt32(val))); - activitiesOnDemandTimestamp_ = timestamp(); - } else if (!name.compare(kActivityTypesKey)) { - vector activity_types = splitAndTrim(toLower(val), ','); - setActivityTypes(activity_types); - } else if (!name.compare(kActivitiesDurationMsecsKey)) { - activitiesDuration_ = milliseconds(toInt32(val)); - activitiesOnDemandTimestamp_ = timestamp(); - } else if (!name.compare(kActivitiesIterationsKey)) { - activitiesRunIterations_ = toInt32(val); - activitiesOnDemandTimestamp_ = timestamp(); - } else if (!name.compare(kLogVerboseLevelKey)) { - verboseLogLevel_ = toInt32(val); - } else if (!name.compare(kLogVerboseModulesKey)) { - verboseLogModules_ = splitAndTrim(val, ','); - } else if (!name.compare(kActivitiesEnabledKey)) { - activityProfilerEnabled_ = toBool(val); - } else if (!name.compare(kActivitiesLogFileKey)) { - activitiesLogFile_ = val; - activitiesLogUrl_ = fmt::format("file://{}", val); - activitiesOnDemandTimestamp_ = timestamp(); - } else if (!name.compare(kActivitiesMaxGpuBufferSizeKey)) { - activitiesMaxGpuBufferSize_ = toInt32(val) * 1024 * 1024; - } else if (!name.compare(kActivitiesWarmupDurationSecsKey)) { - activitiesWarmupDuration_ = seconds(toInt32(val)); - } else if 
(!name.compare(kActivitiesWarmupIterationsKey)) { - activitiesWarmupIterations_ = toInt32(val); - } - - // Client Interface - else if (!name.compare(kClientInterfaceEnableOpInputsCollection)) { - enableOpInputsCollection_ = toBool(val); - } - - // Common - else if (!name.compare(kRequestTimestampKey)) { - VLOG(0) << kRequestTimestampKey - << " has been deprecated - please use " - << kProfileStartTimeKey; - requestTimestamp_ = handleRequestTimestamp(toInt64(val)); - } else if (!name.compare(kProfileStartTimeKey)) { - profileStartTime_ = - time_point(milliseconds(toInt64(val))); - } else if (!name.compare(kProfileStartIterationKey)) { - profileStartIteration_ = toInt32(val); - } else if (!name.compare(kProfileStartIterationRoundUpKey)) { - profileStartIterationRoundUp_ = toInt32(val); - } else if (!name.compare(kEnableSigUsr2Key)) { - enableSigUsr2_ = toBool(val); - } else if (!name.compare(kEnableIpcFabricKey)) { - enableIpcFabric_ = toBool(val); - } else { - return false; - } - return true; -} - -std::chrono::milliseconds Config::activitiesDurationDefault() const { - return kDefaultActivitiesProfileDurationMSecs; -}; - -void Config::updateActivityProfilerRequestReceivedTime() { - activitiesOnDemandTimestamp_ = system_clock::now(); -} - -void Config::setClientDefaults() { - AbstractConfig::setClientDefaults(); - activitiesLogToMemory_ = true; -} - -void Config::validate( - const time_point& fallbackProfileStartTime) { - if (samplePeriod_.count() == 0) { - LOG(WARNING) << "Sample period must be greater than 0, setting to 1ms"; - samplePeriod_ = milliseconds(1); - } - - if (multiplexPeriod_ < samplePeriod_) { - LOG(WARNING) << "Multiplex period can not be smaller " - << "than sample period"; - LOG(WARNING) << "Setting multiplex period to " << samplePeriod_.count() - << "ms"; - multiplexPeriod_ = samplePeriod_; - } - - if ((multiplexPeriod_ % samplePeriod_).count() != 0) { - LOG(WARNING) << "Multiplex period must be a " - << "multiple of sample period"; - 
multiplexPeriod_ = alignUp(multiplexPeriod_, samplePeriod_); - LOG(WARNING) << "Setting multiplex period to " << multiplexPeriod_.count() - << "ms"; - } - - if ((reportPeriod_ % multiplexPeriod_).count() != 0 || - reportPeriod_.count() == 0) { - LOG(WARNING) << "Report period must be a " - << "multiple of multiplex period"; - reportPeriod_ = alignUp(reportPeriod_, multiplexPeriod_); - LOG(WARNING) << "Setting report period to " << reportPeriod_.count() - << "ms"; - } - - if (samplesPerReport_ < 1) { - LOG(WARNING) << "Samples per report must be in the range " - << "[1, report period / sample period]"; - LOG(WARNING) << "Setting samples per report to 1"; - samplesPerReport_ = 1; - } - - int max_samples_per_report = reportPeriod_ / samplePeriod_; - if (samplesPerReport_ > max_samples_per_report) { - LOG(WARNING) << "Samples per report must be in the range " - << "[1, report period / sample period] ([1, " - << reportPeriod_.count() << "ms / " << samplePeriod_.count() - << "ms = " << max_samples_per_report << "])"; - LOG(WARNING) << "Setting samples per report to " << max_samples_per_report; - samplesPerReport_ = max_samples_per_report; - } - - if (!hasProfileStartTime()) { - VLOG(0) - << "No explicit timestamp has been set. 
" - << "Defaulting it to now + activitiesWarmupDuration with buffer."; - profileStartTime_ = fallbackProfileStartTime + - activitiesWarmupDuration() + kDefaultBufferUntilWarmup; - } - - if (profileStartIterationRoundUp_ == 0) { - // setting to 0 will mess up modulo arithmetic, set it to -1 so it has no effect - LOG(WARNING) << "Profiler start iteration round up should be >= 1."; - profileStartIterationRoundUp_ = -1; - } - - if (profileStartIterationRoundUp_ > 0 && !hasProfileStartIteration()) { - VLOG(0) << "Setting profiler start iteration to 0 so this config is " - << "triggered via iteration count."; - profileStartIteration_ = 0; - } - - if (selectedActivityTypes_.size() == 0) { - selectDefaultActivityTypes(); - } -} - -void Config::setReportPeriod(milliseconds msecs) { - reportPeriod_ = msecs; -} - -void Config::printActivityProfilerConfig(std::ostream& s) const { - s << "Log file: " << activitiesLogFile() << std::endl; - if (hasProfileStartIteration()) { - s << "Trace start Iteration: " << profileStartIteration() << std::endl; - s << "Trace warmup Iterations: " << activitiesWarmupIterations() << std::endl; - s << "Trace profile Iterations: " << activitiesRunIterations() << std::endl; - if (profileStartIterationRoundUp() > 0) { - s << "Trace start iteration roundup : " << profileStartIterationRoundUp() - << std::endl; - } - } else if (hasProfileStartTime()) { - std::time_t t_c = system_clock::to_time_t(requestTimestamp()); - LOG(INFO) << "Trace start time: " - << fmt::format("{:%Y-%m-%d %H:%M:%S}", fmt::localtime(t_c)); - s << "Trace duration: " << activitiesDuration().count() << "ms" - << std::endl; - s << "Warmup duration: " << activitiesWarmupDuration().count() << "s" - << std::endl; - } - - s << "Max GPU buffer size: " << activitiesMaxGpuBufferSize() / 1024 / 1024 - << "MB" << std::endl; - - std::vector activities; - for (const auto& activity : selectedActivityTypes_) { - activities.push_back(toString(activity)); - } - s << "Enabled activities: " - << 
fmt::format("{}", fmt::join(activities, ",")) << std::endl; - - AbstractConfig::printActivityProfilerConfig(s); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ConfigLoader.cpp b/plugins/tensorboard-plugins/libkineto/src/ConfigLoader.cpp deleted file mode 100644 index 4080b678d37..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ConfigLoader.cpp +++ /dev/null @@ -1,300 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "ConfigLoader.h" - -#ifdef __linux__ -#include -#endif - -#include -#include -#include -#include -#include - -#include "DaemonConfigLoader.h" - -#include "Logger.h" - -using namespace std::chrono; -using std::string; - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -constexpr char kConfigFileEnvVar[] = "KINETO_CONFIG"; -#ifdef __linux__ -constexpr char kConfigFile[] = "/etc/libkineto.conf"; -constexpr char kOnDemandConfigFile[] = "/tmp/libkineto.conf"; -#else -constexpr char kConfigFile[] = "libkineto.conf"; -constexpr char kOnDemandConfigFile[] = "libkineto.conf"; -#endif - -constexpr std::chrono::seconds kConfigUpdateIntervalSecs(300); -constexpr std::chrono::seconds kOnDemandConfigUpdateIntervalSecs(5); - -#ifdef __linux__ -static struct sigaction originalUsr2Handler = {}; -#endif - -// Use SIGUSR2 to initiate profiling. -// Look for an on-demand config file. -// If none is found, default to base config. 
-// Try to not affect existing handlers -static bool hasOriginalSignalHandler() { -#ifdef __linux__ - return originalUsr2Handler.sa_handler != nullptr || - originalUsr2Handler.sa_sigaction != nullptr; -#else - return false; -#endif -} - -static void handle_signal(int signal) { -#ifdef __linux__ - if (signal == SIGUSR2) { - ConfigLoader::instance().handleOnDemandSignal(); - if (hasOriginalSignalHandler()) { - // Invoke original handler and reinstate ours - struct sigaction act; - sigaction(SIGUSR2, &originalUsr2Handler, &act); - raise(SIGUSR2); - sigaction(SIGUSR2, &act, &originalUsr2Handler); - } - } -#endif -} - -static void setupSignalHandler(bool enableSigUsr2) { -#ifdef __linux__ - if (enableSigUsr2) { - struct sigaction act = {}; - act.sa_handler = &handle_signal; - act.sa_flags = SA_NODEFER; - if (sigaction(SIGUSR2, &act, &originalUsr2Handler) < 0) { - PLOG(ERROR) << "Failed to register SIGUSR2 handler"; - } - if (originalUsr2Handler.sa_handler == &handle_signal) { - originalUsr2Handler = {}; - } - } else if (hasOriginalSignalHandler()) { - sigaction(SIGUSR2, &originalUsr2Handler, nullptr); - originalUsr2Handler = {}; - } -#endif -} - -// return an empty string if reading gets any errors. Otherwise a config string. -static std::string readConfigFromConfigFile(const char* filename) { - // Read whole file into a string. 
- std::ifstream file(filename); - std::string conf; - try { - conf.assign( - std::istreambuf_iterator(file), std::istreambuf_iterator()); - } catch (std::exception& e) { - VLOG(0) << "Error reading " << filename << ": " - << e.what(); - conf = ""; - } - return conf; -} - -static std::function()>& -daemonConfigLoaderFactory() { - static std::function()> factory = nullptr; - return factory; -} - -void ConfigLoader::setDaemonConfigLoaderFactory( - std::function()> factory) { - daemonConfigLoaderFactory() = factory; -} - -ConfigLoader& ConfigLoader::instance() { - static ConfigLoader config_loader; - return config_loader; -} - -// return an empty string if polling gets any errors. Otherwise a config string. -std::string ConfigLoader::readOnDemandConfigFromDaemon( - time_point now) { - if (!daemonConfigLoader_) { - return ""; - } - bool events = canHandlerAcceptConfig(ConfigKind::EventProfiler); - bool activities = canHandlerAcceptConfig(ConfigKind::ActivityProfiler); - return daemonConfigLoader_->readOnDemandConfig(events, activities); -} - -int ConfigLoader::contextCountForGpu(uint32_t device) { - if (!daemonConfigLoader_) { - // FIXME: Throw error? 
- return 0; - } - return daemonConfigLoader_->gpuContextCount(device); -} - -ConfigLoader::ConfigLoader() - : configUpdateIntervalSecs_(kConfigUpdateIntervalSecs), - onDemandConfigUpdateIntervalSecs_(kOnDemandConfigUpdateIntervalSecs), - stopFlag_(false), - onDemandSignal_(false) { -} - -void ConfigLoader::startThread() { - if (!updateThread_) { - // Create default base config here - at this point static initializers - // of extensions should have run and registered all config feature factories - std::lock_guard lock(configLock_); - if (!config_) { - config_ = std::make_unique(); - } - updateThread_ = - std::make_unique(&ConfigLoader::updateConfigThread, this); - } -} - -ConfigLoader::~ConfigLoader() { - if (updateThread_) { - stopFlag_ = true; - { - std::lock_guard lock(updateThreadMutex_); - updateThreadCondVar_.notify_one(); - } - updateThread_->join(); - } -#if !USE_GOOGLE_LOG - Logger::clearLoggerObservers(); -#endif // !USE_GOOGLE_LOG -} - -void ConfigLoader::handleOnDemandSignal() { - onDemandSignal_ = true; - { - std::lock_guard lock(updateThreadMutex_); - updateThreadCondVar_.notify_one(); - } -} - -const char* ConfigLoader::configFileName() { - if (!configFileName_) { - configFileName_ = getenv(kConfigFileEnvVar); - if (configFileName_ == nullptr) { - configFileName_ = kConfigFile; - } - } - return configFileName_; -} - -DaemonConfigLoader* ConfigLoader::daemonConfigLoader() { - if (!daemonConfigLoader_ && daemonConfigLoaderFactory()) { - daemonConfigLoader_ = daemonConfigLoaderFactory()(); - daemonConfigLoader_->setCommunicationFabric(config_->ipcFabricEnabled()); - } - return daemonConfigLoader_.get(); -} - -void ConfigLoader::updateBaseConfig() { - // First try reading local config file - // If that fails, read from daemon - // TODO: Invert these once daemon path fully rolled out - std::string config_str = readConfigFromConfigFile(configFileName()); - if (config_str.empty() && daemonConfigLoader()) { - // If local config file was not successfully 
loaded (e.g. not found) - // then try the daemon - config_str = daemonConfigLoader()->readBaseConfig(); - } - if (config_str != config_->source()) { - std::lock_guard lock(configLock_); - config_ = std::make_unique(); - config_->parse(config_str); - if (daemonConfigLoader()) { - daemonConfigLoader()->setCommunicationFabric(config_->ipcFabricEnabled()); - } - setupSignalHandler(config_->sigUsr2Enabled()); - SET_LOG_VERBOSITY_LEVEL( - config_->verboseLogLevel(), - config_->verboseLogModules()); - VLOG(0) << "Detected base config change"; - } -} - -void ConfigLoader::configureFromSignal( - time_point now, - Config& config) { - LOG(INFO) << "Received on-demand profiling signal, " - << "reading config from " << kOnDemandConfigFile; - // Reset start time to 0 in order to compute new default start time - const std::string config_str = "PROFILE_START_TIME=0\n" - + readConfigFromConfigFile(kOnDemandConfigFile); - config.parse(config_str); - config.setSignalDefaults(); - notifyHandlers(config); -} - -void ConfigLoader::configureFromDaemon( - time_point now, - Config& config) { - const std::string config_str = readOnDemandConfigFromDaemon(now); - if (config_str.empty()) { - return; - } - - LOG(INFO) << "Received config from dyno:\n" << config_str; - config.parse(config_str); - notifyHandlers(config); -} - -void ConfigLoader::updateConfigThread() { - auto now = system_clock::now(); - auto next_config_load_time = now; - auto next_on_demand_load_time = now + onDemandConfigUpdateIntervalSecs_; - seconds interval = configUpdateIntervalSecs_; - if (interval > onDemandConfigUpdateIntervalSecs_) { - interval = onDemandConfigUpdateIntervalSecs_; - } - auto onDemandConfig = std::make_unique(); - - // This can potentially sleep for long periods of time, so allow - // the desctructor to wake it to avoid a 5-minute long destruct period. 
- for (;;) { - { - std::unique_lock lock(updateThreadMutex_); - updateThreadCondVar_.wait_for(lock, interval); - } - if (stopFlag_) { - break; - } - now = system_clock::now(); - if (now > next_config_load_time) { - updateBaseConfig(); - next_config_load_time = now + configUpdateIntervalSecs_; - } - if (onDemandSignal_.exchange(false)) { - onDemandConfig = config_->clone(); - configureFromSignal(now, *onDemandConfig); - } else if (now > next_on_demand_load_time) { - onDemandConfig = std::make_unique(); - configureFromDaemon(now, *onDemandConfig); - next_on_demand_load_time = now + onDemandConfigUpdateIntervalSecs_; - } - if (onDemandConfig->verboseLogLevel() >= 0) { - LOG(INFO) << "Setting verbose level to " - << onDemandConfig->verboseLogLevel() - << " from on-demand config"; - SET_LOG_VERBOSITY_LEVEL( - onDemandConfig->verboseLogLevel(), - onDemandConfig->verboseLogModules()); - } - } -} - -bool ConfigLoader::hasNewConfig(const Config& oldConfig) { - std::lock_guard lock(configLock_); - return config_->timestamp() > oldConfig.timestamp(); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ConfigLoader.h b/plugins/tensorboard-plugins/libkineto/src/ConfigLoader.h deleted file mode 100644 index 4ce3468e48d..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ConfigLoader.h +++ /dev/null @@ -1,147 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "Config.h" - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "ILoggerObserver.h" - -namespace libkineto { - class LibkinetoApi; -} - -namespace KINETO_NAMESPACE { - -using namespace libkineto; -class DaemonConfigLoader; - -class ConfigLoader { - public: - - static ConfigLoader& instance(); - - enum ConfigKind { - ActivityProfiler = 0, - EventProfiler, - NumConfigKinds - }; - - struct ConfigHandler { - virtual ~ConfigHandler() {} - virtual bool canAcceptConfig() = 0; - virtual void acceptConfig(const Config& cfg) = 0; - }; - - void addHandler(ConfigKind kind, ConfigHandler* handler) { - std::lock_guard lock(updateThreadMutex_); - handlers_[kind].push_back(handler); - startThread(); - } - - void removeHandler(ConfigKind kind, ConfigHandler* handler) { - std::lock_guard lock(updateThreadMutex_); - auto it = std::find( - handlers_[kind].begin(), handlers_[kind].end(), handler); - if (it != handlers_[kind].end()) { - handlers_[kind].erase(it); - } - } - - void notifyHandlers(const Config& cfg) { - std::lock_guard lock(updateThreadMutex_); - for (auto& key_val : handlers_) { - for (ConfigHandler* handler : key_val.second) { - handler->acceptConfig(cfg); - } - } - } - - bool canHandlerAcceptConfig(ConfigKind kind) { - std::lock_guard lock(updateThreadMutex_); - for (ConfigHandler* handler : handlers_[kind]) { - if (!handler->canAcceptConfig()) { - return false; - } - } - return true; - } - - void initBaseConfig() { - bool init = false; - { - std::lock_guard lock(configLock_); - init = !config_ || config_->source().empty(); - } - if (init) { - updateBaseConfig(); - } - } - - inline std::unique_ptr getConfigCopy() { - std::lock_guard lock(configLock_); - return config_->clone(); - } - - bool hasNewConfig(const Config& oldConfig); - int contextCountForGpu(uint32_t gpu); - - void handleOnDemandSignal(); - - static void 
setDaemonConfigLoaderFactory( - std::function()> factory); - - private: - ConfigLoader(); - ~ConfigLoader(); - - const char* configFileName(); - DaemonConfigLoader* daemonConfigLoader(); - - void startThread(); - void updateConfigThread(); - void updateBaseConfig(); - - // Create configuration when receiving SIGUSR2 - void configureFromSignal( - std::chrono::time_point now, - Config& config); - - // Create configuration when receiving request from a daemon - void configureFromDaemon( - std::chrono::time_point now, - Config& config); - - std::string readOnDemandConfigFromDaemon( - std::chrono::time_point now); - - std::mutex configLock_; - std::atomic configFileName_{nullptr}; - std::unique_ptr config_; - std::unique_ptr daemonConfigLoader_; - std::map> handlers_; - - std::chrono::seconds configUpdateIntervalSecs_; - std::chrono::seconds onDemandConfigUpdateIntervalSecs_; - std::unique_ptr updateThread_; - std::condition_variable updateThreadCondVar_; - std::mutex updateThreadMutex_; - std::atomic_bool stopFlag_{false}; - std::atomic_bool onDemandSignal_{false}; - -#if !USE_GOOGLE_LOG - std::unique_ptr> loggerObservers_; - std::mutex loggerObserversMutex_; -#endif // !USE_GOOGLE_LOG -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.cpp b/plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.cpp deleted file mode 100644 index 1e909d5f9cf..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) Kineto Contributors - * All rights reserved. - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ - -#include "CudaDeviceProperties.h" - -#include -#include - -#include -#include - -#include "Logger.h" - -namespace KINETO_NAMESPACE { - -static const std::vector createDeviceProps() { - std::vector props; - int device_count; - cudaError_t error_id = cudaGetDeviceCount(&device_count); - // Return empty vector if error. - if (error_id != cudaSuccess) { - LOG(ERROR) << "cudaGetDeviceCount failed with code " << error_id; - return {}; - } - VLOG(0) << "Device count is " << device_count; - for (size_t i = 0; i < device_count; ++i) { - cudaDeviceProp prop; - error_id = cudaGetDeviceProperties(&prop, i); - // Return empty vector if any device property fail to get. - if (error_id != cudaSuccess) { - LOG(ERROR) << "cudaGetDeviceProperties failed with " << error_id; - return {}; - } - props.push_back(prop); - LOGGER_OBSERVER_ADD_DEVICE(i); - } - return props; -} - -static const std::vector& deviceProps() { - static const std::vector props = createDeviceProps(); - return props; -} - -static const std::string createDevicePropertiesJson( - size_t id, const cudaDeviceProp& props) { - return fmt::format(R"JSON( - {{ - "id": {}, "name": "{}", "totalGlobalMem": {}, - "computeMajor": {}, "computeMinor": {}, - "maxThreadsPerBlock": {}, "maxThreadsPerMultiprocessor": {}, - "regsPerBlock": {}, "regsPerMultiprocessor": {}, "warpSize": {}, - "sharedMemPerBlock": {}, "sharedMemPerMultiprocessor": {}, - "numSms": {}, "sharedMemPerBlockOptin": {} - }})JSON", - id, props.name, props.totalGlobalMem, - props.major, props.minor, - props.maxThreadsPerBlock, props.maxThreadsPerMultiProcessor, - props.regsPerBlock, props.regsPerMultiprocessor, props.warpSize, - props.sharedMemPerBlock, props.sharedMemPerMultiprocessor, - props.multiProcessorCount, props.sharedMemPerBlockOptin); -} - -static const std::string createDevicePropertiesJson() { - std::vector jsonProps; - const auto& props = deviceProps(); - for (size_t i = 0; i < props.size(); i++) { - 
jsonProps.push_back(createDevicePropertiesJson(i, props[i])); - } - return fmt::format("{}", fmt::join(jsonProps, ",")); -} - -const std::string& devicePropertiesJson() { - static std::string devicePropsJson = createDevicePropertiesJson(); - return devicePropsJson; -} - -int smCount(uint32_t deviceId) { - const std::vector &props = deviceProps(); - return deviceId >= props.size() ? 0 : - props[deviceId].multiProcessorCount; -} - -float kernelOccupancy( - uint32_t deviceId, - uint16_t registersPerThread, - int32_t staticSharedMemory, - int32_t dynamicSharedMemory, - int32_t blockX, - int32_t blockY, - int32_t blockZ, - float blocksPerSm) { - // Calculate occupancy - float occupancy = -1.0; - const std::vector &props = deviceProps(); - if (deviceId < props.size()) { - cudaOccFuncAttributes occFuncAttr; - occFuncAttr.maxThreadsPerBlock = INT_MAX; - occFuncAttr.numRegs = registersPerThread; - occFuncAttr.sharedSizeBytes = staticSharedMemory; - occFuncAttr.partitionedGCConfig = PARTITIONED_GC_OFF; - occFuncAttr.shmemLimitConfig = FUNC_SHMEM_LIMIT_DEFAULT; - occFuncAttr.maxDynamicSharedSizeBytes = 0; - const cudaOccDeviceState occDeviceState = {}; - int blockSize = blockX * blockY * blockZ; - size_t dynamicSmemSize = dynamicSharedMemory; - cudaOccResult occ_result; - cudaOccDeviceProp prop(props[deviceId]); - cudaOccError status = cudaOccMaxActiveBlocksPerMultiprocessor( - &occ_result, &prop, &occFuncAttr, &occDeviceState, - blockSize, dynamicSmemSize); - if (status == CUDA_OCC_SUCCESS) { - if (occ_result.activeBlocksPerMultiprocessor < blocksPerSm) { - blocksPerSm = occ_result.activeBlocksPerMultiprocessor; - } - occupancy = blocksPerSm * blockSize / - (float) props[deviceId].maxThreadsPerMultiProcessor; - } else { - LOG_EVERY_N(ERROR, 1000) << "Failed to calculate occupancy, status = " - << status; - } - } - return occupancy; -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.h 
b/plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.h deleted file mode 100644 index b731fde0c2a..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CudaDeviceProperties.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) Kineto Contributors - * All rights reserved. - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include -#include - -namespace KINETO_NAMESPACE { - -int smCount(uint32_t deviceId); - -// Return estimated achieved occupancy for a kernel -float kernelOccupancy( - uint32_t deviceId, - uint16_t registersPerThread, - int32_t staticSharedMemory, - int32_t dynamicSharedMemory, - int32_t blockX, - int32_t blockY, - int32_t blockZ, - float blocks_per_sm); - -// Return compute properties for each device as a json string -const std::string& devicePropertiesJson(); - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivity.h b/plugins/tensorboard-plugins/libkineto/src/CuptiActivity.h deleted file mode 100644 index 09c29504060..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivity.h +++ /dev/null @@ -1,114 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -#include "ITraceActivity.h" -#include "CuptiActivityPlatform.h" -#include "ThreadUtil.h" -#include "cupti_strings.h" - -namespace libkineto { - class ActivityLogger; -} - -namespace KINETO_NAMESPACE { - -using namespace libkineto; -struct TraceSpan; - -// These classes wrap the various CUPTI activity types -// into subclasses of ITraceActivity so that they can all be accessed -// using the ITraceActivity interface and logged via ActivityLogger. 
- -// Abstract base class, templated on Cupti activity type -template -struct CuptiActivity : public ITraceActivity { - explicit CuptiActivity(const T* activity, const ITraceActivity* linked) - : activity_(*activity), linked_(linked) {} - int64_t timestamp() const override { - return nsToUs(unixEpochTimestamp(activity_.start)); - } - int64_t duration() const override { - return nsToUs(activity_.end - activity_.start); - } - // TODO(T107507796): Deprecate ITraceActivity - int64_t correlationId() const override {return 0;} - int32_t getThreadId() const override {return 0;} - const ITraceActivity* linkedActivity() const override {return linked_;} - int flowType() const override {return kLinkAsyncCpuGpu;} - int flowId() const override {return correlationId();} - const T& raw() const {return activity_;} - const TraceSpan* traceSpan() const override {return nullptr;} - - protected: - const T& activity_; - const ITraceActivity* linked_{nullptr}; -}; - -// CUpti_ActivityAPI - CUDA runtime activities -struct RuntimeActivity : public CuptiActivity { - explicit RuntimeActivity( - const CUpti_ActivityAPI* activity, - const ITraceActivity* linked, - int32_t threadId) - : CuptiActivity(activity, linked), threadId_(threadId) {} - int64_t correlationId() const override {return activity_.correlationId;} - int64_t deviceId() const override {return processId();} - int64_t resourceId() const override {return threadId_;} - ActivityType type() const override {return ActivityType::CUDA_RUNTIME;} - bool flowStart() const override; - const std::string name() const override {return runtimeCbidName(activity_.cbid);} - void log(ActivityLogger& logger) const override; - const std::string metadataJson() const override; - - private: - const int32_t threadId_; -}; - -// CUpti_ActivityAPI - CUDA runtime activities -struct OverheadActivity : public CuptiActivity { - explicit OverheadActivity( - const CUpti_ActivityOverhead* activity, - const ITraceActivity* linked, - int32_t threadId=0) - : 
CuptiActivity(activity, linked), threadId_(threadId) {} - - int64_t timestamp() const override { - return nsToUs(unixEpochTimestamp(activity_.start)); - } - int64_t duration() const override { - return nsToUs(activity_.end - activity_.start); - } - // TODO: Update this with PID ordering - int64_t deviceId() const override {return -1;} - int64_t resourceId() const override {return threadId_;} - ActivityType type() const override {return ActivityType::OVERHEAD;} - bool flowStart() const override; - const std::string name() const override {return overheadKindString(activity_.overheadKind);} - void log(ActivityLogger& logger) const override; - const std::string metadataJson() const override; - - private: - const int32_t threadId_; -}; - -// Base class for GPU activities. -// Can also be instantiated directly. -template -struct GpuActivity : public CuptiActivity { - explicit GpuActivity(const T* activity, const ITraceActivity* linked) - : CuptiActivity(activity, linked) {} - int64_t correlationId() const override {return raw().correlationId;} - int64_t deviceId() const override {return raw().deviceId;} - int64_t resourceId() const override {return raw().streamId;} - ActivityType type() const override; - bool flowStart() const override {return false;} - const std::string name() const override; - void log(ActivityLogger& logger) const override; - const std::string metadataJson() const override; - const T& raw() const {return CuptiActivity::raw();} -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivity.tpp b/plugins/tensorboard-plugins/libkineto/src/CuptiActivity.tpp deleted file mode 100644 index 1ff2dafe06b..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivity.tpp +++ /dev/null @@ -1,111 +0,0 @@ - /* - * Copyright (c) Facebook, Inc. and its affiliates. - * All rights reserved. 
- * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "CuptiActivity.h" - -#include - -#include "Demangle.h" -#include "output_base.h" - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -template<> -inline const std::string GpuActivity::name() const { - return demangle(raw().name); -} - -template<> -inline ActivityType GpuActivity::type() const { - return ActivityType::CONCURRENT_KERNEL; -} - -static inline std::string memcpyName(uint8_t kind, uint8_t src, uint8_t dst) { - return fmt::format( - "Memcpy {} ({} -> {})", - memcpyKindString((CUpti_ActivityMemcpyKind)kind), - memoryKindString((CUpti_ActivityMemoryKind)src), - memoryKindString((CUpti_ActivityMemoryKind)dst)); -} - -template<> -inline ActivityType GpuActivity::type() const { - return ActivityType::GPU_MEMCPY; -} - -template<> -inline const std::string GpuActivity::name() const { - return memcpyName(raw().copyKind, raw().srcKind, raw().dstKind); -} - -template<> -inline ActivityType GpuActivity::type() const { - return ActivityType::GPU_MEMCPY; -} - -template<> -inline const std::string GpuActivity::name() const { - return memcpyName(raw().copyKind, raw().srcKind, raw().dstKind); -} - -template<> -inline const std::string GpuActivity::name() const { - const char* memory_kind = - memoryKindString((CUpti_ActivityMemoryKind)raw().memoryKind); - return fmt::format("Memset ({})", memory_kind); -} - -template<> -inline ActivityType GpuActivity::type() const { - return ActivityType::GPU_MEMSET; -} - -inline void RuntimeActivity::log(ActivityLogger& logger) const { - logger.handleActivity(*this); -} - -inline void OverheadActivity::log(ActivityLogger& logger) const { - logger.handleActivity(*this); -} - -inline bool OverheadActivity::flowStart() const { - return false; -} - -inline const std::string OverheadActivity::metadataJson() const { - return ""; -} - -template -inline void 
GpuActivity::log(ActivityLogger& logger) const { - logger.handleGpuActivity(*this); -} - -inline bool RuntimeActivity::flowStart() const { - return activity_.cbid == CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 || - (activity_.cbid >= CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020 && - activity_.cbid <= CUPTI_RUNTIME_TRACE_CBID_cudaMemset2DAsync_v3020) || - activity_.cbid == - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000 || - activity_.cbid == - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000; -} - -inline const std::string RuntimeActivity::metadataJson() const { - return fmt::format(R"JSON( - "cbid": {}, "correlation": {})JSON", - activity_.cbid, activity_.correlationId); -} - -template -inline const std::string GpuActivity::metadataJson() const { - return ""; -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.cpp deleted file mode 100644 index 5718bed2f89..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.cpp +++ /dev/null @@ -1,343 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "CuptiActivityApi.h" - -#include -#include - -#include "cupti_call.h" -#include "Logger.h" - -using namespace std::chrono; - -namespace KINETO_NAMESPACE { - -// TODO: do we want this to be configurable? -// Set to 2MB to avoid constantly creating buffers (espeically for networks -// that has many small memcpy such as sparseNN) -// Consider putting this on huge pages? 
-constexpr size_t kBufSize(2 * 1024 * 1024); - -CuptiActivityApi& CuptiActivityApi::singleton() { - static CuptiActivityApi instance; - return instance; -} - -void CuptiActivityApi::pushCorrelationID(int id, CorrelationFlowType type) { -#ifdef HAS_CUPTI - if (!singleton().externalCorrelationEnabled_) { - return; - } - VLOG(2) << "pushCorrelationID(" << id << ")"; - switch(type) { - case Default: - CUPTI_CALL(cuptiActivityPushExternalCorrelationId( - CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0, id)); - break; - case User: - CUPTI_CALL(cuptiActivityPushExternalCorrelationId( - CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1, id)); - } -#endif -} - -void CuptiActivityApi::popCorrelationID(CorrelationFlowType type) { -#ifdef HAS_CUPTI - if (!singleton().externalCorrelationEnabled_) { - return; - } - switch(type) { - case Default: - CUPTI_CALL(cuptiActivityPopExternalCorrelationId( - CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0, nullptr)); - break; - case User: - CUPTI_CALL(cuptiActivityPopExternalCorrelationId( - CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1, nullptr)); - } -#endif -} - -static int getSMCount() { -#ifdef HAS_CUPTI - // There may be a simpler way to get the number of SMs.... 
- // Look for domain_d - this has 80 instances on Volta and - // 56 instances on Pascal, corresponding to the number of SMs - // FIXME: This does not work on Turing and later - uint32_t domainCount{0}; - CUPTI_CALL(cuptiDeviceGetNumEventDomains(0, &domainCount)); - std::vector ids(domainCount); - size_t sz = sizeof(CUpti_EventDomainID) * domainCount; - CUPTI_CALL(cuptiDeviceEnumEventDomains(0, &sz, ids.data())); - for (CUpti_EventDomainID id : ids) { - char name[16]; - name[0] = '\0'; - sz = sizeof(name); - CUPTI_CALL(cuptiEventDomainGetAttribute( - id, CUPTI_EVENT_DOMAIN_ATTR_NAME, &sz, name)); - if (strncmp(name, "domain_d", sz) == 0) { - uint32_t count{0}; - sz = sizeof(count); - CUPTI_CALL(cuptiDeviceGetEventDomainAttribute( - 0, id, CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, &sz, &count)); - return count; - } - } -#endif - - return -1; -} - -int CuptiActivityApi::smCount() { - static int sm_count = getSMCount(); - return sm_count; -} - -static bool nextActivityRecord( - uint8_t* buffer, - size_t valid_size, - CUpti_Activity*& record) { -#ifdef HAS_CUPTI - CUptiResult status = CUPTI_CALL_NOWARN( - cuptiActivityGetNextRecord(buffer, valid_size, &record)); - if (status != CUPTI_SUCCESS) { - if (status != CUPTI_ERROR_MAX_LIMIT_REACHED) { - CUPTI_CALL(status); - } - record = nullptr; - } -#endif - return record != nullptr; -} - -void CuptiActivityApi::setMaxBufferSize(int size) { - maxGpuBufferCount_ = 1 + size / kBufSize; -} - -void CuptiActivityApi::forceLoadCupti() { -#ifdef HAS_CUPTI - CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); -#endif -} - -#ifdef HAS_CUPTI -void CUPTIAPI CuptiActivityApi::bufferRequestedTrampoline( - uint8_t** buffer, - size_t* size, - size_t* maxNumRecords) { - singleton().bufferRequested(buffer, size, maxNumRecords); -} - -void CuptiActivityApi::bufferRequested( - uint8_t** buffer, size_t* size, size_t* maxNumRecords) { - std::lock_guard guard(mutex_); - if (allocatedGpuTraceBuffers_.size() >= 
maxGpuBufferCount_) { - stopCollection = true; - LOG(WARNING) << "Exceeded max GPU buffer count (" - << allocatedGpuTraceBuffers_.size() - << " > " << maxGpuBufferCount_ - << ") - terminating tracing"; - } - - auto buf = std::make_unique(kBufSize); - *buffer = buf->data(); - *size = kBufSize; - - allocatedGpuTraceBuffers_[*buffer] = std::move(buf); - - *maxNumRecords = 0; -} -#endif - -std::unique_ptr -CuptiActivityApi::activityBuffers() { - { - std::lock_guard guard(mutex_); - if (allocatedGpuTraceBuffers_.empty()) { - return nullptr; - } - } - -#ifdef HAS_CUPTI - VLOG(1) << "Flushing GPU activity buffers"; - time_point t1; - if (VLOG_IS_ON(1)) { - t1 = system_clock::now(); - } - // Can't hold mutex_ during this call, since bufferCompleted - // will be called by libcupti and mutex_ is acquired there. - CUPTI_CALL(cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED)); - if (VLOG_IS_ON(1)) { - flushOverhead = - duration_cast(system_clock::now() - t1).count(); - } -#endif - std::lock_guard guard(mutex_); - // Transfer ownership of buffers to caller. A new map is created on-demand. 
- return std::move(readyGpuTraceBuffers_); -} - -#ifdef HAS_CUPTI -int CuptiActivityApi::processActivitiesForBuffer( - uint8_t* buf, - size_t validSize, - std::function handler) { - int count = 0; - if (buf && validSize) { - CUpti_Activity* record{nullptr}; - while ((nextActivityRecord(buf, validSize, record))) { - handler(record); - ++count; - } - } - return count; -} -#endif - -const std::pair CuptiActivityApi::processActivities( - CuptiActivityBufferMap& buffers, - std::function handler) { - std::pair res{0, 0}; -#ifdef HAS_CUPTI - for (auto& pair : buffers) { - // No lock needed - only accessed from this thread - auto& buf = pair.second; - res.first += processActivitiesForBuffer(buf->data(), buf->size(), handler); - res.second += buf->size(); - } -#endif - return res; -} - -void CuptiActivityApi::clearActivities() { - { - std::lock_guard guard(mutex_); - if (allocatedGpuTraceBuffers_.empty()) { - return; - } - } - // Can't hold mutex_ during this call, since bufferCompleted - // will be called by libcupti and mutex_ is acquired there. -#ifdef HAS_CUPTI - CUPTI_CALL(cuptiActivityFlushAll(0)); -#endif - // FIXME: We might want to make sure we reuse - // the same memory during warmup and tracing. - // Also, try to use the amount of memory required - // for active tracing during warmup. 
- std::lock_guard guard(mutex_); - // Throw away ready buffers as a result of above flush - readyGpuTraceBuffers_ = nullptr; -} - -#ifdef HAS_CUPTI -void CUPTIAPI CuptiActivityApi::bufferCompletedTrampoline( - CUcontext ctx, - uint32_t streamId, - uint8_t* buffer, - size_t /* unused */, - size_t validSize) { - singleton().bufferCompleted(ctx, streamId, buffer, 0, validSize); -} - -void CuptiActivityApi::bufferCompleted( - CUcontext ctx, - uint32_t streamId, - uint8_t* buffer, - size_t /* unused */, - size_t validSize) { - - std::lock_guard guard(mutex_); - auto it = allocatedGpuTraceBuffers_.find(buffer); - if (it == allocatedGpuTraceBuffers_.end()) { - LOG(ERROR) << "bufferCompleted called with unknown buffer: " - << (void*) buffer; - return; - } - - if (!readyGpuTraceBuffers_) { - readyGpuTraceBuffers_ = std::make_unique(); - } - // Set valid size of buffer before moving to ready map - it->second->setSize(validSize); - (*readyGpuTraceBuffers_)[it->first] = std::move(it->second); - allocatedGpuTraceBuffers_.erase(it); - - // report any records dropped from the queue; to avoid unnecessary cupti - // API calls, we make it report only in verbose mode (it doesn't happen - // often in our testing anyways) - if (VLOG_IS_ON(1)) { - size_t dropped = 0; - CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); - if (dropped != 0) { - LOG(WARNING) << "Dropped " << dropped << " activity records"; - } - } -} -#endif - -void CuptiActivityApi::enableCuptiActivities( - const std::set& selected_activities) { -#ifdef HAS_CUPTI - static bool registered = false; - if (!registered) { - CUPTI_CALL( - cuptiActivityRegisterCallbacks(bufferRequestedTrampoline, bufferCompletedTrampoline)); - } - - externalCorrelationEnabled_ = false; - for (const auto& activity : selected_activities) { - if (activity == ActivityType::GPU_MEMCPY) { - CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); - } - if (activity == ActivityType::GPU_MEMSET) { - 
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET)); - } - if (activity == ActivityType::CONCURRENT_KERNEL) { - CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); - } - if (activity == ActivityType::EXTERNAL_CORRELATION) { - CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION)); - externalCorrelationEnabled_ = true; - } - if (activity == ActivityType::CUDA_RUNTIME) { - CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); - } - if (activity == ActivityType::OVERHEAD) { - CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD)); - } - } -#endif - - // Explicitly enabled, so reset this flag if set - stopCollection = false; -} - -void CuptiActivityApi::disableCuptiActivities( - const std::set& selected_activities) { -#ifdef HAS_CUPTI - for (const auto& activity : selected_activities) { - if (activity == ActivityType::GPU_MEMCPY) { - CUPTI_CALL(cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY)); - } - if (activity == ActivityType::GPU_MEMSET) { - CUPTI_CALL(cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMSET)); - } - if (activity == ActivityType::CONCURRENT_KERNEL) { - CUPTI_CALL(cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); - } - if (activity == ActivityType::EXTERNAL_CORRELATION) { - CUPTI_CALL(cuptiActivityDisable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION)); - } - if (activity == ActivityType::CUDA_RUNTIME) { - CUPTI_CALL(cuptiActivityDisable(CUPTI_ACTIVITY_KIND_RUNTIME)); - } - if (activity == ActivityType::OVERHEAD) { - CUPTI_CALL(cuptiActivityDisable(CUPTI_ACTIVITY_KIND_OVERHEAD)); - } - } - externalCorrelationEnabled_ = false; -#endif -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.h b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.h deleted file mode 100644 index 92af51ecac9..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityApi.h +++ /dev/null @@ -1,100 +0,0 @@ -// (c) Meta Platforms, Inc. 
and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#ifdef HAS_CUPTI -#include -#endif - -#include "ActivityType.h" -#include "CuptiActivityBuffer.h" - - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -#ifndef HAS_CUPTI -using CUpti_Activity = void; -#endif - -class CuptiActivityApi { - public: - enum CorrelationFlowType { - Default, - User - }; - - CuptiActivityApi() = default; - CuptiActivityApi(const CuptiActivityApi&) = delete; - CuptiActivityApi& operator=(const CuptiActivityApi&) = delete; - - virtual ~CuptiActivityApi() {} - - static CuptiActivityApi& singleton(); - - virtual int smCount(); - static void pushCorrelationID(int id, CorrelationFlowType type); - static void popCorrelationID(CorrelationFlowType type); - - void enableCuptiActivities( - const std::set& selected_activities); - void disableCuptiActivities( - const std::set& selected_activities); - void clearActivities(); - - virtual std::unique_ptr activityBuffers(); - - virtual const std::pair processActivities( - CuptiActivityBufferMap&, - std::function handler); - - void setMaxBufferSize(int size); - - std::atomic_bool stopCollection{false}; - int64_t flushOverhead{0}; - - static void forceLoadCupti(); - - private: -#ifdef HAS_CUPTI - int processActivitiesForBuffer( - uint8_t* buf, - size_t validSize, - std::function handler); - static void CUPTIAPI - bufferRequestedTrampoline(uint8_t** buffer, size_t* size, size_t* maxNumRecords); - static void CUPTIAPI bufferCompletedTrampoline( - CUcontext ctx, - uint32_t streamId, - uint8_t* buffer, - size_t /* unused */, - size_t validSize); -#endif // HAS_CUPTI - - int maxGpuBufferCount_{0}; - CuptiActivityBufferMap allocatedGpuTraceBuffers_; - std::unique_ptr readyGpuTraceBuffers_; - std::mutex mutex_; - bool externalCorrelationEnabled_{false}; - - protected: -#ifdef HAS_CUPTI - void bufferRequested(uint8_t** buffer, size_t* size, size_t* maxNumRecords); - void 
bufferCompleted( - CUcontext ctx, - uint32_t streamId, - uint8_t* buffer, - size_t /* unused */, - size_t validSize); -#endif -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityBuffer.h b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityBuffer.h deleted file mode 100644 index 1c3fbef62c8..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityBuffer.h +++ /dev/null @@ -1,51 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "ITraceActivity.h" - -namespace KINETO_NAMESPACE { - -class CuptiActivityBuffer { - public: - explicit CuptiActivityBuffer(size_t size) : size_(size) { - buf_.reserve(size); - } - CuptiActivityBuffer() = delete; - CuptiActivityBuffer& operator=(const CuptiActivityBuffer&) = delete; - CuptiActivityBuffer(CuptiActivityBuffer&&) = default; - CuptiActivityBuffer& operator=(CuptiActivityBuffer&&) = default; - - size_t size() const { - return size_; - } - - void setSize(size_t size) { - assert(size <= buf_.capacity()); - size_ = size; - } - - uint8_t* data() { - return buf_.data(); - } - - private: - - std::vector buf_; - size_t size_; - - std::vector> wrappers_; -}; - -using CuptiActivityBufferMap = - std::map>; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.cpp deleted file mode 100644 index fa2ef2f3a8c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include - -namespace chrono = std::chrono; - -namespace KINETO_NAMESPACE { - -#ifdef _WIN32 -uint64_t epochs_diff() { - // On Windows, steady_clock wraps the QueryPerformanceCounter function. 
- // https://docs.microsoft.com/en-us/cpp/standard-library/steady-clock-struct?view=msvc-160 - auto steady = - chrono::time_point_cast(chrono::steady_clock::now()); - auto system = - chrono::time_point_cast(chrono::system_clock::now()); - - auto time_since_unix = system.time_since_epoch().count(); - auto time_since_boot = steady.time_since_epoch().count(); - return time_since_unix - time_since_boot; -} - -uint64_t unixEpochTimestamp(uint64_t ts) { - static uint64_t diff = epochs_diff(); - return ts + diff; -} -#else -uint64_t unixEpochTimestamp(uint64_t ts) { - return ts; -} -#endif // _WIN32 - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.h b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.h deleted file mode 100644 index 78de8373d5f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityPlatform.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include - -namespace KINETO_NAMESPACE { - -// cupti's timestamps are platform specific. This function convert the raw -// cupti timestamp to time since unix epoch. So that on different platform, -// correction can work correctly. -uint64_t unixEpochTimestamp(uint64_t ts); - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.cpp deleted file mode 100644 index 97c23ef047d..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.cpp +++ /dev/null @@ -1,841 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "CuptiActivityProfiler.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAS_CUPTI -#include -#endif - -#include "Config.h" -#include "time_since_epoch.h" -#ifdef HAS_CUPTI -#include "CuptiActivity.h" -#include "CuptiActivity.tpp" -#include "CuptiActivityApi.h" -#endif // HAS_CUPTI -#ifdef HAS_ROCTRACER -#include "RoctracerActivityApi.h" -#endif -#include "output_base.h" - -#include "Logger.h" -#include "ThreadUtil.h" - -using namespace std::chrono; -using namespace libkineto; -using std::string; - -namespace KINETO_NAMESPACE { - -void CuptiActivityProfiler::transferCpuTrace( - std::unique_ptr cpuTrace) { - std::lock_guard guard(mutex_); - const string& trace_name = cpuTrace->span.name; - if (currentRunloopState_ != RunloopState::CollectTrace && - currentRunloopState_ != RunloopState::ProcessTrace) { - VLOG(0) << "Trace collection not in progress - discarding span " - << trace_name; - return; - } - - cpuTrace->span.iteration = iterationCountMap_[trace_name]++; - - VLOG(0) << "Received iteration " << cpuTrace->span.iteration << " of span " - << trace_name << " (" << cpuTrace->activities.size() << " activities / " - << cpuTrace->gpuOpCount << " gpu activities)"; - traceBuffers_->cpu.push_back(std::move(cpuTrace)); -} - -#ifdef HAS_ROCTRACER -CuptiActivityProfiler::CuptiActivityProfiler(RoctracerActivityApi& cupti, bool cpuOnly) -#else -CuptiActivityProfiler::CuptiActivityProfiler(CuptiActivityApi& cupti, bool cpuOnly) -#endif - : cupti_(cupti), - flushOverhead_{0, 0}, - setupOverhead_{0, 0}, - cpuOnly_{cpuOnly}, - currentRunloopState_{RunloopState::WaitForRequest}, - stopCollection_{false} {} - -void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) { - LOG(INFO) << "Processing " << traceBuffers_->cpu.size() - << " CPU buffers"; - VLOG(0) << "Profile time range: " << captureWindowStartTime_ << " - " - << captureWindowEndTime_; - logger.handleTraceStart(metadata_); - for (auto& cpu_trace : 
traceBuffers_->cpu) { - string trace_name = cpu_trace->span.name; - VLOG(0) << "Processing CPU buffer for " << trace_name << " (" - << cpu_trace->span.iteration << ") - " - << cpu_trace->activities.size() << " records"; - VLOG(0) << "Span time range: " << cpu_trace->span.startTime << " - " - << cpu_trace->span.endTime; - processCpuTrace(*cpu_trace, logger); - LOGGER_OBSERVER_ADD_EVENT_COUNT(cpu_trace->activities.size()); - } - -#ifdef HAS_CUPTI - if (!cpuOnly_) { - VLOG(0) << "Retrieving GPU activity buffers"; - traceBuffers_->gpu = cupti_.activityBuffers(); - if (VLOG_IS_ON(1)) { - addOverheadSample(flushOverhead_, cupti_.flushOverhead); - } - if (traceBuffers_->gpu) { - const auto count_and_size = cupti_.processActivities( - *traceBuffers_->gpu, - std::bind(&CuptiActivityProfiler::handleCuptiActivity, this, std::placeholders::_1, &logger)); - LOG(INFO) << "Processed " << count_and_size.first - << " GPU records (" << count_and_size.second << " bytes)"; - LOGGER_OBSERVER_ADD_EVENT_COUNT(count_and_size.first); - } - } -#endif // HAS_CUPTI -#ifdef HAS_ROCTRACER - if (!cpuOnly_) { - VLOG(0) << "Retrieving GPU activity buffers"; - const int count = cupti_.processActivities(logger); - LOG(INFO) << "Processed " << count - << " GPU records"; - LOGGER_OBSERVER_ADD_EVENT_COUNT(count); - } -#endif // HAS_ROCTRACER - - for (const auto& session : sessions_){ - LOG(INFO) << "Processing child profiler trace"; - session->processTrace(logger); - } - - finalizeTrace(*config_, logger); -} - -CuptiActivityProfiler::CpuGpuSpanPair& CuptiActivityProfiler::recordTraceSpan( - TraceSpan& span, int gpuOpCount) { - TraceSpan gpu_span(gpuOpCount, span.iteration, span.name, "GPU: "); - auto& iterations = traceSpans_[span.name]; - iterations.push_back({span, gpu_span}); - return iterations.back(); -} - -void CuptiActivityProfiler::processCpuTrace( - libkineto::CpuTraceBuffer& cpuTrace, - ActivityLogger& logger) { - if (cpuTrace.activities.size() == 0) { - LOG(WARNING) << "CPU trace is empty!"; 
- return; - } - - CpuGpuSpanPair& span_pair = recordTraceSpan(cpuTrace.span, cpuTrace.gpuOpCount); - TraceSpan& cpu_span = span_pair.first; - for (auto const& act : cpuTrace.activities) { - VLOG(2) << act.correlationId() << ": OP " << act.activityName; - if (config_->selectedActivityTypes().count(act.type())) { - act.log(logger); - } - clientActivityTraceMap_[act.correlationId()] = &span_pair; - activityMap_[act.correlationId()] = &act; - - recordThreadInfo(act.resourceId(), act.getThreadId(), act.deviceId()); - } - logger.handleTraceSpan(cpu_span); -} - -#ifdef HAS_CUPTI -inline void CuptiActivityProfiler::handleCorrelationActivity( - const CUpti_ActivityExternalCorrelation* correlation) { - if (correlation->externalKind == CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0) { - cpuCorrelationMap_[correlation->correlationId] = correlation->externalId; - } else if (correlation->externalKind == CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1){ - userCorrelationMap_[correlation->correlationId] = correlation->externalId; - } else { - LOG(ERROR) << "Invalid CUpti_ActivityExternalCorrelation sent to handleCuptiActivity"; - } -} -#endif // HAS_CUPTI - -static GenericTraceActivity createUserGpuSpan( - const libkineto::ITraceActivity& cpuTraceActivity, - const libkineto::ITraceActivity& gpuTraceActivity) { - GenericTraceActivity res( - *cpuTraceActivity.traceSpan(), - ActivityType::GPU_USER_ANNOTATION, - cpuTraceActivity.name()); - res.startTime = gpuTraceActivity.timestamp(); - res.device = gpuTraceActivity.deviceId(); - res.resource = gpuTraceActivity.resourceId(); - res.endTime = - gpuTraceActivity.timestamp() + gpuTraceActivity.duration(); - res.id = cpuTraceActivity.correlationId(); - return res; -} - -void CuptiActivityProfiler::GpuUserEventMap::insertOrExtendEvent( - const ITraceActivity& userActivity, - const ITraceActivity& gpuActivity) { - StreamKey key(gpuActivity.deviceId(), gpuActivity.resourceId()); - CorrelationSpanMap& correlationSpanMap = streamSpanMap_[key]; - auto it = 
correlationSpanMap.find(userActivity.correlationId()); - if (it == correlationSpanMap.end()) { - auto it_success = correlationSpanMap.insert({ - userActivity.correlationId(), createUserGpuSpan(userActivity, gpuActivity) - }); - it = it_success.first; - } - GenericTraceActivity& span = it->second; - if (gpuActivity.timestamp() < span.startTime || span.startTime == 0) { - span.startTime = gpuActivity.timestamp(); - } - int64_t gpu_activity_end = gpuActivity.timestamp() + gpuActivity.duration(); - if (gpu_activity_end > span.endTime) { - span.endTime = gpu_activity_end; - } -} - -const CuptiActivityProfiler::CpuGpuSpanPair& CuptiActivityProfiler::defaultTraceSpan() { - static TraceSpan span(0, 0, "Unknown", ""); - static CpuGpuSpanPair span_pair(span, span); - return span_pair; -} - -void CuptiActivityProfiler::GpuUserEventMap::logEvents(ActivityLogger *logger) { - for (auto const& streamMapPair : streamSpanMap_) { - for (auto const& correlationSpanPair : streamMapPair.second) { - correlationSpanPair.second.log(*logger); - } - } -} - -#ifdef HAS_CUPTI -inline bool CuptiActivityProfiler::outOfRange(const ITraceActivity& act) { - bool out_of_range = act.timestamp() < captureWindowStartTime_ || - (act.timestamp() + act.duration()) > captureWindowEndTime_; - if (out_of_range) { - VLOG(2) << "TraceActivity outside of profiling window: " << act.name() - << " (" << act.timestamp() << " < " << captureWindowStartTime_ << " or " - << (act.timestamp() + act.duration()) << " > " << captureWindowEndTime_; - } - return out_of_range; -} - -inline static bool isBlockListedRuntimeCbid(CUpti_CallbackId cbid) { - // Some CUDA calls that are very frequent and also not very interesting. - // Filter these out to reduce trace size. 
- if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 || - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 || - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 || - // Don't care about cudaEvents - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 || - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 || - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 || - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 || - cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020) { - return true; - } - - return false; -} - -void CuptiActivityProfiler::handleRuntimeActivity( - const CUpti_ActivityAPI* activity, - ActivityLogger* logger) { - if (isBlockListedRuntimeCbid(activity->cbid)) { - return; - } - VLOG(2) << activity->correlationId - << ": CUPTI_ACTIVITY_KIND_RUNTIME, cbid=" << activity->cbid - << " tid=" << activity->threadId; - int32_t tid = activity->threadId; - const auto& it = resourceInfo_.find({processId(), tid}); - if (it != resourceInfo_.end()) { - tid = it->second.id; - } - const ITraceActivity* linked = linkedActivity( - activity->correlationId, cpuCorrelationMap_); - const auto& runtime_activity = - traceBuffers_->addActivityWrapper(RuntimeActivity(activity, linked, tid)); - checkTimestampOrder(&runtime_activity); - if (outOfRange(runtime_activity)) { - return; - } - runtime_activity.log(*logger); -} - -void CuptiActivityProfiler::handleOverheadActivity( - const CUpti_ActivityOverhead* activity, - ActivityLogger* logger) { - VLOG(2) << ": CUPTI_ACTIVITY_KIND_OVERHEAD" << " overheadKind=" << activity->overheadKind; - - const auto& overhead_activity = - traceBuffers_->addActivityWrapper(OverheadActivity(activity, nullptr)); - overhead_activity.log(*logger); -} - - -inline void CuptiActivityProfiler::updateGpuNetSpan( - const ITraceActivity& gpuOp) { - if (!gpuOp.linkedActivity()) { - VLOG(0) << "Missing linked activity"; - return; - } - const auto& it = clientActivityTraceMap_.find( - 
gpuOp.linkedActivity()->correlationId()); - if (it == clientActivityTraceMap_.end()) { - // No correlation id mapping? - return; - } - TraceSpan& gpu_span = it->second->second; - if (gpuOp.timestamp() < gpu_span.startTime || gpu_span.startTime == 0) { - gpu_span.startTime = gpuOp.timestamp(); - } - if ((gpuOp.timestamp() + gpuOp.duration()) > gpu_span.endTime) { - gpu_span.endTime = gpuOp.timestamp() + gpuOp.duration(); - } -} - -// I've observed occasional broken timestamps attached to GPU events... -void CuptiActivityProfiler::checkTimestampOrder(const ITraceActivity* act1) { - // Correlated GPU runtime activity cannot - // have timestamp greater than the GPU activity's - const auto& it = correlatedCudaActivities_.find(act1->correlationId()); - if (it == correlatedCudaActivities_.end()) { - correlatedCudaActivities_.insert({act1->correlationId(), act1}); - return; - } - - // Activities may be appear in the buffers out of order. - // If we have a runtime activity in the map, it should mean that we - // have a GPU activity passed in, and vice versa. - const ITraceActivity* act2 = it->second; - if (act2->type() == ActivityType::CUDA_RUNTIME) { - // Buffer is out-of-order. - // Swap so that runtime activity is first for the comparison below. 
- std::swap(act1, act2); - } - if (act1->timestamp() > act2->timestamp()) { - LOG(WARNING) << "GPU op timestamp (" << act2->timestamp() - << ") < runtime timestamp (" << act1->timestamp() << ") by " - << act1->timestamp() - act2->timestamp() << "us"; - LOG(WARNING) << "Name: " << act2->name() - << " Device: " << act2->deviceId() - << " Stream: " << act2->resourceId(); - } -} - -inline void CuptiActivityProfiler::handleGpuActivity( - const ITraceActivity& act, - ActivityLogger* logger) { - if (outOfRange(act)) { - return; - } - checkTimestampOrder(&act); - VLOG(2) << act.correlationId() << ": " - << act.name(); - recordStream(act.deviceId(), act.resourceId(), ""); - act.log(*logger); - updateGpuNetSpan(act); - if (config_->selectedActivityTypes().count(ActivityType::GPU_USER_ANNOTATION)) { - const auto& it = userCorrelationMap_.find(act.correlationId()); - if (it != userCorrelationMap_.end()) { - const auto& it2 = activityMap_.find(it->second); - if (it2 != activityMap_.end()) { - recordStream(act.deviceId(), act.resourceId(), "context"); - gpuUserEventMap_.insertOrExtendEvent(*it2->second, act); - } - } - } -} - -const ITraceActivity* CuptiActivityProfiler::linkedActivity( - int32_t correlationId, - const std::unordered_map& correlationMap) { - const auto& it = correlationMap.find(correlationId); - if (it != correlationMap.end()) { - const auto& it2 = activityMap_.find(it->second); - if (it2 != activityMap_.end()) { - return it2->second; - } - } - return nullptr; -} - -template -inline void CuptiActivityProfiler::handleGpuActivity( - const T* act, ActivityLogger* logger) { - const ITraceActivity* linked = linkedActivity( - act->correlationId, cpuCorrelationMap_); - const auto& gpu_activity = - traceBuffers_->addActivityWrapper(GpuActivity(act, linked)); - handleGpuActivity(gpu_activity, logger); -} - -void CuptiActivityProfiler::handleCuptiActivity(const CUpti_Activity* record, ActivityLogger* logger) { - switch (record->kind) { - case 
CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION: - handleCorrelationActivity( - reinterpret_cast( - record)); - break; - case CUPTI_ACTIVITY_KIND_RUNTIME: - handleRuntimeActivity( - reinterpret_cast(record), logger); - break; - case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: - handleGpuActivity( - reinterpret_cast(record), logger); - break; - case CUPTI_ACTIVITY_KIND_MEMCPY: - handleGpuActivity( - reinterpret_cast(record), logger); - break; - case CUPTI_ACTIVITY_KIND_MEMCPY2: - handleGpuActivity( - reinterpret_cast(record), logger); - break; - case CUPTI_ACTIVITY_KIND_MEMSET: - handleGpuActivity( - reinterpret_cast(record), logger); - break; - case CUPTI_ACTIVITY_KIND_OVERHEAD: - handleOverheadActivity (reinterpret_cast(record), logger); - break; - default: - LOG(WARNING) << "Unexpected activity type: " << record->kind; - break; - } -} -#endif // HAS_CUPTI - -void CuptiActivityProfiler::configureChildProfilers() { - // If child profilers are enabled create profiler sessions - for (auto& profiler: profilers_) { - int64_t start_time_ms = duration_cast( - profileStartTime_.time_since_epoch()).count(); - LOG(INFO) << "Running child profiler " << profiler->name() << " for " - << config_->activitiesDuration().count() << " ms"; - auto session = profiler->configure( - start_time_ms, - config_->activitiesDuration().count(), - config_->selectedActivityTypes(), - *config_ - ); - if (session) { - sessions_.push_back(std::move(session)); - } - } -} - -void CuptiActivityProfiler::configure( - const Config& config, - const time_point& now) { - std::lock_guard guard(mutex_); - if (isActive()) { - LOG(ERROR) << "CuptiActivityProfiler already busy, terminating"; - return; - } - - config_ = config.clone(); - - if (config_->activitiesDuration().count() == 0) { - // Use default if not specified - config_->setActivitiesDuration( - config_->activitiesDurationDefault()); - } - - // Ensure we're starting in a clean state - resetTraceData(); - -#if !USE_GOOGLE_LOG - // Add a LoggerObserverCollector to 
collect all logs during the trace. - loggerCollectorMetadata_ = std::make_unique(); - Logger::addLoggerObserver(loggerCollectorMetadata_.get()); -#endif // !USE_GOOGLE_LOG - - profileStartTime_ = config_->requestTimestamp(); - - if (config_->hasProfileStartIteration()) { - profileStartIter_ = config_->profileStartIteration(); - profileEndIter_ = profileStartIter_ + config_->activitiesRunIterations(); - } else { - - profileStartIter_ = -1; - profileEndIter_ = (std::numeric_limits::max)(); - - if (profileStartTime_ < now) { - LOG(ERROR) << "Not starting tracing - start timestamp is in the past. Time difference (ms): " << duration_cast(now - profileStartTime_).count(); - return; - } else if ((profileStartTime_ - now) < config_->activitiesWarmupDuration()) { - LOG(ERROR) << "Not starting tracing - insufficient time for warmup. Time to warmup (ms): " << duration_cast(profileStartTime_ - now).count() ; - return; - } - } - - if (LOG_IS_ON(INFO)) { - config_->printActivityProfilerConfig(LIBKINETO_DBG_STREAM); - } - if (!cpuOnly_ && !libkineto::api().client()) { - if (profileStartIter_ < 0) { - LOG(INFO) << "GPU-only tracing for " - << config_->activitiesDuration().count() << "ms"; - } else { - LOG(INFO) << "GPU-only tracing for " - << config_->activitiesRunIterations() << " iterations"; - } - } - - // Set useful metadata into the logger. - LOGGER_OBSERVER_SET_TRACE_DURATION_MS(config_->activitiesDuration().count()); - if (!config_->requestTraceID().empty()) { - LOGGER_OBSERVER_SET_TRACE_ID(config_->requestTraceID()); - } - if (!config_->requestGroupTraceID().empty()) { - LOGGER_OBSERVER_SET_GROUP_TRACE_ID(config_->requestGroupTraceID()); - } - LOGGER_OBSERVER_ADD_DESTINATION(config_->activitiesLogUrl()); - -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - if (!cpuOnly_) { - // Enabling CUPTI activity tracing incurs a larger perf hit at first, - // presumably because structures are allocated and initialized, callbacks - // are activated etc. 
After a while the overhead decreases and stabilizes. - // It's therefore useful to perform some warmup before starting recording. - LOG(INFO) << "Enabling GPU tracing"; - cupti_.setMaxBufferSize(config_->activitiesMaxGpuBufferSize()); - - time_point timestamp; - if (VLOG_IS_ON(1)) { - timestamp = system_clock::now(); - } -#ifdef HAS_CUPTI - cupti_.enableCuptiActivities(config_->selectedActivityTypes()); -#else - cupti_.enableActivities(config_->selectedActivityTypes()); -#endif - if (VLOG_IS_ON(1)) { - auto t2 = system_clock::now(); - addOverheadSample( - setupOverhead_, duration_cast(t2 - timestamp).count()); - } - } -#endif // HAS_CUPTI || HAS_ROCTRACER - - if (profilers_.size() > 0) { - configureChildProfilers(); - } - - if (libkineto::api().client()) { - libkineto::api().client()->warmup(config_->isOpInputsCollectionEnabled()); - } - if (profileStartIter_ >= 0) { - LOG(INFO) << "Tracing starting on iteration = " << profileStartIter_; - } else { - LOG(INFO) << "Tracing starting in " - << duration_cast(profileStartTime_ - now).count() << "s"; - } - - traceBuffers_ = std::make_unique(); - captureWindowStartTime_ = captureWindowEndTime_ = 0; - currentRunloopState_ = RunloopState::Warmup; -} - -void CuptiActivityProfiler::startTraceInternal(const time_point& now) { - captureWindowStartTime_ = libkineto::timeSinceEpoch(now); - VLOG(0) << "Warmup -> CollectTrace"; - for (auto& session: sessions_){ - LOG(INFO) << "Starting child profiler session"; - session->start(); - } - currentRunloopState_ = RunloopState::CollectTrace; -} - -void CuptiActivityProfiler::stopTraceInternal(const time_point& now) { - if (captureWindowEndTime_ == 0) { - captureWindowEndTime_ = libkineto::timeSinceEpoch(now); - } -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - if (!cpuOnly_) { - time_point timestamp; - if (VLOG_IS_ON(1)) { - timestamp = system_clock::now(); - } -#ifdef HAS_CUPTI - cupti_.disableCuptiActivities(config_->selectedActivityTypes()); -#else - 
cupti_.disableActivities(config_->selectedActivityTypes()); -#endif - if (VLOG_IS_ON(1)) { - auto t2 = system_clock::now(); - addOverheadSample( - setupOverhead_, duration_cast(t2 - timestamp).count()); - } - } -#endif // HAS_CUPTI || HAS_ROCTRACER - - if (currentRunloopState_ == RunloopState::CollectTrace) { - VLOG(0) << "CollectTrace -> ProcessTrace"; - } else { - LOG(WARNING) << "Called stopTrace with state == " << - static_cast::type>( - currentRunloopState_.load()); - } - for (auto& session: sessions_){ - LOG(INFO) << "Stopping child profiler session"; - session->stop(); - } - currentRunloopState_ = RunloopState::ProcessTrace; -} - -void CuptiActivityProfiler::resetInternal() { - resetTraceData(); - currentRunloopState_ = RunloopState::WaitForRequest; -} - -bool CuptiActivityProfiler::isWarmupDone( - const time_point& now, - int64_t currentIter) const { - // is it a time based config - if (profileStartIter_ < 0) { - // qualify that this check is not being called from application step() API - // this avoids races between the step() API and periodically invoked - // profiler run loop step() method - return (currentIter < 0) && (now >= profileStartTime_); - } - // this is an iteration based config - if (currentIter < 0) { - return false; - } - return currentIter >= profileStartIter_; -} - -bool CuptiActivityProfiler::isCollectionDone( - const time_point& now, - int64_t currentIter) const { - // is it a time based config - if (profileStartIter_ < 0) { - // qualify that this check is not being called from application step() API - return (currentIter < 0) && (now >= profileEndTime_); - } - // this is an iteration based config - if (currentIter < 0) { - return false; - } - return currentIter >= profileEndIter_; -} - -const time_point CuptiActivityProfiler::performRunLoopStep( - const time_point& now, - const time_point& nextWakeupTime, - int64_t currentIter) { - auto new_wakeup_time = nextWakeupTime; - bool warmup_done = false, collection_done = false; - - VLOG_IF(1, 
currentIter >= 0) << "Run loop on application step(), iteration = " - << currentIter; - - switch (currentRunloopState_) { - case RunloopState::WaitForRequest: - VLOG(1) << "State: WaitForRequest"; - // Nothing to do - break; - - case RunloopState::Warmup: - VLOG(1) << "State: Warmup"; - warmup_done = isWarmupDone(now, currentIter); -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - // Flushing can take a while so avoid doing it close to the start time - if (!cpuOnly_ && currentIter < 0 && - (profileStartIter_ >= 0 || nextWakeupTime < profileStartTime_)) { - cupti_.clearActivities(); - } - - if (cupti_.stopCollection) { - // Go to process trace to clear any outstanding buffers etc - LOG(WARNING) << "Trace terminated during warmup"; - std::lock_guard guard(mutex_); - stopTraceInternal(now); - resetInternal(); - VLOG(0) << "Warmup -> WaitForRequest"; - break; - } -#endif // HAS_CUPTI || HAS_ROCTRACER - - if (warmup_done) { - UST_LOGGER_MARK_COMPLETED(kWarmUpStage); - if (profileStartIter_ < 0 && - (now > profileStartTime_ + milliseconds(10))) { - LOG(WARNING) - << "Tracing started " - << duration_cast(now - profileStartTime_).count() - << "ms late!"; - } else { - LOG(INFO) << "Tracing started"; - } - startTrace(now); - if (libkineto::api().client()) { - libkineto::api().client()->start(); - } - if (nextWakeupTime > profileEndTime_) { - new_wakeup_time = profileEndTime_; - } - } else if (nextWakeupTime > profileStartTime_) { - new_wakeup_time = profileStartTime_; - } - - break; - - case RunloopState::CollectTrace: - VLOG(1) << "State: CollectTrace"; - // captureWindowStartTime_ can be set by external threads, - // so recompute end time. - // FIXME: Is this a good idea for synced start? 
- if (profileStartIter_ < 0) { - std::lock_guard guard(mutex_); - profileEndTime_ = time_point( - microseconds(captureWindowStartTime_)) + - config_->activitiesDuration(); - } - - collection_done = isCollectionDone(now, currentIter); - - // TODO revisit stopCollection_ is not used right now - if (collection_done || stopCollection_.exchange(false) -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - || cupti_.stopCollection -#endif // HAS_CUPTI || HAS_ROCTRACER - ){ - // Update runloop state first to prevent further updates to shared state - LOG(INFO) << "Tracing complete."; - if (currentIter > 0) { - LOG(INFO) << "This state change was invoked by application's step() call"; - } - // FIXME: Need to communicate reason for stopping on errors - if (libkineto::api().client()) { - libkineto::api().client()->stop(); - } - std::lock_guard guard(mutex_); - stopTraceInternal(now); - VLOG_IF(0, collection_done) << "Reached profile end time"; - - UST_LOGGER_MARK_COMPLETED(kCollectionStage); - } else if (profileStartIter_ >= 0) { - // nothing to do here - } else if (now < profileEndTime_ && profileEndTime_ < nextWakeupTime) { - new_wakeup_time = profileEndTime_; - } - - break; - - case RunloopState::ProcessTrace: - VLOG(1) << "State: ProcessTrace"; - // skip this state transition if it called from the step() api - // of the profiler. 
- // else it could lead to a race between the profiler thread and an - // application thread calling step() - if (currentIter >= 0) { - return new_wakeup_time; - } - // FIXME: Probably want to allow interruption here - // for quickly handling trace request via synchronous API - std::lock_guard guard(mutex_); - processTraceInternal(*logger_); - UST_LOGGER_MARK_COMPLETED(kPostProcessingStage); - resetInternal(); - VLOG(0) << "ProcessTrace -> WaitForRequest"; - break; - } - - return new_wakeup_time; -} - -void CuptiActivityProfiler::finalizeTrace(const Config& config, ActivityLogger& logger) { - LOG(INFO) << "Recorded nets:"; - { - for (const auto& it : iterationCountMap_) { - LOG(INFO) << it.first << ": " << it.second << " iterations"; - } - iterationCountMap_.clear(); - } - - // Process names - int32_t pid = processId(); - string process_name = processName(pid); - if (!process_name.empty()) { - logger.handleDeviceInfo( - {pid, process_name, "CPU"}, captureWindowStartTime_); - if (!cpuOnly_) { - // GPU events use device id as pid (0-7). - constexpr int kMaxGpuCount = 8; - for (int gpu = 0; gpu < kMaxGpuCount; gpu++) { - logger.handleDeviceInfo( - {gpu, process_name, fmt::format("GPU {}", gpu)}, - captureWindowStartTime_); - } - } - } - - // Thread & stream info - for (auto pair : resourceInfo_) { - const auto& resource = pair.second; - logger.handleResourceInfo(resource, captureWindowStartTime_); - } - - for (const auto& iterations : traceSpans_) { - for (const auto& span_pair : iterations.second) { - const TraceSpan& gpu_span = span_pair.second; - if (gpu_span.opCount > 0) { - logger.handleTraceSpan(gpu_span); - } - } - } - - // Overhead info - overheadInfo_.push_back(ActivityLogger::OverheadInfo("CUPTI Overhead")); - for(const auto& info : overheadInfo_) { - logger.handleOverheadInfo(info, captureWindowStartTime_); - } - - gpuUserEventMap_.logEvents(&logger); - -#if !USE_GOOGLE_LOG - // Save logs from LoggerCollector objects into Trace metadata. 
- auto LoggerMD = loggerCollectorMetadata_->extractCollectorMetadata(); - std::unordered_map> LoggerMDString; - for (auto& md : LoggerMD) { - LoggerMDString[toString(md.first)] = md.second; - } -#endif // !USE_GOOGLE_LOG - - logger.finalizeTrace(config, std::move(traceBuffers_), captureWindowEndTime_, LoggerMDString); -} - -void CuptiActivityProfiler::resetTraceData() { -#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER) - if (!cpuOnly_) { - cupti_.clearActivities(); - } -#endif // HAS_CUPTI || HAS_ROCTRACER - activityMap_.clear(); - cpuCorrelationMap_.clear(); - correlatedCudaActivities_.clear(); - gpuUserEventMap_.clear(); - traceSpans_.clear(); - clientActivityTraceMap_.clear(); - traceBuffers_ = nullptr; - metadata_.clear(); - sessions_.clear(); -#if !USE_GOOGLE_LOG - Logger::removeLoggerObserver(loggerCollectorMetadata_.get()); -#endif // !USE_GOOGLE_LOG -} - - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.h b/plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.h deleted file mode 100644 index 208833a4db7..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiActivityProfiler.h +++ /dev/null @@ -1,364 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "ThreadUtil.h" -#include "TraceSpan.h" -#include "libkineto.h" -#include "output_base.h" -#include "GenericTraceActivity.h" -#include "IActivityProfiler.h" -#include "LoggerCollector.h" - -namespace KINETO_NAMESPACE { - -class Config; -class CuptiActivityApi; -class RoctracerActivityApi; - -class CuptiActivityProfiler { - public: - CuptiActivityProfiler(CuptiActivityApi& cupti, bool cpuOnly); - CuptiActivityProfiler(RoctracerActivityApi& rai, bool cpuOnly); - CuptiActivityProfiler(const CuptiActivityProfiler&) = delete; - CuptiActivityProfiler& operator=(const CuptiActivityProfiler&) = delete; - - bool isActive() const { - return currentRunloopState_ != RunloopState::WaitForRequest; - } - - // Invoke at a regular interval to perform profiling activities. - // When not active, an interval of 1-5 seconds is probably fine, - // depending on required warm-up time and delayed start time. - // When active, it's a good idea to invoke more frequently to stay below - // memory usage limit (ACTIVITIES_MAX_GPU_BUFFER_SIZE_MB) during warmup. 
- const std::chrono::time_point performRunLoopStep( - const std::chrono::time_point& now, - const std::chrono::time_point& nextWakeupTime, - int64_t currentIter = -1); - - // Used for async requests - void setLogger(ActivityLogger* logger) { - logger_ = logger; - } - - // Synchronous control API - void startTrace( - const std::chrono::time_point& now) { - std::lock_guard guard(mutex_); - startTraceInternal(now); - } - - void stopTrace(const std::chrono::time_point& now) { - std::lock_guard guard(mutex_); - stopTraceInternal(now); - } - - // Process CPU and GPU traces - void processTrace(ActivityLogger& logger) { - std::lock_guard guard(mutex_); - processTraceInternal(logger); - } - - void reset() { - std::lock_guard guard(mutex_); - resetInternal(); - } - - // Set up profiler as specified in config. - void configure( - const Config& config, - const std::chrono::time_point& now); - - // Registered with client API to pass CPU trace events over - void transferCpuTrace( - std::unique_ptr cpuTrace); - - Config& config() { - return *config_; - } - - inline void recordThreadInfo() { - int32_t sysTid = systemThreadId(); - // Note we're using the lower 32 bits of the (opaque) pthread id - // as key, because that's what CUPTI records. 
- int32_t tid = threadId(); - int32_t pid = processId(); - std::lock_guard guard(mutex_); - recordThreadInfo(sysTid, tid, pid); - } - - // T107508020: We can deprecate the recordThreadInfo(void) once we optimized profiler_kineto - void recordThreadInfo(int32_t sysTid, int32_t tid, int32_t pid) { - if (resourceInfo_.find({pid, tid}) == resourceInfo_.end()) { - resourceInfo_.emplace( - std::make_pair(pid, tid), - ActivityLogger::ResourceInfo( - pid, - sysTid, - sysTid, // sortindex - fmt::format("thread {} ({})", sysTid, getThreadName()))); - } - } - - void addMetadata(const std::string& key, const std::string& value) { - std::lock_guard guard(mutex_); - metadata_[key] = value; - } - - void addChildActivityProfiler( - std::unique_ptr profiler) { - std::lock_guard guard(mutex_); - profilers_.push_back(std::move(profiler)); - } - - protected: - - using CpuGpuSpanPair = std::pair; - static const CpuGpuSpanPair& defaultTraceSpan(); - - private: - - // Map of gpu activities to user defined events - class GpuUserEventMap { - public: - // Insert a user defined event which maps to the gpu trace activity. - // If the user defined event mapping already exists this will update the - // gpu side span to include the span of gpuTraceActivity. - void insertOrExtendEvent(const ITraceActivity& cpuTraceActivity, - const ITraceActivity& gpuTraceActivity); - // Log out the events to the logger - void logEvents(ActivityLogger *logger); - - void clear() { - streamSpanMap_.clear(); - } - - private: - // device id and stream name - using StreamKey = std::pair; - - // map of correlation id to TraceSpan - using CorrelationSpanMap = - std::unordered_map; - std::map streamSpanMap_; - }; - - GpuUserEventMap gpuUserEventMap_; - // id -> activity* - std::unordered_map activityMap_; - // cuda runtime id -> pytorch op id - // CUPTI provides a mechanism for correlating Cuda events to arbitrary - // external events, e.g.operator activities from PyTorch. 
- std::unordered_map cpuCorrelationMap_; - // CUDA runtime <-> GPU Activity - std::unordered_map - correlatedCudaActivities_; - std::unordered_map userCorrelationMap_; - - // data structure to collect cuptiActivityFlushAll() latency overhead - struct profilerOverhead { - int64_t overhead; - int cntr; - }; - - bool isWarmupDone( - const std::chrono::time_point& now, - int64_t currentIter) const; - - bool isCollectionDone( - const std::chrono::time_point& now, - int64_t currentIter) const; - - void startTraceInternal( - const std::chrono::time_point& now); - - void stopTraceInternal( - const std::chrono::time_point& now); - - void processTraceInternal(ActivityLogger& logger); - - void resetInternal(); - - void finalizeTrace(const Config& config, ActivityLogger& logger); - - void configureChildProfilers(); - - // Process a single CPU trace - void processCpuTrace( - libkineto::CpuTraceBuffer& cpuTrace, - ActivityLogger& logger); - - // Create resource names for streams - inline void recordStream(int device, int id, const char* postfix) { - if (resourceInfo_.find({device, id}) == resourceInfo_.end()) { - resourceInfo_.emplace( - std::make_pair(device, id), - ActivityLogger::ResourceInfo( - device, id, id, fmt::format( - "stream {} {}", id, postfix))); - } - } - - // Record client trace span for subsequent lookups from activities - // Also creates a corresponding GPU-side span. - CpuGpuSpanPair& recordTraceSpan(TraceSpan& span, int gpuOpCount); - - // Returns true if net name is to be tracked for a specified number of - // iterations. 
- bool iterationTargetMatch(libkineto::CpuTraceBuffer& trace); - - // net name to id - int netId(const std::string& netName); - - const ITraceActivity* linkedActivity( - int32_t correlationId, - const std::unordered_map& correlationMap); - -#ifdef HAS_CUPTI - // Process generic CUPTI activity - void handleCuptiActivity(const CUpti_Activity* record, ActivityLogger* logger); - - // Process specific GPU activity types - void updateGpuNetSpan(const ITraceActivity& gpuOp); - bool outOfRange(const ITraceActivity& act); - void handleCorrelationActivity( - const CUpti_ActivityExternalCorrelation* correlation); - void handleRuntimeActivity( - const CUpti_ActivityAPI* activity, ActivityLogger* logger); - void handleOverheadActivity( - const CUpti_ActivityOverhead* activity, ActivityLogger* logger); - void handleGpuActivity(const ITraceActivity& act, - ActivityLogger* logger); - template - void handleGpuActivity(const T* act, ActivityLogger* logger); -#endif // HAS_CUPTI - - void resetTraceData(); - - void addOverheadSample(profilerOverhead& counter, int64_t overhead) { - counter.overhead += overhead; - counter.cntr++; - } - int64_t getOverhead(const profilerOverhead& counter) { - if (counter.cntr == 0) { - return 0; - } - return counter.overhead / counter.cntr; - } - - void checkTimestampOrder(const ITraceActivity* act1); - - // On-demand request configuration - std::unique_ptr config_; - - // Logger used during trace processing - ActivityLogger* logger_; - - // Calls to CUPTI is encapsulated behind this interface -#ifdef HAS_ROCTRACER - RoctracerActivityApi& cupti_; // Design failure here -#else - CuptiActivityApi& cupti_; -#endif - - enum class RunloopState { - WaitForRequest, - Warmup, - CollectTrace, - ProcessTrace - }; - - // Start and end time used for triggering and stopping profiling - std::chrono::time_point profileStartTime_; - std::chrono::time_point profileEndTime_; - int64_t profileStartIter_ = -1, profileEndIter_ = -1; - - - // All recorded trace spans, both 
CPU and GPU - // Trace Id -> list of iterations. - // Using map of lists for the iterator semantics, since we are recording - // pointers to the elements in this structure. - std::map> traceSpans_; - - // Maintain a map of client trace activity to trace span. - // Maps correlation id -> TraceSpan* held by traceSpans_. - using ActivityTraceMap = std::unordered_map; - ActivityTraceMap clientActivityTraceMap_; - - // Cache thread names and system thread ids for pthread ids, - // and stream ids for GPU streams - std::map< - std::pair, - ActivityLogger::ResourceInfo> resourceInfo_; - - std::vector overheadInfo_; - - // the overhead to flush the activity buffer - profilerOverhead flushOverhead_; - // the overhead to enable/disable activity tracking - profilerOverhead setupOverhead_; - - bool cpuOnly_{false}; - - // *************************************************************************** - // Below state is shared with external threads. - // These need to either be atomic, accessed under lock or only used - // by external threads in separate runloop phases from the profiler thread. - // *************************************************************************** - - // Mutex to protect non-atomic access to below state - std::mutex mutex_; - - // Runloop phase - std::atomic currentRunloopState_{RunloopState::WaitForRequest}; - - // Keep track of the start time of the first net in the current trace. - // This is only relevant to Caffe2 as PyTorch does not have nets. - // All CUDA events before this time will be removed - // Can be written by external threads during collection. - int64_t captureWindowStartTime_{0}; - // Similarly, all CUDA API events after the last net event will be removed - int64_t captureWindowEndTime_{0}; - - // span name -> iteration count - std::map iterationCountMap_; - // Flag used to stop tracing from external api callback. - // Needs to be atomic since it's set from a different thread. 
- std::atomic_bool stopCollection_{false}; - - // Buffers where trace data is stored - std::unique_ptr traceBuffers_; - - // Trace metadata - std::unordered_map metadata_; - - // child activity profilers - std::vector> profilers_; - - // a vector of active profiler plugin sessions - std::vector> sessions_; - - // LoggerCollector to collect all LOGs during the trace -#if !USE_GOOGLE_LOG - std::unique_ptr loggerCollectorMetadata_; -#endif // !USE_GOOGLE_LOG -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.cpp deleted file mode 100644 index 1876003998d..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.cpp +++ /dev/null @@ -1,260 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "CuptiCallbackApi.h" - -#include -#include -#include -#include -#include - -#ifdef HAS_CUPTI -#include "cupti_call.h" -#endif -#include "Logger.h" - - -namespace KINETO_NAMESPACE { - -// limit on number of handles per callback type -constexpr size_t MAX_CB_FNS_PER_CB = 8; - -// Reader Writer lock types -using ReaderWriterLock = std::shared_timed_mutex; -using ReaderLockGuard = std::shared_lock; -using WriteLockGuard = std::unique_lock; - -static ReaderWriterLock callbackLock_; - -/* Callback Table : - * Overall goal of the design is to optimize the lookup of function - * pointers. The table is structured at two levels and the leaf - * elements in the table are std::list to enable fast access/inserts/deletes - * - * | - * -> cb id 0 -> std::list of callbacks - * ... - * -> cb id n -> std::list of callbacks - * | - * ... - * CallbackTable is the finaly table type above - * See type declrartions in header file. - */ - - -/* callback_switchboard : is the global callback handler we register - * with CUPTI. The goal is to make it as efficient as possible - * to re-direct to the registered callback(s). 
- * - * Few things to care about : - * a) use if/then switches rather than map/hash structures - * b) avoid dynamic memory allocations - * c) be aware of locking overheads - */ -#ifdef HAS_CUPTI -static void CUPTIAPI callback_switchboard( -#else -static void callback_switchboard( -#endif - void* /* unused */, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const CUpti_CallbackData* cbInfo) { - - // below statement is likey going to call a mutex - // on the singleton access - CuptiCallbackApi::singleton().__callback_switchboard( - domain, cbid, cbInfo); -} - - -void CuptiCallbackApi::__callback_switchboard( - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const CUpti_CallbackData* cbInfo) { - VLOG(0) << "Callback: domain = " << domain << ", cbid = " << cbid; - CallbackList *cblist = nullptr; - - switch (domain) { - - // add the fastest path for kernel launch callbacks - // as these are the most frequent ones - case CUPTI_CB_DOMAIN_RUNTIME_API: - switch (cbid) { - case CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000: - cblist = &callbacks_.runtime[ - CUDA_LAUNCH_KERNEL - __RUNTIME_CB_DOMAIN_START]; - break; - default: - break; - } - break; - - case CUPTI_CB_DOMAIN_RESOURCE: - switch (cbid) { - case CUPTI_CBID_RESOURCE_CONTEXT_CREATED: - cblist = &callbacks_.resource[ - RESOURCE_CONTEXT_CREATED - __RESOURCE_CB_DOMAIN_START]; - break; - case CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING: - cblist = &callbacks_.resource[ - RESOURCE_CONTEXT_DESTROYED - __RESOURCE_CB_DOMAIN_START]; - break; - default: - break; - } - break; - - default: - return; - } - - // ignore callbacks that are not handled - if (cblist == nullptr) { - return; - } - - // make a copy of the callback list so we avoid holding lock - // in common case this should be just one func pointer copy - std::array callbacks; - int num_cbs = 0; - { - ReaderLockGuard rl(callbackLock_); - int i = 0; - for (auto it = cblist->begin(); - it != cblist->end() && i < MAX_CB_FNS_PER_CB; - it++, i++) { - 
callbacks[i] = *it; - } - num_cbs = i; - } - - for (int i = 0; i < num_cbs; i++) { - auto fn = callbacks[i]; - fn(domain, cbid, cbInfo); - } -} - -CuptiCallbackApi& CuptiCallbackApi::singleton() { - static CuptiCallbackApi instance; - return instance; -} - -CuptiCallbackApi::CuptiCallbackApi() { -#ifdef HAS_CUPTI - lastCuptiStatus_ = CUPTI_ERROR_UNKNOWN; - lastCuptiStatus_ = CUPTI_CALL_NOWARN( - cuptiSubscribe(&subscriber_, - (CUpti_CallbackFunc)callback_switchboard, - nullptr)); - - initSuccess_ = (lastCuptiStatus_ == CUPTI_SUCCESS); -#endif -} - -CuptiCallbackApi::CallbackList* CuptiCallbackApi::CallbackTable::lookup( - CUpti_CallbackDomain domain, CuptiCallBackID cbid) { - size_t idx; - - switch (domain) { - - case CUPTI_CB_DOMAIN_RESOURCE: - assert(cbid >= __RESOURCE_CB_DOMAIN_START); - assert(cbid < __RESOURCE_CB_DOMAIN_END); - idx = cbid - __RESOURCE_CB_DOMAIN_START; - return &resource.at(idx); - - case CUPTI_CB_DOMAIN_RUNTIME_API: - assert(cbid >= __RUNTIME_CB_DOMAIN_START); - assert(cbid < __RUNTIME_CB_DOMAIN_END); - idx = cbid - __RUNTIME_CB_DOMAIN_START; - return &runtime.at(idx); - - default: - LOG(WARNING) << " Unsupported callback domain : " << domain; - return nullptr; - } -} - -bool CuptiCallbackApi::registerCallback( - CUpti_CallbackDomain domain, - CuptiCallBackID cbid, - CuptiCallbackFn cbfn) { - CallbackList* cblist = callbacks_.lookup(domain, cbid); - - if (!cblist) { - LOG(WARNING) << "Could not register callback -- domain = " << domain - << " callback id = " << cbid; - return false; - } - - // avoid duplicates - auto it = std::find(cblist->begin(), cblist->end(), cbfn); - if (it != cblist->end()) { - LOG(WARNING) << "Adding duplicate callback -- domain = " << domain - << " callback id = " << cbid; - return true; - } - - if (cblist->size() == MAX_CB_FNS_PER_CB) { - LOG(WARNING) << "Already registered max callback -- domain = " << domain - << " callback id = " << cbid; - } - - WriteLockGuard wl(callbackLock_); - cblist->push_back(cbfn); - return 
true; -} - -bool CuptiCallbackApi::deleteCallback( - CUpti_CallbackDomain domain, - CuptiCallBackID cbid, - CuptiCallbackFn cbfn) { - CallbackList* cblist = callbacks_.lookup(domain, cbid); - if (!cblist) { - LOG(WARNING) << "Attempting to remove unsupported callback -- domain = " << domain - << " callback id = " << cbid; - return false; - } - - // Locks are not required here as - // https://en.cppreference.com/w/cpp/container/list/erase - // "References and iterators to the erased elements are invalidated. - // Other references and iterators are not affected." - auto it = std::find(cblist->begin(), cblist->end(), cbfn); - if (it == cblist->end()) { - LOG(WARNING) << "Could not find callback to remove -- domain = " << domain - << " callback id = " << cbid; - return false; - } - - WriteLockGuard wl(callbackLock_); - cblist->erase(it); - return true; -} - -bool CuptiCallbackApi::enableCallback( - CUpti_CallbackDomain domain, CUpti_CallbackId cbid) { -#ifdef HAS_CUPTI - if (initSuccess_) { - lastCuptiStatus_ = CUPTI_CALL_NOWARN( - cuptiEnableCallback(1, subscriber_, domain, cbid)); - return (lastCuptiStatus_ == CUPTI_SUCCESS); - } -#endif - return false; -} - -bool CuptiCallbackApi::disableCallback( - CUpti_CallbackDomain domain, CUpti_CallbackId cbid) { -#ifdef HAS_CUPTI - if (initSuccess_) { - lastCuptiStatus_ = CUPTI_CALL_NOWARN( - cuptiEnableCallback(0, subscriber_, domain, cbid)); - return (lastCuptiStatus_ == CUPTI_SUCCESS); - } -#endif - return false; -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.h b/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.h deleted file mode 100644 index 4526f3750b4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApi.h +++ /dev/null @@ -1,130 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#ifdef HAS_CUPTI -#include -#endif -#include -#include -#include -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "CuptiCallbackApiMock.h" - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - - -/* CuptiCallbackApi : Provides an abstraction over CUPTI callback - * interface. This enables various callback functions to be registered - * with this class. The class registers a global callback handler that - * redirects to the respective callbacks. - * - * Note: one design choice we made is to only support simple function pointers - * in order to speed up the implementation for fast path. - */ - -using CuptiCallbackFn = void(*)( - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const CUpti_CallbackData* cbInfo); - - -class CuptiCallbackApi { - - public: - - /* Global list of supported callback ids - * use the class namespace to avoid confusing with CUPTI enums*/ - enum CuptiCallBackID { - CUDA_LAUNCH_KERNEL = 0, - // can possibly support more callback ids per domain - // - __RUNTIME_CB_DOMAIN_START = CUDA_LAUNCH_KERNEL, - - // Callbacks under Resource CB domain - RESOURCE_CONTEXT_CREATED, - RESOURCE_CONTEXT_DESTROYED, - - __RUNTIME_CB_DOMAIN_END = RESOURCE_CONTEXT_CREATED, - __RESOURCE_CB_DOMAIN_START = RESOURCE_CONTEXT_CREATED, - - __RESOURCE_CB_DOMAIN_END = RESOURCE_CONTEXT_DESTROYED + 1, - }; - - - CuptiCallbackApi(const CuptiCallbackApi&) = delete; - CuptiCallbackApi& operator=(const CuptiCallbackApi&) = delete; - - static CuptiCallbackApi& singleton(); - - bool initSuccess() const { - return initSuccess_; - } - -#ifdef HAS_CUPTI - CUptiResult getCuptiStatus() const { - return lastCuptiStatus_; - } -#endif - - bool registerCallback( - CUpti_CallbackDomain domain, - CuptiCallBackID cbid, - CuptiCallbackFn cbfn); - - // returns false if callback was not found - bool deleteCallback( - CUpti_CallbackDomain domain, - CuptiCallBackID cbid, - CuptiCallbackFn 
cbfn); - - bool enableCallback(CUpti_CallbackDomain domain, CUpti_CallbackId cbid); - bool disableCallback(CUpti_CallbackDomain domain, CUpti_CallbackId cbid); - - - // Please do not use this method. This has to be exposed as public - // so it is accessible from the callback handler - void __callback_switchboard( - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const CUpti_CallbackData* cbInfo); - - private: - - explicit CuptiCallbackApi(); - - // For callback table design overview see the .cpp file - using CallbackList = std::list; - - // level 2 tables sizes are known at compile time - constexpr static size_t RUNTIME_CB_DOMAIN_SIZE - = (__RUNTIME_CB_DOMAIN_END - __RUNTIME_CB_DOMAIN_START); - - constexpr static size_t RESOURCE_CB_DOMAIN_SIZE - = (__RESOURCE_CB_DOMAIN_END - __RESOURCE_CB_DOMAIN_START); - - // level 1 table is a struct - struct CallbackTable { - std::array runtime; - std::array resource; - - CallbackList* lookup(CUpti_CallbackDomain domain, CuptiCallBackID cbid); - }; - - CallbackTable callbacks_; - bool initSuccess_ = false; - -#ifdef HAS_CUPTI - CUptiResult lastCuptiStatus_; - CUpti_SubscriberHandle subscriber_; -#endif -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApiMock.h b/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApiMock.h deleted file mode 100644 index fd51267274f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiCallbackApiMock.h +++ /dev/null @@ -1,32 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -// Provides data structures to mock CUPTI Callback API -#ifndef HAS_CUPTI - -enum CUpti_CallbackDomain { - CUPTI_CB_DOMAIN_RESOURCE, - CUPTI_CB_DOMAIN_RUNTIME_API, -}; -enum CUpti_CallbackId { - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000, - CUPTI_CBID_RESOURCE_CONTEXT_CREATED, - CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, -}; - -using CUcontext = void*; - -struct CUpti_ResourceData { - CUcontext context; -}; - -constexpr int CUPTI_API_ENTER = 0; -constexpr int CUPTI_API_EXIT = 0; - -struct CUpti_CallbackData { - CUcontext context; - const char* symbolName; - int callbackSite; -}; -#endif // HAS_CUPTI diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.cpp deleted file mode 100644 index 7f1d48c1d00..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.cpp +++ /dev/null @@ -1,112 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "CuptiEventApi.h" - -#include - -#include "Logger.h" -#include "cupti_call.h" - -using namespace std::chrono; -using std::vector; - -namespace KINETO_NAMESPACE { - -CuptiEventApi::CuptiEventApi(CUcontext context) - : context_(context) { - CUPTI_CALL(cuptiGetDeviceId(context_, (uint32_t*)&device_)); -} - -CUpti_EventGroupSets* CuptiEventApi::createGroupSets( - vector& ids) { - CUpti_EventGroupSets* group_sets = nullptr; - CUptiResult res = CUPTI_CALL(cuptiEventGroupSetsCreate( - context_, sizeof(CUpti_EventID) * ids.size(), ids.data(), &group_sets)); - - if (res != CUPTI_SUCCESS || group_sets == nullptr) { - const char* errstr = nullptr; - CUPTI_CALL(cuptiGetResultString(res, &errstr)); - throw std::system_error(EINVAL, std::generic_category(), errstr); - } - - return group_sets; -} - -void CuptiEventApi::destroyGroupSets(CUpti_EventGroupSets* sets) { - CUPTI_CALL(cuptiEventGroupSetsDestroy(sets)); -} - -bool CuptiEventApi::setContinuousMode() { - // Avoid logging noise for 
CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED - CUptiResult res = CUPTI_CALL_NOWARN(cuptiSetEventCollectionMode( - context_, CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS)); - if (res == CUPTI_ERROR_LEGACY_PROFILER_NOT_SUPPORTED) { - return false; - } - // Log warning on other errors - CUPTI_CALL(res); - return (res == CUPTI_SUCCESS); -} - -void CuptiEventApi::enablePerInstance(CUpti_EventGroup eventGroup) { - uint32_t profile_all = 1; - CUPTI_CALL(cuptiEventGroupSetAttribute( - eventGroup, - CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, - sizeof(profile_all), - &profile_all)); -} - -uint32_t CuptiEventApi::instanceCount(CUpti_EventGroup eventGroup) { - uint32_t instance_count = 0; - size_t s = sizeof(instance_count); - CUPTI_CALL(cuptiEventGroupGetAttribute( - eventGroup, CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT, &s, &instance_count)); - return instance_count; -} - -void CuptiEventApi::enableGroupSet(CUpti_EventGroupSet& set) { - CUptiResult res = CUPTI_CALL_NOWARN(cuptiEventGroupSetEnable(&set)); - if (res != CUPTI_SUCCESS) { - const char* errstr = nullptr; - CUPTI_CALL(cuptiGetResultString(res, &errstr)); - throw std::system_error(EIO, std::generic_category(), errstr); - } -} - -void CuptiEventApi::disableGroupSet(CUpti_EventGroupSet& set) { - CUPTI_CALL(cuptiEventGroupSetDisable(&set)); -} - -void CuptiEventApi::readEvent( - CUpti_EventGroup grp, - CUpti_EventID id, - vector& vals) { - size_t s = sizeof(int64_t) * vals.size(); - CUPTI_CALL(cuptiEventGroupReadEvent( - grp, - CUPTI_EVENT_READ_FLAG_NONE, - id, - &s, - reinterpret_cast(vals.data()))); -} - -vector CuptiEventApi::eventsInGroup(CUpti_EventGroup grp) { - uint32_t group_size = 0; - size_t s = sizeof(group_size); - CUPTI_CALL(cuptiEventGroupGetAttribute( - grp, CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS, &s, &group_size)); - size_t events_size = group_size * sizeof(CUpti_EventID); - vector res(group_size); - CUPTI_CALL(cuptiEventGroupGetAttribute( - grp, CUPTI_EVENT_GROUP_ATTR_EVENTS, &events_size, res.data())); - 
return res; -} - -CUpti_EventID CuptiEventApi::eventId(const std::string& name) { - CUpti_EventID id{0}; - CUPTI_CALL(cuptiEventGetIdFromName(device_, name.c_str(), &id)); - return id; -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.h b/plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.h deleted file mode 100644 index 79610f93f0e..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiEventApi.h +++ /dev/null @@ -1,49 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include - -namespace KINETO_NAMESPACE { - -// C++ interface to CUPTI Events C API. -// Virtual methods are here mainly to allow easier testing. -class CuptiEventApi { - public: - explicit CuptiEventApi(CUcontext context_); - virtual ~CuptiEventApi() {} - - CUdevice device() { - return device_; - } - - virtual CUpti_EventGroupSets* createGroupSets( - std::vector& ids); - virtual void destroyGroupSets(CUpti_EventGroupSets* sets); - - virtual bool setContinuousMode(); - - virtual void enablePerInstance(CUpti_EventGroup eventGroup); - virtual uint32_t instanceCount(CUpti_EventGroup eventGroup); - - virtual void enableGroupSet(CUpti_EventGroupSet& set); - virtual void disableGroupSet(CUpti_EventGroupSet& set); - - virtual void - readEvent(CUpti_EventGroup g, CUpti_EventID id, std::vector& vals); - virtual std::vector eventsInGroup(CUpti_EventGroup g); - - virtual CUpti_EventID eventId(const std::string& name); - - protected: - // Unit testing - CuptiEventApi() : context_(nullptr), device_(0) {} - - private: - CUcontext context_; - CUdevice device_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.cpp deleted file mode 100644 index 36401e74341..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.cpp +++ /dev/null @@ 
-1,107 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "CuptiMetricApi.h" - -#include - -#include "Logger.h" -#include "cupti_call.h" - -using namespace std::chrono; -using std::vector; - -namespace KINETO_NAMESPACE { - -CUpti_MetricID CuptiMetricApi::idFromName(const std::string& name) { - CUpti_MetricID metric_id{~0u}; - CUptiResult res = - CUPTI_CALL(cuptiMetricGetIdFromName(device_, name.c_str(), &metric_id)); - if (res == CUPTI_ERROR_INVALID_METRIC_NAME) { - LOG(WARNING) << "Invalid metric name: " << name; - } - return metric_id; -} - -// Return a map of event IDs and names for a given metric id. -// Note that many events don't have a name. In that case the name will -// be set to the empty string. -std::map CuptiMetricApi::events( - CUpti_MetricID metric_id) { - uint32_t num_events = 0; - CUPTI_CALL(cuptiMetricGetNumEvents(metric_id, &num_events)); - vector ids(num_events); - size_t array_size = num_events * sizeof(CUpti_EventID); - CUPTI_CALL(cuptiMetricEnumEvents(metric_id, &array_size, ids.data())); - std::map res; - for (CUpti_EventID id : ids) { - // Attempt to lookup name from CUPTI - constexpr size_t kMaxEventNameLength = 64; - char cupti_name[kMaxEventNameLength]; - size_t size = kMaxEventNameLength; - CUPTI_CALL( - cuptiEventGetAttribute(id, CUPTI_EVENT_ATTR_NAME, &size, cupti_name)); - cupti_name[kMaxEventNameLength - 1] = 0; - - // CUPTI "helpfully" returns "event_name" when the event is unnamed. 
- if (size > 0 && strcmp(cupti_name, "event_name") != 0) { - res.emplace(id, cupti_name); - } else { - res.emplace(id, ""); - } - } - return res; -} - -CUpti_MetricValueKind CuptiMetricApi::valueKind(CUpti_MetricID metric) { - CUpti_MetricValueKind res{CUPTI_METRIC_VALUE_KIND_FORCE_INT}; - size_t value_kind_size = sizeof(res); - CUPTI_CALL(cuptiMetricGetAttribute( - metric, CUPTI_METRIC_ATTR_VALUE_KIND, &value_kind_size, &res)); - return res; -} - -CUpti_MetricEvaluationMode CuptiMetricApi::evaluationMode( - CUpti_MetricID metric) { - CUpti_MetricEvaluationMode eval_mode{ - CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE}; - size_t eval_mode_size = sizeof(eval_mode); - CUPTI_CALL(cuptiMetricGetAttribute( - metric, CUPTI_METRIC_ATTR_EVALUATION_MODE, &eval_mode_size, &eval_mode)); - return eval_mode; -} - -// FIXME: Consider caching value kind here -SampleValue CuptiMetricApi::calculate( - CUpti_MetricID metric, - CUpti_MetricValueKind kind, - vector& events, - vector& values, - int64_t duration) { - CUpti_MetricValue metric_value; - CUPTI_CALL(cuptiMetricGetValue( - device_, - metric, - events.size() * sizeof(CUpti_EventID), - events.data(), - values.size() * sizeof(int64_t), - reinterpret_cast(values.data()), - duration, - &metric_value)); - - switch (kind) { - case CUPTI_METRIC_VALUE_KIND_DOUBLE: - case CUPTI_METRIC_VALUE_KIND_PERCENT: - return SampleValue(metric_value.metricValueDouble); - case CUPTI_METRIC_VALUE_KIND_UINT64: - case CUPTI_METRIC_VALUE_KIND_INT64: - case CUPTI_METRIC_VALUE_KIND_THROUGHPUT: - return SampleValue(metric_value.metricValueUint64); - case CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL: - return SampleValue((int)metric_value.metricValueUtilizationLevel); - default: - assert(false); - } - return SampleValue(-1); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.h b/plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.h deleted file mode 100644 index f45d38cd616..00000000000 --- 
a/plugins/tensorboard-plugins/libkineto/src/CuptiMetricApi.h +++ /dev/null @@ -1,38 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -#include -#include - -#include "SampleListener.h" - -namespace KINETO_NAMESPACE { - -// C++ interface to CUPTI Metrics C API. -// Virtual methods are here mainly to allow easier testing. -class CuptiMetricApi { - public: - explicit CuptiMetricApi(CUdevice device) : device_(device) {} - virtual ~CuptiMetricApi() {} - - virtual CUpti_MetricID idFromName(const std::string& name); - virtual std::map events(CUpti_MetricID metric_id); - - virtual CUpti_MetricValueKind valueKind(CUpti_MetricID metric); - virtual CUpti_MetricEvaluationMode evaluationMode(CUpti_MetricID metric); - - virtual SampleValue calculate( - CUpti_MetricID metric, - CUpti_MetricValueKind kind, - std::vector& events, - std::vector& values, - int64_t duration); - - private: - CUdevice device_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.cpp deleted file mode 100644 index d1b08ab2c13..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#ifdef HAS_CUPTI -#include -#if defined(CUDART_VERSION) && CUDART_VERSION > 10000 && CUDART_VERSION < 11040 -#include -#include -#include -#endif // cuda version > 10.00 and < 11.04 -#endif // HAS_CUPTI - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "ScopeExit.h" -#include "CuptiNvPerfMetric.h" -#include "Logger.h" - -namespace KINETO_NAMESPACE { - -// Add a namespace to isolate these utility functions that are only -// going to be used by the CuptiRangeProfiler. These included calls -// to NVIDIA PerfWorks APIs. 
-namespace nvperf { - - -// Largely based on NVIDIA sample code provided with CUDA release -// files Metric.cpp and Eval.cpp - -// ------------------------------------------------- -// Metric and Counter Data Configuration -// ------------------------------------------------- - - -// Note: Be carful before modifying the code below. There is a specific -// sequence one needs to follow to program the metrics else things may -// stop working. We tried to keep the flow consistent with the example -// code from NVIDIA. Since most of the programmability comes from -// the CUPTI profiler metric names this should be okay. - -// Only supported on CUDA RT Version between 10.0 and 11.04. -// After CUDA RT 11.04, the structure has changed. -// TODO update the structure NVPA_RawMetricsConfig to support 11.04 -#if defined(CUDART_VERSION) && CUDART_VERSION > 10000 && CUDART_VERSION < 11040 - -bool getRawMetricRequests( - NVPA_MetricsContext* metricsContext, - std::vector metricNames, - std::vector& rawMetricsDeps, - std::vector& rawMetricRequests) { - bool isolated = true; - /* Bug in collection with collection of metrics without instances, keep it - * to true*/ - bool keepInstances = true; - - for (const auto& metricName : metricNames) { - - NVPW_MetricsContext_GetMetricProperties_Begin_Params - getMetricPropertiesBeginParams = { - NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE, nullptr}; - getMetricPropertiesBeginParams.pMetricsContext = metricsContext; - getMetricPropertiesBeginParams.pMetricName = metricName.c_str(); - - if (!NVPW_CALL( - NVPW_MetricsContext_GetMetricProperties_Begin( - &getMetricPropertiesBeginParams))) { - return false; - } - - for (const char** metricDepsIt = - getMetricPropertiesBeginParams.ppRawMetricDependencies; - *metricDepsIt; - ++metricDepsIt) { - rawMetricsDeps.push_back(*metricDepsIt); - } - - NVPW_MetricsContext_GetMetricProperties_End_Params - getMetricPropertiesEndParams = { - 
NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE, nullptr}; - getMetricPropertiesEndParams.pMetricsContext = metricsContext; - - if (!NVPW_CALL(NVPW_MetricsContext_GetMetricProperties_End( - &getMetricPropertiesEndParams))) { - return false; - } - } - - for (const auto& rawMetricName : rawMetricsDeps) { - NVPA_RawMetricRequest metricRequest = {NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE, nullptr}; - metricRequest.pMetricName = rawMetricName.c_str(); - metricRequest.isolated = isolated; - metricRequest.keepInstances = keepInstances; - rawMetricRequests.push_back(metricRequest); - VLOG(1) << "Adding raw metric struct : raw metric = " << rawMetricName - << " isolated = " << isolated << " keepinst = " << keepInstances; - } - - if (rawMetricRequests.size() == 0) { - LOG(WARNING) << "CUPTI Profiler was unable to configure any metrics"; - return false; - } - return true; -} - -// Setup CUPTI Profiler Config Image -bool getProfilerConfigImage( - const std::string& chipName, - const std::vector& metricNames, - std::vector& configImage, - const uint8_t* counterAvailabilityImage) { - - NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = { - NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE, nullptr}; - metricsContextCreateParams.pChipName = chipName.c_str(); - - if (!NVPW_CALL( - NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams))) { - return false; - } - - NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = { - NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE, nullptr}; - metricsContextDestroyParams.pMetricsContext = - metricsContextCreateParams.pMetricsContext; - - SCOPE_EXIT([&]() { - NVPW_MetricsContext_Destroy( - (NVPW_MetricsContext_Destroy_Params*)&metricsContextDestroyParams); - }); - - // Get all raw metrics required for given metricNames list - std::vector rawMetricRequests; - - // note: we need a variable at this functions scope to hold the string - // pointers for underlying C char arrays. 
- std::vector rawMetricDeps; - - if (!getRawMetricRequests( - metricsContextCreateParams.pMetricsContext, - metricNames, - rawMetricDeps, - rawMetricRequests)) { - return false; - } - - NVPA_RawMetricsConfigOptions metricsConfigOptions = { - NVPA_RAW_METRICS_CONFIG_OPTIONS_STRUCT_SIZE, nullptr}; - metricsConfigOptions.activityKind = NVPA_ACTIVITY_KIND_PROFILER; - metricsConfigOptions.pChipName = chipName.c_str(); - NVPA_RawMetricsConfig* rawMetricsConfig; - if (!NVPW_CALL( - NVPA_RawMetricsConfig_Create( - &metricsConfigOptions, &rawMetricsConfig))) { - return false; - } - - // TODO check if this is required - if (counterAvailabilityImage) { - NVPW_RawMetricsConfig_SetCounterAvailability_Params - setCounterAvailabilityParams = { - NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE, nullptr}; - setCounterAvailabilityParams.pRawMetricsConfig = rawMetricsConfig; - setCounterAvailabilityParams.pCounterAvailabilityImage = - counterAvailabilityImage; - if (!NVPW_CALL( - NVPW_RawMetricsConfig_SetCounterAvailability( - &setCounterAvailabilityParams))) { - return false; - } - } - - NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = { - NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE, nullptr}; - rawMetricsConfigDestroyParams.pRawMetricsConfig = rawMetricsConfig; - SCOPE_EXIT([&]() { - NVPW_RawMetricsConfig_Destroy( - (NVPW_RawMetricsConfig_Destroy_Params*)&rawMetricsConfigDestroyParams); - }); - - // Start a Raw Metric Pass group - NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = { - NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE, nullptr}; - beginPassGroupParams.pRawMetricsConfig = rawMetricsConfig; - if (!NVPW_CALL( - NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams))) { - return false; - } - - // Add all raw metrics - NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = { - NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE, nullptr}; - addMetricsParams.pRawMetricsConfig = rawMetricsConfig; - 
addMetricsParams.pRawMetricRequests = rawMetricRequests.data(); - addMetricsParams.numMetricRequests = rawMetricRequests.size(); - if (!NVPW_CALL( - NVPW_RawMetricsConfig_AddMetrics(&addMetricsParams))) { - return false; - } - - // End pass group - NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = { - NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE, nullptr}; - endPassGroupParams.pRawMetricsConfig = rawMetricsConfig; - if (!NVPW_CALL( - NVPW_RawMetricsConfig_EndPassGroup(&endPassGroupParams))) { - return false; - } - - // Setup Config Image generation - NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParams = { - NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE, nullptr}; - generateConfigImageParams.pRawMetricsConfig = rawMetricsConfig; - if (!NVPW_CALL( - NVPW_RawMetricsConfig_GenerateConfigImage(&generateConfigImageParams))) { - return false; - } - - // Get the Config Image size... nearly there - NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParams = { - NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE, nullptr}; - getConfigImageParams.pRawMetricsConfig = rawMetricsConfig; - getConfigImageParams.bytesAllocated = 0; - getConfigImageParams.pBuffer = nullptr; - if (!NVPW_CALL( - NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParams))) { - return false; - } - - configImage.resize(getConfigImageParams.bytesCopied); - - // Write the Config image binary - getConfigImageParams.bytesAllocated = configImage.size(); - getConfigImageParams.pBuffer = configImage.data(); - if (!NVPW_CALL( - NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParams))) { - return false; - } - - return true; -} - -bool getCounterDataPrefixImage( - const std::string& chipName, - const std::vector& metricNames, - std::vector& counterDataImagePrefix) { - - NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = { - NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE, nullptr}; - 
metricsContextCreateParams.pChipName = chipName.c_str(); - - if (!NVPW_CALL( - NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams))) { - return false; - } - - NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = { - NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE, nullptr}; - metricsContextDestroyParams.pMetricsContext = - metricsContextCreateParams.pMetricsContext; - - - SCOPE_EXIT([&]() { - NVPW_MetricsContext_Destroy( - (NVPW_MetricsContext_Destroy_Params*)&metricsContextDestroyParams); - }); - - // Get all raw metrics required for given metricNames list - std::vector rawMetricRequests; - - // note: we need a variable at this functions scope to hold the string - // pointers for underlying C char arrays. - std::vector rawMetricDeps; - - if (!getRawMetricRequests( - metricsContextCreateParams.pMetricsContext, - metricNames, - rawMetricDeps, - rawMetricRequests)) { - return false; - } - - // Setup Counter Data builder - NVPW_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = { - NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE, nullptr}; - counterDataBuilderCreateParams.pChipName = chipName.c_str(); - if (!NVPW_CALL( - NVPW_CounterDataBuilder_Create(&counterDataBuilderCreateParams))) { - return false; - } - - NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderDestroyParams = { - NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE, nullptr}; - counterDataBuilderDestroyParams.pCounterDataBuilder = - counterDataBuilderCreateParams.pCounterDataBuilder; - SCOPE_EXIT([&]() { - NVPW_CounterDataBuilder_Destroy(( - NVPW_CounterDataBuilder_Destroy_Params*)&counterDataBuilderDestroyParams); - }); - - // Add metrics to counter data image prefix - NVPW_CounterDataBuilder_AddMetrics_Params addMetricsParams = { - NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE, nullptr}; - addMetricsParams.pCounterDataBuilder = - counterDataBuilderCreateParams.pCounterDataBuilder; - addMetricsParams.pRawMetricRequests = rawMetricRequests.data(); - 
addMetricsParams.numMetricRequests = rawMetricRequests.size(); - if (!NVPW_CALL( - NVPW_CounterDataBuilder_AddMetrics(&addMetricsParams))) { - return false; - } - - // Get image prefix size - NVPW_CounterDataBuilder_GetCounterDataPrefix_Params - getCounterDataPrefixParams = { - NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE, nullptr}; - getCounterDataPrefixParams.pCounterDataBuilder = - counterDataBuilderCreateParams.pCounterDataBuilder; - getCounterDataPrefixParams.bytesAllocated = 0; - getCounterDataPrefixParams.pBuffer = nullptr; - if (!NVPW_CALL( - NVPW_CounterDataBuilder_GetCounterDataPrefix( - &getCounterDataPrefixParams))) { - return false; - } - - counterDataImagePrefix.resize(getCounterDataPrefixParams.bytesCopied); - - // Now write counter data image prefix - getCounterDataPrefixParams.bytesAllocated = counterDataImagePrefix.size(); - getCounterDataPrefixParams.pBuffer = counterDataImagePrefix.data(); - if (!NVPW_CALL( - NVPW_CounterDataBuilder_GetCounterDataPrefix( - &getCounterDataPrefixParams))) { - return false; - } - - return true; -} - -// ------------------------------------------------- -// Metric and Counter Evaluation Utilities -// ------------------------------------------------- - -std::string getRangeDescription( - const std::vector& counterDataImage, - int rangeIndex) { - std::vector descriptionPtrs; - - NVPW_Profiler_CounterData_GetRangeDescriptions_Params getRangeDescParams = { - NVPW_Profiler_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE, nullptr}; - getRangeDescParams.pCounterDataImage = counterDataImage.data(); - getRangeDescParams.rangeIndex = rangeIndex; - - if (!NVPW_CALL( - NVPW_Profiler_CounterData_GetRangeDescriptions(&getRangeDescParams))) { - return ""; - } - - descriptionPtrs.resize(getRangeDescParams.numDescriptions); - getRangeDescParams.ppDescriptions = descriptionPtrs.data(); - - if (!NVPW_CALL( - NVPW_Profiler_CounterData_GetRangeDescriptions(&getRangeDescParams))) { - return ""; - } - - std::string 
rangeName; - - for (size_t i = 0; i < getRangeDescParams.numDescriptions; i++) { - if (i > 0) { - rangeName.append("/"); - } - rangeName.append(descriptionPtrs[i]); - } - return rangeName; -} - -CuptiProfilerResult evalMetricValues( - const std::string& chipName, - const std::vector& counterDataImage, - const std::vector& metricNames, - bool verbose) { - - if (!counterDataImage.size()) { - LOG(ERROR) << "Counter Data Image is empty!"; - return {}; - } - - NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = { - NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE, nullptr}; - metricsContextCreateParams.pChipName = chipName.c_str(); - if (!NVPW_CALL( - NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams))) { - return {}; - } - - NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = { - NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE, nullptr}; - metricsContextDestroyParams.pMetricsContext = - metricsContextCreateParams.pMetricsContext; - SCOPE_EXIT([&]() { - NVPW_MetricsContext_Destroy( - (NVPW_MetricsContext_Destroy_Params*)&metricsContextDestroyParams); - }); - - NVPW_CounterData_GetNumRanges_Params getNumRangesParams = { - NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE, nullptr}; - getNumRangesParams.pCounterDataImage = counterDataImage.data(); - if (!NVPW_CALL( - NVPW_CounterData_GetNumRanges(&getNumRangesParams))) { - return {}; - } - - // TBD in the future support special chars in metric name - // for now these are default - const bool isolated = true; - - // API takes a 2D array of chars - std::vector metricNamePtrs; - - for (const auto& metric : metricNames) { - metricNamePtrs.push_back(metric.c_str()); - } - - CuptiProfilerResult result{ - .metricNames = metricNames}; - - for (size_t rangeIndex = 0; rangeIndex < getNumRangesParams.numRanges; - ++rangeIndex) { - - CuptiRangeMeasurement rangeData { - .rangeName = getRangeDescription(counterDataImage, rangeIndex)}; - rangeData.values.resize(metricNames.size()); - - // First set 
Counter data image with current range - NVPW_MetricsContext_SetCounterData_Params setCounterDataParams = { - NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE, nullptr}; - - setCounterDataParams.pMetricsContext = - metricsContextCreateParams.pMetricsContext; - setCounterDataParams.pCounterDataImage = counterDataImage.data(); - setCounterDataParams.isolated = isolated; - setCounterDataParams.rangeIndex = rangeIndex; - - NVPW_CALL(NVPW_MetricsContext_SetCounterData(&setCounterDataParams)); - - - // Now we can evaluate GPU metrics - NVPW_MetricsContext_EvaluateToGpuValues_Params evalToGpuParams = { - NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE, nullptr}; - evalToGpuParams.pMetricsContext = - metricsContextCreateParams.pMetricsContext; - evalToGpuParams.numMetrics = metricNamePtrs.size(); - evalToGpuParams.ppMetricNames = metricNamePtrs.data(); - evalToGpuParams.pMetricValues = rangeData.values.data(); - - if (!NVPW_CALL(NVPW_MetricsContext_EvaluateToGpuValues(&evalToGpuParams))) { - LOG(WARNING) << "Failed to evaluate metris for range : " - << rangeData.rangeName; - continue; - } - - if (verbose) { - for (size_t i = 0; i < metricNames.size(); i++) { - LOG(INFO) << "rangeName: " << rangeData.rangeName - << "\tmetricName: " << metricNames[i] - << "\tgpuValue: " << rangeData.values[i]; - } - } - - result.rangeVals.emplace_back(std::move(rangeData)); - } - - return result; -} - -#else - -bool getProfilerConfigImage( - const std::string& /*chipName*/, - const std::vector& /*metricNames*/, - std::vector& /*configImage*/, - const uint8_t* /*counterAvailabilityImage*/) { - return false; -} - -bool getCounterDataPrefixImage( - const std::string& /*chipName*/, - const std::vector& /*metricNames*/, - std::vector& /*counterDataImagePrefix*/) { - return false; -} - -CuptiProfilerResult evalMetricValues( - const std::string& /*chipName*/, - const std::vector& /*counterDataImage*/, - const std::vector& /*metricNames*/, - bool /*verbose*/) { - return {}; -} - 
-#endif // cuda version > 10.00 and < 11.04 - -} // namespace nvperf -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.h b/plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.h deleted file mode 100644 index d5dd1b1c1d2..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiNvPerfMetric.h +++ /dev/null @@ -1,71 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "Logger.h" - -namespace KINETO_NAMESPACE { - -struct CuptiRangeMeasurement { - std::string rangeName; - std::vector values; -}; - -struct CuptiProfilerResult { - std::vector metricNames; - // rangeName, list values - std::vector rangeVals; -}; - -/* Utilities for CUPTI and NVIDIA PerfWorks Metric API - */ - -#define NVPW_CALL(call) \ - [&]() -> bool { \ - NVPA_Status _status_ = call; \ - if (_status_ != NVPA_STATUS_SUCCESS) { \ - LOG(WARNING) << fmt::format( \ - "function {} failed with error ({})", \ - #call, \ - (int)_status_); \ - return false; \ - } \ - return true; \ - }() - -// fixme - add a results string -// nvpperfGetResultString(_status_, &_errstr_); - -namespace nvperf { - -// Setup CUPTI profiler configuration blob and counter data image prefix -bool getProfilerConfigImage( - const std::string& chipName, - const std::vector& metricNames, - std::vector& configImage, - const uint8_t* counterAvailabilityImage = nullptr); - -// Setup CUPTI profiler configuration blob and counter data image prefix -bool getCounterDataPrefixImage( - const std::string& chipName, - const std::vector& metricNames, - std::vector& counterDataImagePrefix); - -/* NV Perf Metric Evaluation helpers - * - utilities to read binary data and obtain metrics for ranges - */ -CuptiProfilerResult evalMetricValues( - const std::string& chipName, - const std::vector& counterDataImage, - const 
std::vector& metricNames, - bool verbose = false); - - -} // namespace nvperf -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.cpp deleted file mode 100644 index e5f18ed7b0b..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include -#ifdef HAS_CUPTI -#include -#include -#endif // HAS_CUPTI -#include -#include - -#ifdef HAS_CUPTI -#include "cupti_call.h" -#endif - -#include "time_since_epoch.h" -#include "Logger.h" -#include "Demangle.h" - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "CuptiCallbackApiMock.h" -#include "CuptiRangeProfilerApi.h" - -#if HAS_CUPTI_RANGE_PROFILER -#include -#include -#include "cupti_call.h" -#endif // HAS_CUPTI_RANGE_PROFILER - -namespace KINETO_NAMESPACE { - -#if HAS_CUPTI_RANGE_PROFILER -constexpr char kRootUserRangeName[] = "__profile__"; -constexpr int kCallbacksCountToFlush = 500; - -// Should we set Counter availability image ourselves? 
-// Disabled this right now as this call conflicts with DCGM -// It is not clear why it should conflict except it being a profiler API call -// TODO Revisit -constexpr bool kSetCounterAvail = false; - -// Shared state to track one Cupti Profiler API per Device -namespace { -// per device profiler maps -std::unordered_map profiler_map; -std::unordered_map enable_flag; -std::unordered_map disable_flag; - -std::mutex contextMutex_; -std::unordered_map ctx_to_dev; -std::set active_devices; -} - -// forward declarations -void __trackCudaCtx(CUcontext ctx, uint32_t device_id, CUpti_CallbackId cbid); -void __trackCudaKernelLaunch(CUcontext ctx, const char* kernelName); - -/// Helper functions - -// Available raw counters -std::vector getCounterAvailiability(CUcontext cuContext) { - std::vector counterAvailabilityImage; - CUpti_Profiler_GetCounterAvailability_Params getCounterAvailabilityParams = { - CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE, nullptr}; - getCounterAvailabilityParams.ctx = cuContext; - CUPTI_CALL( - cuptiProfilerGetCounterAvailability(&getCounterAvailabilityParams)); - - counterAvailabilityImage.clear(); - counterAvailabilityImage.resize( - getCounterAvailabilityParams.counterAvailabilityImageSize); - - getCounterAvailabilityParams.pCounterAvailabilityImage = - counterAvailabilityImage.data(); - CUPTI_CALL( - cuptiProfilerGetCounterAvailability(&getCounterAvailabilityParams)); - - return counterAvailabilityImage; -} - -std::string getChipName(int deviceId) { - // Get chip name for the cuda device - CUpti_Device_GetChipName_Params getChipNameParams = { - CUpti_Device_GetChipName_Params_STRUCT_SIZE, nullptr}; - - getChipNameParams.deviceIndex = deviceId; - CUPTI_CALL(cuptiDeviceGetChipName(&getChipNameParams)); - - return getChipNameParams.pChipName; -} - -inline uint32_t getDevID(CUcontext ctx) { - uint32_t device_id = UINT32_MAX; - CUPTI_CALL(cuptiGetDeviceId(ctx, &device_id)); - if (device_id == UINT32_MAX) { - LOG(ERROR) << "Could not 
determine dev id for = " << ctx; - } - return device_id; -} - -// We use CUPTI Callback functions in three ways : -// 1. Track cuda contexts and maintain a list of active GPUs to profile -// 2. Callbacks on kernel launches to track the name of automatic -// ranges that correspond to names of kernels -// 3. Lastly CUPTI profiler has to be enabled on the same thread executing -// the CUDA kernels. We use Callbacks to enable the profiler -// asynchronously from another thread. - -void disableKernelCallbacks(); - -void trackCudaCtx( - CUpti_CallbackDomain /*domain*/, - CUpti_CallbackId cbid, - const CUpti_CallbackData* cbInfo) { - auto *d = reinterpret_cast(cbInfo); - auto ctx = d->context; - uint32_t device_id = getDevID(ctx); - - if (device_id == UINT32_MAX) { - return; - } - - __trackCudaCtx(ctx, device_id, cbid); -} - -void __trackCudaCtx(CUcontext ctx, uint32_t device_id, CUpti_CallbackId cbid) { - std::lock_guard g(contextMutex_); - if (cbid == CUPTI_CBID_RESOURCE_CONTEXT_CREATED) { - VLOG(0) << "CUPTI Profiler observed CUDA Context created = " - << ctx << " device id = " << device_id; - active_devices.insert(device_id); - if constexpr (kSetCounterAvail) { - if (active_devices.size() == 1) { - CuptiRBProfilerSession::setCounterAvailabilityImage( - getCounterAvailiability(ctx)); - } - } - ctx_to_dev[ctx] = device_id; - - } else if (cbid == CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING) { - VLOG(0) << "CUPTI Profiler observed CUDA Context destroyed = " - << ctx << " device id = " << device_id; - auto it = active_devices.find(device_id); - if (it != active_devices.end()) { - active_devices.erase(it); - ctx_to_dev.erase(ctx); - } - } -} - -void trackCudaKernelLaunch( - CUpti_CallbackDomain /*domain*/, - CUpti_CallbackId /*cbid*/, - const CUpti_CallbackData* cbInfo) { - VLOG(1) << " Trace : Callback name = " - << (cbInfo->symbolName ? 
cbInfo->symbolName: "") - << " context ptr = " << cbInfo->context; - auto ctx = cbInfo->context; - // should be in CUPTI_API_ENTER call site - if (cbInfo->callbackSite != CUPTI_API_ENTER) { - return; - } - __trackCudaKernelLaunch(ctx, cbInfo->symbolName); -} - -void __trackCudaKernelLaunch( - CUcontext ctx, - const char* kernelName) { - VLOG(0) << " Tracking kernel name = " << (kernelName ? kernelName : "") - << " context ptr = " << ctx; - - uint32_t device_id = 0; - auto it = ctx_to_dev.find(ctx); - if (it == ctx_to_dev.end()) { - // Warning here could be too noisy - VLOG(0) << " Could not find corresponding device to ctx = " << ctx; - return; - } else { - device_id = it->second; - } - - auto pit = profiler_map.find(device_id); - if (pit == profiler_map.end() || pit->second == nullptr) { - return; - } - auto profiler = pit->second; - - if (enable_flag[device_id]) { - LOG(INFO) << "Callback handler is enabling cupti profiler"; - profiler->startAndEnable(); - enable_flag[device_id] = false; - - } else if (disable_flag[device_id]) { - LOG(INFO) << "Callback handler is disabling cupti profiler"; - profiler->disableAndStop(); - return; - } - - if (profiler->curRange_ == CUPTI_AutoRange) { - profiler->logKernelName(kernelName ? 
kernelName : "__missing__"); - } - - /* TODO add per kernel time logging - if (measure_per_kernel) { - profiler->kernelStartTs_.push_back( - std::chrono::high_resolution_clock::now()); - } - */ - - // periodically flush profiler data from GPU - if (profiler->numCallbacks_ % kCallbacksCountToFlush == 0) { - profiler->flushCounterData(); - } - profiler->numCallbacks_++; -} - -void enableKernelCallbacks() { - auto& cbapi = CuptiCallbackApi::singleton(); - bool status = cbapi.enableCallback( - CUPTI_CB_DOMAIN_RUNTIME_API, - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000); - if (!status) { - LOG(WARNING) << "CUPTI Range Profiler unable to " - << "enable cuda kernel launch callback"; - return; - } - LOG(INFO) << "CUPTI Profiler kernel callbacks enabled"; -} - -void disableKernelCallbacks() { - auto& cbapi = CuptiCallbackApi::singleton(); - bool status = cbapi.disableCallback( - CUPTI_CB_DOMAIN_RUNTIME_API, - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000); - if (!status) { - LOG(WARNING) << "CUPTI Range Profiler unable to " - << "disable cuda kernel launch callback"; - return; - } - LOG(INFO) << "CUPTI Profiler kernel callbacks disabled"; -} - -// static -std::set CuptiRBProfilerSession::getActiveDevices() { - std::lock_guard g(contextMutex_); - return active_devices; -} - -// static -void CuptiRBProfilerSession::initCupti() { - CUpti_Profiler_Initialize_Params profilerInitializeParams = { - CUpti_Profiler_Initialize_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerInitialize(&profilerInitializeParams)); -} - -// static -void CuptiRBProfilerSession::deInitCupti() { - CUpti_Profiler_DeInitialize_Params profilerDeInitializeParams = { - CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerDeInitialize(&profilerDeInitializeParams)); -} - -// static -void CuptiRBProfilerSession::staticInit() { - CuptiRBProfilerSession::initCupti(); - - // Register CUPTI callbacks - auto& cbapi = CuptiCallbackApi::singleton(); - CUpti_CallbackDomain 
domain = CUPTI_CB_DOMAIN_RESOURCE; - bool status = cbapi.registerCallback( - domain, CuptiCallbackApi::RESOURCE_CONTEXT_CREATED, trackCudaCtx); - status = status && cbapi.registerCallback( - domain, CuptiCallbackApi::RESOURCE_CONTEXT_DESTROYED, trackCudaCtx); - status = status && cbapi.enableCallback( - domain, CUPTI_CBID_RESOURCE_CONTEXT_CREATED); - status = status && cbapi.enableCallback( - domain, CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING); - - if (!status) { - LOG(WARNING) << "CUPTI Range Profiler unable to attach cuda context " - << "create and destroy callbacks"; - CUPTI_CALL(cbapi.getCuptiStatus()); - return; - } - - domain = CUPTI_CB_DOMAIN_RUNTIME_API; - status = cbapi.registerCallback( - domain, CuptiCallbackApi::CUDA_LAUNCH_KERNEL, trackCudaKernelLaunch); - - if (!status) { - LOG(WARNING) << "CUPTI Range Profiler unable to attach cuda kernel " - << "launch callback"; - return; - } -} - -// static -std::vector& CuptiRBProfilerSession::counterAvailabilityImage() { - static std::vector counterAvailabilityImage_; - return counterAvailabilityImage_; -} - - -// Setup the profiler sessions -CuptiRBProfilerSession::CuptiRBProfilerSession( - const std::vector& metricNames, - int deviceId, - int maxRanges, - int numNestingLevels, - CUcontext cuContext) - : metricNames_(metricNames), - chipName_(getChipName(deviceId)), - deviceId_(deviceId), - maxRanges_(maxRanges), - numNestingLevels_(numNestingLevels), - cuContext_(cuContext) { - CuptiRBProfilerSession::initCupti(); - - LOG(INFO) << "Initializing CUPTI profiler session : device = " << deviceId - << " chip = " << chipName_; - /* Generate configuration for metrics, this can also be done offline*/ - NVPW_InitializeHost_Params initializeHostParams = { - NVPW_InitializeHost_Params_STRUCT_SIZE, nullptr}; - NVPW_CALL(NVPW_InitializeHost(&initializeHostParams)); - - if (metricNames.size()) { - if (!nvperf::getProfilerConfigImage( - chipName_, - metricNames, - configImage, - 
CuptiRBProfilerSession::counterAvailabilityImage().data())) { - LOG(ERROR) << "Failed to create configImage or counterDataImagePrefix"; - return; - } - if (!nvperf::getCounterDataPrefixImage( - chipName_, - metricNames, - counterDataImagePrefix)) { - LOG(ERROR) << "Failed to create counterDataImagePrefix"; - return; - } - } else { - LOG(ERROR) << "No metrics provided to profile"; - return; - } - - if (!createCounterDataImage()) { - LOG(ERROR) << "Failed to create counterDataImage"; - return; - } - - LOG(INFO) << "Size of structs\n" - << " config image size = " << configImage.size() << " B" - << " counter data image prefix = " - << counterDataImagePrefix.size() << " B" - << " counter data image size = " << counterDataImage.size() / 1024 - << " KB" - << " counter sb image size = " - << counterDataScratchBuffer.size() << " B"; - - beginPassParams_ = {CUpti_Profiler_BeginPass_Params_STRUCT_SIZE, nullptr}; - endPassParams_ = {CUpti_Profiler_EndPass_Params_STRUCT_SIZE, nullptr}; - - initSuccess_ = true; - profiler_map[deviceId] = this; -} - -// used in unittests only -CuptiRBProfilerSession::CuptiRBProfilerSession(int deviceId, CUcontext ctx) - : deviceId_(deviceId), cuContext_(ctx) { - initSuccess_ = true; - profiler_map[deviceId] = this; -} - -void CuptiRBProfilerSession::startInternal( - CUpti_ProfilerRange profilerRange, - CUpti_ProfilerReplayMode profilerReplayMode) { - LOG(INFO) << "Starting profiler session: profiler range = " - << ((profilerRange == CUPTI_AutoRange) ? "autorange" : "userrange") - << " replay mode = " - << ((profilerReplayMode == CUPTI_KernelReplay) ? 
"kernel" : "user"); - if (!initSuccess_) { - LOG(WARNING) << __func__ << "() bailing out since initialization failed"; - return; - } - - if (cuContext_ == nullptr) { - for (const auto& it : ctx_to_dev) { - if (it.second == deviceId_) { - cuContext_ = it.first; - break; - } - } - LOG(INFO) << " Cupti Profiler using CUDA context = " << cuContext_; - } - - profilerStartTs_ = std::chrono::high_resolution_clock::now(); - curRange_ = profilerRange; - curReplay_ = profilerReplayMode; - - CUpti_Profiler_BeginSession_Params beginSessionParams = { - CUpti_Profiler_BeginSession_Params_STRUCT_SIZE, nullptr}; - - beginSessionParams.ctx = cuContext_; - beginSessionParams.counterDataImageSize = counterDataImage.size(); - beginSessionParams.pCounterDataImage = counterDataImage.data(); - beginSessionParams.counterDataScratchBufferSize = - counterDataScratchBuffer.size(); - beginSessionParams.pCounterDataScratchBuffer = counterDataScratchBuffer.data(); - beginSessionParams.range = profilerRange; - beginSessionParams.replayMode = profilerReplayMode; - beginSessionParams.maxRangesPerPass = maxRanges_; - beginSessionParams.maxLaunchesPerPass = maxRanges_; - - auto status = CUPTI_CALL(cuptiProfilerBeginSession(&beginSessionParams)); - if (status != CUPTI_SUCCESS) { - LOG(WARNING) << "Failed to start CUPTI profiler"; - initSuccess_ = false; - return; - } - - // Set counter configuration - CUpti_Profiler_SetConfig_Params setConfigParams = { - CUpti_Profiler_SetConfig_Params_STRUCT_SIZE, nullptr}; - - setConfigParams.ctx = cuContext_; - setConfigParams.pConfig = configImage.data(); - setConfigParams.configSize = configImage.size(); - setConfigParams.passIndex = 0; - setConfigParams.minNestingLevel = 1; - setConfigParams.numNestingLevels = numNestingLevels_; - status = CUPTI_CALL(cuptiProfilerSetConfig(&setConfigParams)); - - if (status != CUPTI_SUCCESS) { - LOG(WARNING) << "Failed to configure CUPTI profiler"; - initSuccess_ = false; - return; - } - profilerInitDoneTs_ = 
std::chrono::high_resolution_clock::now(); - - if (curRange_ == CUPTI_AutoRange) { - enableKernelCallbacks(); - } - profilingActive_ = true; -} - -void CuptiRBProfilerSession::stop() { - if (!initSuccess_) { - LOG(WARNING) << __func__ << "() bailing out since initialization failed"; - return; - } - LOG(INFO) << "Stop profiler session on device = " << deviceId_; - - CUpti_Profiler_UnsetConfig_Params unsetConfigParams = { - CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerUnsetConfig(&unsetConfigParams)); - - CUpti_Profiler_EndSession_Params endSessionParams = { - CUpti_Profiler_EndSession_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerEndSession(&endSessionParams)); - - disableKernelCallbacks(); - - profilerStopTs_ = std::chrono::high_resolution_clock::now(); - profilingActive_ = false; -} - -void CuptiRBProfilerSession::beginPass() { - if (!initSuccess_) { - LOG(WARNING) << __func__ << "() bailing out since initialization failed"; - return; - } - CUPTI_CALL(cuptiProfilerBeginPass(&beginPassParams_)); -} - -bool CuptiRBProfilerSession::endPass() { - if (!initSuccess_) { - LOG(WARNING) << __func__ << "() bailing out since initialization failed"; - return true; - } - CUPTI_CALL(cuptiProfilerEndPass(&endPassParams_)); - return endPassParams_.allPassesSubmitted; -} - -void CuptiRBProfilerSession::flushCounterData() { - LOG(INFO) << "Flushing counter data on device = " << deviceId_; - CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = { - CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerFlushCounterData(&flushCounterDataParams)); -} - -/// Enable and disable the profiler -void CuptiRBProfilerSession::enable() { - if (!initSuccess_) { - LOG(WARNING) << __func__ << "() bailing out since initialization failed"; - return; - } - CUpti_Profiler_EnableProfiling_Params enableProfilingParams = { - CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE, nullptr}; - 
CUPTI_CALL(cuptiProfilerEnableProfiling(&enableProfilingParams)); -} - -void CuptiRBProfilerSession::disable() { - if (!initSuccess_) { - LOG(WARNING) << __func__ << "() bailing out since initialization failed"; - return; - } - CUpti_Profiler_DisableProfiling_Params disableProfilingParams = { - CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerDisableProfiling(&disableProfilingParams)); -} - -/// User range based profiling -void CuptiRBProfilerSession::pushRange(const std::string& rangeName) { - LOG(INFO) << " CUPTI pushrange ( " << rangeName << " )"; - CUpti_Profiler_PushRange_Params pushRangeParams = { - CUpti_Profiler_PushRange_Params_STRUCT_SIZE, nullptr}; - pushRangeParams.pRangeName = rangeName.c_str(); - CUPTI_CALL(cuptiProfilerPushRange(&pushRangeParams)); -} - -void CuptiRBProfilerSession::popRange() { - LOG(INFO) << " CUPTI pop range"; - CUpti_Profiler_PopRange_Params popRangeParams = { - CUpti_Profiler_PopRange_Params_STRUCT_SIZE, nullptr}; - CUPTI_CALL(cuptiProfilerPopRange(&popRangeParams)); -} - -void CuptiRBProfilerSession::startAndEnable() { - startInternal(curRange_, curReplay_); - if (curReplay_ == CUPTI_UserReplay) { - beginPass(); - } - enable(); - if (curRange_ == CUPTI_UserRange) { - pushRange(kRootUserRangeName); - } - enable_flag[deviceId_] = false; -} - -void CuptiRBProfilerSession::disableAndStop() { - if (curRange_ == CUPTI_UserRange) { - popRange(); - } - disable(); - if (curReplay_ == CUPTI_UserReplay) { - endPass(); - flushCounterData(); - } - stop(); - disable_flag[deviceId_] = false; -} - -void CuptiRBProfilerSession::asyncStartAndEnable( - CUpti_ProfilerRange profilerRange, - CUpti_ProfilerReplayMode profilerReplayMode) { - LOG(INFO) << "Starting CUPTI profiler asynchronously on device = " - << deviceId_ << " profiler range = " - << ((profilerRange == CUPTI_AutoRange) ? "autorange" : "userrange") - << " replay mode = " - << ((profilerReplayMode == CUPTI_KernelReplay) ? 
"kernel" : "user"); - curReplay_ = profilerReplayMode; - curRange_ = profilerRange; - enable_flag[deviceId_] = true; - enableKernelCallbacks(); -} - -void CuptiRBProfilerSession::asyncDisableAndStop() { - LOG(INFO) << "Stopping CUPTI profiler asynchronously on device = " - << deviceId_ << " cu context = " << cuContext_; - disable_flag[deviceId_] = true; -} - - -CuptiProfilerResult CuptiRBProfilerSession::evaluateMetrics( - bool verbose) { - if (!initSuccess_) { - LOG(WARNING) << "Profiling failed, no results to return"; - return {}; - } - if (profilingActive_) { - disableAndStop(); - } - - LOG(INFO) << "Total kernels logged = " << kernelNames_.size(); - if (verbose) { - for (const auto& kernel : kernelNames_) { - std::cout << demangle(kernel) << std::endl; - } - LOG(INFO) << "Profiler Range data : "; - } - - auto results = nvperf::evalMetricValues( - chipName_, counterDataImage, metricNames_, verbose /*verbose*/); - - // profiler end-end duration - auto duration_ms = std::chrono::duration_cast( - profilerStopTs_ - profilerStartTs_); - - auto init_dur_ms = std::chrono::duration_cast( - profilerInitDoneTs_ - profilerStartTs_); - LOG(INFO) << "Total profiler time = " << duration_ms.count() << " ms"; - LOG(INFO) << "Total profiler init time = " << init_dur_ms.count() << " ms"; - - return results; -} - -std::unique_ptr CuptiRBProfilerSession::getProfilerTraceSpan() { - return std::make_unique( - timeSinceEpoch(profilerStartTs_), - timeSinceEpoch(profilerStopTs_), - "__cupti_profiler__" - ); -} - -void CuptiRBProfilerSession::saveCounterData( - const std::string& /*CounterDataFileName*/, - const std::string& /*CounterDataSBFileName*/) { - /* TBD write binary files for counter data and counter scratch buffer */ -} - -/// Setup counter data -bool CuptiRBProfilerSession::createCounterDataImage() { - CUpti_Profiler_CounterDataImageOptions counterDataImageOptions; - counterDataImageOptions.pCounterDataPrefix = counterDataImagePrefix.data(); - 
counterDataImageOptions.counterDataPrefixSize = counterDataImagePrefix.size(); - counterDataImageOptions.maxNumRanges = maxRanges_; - counterDataImageOptions.maxNumRangeTreeNodes = maxRanges_; - counterDataImageOptions.maxRangeNameLength = 64; - - // Calculate size of counter data image - CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = { - CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE, nullptr}; - calculateSizeParams.pOptions = &counterDataImageOptions; - calculateSizeParams.sizeofCounterDataImageOptions = - CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; - - CUPTI_CALL( - cuptiProfilerCounterDataImageCalculateSize(&calculateSizeParams)); - counterDataImage.resize(calculateSizeParams.counterDataImageSize); - - // Initialize counter data image - CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams = { - CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE, nullptr}; - initializeParams.sizeofCounterDataImageOptions = - CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE; - initializeParams.pOptions = &counterDataImageOptions; - initializeParams.counterDataImageSize = - calculateSizeParams.counterDataImageSize; - initializeParams.pCounterDataImage = counterDataImage.data(); - CUPTI_CALL(cuptiProfilerCounterDataImageInitialize(&initializeParams)); - - // Calculate counter Scratch Buffer size - CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params - scratchBufferSizeParams = { - CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE, nullptr}; - - scratchBufferSizeParams.counterDataImageSize = - calculateSizeParams.counterDataImageSize; - scratchBufferSizeParams.pCounterDataImage = - initializeParams.pCounterDataImage; - CUPTI_CALL(cuptiProfilerCounterDataImageCalculateScratchBufferSize( - &scratchBufferSizeParams)); - - counterDataScratchBuffer.resize( - scratchBufferSizeParams.counterDataScratchBufferSize); - - // Initialize scratch buffer - 
CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params - initScratchBufferParams = { - CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE, nullptr}; - - initScratchBufferParams.counterDataImageSize = - calculateSizeParams.counterDataImageSize; - - initScratchBufferParams.pCounterDataImage = - initializeParams.pCounterDataImage; - initScratchBufferParams.counterDataScratchBufferSize = - scratchBufferSizeParams.counterDataScratchBufferSize; - initScratchBufferParams.pCounterDataScratchBuffer = - counterDataScratchBuffer.data(); - - CUPTI_CALL(cuptiProfilerCounterDataImageInitializeScratchBuffer( - &initScratchBufferParams)); - - return true; -} - -#elif defined(HAS_CUPTI) - -// Create empty stubs for the API when CUPTI is not present. -CuptiRBProfilerSession::CuptiRBProfilerSession( - const std::vector& metricNames, - int deviceId, - int maxRanges, - int numNestingLevels, - CUcontext cuContext) - : metricNames_(metricNames), - deviceId_(deviceId), - maxRanges_(maxRanges), - numNestingLevels_(numNestingLevels), - cuContext_(cuContext) {} -void CuptiRBProfilerSession::stop() {} -void CuptiRBProfilerSession::enable() {} -void CuptiRBProfilerSession::disable() {} -void CuptiRBProfilerSession::beginPass() {} -bool CuptiRBProfilerSession::endPass() { return true; } -void CuptiRBProfilerSession::flushCounterData() {} -void CuptiRBProfilerSession::pushRange(const std::string& /*rangeName*/) {} -void CuptiRBProfilerSession::popRange() {} -void CuptiRBProfilerSession::asyncStartAndEnable( - CUpti_ProfilerRange /*profilerRange*/, - CUpti_ProfilerReplayMode /*profilerReplayMode*/) {} -void CuptiRBProfilerSession::asyncDisableAndStop() {} -CuptiProfilerResult CuptiRBProfilerSession::evaluateMetrics(bool verbose) { - static CuptiProfilerResult res; - return res; -}; -void CuptiRBProfilerSession::saveCounterData( - const std::string& /*CounterDataFileName*/, - const std::string& /*CounterDataSBFileName*/) {} -void CuptiRBProfilerSession::initCupti() 
{} -void CuptiRBProfilerSession::deInitCupti() {} -void CuptiRBProfilerSession::staticInit() {} -bool CuptiRBProfilerSession::createCounterDataImage() { return true; } -void CuptiRBProfilerSession::startInternal( - CUpti_ProfilerRange /*profilerRange*/, - CUpti_ProfilerReplayMode /*profilerReplayMode*/) {} -std::vector& CuptiRBProfilerSession::counterAvailabilityImage() { - static std::vector _vec; - return _vec; -} -#endif // HAS_CUPTI_RANGE_PROFILER - -namespace testing { - -void trackCudaCtx(CUcontext ctx, uint32_t device_id, CUpti_CallbackId cbid) { -#if HAS_CUPTI_RANGE_PROFILER - __trackCudaCtx(ctx, device_id, cbid); -#endif // HAS_CUPTI_RANGE_PROFILER -} - -void trackCudaKernelLaunch(CUcontext ctx, const char* kernelName) { -#if HAS_CUPTI_RANGE_PROFILER - __trackCudaKernelLaunch(ctx, kernelName); -#endif // HAS_CUPTI_RANGE_PROFILER -} - -} // namespace testing -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.h b/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.h deleted file mode 100644 index 98a0b3ea5f4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerApi.h +++ /dev/null @@ -1,220 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#ifdef HAS_CUPTI -#include -#include -// Using CUDA 11 and above due to usage of API: cuptiProfilerGetCounterAvailability. 
-#if defined(CUDART_VERSION) && CUDART_VERSION >= 10000 && CUDART_VERSION < 11040 && CUDA_VERSION >= 11000 -#define HAS_CUPTI_RANGE_PROFILER 1 -#endif // CUDART_VERSION > 10.00 and < 11.04 && CUDA_VERSION >= 11.00 -#endif // HAS_CUPTI - -#if HAS_CUPTI_RANGE_PROFILER -#include -#include -#include -#else -using CUpti_ProfilerRange = enum -{ - CUPTI_AutoRange, - CUPTI_UserRange, -}; - -using CUpti_ProfilerReplayMode = enum -{ - CUPTI_KernelReplay, - CUPTI_UserReplay, -}; -#endif // HAS_CUPTI_RANGE_PROFILER - -#include -#include -#include -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "TraceSpan.h" -#include "CuptiCallbackApi.h" -#include "CuptiNvPerfMetric.h" - -/* Cupti Range based profiler session - * See : https://docs.nvidia.com/cupti/Cupti/r_main.html#r_profiler - */ - -namespace KINETO_NAMESPACE { - -class CuptiRBProfilerSession { - public: - // Initialize and configure CUPTI Profiler counters. - // - Metric names must be provided as string vector. - // - Supported values by CUPTI can be found at - - // https://docs.nvidia.com/cupti/Cupti/r_main.html#r_host_metrics_api - explicit CuptiRBProfilerSession( - const std::vector& metricNames, - int deviceId, - int maxRanges, - int numNestingLevels = 1, - CUcontext cuContext = 0); - - virtual ~CuptiRBProfilerSession() = default; - - // Start profiling session - // This function has to be called from the CPU thread running - // the CUDA context. 
If this is not the case asyncStartAndEnable() - // can be used - void start( - CUpti_ProfilerRange profilerRange = CUPTI_AutoRange, - CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_KernelReplay) { - startInternal(profilerRange, profilerReplayMode); - } - - // Stop profiling session - virtual void stop(); - - virtual void enable(); - virtual void disable(); - - // Profiler passes - // GPU hardware has limited performance monitoring resources - // the CUPTI profiler may need to run multiple passes to collect - // data for a given range - // If we use kernel replay model the kernels are automatically replayed - // else, you can use the beginPass() and endPass() functions below - // for user to manage the replays - - // starts a profiler pass with given kernels in between - virtual void beginPass(); - - // end a profiler pass with given kernels in between - // returns true if no more passes are required - virtual bool endPass(); - - // flushes the counter data - required if you use user replay - virtual void flushCounterData(); - - // Each pass can contain multiple of ranges - // metrics configured in a pass are collected per each range-stack. 
- virtual void pushRange(const std::string& rangeName); - virtual void popRange(); - - // utilities for common operations - void startAndEnable(); - void disableAndStop(); - - // Async APIs : these will can be called from another thread - // outside the CUDA context being profiled - void asyncStartAndEnable( - CUpti_ProfilerRange profilerRange = CUPTI_AutoRange, - CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_KernelReplay); - void asyncDisableAndStop(); - - void printMetrics() { - evaluateMetrics(true); - } - - std::unique_ptr getProfilerTraceSpan(); - - virtual CuptiProfilerResult evaluateMetrics(bool verbose = false); - - void saveCounterData( - const std::string& CounterDataFileName, - const std::string& CounterDataSBFileName); - - // This is not thread safe so please only call after - // profiling has stopped - const std::vector& getKernelNames() const { - return kernelNames_; - } - - int deviceId() const { - return deviceId_; - } - - bool profilingActive() const { - return profilingActive_; - } - - static std::set getActiveDevices(); - - static void initCupti(); - - static void deInitCupti(); - - static void staticInit(); - - static void setCounterAvailabilityImage(std::vector img) { - counterAvailabilityImage() = img; - } - protected: - CuptiRBProfilerSession(int deviceId, CUcontext ctx); - - virtual void startInternal( - CUpti_ProfilerRange profilerRange, - CUpti_ProfilerReplayMode profilerReplayMode); - - CUpti_ProfilerRange curRange_ = CUPTI_AutoRange; - CUpti_ProfilerReplayMode curReplay_ = CUPTI_KernelReplay; - - private: - - bool createCounterDataImage(); - - - // log kernel name that used with callbacks - void logKernelName(const char* kernel) { - std::lock_guard lg(kernelNamesMutex_); - kernelNames_.emplace_back(kernel); - } - - std::vector metricNames_; - std::string chipName_; - - uint32_t deviceId_ = 0; - int maxRanges_; - int numNestingLevels_; - CUcontext cuContext_; - - - // data buffers for configuration and counter data collection - 
std::vector counterDataImagePrefix; - std::vector configImage; - std::vector counterDataImage; - std::vector counterDataScratchBuffer; - - std::chrono::time_point profilerStartTs_; - std::chrono::time_point - profilerInitDoneTs_; - std::chrono::time_point profilerStopTs_; - - std::mutex kernelNamesMutex_; - // raw kernel names (not demangled) - std::vector kernelNames_; - - uint32_t numCallbacks_ = 0; - - static std::vector& counterAvailabilityImage(); - -#if HAS_CUPTI_RANGE_PROFILER - CUpti_Profiler_BeginPass_Params beginPassParams_; - CUpti_Profiler_EndPass_Params endPassParams_; -#endif - - bool initSuccess_ = false; - bool profilingActive_ = false; - - friend void __trackCudaKernelLaunch(CUcontext ctx, const char* kernelName); -}; - -// called directly only in unit tests -namespace testing { - -void trackCudaCtx(CUcontext ctx, uint32_t device_id, CUpti_CallbackId cbid); -void trackCudaKernelLaunch(CUcontext ctx, const char* kernelName); - -} // namespace testing - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.cpp b/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.cpp deleted file mode 100644 index 04b1ad0cb3f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include - -#include -#include - -#include -#include - -using namespace std::chrono; - -namespace KINETO_NAMESPACE { - -// number of ranges affect the size of counter data binary used by -// the CUPTI Profiler. 
these defaults can be tuned -constexpr int KMaxAutoRanges = 1500; // supports 1500 kernels -constexpr int KMaxUserRanges = 10; // enable upto 10 sub regions marked by user - -constexpr char kCuptiProfilerMetricsKey[] = "CUPTI_PROFILER_METRICS"; -constexpr char kCuptiProfilerPerKernelKey[] = "CUPTI_PROFILER_ENABLE_PER_KERNEL"; -constexpr char kCuptiProfilerMaxRangesKey[] = "CUPTI_PROFILER_MAX_RANGES"; - -CuptiRangeProfilerConfig::CuptiRangeProfilerConfig(Config& cfg) - : parent_(&cfg), - cuptiProfilerPerKernel_(false), - cuptiProfilerMaxRanges_(0) {} - -bool CuptiRangeProfilerConfig::handleOption(const std::string& name, std::string& val) { - VLOG(0) << " handling : " << name << " = " << val; - // Cupti Range based Profiler configuration - if (!name.compare(kCuptiProfilerMetricsKey)) { - activitiesCuptiMetrics_ = splitAndTrim(val, ','); - } else if (!name.compare(kCuptiProfilerPerKernelKey)) { - cuptiProfilerPerKernel_ = toBool(val); - } else if (!name.compare(kCuptiProfilerMaxRangesKey)) { - cuptiProfilerMaxRanges_ = toInt64(val); - } else { - return false; - } - return true; -} - -void CuptiRangeProfilerConfig::setDefaults() { - if (activitiesCuptiMetrics_.size() > 0 && cuptiProfilerMaxRanges_ == 0) { - cuptiProfilerMaxRanges_ = - cuptiProfilerPerKernel_ ? 
KMaxAutoRanges : KMaxUserRanges; - } -} - -void CuptiRangeProfilerConfig::printActivityProfilerConfig(std::ostream& s) const { - if (activitiesCuptiMetrics_.size() > 0) { - s << "Cupti Profiler metrics : " - << fmt::format("{}", fmt::join(activitiesCuptiMetrics_, ", ")) << std::endl; - s << "Cupti Profiler measure per kernel : " - << cuptiProfilerPerKernel_ << std::endl; - s << "Cupti Profiler max ranges : " << cuptiProfilerMaxRanges_ << std::endl; - } -} - -void CuptiRangeProfilerConfig::registerFactory() { - Config::addConfigFactory( - kCuptiProfilerConfigName, - [](Config& cfg) { return new CuptiRangeProfilerConfig(cfg); }); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.h b/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.h deleted file mode 100644 index 549b8a4e8b4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/CuptiRangeProfilerConfig.h +++ /dev/null @@ -1,86 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include "Config.h" - -#include -#include -#include -#include - -namespace KINETO_NAMESPACE { - -constexpr char kCuptiProfilerConfigName[] = "cupti_rb_profiler"; - -class CuptiRangeProfilerConfig : public AbstractConfig { - public: - bool handleOption(const std::string& name, std::string& val) override; - - void validate( - const std::chrono::time_point& - fallbackProfileStartTime) override {} - - static CuptiRangeProfilerConfig& get(const Config& cfg) { - return dynamic_cast(cfg.feature( - kCuptiProfilerConfigName)); - } - - Config& parent() const { - return *parent_; - } - - std::vector activitiesCuptiMetrics() const { - return activitiesCuptiMetrics_; - } - - bool cuptiProfilerPerKernel() const { - return cuptiProfilerPerKernel_; - } - - int64_t cuptiProfilerMaxRanges() const { - return cuptiProfilerMaxRanges_; - } - - void setSignalDefaults() override { - setDefaults(); - } - - void setClientDefaults() override { - setDefaults(); - } - - void printActivityProfilerConfig(std::ostream& s) const override; - - static void registerFactory(); - protected: - AbstractConfig* cloneDerived(AbstractConfig& parent) const override { - CuptiRangeProfilerConfig* clone = new CuptiRangeProfilerConfig(*this); - clone->parent_ = dynamic_cast(&parent); - return clone; - } - - private: - CuptiRangeProfilerConfig() = delete; - explicit CuptiRangeProfilerConfig(Config& parent); - explicit CuptiRangeProfilerConfig( - const CuptiRangeProfilerConfig& other) = default; - - // some defaults will depend on other configuration - void setDefaults(); - - // Associated Config object - Config* parent_; - - // Counter metrics exposed via CUPTI Profiler API - std::vector activitiesCuptiMetrics_; - - // Collect profiler metrics per kernel - autorange made - bool cuptiProfilerPerKernel_{false}; - - // max number of ranges to configure the profiler for. 
- // this has to be set before hand to reserve space for the output - int64_t cuptiProfilerMaxRanges_ = 0; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/DaemonConfigLoader.h b/plugins/tensorboard-plugins/libkineto/src/DaemonConfigLoader.h deleted file mode 100644 index 9b0ed928636..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/DaemonConfigLoader.h +++ /dev/null @@ -1,27 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include - -namespace KINETO_NAMESPACE { - -class DaemonConfigLoader { - public: - virtual ~DaemonConfigLoader() {} - - // Return the base config from the daemon - virtual std::string readBaseConfig() = 0; - - // Return a configuration string from the daemon, if one has been posted. - virtual std::string readOnDemandConfig(bool events, bool activities) = 0; - - // Returns the number of tracked contexts for this device. The daemon has a - // global view. If an unexpedted error occurs, return -1. - virtual int gpuContextCount(uint32_t device) = 0; - - virtual void setCommunicationFabric(bool enabled) = 0; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/Demangle.cpp b/plugins/tensorboard-plugins/libkineto/src/Demangle.cpp deleted file mode 100644 index f84f0b8ec36..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/Demangle.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "Demangle.h" - -#ifndef _MSC_VER -#include -#endif -#include -#include - -namespace KINETO_NAMESPACE { - -static constexpr int kMaxSymbolSize = 1024; - -std::string demangle(const char* name) { -#ifndef _MSC_VER - if (!name) { - return ""; - } - - if (strlen(name) > kMaxSymbolSize) { - return name; - } - - int status; - size_t len = 0; - char* demangled = abi::__cxa_demangle(name, nullptr, &len, &status); - if (status != 0) { - return name; - } - std::string res(demangled); - // The returned buffer must be freed! - free(demangled); - return res; -#else - // TODO: demangling on Windows - if (!name) { - return ""; - } else { - return name; - } -#endif -} - -std::string demangle(const std::string& name) { - return demangle(name.c_str()); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/Demangle.h b/plugins/tensorboard-plugins/libkineto/src/Demangle.h deleted file mode 100644 index 6dcf0776f1a..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/Demangle.h +++ /dev/null @@ -1,12 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -namespace KINETO_NAMESPACE { - -std::string demangle(const char* name); -std::string demangle(const std::string& name); - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/EventProfiler.cpp b/plugins/tensorboard-plugins/libkineto/src/EventProfiler.cpp deleted file mode 100644 index dbf27552389..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/EventProfiler.cpp +++ /dev/null @@ -1,635 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "EventProfiler.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "CuptiEventApi.h" -#include "Logger.h" - -using namespace std::chrono; -using std::accumulate; -using std::endl; -using std::map; -using std::ostream; -using std::string; -using std::unique_ptr; -using std::vector; - -namespace KINETO_NAMESPACE { - -static std::mutex& logMutex() { - static std::mutex instance; - return instance; -} - -// --------------------------------------------------------------------- -// class Event -// --------------------------------------------------------------------- - -// Compute domain instance percentiles -PercentileList& Event::percentiles( - PercentileList& pcs, - const SampleSlice& slice) const { - vector instance_values; - instance_values.reserve(instanceCount); - for (int i = 0; i < instanceCount; i++) { - instance_values.push_back(sumInstance(i, slice)); - } - return KINETO_NAMESPACE::percentiles(instance_values, pcs); -} - -// Add up all samples for a given domain instance -int64_t Event::sumInstance(int i, const SampleSlice& slice) const { - auto r = toIdxRange(slice); - auto start = samples_.cbegin(); - std::advance(start, r.first); - auto end = start; - std::advance(end, r.second); - return accumulate(start, end, 0ul, [i](int64_t a, const Sample& b) { - return a + b.second[i]; - }); -} - -// Add up all samples across all domain instances -int64_t Event::sumAll(const SampleSlice& slice) const { - int64_t res = 0; - for (int i = 0; i < instanceCount; i++) { - res += sumInstance(i, slice); - } - return res; -} - -// Print raw sample values for all domains -void Event::printSamples(ostream& s, CUdevice device) const { - // Don't mess up output with interleaved lines - // Probably OK to reuse logMutex() here since this is - // used for debugging, but need to keep an eye on it. 
- std::lock_guard lock(logMutex()); - s << "Device " << device << " " << name << ":" << endl; - for (const auto& sample : samples_) { - const auto& vals = sample.second; - for (int64_t val : vals) { - s << val << " "; - } - s << endl; - } -} - -// --------------------------------------------------------------------- -// class Metric -// --------------------------------------------------------------------- -Metric::Metric( - string name, - CUpti_MetricID id, - vector events, - CUpti_MetricEvaluationMode eval_mode, - CuptiMetricApi& cupti_metrics) - : name(std::move(name)), - id_(id), - events_(std::move(events)), - evalMode_(eval_mode), - cuptiMetrics_(cupti_metrics), - valueKind_(cuptiMetrics_.valueKind(id)) {} - -// Return per-SM vector as well as total -struct Metric::CalculatedValues Metric::calculate( - map& event_map, - nanoseconds sample_duration, - const SampleSlice& slice) { - vector metric_values; - vector ev_values; - ev_values.reserve(events_.size()); - if (evalMode_ & CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE) { - int instance_count = instanceCount(event_map); - metric_values.reserve(instance_count); - for (int i = 0; i < instance_count; i++) { - ev_values.clear(); - for (CUpti_EventID event_id : events_) { - ev_values.push_back(event_map[event_id].sumInstance(i, slice)); - } - metric_values.push_back(cuptiMetrics_.calculate( - id_, valueKind_, events_, ev_values, sample_duration.count())); - } - } - - // FIXME: Check assumption that all instances are profiled - ev_values.clear(); - for (CUpti_EventID event_id : events_) { - ev_values.push_back(event_map[event_id].sumAll(slice)); - } - SampleValue total = cuptiMetrics_.calculate( - id_, valueKind_, events_, ev_values, sample_duration.count()); - if (evalMode_ & CUPTI_METRIC_EVALUATION_MODE_AGGREGATE) { - metric_values.push_back(total); - } - return {metric_values, std::move(total)}; -} - -void Metric::printDescription(ostream& s) const { - s << fmt::format("{} ({})", name, fmt::join(events_, ",")) << 
endl; -} - -// --------------------------------------------------------------------- -// class EventGroupSet -// --------------------------------------------------------------------- - -// Each domain has a set of counters. -// Some counters in a domain can be collected simultaneously in a "group" -// Counters from different domains can also be collected at the same time -// Therefore we have a "set of groups", or group set, with counters that -// can all be collected at once. -EventGroupSet::EventGroupSet( - CUpti_EventGroupSet& set, - map& events, - CuptiEventApi& cupti) - : set_(set), events_(events), cuptiEvents_(cupti), enabled_(false) { - for (int g = 0; g < set.numEventGroups; g++) { - CUpti_EventGroup grp = set.eventGroups[g]; - // Profile all domain instances - cuptiEvents_.enablePerInstance(grp); - uint32_t instance_count = cuptiEvents_.instanceCount(grp); - for (const auto& id : cuptiEvents_.eventsInGroup(grp)) { - VLOG(0) << "Instance count for " << id << ":" << instance_count; - events_[id].instanceCount = instance_count; - } - } -} - -EventGroupSet::~EventGroupSet() { - // Disable EventGroupSet in Cupti. 
- if (enabled_) { - setEnabled(false); - } -} - -// Enable or disable this group set -void EventGroupSet::setEnabled(bool enabled) { - if (enabled && !enabled_) { - cuptiEvents_.enableGroupSet(set_); - } else if (!enabled && enabled_) { - cuptiEvents_.disableGroupSet(set_); - } - enabled_ = enabled; -} - -// Collect counter values for each counter in group set -void EventGroupSet::collectSample() { - auto timestamp = system_clock::now(); - for (int g = 0; g < set_.numEventGroups; g++) { - CUpti_EventGroup grp = set_.eventGroups[g]; - for (const auto& id : cuptiEvents_.eventsInGroup(grp)) { - Event& ev = events_[id]; - vector vals(ev.instanceCount); - // FIXME: Use cuptiEventGroupReadAllEvents - cuptiEvents_.readEvent(grp, id, vals); - - if (VLOG_IS_ON(0)) { - for (int64_t v : vals) { - if (v == CUPTI_EVENT_OVERFLOW) { - LOG(WARNING) << "Counter overflow detected " - << "- decrease sample period!" << endl; - } - } - } - - ev.addSample(timestamp, vals); - } - } - - if (VLOG_IS_ON(1)) { - auto t2 = system_clock::now(); - VLOG(1) << "Device " << cuptiEvents_.device() << " Sample (us): " - << duration_cast(t2 - timestamp).count(); - } -} - -// Print names of events in this group set, ordered by group -void EventGroupSet::printDescription(ostream& s) const { - for (int g = 0; g < set_.numEventGroups; g++) { - s << " Events in group " << g << ": "; - for (const auto& id : cuptiEvents_.eventsInGroup(set_.eventGroups[g])) { - s << id << " (" << events_[id].name << ") "; - } - s << endl; - } -} - -// --------------------------------------------------------------------- -// class EventProfiler -// --------------------------------------------------------------------- - -// Find nearest factor of a number by linear search, -// starting at hi and lo - hi searches up and lo searches down -static int nearestFactor(int hi, int lo, int number) { - return number % hi == 0 - ? hi - : number % lo == 0 ? 
lo : nearestFactor(hi + 1, lo - 1, number); -} - -static int nearestFactor(int count, int max) { - return nearestFactor(count, count, max); -} - -void EventProfiler::initEvents(const std::set& eventNames) { - events_.clear(); - // Build event map - for (const auto& name : eventNames) { - events_.emplace(cuptiEvents_->eventId(name), name); - } -} - -void EventProfiler::initMetrics(const std::set& metricNames) { - metrics_.clear(); - // Add events from metrics - metrics_.reserve(metricNames.size()); - for (const auto& metric_name : metricNames) { - CUpti_MetricID metric_id = cuptiMetrics_->idFromName(metric_name); - if (metric_id == ~0) { - continue; - } - - const auto& events = cuptiMetrics_->events(metric_id); - vector event_ids; - event_ids.reserve(events.size()); - for (const auto& pair : events) { - CUpti_EventID id = pair.first; - const string& event_name = pair.second; - if (event_name.empty()) { - // For unnamed events, use metric name and event id - // FIXME: For subsequent metrics using the same event, - // this will be confusing - events_.emplace(id, metric_name + "_" + event_name); - } else { - events_.emplace(id, event_name); - } - event_ids.push_back(id); - } - metrics_.emplace_back( - metric_name, - metric_id, - event_ids, - cuptiMetrics_->evaluationMode(metric_id), - *cuptiMetrics_); - } -} - -bool EventProfiler::initEventGroups() { - sets_.clear(); - if (eventGroupSets_) { - cuptiEvents_->destroyGroupSets(eventGroupSets_); - eventGroupSets_ = nullptr; - } - if (events_.empty()) { - return true; - } - - // Determine sets of groups to be collected - vector ids; - ids.reserve(events_.size()); - for (const auto& ev : events_) { - ids.push_back(ev.first); - } - eventGroupSets_ = cuptiEvents_->createGroupSets(ids); - VLOG(0) << "Number of group sets: " << eventGroupSets_->numSets; - for (int i = 0; i < eventGroupSets_->numSets; i++) { - sets_.push_back( - EventGroupSet(eventGroupSets_->sets[i], events_, *cuptiEvents_)); - } - return !sets_.empty(); -} - 
-static unique_ptr alignAndValidateConfigs( - Config& base, - Config* onDemand) { - auto now = system_clock::now(); - if (!onDemand || - now > - (onDemand->eventProfilerOnDemandStartTime() + - onDemand->eventProfilerOnDemandDuration())) { - base.validate(now); - return base.clone(); - } - - auto res = base.clone(); - res->addEvents(onDemand->eventNames()); - res->addMetrics(onDemand->metricNames()); - - int sample_period = - std::min(base.samplePeriod().count(), onDemand->samplePeriod().count()); - if (sample_period < base.samplePeriod().count() && - (base.samplePeriod().count() % sample_period) != 0) { - sample_period = nearestFactor(sample_period, base.samplePeriod().count()); - LOG(WARNING) - << "On-demand sample period must be a factor of base sample period. " - << "Adjusting from " << onDemand->samplePeriod().count() << "ms to " - << sample_period << "ms."; - } - base.setSamplePeriod(milliseconds(sample_period)); - base.validate(now); - res->setSamplePeriod(base.samplePeriod()); - res->setMultiplexPeriod(base.multiplexPeriod()); - res->validate(now); - onDemand->setSamplePeriod(base.samplePeriod()); - onDemand->setMultiplexPeriod(base.multiplexPeriod()); - onDemand->validate(now); - - return res; -} - -static milliseconds minReportPeriod(const Config& config, int num_sets) { - return config.multiplexPeriod() * num_sets; -} - -static bool canSupportReportPeriod(const Config& config, int num_sets) { - // Can we get through the groups an even number per report period? - milliseconds min_report_period = minReportPeriod(config, num_sets); - return (config.reportPeriod().count() % min_report_period.count()) == 0; -} - -static int completeSamplesPerReport(const Config& config, int num_sets) { - if (num_sets <= 1) { - return config.reportPeriod() / config.samplePeriod(); - } - // Numnber of complete sample collections in the report period - // E.g. 
if report period is 10000ms, sample period 500ms, - // multiplex period 2000ms and num_sets is 5 then # of complete samples is - // (2000ms / 500ms) * (10000ms / 2000ms / 5) = 4 * 1 = 4 - int samples_per_multiplex_period = - config.multiplexPeriod() / config.samplePeriod(); - int multiplex_periods_per_report = - config.reportPeriod() / config.multiplexPeriod(); - return (multiplex_periods_per_report / num_sets) * - samples_per_multiplex_period; -} - -static bool canSupportSamplesPerReport(const Config& config, int num_sets) { - // Can samples per report can be honored with an exact *full* set of samples? - // We don't support partial samples at this point. - int full_samples_per_report = completeSamplesPerReport(config, num_sets); - return (full_samples_per_report % config.samplesPerReport()) == 0; -} - -static void adjustConfig(Config& config, int num_sets) { - // Don't change sample period and multiplex period here, since that can - // cause overflows and perf degradation. Report period and samples per - // report is OK to change (with warning). 
- if (!canSupportReportPeriod(config, num_sets)) { - milliseconds min_report_period = minReportPeriod(config, num_sets); - LOG(WARNING) << "Report period must be a multiple of " - << min_report_period.count() << "ms (" << num_sets - << " event sets * " << config.multiplexPeriod().count() - << "ms multiplex period), in order to get complete samples."; - auto new_report_period = - Config::alignUp(config.reportPeriod(), min_report_period); - double sf = - ((double)new_report_period.count()) / config.reportPeriod().count(); - int new_samples_per_report = std::round(config.samplesPerReport() * sf); - LOG(WARNING) << "Adjusting report period from " - << config.reportPeriod().count() << "ms to " - << new_report_period.count() << "ms"; - if (new_samples_per_report != config.samplesPerReport()) { - LOG(WARNING) << "Adjusting samples per report from " - << config.samplesPerReport() << " to " - << new_samples_per_report; - } - config.setReportPeriod(new_report_period); - config.setSamplesPerReport(new_samples_per_report); - } - // Ensure that samples per report can be honored with - // an exact *full* set of samples. Don't support partial - // samples at this point. - if (!canSupportSamplesPerReport(config, num_sets)) { - int full_samples_per_report = completeSamplesPerReport(config, num_sets); - int adjusted_count = - nearestFactor(config.samplesPerReport(), full_samples_per_report); - LOG(WARNING) - << "Samples per report must be such that an even number of " - << "complete samples can be aggregated in each report period. 
Adjusting" - << " from " << config.samplesPerReport() << " to " << adjusted_count - << " (complete sample count is " << full_samples_per_report << ")"; - config.setSamplesPerReport(adjusted_count); - } -} - -// Prepare profiler -EventProfiler::EventProfiler( - std::unique_ptr cupti_events, - std::unique_ptr cupti_metrics, - vector>& loggers, - vector>& onDemandLoggers) - : cuptiEvents_(std::move(cupti_events)), - cuptiMetrics_(std::move(cupti_metrics)), - loggers_(loggers), - onDemandLoggers_(onDemandLoggers) {} - -void EventProfiler::reportSamples() { - dispatchSamples(*config_, loggers_, baseSamples_); - baseSamples_ += completeSamplesPerReport(*config_, sets_.size()); -} - -void EventProfiler::reportOnDemandSamples() { - dispatchSamples(*onDemandConfig_, onDemandLoggers_, onDemandSamples_); - onDemandSamples_ += completeSamplesPerReport(*onDemandConfig_, sets_.size()); -} - -EventProfiler::~EventProfiler() { - if (eventGroupSets_) { - for (auto& set : sets_) { - set.setEnabled(false); - } - cuptiEvents_->destroyGroupSets(eventGroupSets_); - } - VLOG(0) << "Stopped event profiler for device " << device(); -} - -void EventProfiler::updateLoggers(Config& config, Config* on_demand_config) { - // Update loggers. - for (auto& logger : loggers_) { - std::lock_guard lock(logMutex()); - logger->update(config); - } - - if (on_demand_config) { - // Update onDemand loggers. - for (auto& logger : onDemandLoggers_) { - std::lock_guard lock(logMutex()); - logger->update(*on_demand_config); - } - } -} - -bool EventProfiler::applyConfig(const Config& config) { - // Initialize events, metrics, and event group sets. 
- // TODO: Send warnings / errors back to dyno for onDemand config - try { - if (!initEventsAndMetrics(config)) { - return false; - } - } catch (const std::exception& ex) { - LOG(WARNING) << "Failed to apply config (" << ex.what() << ")"; - return false; - } - - return true; -} - -bool EventProfiler::initEventsAndMetrics(const Config& config) { - initEvents(config.eventNames()); - initMetrics(config.metricNames()); - // We now have the total list of events to collect - // They need to be organized into groups for multiplexing - if (!initEventGroups()) { - LOG(WARNING) << "No events/metrics initialized successfully"; - return false; - } - - if (VLOG_IS_ON(1)) { - printMetrics(LIBKINETO_DBG_STREAM); - printSets(LIBKINETO_DBG_STREAM); - } - return true; -} - -void EventProfiler::printSets(ostream& s) const { - for (int i = 0; i < sets_.size(); i++) { - s << "Set " << i << endl; - sets_[i].printDescription(s); - } -} - -void EventProfiler::printMetrics(ostream& s) const { - s << "Metrics:" << endl; - for (const Metric& m : metrics_) { - m.printDescription(s); - } -} - -void EventProfiler::printAllSamples(ostream& s, CUdevice device) const { - for (const auto& pair : events_) { - const Event& ev = pair.second; - ev.printSamples(s, device); - } -} - -void EventProfiler::enableNextCounterSet() { - if (sets_.size() > 1) { - auto t1 = system_clock::now(); - - VLOG(1) << "Disabling set " << curEnabledSet_; - sets_[curEnabledSet_].setEnabled(false); - curEnabledSet_ = (curEnabledSet_ + 1) % sets_.size(); - VLOG(1) << "Enabling set " << curEnabledSet_; - sets_[curEnabledSet_].setEnabled(true); - - if (VLOG_IS_ON(1)) { - auto t2 = system_clock::now(); - VLOG(1) << "Switch (us): " - << duration_cast(t2 - t1).count(); - } - } -} - -// Notify listeners of collected samples -void EventProfiler::dispatchSamples( - const Config& config, - const vector>& loggers, - int sample_offset) { - Sample sample(events_.size() + metrics_.size()); - // Normalize values to per second - auto delta 
= config.reportPeriod() / config.samplesPerReport(); - double sf = 1000.0 * sets_.size() / delta.count(); - for (int i = 0; i < config.samplesPerReport(); i++) { - sample.stats.clear(); - sample.deltaMsec = (delta * i).count(); - SampleSlice slice = {sample_offset, i, config.samplesPerReport()}; - VLOG(1) << "Slice: " << sample_offset << ", " << i << ", " - << config.samplesPerReport(); - for (const auto& pair : events_) { - const Event& ev = pair.second; - int64_t total = std::round(sf * ev.sumAll(slice)); - PercentileList pcs = initPercentiles(config.percentiles()); - normalize(ev.percentiles(pcs, slice), sf); - sample.stats.push_back({ev.name, std::move(pcs), SampleValue(total)}); - } - - for (auto& m : metrics_) { - // calculate returns a pair of per-SM vector and a total - auto vals = m.calculate(events_, delta, slice); - PercentileList pcs = initPercentiles(config.percentiles()); - sample.stats.push_back( - {m.name, std::move(percentiles(vals.perInstance, pcs)), vals.total}); - } - - for (auto& logger : loggers) { - std::lock_guard lock(logMutex()); - logger->handleSample(device(), sample, config.ipcFabricEnabled()); - } - } - - if (VLOG_IS_ON(2)) { - printAllSamples(LIBKINETO_DBG_STREAM, device()); - } -} - -void EventProfiler::configure(Config& config, Config* onDemandConfig) { - if (!sets_.empty()) { - sets_[curEnabledSet_].setEnabled(false); - clearSamples(); - } - - config_ = config.clone(); - onDemandConfig_ = onDemandConfig ? onDemandConfig->clone() : nullptr; - mergedConfig_ = alignAndValidateConfigs(*config_, onDemandConfig_.get()); - if (!applyConfig(*mergedConfig_)) { - LOG(WARNING) << "Failed to apply config!"; - mergedConfig_ = config_->clone(); - applyConfig(*config_); - } - if (!sets_.empty()) { - // Make timing adjustments based on multiplexing requirements. 
- adjustConfig(*config_, sets_.size()); - if (onDemandConfig_) { - int duration = onDemandConfig_->eventProfilerOnDemandDuration().count(); - LOG(INFO) << "On demand profiler activated for " << duration << " secs"; - adjustConfig(*onDemandConfig_, sets_.size()); - } - // If events or metrics were added or removed, need to tell loggers - updateLoggers(*config_, onDemandConfig_.get()); - } - - curEnabledSet_ = 0; - if (!sets_.empty()) { - sets_[0].setEnabled(true); - } else { - VLOG(0) << "No counters profiled!"; - } - - baseSamples_ = 0; - onDemandSamples_ = 0; -} - -void EventProfiler::collectSample() { - if (sets_.empty()) { - return; - } - sets_[curEnabledSet_].collectSample(); - if (VLOG_IS_ON(1)) { - printAllSamples(LIBKINETO_DBG_STREAM, device()); - } -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/EventProfiler.h b/plugins/tensorboard-plugins/libkineto/src/EventProfiler.h deleted file mode 100644 index fafd5b9bb83..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/EventProfiler.h +++ /dev/null @@ -1,341 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "Config.h" -#include "CuptiEventApi.h" -#include "CuptiMetricApi.h" -#include "SampleListener.h" - -namespace KINETO_NAMESPACE { - -// Helper function for computing percentiles (nearest-rank). -// Modifies the input. 
-template -inline PercentileList& percentiles(std::vector values, PercentileList& pcs) { - auto size = values.size(); - for (auto& x : pcs) { - int idx = std::min(size - 1, (x.first * size) / 100); - std::nth_element(values.begin(), values.begin() + idx, values.end()); - x.second = SampleValue(values[idx]); - } - return pcs; -} - -// Helper function for normalizing a percentile list -// Modifies the input -inline PercentileList& normalize(PercentileList& pcs, double sf) { - for (auto& pc : pcs) { - pc.second *= sf; - } - return pcs; -} - -// A slice of the sample buffer -struct SampleSlice { - // Start offset (samples) - int offset; - // Slice number - int index; - // Out of this many - int count; -}; - -// A sampled event -class Event { - public: - /* implicit */ Event(std::string name) : name(std::move(name)) {} - /* implicit */ Event(const char* name) : name(name) {} - Event() : name("INVALID") {} - - Event(const Event&) = delete; - Event& operator=(const Event&) = delete; - Event(Event&&) = default; - Event& operator=(Event&&) = default; - - void addSample( - std::chrono::time_point timestamp, - const std::vector& values) { - assert(values.size() == instanceCount); - samples_.emplace_back(timestamp, values); - } - - // Sum samples for a single domain instance - int64_t sumInstance(int i, const SampleSlice& slice) const; - - // Sum all samples across all domain instances - int64_t sumAll(const SampleSlice& slice) const; - - // Create list of percentiles - PercentileList& percentiles(PercentileList& pcs, const SampleSlice& slice) - const; - - void eraseSamples(int count) { - auto end = samples_.begin(); - std::advance(end, count); - samples_.erase(samples_.begin(), end); - } - - void clearSamples() { - samples_.clear(); - } - - int sampleCount() { - return samples_.size(); - } - - void printSamples(std::ostream& s, CUdevice device) const; - - // Event name (see nvprof --query-events) - std::string name; - - // Number of domain instances for this event, e.g. 
number of SMs - int instanceCount = 0; - - private: - std::pair toIdxRange(const SampleSlice& slice) const { - int size = (samples_.size() - slice.offset) / slice.count; - return std::make_pair(slice.offset + (slice.index * size), size); - } - - // List of collected samples, where each sample has values for - // one or more domain instances - using Sample = std::pair< - std::chrono::time_point, - std::vector>; - std::list samples_; -}; - -class Metric { - public: - Metric( - std::string name, - CUpti_MetricID id, - std::vector events, - CUpti_MetricEvaluationMode eval_mode, - CuptiMetricApi& cupti_metrics); - - struct CalculatedValues { - std::vector perInstance; - SampleValue total; - }; - - struct CalculatedValues calculate( - std::map& events, - std::chrono::nanoseconds sample_duration, - const SampleSlice& slice); - - int instanceCount(std::map& events) { - return events[events_[0]].instanceCount; - } - - void printDescription(std::ostream& s) const; - - std::string name; - - private: - CUpti_MetricID id_; - std::vector events_; - CUpti_MetricEvaluationMode evalMode_; - // Calls to CUPTI is encapsulated behind this interface - CuptiMetricApi& cuptiMetrics_; - CUpti_MetricValueKind valueKind_; -}; - -/** - * A set of event groups. - * Holds all the events that may be collected in a single pass. - * A group contains one or more counters for a single domain. - * A group set contains zero or one groups per domain. 
- */ -class EventGroupSet { - public: - EventGroupSet( - CUpti_EventGroupSet& set, - std::map& events, - CuptiEventApi& cupti); - ~EventGroupSet(); - - EventGroupSet(const EventGroupSet&) = delete; - EventGroupSet& operator=(const EventGroupSet&) = delete; - EventGroupSet(EventGroupSet&&) = default; - EventGroupSet& operator=(EventGroupSet&&) = delete; - - // Number of groups = number of domains profiled - int groupCount() const { - return set_.numEventGroups; - } - - void setEnabled(bool enabled); - // Take a sample of counters in this group set - void collectSample(); - void printDescription(std::ostream& s) const; - - private: - CUpti_EventGroupSet& set_; - std::map& events_; - // Calls to CUPTI is encapsulated behind this interface - CuptiEventApi& cuptiEvents_; - bool enabled_; -}; - -// The sampler -class EventProfiler { - public: - explicit EventProfiler( - std::unique_ptr cupti_events, - std::unique_ptr cupti_metrics, - std::vector>& loggers, - std::vector>& onDemandLoggers); - EventProfiler(const EventProfiler&) = delete; - EventProfiler& operator=(const EventProfiler&) = delete; - ~EventProfiler(); - - void configure(Config& config, Config* onDemandConfig); - - bool isOnDemandActive() { - return !!onDemandConfig_; - } - - // Print the counter sets. Multiple sets will be multiplexed. 
- void printSets(std::ostream& s) const; - - // Print metrics descriptions - void printMetrics(std::ostream& s) const; - - bool enableForDevice(Config& cfg); - - CUdevice device() { - return cuptiEvents_->device(); - } - - bool setContinuousMode() { - return cuptiEvents_->setContinuousMode(); - } - - std::chrono::milliseconds samplePeriod() { - return mergedConfig_->samplePeriod(); - } - - std::chrono::milliseconds multiplexPeriod() { - return mergedConfig_->multiplexPeriod(); - } - - std::chrono::milliseconds reportPeriod() { - return config_->reportPeriod(); - } - - std::chrono::milliseconds onDemandReportPeriod() { - return onDemandConfig_->reportPeriod(); - } - - // Read values of currently running counters. - void collectSample(); - - void reportSamples(); - void reportOnDemandSamples(); - - bool enabled() { - return sets_.size() > 0; - } - - bool multiplexEnabled() { - return sets_.size() > 1; - } - - // Multiplex counters. - void enableNextCounterSet(); - - void eraseReportedSamples() { - int erase_count = baseSamples_; - if (onDemandConfig_ && - onDemandConfig_->eventProfilerOnDemandDuration().count() > 0) { - erase_count = std::min(baseSamples_, onDemandSamples_); - } - eraseSamples(erase_count); - baseSamples_ -= erase_count; - onDemandSamples_ -= erase_count; - } - - void clearSamples() { - for (auto& pair : events_) { - pair.second.clearSamples(); - } - baseSamples_ = 0; - onDemandSamples_ = 0; - } - - private: - // Functions to initialize profiler based on Config settings. 
- bool applyConfig(const Config& config); - bool initEventsAndMetrics(const Config& config); - void initEvents(const std::set& eventNames); - void initMetrics(const std::set& metricNames); - bool initEventGroups(); - - PercentileList initPercentiles(const std::vector& percentiles) { - PercentileList res; - res.reserve(percentiles.size()); - for (int p : percentiles) { - res.emplace_back(p, SampleValue(0)); - } - return res; - } - - // Notify listeners of collected samples - void dispatchSamples( - const Config& config, - const std::vector>& loggers, - int report_nr); - - void eraseSamples(int count) { - for (auto& pair : events_) { - pair.second.eraseSamples(count); - } - } - - void updateLoggers(Config& config, Config* on_demand_config); - - // Print all collected samples since last clear. - void printAllSamples(std::ostream& s, CUdevice device) const; - - // Calls to CUPTI is encapsulated behind these interfaces - std::unique_ptr cuptiEvents_; - std::unique_ptr cuptiMetrics_; - // The CUpti API reports event IDs, we must map them to our event objects - std::map events_; - // List of metrics - std::vector metrics_; - // The countert sets needed to collect all counters - std::vector sets_; - // The event group set object returned by Cupti. - // Saved s.t. we can call cuptiEventGroupSetsDestroy to free memory when - // the object is no longer needed. 
- CUpti_EventGroupSets* eventGroupSets_ = nullptr; - // Current multiplexed counter set - int curEnabledSet_{0}; - - std::unique_ptr config_; - std::unique_ptr onDemandConfig_; - std::unique_ptr mergedConfig_; - int baseSamples_{0}; - int onDemandSamples_{0}; - - // Shared between profiler threads - // Vectors are read-only but calling loggers require lock - const std::vector>& loggers_; - const std::vector>& onDemandLoggers_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/EventProfilerController.cpp b/plugins/tensorboard-plugins/libkineto/src/EventProfilerController.cpp deleted file mode 100644 index 0427cc7a90c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/EventProfilerController.cpp +++ /dev/null @@ -1,423 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "EventProfilerController.h" - -#include -#include -#include - -#include "ConfigLoader.h" -#include "CuptiEventApi.h" -#include "CuptiMetricApi.h" -#include "EventProfiler.h" -#include "output_csv.h" - -#include "Logger.h" -#include "ThreadUtil.h" - -using namespace std::chrono; -using std::unique_ptr; -using std::vector; - -namespace KINETO_NAMESPACE { - -namespace { - -vector(const Config&)>>& -loggerFactories() { - static vector(const Config&)>> - factories; - return factories; -} - -vector(const Config&)>>& -onDemandLoggerFactories() { - static vector(const Config&)>> - factories; - return factories; -} - -vector> makeLoggers(const Config& config) { - vector> loggers; - for (const auto& factory : loggerFactories()) { - loggers.push_back(factory(config)); - } - loggers.push_back(std::make_unique()); - loggers.push_back(std::make_unique()); - return loggers; -} - -vector> makeOnDemandLoggers( - const Config& config) { - vector> loggers; - for (const auto& factory : onDemandLoggerFactories()) { - loggers.push_back(factory(config)); - } - loggers.push_back(std::make_unique()); - return loggers; -} - 
-vector>& loggers(const Config& config) { - static auto res = makeLoggers(config); - return res; -} - -vector>& onDemandLoggers( - const Config& config) { - static auto res = makeOnDemandLoggers(config); - return res; -} - -} // anon namespace - -// Keep an eye on profiling threads. -// We've observed deadlocks in Cuda11 in libcuda / libcupti.. -namespace detail { - -class HeartbeatMonitor { - - public: - ~HeartbeatMonitor() { - stopMonitoring(); - } - - static HeartbeatMonitor& instance() { - static HeartbeatMonitor monitor; - return monitor; - } - - void profilerHeartbeat() { - int32_t tid = systemThreadId(); - std::lock_guard lock(mutex_); - profilerAliveMap_[tid]++; - } - - void setPeriod(seconds period) { - { - std::lock_guard lock(mutex_); - if (period_ == period) { - return; - } - period_ = period; - } - if (period == seconds(0)) { - stopMonitoring(); - } else { - startMonitoring(); - } - } - - private: - HeartbeatMonitor() = default; - - void monitorLoop() { - std::unique_lock lock(mutex_); - while(!stopMonitor_) { - auto cv_status = condVar_.wait_for(lock, seconds(period_)); - // Don't perform check on spurious wakeup or on notify - if (cv_status == std::cv_status::timeout) { - for (auto& pair : profilerAliveMap_) { - int32_t tid = pair.first; - int& i = pair.second; - if (i == 0) { - LOG(ERROR) << "Thread " << tid << " appears stuck!"; - } - i = 0; - } - } - } - } - - void startMonitoring() { - if (!monitorThread_) { - VLOG(0) << "Starting monitoring thread"; - stopMonitor_ = false; - monitorThread_ = std::make_unique( - &HeartbeatMonitor::monitorLoop, this); - } - } - - void stopMonitoring() { - if (monitorThread_) { - VLOG(0) << "Stopping monitoring thread"; - stopMonitor_ = true; - condVar_.notify_one(); - monitorThread_->join(); - monitorThread_ = nullptr; - VLOG(0) << "Monitoring thread terminated"; - } - } - - std::map profilerAliveMap_; - std::unique_ptr monitorThread_; - std::mutex mutex_; - std::condition_variable condVar_; - std::atomic_bool 
stopMonitor_{false}; - seconds period_{0}; -}; - -} // namespace detail - -namespace { -// Profiler map singleton -std::map>& profilerMap() { - static std::map> instance; - return instance; -} - -void reportLateSample( - int sleepMs, - int sampleMs, - int reportMs, - int reprogramMs) { - LOG_EVERY_N(WARNING, 10) << "Lost sample due to delays (ms): " << sleepMs - << ", " << sampleMs << ", " << reportMs << ", " - << reprogramMs; -} - -void configureHeartbeatMonitor( - detail::HeartbeatMonitor& monitor, const Config& base, const Config* onDemand) { - seconds base_period = - base.eventProfilerHeartbeatMonitorPeriod(); - seconds on_demand_period = !onDemand ? seconds(0) : - onDemand->eventProfilerHeartbeatMonitorPeriod(); - monitor.setPeriod( - on_demand_period > seconds(0) ? on_demand_period : base_period); -} - -} // anon namespace - -void EventProfilerController::addLoggerFactory( - std::function(const Config&)> factory) { - loggerFactories().push_back(factory); -} - -void EventProfilerController::addOnDemandLoggerFactory( - std::function(const Config&)> factory) { - onDemandLoggerFactories().push_back(factory); -} - -EventProfilerController::EventProfilerController( - CUcontext context, - ConfigLoader& configLoader, - detail::HeartbeatMonitor& heartbeatMonitor) - : configLoader_(configLoader), heartbeatMonitor_(heartbeatMonitor) { - auto cupti_events = std::make_unique(context); - auto cupti_metrics = - std::make_unique(cupti_events->device()); - configLoader_.addHandler( - ConfigLoader::ConfigKind::EventProfiler, this); - auto config = configLoader.getConfigCopy(); - profiler_ = std::make_unique( - std::move(cupti_events), - std::move(cupti_metrics), - loggers(*config), - onDemandLoggers(*config)); - profilerThread_ = std::make_unique( - &EventProfilerController::profilerLoop, this); -} - -EventProfilerController::~EventProfilerController() { - if (profilerThread_) { - // signaling termination of the profiler loop - stopRunloop_ = true; - profilerThread_->join(); - 
} - configLoader_.removeHandler( - ConfigLoader::ConfigKind::EventProfiler, this); - VLOG(0) << "Stopped event profiler"; -} - -// Must be called under lock -void EventProfilerController::start(CUcontext ctx, ConfigLoader& configLoader) { - profilerMap()[ctx] = unique_ptr( - new EventProfilerController( - ctx, configLoader, detail::HeartbeatMonitor::instance())); -} - -// Must be called under lock -void EventProfilerController::stop(CUcontext ctx) { - profilerMap()[ctx] = nullptr; -} - -bool EventProfilerController::canAcceptConfig() { - std::lock_guard guard(mutex_); - return !newOnDemandConfig_; -} - -void EventProfilerController::acceptConfig(const Config& config) { - if (config.eventProfilerOnDemandDuration().count() == 0) { - // Ignore - not for this profiler - return; - } - std::lock_guard guard(mutex_); - if (newOnDemandConfig_) { - LOG(ERROR) << "On demand request already queued - ignoring new request"; - return; - } - newOnDemandConfig_ = config.clone(); - LOG(INFO) << "Received new on-demand config"; -} - -bool EventProfilerController::enableForDevice(Config& cfg) { - // FIXME: Use device unique id! - if (!cfg.eventProfilerEnabledForDevice(profiler_->device())) { - return false; - } - // context count includes the new context - int instances = configLoader_.contextCountForGpu(profiler_->device()); - VLOG(0) << "Device context count: " << instances; - return instances >= 0 && instances <= cfg.maxEventProfilersPerGpu(); -} - -void EventProfilerController::profilerLoop() { - // We limit the number of profilers that can exist per GPU - auto config = configLoader_.getConfigCopy(); - if (!enableForDevice(*config)) { - VLOG(0) << "Not starting EventProfiler - profilers for GPU " - << profiler_->device() << " exceeds profilers per GPU limit (" - << config->maxEventProfilersPerGpu() << ")"; - return; - } - - if (!profiler_->setContinuousMode()) { - VLOG(0) << "Continuous mode not supported for GPU " - << profiler_->device() << ". 
Not starting Event Profiler."; - return; - } - - VLOG(0) << "Starting Event Profiler for GPU " << profiler_->device(); - setThreadName("CUPTI Event Profiler"); - - time_point next_sample_time; - time_point next_report_time; - time_point next_on_demand_report_time; - time_point next_multiplex_time; - std::unique_ptr on_demand_config = nullptr; - bool reconfigure = true; - bool restart = true; - int report_count = 0; - int on_demand_report_count = 0; - while (!stopRunloop_) { - heartbeatMonitor_.profilerHeartbeat(); - if (configLoader_.hasNewConfig(*config)) { - config = configLoader_.getConfigCopy(); - VLOG(0) << "Base config changed"; - report_count = 0; - reconfigure = true; - } - - auto now = system_clock::now(); - if (on_demand_config && - now > (on_demand_config->eventProfilerOnDemandStartTime() + - on_demand_config->eventProfilerOnDemandDuration())) { - on_demand_config = nullptr; - LOG(INFO) << "On-demand profiling complete"; - reconfigure = true; - } - - if (!profiler_->isOnDemandActive()) { - std::lock_guard lock(mutex_); - if (newOnDemandConfig_) { - VLOG(0) << "Received on-demand config, reconfiguring"; - on_demand_config = std::move(newOnDemandConfig_); - reconfigure = true; - on_demand_report_count = 0; - } - } - - if (reconfigure) { - try { - profiler_->configure(*config, on_demand_config.get()); - } catch (const std::exception& ex) { - LOG(ERROR) << "Encountered error while configuring event profiler: " - << ex.what(); - // Exit profiling entirely when encountering an error here - // as it indicates a serious problem or bug. 
- break; - } - configureHeartbeatMonitor( - heartbeatMonitor_, *config, on_demand_config.get()); - reconfigure = false; - restart = true; - } - - if (restart) { - now = system_clock::now(); - next_sample_time = now + profiler_->samplePeriod(); - next_report_time = now + profiler_->reportPeriod(); - if (profiler_->isOnDemandActive()) { - next_on_demand_report_time = now + profiler_->onDemandReportPeriod(); - } - next_multiplex_time = now + profiler_->multiplexPeriod(); - // Collect an initial sample and throw it away - // The next sample is the first valid one - profiler_->collectSample(); - profiler_->clearSamples(); - restart = false; - } - - auto start_sleep = now; - while (now < next_sample_time) { - /* sleep override */ - std::this_thread::sleep_for(next_sample_time - now); - now = system_clock::now(); - } - int sleep_time = duration_cast(now - start_sleep).count(); - - auto start_sample = now; - profiler_->collectSample(); - now = system_clock::now(); - int sample_time = duration_cast(now - start_sample).count(); - - next_sample_time += profiler_->samplePeriod(); - if (now > next_sample_time) { - reportLateSample(sleep_time, sample_time, 0, 0); - restart = true; - continue; - } - - auto start_report = now; - if (now > next_report_time) { - VLOG(1) << "Report #" << report_count++; - profiler_->reportSamples(); - next_report_time += profiler_->reportPeriod(); - } - if (profiler_->isOnDemandActive() && now > next_on_demand_report_time) { - VLOG(1) << "OnDemand Report #" << on_demand_report_count++; - profiler_->reportOnDemandSamples(); - next_on_demand_report_time += profiler_->onDemandReportPeriod(); - } - profiler_->eraseReportedSamples(); - now = system_clock::now(); - int report_time = duration_cast(now - start_report).count(); - - if (now > next_sample_time) { - reportLateSample(sleep_time, sample_time, report_time, 0); - restart = true; - continue; - } - - auto start_multiplex = now; - if (profiler_->multiplexEnabled() && now > next_multiplex_time) { - 
profiler_->enableNextCounterSet(); - next_multiplex_time += profiler_->multiplexPeriod(); - } - now = system_clock::now(); - int multiplex_time = - duration_cast(now - start_multiplex).count(); - - if (now > next_sample_time) { - reportLateSample(sleep_time, sample_time, report_time, multiplex_time); - restart = true; - } - - VLOG(0) << "Runloop execution time: " - << duration_cast(now - start_sample).count() << "ms"; - } - - VLOG(0) << "Device " << profiler_->device() - << ": Exited event profiling loop"; -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/EventProfilerController.h b/plugins/tensorboard-plugins/libkineto/src/EventProfilerController.h deleted file mode 100644 index 007a82faa92..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/EventProfilerController.h +++ /dev/null @@ -1,63 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include -#include - -#include - -#include "ConfigLoader.h" - -namespace KINETO_NAMESPACE { - -class Config; -class ConfigLoader; -class EventProfiler; -class SampleListener; - -namespace detail { -class HeartbeatMonitor; -} - -class EventProfilerController : public ConfigLoader::ConfigHandler { - public: - EventProfilerController(const EventProfilerController&) = delete; - EventProfilerController& operator=(const EventProfilerController&) = delete; - - ~EventProfilerController(); - - static void start(CUcontext ctx, ConfigLoader& configLoader); - static void stop(CUcontext ctx); - - static void addLoggerFactory( - std::function(const Config&)> factory); - - static void addOnDemandLoggerFactory( - std::function(const Config&)> factory); - - bool canAcceptConfig() override; - - void acceptConfig(const Config& config) override; - - private: - explicit EventProfilerController( - CUcontext context, - ConfigLoader& configLoader, - detail::HeartbeatMonitor& heartbeatMonitor); - bool 
enableForDevice(Config& cfg); - void profilerLoop(); - - ConfigLoader& configLoader_; - std::unique_ptr newOnDemandConfig_; - detail::HeartbeatMonitor& heartbeatMonitor_; - std::unique_ptr profiler_; - std::unique_ptr profilerThread_; - std::atomic_bool stopRunloop_{false}; - std::mutex mutex_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/GenericTraceActivity.cpp b/plugins/tensorboard-plugins/libkineto/src/GenericTraceActivity.cpp deleted file mode 100644 index 4e00b1256c4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/GenericTraceActivity.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "GenericTraceActivity.h" -#include "output_base.h" - -namespace libkineto { - void GenericTraceActivity::log(ActivityLogger& logger) const { - logger.handleGenericActivity(*this); - } -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/ILoggerObserver.cpp b/plugins/tensorboard-plugins/libkineto/src/ILoggerObserver.cpp deleted file mode 100644 index f0106578811..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ILoggerObserver.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "ILoggerObserver.h" - -#if !USE_GOOGLE_LOG - -#include -#include - -namespace libkineto { - -struct LoggerTypeName { - constexpr LoggerTypeName(const char* n, LoggerOutputType t) : name(n), type(t) {}; - const char* name; - LoggerOutputType type; -}; - -static constexpr std::array LoggerMap{{ - {"VERBOSE", LoggerOutputType::VERBOSE}, - {"INFO", LoggerOutputType::INFO}, - {"WARNING", LoggerOutputType::WARNING}, - {"ERROR", LoggerOutputType::ERROR}, - {"STAGE", LoggerOutputType::STAGE}, - {"???", LoggerOutputType::ENUM_COUNT} -}}; - -static constexpr bool matchingOrder(int idx = 0) { - return LoggerMap[idx].type == LoggerOutputType::ENUM_COUNT || - ((idx == (int) LoggerMap[idx].type) && matchingOrder(idx + 1)); -} -static_assert(matchingOrder(), "LoggerTypeName map is out of order"); - -const char* toString(LoggerOutputType t) { - if(t < VERBOSE || t >= ENUM_COUNT) { - return LoggerMap[ENUM_COUNT].name; - } - return LoggerMap[(int)t].name; -} - -LoggerOutputType toLoggerOutputType(const std::string& str) { - for (int i = 0; i < LoggerTypeCount; i++) { - if (str == LoggerMap[i].name) { - return LoggerMap[i].type; - } - } - throw std::invalid_argument(fmt::format("Invalid activity type: {}", str)); -} - -} // namespace libkineto - - -#endif // !USE_GOOGLE_LOG diff --git a/plugins/tensorboard-plugins/libkineto/src/Logger.cpp b/plugins/tensorboard-plugins/libkineto/src/Logger.cpp deleted file mode 100644 index dbde765f51f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/Logger.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "Logger.h" -#include "ILoggerObserver.h" - -#ifndef USE_GOOGLE_LOG - -#include -#include -#include -#include -#include - -#include -#include - -#include "ThreadUtil.h" - -namespace KINETO_NAMESPACE { - -std::atomic_int Logger::severityLevel_{VERBOSE}; -std::atomic_int Logger::verboseLogLevel_{-1}; -std::atomic Logger::verboseLogModules_{~0ull}; - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wglobal-constructors" -std::mutex Logger::loggerObserversMutex_; -#pragma GCC diagnostic pop - - -Logger::Logger(int severity, int line, const char* filePath, int errnum) - : buf_(), out_(LIBKINETO_DBG_STREAM), errnum_(errnum), messageSeverity_(severity) { - buf_ << toString((LoggerOutputType) severity) << ":"; - - const auto tt = - std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - const char* file = strrchr(filePath, '/'); - buf_ << fmt::format("{:%Y-%m-%d %H:%M:%S}", fmt::localtime(tt)) << " " - << processId() << ":" << systemThreadId() << " " - << (file ? file + 1 : filePath) << ":" << line << "] "; -} - -Logger::~Logger() { -#ifdef __linux__ - if (errnum_ != 0) { - thread_local char buf[1024]; - buf_ << " : " << strerror_r(errnum_, buf, sizeof(buf)); - } -#endif - - { - std::lock_guard guard(loggerObserversMutex_); - for (auto* observer : loggerObservers()) { - // Output to observers. Current Severity helps keep track of which bucket the output goes. - if (observer) { - observer->write(buf_.str(), (LoggerOutputType) messageSeverity_); - } - } - } - - // Finally, print to terminal or console. 
- out_ << buf_.str() << std::endl; -} - -void Logger::setVerboseLogModules(const std::vector& modules) { - uint64_t mask = 0; - if (modules.empty()) { - mask = ~0ull; - } else { - for (const std::string& name : modules) { - mask |= hash(name.c_str()); - } - } - verboseLogModules_ = mask; -} - -void Logger::addLoggerObserver(ILoggerObserver* observer) { - if (observer == nullptr) { - return; - } - std::lock_guard guard(loggerObserversMutex_); - loggerObservers().insert(observer); -} - -void Logger::removeLoggerObserver(ILoggerObserver* observer) { - std::lock_guard guard(loggerObserversMutex_); - loggerObservers().erase(observer); -} - -void Logger::addLoggerObserverDevice(int64_t device) { - std::lock_guard guard(loggerObserversMutex_); - for (auto observer : loggerObservers()) { - observer->addDevice(device); - } -} - -void Logger::addLoggerObserverEventCount(int64_t count) { - std::lock_guard guard(loggerObserversMutex_); - for (auto observer : loggerObservers()) { - observer->addEventCount(count); - } -} - -void Logger::setLoggerObserverTraceDurationMS(int64_t duration) { - std::lock_guard guard(loggerObserversMutex_); - for (auto observer : loggerObservers()) { - observer->setTraceDurationMS(duration); - } -} - -void Logger::setLoggerObserverTraceID(const std::string& tid) { - std::lock_guard guard(loggerObserversMutex_); - for (auto observer : loggerObservers()) { - observer->setTraceID(tid); - } -} - -void Logger::setLoggerObserverGroupTraceID(const std::string& gtid) { - std::lock_guard guard(loggerObserversMutex_); - for (auto observer : loggerObservers()) { - observer->setGroupTraceID(gtid); - } -} - -void Logger::addLoggerObserverDestination(const std::string& dest) { - std::lock_guard guard(loggerObserversMutex_); - for (auto observer : loggerObservers()) { - observer->addDestination(dest); - } -} - -} // namespace KINETO_NAMESPACE - -#endif // USE_GOOGLE_LOG diff --git a/plugins/tensorboard-plugins/libkineto/src/Logger.h 
b/plugins/tensorboard-plugins/libkineto/src/Logger.h deleted file mode 100644 index 868fc84b9f4..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/Logger.h +++ /dev/null @@ -1,244 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include - -#define LIBKINETO_DBG_STREAM std::cerr - -#if USE_GOOGLE_LOG - -#include - -#define SET_LOG_SEVERITY_LEVEL(level) -#define SET_LOG_VERBOSITY_LEVEL(level, modules) -#define LOGGER_OBSERVER_ADD_DEVICE(device) -#define LOGGER_OBSERVER_ADD_EVENT_COUNT(count) -#define LOGGER_OBSERVER_SET_TRACE_DURATION_MS(duration) -#define LOGGER_OBSERVER_SET_TRACE_ID(tid) -#define LOGGER_OBSERVER_SET_GROUP_TRACE_ID(gtid) -#define LOGGER_OBSERVER_ADD_DESTINATION(dest) -#define UST_LOGGER_MARK_COMPLETED(stage) - -#else // !USE_GOOGLE_LOG -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "ILoggerObserver.h" - -#ifdef _MSC_VER -// unset a predefined ERROR (windows) -#undef ERROR -#endif // _MSC_VER - -namespace KINETO_NAMESPACE { - -class Logger { - public: - Logger(int severity, int line, const char* filePath, int errnum = 0); - ~Logger(); - - inline std::ostream& stream() { - return buf_; - } - - static inline void setSeverityLevel(int level) { - severityLevel_ = level; - } - - static inline int severityLevel() { - return severityLevel_; - } - - static inline void setVerboseLogLevel(int level) { - verboseLogLevel_ = level; - } - - static inline int verboseLogLevel() { - return verboseLogLevel_; - } - - // This is constexpr so that the hash for a file name is computed at compile - // time when used in the VLOG macros. - // This way, there is no string comparison for matching VLOG modules, - // only a comparison of pre-computed hashes. - // No fancy hashing needed here. 
It's pretty inefficient (one character - // at a time) but the strings are not large and it's not in the critical path. - static constexpr uint64_t rol(uint64_t val, int amount) { - return val << amount | val >> (63 - amount); - } - static constexpr uint64_t hash(const char* s) { - uint64_t hash = hash_rec(s, 0); - return hash & rol(0x41a0240682483014ull, hash & 63); - } - static constexpr uint64_t hash_rec(const char* s, int off) { - // Random constants! - return (!s[off] ? 57ull : (hash_rec(s, off + 1) * 293) ^ s[off]); - } - static constexpr const char* basename(const char* s, int off = 0) { - return !s[off] - ? s - : s[off] == '/' ? basename(&s[off + 1]) : basename(s, off + 1); - } - - static void setVerboseLogModules(const std::vector& modules); - - static inline uint64_t verboseLogModules() { - return verboseLogModules_; - } - - static void clearLoggerObservers() { - std::lock_guard g(loggerObserversMutex_); - loggerObservers().clear(); - } - - static void addLoggerObserver(ILoggerObserver* observer); - - static void removeLoggerObserver(ILoggerObserver* observer); - - static void addLoggerObserverDevice(int64_t device); - - static void addLoggerObserverEventCount(int64_t count); - - static void setLoggerObserverTraceDurationMS(int64_t duration); - - static void setLoggerObserverTraceID(const std::string& tid); - - static void setLoggerObserverGroupTraceID(const std::string& gtid); - - static void addLoggerObserverDestination(const std::string& dest); - - private: - std::stringstream buf_; - std::ostream& out_; - int errnum_; - int messageSeverity_; - static std::atomic_int severityLevel_; - static std::atomic_int verboseLogLevel_; - static std::atomic verboseLogModules_; - static std::set& loggerObservers() { - static auto* inst = new std::set(); - return *inst; - } - static std::mutex loggerObserversMutex_; -}; - -class VoidLogger { - public: - VoidLogger() {} - void operator&(std::ostream&) {} -}; - -} // namespace KINETO_NAMESPACE - -#ifdef LOG // 
Undefine in case these are already defined (quite likely) -#undef LOG -#undef LOG_IS_ON -#undef LOG_IF -#undef LOG_EVERY_N -#undef LOG_IF_EVERY_N -#undef DLOG -#undef DLOG_IF -#undef VLOG -#undef VLOG_IF -#undef VLOG_EVERY_N -#undef VLOG_IS_ON -#undef DVLOG -#undef LOG_FIRST_N -#undef CHECK -#undef DCHECK -#undef DCHECK_EQ -#undef PLOG -#undef PCHECK -#undef LOG_OCCURRENCES -#endif - -#define LOG_IS_ON(severity) \ - (severity >= libkineto::Logger::severityLevel()) - -#define LOG_IF(severity, condition) \ - !(LOG_IS_ON(severity) && (condition)) ? (void)0 : libkineto::VoidLogger() & \ - libkineto::Logger(severity, __LINE__, __FILE__).stream() - -#define LOG(severity) LOG_IF(severity, true) - -#define LOCAL_VARNAME_CONCAT(name, suffix) _##name##suffix##_ - -#define LOCAL_VARNAME(name) LOCAL_VARNAME_CONCAT(name, __LINE__) - -#define LOG_OCCURRENCES LOCAL_VARNAME(log_count) - -#define LOG_EVERY_N(severity, rate) \ - static int LOG_OCCURRENCES = 0; \ - LOG_IF(severity, LOG_OCCURRENCES++ % rate == 0) \ - << "(x" << LOG_OCCURRENCES << ") " - -template -struct __to_constant__ { - static const uint64_t val = n; -}; -#define FILENAME_HASH \ - __to_constant__::val -#define VLOG_IS_ON(verbosity) \ - (libkineto::Logger::verboseLogLevel() >= verbosity && \ - (libkineto::Logger::verboseLogModules() & FILENAME_HASH) == FILENAME_HASH) - -#define VLOG_IF(verbosity, condition) \ - LOG_IF(VERBOSE, VLOG_IS_ON(verbosity) && (condition)) - -#define VLOG(verbosity) VLOG_IF(verbosity, true) - -#define VLOG_EVERY_N(verbosity, rate) \ - static int LOG_OCCURRENCES = 0; \ - VLOG_IF(verbosity, LOG_OCCURRENCES++ % rate == 0) \ - << "(x" << LOG_OCCURRENCES << ") " - -#define PLOG(severity) \ - libkineto::Logger(severity, __LINE__, __FILE__, errno).stream() - -#define SET_LOG_SEVERITY_LEVEL(level) \ - libkineto::Logger::setSeverityLevel(level) - -#define SET_LOG_VERBOSITY_LEVEL(level, modules) \ - libkineto::Logger::setVerboseLogLevel(level); \ - libkineto::Logger::setVerboseLogModules(modules) - 
-// Logging the set of devices the trace is collect on. -#define LOGGER_OBSERVER_ADD_DEVICE(device_count) \ - libkineto::Logger::addLoggerObserverDevice(device_count) - -// Incrementing the number of events collected by this trace. -#define LOGGER_OBSERVER_ADD_EVENT_COUNT(count) \ - libkineto::Logger::addLoggerObserverEventCount(count) - -// Record duration of trace in milliseconds. -#define LOGGER_OBSERVER_SET_TRACE_DURATION_MS(duration) \ - libkineto::Logger::setLoggerObserverTraceDurationMS(duration) - -// Record the trace id when given. -#define LOGGER_OBSERVER_SET_TRACE_ID(tid) \ - libkineto::Logger::setLoggerObserverTraceID(tid) - -// Record the group trace id when given. -#define LOGGER_OBSERVER_SET_GROUP_TRACE_ID(gtid) \ - libkineto::Logger::setLoggerObserverGroupTraceID(gtid) - -// Log the set of destinations the trace is sent to. -#define LOGGER_OBSERVER_ADD_DESTINATION(dest) \ - libkineto::Logger::addLoggerObserverDestination(dest) - -// UST Logger Semantics to describe when a stage is complete. -#define UST_LOGGER_MARK_COMPLETED(stage) \ - LOG(libkineto::LoggerOutputType::STAGE) << "Completed Stage: " << stage - -#endif // USE_GOOGLE_LOG diff --git a/plugins/tensorboard-plugins/libkineto/src/LoggerCollector.h b/plugins/tensorboard-plugins/libkineto/src/LoggerCollector.h deleted file mode 100644 index bb05aab218d..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/LoggerCollector.h +++ /dev/null @@ -1,70 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#if !USE_GOOGLE_LOG - -#include -#include -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "ILoggerObserver.h" - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -class LoggerCollector : public ILoggerObserver { - public: - LoggerCollector() : buckets_() {} - - void write(const std::string& message, LoggerOutputType ot = ERROR) override { - // Skip STAGE output type which is only used by USTLoggerCollector. - if (ot != STAGE) { - buckets_[ot].push_back(message); - } - } - - const std::map> extractCollectorMetadata() override { - return buckets_; - } - - void reset() override { - trace_duration_ms = 0; - event_count = 0; - destinations.clear(); - } - - void addDevice(const int64_t device) override { - devices.insert(device); - } - - void setTraceDurationMS(const int64_t duration) override { - trace_duration_ms = duration; - } - - void addEventCount(const int64_t count) override { - event_count += count; - } - - void addDestination(const std::string& dest) override { - destinations.insert(dest); - } - - protected: - std::map> buckets_; - - // These are useful metadata to collect from CUPTIActivityProfiler for internal tracking. - std::set devices; - int64_t trace_duration_ms{0}; - std::atomic event_count{0}; - std::set destinations; - -}; - -} // namespace KINETO_NAMESPACE - -#endif // !USE_GOOGLE_LOG diff --git a/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.cpp b/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.cpp deleted file mode 100644 index 73eff13e2a0..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.cpp +++ /dev/null @@ -1,569 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "RoctracerActivityApi.h" - -#include -#include -#include - -#include "Demangle.h" -#include "output_base.h" -#include "ThreadUtil.h" - -typedef uint64_t timestamp_t; - -static timestamp_t timespec_to_ns(const timespec& time) { - return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; - } - -using namespace std::chrono; - -namespace KINETO_NAMESPACE { - -constexpr size_t kBufSize(2 * 1024 * 1024); - -RoctracerActivityApi& RoctracerActivityApi::singleton() { - static RoctracerActivityApi instance; - return instance; -} - -RoctracerActivityApi::RoctracerActivityApi() { - gpuTraceBuffers_ = std::make_unique>(); -} - -RoctracerActivityApi::~RoctracerActivityApi() { - disableActivities(std::set()); - endTracing(); -} - -void RoctracerActivityApi::pushCorrelationID(int id, CorrelationFlowType type) { -#ifdef HAS_ROCTRACER - if (!singleton().externalCorrelationEnabled_) { - return; - } - // placeholder -#endif -} - -void RoctracerActivityApi::popCorrelationID(CorrelationFlowType type) { -#ifdef HAS_ROCTRACER - if (!singleton().externalCorrelationEnabled_) { - return; - } - // placeholder -#endif -} - -void RoctracerActivityApi::setMaxBufferSize(int size) { - maxGpuBufferCount_ = 1 + size / kBufSize; -} - -int RoctracerActivityApi::processActivities( - ActivityLogger& logger) { - // Find offset to map from monotonic clock to system clock. - // This will break time-ordering of events but is status quo. 
- - timespec t0, t1, t00; - clock_gettime(CLOCK_REALTIME, &t0); - clock_gettime(CLOCK_MONOTONIC, &t1); - clock_gettime(CLOCK_REALTIME, &t00); - - const timestamp_t toffset = (timespec_to_ns(t0) >> 1) + (timespec_to_ns(t00) >> 1) - timespec_to_ns(t1); - - int count = 0; - - // Basic Api calls - - for (auto &item : rows_) { - GenericTraceActivity a; - a.startTime = (item.begin + toffset) / 1000; - a.endTime = (item.end + toffset) / 1000; - a.id = item.id; - a.device = item.pid; - a.resource = item.tid; - a.activityType = ActivityType::CUDA_RUNTIME; - a.activityName = std::string(roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, item.cid, 0)); - a.flow.id = item.id; - a.flow.type = kLinkAsyncCpuGpu; - a.flow.start = true; - - logger.handleGenericActivity(a); - ++count; - } - - // Malloc/Free calls - for (auto &item : mallocRows_) { - GenericTraceActivity a; - a.startTime = (item.begin + toffset) / 1000; - a.endTime = (item.end + toffset) / 1000; - a.id = item.id; - a.device = item.pid; - a.resource = item.tid; - a.activityType = ActivityType::CUDA_RUNTIME; - a.activityName = std::string(roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, item.cid, 0)); - a.flow.id = item.id; - a.flow.type = kLinkAsyncCpuGpu; - a.flow.start = true; - - a.addMetadata("ptr", item.ptr); - if (item.cid == HIP_API_ID_hipMalloc) { - a.addMetadata("size", item.size); - } - - logger.handleGenericActivity(a); - ++count; - } - - // HipMemcpy calls - for (auto &item : copyRows_) { - GenericTraceActivity a; - a.startTime = (item.begin + toffset) / 1000; - a.endTime = (item.end + toffset) / 1000; - a.id = item.id; - a.device = item.pid; - a.resource = item.tid; - a.activityType = ActivityType::CUDA_RUNTIME; - a.activityName = std::string(roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, item.cid, 0)); - a.flow.id = item.id; - a.flow.type = kLinkAsyncCpuGpu; - a.flow.start = true; - - a.addMetadata("src", item.src); - a.addMetadata("dst", item.dst); - a.addMetadata("size", item.size); - a.addMetadata("kind", 
item.kind); - if ((item.cid == HIP_API_ID_hipMemcpyAsync) || (item.cid == HIP_API_ID_hipMemcpyWithStream)) { - a.addMetadata("stream", fmt::format("{}", reinterpret_cast(item.stream))); - } - - logger.handleGenericActivity(a); - ++count; - } - - // Kernel Launch Api calls - - for (auto &item : kernelRows_) { - GenericTraceActivity a; - a.startTime = (item.begin + toffset) / 1000; - a.endTime = (item.end + toffset) / 1000; - a.id = item.id; - a.device = item.pid; - a.resource = item.tid; - a.activityType = ActivityType::CUDA_RUNTIME; - a.activityName = std::string(roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, item.cid, 0)); - a.flow.id = item.id; - a.flow.type = kLinkAsyncCpuGpu; - a.flow.start = true; - - if (item.functionAddr != nullptr) { - a.addMetadataQuoted( - "kernel", demangle(hipKernelNameRefByPtr(item.functionAddr, item.stream))); - } - else if (item.function != nullptr) { - a.addMetadataQuoted( - "kernel", demangle(hipKernelNameRef(item.function))); - } - a.addMetadata("grid dim", fmt::format("[{}, {}, {}]", item.gridX, item.gridY, item.gridZ)); - a.addMetadata("block dim", fmt::format("[{}, {}, {}]", item.workgroupX, item.workgroupY, item.workgroupZ)); - a.addMetadata("shared size", item.groupSegmentSize); - a.addMetadata("stream", fmt::format("{}", reinterpret_cast(item.stream))); - - // Stash launches to tie to the async ops - kernelLaunches_[a.id] = a; - - // Stash kernel names to tie to the async ops - std::string name; - if (item.functionAddr != nullptr) { - name = demangle(hipKernelNameRefByPtr(item.functionAddr, item.stream)); - } - else if (item.function != nullptr) { - name = demangle(hipKernelNameRef(item.function)); - } - if (!name.empty()) { - uint32_t string_id = reverseStrings_[name]; - if (string_id == 0) { - string_id = nextStringId_++; - reverseStrings_[name] = string_id; - strings_[string_id] = name; - } - kernelNames_[item.id] = string_id; - } - - logger.handleGenericActivity(a); - ++count; - } - - // Async Ops - - for (auto& buffer : 
*gpuTraceBuffers_) { - const roctracer_record_t* record = (const roctracer_record_t*)(buffer.data); - const roctracer_record_t* end_record = (const roctracer_record_t*)(buffer.data + buffer.validSize); - GenericTraceActivity a; - - while (record < end_record) { - if ((record->domain == ACTIVITY_DOMAIN_HIP_API) && (loggedIds_.contains(record->op))) { - const char *name = roctracer_op_string(record->domain, record->op, record->kind); - a.device = record->process_id; - a.resource = record->thread_id; - - a.startTime = (record->begin_ns + toffset) / 1000; - a.endTime = (record->end_ns + toffset) / 1000; - a.id = record->correlation_id; - - a.activityType = ActivityType::CUDA_RUNTIME; - a.activityName = std::string(name); - a.flow.id = record->correlation_id; - a.flow.type = kLinkAsyncCpuGpu; - a.flow.start = true; - - logger.handleGenericActivity(a); - ++count; - } - else if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { - // Overlay launch metadata for kernels - auto kit = kernelLaunches_.find(record->correlation_id); - if (kit != kernelLaunches_.end()) { - a = (*kit).second; - } - - const char *name = roctracer_op_string(record->domain, record->op, record->kind); - a.device = record->device_id; - a.resource = record->queue_id; - - a.startTime = (record->begin_ns + toffset) / 1000; - a.endTime = (record->end_ns + toffset) / 1000; - a.id = record->correlation_id; - - a.activityType = ActivityType::CONCURRENT_KERNEL; - a.activityName = std::string(name); - a.flow.id = record->correlation_id; - a.flow.type = kLinkAsyncCpuGpu; - - auto it = kernelNames_.find(record->correlation_id); - if (it != kernelNames_.end()) { - a.activityName = strings_[it->second]; - } - - logger.handleGenericActivity(a); - ++count; - } - - roctracer_next_record(record, &record); - } - } - return count; -} - -void RoctracerActivityApi::clearActivities() { - gpuTraceBuffers_->clear(); - rows_.clear(); - kernelRows_.clear(); - copyRows_.clear(); - mallocRows_.clear(); - kernelLaunches_.clear(); -} - 
-void RoctracerActivityApi::api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) -{ - RoctracerActivityApi *dis = &singleton(); - - if (domain == ACTIVITY_DOMAIN_HIP_API && dis->loggedIds_.contains(cid)) { - const hip_api_data_t* data = (const hip_api_data_t*)(callback_data); - - // Pack callbacks into row structures - - static timespec timestamp; // FIXME verify thread safety - - if (data->phase == ACTIVITY_API_PHASE_ENTER) { - clock_gettime(CLOCK_MONOTONIC, ×tamp); // record proper clock - } - else { // (data->phase == ACTIVITY_API_PHASE_EXIT) - timespec endTime; - timespec startTime { timestamp }; - clock_gettime(CLOCK_MONOTONIC, &endTime); // record proper clock - - switch (cid) { - case HIP_API_ID_hipLaunchKernel: - case HIP_API_ID_hipExtLaunchKernel: - case HIP_API_ID_hipLaunchCooperativeKernel: // Should work here - { - auto &args = data->args.hipLaunchKernel; - dis->kernelRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - args.function_address, - nullptr, - args.numBlocks.x, - args.numBlocks.y, - args.numBlocks.z, - args.dimBlocks.x, - args.dimBlocks.y, - args.dimBlocks.z, - args.sharedMemBytes, - args.stream - ); - } - break; - case HIP_API_ID_hipHccModuleLaunchKernel: - case HIP_API_ID_hipModuleLaunchKernel: - case HIP_API_ID_hipExtModuleLaunchKernel: - { - auto &args = data->args.hipModuleLaunchKernel; - dis->kernelRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - nullptr, - args.f, - args.gridDimX, - args.gridDimY, - args.gridDimZ, - args.blockDimX, - args.blockDimY, - args.blockDimZ, - args.sharedMemBytes, - args.stream - ); - } - break; - case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: - case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: -#if 0 - { - auto &args = 
data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList__val; - dis->kernelRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - args.function_address, - nullptr, - args.numBlocks.x, - args.numBlocks.y, - args.numBlocks.z, - args.dimBlocks.x, - args.dimBlocks.y, - args.dimBlocks.z, - args.sharedMemBytes, - args.stream - ); - } -#endif - break; - case HIP_API_ID_hipMalloc: - dis->mallocRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - data->args.hipMalloc.ptr__val, - data->args.hipMalloc.size - ); - break; - case HIP_API_ID_hipFree: - dis->mallocRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - data->args.hipFree.ptr, - 0 - ); - break; - case HIP_API_ID_hipMemcpy: - { - auto &args = data->args.hipMemcpy; - dis->copyRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - args.src, - args.dst, - args.sizeBytes, - args.kind, - static_cast(0) // use placeholder? 
- ); - } - break; - case HIP_API_ID_hipMemcpyAsync: - case HIP_API_ID_hipMemcpyWithStream: - { - auto &args = data->args.hipMemcpyAsync; - dis->copyRows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime), - args.src, - args.dst, - args.sizeBytes, - args.kind, - args.stream - ); - } - break; - default: - dis->rows_.emplace_back(data->correlation_id, - domain, - cid, - processId(), - systemThreadId(), - timespec_to_ns(startTime), - timespec_to_ns(endTime) - ); - break; - } - } - } -} - -void RoctracerActivityApi::activity_callback(const char* begin, const char* end, void* arg) -{ - size_t size = end - begin; - uint8_t *buffer = (uint8_t*) malloc(size); - auto &gpuTraceBuffers = singleton().gpuTraceBuffers_; - memcpy(buffer, begin, size); - gpuTraceBuffers->emplace_back(buffer, size); -} - -void RoctracerActivityApi::enableActivities( - const std::set& selected_activities) { -#ifdef HAS_ROCTRACER - if (!registered_) { - roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, nullptr); // Magic encantation - - // Set some api calls to ignore - loggedIds_.setInvertMode(true); // Omit the specified api - loggedIds_.add("hipGetDevice"); - loggedIds_.add("hipSetDevice"); - loggedIds_.add("hipGetLastError"); - loggedIds_.add("__hipPushCallConfiguration"); - loggedIds_.add("__hipPopCallConfiguration"); - loggedIds_.add("hipCtxSetCurrent"); - loggedIds_.add("hipEventRecord"); - loggedIds_.add("hipEventQuery"); - loggedIds_.add("hipGetDeviceProperties"); - loggedIds_.add("hipPeekAtLastError"); - loggedIds_.add("hipModuleGetFunction"); - loggedIds_.add("hipEventCreateWithFlags"); - - // Enable API callbacks - if (loggedIds_.invertMode() == true) { - // exclusion list - enable entire domain and turn off things in list - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, nullptr); - const std::unordered_map &filter = loggedIds_.filterList(); - for (auto it = filter.begin(); 
it != filter.end(); ++it) { - roctracer_disable_op_callback(ACTIVITY_DOMAIN_HIP_API, it->first); - } - } - else { - // inclusion list - only enable things in the list - const std::unordered_map &filter = loggedIds_.filterList(); - roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API); - for (auto it = filter.begin(); it != filter.end(); ++it) { - roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, it->first, api_callback, nullptr); - } - } - //roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, nullptr); - - // Allocate default tracing pool - roctracer_properties_t properties; - memset(&properties, 0, sizeof(roctracer_properties_t)); - properties.buffer_size = 0x1000; - roctracer_open_pool(&properties); - - // Enable async op collection - roctracer_properties_t hcc_cb_properties; - memset(&hcc_cb_properties, 0, sizeof(roctracer_properties_t)); - hcc_cb_properties.buffer_size = 0x4000; - hcc_cb_properties.buffer_callback_fun = activity_callback; - roctracer_open_pool_expl(&hcc_cb_properties, &hccPool_); - roctracer_enable_domain_activity_expl(ACTIVITY_DOMAIN_HCC_OPS, hccPool_); - - registered_ = true; - } - - for (const auto& activity : selected_activities) { - if (activity == ActivityType::EXTERNAL_CORRELATION) { - externalCorrelationEnabled_ = true; - } - } - - roctracer_start(); -#endif -} - -void RoctracerActivityApi::disableActivities( - const std::set& selected_activities) { -#ifdef HAS_ROCTRACER - roctracer_stop(); - roctracer_flush_activity_expl(hccPool_); - - for (const auto& activity : selected_activities) { - if (activity == ActivityType::EXTERNAL_CORRELATION) { - externalCorrelationEnabled_ = false; - } - } -#endif -} - -void RoctracerActivityApi::endTracing() { - if (registered_ == true) { - roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API); - //roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX); - - roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS); - roctracer_close_pool_expl(hccPool_); - } -} - - 
-ApiIdList::ApiIdList() -: invert_(true) -{ -} - -void ApiIdList::add(std::string apiName) -{ - uint32_t cid = 0; - if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, apiName.c_str(), &cid, nullptr) == ROCTRACER_STATUS_SUCCESS) { - filter_[cid] = 1; - } -} -void ApiIdList::remove(std::string apiName) -{ - uint32_t cid = 0; - if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, apiName.c_str(), &cid, nullptr) == ROCTRACER_STATUS_SUCCESS) { - filter_.erase(cid); - } -} - -bool ApiIdList::loadUserPrefs() -{ - // placeholder - return false; -} -bool ApiIdList::contains(uint32_t apiId) -{ - return (filter_.find(apiId) != filter_.end()) ? !invert_ : invert_; // XOR -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.h b/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.h deleted file mode 100644 index 28280253e7c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityApi.h +++ /dev/null @@ -1,171 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAS_ROCTRACER -#include -#include -#include -#include -#include -#endif - -#include "ActivityType.h" -#include "GenericTraceActivity.h" -#include "RoctracerActivityBuffer.h" - - -namespace KINETO_NAMESPACE { - -using namespace libkineto; - -class ApiIdList -{ -public: - ApiIdList(); - bool invertMode() { return invert_; } - void setInvertMode(bool invert) { invert_ = invert; } - void add(std::string apiName); - void remove(std::string apiName); - bool loadUserPrefs(); - bool contains(uint32_t apiId); - const std::unordered_map &filterList() { return filter_; } - -private: - std::unordered_map filter_; - bool invert_; -}; - -struct roctracerRow { - roctracerRow(uint64_t id, uint32_t domain, uint32_t cid, uint32_t pid - , uint32_t tid, uint64_t begin, uint64_t end) - : id(id), domain(domain), cid(cid), pid(pid), tid(tid), begin(begin), end(end) {} - uint64_t id; // correlation_id - uint32_t domain; - uint32_t cid; - uint32_t pid; - uint32_t tid; - uint64_t begin; - uint64_t end; -}; - -struct kernelRow : public roctracerRow { - kernelRow(uint64_t id, uint32_t domain, uint32_t cid, uint32_t pid - , uint32_t tid, uint64_t begin, uint64_t end - , const void *faddr, hipFunction_t function - , unsigned int gx, unsigned int gy, unsigned int gz - , unsigned int wx, unsigned int wy, unsigned int wz - , size_t gss, hipStream_t stream) - : roctracerRow(id, domain, cid, pid, tid, begin, end), functionAddr(faddr) - , function(function), gridX(gx), gridY(gy), gridZ(gz) - , workgroupX(wx), workgroupY(wy), workgroupZ(wz), groupSegmentSize(gss) - , stream(stream) {} - const void* functionAddr; - hipFunction_t function; - unsigned int gridX; - unsigned int gridY; - unsigned int gridZ; - unsigned int workgroupX; - unsigned int workgroupY; - unsigned int workgroupZ; - size_t groupSegmentSize; - hipStream_t stream; -}; - -struct copyRow : public roctracerRow { - 
copyRow(uint64_t id, uint32_t domain, uint32_t cid, uint32_t pid - , uint32_t tid, uint64_t begin, uint64_t end - , const void* src, const void *dst, size_t size, hipMemcpyKind kind - , hipStream_t stream) - : roctracerRow(id, domain, cid, pid, tid, begin, end) - , src(src), dst(dst), size(size), kind(kind), stream(stream) {} - const void *src; - const void *dst; - size_t size; - hipMemcpyKind kind; - hipStream_t stream; -}; - -struct mallocRow : public roctracerRow { - mallocRow(uint64_t id, uint32_t domain, uint32_t cid, uint32_t pid - , uint32_t tid, uint64_t begin, uint64_t end - , const void* ptr, size_t size) - : roctracerRow(id, domain, cid, pid, tid, begin, end) - , ptr(ptr), size(size) {} - const void *ptr; - size_t size; -}; - - -class RoctracerActivityApi { - public: - enum CorrelationFlowType { - Default, - User - }; - - RoctracerActivityApi(); - RoctracerActivityApi(const RoctracerActivityApi&) = delete; - RoctracerActivityApi& operator=(const RoctracerActivityApi&) = delete; - - virtual ~RoctracerActivityApi(); - - static RoctracerActivityApi& singleton(); - - static void pushCorrelationID(int id, CorrelationFlowType type); - static void popCorrelationID(CorrelationFlowType type); - - void enableActivities( - const std::set& selected_activities); - void disableActivities( - const std::set& selected_activities); - void clearActivities(); - - int processActivities(ActivityLogger& logger); - - void setMaxBufferSize(int size); - - std::atomic_bool stopCollection{false}; - - private: - bool registered_{false}; - void endTracing(); - -#ifdef HAS_ROCTRACER - roctracer_pool_t *hccPool_{NULL}; - static void api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg); - static void activity_callback(const char* begin, const char* end, void* arg); - - //Name cache - uint32_t nextStringId_{2}; - std::map strings_; - std::map reverseStrings_; - std::map kernelNames_; - - ApiIdList loggedIds_; - - // Api callback data - std::deque rows_; - 
std::deque kernelRows_; - std::deque copyRows_; - std::deque mallocRows_; - std::map kernelLaunches_; -#endif - - int maxGpuBufferCount_{0}; - std::unique_ptr> gpuTraceBuffers_; - bool externalCorrelationEnabled_{true}; -}; - -} // namespace KINETO_NAMESPACE - diff --git a/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityBuffer.h b/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityBuffer.h deleted file mode 100644 index cd8a5709a84..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/RoctracerActivityBuffer.h +++ /dev/null @@ -1,30 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include - -namespace KINETO_NAMESPACE { - -class RoctracerActivityBuffer { - public: - // data must be allocated using malloc. - // Ownership is transferred to this object. - RoctracerActivityBuffer(uint8_t* data, size_t validSize) - : data(data), validSize(validSize) {} - - ~RoctracerActivityBuffer() { - free(data); - } - - // Allocated by malloc - uint8_t* data{nullptr}; - - // Number of bytes used - size_t validSize; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/SampleListener.h b/plugins/tensorboard-plugins/libkineto/src/SampleListener.h deleted file mode 100644 index bff86ad122a..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/SampleListener.h +++ /dev/null @@ -1,146 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include - -namespace KINETO_NAMESPACE { - -class Config; - -class SampleValue { - public: - template - explicit SampleValue(T v) { - init(v); - } - - SampleValue(const SampleValue&) = default; - SampleValue& operator=(const SampleValue&) = delete; - SampleValue(SampleValue&&) = default; - SampleValue& operator=(SampleValue&&) = default; - - bool isInt() const { - return type_ == INT64; - } - - int64_t getInt() const { - assert(isInt()); - return int_; - } - - bool isDouble() const { - return type_ == DOUBLE; - } - - double getDouble() const { - assert(isDouble()); - return dbl_; - } - - inline void operator*=(double x) { - assert(isDouble() || isInt()); - if (isDouble()) { - dbl_ *= x; - } else { - int_ = std::round(int_ * x); - } - } - - inline bool operator<(const SampleValue& o) const { - if (type_ != o.type_) { - return type_ < o.type_; - } else if (type_ == INT64) { - return int_ < o.int_; - } else if (type_ == DOUBLE) { - return dbl_ < o.dbl_; - } - assert(false); - return true; - } - - void print(std::ostream& s) const { - if (type_ == INT64) { - s << int_; - } else if (type_ == DOUBLE) { - s << dbl_; - } else { - assert(false); - } - } - - private: - enum Type { INT64, DOUBLE }; - - template - void init(T v); - - Type type_{INT64}; - union { - int64_t int_{0}; - double dbl_; - }; -}; - -template <> -inline void SampleValue::init(uint64_t v) { - int_ = v, type_ = INT64; -} -template <> -inline void SampleValue::init(int64_t v) { - int_ = v, type_ = INT64; -} -template <> -inline void SampleValue::init(int v) { - int_ = v, type_ = INT64; -} -template <> -inline void SampleValue::init(double v) { - dbl_ = v, type_ = DOUBLE; -} - -inline std::ostream& operator<<(std::ostream& out, const SampleValue& s) { - s.print(out); - return out; -} - -using PercentileList = std::vector>; - -struct Stat { - const std::string& name; - const PercentileList percentileValues; - SampleValue total; -}; - -struct Sample { 
- Sample(int stats_count) { - stats.reserve(stats_count); - } - - // Offset in milliseconds from first sample in report - int deltaMsec; - std::vector stats; -}; - -// Inherit from this to be notified of samples -class SampleListener { - public: - SampleListener(const SampleListener&) = delete; - SampleListener& operator=(const SampleListener&) = delete; - - virtual ~SampleListener(){}; - - // Report bucketed & aggregated values for event - virtual void handleSample(int device, const Sample& sample, bool from_new_version) = 0; - - virtual void update(const Config& config) = 0; - - protected: - SampleListener() = default; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/ScopeExit.h b/plugins/tensorboard-plugins/libkineto/src/ScopeExit.h deleted file mode 100644 index b9a6bc83ef9..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ScopeExit.h +++ /dev/null @@ -1,29 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -// Implement a simple scope handler allowing a function to release -// resources when an error or exception occurs - -template -class ScopeExit { - public: - explicit ScopeExit(T t) : t(t) {} - ~ScopeExit() { - t(); - } - T t; -}; - -template -ScopeExit makeScopeExit(T t) { - return ScopeExit(t); -}; - -// Add a level of indirection so __LINE__ is expanded -#define __kINETO_CONCAT(name, line) name##line -#define ANON_VAR(name, line) __kINETO_CONCAT(name, line) - -#define SCOPE_EXIT(func) \ - const auto ANON_VAR(SCOPE_BLOCK, __LINE__) = \ - makeScopeExit([=]() { func; }) diff --git a/plugins/tensorboard-plugins/libkineto/src/ThreadUtil.cpp b/plugins/tensorboard-plugins/libkineto/src/ThreadUtil.cpp deleted file mode 100644 index 0f67d54d585..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/ThreadUtil.cpp +++ /dev/null @@ -1,203 +0,0 @@ -#include "ThreadUtil.h" - -#ifndef _MSC_VER -#include -#include -#include -#include -#else // _MSC_VER -#include -#include -#define WIN32_LEAN_AND_MEAN -#define NOGDI -#include -#include -#undef ERROR -#endif // _MSC_VER - -#ifdef __ANDROID__ -#include -#endif - -#include -#include -#include - -namespace libkineto { - -namespace { -thread_local int32_t _pid = 0; -thread_local int32_t _tid = 0; -thread_local int32_t _sysTid = 0; -} - -int32_t processId() { - if (!_pid) { -#ifndef _MSC_VER - _pid = (int32_t)getpid(); -#else - _pid = (int32_t)GetCurrentProcessId(); -#endif - } - return _pid; -} - -int32_t systemThreadId() { - if (!_sysTid) { -#ifdef __APPLE__ - _sysTid = (int32_t)syscall(SYS_thread_selfid); -#elif defined _MSC_VER - _sysTid = (int32_t)GetCurrentThreadId(); -#else - _sysTid = (int32_t)syscall(SYS_gettid); -#endif - } - return _sysTid; -} - -int32_t threadId() { - if (!_tid) { -#ifdef __APPLE__ - uint64_t tid; - pthread_threadid_np(nullptr, &tid); - _tid = tid; -#elif defined _MSC_VER - _tid = (int32_t)GetCurrentThreadId(); -#else - pthread_t pth = pthread_self(); - int32_t* ptr = 
reinterpret_cast(&pth); - _tid = *ptr; -#endif - } - return _tid; -} - -namespace { -static constexpr size_t kMaxThreadNameLength = 16; - -static constexpr const char* basename(const char* s, int off = 0) { - return !s[off] - ? s - : s[off] == '/' ? basename(&s[off + 1]) : basename(s, off + 1); -} -#if defined(_MSC_VER) -void *getKernel32Func(const char* procName) { - return GetProcAddress(GetModuleHandleA("KERNEL32.DLL"), procName); -} -#endif -} - -bool setThreadName(const std::string& name) { -#ifdef __APPLE__ - return 0 == pthread_setname_np(name.c_str()); -#elif defined _MSC_VER - // Per https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreaddescription - // Use runtime linking to set thread description - static auto _SetThreadDescription = reinterpret_cast(getKernel32Func("SetThreadDescription")); - if (!_SetThreadDescription) { - return false; - } - std::wstring_convert> conv; - std::wstring wname = conv.from_bytes(name); - HRESULT hr = _SetThreadDescription(GetCurrentThread(), wname.c_str()); - return SUCCEEDED(hr); -#else - return 0 == pthread_setname_np(pthread_self(), name.c_str()); -#endif -} - -std::string getThreadName() { -#ifndef _MSC_VER - char buf[kMaxThreadNameLength] = ""; - if ( -#ifndef __ANDROID__ - pthread_getname_np(pthread_self(), buf, kMaxThreadNameLength) != 0 -#else - prctl(PR_GET_NAME, buf, kMaxThreadNameLength) != 0 -#endif - ) { - return "Unknown"; - } - return buf; -#else // _MSC_VER - static auto _GetThreadDescription = reinterpret_cast(getKernel32Func("GetThreadDescription")); - if (!_GetThreadDescription) { - return "Unknown"; - } - PWSTR data; - HRESULT hr = _GetThreadDescription(GetCurrentThread(), &data); - if (!SUCCEEDED(hr)) { - return ""; - } - std::wstring_convert> conv; - std::string name = conv.to_bytes(data); - LocalFree(data); - return name; -#endif -} - -// Linux: -// Extract process name from /proc/pid/cmdline. 
This does not have -// the 16 character limit that /proc/pid/status and /prod/pid/comm has. -std::string processName(int32_t pid) { -#ifdef __linux__ - FILE* cmdfile = fopen(fmt::format("/proc/{}/cmdline", pid).c_str(), "r"); - if (cmdfile != nullptr) { - char* command = nullptr; - int scanned = fscanf(cmdfile, "%ms", &command); - fclose(cmdfile); - if (scanned > 0 && command) { - std::string ret(basename(command)); - free(command); - return ret; - } - } - std::cerr << "Failed to read process name for pid " << pid << std::endl; -#endif - return ""; -} - -// Max number of parent pids to collect, just for extra safeguarding. -constexpr int kMaxParentPids = 10; - -// Return a pair of -static std::pair parentPidAndCommand(int32_t pid) { -#ifdef __linux__ - FILE* statfile = fopen(fmt::format("/proc/{}/stat", pid).c_str(), "r"); - if (statfile == nullptr) { - return std::make_pair(0, ""); - } - int32_t parent_pid; - char* command = nullptr; - int scanned = fscanf(statfile, "%*d (%m[^)]) %*c %d", &command, &parent_pid); - fclose(statfile); - std::pair ret; - if (scanned == 2) { - ret = std::make_pair(parent_pid, std::string(command)); - } else { - std::cerr << "Failed to parse /proc/" << pid << "/stat" << std::endl; - ret = std::make_pair(0, ""); - } - - // The 'm' character in the format tells fscanf to allocate memory - // for the parsed string, which we need to free here. 
- free(command); - return ret; -#else - return std::make_pair(0, ""); -#endif -} - -std::vector> pidCommandPairsOfAncestors() { - std::vector> pairs; - pairs.reserve(kMaxParentPids + 1); - int32_t curr_pid = processId(); - for (int i = 0; i <= kMaxParentPids && curr_pid > 1; i++) { - std::pair ppid_and_comm = parentPidAndCommand(curr_pid); - pairs.push_back(std::make_pair(curr_pid, ppid_and_comm.second)); - curr_pid = ppid_and_comm.first; - } - return pairs; -} - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/WeakSymbols.cpp b/plugins/tensorboard-plugins/libkineto/src/WeakSymbols.cpp deleted file mode 100644 index 540a5ac8f97..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/WeakSymbols.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include - -#ifndef _MSC_VER -extern "C" { -// This function is needed to avoid superfluous dependency on GNU OpenMP library when cuPTI is linked statically -// For more details see https://github.com/pytorch/pytorch/issues/51026 -__attribute__((weak)) int acc_get_device_type() { - throw std::runtime_error("Dummy implementation of acc_get_device_type is not supposed to be called!"); -} - -} // extern "C" -#endif diff --git a/plugins/tensorboard-plugins/libkineto/src/cupti_call.h b/plugins/tensorboard-plugins/libkineto/src/cupti_call.h deleted file mode 100644 index fd6ebae7691..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/cupti_call.h +++ /dev/null @@ -1,33 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include - -#ifdef HAS_CUPTI - -#include - -#define CUPTI_CALL(call) \ - [&]() -> CUptiResult { \ - CUptiResult _status_ = call; \ - if (_status_ != CUPTI_SUCCESS) { \ - const char* _errstr_ = nullptr; \ - cuptiGetResultString(_status_, &_errstr_); \ - LOG(WARNING) << fmt::format( \ - "function {} failed with error {} ({})", \ - #call, \ - _errstr_, \ - (int)_status_); \ - } \ - return _status_; \ - }() - -#define CUPTI_CALL_NOWARN(call) call - -#else - -#define CUPTI_CALL(call) call -#define CUPTI_CALL_NOWARN(call) call - -#endif // HAS_CUPTI diff --git a/plugins/tensorboard-plugins/libkineto/src/cupti_strings.cpp b/plugins/tensorboard-plugins/libkineto/src/cupti_strings.cpp deleted file mode 100644 index 4535273a277..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/cupti_strings.cpp +++ /dev/null @@ -1,502 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "cupti_strings.h" - -namespace libkineto { - -const char* memcpyKindString( - CUpti_ActivityMemcpyKind kind) { - switch (kind) { - case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: - return "HtoD"; - case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: - return "DtoH"; - case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: - return "HtoA"; - case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: - return "AtoH"; - case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA: - return "AtoA"; - case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: - return "AtoD"; - case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: - return "DtoA"; - case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: - return "DtoD"; - case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: - return "HtoH"; - case CUPTI_ACTIVITY_MEMCPY_KIND_PTOP: - return "PtoP"; - default: - break; - } - return ""; -} - -const char* memoryKindString( - CUpti_ActivityMemoryKind kind) { - switch (kind) { - case CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN: - return "Unknown"; - case CUPTI_ACTIVITY_MEMORY_KIND_PAGEABLE: - return "Pageable"; - case CUPTI_ACTIVITY_MEMORY_KIND_PINNED: - return "Pinned"; - case 
CUPTI_ACTIVITY_MEMORY_KIND_DEVICE: - return "Device"; - case CUPTI_ACTIVITY_MEMORY_KIND_ARRAY: - return "Array"; - case CUPTI_ACTIVITY_MEMORY_KIND_MANAGED: - return "Managed"; - case CUPTI_ACTIVITY_MEMORY_KIND_DEVICE_STATIC: - return "Device Static"; - case CUPTI_ACTIVITY_MEMORY_KIND_MANAGED_STATIC: - return "Managed Static"; - case CUPTI_ACTIVITY_MEMORY_KIND_FORCE_INT: - return "Force Int"; - default: - return "Unrecognized"; - } -} - -const char* overheadKindString( - CUpti_ActivityOverheadKind kind) { - switch (kind) { - case CUPTI_ACTIVITY_OVERHEAD_UNKNOWN: - return "Unknown"; - case CUPTI_ACTIVITY_OVERHEAD_DRIVER_COMPILER: - return "Driver Compiler"; - case CUPTI_ACTIVITY_OVERHEAD_CUPTI_BUFFER_FLUSH: - return "Buffer Flush"; - case CUPTI_ACTIVITY_OVERHEAD_CUPTI_INSTRUMENTATION: - return "Instrumentation"; - case CUPTI_ACTIVITY_OVERHEAD_CUPTI_RESOURCE: - return "Resource"; - case CUPTI_ACTIVITY_OVERHEAD_FORCE_INT: - return "Force Int"; - default: - return "Unrecognized"; - } -} - - - -static const char* runtimeCbidNames[] = { - "INVALID", - "cudaDriverGetVersion", - "cudaRuntimeGetVersion", - "cudaGetDeviceCount", - "cudaGetDeviceProperties", - "cudaChooseDevice", - "cudaGetChannelDesc", - "cudaCreateChannelDesc", - "cudaConfigureCall", - "cudaSetupArgument", - "cudaGetLastError", - "cudaPeekAtLastError", - "cudaGetErrorString", - "cudaLaunch", - "cudaFuncSetCacheConfig", - "cudaFuncGetAttributes", - "cudaSetDevice", - "cudaGetDevice", - "cudaSetValidDevices", - "cudaSetDeviceFlags", - "cudaMalloc", - "cudaMallocPitch", - "cudaFree", - "cudaMallocArray", - "cudaFreeArray", - "cudaMallocHost", - "cudaFreeHost", - "cudaHostAlloc", - "cudaHostGetDevicePointer", - "cudaHostGetFlags", - "cudaMemGetInfo", - "cudaMemcpy", - "cudaMemcpy2D", - "cudaMemcpyToArray", - "cudaMemcpy2DToArray", - "cudaMemcpyFromArray", - "cudaMemcpy2DFromArray", - "cudaMemcpyArrayToArray", - "cudaMemcpy2DArrayToArray", - "cudaMemcpyToSymbol", - "cudaMemcpyFromSymbol", - "cudaMemcpyAsync", - 
"cudaMemcpyToArrayAsync", - "cudaMemcpyFromArrayAsync", - "cudaMemcpy2DAsync", - "cudaMemcpy2DToArrayAsync", - "cudaMemcpy2DFromArrayAsync", - "cudaMemcpyToSymbolAsync", - "cudaMemcpyFromSymbolAsync", - "cudaMemset", - "cudaMemset2D", - "cudaMemsetAsync", - "cudaMemset2DAsync", - "cudaGetSymbolAddress", - "cudaGetSymbolSize", - "cudaBindTexture", - "cudaBindTexture2D", - "cudaBindTextureToArray", - "cudaUnbindTexture", - "cudaGetTextureAlignmentOffset", - "cudaGetTextureReference", - "cudaBindSurfaceToArray", - "cudaGetSurfaceReference", - "cudaGLSetGLDevice", - "cudaGLRegisterBufferObject", - "cudaGLMapBufferObject", - "cudaGLUnmapBufferObject", - "cudaGLUnregisterBufferObject", - "cudaGLSetBufferObjectMapFlags", - "cudaGLMapBufferObjectAsync", - "cudaGLUnmapBufferObjectAsync", - "cudaWGLGetDevice", - "cudaGraphicsGLRegisterImage", - "cudaGraphicsGLRegisterBuffer", - "cudaGraphicsUnregisterResource", - "cudaGraphicsResourceSetMapFlags", - "cudaGraphicsMapResources", - "cudaGraphicsUnmapResources", - "cudaGraphicsResourceGetMappedPointer", - "cudaGraphicsSubResourceGetMappedArray", - "cudaVDPAUGetDevice", - "cudaVDPAUSetVDPAUDevice", - "cudaGraphicsVDPAURegisterVideoSurface", - "cudaGraphicsVDPAURegisterOutputSurface", - "cudaD3D11GetDevice", - "cudaD3D11GetDevices", - "cudaD3D11SetDirect3DDevice", - "cudaGraphicsD3D11RegisterResource", - "cudaD3D10GetDevice", - "cudaD3D10GetDevices", - "cudaD3D10SetDirect3DDevice", - "cudaGraphicsD3D10RegisterResource", - "cudaD3D10RegisterResource", - "cudaD3D10UnregisterResource", - "cudaD3D10MapResources", - "cudaD3D10UnmapResources", - "cudaD3D10ResourceSetMapFlags", - "cudaD3D10ResourceGetSurfaceDimensions", - "cudaD3D10ResourceGetMappedArray", - "cudaD3D10ResourceGetMappedPointer", - "cudaD3D10ResourceGetMappedSize", - "cudaD3D10ResourceGetMappedPitch", - "cudaD3D9GetDevice", - "cudaD3D9GetDevices", - "cudaD3D9SetDirect3DDevice", - "cudaD3D9GetDirect3DDevice", - "cudaGraphicsD3D9RegisterResource", - 
"cudaD3D9RegisterResource", - "cudaD3D9UnregisterResource", - "cudaD3D9MapResources", - "cudaD3D9UnmapResources", - "cudaD3D9ResourceSetMapFlags", - "cudaD3D9ResourceGetSurfaceDimensions", - "cudaD3D9ResourceGetMappedArray", - "cudaD3D9ResourceGetMappedPointer", - "cudaD3D9ResourceGetMappedSize", - "cudaD3D9ResourceGetMappedPitch", - "cudaD3D9Begin", - "cudaD3D9End", - "cudaD3D9RegisterVertexBuffer", - "cudaD3D9UnregisterVertexBuffer", - "cudaD3D9MapVertexBuffer", - "cudaD3D9UnmapVertexBuffer", - "cudaThreadExit", - "cudaSetDoubleForDevice", - "cudaSetDoubleForHost", - "cudaThreadSynchronize", - "cudaThreadGetLimit", - "cudaThreadSetLimit", - "cudaStreamCreate", - "cudaStreamDestroy", - "cudaStreamSynchronize", - "cudaStreamQuery", - "cudaEventCreate", - "cudaEventCreateWithFlags", - "cudaEventRecord", - "cudaEventDestroy", - "cudaEventSynchronize", - "cudaEventQuery", - "cudaEventElapsedTime", - "cudaMalloc3D", - "cudaMalloc3DArray", - "cudaMemset3D", - "cudaMemset3DAsync", - "cudaMemcpy3D", - "cudaMemcpy3DAsync", - "cudaThreadSetCacheConfig", - "cudaStreamWaitEvent", - "cudaD3D11GetDirect3DDevice", - "cudaD3D10GetDirect3DDevice", - "cudaThreadGetCacheConfig", - "cudaPointerGetAttributes", - "cudaHostRegister", - "cudaHostUnregister", - "cudaDeviceCanAccessPeer", - "cudaDeviceEnablePeerAccess", - "cudaDeviceDisablePeerAccess", - "cudaPeerRegister", - "cudaPeerUnregister", - "cudaPeerGetDevicePointer", - "cudaMemcpyPeer", - "cudaMemcpyPeerAsync", - "cudaMemcpy3DPeer", - "cudaMemcpy3DPeerAsync", - "cudaDeviceReset", - "cudaDeviceSynchronize", - "cudaDeviceGetLimit", - "cudaDeviceSetLimit", - "cudaDeviceGetCacheConfig", - "cudaDeviceSetCacheConfig", - "cudaProfilerInitialize", - "cudaProfilerStart", - "cudaProfilerStop", - "cudaDeviceGetByPCIBusId", - "cudaDeviceGetPCIBusId", - "cudaGLGetDevices", - "cudaIpcGetEventHandle", - "cudaIpcOpenEventHandle", - "cudaIpcGetMemHandle", - "cudaIpcOpenMemHandle", - "cudaIpcCloseMemHandle", - "cudaArrayGetInfo", - 
"cudaFuncSetSharedMemConfig", - "cudaDeviceGetSharedMemConfig", - "cudaDeviceSetSharedMemConfig", - "cudaCreateTextureObject", - "cudaDestroyTextureObject", - "cudaGetTextureObjectResourceDesc", - "cudaGetTextureObjectTextureDesc", - "cudaCreateSurfaceObject", - "cudaDestroySurfaceObject", - "cudaGetSurfaceObjectResourceDesc", - "cudaMallocMipmappedArray", - "cudaGetMipmappedArrayLevel", - "cudaFreeMipmappedArray", - "cudaBindTextureToMipmappedArray", - "cudaGraphicsResourceGetMappedMipmappedArray", - "cudaStreamAddCallback", - "cudaStreamCreateWithFlags", - "cudaGetTextureObjectResourceViewDesc", - "cudaDeviceGetAttribute", - "cudaStreamDestroy", - "cudaStreamCreateWithPriority", - "cudaStreamGetPriority", - "cudaStreamGetFlags", - "cudaDeviceGetStreamPriorityRange", - "cudaMallocManaged", - "cudaOccupancyMaxActiveBlocksPerMultiprocessor", - "cudaStreamAttachMemAsync", - "cudaGetErrorName", - "cudaOccupancyMaxActiveBlocksPerMultiprocessor", - "cudaLaunchKernel", - "cudaGetDeviceFlags", - "cudaLaunch_ptsz", - "cudaLaunchKernel_ptsz", - "cudaMemcpy_ptds", - "cudaMemcpy2D_ptds", - "cudaMemcpyToArray_ptds", - "cudaMemcpy2DToArray_ptds", - "cudaMemcpyFromArray_ptds", - "cudaMemcpy2DFromArray_ptds", - "cudaMemcpyArrayToArray_ptds", - "cudaMemcpy2DArrayToArray_ptds", - "cudaMemcpyToSymbol_ptds", - "cudaMemcpyFromSymbol_ptds", - "cudaMemcpyAsync_ptsz", - "cudaMemcpyToArrayAsync_ptsz", - "cudaMemcpyFromArrayAsync_ptsz", - "cudaMemcpy2DAsync_ptsz", - "cudaMemcpy2DToArrayAsync_ptsz", - "cudaMemcpy2DFromArrayAsync_ptsz", - "cudaMemcpyToSymbolAsync_ptsz", - "cudaMemcpyFromSymbolAsync_ptsz", - "cudaMemset_ptds", - "cudaMemset2D_ptds", - "cudaMemsetAsync_ptsz", - "cudaMemset2DAsync_ptsz", - "cudaStreamGetPriority_ptsz", - "cudaStreamGetFlags_ptsz", - "cudaStreamSynchronize_ptsz", - "cudaStreamQuery_ptsz", - "cudaStreamAttachMemAsync_ptsz", - "cudaEventRecord_ptsz", - "cudaMemset3D_ptds", - "cudaMemset3DAsync_ptsz", - "cudaMemcpy3D_ptds", - "cudaMemcpy3DAsync_ptsz", - 
"cudaStreamWaitEvent_ptsz", - "cudaStreamAddCallback_ptsz", - "cudaMemcpy3DPeer_ptds", - "cudaMemcpy3DPeerAsync_ptsz", - "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", - "cudaMemPrefetchAsync", - "cudaMemPrefetchAsync_ptsz", - "cudaMemAdvise", - "cudaDeviceGetP2PAttribute", - "cudaGraphicsEGLRegisterImage", - "cudaEGLStreamConsumerConnect", - "cudaEGLStreamConsumerDisconnect", - "cudaEGLStreamConsumerAcquireFrame", - "cudaEGLStreamConsumerReleaseFrame", - "cudaEGLStreamProducerConnect", - "cudaEGLStreamProducerDisconnect", - "cudaEGLStreamProducerPresentFrame", - "cudaEGLStreamProducerReturnFrame", - "cudaGraphicsResourceGetMappedEglFrame", - "cudaMemRangeGetAttribute", - "cudaMemRangeGetAttributes", - "cudaEGLStreamConsumerConnectWithFlags", - "cudaLaunchCooperativeKernel", - "cudaLaunchCooperativeKernel_ptsz", - "cudaEventCreateFromEGLSync", - "cudaLaunchCooperativeKernelMultiDevice", - "cudaFuncSetAttribute", - "cudaImportExternalMemory", - "cudaExternalMemoryGetMappedBuffer", - "cudaExternalMemoryGetMappedMipmappedArray", - "cudaDestroyExternalMemory", - "cudaImportExternalSemaphore", - "cudaSignalExternalSemaphoresAsync", - "cudaSignalExternalSemaphoresAsync_ptsz", - "cudaWaitExternalSemaphoresAsync", - "cudaWaitExternalSemaphoresAsync_ptsz", - "cudaDestroyExternalSemaphore", - "cudaLaunchHostFunc", - "cudaLaunchHostFunc_ptsz", - "cudaGraphCreate", - "cudaGraphKernelNodeGetParams", - "cudaGraphKernelNodeSetParams", - "cudaGraphAddKernelNode", - "cudaGraphAddMemcpyNode", - "cudaGraphMemcpyNodeGetParams", - "cudaGraphMemcpyNodeSetParams", - "cudaGraphAddMemsetNode", - "cudaGraphMemsetNodeGetParams", - "cudaGraphMemsetNodeSetParams", - "cudaGraphAddHostNode", - "cudaGraphHostNodeGetParams", - "cudaGraphAddChildGraphNode", - "cudaGraphChildGraphNodeGetGraph", - "cudaGraphAddEmptyNode", - "cudaGraphClone", - "cudaGraphNodeFindInClone", - "cudaGraphNodeGetType", - "cudaGraphGetRootNodes", - "cudaGraphNodeGetDependencies", - 
"cudaGraphNodeGetDependentNodes", - "cudaGraphAddDependencies", - "cudaGraphRemoveDependencies", - "cudaGraphDestroyNode", - "cudaGraphInstantiate", - "cudaGraphLaunch", - "cudaGraphLaunch_ptsz", - "cudaGraphExecDestroy", - "cudaGraphDestroy", - "cudaStreamBeginCapture", - "cudaStreamBeginCapture_ptsz", - "cudaStreamIsCapturing", - "cudaStreamIsCapturing_ptsz", - "cudaStreamEndCapture", - "cudaStreamEndCapture_ptsz", - "cudaGraphHostNodeSetParams", - "cudaGraphGetNodes", - "cudaGraphGetEdges", - "cudaStreamGetCaptureInfo", - "cudaStreamGetCaptureInfo_ptsz", - "cudaGraphExecKernelNodeSetParams", - "cudaThreadExchangeStreamCaptureMode", - "cudaDeviceGetNvSciSyncAttributes", - "cudaOccupancyAvailableDynamicSMemPerBlock", - "cudaStreamSetFlags", - "cudaStreamSetFlags_ptsz", - "cudaGraphExecMemcpyNodeSetParams", - "cudaGraphExecMemsetNodeSetParams", - "cudaGraphExecHostNodeSetParams", - "cudaGraphExecUpdate", - "cudaGetFuncBySymbol", - "cudaCtxResetPersistingL2Cache", - "cudaGraphKernelNodeCopyAttributes", - "cudaGraphKernelNodeGetAttribute", - "cudaGraphKernelNodeSetAttribute", - "cudaStreamCopyAttributes", - "cudaStreamCopyAttributes_ptsz", - "cudaStreamGetAttribute", - "cudaStreamGetAttribute_ptsz", - "cudaStreamSetAttribute", - "cudaStreamSetAttribute_ptsz", - "cudaDeviceGetTexture1DLinearMaxWidth", - "cudaGraphUpload", - "cudaGraphUpload_ptsz", - "cudaGraphAddMemcpyNodeToSymbol", - "cudaGraphAddMemcpyNodeFromSymbol", - "cudaGraphAddMemcpyNode1D", - "cudaGraphMemcpyNodeSetParamsToSymbol", - "cudaGraphMemcpyNodeSetParamsFromSymbol", - "cudaGraphMemcpyNodeSetParams1D", - "cudaGraphExecMemcpyNodeSetParamsToSymbol", - "cudaGraphExecMemcpyNodeSetParamsFromSymbol", - "cudaGraphExecMemcpyNodeSetParams1D", - "cudaArrayGetSparseProperties", - "cudaMipmappedArrayGetSparseProperties", - "cudaGraphExecChildGraphNodeSetParams", - "cudaGraphAddEventRecordNode", - "cudaGraphEventRecordNodeGetEvent", - "cudaGraphEventRecordNodeSetEvent", - "cudaGraphAddEventWaitNode", - 
"cudaGraphEventWaitNodeGetEvent", - "cudaGraphEventWaitNodeSetEvent", - "cudaGraphExecEventRecordNodeSetEvent", - "cudaGraphExecEventWaitNodeSetEvent", - "cudaEventRecordWithFlags", - "cudaEventRecordWithFlags_ptsz", - "cudaDeviceGetDefaultMemPool", - "cudaMallocAsync", - "cudaMallocAsync_ptsz", - "cudaFreeAsync", - "cudaFreeAsync_ptsz", - "cudaMemPoolTrimTo", - "cudaMemPoolSetAttribute", - "cudaMemPoolGetAttribute", - "cudaMemPoolSetAccess", - "cudaArrayGetPlane", - "cudaMemPoolGetAccess", - "cudaMemPoolCreate", - "cudaMemPoolDestroy", - "cudaDeviceSetMemPool", - "cudaDeviceGetMemPool", - "cudaMemPoolExportToShareableHandle", - "cudaMemPoolImportFromShareableHandle", - "cudaMemPoolExportPointer", - "cudaMemPoolImportPointer", - "cudaMallocFromPoolAsync", - "cudaMallocFromPoolAsync_ptsz", - "cudaSignalExternalSemaphoresAsync", - "cudaSignalExternalSemaphoresAsync", - "cudaWaitExternalSemaphoresAsync", - "cudaWaitExternalSemaphoresAsync", - "cudaGraphAddExternalSemaphoresSignalNode", - "cudaGraphExternalSemaphoresSignalNodeGetParams", - "cudaGraphExternalSemaphoresSignalNodeSetParams", - "cudaGraphAddExternalSemaphoresWaitNode", - "cudaGraphExternalSemaphoresWaitNodeGetParams", - "cudaGraphExternalSemaphoresWaitNodeSetParams", - "cudaGraphExecExternalSemaphoresSignalNodeSetParams", - "cudaGraphExecExternalSemaphoresWaitNodeSetParams", - "SIZE" -}; - -const char* runtimeCbidName(CUpti_CallbackId cbid) { - constexpr int names_size = - sizeof(runtimeCbidNames) / sizeof(runtimeCbidNames[0]); - if (cbid < 0 || cbid >= names_size) { - return runtimeCbidNames[CUPTI_RUNTIME_TRACE_CBID_INVALID]; - } - return runtimeCbidNames[cbid]; -} - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/cupti_strings.h b/plugins/tensorboard-plugins/libkineto/src/cupti_strings.h deleted file mode 100644 index bbfebb98364..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/cupti_strings.h +++ /dev/null @@ -1,14 +0,0 @@ -// (c) Meta Platforms, Inc. 
and affiliates. Confidential and proprietary. - -#pragma once - -#include - -namespace libkineto { - -const char* memoryKindString(CUpti_ActivityMemoryKind kind); -const char* memcpyKindString(CUpti_ActivityMemcpyKind kind); -const char* runtimeCbidName(CUpti_CallbackId cbid); -const char* overheadKindString(CUpti_ActivityOverheadKind kind); - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/init.cpp b/plugins/tensorboard-plugins/libkineto/src/init.cpp deleted file mode 100644 index 4e1022485ac..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/init.cpp +++ /dev/null @@ -1,139 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include - -#include "ActivityProfilerProxy.h" -#include "Config.h" -#ifdef HAS_CUPTI -#include "CuptiCallbackApi.h" -#include "CuptiActivityApi.h" -#include "EventProfilerController.h" -#endif -#include "cupti_call.h" -#include "libkineto.h" - -#include "Logger.h" - -namespace KINETO_NAMESPACE { - -#ifdef HAS_CUPTI -static bool initialized = false; -static std::mutex initMutex; - -static void initProfilers( - CUpti_CallbackDomain /*domain*/, - CUpti_CallbackId /*cbid*/, - const CUpti_CallbackData* cbInfo) { - CUpti_ResourceData* d = (CUpti_ResourceData*)cbInfo; - CUcontext ctx = d->context; - - VLOG(0) << "CUDA Context created"; - std::lock_guard lock(initMutex); - - if (!initialized) { - libkineto::api().initProfilerIfRegistered(); - initialized = true; - VLOG(0) << "libkineto profilers activated"; - } - if (getenv("KINETO_DISABLE_EVENT_PROFILER") != nullptr) { - VLOG(0) << "Event profiler disabled via env var"; - } else { - ConfigLoader& config_loader = libkineto::api().configLoader(); - config_loader.initBaseConfig(); - EventProfilerController::start(ctx, config_loader); - } -} - -// Some models suffer from excessive instrumentation code gen -// on dynamic attach which can hang for more than 5+ seconds. 
-// If the workload was meant to be traced, preload the CUPTI -// to take the performance hit early on. -// https://docs.nvidia.com/cupti/r_main.html#r_overhead -static bool shouldPreloadCuptiInstrumentation() { - return getenv("PRELOAD_CUPTI_INSTRUMENTATION"); -} - -static void stopProfiler( - CUpti_CallbackDomain /*domain*/, - CUpti_CallbackId /*cbid*/, - const CUpti_CallbackData* cbInfo) { - CUpti_ResourceData* d = (CUpti_ResourceData*)cbInfo; - CUcontext ctx = d->context; - - LOG(INFO) << "CUDA Context destroyed"; - std::lock_guard lock(initMutex); - EventProfilerController::stop(ctx); -} -#endif // HAS_CUPTI - -} // namespace KINETO_NAMESPACE - -// Callback interface with CUPTI and library constructors -using namespace KINETO_NAMESPACE; -extern "C" { - -// Return true if no CUPTI errors occurred during init -bool libkineto_init(bool cpuOnly, bool logOnError) { - bool success = true; -#ifdef HAS_CUPTI - if (!cpuOnly) { - // libcupti will be lazily loaded on this call. - // If it is not available (e.g. CUDA is not installed), - // then this call will return an error and we just abort init. 
- auto& cbapi = CuptiCallbackApi::singleton(); - bool status = false; - - if (cbapi.initSuccess()){ - const CUpti_CallbackDomain domain = CUPTI_CB_DOMAIN_RESOURCE; - status = cbapi.registerCallback( - domain, CuptiCallbackApi::RESOURCE_CONTEXT_CREATED, initProfilers); - status = status && cbapi.registerCallback( - domain, CuptiCallbackApi::RESOURCE_CONTEXT_DESTROYED, stopProfiler); - - if (status) { - status = cbapi.enableCallback( - domain, CuptiCallbackApi::RESOURCE_CONTEXT_CREATED); - status = status && cbapi.enableCallback( - domain, CuptiCallbackApi::RESOURCE_CONTEXT_DESTROYED); - } - } - - if (!cbapi.initSuccess() || !status) { - success = false; - cpuOnly = true; - if (logOnError) { - CUPTI_CALL(cbapi.getCuptiStatus()); - LOG(WARNING) << "CUPTI initialization failed - " - << "CUDA profiler activities will be missing"; - LOG(INFO) << "If you see CUPTI_ERROR_INSUFFICIENT_PRIVILEGES, refer to " - << "https://developer.nvidia.com/nvidia-development-tools-solutions-err-nvgpuctrperm-cupti"; - } - } - } - - if (shouldPreloadCuptiInstrumentation()) { - CuptiActivityApi::forceLoadCupti(); - } -#endif // HAS_CUPTI - - ConfigLoader& config_loader = libkineto::api().configLoader(); - libkineto::api().registerProfiler( - std::make_unique(cpuOnly, config_loader)); - - return success; -} - -// The cuda driver calls this function if the CUDA_INJECTION64_PATH environment -// variable is set -int InitializeInjection(void) { - LOG(INFO) << "Injection mode: Initializing libkineto"; - libkineto_init(false /*cpuOnly*/, true /*logOnError*/); - return 1; -} - -void suppressLibkinetoLogMessages() { - SET_LOG_SEVERITY_LEVEL(ERROR); -} - -} // extern C diff --git a/plugins/tensorboard-plugins/libkineto/src/libkineto_api.cpp b/plugins/tensorboard-plugins/libkineto/src/libkineto_api.cpp deleted file mode 100644 index 9a622e4f5e5..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/libkineto_api.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. 
Confidential and proprietary. - -#include "libkineto.h" - -#include "ConfigLoader.h" -#include "ThreadUtil.h" - -namespace libkineto { - -LibkinetoApi& api() { - static LibkinetoApi instance(ConfigLoader::instance()); - return instance; -} - -void LibkinetoApi::initClientIfRegistered() { - if (client_) { - if (clientRegisterThread_ != threadId()) { - fprintf( - stderr, - "ERROR: External init callback must run in same thread as registerClient " - "(%d != %d)\n", - threadId(), - (int)clientRegisterThread_); - } else { - client_->init(); - } - } -} - -void LibkinetoApi::registerClient(ClientInterface* client) { - client_ = client; - if (client && activityProfiler_) { - // Can initialize straight away - client->init(); - } - // Assume here that the external init callback is *not* threadsafe - // and only call it if it's the same thread that called registerClient - clientRegisterThread_ = threadId(); -} - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/src/output_base.h b/plugins/tensorboard-plugins/libkineto/src/output_base.h deleted file mode 100644 index 29d0d57768c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/output_base.h +++ /dev/null @@ -1,104 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include -#include - -#ifdef HAS_CUPTI -#include -#include "CuptiActivity.h" -#endif // HAS_CUPTI -#include "ActivityBuffers.h" -#include "GenericTraceActivity.h" -#include "ThreadUtil.h" -#include "TraceSpan.h" - -namespace KINETO_NAMESPACE { - class Config; - class GpuKernelActivity; - struct RuntimeActivity; -} - -namespace libkineto { - -using namespace KINETO_NAMESPACE; - -class ActivityLogger { - public: - - virtual ~ActivityLogger() = default; - - struct DeviceInfo { - DeviceInfo(int64_t id, const std::string& name, const std::string& label) : - id(id), name(name), label(label) {} - int64_t id; - const std::string name; - const std::string label; - }; - - struct ResourceInfo { - ResourceInfo( - int64_t deviceId, - int64_t id, - int64_t sortIndex, - const std::string& name) : - id(id), sortIndex(sortIndex), deviceId(deviceId), name(name) {} - int64_t id; - int64_t sortIndex; - int64_t deviceId; - const std::string name; - }; - - struct OverheadInfo { - explicit OverheadInfo(const std::string& name) : name(name) {} - const std::string name; - }; - - virtual void handleDeviceInfo( - const DeviceInfo& info, - uint64_t time) = 0; - - virtual void handleResourceInfo(const ResourceInfo& info, int64_t time) = 0; - - virtual void handleOverheadInfo(const OverheadInfo& info, int64_t time) = 0; - - virtual void handleTraceSpan(const TraceSpan& span) = 0; - - virtual void handleActivity( - const libkineto::ITraceActivity& activity) = 0; - virtual void handleGenericActivity( - const libkineto::GenericTraceActivity& activity) = 0; - -#ifdef HAS_CUPTI - virtual void handleGpuActivity( - const GpuActivity& activity) = 0; - virtual void handleGpuActivity( - const GpuActivity& activity) = 0; - virtual void handleGpuActivity( - const GpuActivity& activity) = 0; - virtual void handleGpuActivity( - const GpuActivity& activity) = 0; -#endif // HAS_CUPTI - - virtual void handleTraceStart( - const std::unordered_map& metadata) = 0; - 
- void handleTraceStart() { - handleTraceStart(std::unordered_map()); - } - - virtual void finalizeTrace( - const KINETO_NAMESPACE::Config& config, - std::unique_ptr buffers, - int64_t endTime, - std::unordered_map>& metadata) = 0; - - protected: - ActivityLogger() = default; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/output_csv.cpp b/plugins/tensorboard-plugins/libkineto/src/output_csv.cpp deleted file mode 100644 index e56c0229398..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/output_csv.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "output_csv.h" - -#include -#include -#include - -#include -#include - -#include "Config.h" -#include "Logger.h" - -namespace KINETO_NAMESPACE { - -static void write_header( - std::ostream& out, - const std::vector& percentiles) { - out << "timestamp,delta_ms,device,event_name"; - for (int p : percentiles) { - out << ",p" << p; - } - out << ",total" << std::endl; -} - -void EventCSVLogger::update(const Config& config) { - eventNames_.clear(); - eventNames_.insert(config.eventNames().begin(), config.eventNames().end()); - eventNames_.insert(config.metricNames().begin(), config.metricNames().end()); - if (config.percentiles() != percentiles_) { - percentiles_ = config.percentiles(); - if (out_) { - write_header(*out_, percentiles_); - } - } -} - -void EventCSVLogger::handleSample(int device, const Sample& sample, bool from_new_version) { - using namespace std::chrono; - if (out_) { - auto now = system_clock::now(); - auto time = system_clock::to_time_t(now); - for (const Stat& s : sample.stats) { - if (eventNames_.find(s.name) == eventNames_.end()) { - continue; - } - *out_ << fmt::format("{:%Y-%m-%d %H:%M:%S}", fmt::localtime(time)) << ","; - *out_ << sample.deltaMsec << ","; - *out_ << device << ","; - *out_ << s.name; - for (const auto& p : s.percentileValues) { - *out_ << "," << p.second; - } - 
*out_ << "," << s.total << std::endl; - } - } -} - -void EventCSVFileLogger::update(const Config& config) { - if (config.eventLogFile() != filename_) { - if (of_.is_open()) { - of_.close(); - out_ = nullptr; - percentiles_.clear(); - } - filename_ = config.eventLogFile(); - if (!filename_.empty()) { - of_.open(filename_, std::ios::out | std::ios::trunc); - out_ = &of_; - } - } - EventCSVLogger::update(config); -} - -void EventCSVDbgLogger::update(const Config& config) { - if (out_ && config.verboseLogLevel() < 0) { - out_ = nullptr; - } else if (!out_ && config.verboseLogLevel() >= 0) { - out_ = &LIBKINETO_DBG_STREAM; - } - if (config.verboseLogLevel() >= 0) { - percentiles_.clear(); - EventCSVLogger::update(config); - } -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/output_csv.h b/plugins/tensorboard-plugins/libkineto/src/output_csv.h deleted file mode 100644 index bca29f4db99..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/output_csv.h +++ /dev/null @@ -1,39 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once -#include "SampleListener.h" - -#include -#include -#include - -namespace KINETO_NAMESPACE { - -class EventCSVLogger : public SampleListener { - public: - void update(const Config& config) override; - void handleSample(int device, const Sample& sample, bool from_new_version) override; - - protected: - EventCSVLogger() : out_(nullptr) {} - - std::ostream* out_; - std::set eventNames_; - std::vector percentiles_; -}; - -class EventCSVFileLogger : public EventCSVLogger { - public: - void update(const Config& config) override; - - private: - std::ofstream of_; - std::string filename_; -}; - -class EventCSVDbgLogger : public EventCSVLogger { - public: - void update(const Config& config) override; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/output_json.cpp b/plugins/tensorboard-plugins/libkineto/src/output_json.cpp deleted file mode 100644 index 0ef22339fad..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/output_json.cpp +++ /dev/null @@ -1,583 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "output_json.h" - -#include -#include -#include -#include - -#include "Config.h" -#ifdef HAS_CUPTI -#include "CuptiActivity.h" -#include "CuptiActivity.tpp" -#include "CuptiActivityApi.h" -#include "CudaDeviceProperties.h" -#endif // HAS_CUPTI -#include "Demangle.h" -#include "TraceSpan.h" - -#include "Logger.h" - -using std::endl; -using namespace libkineto; - -namespace KINETO_NAMESPACE { - -static constexpr int kSchemaVersion = 1; -static constexpr char kFlowStart = 's'; -static constexpr char kFlowEnd = 'f'; - -#ifdef __linux__ -static constexpr char kDefaultLogFileFmt[] = - "/tmp/libkineto_activities_{}.json"; -#else -static constexpr char kDefaultLogFileFmt[] = "libkineto_activities_{}.json"; -#endif - -std::string& ChromeTraceLogger::sanitizeStrForJSON(std::string& value) { -// Replace all backslashes with forward slash because Windows paths causing JSONDecodeError. -#ifdef _WIN32 - std::replace(value.begin(), value.end(), '\\', '/'); -#endif - return value; -} - -void ChromeTraceLogger::metadataToJSON( - const std::unordered_map& metadata) { - for (const auto& kv : metadata) { - traceOf_ << fmt::format(R"JSON( - "{}": {},)JSON", kv.first, kv.second); - } -} - -void ChromeTraceLogger::handleTraceStart( - const std::unordered_map& metadata) { - traceOf_ << fmt::format(R"JSON( -{{ - "schemaVersion": {},)JSON", kSchemaVersion); - -#ifdef HAS_CUPTI - traceOf_ << fmt::format(R"JSON( - "deviceProperties": [{} - ],)JSON", devicePropertiesJson()); -#endif - - metadataToJSON(metadata); - traceOf_ << R"JSON( - "traceEvents": [)JSON"; -} - -static std::string defaultFileName() { - return fmt::format(kDefaultLogFileFmt, processId()); -} - -void ChromeTraceLogger::openTraceFile() { - traceOf_.open(fileName_, std::ofstream::out | std::ofstream::trunc); - if (!traceOf_) { - PLOG(ERROR) << "Failed to open '" << fileName_ << "'"; - } else { - LOG(INFO) << "Tracing to " << fileName_; - } -} - -ChromeTraceLogger::ChromeTraceLogger(const std::string& traceFileName) 
{ - fileName_ = traceFileName.empty() ? defaultFileName() : traceFileName; - traceOf_.clear(std::ios_base::badbit); - openTraceFile(); -} - -static int64_t us(int64_t timestamp) { - // It's important that this conversion is the same here and in the CPU trace. - // No rounding! - return timestamp / 1000; -} - -void ChromeTraceLogger::handleDeviceInfo( - const DeviceInfo& info, - uint64_t time) { - if (!traceOf_) { - return; - } - - // M is for metadata - // process_name needs a pid and a name arg - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "name": "process_name", "ph": "M", "ts": {}, "pid": {}, "tid": 0, - "args": {{ - "name": "{}" - }} - }}, - {{ - "name": "process_labels", "ph": "M", "ts": {}, "pid": {}, "tid": 0, - "args": {{ - "labels": "{}" - }} - }}, - {{ - "name": "process_sort_index", "ph": "M", "ts": {}, "pid": {}, "tid": 0, - "args": {{ - "sort_index": {} - }} - }},)JSON", - time, info.id, - info.name, - time, info.id, - info.label, - time, info.id, - info.id < 8 ? 
info.id + 0x1000000ll : info.id); - // clang-format on -} - -void ChromeTraceLogger::handleResourceInfo( - const ResourceInfo& info, - int64_t time) { - if (!traceOf_) { - return; - } - - // M is for metadata - // thread_name needs a pid and a name arg - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "name": "thread_name", "ph": "M", "ts": {}, "pid": {}, "tid": {}, - "args": {{ - "name": "{}" - }} - }}, - {{ - "name": "thread_sort_index", "ph": "M", "ts": {}, "pid": {}, "tid": {}, - "args": {{ - "sort_index": {} - }} - }},)JSON", - time, info.deviceId, info.id, - info.name, - time, info.deviceId, info.id, - info.sortIndex); - // clang-format on -} - -void ChromeTraceLogger::handleOverheadInfo( - const OverheadInfo& info, - int64_t time) { - if (!traceOf_) { - return; - } - - // TOOD: reserve pid = -1 for overhead but we need to rethink how to scale this for - // other metadata - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "name": "process_name", "ph": "M", "ts": {}, "pid": -1, "tid": 0, - "args": {{ - "name": "{}" - }} - }}, - {{ - "name": "process_sort_index", "ph": "M", "ts": {}, "pid": -1, "tid": 0, - "args": {{ - "sort_index": {} - }} - }},)JSON", - time, - info.name, - time, - 0x100000All); - // clang-format on -} - -void ChromeTraceLogger::handleTraceSpan(const TraceSpan& span) { - if (!traceOf_) { - return; - } - - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "X", "cat": "Trace", "ts": {}, "dur": {}, - "pid": "Spans", "tid": "{}", - "name": "{}{} ({})", - "args": {{ - "Op count": {} - }} - }}, - {{ - "name": "process_sort_index", "ph": "M", "ts": {}, - "pid": "Spans", "tid": 0, - "args": {{ - "sort_index": {} - }} - }},)JSON", - span.startTime, span.endTime - span.startTime, - span.name, - span.prefix, span.name, span.iteration, - span.opCount, - span.startTime, - // Large sort index to appear at the bottom - 0x20000000ll); - // clang-format on - - addIterationMarker(span); -} - -void 
ChromeTraceLogger::addIterationMarker(const TraceSpan& span) { - if (!traceOf_) { - return; - } - - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "name": "Iteration Start: {}", "ph": "i", "s": "g", - "pid": "Traces", "tid": "Trace {}", "ts": {} - }},)JSON", - span.name, - span.name, span.startTime); - // clang-format on -} - -static std::string traceActivityJson(const ITraceActivity& activity) { - // clang-format off - int64_t ts = activity.timestamp(); - int64_t duration = activity.duration(); - if (activity.type() == ActivityType::GPU_USER_ANNOTATION) { - // The GPU user annotations start at the same time as the - // first associated GPU activity. Since they appear later - // in the trace file, this causes a visualization issue in Chrome. - // Make it start one us earlier. - ts--; - duration++; // Still need it to end at the orginal point - } - return fmt::format(R"JSON( - "name": "{}", "pid": {}, "tid": {}, - "ts": {}, "dur": {})JSON", - activity.name(), activity.deviceId(), activity.resourceId(), - ts, duration); - // clang-format on -} - -void ChromeTraceLogger::handleGenericInstantEvent( - const libkineto::ITraceActivity& op) { - if (!traceOf_) { - return; - } - - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "i", "s": "t", "name": "{}", - "pid": {}, "tid": {}, - "ts": {}, - "args": {{ - {} - }} - }},)JSON", - op.name(), op.deviceId(), op.resourceId(), - op.timestamp(), op.metadataJson()); -} - -void ChromeTraceLogger::handleActivity( - const libkineto::ITraceActivity& op) { - if (!traceOf_) { - return; - } - - if (op.type() == ActivityType::CPU_INSTANT_EVENT) { - handleGenericInstantEvent(op); - return; - } - - const std::string op_metadata = op.metadataJson(); - std::string separator = ""; - if (op_metadata.find_first_not_of(" \t\n") != std::string::npos) { - separator = ",\n "; - } - std::string span = ""; - if (op.traceSpan()) { - span = fmt::format(R"JSON( - "Trace name": "{}", "Trace iteration": {},)JSON", - op.traceSpan()->name, - 
op.traceSpan()->iteration); - } - - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "X", "cat": "{}", {}, - "args": {{{} - "External id": {}{}{} - }} - }},)JSON", - toString(op.type()), traceActivityJson(op), - // args - span, - op.correlationId(), separator, op_metadata); - // clang-format on - if (op.flowId() > 0) { - handleGenericLink(op); - } -} - -void ChromeTraceLogger::handleGenericActivity( - const libkineto::GenericTraceActivity& op) { - handleActivity(op); -} - -void ChromeTraceLogger::handleGenericLink(const ITraceActivity& act) { - static struct { - int type; - char longName[24]; - char shortName[16]; - } flow_names[] = { - {kLinkFwdBwd, "forward_backward", "fwd_bwd"}, - {kLinkAsyncCpuGpu, "async_cpu_to_gpu", "async_gpu"} - }; - for (auto& flow : flow_names) { - if (act.flowType() == flow.type) { - // Link the activities via flow ID in source and destination. - // The source node must return true from flowStart() - // and the destination node false. - if (act.flowStart()) { - handleLink(kFlowStart, act, act.flowId(), flow.longName, flow.shortName); - } else { - handleLink(kFlowEnd, act, act.flowId(), flow.longName, flow.shortName); - } - return; - } - } - LOG(ERROR) << "Unknown flow type: " << act.flowType(); -} - -void ChromeTraceLogger::handleLink( - char type, - const ITraceActivity& e, - int64_t id, - const std::string& cat, - const std::string& name) { - if (!traceOf_) { - return; - } - - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "{}", "id": {}, "pid": {}, "tid": {}, "ts": {}, - "cat": "{}", "name": "{}", "bp": "e" - }},)JSON", - type, id, e.deviceId(), e.resourceId(), e.timestamp(), cat, name); - // clang-format on -} - -#ifdef HAS_CUPTI -// GPU side kernel activity -void ChromeTraceLogger::handleGpuActivity( - const GpuActivity& activity) { - if (!traceOf_) { - return; - } - const CUpti_ActivityKernel4* kernel = &activity.raw(); - constexpr int threads_per_warp = 32; - float blocks_per_sm = -1.0; - 
float warps_per_sm = -1.0; - int sm_count = smCount(kernel->deviceId); - if (sm_count) { - blocks_per_sm = - (kernel->gridX * kernel->gridY * kernel->gridZ) / (float) sm_count; - warps_per_sm = - blocks_per_sm * (kernel->blockX * kernel->blockY * kernel->blockZ) - / threads_per_warp; - } - - // Calculate occupancy - float occupancy = KINETO_NAMESPACE::kernelOccupancy( - kernel->deviceId, - kernel->registersPerThread, - kernel->staticSharedMemory, - kernel->dynamicSharedMemory, - kernel->blockX, - kernel->blockY, - kernel->blockZ, - blocks_per_sm); - - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "X", "cat": "Kernel", {}, - "args": {{ - "queued": {}, "device": {}, "context": {}, - "stream": {}, "correlation": {}, - "registers per thread": {}, - "shared memory": {}, - "blocks per SM": {}, - "warps per SM": {}, - "grid": [{}, {}, {}], - "block": [{}, {}, {}], - "est. achieved occupancy %": {} - }} - }},)JSON", - traceActivityJson(activity), - // args - us(kernel->queued), kernel->deviceId, kernel->contextId, - kernel->streamId, kernel->correlationId, - kernel->registersPerThread, - kernel->staticSharedMemory + kernel->dynamicSharedMemory, - blocks_per_sm, - warps_per_sm, - kernel->gridX, kernel->gridY, kernel->gridZ, - kernel->blockX, kernel->blockY, kernel->blockZ, - (int) (0.5 + occupancy * 100.0)); - // clang-format on - - auto to_id = activity.correlationId(); - handleLink(kFlowEnd, activity, to_id, "async_cpu_to_gpu", "async_gpu"); -} - -static std::string bandwidth(uint64_t bytes, uint64_t duration) { - return duration == 0 ? 
"\"N/A\"" : fmt::format("{}", bytes * 1.0 / duration); -} - -// GPU side memcpy activity -void ChromeTraceLogger::handleGpuActivity( - const GpuActivity& activity) { - if (!traceOf_) { - return; - } - const CUpti_ActivityMemcpy& memcpy = activity.raw(); - VLOG(2) << memcpy.correlationId << ": MEMCPY"; - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "X", "cat": "Memcpy", {}, - "args": {{ - "device": {}, "context": {}, - "stream": {}, "correlation": {}, - "bytes": {}, "memory bandwidth (GB/s)": {} - }} - }},)JSON", - traceActivityJson(activity), - // args - memcpy.deviceId, memcpy.contextId, - memcpy.streamId, memcpy.correlationId, - memcpy.bytes, bandwidth(memcpy.bytes, memcpy.end - memcpy.start)); - // clang-format on - - int64_t to_id = activity.correlationId(); - handleLink(kFlowEnd, activity, to_id, "async_cpu_to_gpu", "async_gpu"); -} - -// GPU side memcpy activity -void ChromeTraceLogger::handleGpuActivity( - const GpuActivity& activity) { - if (!traceOf_) { - return; - } - const CUpti_ActivityMemcpy2& memcpy = activity.raw(); - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "ph": "X", "cat": "Memcpy", {}, - "args": {{ - "fromDevice": {}, "inDevice": {}, "toDevice": {}, - "fromContext": {}, "inContext": {}, "toContext": {}, - "stream": {}, "correlation": {}, - "bytes": {}, "memory bandwidth (GB/s)": {} - }} - }},)JSON", - traceActivityJson(activity), - // args - memcpy.srcDeviceId, memcpy.deviceId, memcpy.dstDeviceId, - memcpy.srcContextId, memcpy.contextId, memcpy.dstContextId, - memcpy.streamId, memcpy.correlationId, - memcpy.bytes, bandwidth(memcpy.bytes, memcpy.end - memcpy.start)); - // clang-format on - - int64_t to_id = activity.correlationId(); - handleLink(kFlowEnd, activity, to_id, "async_cpu_to_gpu", "async_gpu"); -} - -void ChromeTraceLogger::handleGpuActivity( - const GpuActivity& activity) { - if (!traceOf_) { - return; - } - const CUpti_ActivityMemset& memset = activity.raw(); - // clang-format off - traceOf_ 
<< fmt::format(R"JSON( - {{ - "ph": "X", "cat": "Memset", {}, - "args": {{ - "device": {}, "context": {}, - "stream": {}, "correlation": {}, - "bytes": {}, "memory bandwidth (GB/s)": {} - }} - }},)JSON", - traceActivityJson(activity), - // args - memset.deviceId, memset.contextId, - memset.streamId, memset.correlationId, - memset.bytes, bandwidth(memset.bytes, memset.end - memset.start)); - // clang-format on - - int64_t to_id = activity.correlationId(); - handleLink(kFlowEnd, activity, to_id, "async_cpu_to_gpu", "async_gpu"); -} -#endif // HAS_CUPTI - -void ChromeTraceLogger::finalizeTrace( - const Config& /*unused*/, - std::unique_ptr /*unused*/, - int64_t endTime, - std::unordered_map>& metadata) { - if (!traceOf_) { - LOG(ERROR) << "Failed to write to log file!"; - return; - } - LOG(INFO) << "Chrome Trace written to " << fileName_; - // clang-format off - traceOf_ << fmt::format(R"JSON( - {{ - "name": "Record Window End", "ph": "i", "s": "g", - "pid": "", "tid": "", "ts": {} - }} - ],)JSON", - endTime); - -#if !USE_GOOGLE_LOG - std::unordered_map PreparedMetadata; - for (const auto& kv : metadata) { - // Skip empty log buckets, ex. skip ERROR if its empty. - if (!kv.second.empty()) { - std::string value = "["; - // Ex. Each metadata from logger is a list of strings, expressed in JSON as - // "ERROR": ["Error 1", "Error 2"], - // "WARNING": ["Warning 1", "Warning 2", "Warning 3"], - // ... - int mdv_count = kv.second.size(); - for (const auto& v : kv.second) { - value.append("\"" + v + "\""); - if(mdv_count > 1) { - value.append(","); - mdv_count--; - } - } - value.append("]"); - PreparedMetadata[kv.first] = sanitizeStrForJSON(value); - } - } - metadataToJSON(PreparedMetadata); -#endif // !USE_GOOGLE_LOG - - // Putting this here because the last entry MUST not end with a comma. 
- traceOf_ << fmt::format(R"JSON( - "traceName": "{}" -}})JSON", sanitizeStrForJSON(fileName_)); - // clang-format on - - traceOf_.close(); -} - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/output_json.h b/plugins/tensorboard-plugins/libkineto/src/output_json.h deleted file mode 100644 index 5a8a81e4a9f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/output_json.h +++ /dev/null @@ -1,91 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include -#include -#include - -#ifdef HAS_CUPTI -#include -#endif -#include "GenericTraceActivity.h" -#include "output_base.h" - -namespace KINETO_NAMESPACE { - // Previous declaration of TraceSpan is struct. Must match the same here. - struct TraceSpan; -} - -namespace KINETO_NAMESPACE { - -class Config; - -class ChromeTraceLogger : public libkineto::ActivityLogger { - public: - explicit ChromeTraceLogger(const std::string& traceFileName); - - // Note: the caller of these functions should handle concurrency - // i.e., we these functions are not thread-safe - void handleDeviceInfo( - const DeviceInfo& info, - uint64_t time) override; - - void handleOverheadInfo(const OverheadInfo& info, int64_t time) override; - - void handleResourceInfo(const ResourceInfo& info, int64_t time) override; - - void handleTraceSpan(const TraceSpan& span) override; - - void handleActivity(const ITraceActivity& activity) override; - void handleGenericActivity(const GenericTraceActivity& activity) override; - -#ifdef HAS_CUPTI - void handleGpuActivity(const GpuActivity& activity) override; - void handleGpuActivity(const GpuActivity& activity) override; - void handleGpuActivity(const GpuActivity& activity) override; - void handleGpuActivity(const GpuActivity& activity) override; -#endif // HAS_CUPTI - - void handleTraceStart( - const std::unordered_map& metadata) override; - - void finalizeTrace( - const Config& config, - 
std::unique_ptr buffers, - int64_t endTime, - std::unordered_map>& metadata) override; - - std::string traceFileName() const { - return fileName_; - } - - private: - - // Create a flow event (arrow) - void handleLink( - char type, - const ITraceActivity& e, - int64_t id, - const std::string& cat, - const std::string& name); - - void addIterationMarker(const TraceSpan& span); - - void openTraceFile(); - - void handleGenericInstantEvent(const ITraceActivity& op); - - void handleGenericLink(const ITraceActivity& activity); - - void metadataToJSON(const std::unordered_map& metadata); - - std::string& sanitizeStrForJSON(std::string& value); - - std::string fileName_; - std::ofstream traceOf_; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/src/output_membuf.h b/plugins/tensorboard-plugins/libkineto/src/output_membuf.h deleted file mode 100644 index ef6aadeb657..00000000000 --- a/plugins/tensorboard-plugins/libkineto/src/output_membuf.h +++ /dev/null @@ -1,130 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#pragma once - -#include -#include -#include -#include - -#ifdef HAS_CUPTI -#include -#endif - -#include "Config.h" -#include "GenericTraceActivity.h" -#ifdef HAS_CUPTI -#include "CuptiActivity.h" -#include "CuptiActivity.tpp" -#endif // HAS_CUPTI -#include "output_base.h" - -namespace KINETO_NAMESPACE { - -class Config; - -class MemoryTraceLogger : public ActivityLogger { - public: - MemoryTraceLogger(const Config& config) : config_(config.clone()) { - activities_.reserve(100000); - } - - // Note: the caller of these functions should handle concurrency - // i.e., these functions are not thread-safe - void handleDeviceInfo( - const DeviceInfo& info, - uint64_t time) override { - deviceInfoList_.emplace_back(info, time); - } - - void handleResourceInfo(const ResourceInfo& info, int64_t time) override { - resourceInfoList_.emplace_back(info, time); - } - - void handleOverheadInfo(const OverheadInfo& info, int64_t time) override {} - - void handleTraceSpan(const TraceSpan& span) override { - // Handled separately - } - - template - void addActivityWrapper(const T& act) { - wrappers_.push_back(std::make_unique(act)); - activities_.push_back(wrappers_.back().get()); - } - - // Just add the pointer to the list - ownership of the underlying - // objects must be transferred in ActivityBuffers via finalizeTrace - void handleActivity(const ITraceActivity& activity) override { - activities_.push_back(&activity); - } - void handleGenericActivity(const GenericTraceActivity& activity) override { - addActivityWrapper(activity); - } - -#ifdef HAS_CUPTI - void handleGpuActivity(const GpuActivity& activity) override { - addActivityWrapper(activity); - } - void handleGpuActivity(const GpuActivity& activity) override { - addActivityWrapper(activity); - } - void handleGpuActivity(const GpuActivity& activity) override { - addActivityWrapper(activity); - } - void handleGpuActivity(const GpuActivity& activity) override { - addActivityWrapper(activity); - } -#endif // HAS_CUPTI - - void 
handleTraceStart( - const std::unordered_map& metadata) override { - metadata_ = metadata; - } - - void finalizeTrace( - const Config& config, - std::unique_ptr buffers, - int64_t endTime, - std::unordered_map>& metadata) override { - buffers_ = std::move(buffers); - endTime_ = endTime; - } - - const std::vector* traceActivities() { - return &activities_; - } - - void log(ActivityLogger& logger) { - logger.handleTraceStart(metadata_); - for (auto& activity : activities_) { - activity->log(logger); - } - for (auto& p : deviceInfoList_) { - logger.handleDeviceInfo(p.first, p.second); - } - for (auto& p : resourceInfoList_) { - logger.handleResourceInfo(p.first, p.second); - } - for (auto& cpu_trace_buffer : buffers_->cpu) { - logger.handleTraceSpan(cpu_trace_buffer->span); - } - // Hold on to the buffers - logger.finalizeTrace(*config_, nullptr, endTime_, loggerMetadata_); - } - - private: - - std::unique_ptr config_; - // Optimization: Remove unique_ptr by keeping separate vector per type - std::vector activities_; - std::vector> wrappers_; - std::vector> deviceInfoList_; - std::vector> resourceInfoList_; - std::unique_ptr buffers_; - std::unordered_map metadata_; - std::unordered_map> loggerMetadata_; - int64_t endTime_{0}; -}; - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/test/CMakeLists.txt b/plugins/tensorboard-plugins/libkineto/test/CMakeLists.txt deleted file mode 100644 index ca54460b36c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -cmake_minimum_required(VERSION 3.5 FATAL_ERROR) - -# TODO diff --git a/plugins/tensorboard-plugins/libkineto/test/ConfigTest.cpp b/plugins/tensorboard-plugins/libkineto/test/ConfigTest.cpp deleted file mode 100644 index 16bc86e751c..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/ConfigTest.cpp +++ /dev/null @@ -1,315 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "include/Config.h" - -#include -#include -#include -#include - -using namespace std::chrono; -using namespace KINETO_NAMESPACE; - -TEST(ParseTest, Whitespace) { - Config cfg; - // Check that various types of whitespace is ignored - EXPECT_TRUE(cfg.parse("")); - EXPECT_TRUE(cfg.parse(" ")); - EXPECT_TRUE(cfg.parse("\t")); - EXPECT_TRUE(cfg.parse("\n")); - EXPECT_TRUE(cfg.parse(" ")); - EXPECT_TRUE(cfg.parse("\t \n \t\t\n\n")); - // Only the above characters are supported - EXPECT_FALSE(cfg.parse("\r\n")); -} - -TEST(ParseTest, Comment) { - Config cfg; - // Anything following a '#' should be ignored, up to a newline - EXPECT_TRUE(cfg.parse("# comment")); - EXPECT_TRUE(cfg.parse(" # ~!@#$")); - EXPECT_TRUE(cfg.parse("\t#abc")); - EXPECT_TRUE(cfg.parse("###\n##")); - EXPECT_TRUE(cfg.parse("EVENTS=util ##ok")); - EXPECT_TRUE(cfg.parse("EVENTS=util ## EVENTS=instruction")); - // Whatever appears before the comment must be valid format - EXPECT_FALSE(cfg.parse("util ## not ok")); - EXPECT_FALSE(cfg.parse("## ok \n blah # not OK")); - // Check that a comment does not affect config parsing - EXPECT_TRUE(cfg.parse("SAMPLE_PERIOD_MSECS = 1 # Sample every millisecond")); - EXPECT_EQ(cfg.samplePeriod(), milliseconds(1)); -} - -TEST(ParseTest, Format) { - Config cfg; - // The basic format is just "name = value". - // Where both value and name can be almost anything. - // Leading and trailing whitespace should be removed - // for both 'name' and 'value', but internal whitespace is not. 
- EXPECT_FALSE(cfg.parse("events")); - EXPECT_TRUE(cfg.parse("events=")); - EXPECT_FALSE(cfg.parse("=events=")); - EXPECT_TRUE(cfg.parse("events=1,2,3")); - // Only one setting per line - EXPECT_FALSE(cfg.parse("events = 1,2,3 ; metrics = 4,5,6")); - // Names are case sensitive - EXPECT_TRUE(cfg.parse("EVENTS = 1,2,3 \n metrics = 4,5,6")); - EXPECT_EQ(cfg.eventNames(), std::set({"1", "2", "3"})); - EXPECT_EQ(cfg.metricNames().size(), 0); - // Leading and trailing whitespace removed for event and metric names, - // but not internal. - EXPECT_TRUE( - cfg.parse("EVENTS = 1, 2, 3 \n \tMETRICS\t = \t4,\t5\t,\ts i x ")); - EXPECT_EQ(cfg.eventNames(), std::set({"1", "2", "3"})); - EXPECT_EQ(cfg.metricNames(), std::set({"4", "5", "s i x"})); -} - -TEST(ParseTest, DefaultActivityTypes) { - Config cfg; - cfg.validate(std::chrono::system_clock::now()); - auto all_activities = activityTypes(); - // TODO: introduce optional activities - EXPECT_EQ(cfg.selectedActivityTypes(), - std::set(all_activities.begin(), all_activities.end() - 1)); -} - -TEST(ParseTest, ActivityTypes) { - Config cfg; - EXPECT_FALSE(cfg.parse("ACTIVITY_TYPES")); - EXPECT_TRUE(cfg.parse("ACTIVITY_TYPES=")); - EXPECT_FALSE(cfg.parse("=ACTIVITY_TYPES=")); - - EXPECT_EQ(cfg.selectedActivityTypes(), - std::set({ActivityType::CPU_OP, - ActivityType::CPU_INSTANT_EVENT, - ActivityType::PYTHON_FUNCTION, - ActivityType::USER_ANNOTATION, - ActivityType::GPU_USER_ANNOTATION, - ActivityType::GPU_MEMCPY, - ActivityType::GPU_MEMSET, - ActivityType::CONCURRENT_KERNEL, - ActivityType::EXTERNAL_CORRELATION, - ActivityType::GLOW_RUNTIME, - ActivityType::CUDA_RUNTIME, - ActivityType::CUDA_PROFILER_RANGE})); - - Config cfg2; - EXPECT_TRUE(cfg2.parse("ACTIVITY_TYPES=gpu_memcpy,gpu_MeMsEt,kernel")); - EXPECT_EQ(cfg2.selectedActivityTypes(), - std::set({ActivityType::GPU_MEMCPY, - ActivityType::GPU_MEMSET, - ActivityType::CONCURRENT_KERNEL})); - - EXPECT_TRUE(cfg2.parse("ACTIVITY_TYPES = cuda_Runtime,")); - 
EXPECT_EQ(cfg2.selectedActivityTypes(), - std::set({ActivityType::CUDA_RUNTIME})); - - // Should throw an exception because incorrect activity name - EXPECT_FALSE(cfg2.parse("ACTIVITY_TYPES = memcopy,cuda_runtime")); - - EXPECT_TRUE(cfg2.parse("ACTIVITY_TYPES = cpu_op")); - EXPECT_EQ(cfg2.selectedActivityTypes(), - std::set({ActivityType::CPU_OP})); -} - -TEST(ParseTest, SamplePeriod) { - Config cfg; - EXPECT_TRUE(cfg.parse("SAMPLE_PERIOD_MSECS=10")); - EXPECT_EQ(cfg.samplePeriod(), milliseconds(10)); - EXPECT_TRUE(cfg.parse("SAMPLE_PERIOD_MSECS=0")); - cfg.validate(std::chrono::system_clock::now()); - // 0 should be adjustd up to 1 - EXPECT_EQ(cfg.samplePeriod(), milliseconds(1)); - // Negative and non-int values should fail - EXPECT_FALSE(cfg.parse("SAMPLE_PERIOD_MSECS=-10")); - EXPECT_FALSE(cfg.parse("SAMPLE_PERIOD_MSECS=1.5")); - EXPECT_FALSE(cfg.parse("SAMPLE_PERIOD_MSECS=")); - EXPECT_FALSE(cfg.parse("SAMPLE_PERIOD_MSECS=string")); - EXPECT_EQ(cfg.samplePeriod(), milliseconds(1)); -} - -TEST(ParseTest, MultiplexPeriod) { - Config cfg; - auto now = std::chrono::system_clock::now(); - - EXPECT_TRUE(cfg.parse("SAMPLE_PERIOD_MSECS=100\nMULTIPLEX_PERIOD_MSECS=100")); - EXPECT_EQ(cfg.multiplexPeriod(), milliseconds(100)); - EXPECT_TRUE(cfg.parse("MULTIPLEX_PERIOD_MSECS = 0")); - cfg.validate(now); - // Adjusted to match sample period - EXPECT_EQ(cfg.multiplexPeriod(), milliseconds(100)); - EXPECT_TRUE(cfg.parse("MULTIPLEX_PERIOD_MSECS \t= \t 750 \n")); - cfg.validate(now); - // Adjusted to match multiple of sample period - EXPECT_EQ(cfg.multiplexPeriod(), milliseconds(800)); - EXPECT_FALSE(cfg.parse("MULTIPLEX_PERIOD_MSECS=-10")); - EXPECT_FALSE(cfg.parse("MULTIPLEX_PERIOD_MSECS=1.5")); - EXPECT_FALSE(cfg.parse("MULTIPLEX_PERIOD_MSECS=")); - EXPECT_FALSE(cfg.parse("MULTIPLEX_PERIOD_MSECS=string")); - // Previous value not affected - EXPECT_EQ(cfg.multiplexPeriod(), milliseconds(800)); -} - -TEST(ParseTest, ReportPeriod) { - Config cfg; - 
EXPECT_TRUE(cfg.parse("REPORT_PERIOD_SECS=1")); - EXPECT_EQ(cfg.reportPeriod(), seconds(1)); - // Whitespace - EXPECT_TRUE(cfg.parse("REPORT_PERIOD_SECS = \t100")); - EXPECT_EQ(cfg.reportPeriod(), seconds(100)); - // Invalid types - EXPECT_FALSE(cfg.parse("REPORT_PERIOD_SECS=-1")); - EXPECT_EQ(cfg.reportPeriod(), seconds(100)); -} - -TEST(ParseTest, SamplesPerReport) { - Config cfg; - auto now = std::chrono::system_clock::now(); - - EXPECT_TRUE(cfg.parse(R"( - SAMPLE_PERIOD_MSECS = 1000 - REPORT_PERIOD_SECS = 1 - SAMPLES_PER_REPORT = 10)")); - cfg.validate(now); - // Adjusted down to one sample per report - EXPECT_EQ(cfg.samplesPerReport(), 1); - EXPECT_TRUE(cfg.parse(R"( - SAMPLE_PERIOD_MSECS = 1000 - REPORT_PERIOD_SECS = 10 - SAMPLES_PER_REPORT = 10)")); - cfg.validate(now); - // No adjustment needed - EXPECT_EQ(cfg.samplesPerReport(), 10); - EXPECT_TRUE(cfg.parse(R"( - SAMPLE_PERIOD_MSECS = 1000 - REPORT_PERIOD_SECS = 2 - SAMPLES_PER_REPORT = 10)")); - cfg.validate(now); - // Adjusted to 2 samples per report - EXPECT_EQ(cfg.samplesPerReport(), 2); - EXPECT_TRUE(cfg.parse(R"( - SAMPLE_PERIOD_MSECS = 200 - REPORT_PERIOD_SECS = 2 - SAMPLES_PER_REPORT = 10)")); - cfg.validate(now); - // No adjustment needed - EXPECT_EQ(cfg.samplesPerReport(), 10); - EXPECT_TRUE(cfg.parse("SAMPLES_PER_REPORT=0")); - cfg.validate(now); - // Adjusted up to 1 - EXPECT_EQ(cfg.samplesPerReport(), 1); - // Invalid value types - EXPECT_FALSE(cfg.parse("SAMPLES_PER_REPORT=-10")); - EXPECT_FALSE(cfg.parse("SAMPLES_PER_REPORT=1.5")); - EXPECT_EQ(cfg.samplesPerReport(), 1); - - EXPECT_TRUE(cfg.parse(R"( - SAMPLE_PERIOD_MSECS=1000 - MULTIPLEX_PERIOD_MSECS=500 # Must be a multiple of sample period - REPORT_PERIOD_SECS=0 # Must be non-zero multiple of multiplex period - SAMPLES_PER_REPORT=5 # Max report period / multiplex period)")); - cfg.validate(now); - // Multiple adjustments - EXPECT_EQ(cfg.samplePeriod(), milliseconds(1000)); - EXPECT_EQ(cfg.multiplexPeriod(), milliseconds(1000)); - 
EXPECT_EQ(cfg.reportPeriod(), seconds(1)); - EXPECT_EQ(cfg.samplesPerReport(), 1); -} - -TEST(ParseTest, EnableSigUsr2) { - Config cfg; - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=yes")); - EXPECT_TRUE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=no")); - EXPECT_FALSE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=YES")); - EXPECT_TRUE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=NO")); - EXPECT_FALSE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=Y")); - EXPECT_TRUE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=N")); - EXPECT_FALSE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=T")); - EXPECT_TRUE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=F")); - EXPECT_FALSE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=true")); - EXPECT_TRUE(cfg.sigUsr2Enabled()); - EXPECT_TRUE(cfg.parse("ENABLE_SIGUSR2=false")); - EXPECT_FALSE(cfg.sigUsr2Enabled()); - EXPECT_FALSE(cfg.parse("ENABLE_SIGUSR2= ")); - EXPECT_FALSE(cfg.parse("ENABLE_SIGUSR2=2")); - EXPECT_FALSE(cfg.parse("ENABLE_SIGUSR2=-1")); - EXPECT_FALSE(cfg.parse("ENABLE_SIGUSR2=yep")); -} - -TEST(ParseTest, DeviceMask) { - Config cfg; - // Single device - EXPECT_TRUE(cfg.parse("EVENTS_ENABLED_DEVICES = 0")); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(0)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(1)); - - // Two devices, internal whitespace - EXPECT_TRUE(cfg.parse("EVENTS_ENABLED_DEVICES = 1, 2")); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(0)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(1)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(2)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(3)); - - // Three devices, check that previous devices are ignored - EXPECT_TRUE(cfg.parse("EVENTS_ENABLED_DEVICES = 0, 2,4")); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(0)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(1)); - 
EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(2)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(3)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(4)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(5)); - - // Repeated numbers have no effect - EXPECT_TRUE(cfg.parse("EVENTS_ENABLED_DEVICES = 0,1,1,1,2,3,2,1,3,7,7,3")); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(0)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(1)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(2)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(3)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(4)); - EXPECT_FALSE(cfg.eventProfilerEnabledForDevice(6)); - EXPECT_TRUE(cfg.eventProfilerEnabledForDevice(7)); - - // 8 is larger than the max allowed - EXPECT_FALSE(cfg.parse("EVENTS_ENABLED_DEVICES = 3,8")); - - // 300 cannot be held in an uint8_t - EXPECT_FALSE(cfg.parse("EVENTS_ENABLED_DEVICES = 300")); - - // Various illegal cases - EXPECT_FALSE(cfg.parse("EVENTS_ENABLED_DEVICES = 0,1,two,three")); - EXPECT_FALSE(cfg.parse("EVENTS_ENABLED_DEVICES = 0,1,,2")); - EXPECT_FALSE(cfg.parse("EVENTS_ENABLED_DEVICES = -1")); - EXPECT_FALSE(cfg.parse("EVENTS_ENABLED_DEVICES = 1.0")); -} - -TEST(ParseTest, RequestTime) { - Config cfg; - system_clock::time_point now = system_clock::now(); - int64_t tgood_ms = - duration_cast(now.time_since_epoch()).count(); - EXPECT_TRUE(cfg.parse(fmt::format("REQUEST_TIMESTAMP = {}", tgood_ms))); - - tgood_ms = duration_cast((now - seconds(5)).time_since_epoch()) - .count(); - EXPECT_TRUE(cfg.parse(fmt::format("REQUEST_TIMESTAMP = {}", tgood_ms))); - - int64_t tbad_ms = - duration_cast((now - seconds(20)).time_since_epoch()) - .count(); - EXPECT_FALSE(cfg.parse(fmt::format("REQUEST_TIMESTAMP = {}", tbad_ms))); - - EXPECT_FALSE(cfg.parse("REQUEST_TIMESTAMP = 0")); - EXPECT_FALSE(cfg.parse("REQUEST_TIMESTAMP = -1")); - - tbad_ms = duration_cast((now + seconds(10)).time_since_epoch()) - .count(); - EXPECT_FALSE(cfg.parse(fmt::format("REQUEST_TIMESTAMP = {}", 
tbad_ms))); -} diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiActivityProfilerTest.cpp b/plugins/tensorboard-plugins/libkineto/test/CuptiActivityProfilerTest.cpp deleted file mode 100644 index 6e67980ee31..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiActivityProfilerTest.cpp +++ /dev/null @@ -1,629 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include -#include -#include -#include -#include - -#ifdef __linux__ -#include -#include -#include -#endif - -#include "include/libkineto.h" -#include "include/Config.h" -#include "src/CuptiActivityProfiler.h" -#include "src/ActivityTrace.h" -#include "src/CuptiActivityApi.h" -#include "src/output_base.h" -#include "src/output_json.h" -#include "src/output_membuf.h" - -#include "src/Logger.h" -#include "test/MockActivitySubProfiler.h" - -using namespace std::chrono; -using namespace KINETO_NAMESPACE; - -#define CUDA_LAUNCH_KERNEL CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 -#define CUDA_MEMCPY CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020 - -namespace { -const TraceSpan& defaultTraceSpan() { - static TraceSpan span(0, 0, "Unknown", ""); - return span; -} -} - -// Provides ability to easily create a few test CPU-side ops -struct MockCpuActivityBuffer : public CpuTraceBuffer { - MockCpuActivityBuffer(int64_t startTime, int64_t endTime) { - span = TraceSpan(startTime, endTime,"Test trace"); - gpuOpCount = 0; - } - - void addOp(std::string name, int64_t startTime, int64_t endTime, int64_t correlation) { - GenericTraceActivity op(span, ActivityType::CPU_OP, name); - op.startTime = startTime; - op.endTime = endTime; - op.resource = systemThreadId(); - op.id = correlation; - activities.push_back(std::move(op)); - span.opCount++; - } -}; - -// Provides ability to easily create a few test CUPTI ops -struct MockCuptiActivityBuffer { - void addCorrelationActivity(int64_t correlation, CUpti_ExternalCorrelationKind externalKind, int64_t externalId) { - 
auto& act = *(CUpti_ActivityExternalCorrelation*) malloc(sizeof(CUpti_ActivityExternalCorrelation)); - act.kind = CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION; - act.externalId = externalId; - act.externalKind = externalKind; - act.correlationId = correlation; - activities.push_back(reinterpret_cast(&act)); - } - - void addRuntimeActivity( - CUpti_runtime_api_trace_cbid_enum cbid, - int64_t start_us, int64_t end_us, int64_t correlation) { - auto& act = createActivity( - start_us, end_us, correlation); - act.kind = CUPTI_ACTIVITY_KIND_RUNTIME; - act.cbid = cbid; - act.threadId = threadId(); - activities.push_back(reinterpret_cast(&act)); - } - - void addKernelActivity( - int64_t start_us, int64_t end_us, int64_t correlation) { - auto& act = createActivity( - start_us, end_us, correlation); - act.kind = CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL; - act.deviceId = 0; - act.streamId = 1; - act.name = "kernel"; - act.gridX = act.gridY = act.gridZ = 1; - act.blockX = act.blockY = act.blockZ = 1; - activities.push_back(reinterpret_cast(&act)); - } - - void addMemcpyActivity( - int64_t start_us, int64_t end_us, int64_t correlation) { - auto& act = createActivity( - start_us, end_us, correlation); - act.kind = CUPTI_ACTIVITY_KIND_MEMCPY; - act.deviceId = 0; - act.streamId = 2; - act.copyKind = CUPTI_ACTIVITY_MEMCPY_KIND_HTOD; - act.srcKind = CUPTI_ACTIVITY_MEMORY_KIND_PINNED; - act.dstKind = CUPTI_ACTIVITY_MEMORY_KIND_DEVICE; - activities.push_back(reinterpret_cast(&act)); - } - - template - T& createActivity( - int64_t start_us, int64_t end_us, int64_t correlation) { - T& act = *static_cast(malloc(sizeof(T))); - bzero(&act, sizeof(act)); - act.start = start_us * 1000; - act.end = end_us * 1000; - act.correlationId = correlation; - return act; - } - - ~MockCuptiActivityBuffer() { - for (CUpti_Activity* act : activities) { - free(act); - } - } - - std::vector activities; -}; - -// Mock parts of the CuptiActivityApi -class MockCuptiActivities : public CuptiActivityApi { - public: - 
virtual int smCount() override { - return 10; - } - - virtual const std::pair processActivities( - CuptiActivityBufferMap&, /*unused*/ - std::function handler) override { - for (CUpti_Activity* act : activityBuffer->activities) { - handler(act); - } - return {activityBuffer->activities.size(), 100}; - } - - virtual std::unique_ptr - activityBuffers() override { - auto map = std::make_unique(); - auto buf = std::make_unique(100); - uint8_t* addr = buf->data(); - (*map)[addr] = std::move(buf); - return map; - } - - void bufferRequestedOverride(uint8_t** buffer, size_t* size, size_t* maxNumRecords) { - this->bufferRequested(buffer, size, maxNumRecords); - } - - std::unique_ptr activityBuffer; -}; - - -// Common setup / teardown and helper functions -class CuptiActivityProfilerTest : public ::testing::Test { - protected: - void SetUp() override { - profiler_ = std::make_unique( - cuptiActivities_, /*cpu only*/ false); - cfg_ = std::make_unique(); - cfg_->validate(std::chrono::system_clock::now()); - loggerFactory.addProtocol("file", [](const std::string& url) { - return std::unique_ptr(new ChromeTraceLogger(url)); - }); - } - - std::unique_ptr cfg_; - MockCuptiActivities cuptiActivities_; - std::unique_ptr profiler_; - ActivityLoggerFactory loggerFactory; -}; - -void checkTracefile(const char* filename) { -#ifdef __linux__ - // Check that the expected file was written and that it has some content - int fd = open(filename, O_RDONLY); - if (!fd) { - perror(filename); - } - EXPECT_TRUE(fd); - // Should expect at least 100 bytes - struct stat buf{}; - fstat(fd, &buf); - EXPECT_GT(buf.st_size, 100); - close(fd); -#endif -} - -TEST(CuptiActivityProfiler, AsyncTrace) { - std::vector log_modules( - {"CuptiActivityProfiler.cpp", "output_json.cpp"}); - SET_LOG_VERBOSITY_LEVEL(1, log_modules); - - MockCuptiActivities activities; - CuptiActivityProfiler profiler(activities, /*cpu only*/ true); - - char filename[] = "/tmp/libkineto_testXXXXXX.json"; - mkstemps(filename, 5); - - 
Config cfg; - - int iter = 0; - int warmup = 5; - auto now = system_clock::now(); - auto startTime = now + seconds(10); - - bool success = cfg.parse(fmt::format(R"CFG( - ACTIVITIES_WARMUP_PERIOD_SECS = {} - ACTIVITIES_DURATION_SECS = 1 - ACTIVITIES_LOG_FILE = {} - PROFILE_START_TIME = {} - )CFG", warmup, filename, duration_cast(startTime.time_since_epoch()).count())); - - EXPECT_TRUE(success); - EXPECT_FALSE(profiler.isActive()); - - auto logger = std::make_unique(cfg.activitiesLogFile()); - - // Usually configuration is done when now is startTime - warmup to kick off warmup - // but start right away in the test - profiler.configure(cfg, now); - profiler.setLogger(logger.get()); - - EXPECT_TRUE(profiler.isActive()); - - // fast forward in time and we have reached the startTime - now = startTime; - - // Run the profiler - // Warmup - // performRunLoopStep is usually called by the controller loop and takes - // the current time and the controller's next wakeup time. - profiler.performRunLoopStep( - /* Current time */ now, /* Next wakeup time */ now); - - auto next = now + milliseconds(1000); - - // performRunLoopStep can also be called by an application thread to update iteration count - // since this config does not use iteration this should have no effect on the state - while (++iter < 20) { - profiler.performRunLoopStep(now, now, iter); - } - - // Runloop should now be in collect state, so start workload - // Perform another runloop step, passing in the end profile time as current. - // This should terminate collection - profiler.performRunLoopStep( - /* Current time */ next, /* Next wakeup time */ next); - // One step needed for each of the Process and Finalize phases - // Doesn't really matter what times we pass in here. 
- - EXPECT_TRUE(profiler.isActive()); - - auto nextnext = next + milliseconds(1000); - - while (++iter < 40) { - profiler.performRunLoopStep(next, next, iter); - } - - EXPECT_TRUE(profiler.isActive()); - - profiler.performRunLoopStep(nextnext,nextnext); - profiler.performRunLoopStep(nextnext,nextnext); - - // Assert that tracing has completed - EXPECT_FALSE(profiler.isActive()); - - checkTracefile(filename); -} - -TEST(CuptiActivityProfiler, AsyncTraceUsingIter) { - std::vector log_modules( - {"CuptiActivityProfiler.cpp", "output_json.cpp"}); - SET_LOG_VERBOSITY_LEVEL(1, log_modules); - - auto runIterTest = [&]( - int start_iter, int warmup_iters, int trace_iters) { - - LOG(INFO ) << "Async Trace Test: start_iteration = " << start_iter - << " warmup iterations = " << warmup_iters - << " trace iterations = " << trace_iters; - - MockCuptiActivities activities; - CuptiActivityProfiler profiler(activities, /*cpu only*/ true); - - char filename[] = "/tmp/libkineto_testXXXXXX.json"; - mkstemps(filename, 5); - - Config cfg; - - int iter = 0; - auto now = system_clock::now(); - - bool success = cfg.parse(fmt::format(R"CFG( - PROFILE_START_ITERATION = {} - ACTIVITIES_WARMUP_ITERATIONS={} - ACTIVITIES_ITERATIONS={} - ACTIVITIES_DURATION_SECS = 1 - ACTIVITIES_LOG_FILE = {} - )CFG", start_iter, warmup_iters, trace_iters, filename)); - - EXPECT_TRUE(success); - EXPECT_FALSE(profiler.isActive()); - - auto logger = std::make_unique(cfg.activitiesLogFile()); - - // Usually configuration is done when now is startIter - warmup iter to kick off warmup - // but start right away in the test - while (iter < (start_iter - warmup_iters)) { - profiler.performRunLoopStep(now, now, iter++); - } - - profiler.configure(cfg, now); - profiler.setLogger(logger.get()); - - EXPECT_TRUE(profiler.isActive()); - - // fast forward in time, mimicking what will happen in reality - now += seconds(10); - auto next = now + milliseconds(1000); - - // this call to runloop step should not be effecting the 
state - profiler.performRunLoopStep(now, next); - EXPECT_TRUE(profiler.isActive()); - - // start trace collection - while (iter < start_iter) { - profiler.performRunLoopStep(now, next, iter++); - } - - // Runloop should now be in collect state, so start workload - - while (iter < (start_iter + trace_iters)) { - profiler.performRunLoopStep(now, next, iter++); - } - - // One step is required for each of the Process and Finalize phases - // Doesn't really matter what times we pass in here. - if (iter >= (start_iter + trace_iters)) { - profiler.performRunLoopStep(now, next, iter++); - } - EXPECT_TRUE(profiler.isActive()); - - auto nextnext = next + milliseconds(1000); - - profiler.performRunLoopStep(nextnext, nextnext); - profiler.performRunLoopStep(nextnext, nextnext); - - // Assert that tracing has completed - EXPECT_FALSE(profiler.isActive()); - - checkTracefile(filename); - }; - - // start iter = 50, warmup iters = 5, trace iters = 10 - runIterTest(50, 5, 10); - // should be able to start at 0 iteration - runIterTest(0, 0, 2); - runIterTest(0, 5, 5); -} - -TEST_F(CuptiActivityProfilerTest, SyncTrace) { - using ::testing::Return; - using ::testing::ByMove; - - // Verbose logging is useful for debugging - std::vector log_modules( - {"CuptiActivityProfiler.cpp"}); - SET_LOG_VERBOSITY_LEVEL(2, log_modules); - - // Start and stop profiling - CuptiActivityProfiler profiler(cuptiActivities_, /*cpu only*/ false); - int64_t start_time_us = 100; - int64_t duration_us = 300; - auto start_time = time_point(microseconds(start_time_us)); - profiler.configure(*cfg_, start_time); - profiler.startTrace(start_time); - profiler.stopTrace(start_time + microseconds(duration_us)); - - profiler.recordThreadInfo(); - - // Log some cpu ops - auto cpuOps = std::make_unique( - start_time_us, start_time_us + duration_us); - cpuOps->addOp("op1", 120, 150, 1); - cpuOps->addOp("op2", 130, 140, 2); - cpuOps->addOp("op3", 200, 250, 3); - profiler.transferCpuTrace(std::move(cpuOps)); - - // And 
some GPU ops - auto gpuOps = std::make_unique(); - gpuOps->addRuntimeActivity(CUDA_LAUNCH_KERNEL, 133, 138, 1); - gpuOps->addRuntimeActivity(CUDA_MEMCPY, 210, 220, 2); - gpuOps->addRuntimeActivity(CUDA_LAUNCH_KERNEL, 230, 245, 3); - gpuOps->addKernelActivity(150, 170, 1); - gpuOps->addMemcpyActivity(240, 250, 2); - gpuOps->addKernelActivity(260, 320, 3); - cuptiActivities_.activityBuffer = std::move(gpuOps); - - // Have the profiler process them - auto logger = std::make_unique(*cfg_); - profiler.processTrace(*logger); - - // Profiler can be reset at this point - logger owns the activities - profiler_->reset(); - - // Wrapper that allows iterating over the activities - ActivityTrace trace(std::move(logger), loggerFactory); - EXPECT_EQ(trace.activities()->size(), 9); - std::map activityCounts; - std::map resourceIds; - for (auto& activity : *trace.activities()) { - activityCounts[activity->name()]++; - resourceIds[activity->resourceId()]++; - } - for (const auto& p : activityCounts) { - LOG(INFO) << p.first << ": " << p.second; - } - EXPECT_EQ(activityCounts["op1"], 1); - EXPECT_EQ(activityCounts["op2"], 1); - EXPECT_EQ(activityCounts["op3"], 1); - EXPECT_EQ(activityCounts["cudaLaunchKernel"], 2); - EXPECT_EQ(activityCounts["cudaMemcpy"], 1); - EXPECT_EQ(activityCounts["kernel"], 2); - EXPECT_EQ(activityCounts["Memcpy HtoD (Pinned -> Device)"], 1); - - auto sysTid = systemThreadId(); - // Ops and runtime events are on thread sysTid - EXPECT_EQ(resourceIds[sysTid], 6); - // Kernels are on stream 1, memcpy on stream 2 - EXPECT_EQ(resourceIds[1], 2); - EXPECT_EQ(resourceIds[2], 1); - -#ifdef __linux__ - char filename[] = "/tmp/libkineto_testXXXXXX.json"; - mkstemps(filename, 5); - trace.save(filename); - // Check that the expected file was written and that it has some content - int fd = open(filename, O_RDONLY); - if (!fd) { - perror(filename); - } - EXPECT_TRUE(fd); - // Should expect at least 100 bytes - struct stat buf{}; - fstat(fd, &buf); - EXPECT_GT(buf.st_size, 
100); -#endif -} - -TEST_F(CuptiActivityProfilerTest, GpuUserAnnotationTest) { - // Verbose logging is useful for debugging - std::vector log_modules( - {"CuptiActivityProfiler.cpp"}); - SET_LOG_VERBOSITY_LEVEL(2, log_modules); - - // Start and stop profiling - CuptiActivityProfiler profiler(cuptiActivities_, /*cpu only*/ false); - int64_t start_time_us = 100; - int64_t duration_us = 300; - auto start_time = time_point(microseconds(start_time_us)); - profiler.configure(*cfg_, start_time); - profiler.startTrace(start_time); - profiler.stopTrace(start_time + microseconds(duration_us)); - - int64_t kernelLaunchTime = 120; - profiler.recordThreadInfo(); - - // set up CPU event - auto cpuOps = std::make_unique( - start_time_us, start_time_us + duration_us); - cpuOps->addOp("annotation", kernelLaunchTime, kernelLaunchTime + 10, 1); - profiler.transferCpuTrace(std::move(cpuOps)); - - // set up a couple of GPU events and correlate with above CPU event. - // CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1 is used for user annotations. - auto gpuOps = std::make_unique(); - gpuOps->addCorrelationActivity(1, CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1, 1); - gpuOps->addKernelActivity(kernelLaunchTime + 5, kernelLaunchTime + 10, 1); - gpuOps->addCorrelationActivity(1, CUPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1, 1); - gpuOps->addKernelActivity(kernelLaunchTime + 15, kernelLaunchTime + 25, 1); - cuptiActivities_.activityBuffer = std::move(gpuOps); - - // process trace - auto logger = std::make_unique(*cfg_); - profiler.processTrace(*logger); - - ActivityTrace trace(std::move(logger), loggerFactory); - std::map counts; - for (auto& activity : *trace.activities()) { - counts[activity->name()]++; - } - - // We should now have an additional annotation activity created - // on the GPU timeline. 
- EXPECT_EQ(counts["annotation"], 2); - EXPECT_EQ(counts["kernel"], 2); - - auto& annotation = trace.activities()->at(0); - auto& kernel1 = trace.activities()->at(1); - auto& kernel2 = trace.activities()->at(2); - auto& gpu_annotation = trace.activities()->at(3); - EXPECT_EQ(gpu_annotation->type(), ActivityType::GPU_USER_ANNOTATION); - EXPECT_EQ(gpu_annotation->timestamp(), kernel1->timestamp()); - EXPECT_EQ( - gpu_annotation->duration(), - kernel2->timestamp() + kernel2->duration() - kernel1->timestamp()); - EXPECT_EQ(gpu_annotation->deviceId(), kernel1->deviceId()); - EXPECT_EQ(gpu_annotation->resourceId(), kernel1->resourceId()); - EXPECT_EQ(gpu_annotation->correlationId(), annotation->correlationId()); - EXPECT_EQ(gpu_annotation->name(), annotation->name()); -} - -TEST_F(CuptiActivityProfilerTest, SubActivityProfilers) { - using ::testing::Return; - using ::testing::ByMove; - - // Verbose logging is useful for debugging - std::vector log_modules( - {"CuptiActivityProfiler.cpp"}); - SET_LOG_VERBOSITY_LEVEL(2, log_modules); - - // Setup example events to test - GenericTraceActivity ev{defaultTraceSpan(), ActivityType::GLOW_RUNTIME, ""}; - ev.device = 1; - ev.resource = 0; - - int64_t start_time_us = 100; - int64_t duration_us = 1000; - auto start_time = time_point(microseconds(start_time_us)); - - std::vector test_activities{3, ev}; - test_activities[0].startTime = start_time_us; - test_activities[0].endTime = start_time_us + 5000; - test_activities[0].activityName = "SubGraph A execution"; - test_activities[1].startTime = start_time_us; - test_activities[1].endTime = start_time_us + 2000; - test_activities[1].activityName = "Operator foo"; - test_activities[2].startTime = start_time_us + 2500; - test_activities[2].endTime = start_time_us + 2900; - test_activities[2].activityName = "Operator bar"; - - auto mock_activity_profiler = - std::make_unique(test_activities); - - MockCuptiActivities activities; - CuptiActivityProfiler profiler(activities, /*cpu only*/ 
true); - profiler.addChildActivityProfiler( - std::move(mock_activity_profiler)); - - profiler.configure(*cfg_, start_time); - profiler.startTrace(start_time); - EXPECT_TRUE(profiler.isActive()); - - profiler.stopTrace(start_time + microseconds(duration_us)); - EXPECT_TRUE(profiler.isActive()); - - char filename[] = "/tmp/libkineto_testXXXXXX.json"; - mkstemps(filename, 5); - LOG(INFO) << "Logging to tmp file " << filename; - - // process trace - auto logger = std::make_unique(*cfg_); - profiler.processTrace(*logger); - profiler.setLogger(logger.get()); - - ActivityTrace trace(std::move(logger), loggerFactory); - trace.save(filename); - const auto& traced_activites = trace.activities(); - - // Test we have all the events - EXPECT_EQ(traced_activites->size(), test_activities.size()); - - // Check that the expected file was written and that it has some content - int fd = open(filename, O_RDONLY); - if (!fd) { - perror(filename); - } - EXPECT_TRUE(fd); - - // Should expect at least 100 bytes - struct stat buf{}; - fstat(fd, &buf); - EXPECT_GT(buf.st_size, 100); -} - -TEST_F(CuptiActivityProfilerTest, BufferSizeLimitTestWarmup) { - CuptiActivityProfiler profiler(cuptiActivities_, /*cpu only*/ false); - - auto now = system_clock::now(); - auto startTime = now + seconds(10); - - int maxBufferSizeMB = 3; - - auto startTimeEpoch = std::to_string(duration_cast(startTime.time_since_epoch()).count()); - std::string maxBufferSizeMBStr = std::to_string(maxBufferSizeMB); - cfg_->handleOption("ACTIVITIES_MAX_GPU_BUFFER_SIZE_MB", maxBufferSizeMBStr); - cfg_->handleOption("PROFILE_START_TIME", startTimeEpoch); - - - EXPECT_FALSE(profiler.isActive()); - profiler.configure(*cfg_, now); - EXPECT_TRUE(profiler.isActive()); - - for (size_t i = 0; i < maxBufferSizeMB; i++) { - uint8_t* buf; - size_t gpuBufferSize; - size_t maxNumRecords; - cuptiActivities_.bufferRequestedOverride(&buf, &gpuBufferSize, &maxNumRecords); - } - - // fast forward to startTime and profiler is now running - now 
= startTime; - - profiler.performRunLoopStep(now, now); - - auto next = now + milliseconds(1000); - profiler.performRunLoopStep(next, next); - profiler.performRunLoopStep(next, next); - profiler.performRunLoopStep(next, next); - - EXPECT_FALSE(profiler.isActive()); -} diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiCallbackApiTest.cpp b/plugins/tensorboard-plugins/libkineto/test/CuptiCallbackApiTest.cpp deleted file mode 100644 index 253b696da54..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiCallbackApiTest.cpp +++ /dev/null @@ -1,239 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "src/Logger.h" -#include "src/CuptiCallbackApi.h" - -#include -#include -#include -#include - -using namespace std::chrono; -using namespace KINETO_NAMESPACE; -using namespace libkineto; - -const size_t some_data = 42; - -std::atomic simple_cb_calls = 0; - -void simple_cb( - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const CUpti_CallbackData* cbInfo) { - - // simple arg check - EXPECT_EQ(domain, CUPTI_CB_DOMAIN_RUNTIME_API); - EXPECT_EQ(cbid, CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000); - EXPECT_EQ(*reinterpret_cast(cbInfo), some_data); - - simple_cb_calls++; -} - -void atomic_cb( - CUpti_CallbackDomain /*domain*/, - CUpti_CallbackId /*cbid*/, - const CUpti_CallbackData* /*cbInfo)*/) { - // do some atomics in a loop - for (int i = 0; i < 1000; i++) { - // would have used release consistency but this is fine - simple_cb_calls++; - } -} - -void empty_cb( - CUpti_CallbackDomain /*domain*/, - CUpti_CallbackId /*cbid*/, - const CUpti_CallbackData* /*cbInfo*/) { -} - -TEST(CuptiCallbackApiTest, SimpleTest) { - auto& api = CuptiCallbackApi::singleton(); - - auto addSimpleCallback = [&]() -> bool { - bool ret = api.registerCallback( - CUPTI_CB_DOMAIN_RUNTIME_API, - CuptiCallbackApi::CUDA_LAUNCH_KERNEL, - &simple_cb - ); - return ret; - }; - EXPECT_TRUE(addSimpleCallback()) << "Failed to add 
callback"; - - // duplicate add should be okay - EXPECT_TRUE(addSimpleCallback()) << "Failed to re-add callback"; - - simple_cb_calls = 0; - - // simulate callback - api.__callback_switchboard( - CUPTI_CB_DOMAIN_RUNTIME_API, - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000, - reinterpret_cast(&some_data)); - - EXPECT_EQ(simple_cb_calls, 1); - - bool ret = api.deleteCallback( - CUPTI_CB_DOMAIN_RUNTIME_API, - CuptiCallbackApi::CUDA_LAUNCH_KERNEL, - &simple_cb - ); - - EXPECT_TRUE(ret) << "Failed to remove callback"; - - ret = api.deleteCallback( - CUPTI_CB_DOMAIN_RUNTIME_API, - CuptiCallbackApi::CUDA_LAUNCH_KERNEL, - &atomic_cb - ); - - EXPECT_FALSE(ret) << "oops! deleted a callback that was never added"; -} - -TEST(CuptiCallbackApiTest, AllCallbacks) { - auto& api = CuptiCallbackApi::singleton(); - - auto testCallback = [&]( - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - CuptiCallbackApi::CuptiCallBackID kineto_cbid) -> bool { - - bool ret = api.registerCallback(domain, kineto_cbid, atomic_cb); - EXPECT_TRUE(ret) << "Failed to add callback"; - - if (!ret) { - return false; - } - - simple_cb_calls = 0; - api.__callback_switchboard(domain, cbid, nullptr); - EXPECT_EQ(simple_cb_calls, 1000); - ret = simple_cb_calls == 1000; - - EXPECT_TRUE(api.deleteCallback(domain, kineto_cbid, atomic_cb)); - - return ret; - }; - - EXPECT_TRUE( - testCallback( - CUPTI_CB_DOMAIN_RESOURCE, - CUPTI_CBID_RESOURCE_CONTEXT_CREATED, - CuptiCallbackApi::RESOURCE_CONTEXT_CREATED)) - << "Failed to run callback for RESOURCE_CONTEXT_CREATED"; - - EXPECT_TRUE( - testCallback( - CUPTI_CB_DOMAIN_RESOURCE, - CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING, - CuptiCallbackApi::RESOURCE_CONTEXT_DESTROYED)) - << "Failed to run callback for RESOURCE_CONTEXT_DESTROYED"; - - EXPECT_TRUE( - testCallback( - CUPTI_CB_DOMAIN_RUNTIME_API, - CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000, - CuptiCallbackApi::CUDA_LAUNCH_KERNEL)) - << "Failed to run callback for CUDA_LAUNCH_KERNEL"; - -} - 
-TEST(CuptiCallbackApiTest, ContentionTest) { - auto& api = CuptiCallbackApi::singleton(); - const CUpti_CallbackDomain domain = CUPTI_CB_DOMAIN_RUNTIME_API; - const CUpti_CallbackId cbid = CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000; - const CuptiCallbackApi::CuptiCallBackID kineto_cbid = - CuptiCallbackApi::CUDA_LAUNCH_KERNEL; - - bool ret = api.registerCallback(domain, kineto_cbid, empty_cb); - EXPECT_TRUE(ret) << "Failed to add callback"; - - const int iters = 10000; - const int num_readers = 8; - - simple_cb_calls = 0; - - // simulate callbacks being executed on multiple threads in parallel - // during this interval add a new atomic_callback. - // this test ensured mutual exclusion is working fine - auto read_fn = [&](int tid){ - auto start_ts = high_resolution_clock::now(); - for (int i = 0; i < iters; i++) { - api.__callback_switchboard(domain, cbid, nullptr); - } - auto runtime_ms = duration_cast( - high_resolution_clock::now() - start_ts); - LOG(INFO) << "th " << tid << " done in " << runtime_ms.count() << " ms"; - }; - - - std::vector read_ths; - for (int i = 0; i< num_readers; i++) { - read_ths.emplace_back(read_fn, i); - } - - ret = api.registerCallback(domain, kineto_cbid, atomic_cb); - EXPECT_TRUE(ret) << "Failed to add callback"; - - for (auto& t : read_ths) { - t.join(); - } - - //EXPECT_GT(simple_cb_calls, 0) - // << "Atomic callback should have been called at least once."; - - api.deleteCallback(domain, kineto_cbid, empty_cb); - api.deleteCallback(domain, kineto_cbid, atomic_cb); -} - -TEST(CuptiCallbackApiTest, Bechmark) { - - constexpr int iters = 1000; - // atomic bench a number of times to get a baseline - - const CUpti_CallbackDomain domain = CUPTI_CB_DOMAIN_RUNTIME_API; - const CUpti_CallbackId cbid = CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000; - const CuptiCallbackApi::CuptiCallBackID kineto_cbid = - CuptiCallbackApi::CUDA_LAUNCH_KERNEL; - - LOG(INFO) << "Iteration count = " << iters; - - const bool use_empty = true; - auto cbfn = 
use_empty ? &empty_cb : &atomic_cb; - - // warmup - for (int i = 0; i < 50; i++) { - (*cbfn)(domain, cbid, nullptr); - } - - auto start_ts = high_resolution_clock::now(); - for (int i = 0; i < iters; i++) { - (*cbfn)(domain, cbid, nullptr); - } - auto delta_baseline_ns = duration_cast( - high_resolution_clock::now() - start_ts); - LOG(INFO) << "Baseline runtime = " << delta_baseline_ns.count() << " ns"; - - - auto& api = CuptiCallbackApi::singleton(); - bool ret = api.registerCallback(domain, kineto_cbid, cbfn); - EXPECT_TRUE(ret) << "Failed to add callback"; - - // warmup - for (int i = 0; i < 50; i++) { - api.__callback_switchboard(domain, cbid, nullptr); - } - - start_ts = high_resolution_clock::now(); - for (int i = 0; i < iters; i++) { - api.__callback_switchboard(domain, cbid, nullptr); - } - - auto delta_callback_ns = duration_cast( - high_resolution_clock::now() - start_ts); - LOG(INFO) << "Callback runtime = " << delta_callback_ns.count() << " ns"; - - LOG(INFO) << "Callback runtime per iteration = " << - (delta_callback_ns.count() - delta_baseline_ns.count()) / (double) iters - << " ns"; - -} diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiProfilerApiTest.cu b/plugins/tensorboard-plugins/libkineto/test/CuptiProfilerApiTest.cu deleted file mode 100644 index 54ad51b0a1f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiProfilerApiTest.cu +++ /dev/null @@ -1,353 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include -#include -#include - -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "src/Logger.h" -#include "src/CuptiRangeProfilerApi.h" - -#define DRIVER_API_CALL(apiFuncCall) \ - do { \ - CUresult _status = apiFuncCall; \ - if (_status != CUDA_SUCCESS) { \ - LOG(ERROR) << "Failed invoking CUDA driver function " \ - << #apiFuncCall << " status = " \ - << _status; \ - exit(-1); \ - } \ - } while (0) - -#define EXPECT(expr)\ - if (!(expr)) {\ - }; - -using namespace KINETO_NAMESPACE; - -static int numRanges = 1; - -using Type = double; - -// Device code -__global__ void VecAdd(const Type* A, const Type* B, Type* C, int N) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - if (i < N) { - C[i] = A[i] + B[i]; - } -} - -// Device code -__global__ void VecSub(const Type* A, const Type* B, Type* C, int N) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - if (i < N) { - C[i] = A[i] - B[i]; - } -} - -static void initVec(Type* vec, int n) { - for (int i = 0; i < n; i++) { - vec[i] = i; - } -} - -static void cleanUp( - Type* h_A, - Type* h_B, - Type* h_C, - Type* h_D, - Type* d_A, - Type* d_B, - Type* d_C, - Type* d_D) { - if (d_A) - cudaFree(d_A); - if (d_B) - cudaFree(d_B); - if (d_C) - cudaFree(d_C); - if (d_D) - cudaFree(d_D); - - // Free host memory - if (h_A) - free(h_A); - if (h_B) - free(h_B); - if (h_C) - free(h_C); - if (h_D) - free(h_D); -} - -/* Benchmark application used to test profiler measurements - * This simply runs two kernels vector Add and Vector Subtract - */ - -void VectorAddSubtract() { - int N = 50000; - size_t size = N * sizeof(Type); - int threadsPerBlock = 0; - int blocksPerGrid = 0; - Type *h_A, *h_B, *h_C, *h_D; - Type *d_A, *d_B, *d_C, *d_D; - int i; - Type sum, diff; - - // Allocate input vectors h_A and h_B in host memory - h_A = (Type*)malloc(size); - h_B = (Type*)malloc(size); - h_C = (Type*)malloc(size); - h_D = (Type*)malloc(size); - - // Initialize input vectors - initVec(h_A, 
N); - initVec(h_B, N); - memset(h_C, 0, size); - memset(h_D, 0, size); - - // Allocate vectors in device memory - cudaMalloc((void**)&d_A, size); - cudaMalloc((void**)&d_B, size); - cudaMalloc((void**)&d_C, size); - cudaMalloc((void**)&d_D, size); - - // Copy vectors from host memory to device memory - cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice); - cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice); - - // Invoke kernel - threadsPerBlock = 256; - blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock; - LOG(INFO) << fmt::format( - "Launching kernel: blocks {}, thread/block {}", - blocksPerGrid, - threadsPerBlock); - - VecAdd<<>>(d_A, d_B, d_C, N); - - VecSub<<>>(d_A, d_B, d_D, N); - - // Copy result from device memory to host memory - // h_C contains the result in host memory - cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost); - cudaMemcpy(h_D, d_D, size, cudaMemcpyDeviceToHost); - - // Verify result - for (i = 0; i < N; ++i) { - sum = h_A[i] + h_B[i]; - diff = h_A[i] - h_B[i]; - if (h_C[i] != sum || h_D[i] != diff) { - LOG(ERROR) << "Result verification failed"; - break; - } - } - - cleanUp(h_A, h_B, h_C, h_D, d_A, d_B, d_C, d_D); -} - -#if HAS_CUPTI_RANGE_PROFILER -bool runTestWithAutoRange( - int deviceNum, - const std::vector& metricNames, - CUcontext cuContext, - bool async) { - - // create a CUPTI range based profiling profiler - // this configures the counter data as well - CuptiRBProfilerSession profiler( - metricNames, deviceNum, 2, 1, async ? 
nullptr : cuContext); - - CUpti_ProfilerRange profilerRange = CUPTI_AutoRange; - CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_KernelReplay; - - if (async) { - profiler.asyncStartAndEnable(profilerRange, profilerReplayMode); - } else { - profiler.start(profilerRange, profilerReplayMode); - profiler.enable(); - } - - VectorAddSubtract(); - - if (!async) { - profiler.disable(); - // stop profiler - profiler.stop(); - } else { - profiler.asyncDisableAndStop(); - } - - auto result = profiler.evaluateMetrics(true); - - // check results - EXPECT_EQ(result.metricNames.size(), 3); - EXPECT_EQ(result.rangeVals.size(), 2); - - for (const auto& measurement : result.rangeVals) { - EXPECT_EQ(measurement.values.size(), 3); - - if (measurement.values.size() == 3) { - // smsp__warps_launched.avg - EXPECT_NE(measurement.values[0], 0); - // smsp__sass_thread_inst_executed_op_dadd_pred_on.sum - // each kernel has 50000 dadd ops - EXPECT_EQ(measurement.values[1], 50000); - // sm__inst_executed_pipe_tensor.sum - //EXPECT_EQ(measurement.values[2], 0); - } - } - return true; -} - -bool runTestWithUserRange( - int deviceNum, - const std::vector& metricNames, - CUcontext cuContext, - bool async = false) { - - // create a CUPTI range based profiling profiler - // this configures the counter data as well - CuptiRBProfilerSession profiler( - metricNames, deviceNum, numRanges, 1, async ? 
nullptr : cuContext); - - CUpti_ProfilerRange profilerRange = CUPTI_UserRange; - CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_UserReplay; - - if (async) { - profiler.asyncStartAndEnable(profilerRange, profilerReplayMode); - { VectorAddSubtract(); } - profiler.disableAndStop(); - } else { - profiler.start(profilerRange, profilerReplayMode); - - /* User takes the resposiblity of replaying the kernel launches */ - bool replay = true; - do { - profiler.beginPass(); - { - profiler.enable(); - - std::string rangeName = "vecAddSub"; - profiler.pushRange(rangeName); - - { VectorAddSubtract(); } - - profiler.popRange(); - profiler.disable(); - } - LOG(INFO) << "Replay starting."; - replay = profiler.endPass(); - - } while (!replay); - - // stop profiler - profiler.stop(); - } - VectorAddSubtract(); - auto result = profiler.evaluateMetrics(true); - - // check results - EXPECT_EQ(result.metricNames.size(), 3); - EXPECT_EQ(result.rangeVals.size(), 1); - - if (result.rangeVals.size() > 0) { - const auto& measurement = result.rangeVals[0]; - EXPECT_EQ(measurement.values.size(), 3); - - if (measurement.values.size() == 3) { - // smsp__warps_launched.avg - EXPECT_NE(measurement.values[0], 0); - // smsp__sass_thread_inst_executed_op_dadd_pred_on.sum - // in async mode multiple passes are not supported yet - if (!async) { - EXPECT_EQ(measurement.values[1], 100000); - } - // sm__inst_executed_pipe_tensor.sum - //EXPECT_EQ(measurement.values[2], 0); - } - } - return true; -} -#endif // HAS_CUPTI_RANGE_PROFILER - -int main(int argc, char* argv[]) { - - CUdevice cuDevice; - - int deviceCount, deviceNum; - int computeCapabilityMajor = 0, computeCapabilityMinor = 0; - - printf("Usage: %s [device_num]\n", argv[0]); - - DRIVER_API_CALL(cuInit(0)); - DRIVER_API_CALL(cuDeviceGetCount(&deviceCount)); - - if (deviceCount == 0) { - LOG(ERROR) << "There is no device supporting CUDA."; - return -2; - } - - if (argc > 1) - deviceNum = atoi(argv[1]); - else - deviceNum = 0; - LOG(INFO) << 
"CUDA Device Number: " << deviceNum; - - DRIVER_API_CALL(cuDeviceGet(&cuDevice, deviceNum)); - DRIVER_API_CALL(cuDeviceGetAttribute( - &computeCapabilityMajor, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, - cuDevice)); - DRIVER_API_CALL(cuDeviceGetAttribute( - &computeCapabilityMinor, - CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, - cuDevice)); - - LOG(INFO) << "Compute Cabapbility = " - << fmt::format("{},{}",computeCapabilityMajor, computeCapabilityMinor); - - if (computeCapabilityMajor < 7) { - LOG(ERROR) << "CUPTI Profiler is not supported with compute capability < 7.0"; - return -2; - } - - CuptiRBProfilerSession::staticInit(); - - // metrics to profile - std::vector metricNames = { - "smsp__warps_launched.avg", - "smsp__sass_thread_inst_executed_op_dadd_pred_on.sum", - "sm__inst_executed_pipe_tensor.sum", - }; - - CUcontext cuContext; - DRIVER_API_CALL(cuCtxCreate(&cuContext, 0, cuDevice)); - - VectorAddSubtract(); - -#if HAS_CUPTI_RANGE_PROFILER - CuptiRBProfilerSession::staticInit(); - - if (!runTestWithUserRange(deviceNum, metricNames, cuContext, false)) { - LOG(ERROR) << "Failed to profiler test benchmark in user range"; - } else if (!runTestWithAutoRange(deviceNum, metricNames, cuContext, false)) { - LOG(ERROR) << "Failed to profiler test benchmark in auto range"; - } else if (!runTestWithUserRange(deviceNum, metricNames, cuContext, true)) { - LOG(ERROR) << "Failed to profiler test benchmark in user range async"; - } else if (!runTestWithAutoRange(deviceNum, metricNames, cuContext, true)) { - LOG(ERROR) << "Failed to profiler test benchmark in auto range async"; - } - - CuptiRBProfilerSession::deInitCupti(); -#else - LOG(WARNING) << "CuptiRBProfilerSession is not supported."; -#endif // HAS_CUPTI_RANGE_PROFILER - DRIVER_API_CALL(cuCtxDestroy(cuContext)); - - - return 0; -} diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerApiTest.cpp b/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerApiTest.cpp deleted file mode 
100644 index 28cad722c53..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerApiTest.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include -#include - -#include "include/libkineto.h" -#include "include/Config.h" -#include "src/CuptiRangeProfilerApi.h" - -#include "src/Logger.h" -#include "test/CuptiRangeProfilerTestUtil.h" - -using namespace KINETO_NAMESPACE; - -#if HAS_CUPTI_PROFILER - -TEST(CuptiRangeProfilerApiTest, contextTracking) { - std::vector log_modules( - {"CuptiRangeProfilerApi.cpp"}); - SET_LOG_VERBOSITY_LEVEL(1, log_modules); - - std::array data; - std::array contexts; - for (int i = 0; i < data.size(); i++) { - contexts[i] = reinterpret_cast(&data[i]); - } - - // simulate creating contexts, this calls the trackCudaContexts - // function that would otherwise be called via a callback - uint32_t dev = 0; - for (auto ctx : contexts) { - simulateCudaContextCreate(ctx, dev++); - } - - EXPECT_EQ( - CuptiRBProfilerSession::getActiveDevices(), - std::set({0, 1, 2})); - - simulateCudaContextDestroy(contexts[1], 1); - - EXPECT_EQ( - CuptiRBProfilerSession::getActiveDevices(), - std::set({0, 2})); - - simulateCudaContextDestroy(contexts[0], 0); - simulateCudaContextDestroy(contexts[2], 2); - - EXPECT_TRUE( - CuptiRBProfilerSession::getActiveDevices().empty()); -} - -TEST(CuptiRangeProfilerApiTest, asyncLaunchUserRange) { - std::vector log_modules( - {"CuptiRangeProfilerApi.cpp"}); - SET_LOG_VERBOSITY_LEVEL(1, log_modules); - - // this is bad but the pointer is never accessed - CUcontext ctx0 = reinterpret_cast(10); - simulateCudaContextCreate(ctx0, 0 /*device_id*/); - - auto session = std::make_unique(0, ctx0); - session->asyncStartAndEnable(CUPTI_UserRange, CUPTI_UserReplay); - - simulateKernelLaunch(ctx0, "hello"); - simulateKernelLaunch(ctx0, "foo"); - simulateKernelLaunch(ctx0, "bar"); - - session->asyncDisableAndStop(); - // stop happens after next 
kernel is run - simulateKernelLaunch(ctx0, "bar"); - simulateCudaContextDestroy(ctx0, 0 /*device_id*/); - - EXPECT_EQ(session->passes_ended, 1); - EXPECT_EQ(session->ranges_ended, 1); - EXPECT_TRUE(session->enabled); -} - -TEST(CuptiRangeProfilerApiTest, asyncLaunchAutoRange) { - std::vector log_modules( - {"CuptiRangeProfilerApi.cpp"}); - SET_LOG_VERBOSITY_LEVEL(1, log_modules); - - // this is bad but the pointer is never accessed - CUcontext ctx0 = reinterpret_cast(10); - CUcontext ctx1 = reinterpret_cast(11); - - simulateCudaContextCreate(ctx0, 0 /*device_id*/); - - auto session = std::make_unique(0, ctx0); - session->asyncStartAndEnable(CUPTI_AutoRange, CUPTI_KernelReplay); - - simulateKernelLaunch(ctx0, "hello"); - simulateKernelLaunch(ctx0, "foo"); - simulateKernelLaunch(ctx1, "kernel_on_different_device"); - simulateKernelLaunch(ctx0, "bar"); - - session->asyncDisableAndStop(); - // stop happens after next kernel is run - simulateKernelLaunch(ctx0, "bar"); - simulateCudaContextDestroy(ctx0, 0 /*device_id*/); - - EXPECT_EQ(session->passes_ended, 0); - EXPECT_EQ(session->ranges_ended, 0); - EXPECT_TRUE(session->enabled); - - EXPECT_EQ( - session->getKernelNames(), - std::vector({"hello", "foo", "bar"})) - << "Kernel names were not tracked"; -} - -#endif // HAS_CUPTI_PROFILER diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerConfigTest.cpp b/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerConfigTest.cpp deleted file mode 100644 index 3f568968238..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerConfigTest.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
- -#include "include/Config.h" -#include "src/CuptiRangeProfilerConfig.h" - -#include -#include -#include -#include - -using namespace std::chrono; -using namespace KINETO_NAMESPACE; - -class CuptiRangeProfilerConfigTest : public ::testing::Test { - protected: - void SetUp() override { - CuptiRangeProfilerConfig::registerFactory(); - } -}; - -TEST_F(CuptiRangeProfilerConfigTest, ConfigureProfiler) { - Config cfg; - std::vector metrics = { - "kineto__cuda_core_flops", - "sm__inst_executed.sum", - "l1tex__data_bank_conflicts_pipe_lsu.sum", - }; - auto metricsConfigStr = - fmt::format("CUPTI_PROFILER_METRICS = {}", fmt::join(metrics, ",")); - - EXPECT_TRUE(cfg.parse(metricsConfigStr)); - EXPECT_TRUE(cfg.parse("CUPTI_PROFILER_ENABLE_PER_KERNEL = true")); - EXPECT_TRUE(cfg.parse("CUPTI_PROFILER_MAX_RANGES = 42")); - - const CuptiRangeProfilerConfig& cupti_cfg = - CuptiRangeProfilerConfig::get(cfg); - - EXPECT_EQ(cupti_cfg.activitiesCuptiMetrics(), metrics); - EXPECT_EQ(cupti_cfg.cuptiProfilerPerKernel(), true); - EXPECT_EQ(cupti_cfg.cuptiProfilerMaxRanges(), 42); - -} - -TEST_F(CuptiRangeProfilerConfigTest, RangesDefaults) { - Config cfg, cfg_auto; - - // do not set max ranges in config, check defaults are sane - EXPECT_TRUE(cfg.parse("CUPTI_PROFILER_METRICS = kineto__cuda_core_flops")); - EXPECT_TRUE(cfg.parse("CUPTI_PROFILER_ENABLE_PER_KERNEL = false")); - - cfg.setSignalDefaults(); - - EXPECT_TRUE(cfg_auto.parse("CUPTI_PROFILER_METRICS = kineto__cuda_core_flops")); - EXPECT_TRUE(cfg_auto.parse("CUPTI_PROFILER_ENABLE_PER_KERNEL = true")); - - cfg_auto.setClientDefaults(); - - int user_ranges, auto_ranges; - - user_ranges = CuptiRangeProfilerConfig::get(cfg).cuptiProfilerMaxRanges(); - auto_ranges = CuptiRangeProfilerConfig::get(cfg_auto).cuptiProfilerMaxRanges(); - - EXPECT_GE(user_ranges, 1) << " in user range mode default to at least 1 ranges"; - EXPECT_GE(auto_ranges, 1000) << " in auto range mode default to at least 1000 ranges"; - - EXPECT_GT(auto_ranges, 
user_ranges); -} diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerTestUtil.h b/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerTestUtil.h deleted file mode 100644 index 861b65fd701..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiRangeProfilerTestUtil.h +++ /dev/null @@ -1,96 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "CuptiRangeProfilerApi.h" - -namespace KINETO_NAMESPACE { - -#if HAS_CUPTI_PROFILER - -class MockCuptiRBProfilerSession : public CuptiRBProfilerSession { - public: - MockCuptiRBProfilerSession(int deviceId, CUcontext ctx) - : CuptiRBProfilerSession(deviceId, ctx) {} - - void beginPass() override { - LOG(INFO) << " Mock CUPTI begin pass"; - passes_started++; - } - - bool endPass() override { - passes_ended++; - return true; - } - - void flushCounterData() override {} - - void pushRange(const std::string& rangeName) override { - LOG(INFO) << " Mock CUPTI pushrange ( " << rangeName << " )"; - ranges_started++; - } - - void popRange() override { - LOG(INFO) << " Mock CUPTI poprange"; - ranges_ended++; - } - - void stop() override { - runChecks(); - } - - void enable() override { - enabled = true; - } - void disable() override {} - - CuptiProfilerResult evaluateMetrics(bool /*verbose*/) override { - return result; - } - -protected: - void startInternal( - CUpti_ProfilerRange profilerRange, - CUpti_ProfilerReplayMode profilerReplayMode) override { - curRange_ = profilerRange; - curReplay_ = profilerReplayMode; - } - -private: - void runChecks() { - EXPECT_EQ(passes_started, passes_ended); - EXPECT_EQ(ranges_started, ranges_ended); - } - - public: - int passes_started = 0; - int passes_ended = 0; - int ranges_started = 0; - int ranges_ended = 0; - bool enabled = false; - - CuptiProfilerResult result; - -}; - -inline void 
simulateCudaContextCreate(CUcontext context, uint32_t dev) { - testing::trackCudaCtx( - context, dev, CUPTI_CBID_RESOURCE_CONTEXT_CREATED); -} - -inline void simulateCudaContextDestroy(CUcontext context, uint32_t dev) { - testing::trackCudaCtx( - context, dev, CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING); -} - -inline void simulateKernelLaunch( - CUcontext context, const std::string& kernelName) { - testing::trackCudaKernelLaunch(context, kernelName.c_str()); -} - -#endif // HAS_CUPTI_PROFILER - -} // namespace KINETO_NAMESPACE diff --git a/plugins/tensorboard-plugins/libkineto/test/CuptiStringsTest.cpp b/plugins/tensorboard-plugins/libkineto/test/CuptiStringsTest.cpp deleted file mode 100644 index 405f9404a49..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/CuptiStringsTest.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include - -#include "src/cupti_strings.h" - -using namespace KINETO_NAMESPACE; - -TEST(CuptiStringsTest, Valid) { - ASSERT_STREQ( - runtimeCbidName(CUPTI_RUNTIME_TRACE_CBID_INVALID), "INVALID"); - ASSERT_STREQ( - runtimeCbidName(CUPTI_RUNTIME_TRACE_CBID_cudaDriverGetVersion_v3020), - "cudaDriverGetVersion"); - ASSERT_STREQ(runtimeCbidName - (CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020), - "cudaDeviceSynchronize"); - ASSERT_STREQ( - runtimeCbidName(CUPTI_RUNTIME_TRACE_CBID_cudaStreamSetAttribute_ptsz_v11000), - "cudaStreamSetAttribute_ptsz"); -} - -TEST(CuptiStringsTest, Invalid) { - ASSERT_STREQ(runtimeCbidName(-1), "INVALID"); - // We can't actually use CUPTI_RUNTIME_TRACE_CBID_SIZE here until we - // auto-generate the string table, since it may have more entries than - // the enum in the version used to compile. 
- ASSERT_STREQ(runtimeCbidName(1000), "INVALID"); -} diff --git a/plugins/tensorboard-plugins/libkineto/test/EventProfilerTest.cpp b/plugins/tensorboard-plugins/libkineto/test/EventProfilerTest.cpp deleted file mode 100644 index cb36c826a7f..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/EventProfilerTest.cpp +++ /dev/null @@ -1,578 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "src/EventProfiler.h" - -#include -#include -#include - -using namespace std::chrono; -using namespace KINETO_NAMESPACE; - -TEST(PercentileTest, Create) { - PercentileList pct = {{10, SampleValue(0)}, - {49, SampleValue(0)}, - {50, SampleValue(0)}, - {90, SampleValue(0)}}; - - percentiles({0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, pct); - EXPECT_EQ(pct[0].second.getInt(), 10); - EXPECT_EQ(pct[1].second.getInt(), 50); - EXPECT_EQ(pct[2].second.getInt(), 50); - EXPECT_EQ(pct[3].second.getInt(), 90); - - percentiles({80, 10, 20, 70, 60, 40, 90, 30, 50, 0, 100}, pct); - EXPECT_EQ(pct[0].second.getInt(), 10); - EXPECT_EQ(pct[1].second.getInt(), 50); - EXPECT_EQ(pct[2].second.getInt(), 50); - EXPECT_EQ(pct[3].second.getInt(), 90); - - percentiles({80}, pct); - EXPECT_EQ(pct[0].second.getInt(), 80); - EXPECT_EQ(pct[1].second.getInt(), 80); - EXPECT_EQ(pct[2].second.getInt(), 80); - EXPECT_EQ(pct[3].second.getInt(), 80); - - percentiles({80, 50}, pct); - EXPECT_EQ(pct[0].second.getInt(), 50); - EXPECT_EQ(pct[1].second.getInt(), 50); - EXPECT_EQ(pct[2].second.getInt(), 80); - EXPECT_EQ(pct[3].second.getInt(), 80); -} - -TEST(PercentileTest, Normalize) { - PercentileList pct = { - {10, SampleValue(10)}, {50, SampleValue(100.0)}, {90, SampleValue(2000)}}; - - normalize(pct, 2.5); - - EXPECT_EQ(pct[0].second.getInt(), 25); - EXPECT_EQ((int)pct[1].second.getDouble(), 250); - EXPECT_EQ(pct[2].second.getInt(), 5000); -} - -TEST(EventTest, SumSamples) { - Event ev; - ev.instanceCount = 4; - auto t = system_clock::now(); - ev.addSample(t, {1, 
2, 3, 4}); - ev.addSample(t, {10, 20, 30, 40}); - ev.addSample(t, {100, 200, 300, 400}); - - EXPECT_EQ(ev.sumInstance(0, {0, 0, 3}), 1); - EXPECT_EQ(ev.sumInstance(0, {0, 1, 3}), 10); - EXPECT_EQ(ev.sumInstance(0, {0, 2, 3}), 100); - - EXPECT_EQ(ev.sumInstance(0, {0, 0, 1}), 111); - - EXPECT_EQ(ev.sumInstance(3, {0, 0, 1}), 444); - - // Non-zero offset - EXPECT_EQ(ev.sumInstance(0, {1, 0, 2}), 10); - EXPECT_EQ(ev.sumInstance(0, {1, 1, 2}), 100); - EXPECT_EQ(ev.sumInstance(0, {1, 0, 1}), 110); - - ev.addSample(t, {1000, 2000, 3000, 4000}); - - EXPECT_EQ(ev.sumInstance(0, {1, 0, 3}), 10); - EXPECT_EQ(ev.sumInstance(0, {1, 1, 3}), 100); - EXPECT_EQ(ev.sumInstance(0, {2, 1, 2}), 1000); - EXPECT_EQ(ev.sumInstance(0, {2, 0, 1}), 1100); - - EXPECT_EQ(ev.sumAll({0, 0, 4}), 10); - EXPECT_EQ(ev.sumAll({1, 0, 3}), 100); - EXPECT_EQ(ev.sumAll({2, 1, 2}), 10000); - EXPECT_EQ(ev.sumAll({0, 1, 2}), 11000); - EXPECT_EQ(ev.sumAll({0, 0, 1}), 11110); -} - -TEST(EventTest, Percentiles) { - Event ev; - ev.instanceCount = 4; - auto t = system_clock::now(); - ev.addSample(t, {3, 2, 1, 4}); - ev.addSample(t, {30, 20, 10, 40}); - ev.addSample(t, {300, 200, 100, 400}); - - PercentileList pct = { - {10, SampleValue(0)}, {50, SampleValue(0)}, {90, SampleValue(0)}}; - - ev.percentiles(pct, {0, 0, 3}); - EXPECT_EQ(pct[0].second.getInt(), 1); - EXPECT_EQ(pct[1].second.getInt(), 3); - EXPECT_EQ(pct[2].second.getInt(), 4); - - ev.percentiles(pct, {0, 0, 1}); - EXPECT_EQ(pct[0].second.getInt(), 111); - EXPECT_EQ(pct[1].second.getInt(), 333); - EXPECT_EQ(pct[2].second.getInt(), 444); -} - -class MockCuptiMetrics : public CuptiMetricApi { - public: - MockCuptiMetrics() : CuptiMetricApi(0) {} - MOCK_METHOD1(idFromName, CUpti_MetricID(const std::string& name)); - MOCK_METHOD1( - events, - std::map(CUpti_MetricID metric_id)); - MOCK_METHOD1(valueKind, CUpti_MetricValueKind(CUpti_MetricID metric)); - MOCK_METHOD1( - evaluationMode, - CUpti_MetricEvaluationMode(CUpti_MetricID metric)); - MOCK_METHOD5( - 
calculate, - SampleValue( - CUpti_MetricID metric, - CUpti_MetricValueKind kind, - std::vector& events, - std::vector& values, - int64_t duration)); -}; - -TEST(MetricTest, Calculate) { - using ::testing::Return; - MockCuptiMetrics metrics; - - // The events used for the ipc metrics: instructions and cycles - // Pretend we have 2 SMs and 2 samples of each event - Event instr("instructions"); - instr.instanceCount = 2; - auto t = system_clock::now(); - instr.addSample(t, {100, 200}); - instr.addSample(t, {300, 400}); - - Event cycles("cycles"); - cycles.instanceCount = 2; - cycles.addSample(t, {1000, 1200}); - cycles.addSample(t, {1300, 1300}); - - // 2 & 3 are the event ids we specified in the metric - std::map events; - events[2] = std::move(instr); - events[3] = std::move(cycles); - - // Define an ipc metric - EXPECT_CALL(metrics, valueKind(1)) - .Times(1) - .WillOnce(Return(CUPTI_METRIC_VALUE_KIND_DOUBLE)); - Metric m( - "ipc", 1, {2, 3}, CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, metrics); - - // Calculate metric for first sample - // Since evaluation mode is CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE, - // Cupti API will be called three times: once for each SM (2) and once - // to get the total across SMs. 
- std::vector ids = {2, 3}; - std::vector vals = {100, 1000}; - EXPECT_CALL( - metrics, calculate(1, CUPTI_METRIC_VALUE_KIND_DOUBLE, ids, vals, 1000)) - .Times(1) - .WillOnce(Return(SampleValue(0.1))); - vals = {200, 1200}; - EXPECT_CALL( - metrics, calculate(1, CUPTI_METRIC_VALUE_KIND_DOUBLE, ids, vals, 1000)) - .Times(1) - .WillOnce(Return(SampleValue(0.17))); - vals = {300, 2200}; - EXPECT_CALL( - metrics, calculate(1, CUPTI_METRIC_VALUE_KIND_DOUBLE, ids, vals, 1000)) - .Times(1) - .WillOnce(Return(SampleValue(0.14))); - auto v = m.calculate(events, nanoseconds(1000), {0, 0, 2}); - - EXPECT_EQ(v.perInstance.size(), 2); - EXPECT_EQ(v.perInstance[0].getDouble(), 0.1); - EXPECT_EQ(v.perInstance[1].getDouble(), 0.17); - EXPECT_EQ(v.total.getDouble(), 0.14); - - // Calculate second sample. - // Change evaluation mode to CUPTI_METRIC_EVALUATION_MODE_AGGREGATE. - // Now we should get only one call to the Cupti API for the total. - EXPECT_CALL(metrics, valueKind(1)) - .Times(1) - .WillOnce(Return(CUPTI_METRIC_VALUE_KIND_DOUBLE)); - Metric m2("ipc", 1, {2, 3}, CUPTI_METRIC_EVALUATION_MODE_AGGREGATE, metrics); - vals = {700, 2600}; - EXPECT_CALL( - metrics, calculate(1, CUPTI_METRIC_VALUE_KIND_DOUBLE, ids, vals, 1000)) - .Times(1) - .WillOnce(Return(SampleValue(0.27))); - v = m2.calculate(events, nanoseconds(1000), {0, 1, 2}); - - EXPECT_EQ(v.perInstance.size(), 1); - EXPECT_EQ(v.perInstance[0].getDouble(), 0.27); - EXPECT_EQ(v.total.getDouble(), 0.27); -} - -class MockCuptiEvents : public CuptiEventApi { - public: - MOCK_METHOD1( - createGroupSets, - CUpti_EventGroupSets*(std::vector& ids)); - MOCK_METHOD1(destroyGroupSets, void(CUpti_EventGroupSets* sets)); - MOCK_METHOD0(setContinuousMode, bool()); - MOCK_METHOD1(enablePerInstance, void(CUpti_EventGroup eventGroup)); - MOCK_METHOD1(instanceCount, uint32_t(CUpti_EventGroup eventGroup)); - MOCK_METHOD1(enableGroupSet, void(CUpti_EventGroupSet& set)); - MOCK_METHOD1(disableGroupSet, void(CUpti_EventGroupSet& set)); - 
MOCK_METHOD3( - readEvent, - void(CUpti_EventGroup g, CUpti_EventID id, std::vector& vals)); - MOCK_METHOD1(eventsInGroup, std::vector(CUpti_EventGroup g)); - MOCK_METHOD1(eventId, CUpti_EventID(const std::string& name)); -}; - -TEST(EventGroupSetTest, CollectSample) { - using ::testing::_; - using ::testing::Return; - using ::testing::SetArgPointee; - const CUpti_EventGroup g1{nullptr}; - const CUpti_EventGroup g2{reinterpret_cast(0x1000)}; - CUpti_EventGroup groups[] = {g1, g2}; - CUpti_EventGroupSet set; - set.eventGroups = groups; - set.numEventGroups = 2; - - std::map events; - Event instr("instructions"); - events[4] = std::move(instr); - Event cycles("cycles"); - events[5] = std::move(cycles); - Event branches("branches"); - events[10] = std::move(branches); - - MockCuptiEvents cupti_events; - EXPECT_CALL(cupti_events, enablePerInstance(g1)).Times(1); - EXPECT_CALL(cupti_events, enablePerInstance(g2)).Times(1); - EXPECT_CALL(cupti_events, instanceCount(g1)).Times(1).WillOnce(Return(80)); - EXPECT_CALL(cupti_events, instanceCount(g2)).Times(1).WillOnce(Return(40)); - std::vector events_in_group1 = {4, 5}; - EXPECT_CALL(cupti_events, eventsInGroup(g1)) - .Times(1) - .WillOnce(Return(events_in_group1)); - std::vector events_in_group2 = {10}; - EXPECT_CALL(cupti_events, eventsInGroup(g2)) - .Times(1) - .WillOnce(Return(events_in_group2)); - EventGroupSet group_set(set, events, cupti_events); - - EXPECT_EQ(group_set.groupCount(), 2); - EXPECT_EQ(events[4].instanceCount, 80); - EXPECT_EQ(events[5].instanceCount, 80); - EXPECT_EQ(events[10].instanceCount, 40); - - // This should not cause any Cupti API action as the group - // set is already disabled - group_set.setEnabled(false); - - // Activate group set - if activated twice, only the first - // should cause cupti API to be called - EXPECT_CALL(cupti_events, enableGroupSet(_)).Times(1); - group_set.setEnabled(false); - group_set.setEnabled(true); - - EXPECT_CALL(cupti_events, eventsInGroup(g1)) - .Times(1) - 
.WillOnce(Return(events_in_group1)); - EXPECT_CALL(cupti_events, eventsInGroup(g2)) - .Times(1) - .WillOnce(Return(events_in_group2)); - EXPECT_CALL(cupti_events, readEvent(g1, 4, _)).Times(1); - EXPECT_CALL(cupti_events, readEvent(g1, 5, _)).Times(1); - EXPECT_CALL(cupti_events, readEvent(g2, 10, _)).Times(1); - group_set.collectSample(); - - EXPECT_EQ(events[4].sampleCount(), 1); - EXPECT_EQ(events[5].sampleCount(), 1); - EXPECT_EQ(events[10].sampleCount(), 1); -} - -class MockLogger : public SampleListener { - public: - MOCK_METHOD3(handleSample, void(int device, const Sample& sample, bool from_new_version)); - MOCK_METHOD1(update, void(const Config& config)); -}; - -class EventProfilerTest : public ::testing::Test { - protected: - void SetUp() override { - auto cupti_events_ptr = std::make_unique(); - auto cupti_metrics_ptr = std::make_unique(); - cuptiEvents_ = cupti_events_ptr.get(); - cuptiMetrics_ = cupti_metrics_ptr.get(); - loggers_.push_back(std::make_unique()); - onDemandLoggers_.push_back(std::make_unique()); - profiler_ = std::make_unique( - std::move(cupti_events_ptr), - std::move(cupti_metrics_ptr), - loggers_, - onDemandLoggers_); - - for (int i = 0; i < kEventGroupCount; i++) { - eventGroups_[i] = &eventGroups_[i]; - } - for (int i = 0; i < kGroupSetCount; i++) { - // Default size to 1 but can be changed by test - groupSet_[i].numEventGroups = 1; - // Two groups per set - groupSet_[i].eventGroups = &eventGroups_[i * 2]; - } - groupSets_.numSets = 1; - groupSets_.sets = groupSet_; - } - - MockCuptiEvents* cuptiEvents_; - MockCuptiMetrics* cuptiMetrics_; - std::vector> loggers_; - std::vector> onDemandLoggers_; - constexpr static int kEventGroupCount = 4; - constexpr static int kGroupSetCount = 2; - CUpti_EventGroup eventGroups_[kEventGroupCount]; - CUpti_EventGroupSet groupSet_[kGroupSetCount]; - CUpti_EventGroupSets groupSets_; - std::unique_ptr profiler_; -}; - -TEST_F(EventProfilerTest, ConfigureFailure) { - using namespace testing; - - // 
Default config has no counters enabled. - // Check that profiler remains disabled. - Config cfg; - profiler_->configure(cfg, nullptr); - - EXPECT_FALSE(profiler_->enabled()); - - // There is no event named "cycles" - // In this case the profiler should print a warning and remain disabled - bool parsed = cfg.parse("EVENTS = cycles"); - EXPECT_TRUE(parsed); - - // EventProfiler should handle exception thrown from createGroupSets - // Configuration will be applied twice - once for combined base + on-demand - // and then again falling back to base - EXPECT_CALL(*cuptiEvents_, eventId("cycles")) - .Times(2) - .WillRepeatedly(Return(0)); - std::vector ids = {0}; - EXPECT_CALL(*cuptiEvents_, createGroupSets(ids)) - .Times(2) - .WillRepeatedly(Throw( - std::system_error(EINVAL, std::generic_category(), "Event ID"))); - profiler_->configure(cfg, nullptr); - - EXPECT_FALSE(profiler_->enabled()); -} - -TEST_F(EventProfilerTest, ConfigureBase) { - using namespace testing; - - // Test normal path, simple base config - Config cfg; - bool parsed = cfg.parse("EVENTS = elapsed_cycles_sm"); - EXPECT_TRUE(parsed); - - // One valid event - expect one call to eventId and createGroupSets - EXPECT_CALL(*cuptiEvents_, eventId("elapsed_cycles_sm")) - .Times(1) - .WillOnce(Return(5)); - std::vector ids = {5}; - EXPECT_CALL(*cuptiEvents_, createGroupSets(ids)) - .Times(1) - .WillOnce(Return(&groupSets_)); - EXPECT_CALL(*cuptiEvents_, enablePerInstance(eventGroups_[0])).Times(1); - EXPECT_CALL(*cuptiEvents_, instanceCount(eventGroups_[0])) - .Times(1) - .WillOnce(Return(80)); - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[0])) - .Times(1) - .WillOnce(Return(ids)); - EXPECT_CALL(*cuptiEvents_, enableGroupSet(_)).Times(1); - - profiler_->configure(cfg, nullptr); - - EXPECT_TRUE(profiler_->enabled()); -} - -TEST_F(EventProfilerTest, ConfigureOnDemand) { - using namespace testing; - - // Test base + on-demand config, one event and one metric - Config cfg, on_demand_cfg; - bool parsed = 
cfg.parse(R"( - EVENTS = active_cycles - SAMPLE_PERIOD_MSECS=500 - REPORT_PERIOD_SECS=10 - SAMPLES_PER_REPORT=5 - )"); - EXPECT_TRUE(parsed); - - parsed = on_demand_cfg.parse(R"( - METRICS = ipc - EVENTS_DURATION_SECS=60 - SAMPLE_PERIOD_MSECS=200 - MULTIPLEX_PERIOD_MSECS=2000 - REPORT_PERIOD_SECS=3 - SAMPLES_PER_REPORT=10 - )"); - EXPECT_TRUE(parsed); - - // One event - EXPECT_CALL(*cuptiEvents_, eventId("active_cycles")) - .Times(1) - .WillOnce(Return(3)); - // One metric - EXPECT_CALL(*cuptiMetrics_, idFromName("ipc")).Times(1).WillOnce(Return(10)); - std::map ipc_events; - ipc_events[4] = "instructions"; - ipc_events[5] = "elapsed_cycles_sm"; - EXPECT_CALL(*cuptiMetrics_, events(10)).Times(1).WillOnce(Return(ipc_events)); - EXPECT_CALL(*cuptiMetrics_, evaluationMode(10)) - .Times(1) - .WillOnce(Return(CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE)); - EXPECT_CALL(*cuptiMetrics_, valueKind(10)) - .Times(1) - .WillOnce(Return(CUPTI_METRIC_VALUE_KIND_DOUBLE)); - std::vector ids = {3, 4, 5}; - groupSet_[0].numEventGroups = 2; - groupSets_.numSets = 2; - EXPECT_CALL(*cuptiEvents_, createGroupSets(ids)) - .Times(1) - .WillOnce(Return(&groupSets_)); - // Specified CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE per instance above - // So check that it's enabled - EXPECT_CALL(*cuptiEvents_, enablePerInstance(eventGroups_[0])).Times(1); - EXPECT_CALL(*cuptiEvents_, enablePerInstance(eventGroups_[1])).Times(1); - EXPECT_CALL(*cuptiEvents_, enablePerInstance(eventGroups_[2])).Times(1); - std::vector ids_g1{3}, ids_g2{4}, ids_g3{5}; - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[0])) - .Times(1) - .WillOnce(Return(ids_g1)); - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[1])) - .Times(1) - .WillOnce(Return(ids_g2)); - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[2])) - .Times(1) - .WillOnce(Return(ids_g3)); - EXPECT_CALL(*cuptiEvents_, enableGroupSet(_)).Times(1); - - profiler_->configure(cfg, &on_demand_cfg); - - EXPECT_TRUE(profiler_->enabled()); - 
EXPECT_EQ(profiler_->samplePeriod().count(), 250); - EXPECT_EQ(profiler_->multiplexPeriod().count(), 1000); - EXPECT_EQ(profiler_->reportPeriod().count(), 10000); - EXPECT_EQ(profiler_->onDemandReportPeriod().count(), 4000); -} - -TEST_F(EventProfilerTest, ReportSample) { - using namespace testing; - - // Test base + on-demand config, one event and one metric - Config cfg, on_demand_cfg; - bool parsed = cfg.parse("EVENTS = active_cycles"); - EXPECT_TRUE(parsed); - - parsed = on_demand_cfg.parse(R"( - METRICS = ipc - EVENTS_DURATION_SECS=60 - )"); - EXPECT_TRUE(parsed); - - // One event - EXPECT_CALL(*cuptiEvents_, eventId("active_cycles")) - .Times(1) - .WillOnce(Return(3)); - // One metric - EXPECT_CALL(*cuptiMetrics_, idFromName("ipc")).Times(1).WillOnce(Return(10)); - std::map ipc_events; - ipc_events[4] = "instructions"; - ipc_events[5] = "elapsed_cycles_sm"; - EXPECT_CALL(*cuptiMetrics_, events(10)).Times(1).WillOnce(Return(ipc_events)); - EXPECT_CALL(*cuptiMetrics_, evaluationMode(10)) - .Times(1) - .WillOnce(Return(CUPTI_METRIC_EVALUATION_MODE_PER_INSTANCE)); - EXPECT_CALL(*cuptiMetrics_, valueKind(10)) - .Times(1) - .WillOnce(Return(CUPTI_METRIC_VALUE_KIND_DOUBLE)); - std::vector ids = {3, 4, 5}; - groupSet_[0].numEventGroups = 2; - groupSets_.numSets = 2; - EXPECT_CALL(*cuptiEvents_, createGroupSets(ids)) - .Times(1) - .WillOnce(Return(&groupSets_)); - EXPECT_CALL(*cuptiEvents_, instanceCount(_)) - .Times(3) - .WillRepeatedly(Return(4)); - std::vector ids_g1{3}, ids_g2{4}, ids_g3{5}; - // These will be called by collectSample() as well, which is called twice - // per group set - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[0])) - .Times(3) - .WillRepeatedly(Return(ids_g1)); - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[1])) - .Times(3) - .WillRepeatedly(Return(ids_g2)); - EXPECT_CALL(*cuptiEvents_, eventsInGroup(eventGroups_[2])) - .Times(3) - .WillRepeatedly(Return(ids_g3)); - EXPECT_CALL(*cuptiEvents_, enableGroupSet(_)).Times(1); - - 
profiler_->configure(cfg, &on_demand_cfg); - - EXPECT_TRUE(profiler_->enabled()); - - EXPECT_CALL(*cuptiEvents_, readEvent(_, _, _)) - .Times(6) - .WillRepeatedly(Invoke( - [](CUpti_EventGroup g, CUpti_EventID id, std::vector& vals) { - vals = {1, 2, 3, 4}; - })); - - // Need to collect four times - twice for each group set - profiler_->collectSample(); - profiler_->collectSample(); - EXPECT_CALL(*cuptiEvents_, disableGroupSet(_)).Times(1); - EXPECT_CALL(*cuptiEvents_, enableGroupSet(_)).Times(1); - profiler_->enableNextCounterSet(); - profiler_->collectSample(); - profiler_->collectSample(); - - std::vector ipc_ids = {4, 5}; - // Called once for each instance (4) and once for the total. - // x2 since we recompute per logger. - EXPECT_CALL( - *cuptiMetrics_, - calculate(10, CUPTI_METRIC_VALUE_KIND_DOUBLE, ipc_ids, _, 2000000000)) - .Times(10) - .WillRepeatedly(Return(SampleValue(0.3))); - auto& logger = dynamic_cast(*loggers_[0]); - EXPECT_CALL(logger, handleSample(0, _, _)) - .Times(1) - .WillOnce(Invoke([](int device, const Sample& sample, bool from_new_version) { - // Sample will include all stats - logger must pick the - // ones it wants. - EXPECT_EQ(sample.stats.size(), 4); - EXPECT_EQ(sample.stats[0].name, "active_cycles"); - EXPECT_EQ(sample.stats[1].name, "instructions"); - EXPECT_EQ(sample.stats[2].name, "elapsed_cycles_sm"); - EXPECT_EQ(sample.stats[3].name, "ipc"); - // 2 samples, each with values {1, 2, 3, 4} - // i.e. 
{2, 4, 6, 8} total - EXPECT_EQ(sample.stats[0].total.getInt(), 20); - EXPECT_EQ(sample.stats[0].percentileValues[0].second.getInt(), 2); - EXPECT_EQ(sample.stats[0].percentileValues.back().second.getInt(), 8); - // ipc is always 0.3 from mocked calculate function above - EXPECT_EQ(sample.stats[3].total.getDouble(), 0.3); - EXPECT_EQ(sample.stats[3].percentileValues[0].second.getDouble(), 0.3); - EXPECT_EQ( - sample.stats[3].percentileValues.back().second.getDouble(), 0.3); - })); - profiler_->reportSamples(); - - auto& on_demand_logger = dynamic_cast(*onDemandLoggers_[0]); - EXPECT_CALL(on_demand_logger, handleSample(0, _, _)).Times(1); - profiler_->reportOnDemandSamples(); - - EXPECT_CALL(*cuptiEvents_, disableGroupSet(_)).Times(1); -} diff --git a/plugins/tensorboard-plugins/libkineto/test/LoggerObserverTest.cpp b/plugins/tensorboard-plugins/libkineto/test/LoggerObserverTest.cpp deleted file mode 100644 index 30ba4a824af..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/LoggerObserverTest.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include - -// TODO(T90238193) -// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude -#include "include/libkineto.h" -#include "src/Logger.h" -#include "LoggerCollector.h" - -using namespace KINETO_NAMESPACE; - -#if !USE_GOOGLE_LOG - -constexpr char InfoTestStr[] = "Checking LOG(INFO)"; -constexpr char WarningTestStr[] = "Checking LOG(WARNING)"; -constexpr char ErrorTestStr[] = "Checking LOG(ERROR)"; - -TEST(LoggerObserverTest, SingleCollectorObserver) { - // Add a LoggerObserverCollector to collect all logs during the trace. 
- std::unique_ptr lCollector = std::make_unique(); - Logger::addLoggerObserver(lCollector.get()); - - LOG(INFO) << InfoTestStr; - LOG(WARNING) << WarningTestStr; - LOG(ERROR) << ErrorTestStr; - - auto LoggerMD = lCollector->extractCollectorMetadata(); - EXPECT_TRUE(LoggerMD[LoggerOutputType::INFO][0].find(InfoTestStr) != std::string::npos); - EXPECT_TRUE(LoggerMD[LoggerOutputType::WARNING][0].find(WarningTestStr) != std::string::npos); - EXPECT_TRUE(LoggerMD[LoggerOutputType::ERROR][0].find(ErrorTestStr) != std::string::npos); - - Logger::removeLoggerObserver(lCollector.get()); -} - -#define NUM_OF_MESSAGES_FOR_EACH_TYPE 10 -#define NUM_OF_WRITE_THREADS 200 - -// Writes NUM_OF_MESSAGES_FOR_EACH_TYPE messages for each INFO, WARNING, and ERROR. -// NOLINTNEXTLINE(clang-diagnostic-unused-parameter) -void* writeSeveralMessages(void* ptr) { - for(int i=0; i lc1 = std::make_unique(); - std::unique_ptr lc2 = std::make_unique(); - std::unique_ptr lc3 = std::make_unique(); - std::unique_ptr lc4 = std::make_unique(); - Logger::addLoggerObserver(lc1.get()); - Logger::addLoggerObserver(lc2.get()); - Logger::addLoggerObserver(lc3.get()); - Logger::addLoggerObserver(lc4.get()); - - // Launch NUM_OF_WRITE_THREADS threads writing several messages. - pthread_t ListOfThreads[NUM_OF_WRITE_THREADS]; - for (int i=0; iextractCollectorMetadata(); - int InfoCount = 0, WarnCount = 0, ErrorCount = 0; - for (auto& md : lc1MD) { - InfoCount += md.first == LoggerOutputType::INFO ? md.second.size() : 0; - WarnCount += md.first == LoggerOutputType::WARNING ? md.second.size() : 0; - ErrorCount += md.first == LoggerOutputType::ERROR ? 
md.second.size() : 0; - } - - EXPECT_EQ(InfoCount, NUM_OF_WRITE_THREADS * NUM_OF_MESSAGES_FOR_EACH_TYPE); - EXPECT_EQ(WarnCount, NUM_OF_WRITE_THREADS * NUM_OF_MESSAGES_FOR_EACH_TYPE); - EXPECT_EQ(ErrorCount, NUM_OF_WRITE_THREADS * NUM_OF_MESSAGES_FOR_EACH_TYPE); - - Logger::removeLoggerObserver(lc1.get()); - Logger::removeLoggerObserver(lc2.get()); - Logger::removeLoggerObserver(lc3.get()); - Logger::removeLoggerObserver(lc4.get()); -} - -#endif // !USE_GOOGLE_LOG - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.cpp b/plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.cpp deleted file mode 100644 index 89f1d536ca8..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include -#include -#include - -#include "test/MockActivitySubProfiler.h" - -namespace libkineto { - -const std::set supported_activities {ActivityType::CPU_OP}; -const std::string profile_name{"MockProfiler"}; - -void MockProfilerSession::processTrace(ActivityLogger& logger) { - for (const auto& activity: activities()) { - activity.log(logger); - } -} - -const std::string& MockActivityProfiler::name() const { - return profile_name; -} - -const std::set& MockActivityProfiler::availableActivities() const { - return supported_activities; -} - -MockActivityProfiler::MockActivityProfiler( - std::vector& activities) : - test_activities_(activities) {}; - -std::unique_ptr MockActivityProfiler::configure( - const std::set& /*activity_types*/, - const Config& /*config*/) { - auto session = std::make_unique(); - session->set_test_activities(std::move(test_activities_)); - return session; -}; - -std::unique_ptr MockActivityProfiler::configure( - int64_t /*ts_ms*/, - int64_t /*duration_ms*/, - const std::set& 
activity_types, - const Config& config) { - return configure(activity_types, config); -}; - -} // namespace libkineto - diff --git a/plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.h b/plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.h deleted file mode 100644 index 36eaa13d1a5..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/MockActivitySubProfiler.h +++ /dev/null @@ -1,72 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once - -#include -#include -#include - -#include "include/IActivityProfiler.h" - -namespace libkineto { - -class MockProfilerSession: public IActivityProfilerSession { - - public: - explicit MockProfilerSession() {} - - void start() override { - start_count++; - status_ = TraceStatus::RECORDING; - } - - void stop() override { - stop_count++; - status_ = TraceStatus::PROCESSING; - } - - std::vector& activities() override { - return test_activities_; - } - - std::vector errors() override { - return {}; - } - - void processTrace(ActivityLogger& logger) override; - - void set_test_activities(std::vector&& acs) { - test_activities_ = std::move(acs); - } - - int start_count = 0; - int stop_count = 0; - private: - std::vector test_activities_; -}; - - -class MockActivityProfiler: public IActivityProfiler { - - public: - explicit MockActivityProfiler(std::vector& activities); - - const std::string& name() const override; - - const std::set& availableActivities() const override; - - std::unique_ptr configure( - const std::set& activity_types, - const Config& config) override; - - std::unique_ptr configure( - int64_t ts_ms, - int64_t duration_ms, - const std::set& activity_types, - const Config& config) override; - - private: - std::vector test_activities_; -}; - -} // namespace libkineto diff --git a/plugins/tensorboard-plugins/libkineto/test/PidInfoTest.cpp b/plugins/tensorboard-plugins/libkineto/test/PidInfoTest.cpp deleted file mode 100644 index 
b86cfb36d05..00000000000 --- a/plugins/tensorboard-plugins/libkineto/test/PidInfoTest.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#include "include/ThreadUtil.h" - -#include -#include - -#include -#include - -using namespace KINETO_NAMESPACE; - -TEST(ThreadNameTest, setAndGet) { - setThreadName("ThreadNameTest"); - EXPECT_EQ(getThreadName(), "ThreadNameTest"); - - setThreadName(""); - EXPECT_EQ(getThreadName(), ""); - - // Spaces etc are ok - setThreadName("Name w/ spaces"); - EXPECT_EQ(getThreadName(), "Name w/ spaces"); - - // More than 16 chars is not OK - setThreadName("More than 16 characters"); - EXPECT_EQ(getThreadName(), "Name w/ spaces"); -} diff --git a/profiler/README.md b/profiler/README.md index 1669e3524e5..549ffefc14c 100644 --- a/profiler/README.md +++ b/profiler/README.md @@ -91,6 +91,7 @@ ascend pytorch profiler数据目录结构如下: | profiler版本 | 发布日期 | 下载链接 | 校验码 | | ------------ | ---------- | ------------------------------------------------------------ | ------------------------------------------------------------ | + | 1.2.0 | 2024-07-25 | [msprof_analyze-1.2.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.2.0/msprof_analyze-1.2.0-py3-none-any.whl) | 6a4366e3beca40b4a8305080e6e441d6ecafb5c05489e5905ac0265787555f37 | | 1.1.2 | 2024-07-12 | [msprof_analyze-1.1.2-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.2/msprof_analyze-1.1.2-py3-none-any.whl) | af62125b1f9348bf491364e03af712fc6d0282ccee3fb07458bc9bbef82dacc6 | | 1.1.1 | 2024-06-20 | [msprof_analyze-1.1.1-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.1/msprof_analyze-1.1.1-py3-none-any.whl) | 76aad967a3823151421153d368d4d2f8e5cfbcb356033575e0b8ec5acea8e5e4 | | 1.1.0 | 2024-05-28 | [msprof_analyze-1.1.0-py3-none-any.whl](https://ptdbg.obs.myhuaweicloud.com/profiler/package/1.1.0/msprof_analyze-1.1.0-py3-none-any.whl) | 
b339f70e7d1e45e81f289332ca64990a744d0e7ce6fdd84a8d82e814fa400698 | diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md index 47e64a90ba4..77027110559 100644 --- a/profiler/advisor/README.md +++ b/profiler/advisor/README.md @@ -72,6 +72,7 @@ msprof-analyze的advisor功能是将Ascend PyTorch Profiler或者msprof采集的 | | block_dim_analysis | block dim算子调优 | | | operator_no_bound_analysis | operator no bound | | | graph | 融合算子图调优 | +| | freq_analysis | AI Core算子降频分析 | | scheduling | timeline_fusion_ops | 亲和API替换调优 | | | timeline_op_dispatch | 识别算子下发问题(路径3/路径5) | @@ -132,6 +133,8 @@ cluster模块的分析包含快慢卡和快慢链路分析,仅识别问题, overall模块的分析包含当前训练任务慢卡的性能拆解,按照计算、通信和下发三个维度进行耗时的统计,可以基于该分析识别到训练性能瓶颈是计算、通信还是下发问题,同样不提供调优建议。 +![输入图片说明](./img/overall_0.png) + ![输入图片说明](./img/overall.png) schedule模块包含亲和API、aclOpCompile、syncBatchNorm、SynchronizeStream等多项检测。 @@ -152,7 +155,7 @@ torch_npu.npu.config.allow_internal_format = False ![schedule_3](./img/schedule_3.png) -computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 +computation模块从device计算性能维度进行分析,能够识别AI CPU、计算bound、动态Shape、AI Core算子降频分析等问题并给出相应建议。此处不再详细展开,按照报告进行调优即可。 ![computation_1](./img/computation_1.png) diff --git a/profiler/advisor/common/profiling/ge_info.py b/profiler/advisor/common/profiling/ge_info.py index 9996ec611a2..4fd5846d88d 100644 --- a/profiler/advisor/common/profiling/ge_info.py +++ b/profiler/advisor/common/profiling/ge_info.py @@ -17,12 +17,13 @@ class GeInfo(ProfilingParser): """ ge info file """ - FILE_PATTERN = r"ge_info.db" FILE_PATTERN_MSG = "ge_info.db" FILE_INFO = "ge info" STATIC_OP_STATE = "0" DYNAMIC_OP_STATE = "1" + file_pattern_list = [r"ge_info.db"] + def __init__(self, path: str) -> None: super().__init__(path) self.op_state_info_list = None diff --git a/profiler/advisor/common/profiling/msprof.py b/profiler/advisor/common/profiling/msprof.py index 9453986b822..750c5481e67 100644 --- a/profiler/advisor/common/profiling/msprof.py +++ 
b/profiler/advisor/common/profiling/msprof.py @@ -33,10 +33,11 @@ class Msprof(ProfilingParser): msprof """ - FILE_PATTERN = r"^msprof[_\d]+.json$" FILE_PATTERN_MSG = "msprof_*.json" FILE_INFO = "msprof" + file_pattern_list = [r"^msprof[_\d]+.json$"] + def __init__(self, path: str) -> None: super().__init__(path) self._tasks: List[TaskInfo] = [] diff --git a/profiler/advisor/common/profiling/op_summary.py b/profiler/advisor/common/profiling/op_summary.py index d79439dbad8..4744b5029ad 100644 --- a/profiler/advisor/common/profiling/op_summary.py +++ b/profiler/advisor/common/profiling/op_summary.py @@ -16,13 +16,13 @@ class OpSummary(ProfilingParser): """ op summary """ - - FILE_PATTERN = r"^op_summary_[_\d]+\.csv$" FILE_PATTERN_MSG = "op_summary_*.csv" FILE_INFO = "op summary" STATIC_OP_STATE = "static" DYNAMIC_OP_STATE = "dynamic" + file_pattern_list = [r"^op_summary_[_\d]+\.csv$"] + def __init__(self, path: str) -> None: super().__init__(path) self.op_list: List[OpInfo] = [] diff --git a/profiler/advisor/common/profiling/tasktime.py b/profiler/advisor/common/profiling/tasktime.py index 3ce09a78385..732ff0f3679 100644 --- a/profiler/advisor/common/profiling/tasktime.py +++ b/profiler/advisor/common/profiling/tasktime.py @@ -17,11 +17,11 @@ class TaskTime(ProfilingParser): """ task time info """ - - FILE_PATTERN = r"^task_time_[_\d]+\.json$" FILE_PATTERN_MSG = "task_time*.json" FILE_INFO = "task time" + file_pattern_list = [r"^task_time_[_\d]+\.json$"] + def __init__(self, path: str) -> None: super().__init__(path) self._tasks: List[TaskInfo] = [] diff --git a/profiler/advisor/config/profiling_data_version_config.yaml b/profiler/advisor/config/profiling_data_version_config.yaml index 4ef76105a07..b8c92fe074d 100644 --- a/profiler/advisor/config/profiling_data_version_config.yaml +++ b/profiler/advisor/config/profiling_data_version_config.yaml @@ -1,18 +1,19 @@ versions: - version: 8.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] 
^PROF_\d{6}_\d{17}_\w+$: - mindstudio_profiler_output: - [ op_summary, msprof ] + mindstudio_profiler_output: [ op_summary, msprof ] class_attr: op_summary: OpSummary msprof: Msprof file_attr: - op_summary: ^op_summary_\d{14}\.csv$ msprof: ^msprof_\d{14}\.json$ + op_summary: [ kernel_details.csv, '^op_summary_\d{14}\.csv$' ] - version: 7.0.0 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -28,13 +29,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 7.0.RC1 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -50,13 +52,14 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\d+_\d{14}\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+_\d+_\d{14}\.csv$'] task_time: ^task_time_\d+_\d+_\d+_\d{14}\.json$ msprof: ^msprof_\d+_\d+_\d+_\d{14}\.json$ ge_info: ge_info.db - version: 6.3.RC2 dirs_pattern: + ASCEND_PROFILER_OUTPUT: [ op_summary ] ^PROF_\d{6}_\d{17}_\w+$: ^device_\d+$: summary: @@ -72,9 +75,7 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+\.csv$ + op_summary: [ kernel_details.csv, '^op_summary_\d+_\d+\.csv$'] task_time: ^task_time_\d+_\d+\.json$ msprof: ^msprof_\d+_\d+\.json$ ge_info: ge_info.db - - diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 99a19d3b60e..ebd90951abf 100644 --- a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -43,14 +43,21 @@ class ProfilingDataset(Dataset): self.build_from_pattern(value, join_prof_path(current_path, key)) elif isinstance(dirs_pattern, 
list): for item in dirs_pattern: + if hasattr(self, item) and getattr(self, item): + # 避免重复构建kernel_details.csv, op_summary.csv的数据对象 + continue + file_pattern_list = self.current_version_pattern.get('file_attr').get(item) data_class = globals()[self.current_version_pattern.get('class_attr').get(item)] - data_class.FILE_PATTERN = self.current_version_pattern.get('file_attr').get(item) + if not hasattr(data_class, "file_pattern_list"): + continue + setattr(data_class, "file_pattern_list", self.current_version_pattern.get('file_attr').get(item)) data_object = data_class(current_path) is_success = data_object.parse_data() if is_success: setattr(self, item, data_object) else: - logger.warning("Skip parse %s from local path %s", self.current_version_pattern.get('class_attr').get(item), current_path) + logger.info("Skip parse %s with file pattern %s from local path %s", + self.current_version_pattern.get('class_attr').get(item), file_pattern_list, current_path) else: logger.warning(f"Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__) diff --git a/profiler/advisor/dataset/profiling/profiling_parser.py b/profiler/advisor/dataset/profiling/profiling_parser.py index bb4caeb29e5..51996617c2b 100644 --- a/profiler/advisor/dataset/profiling/profiling_parser.py +++ b/profiler/advisor/dataset/profiling/profiling_parser.py @@ -12,10 +12,10 @@ class ProfilingParser: """ profiling """ - FILE_PATTERN = "" FILE_PATTERN_MSG = "" FILE_INFO = "" - FILE_PATH = "" + + file_pattern_list = [] def __init__(self, path: str) -> None: self._path = path @@ -37,15 +37,20 @@ class ProfilingParser: return False def _parse_from_file(self): - file_list = get_file_path_from_directory(self._path, self.file_match_func(self.FILE_PATTERN)) - if not file_list: - return False - ## get last file - file = file_list[-1] - self.FILE_PATH = file - if len(file_list) > 1: - logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, file) - return 
self.parse_from_file(file) + + if not isinstance(self.file_pattern_list, list): + self.file_pattern_list = [self.file_pattern_list] + + for file_pattern in self.file_pattern_list: + file_list = get_file_path_from_directory(self._path, self.file_match_func(file_pattern)) + if not file_list: + continue + ## get last file + target_file = file_list[-1] + if len(file_list) > 1: + logger.warning("Multiple copies of %s were found, use %s", self.FILE_INFO, target_file) + return self.parse_from_file(target_file) + return False @staticmethod def get_float(data) -> float: diff --git a/profiler/advisor/img/overall.png b/profiler/advisor/img/overall.png index 6d5da107a3f7f6c8c655922bd80d193708fe71aa..1883d4c97388b1cfb774d05fc9e0d368d0c66901 100644 GIT binary patch literal 49616 zcmd42Rajlk(lrVz$ijjINpN=w1PSi$?yztI1b26Wy9ak)!QFxc2<`+6?j*Q#X7;!D z`(^*n|D2n1b8hGC*;2E*yK0P4;YtdUXvp};FfcG^(o$k7FfcDjVPHV)U=Z-cihKzH z28I+yT1;5Y9rmc>QI$Zmm6_*xx?Dwgwq=SkzE}nOjcO15q26oN-|5{p1ovujcG^l7 zjHy9oqmx(e&WT?`;?Q}`k08}B*wvM(y5pqJQq0BANQQs$uh)te|vg z=6NKktE+o{I=y)T5`qE4z>>ni`TI{}-o!300xvHrYOWlq<@eOoRptG7xzXD@UDJ7YHo~&Ivom$`KOMqgz<`9h zhrr)QM}2O$Qx&`JtQ?GtwhwD^o{#O;{GJ~L4cc!$xTQYdTD&MS4d`AP+HGoT?EJBC zGv3gUT+`*-SYO`^efvD$HTHa2SUU%;%gybVflo8kZI1_l<7Olej%c*6^- z7fhT1`s)P|T=^X_<8OF0o`!Cw{~r0EV<-zS6N#Iv@4~7prl%$HpC9&bcW}ng($l)R z%CzHjY$0iE@>^cy?t4~`p@lX!H9A`b=>Df2uo8Pr5X$-aT4m+d)6?~F6%&{d?+vaz z>_1&ug~7mS9783C{`tmV7BlVn&``7U2_KU&H3M%H^uLV*fW}NplIj23xB}C^+p)o8 z^8G*4&&3757lmZK@whwAd4}3~8*qDc?sEBxe1kn-bc6pz`p-Jn>EQIppikR%c3thh z#g!{hhhDLoY&qFs-Sm+dzW=*gM)7VGkSr`JCL=tG9xUvC1_}%HkA~N{d8pbiveVS) z^vTHZtnW(!XKqrI-EJoXk9J6W+~*XlAJPp_{1l&^Gva>T3i5l>d+TT1*LP{doDAn^ z?nqh&%VKJI4!uM(HARK{&!2Zkf&GD8X=r$RuCg;3_KZ#bG~a4hq$w6~`2)SUOaJa@ z*zSSyLeVe7AiZl=ezb@Y^XYCyk|AC2Id4?)q03`;Ek&dFDa5ve9!INVQ#QS;or3cI z>&(<}*JF*L*T|72(?3SAPXjM#KrR)l7=4x~II_gEpY|2^ll|`f(q1RHBzp@#bUg{i za)tGkJZL_V!&l3|Ym8TwtQFH#KNybIV4TU^-1zxKAH4Pc&(vXHNg+s}MjS1_G#$*8 zhhPo10GAy_!(o)bjed(~)mJ^_Piv1qpSn;`B70l&n%T7Ip`V`JPbG6knHYWAYT4aR 
z(VyXb%#bqUkp7t?DJeuZKwTj4auAE7Pz>5?$FG08vH7kGH{n~WOeaJ{9?5y~a*#!H zN8pL(7wli#OM?vfu&4|1@cZDtU5>k4By>z{_3YHA%mejoPN zp630$LT>yX*TSAYmMvqEt(0^4)pU(2O3@+*nBk}Q=p z#QAVOv6zti`Udu&-wqStLjf@fXgKBkDf}NV13Y~O8x~xd(Hq+v@b7~G29}E)CSY5l zB47TWjUXfsY=m;U2U@~^)^AS%!qBp$Q^IXU%PTV1+yu-M(r@umOe-O^>4bl>yc=LtBtmlqd-S2>T5{v`jb6$WH0 zzW#7~K8vd*Xk%x$<(SH>@3~);x{S=Q^uI@;`>O!Mr?b?qXOid55tc5!zYj5Vu0Zwm zTi`bH{{0mlxZ8jmZagI=+xJqPC+i8g`WF43-5&mT_X~CJgC7QuYODM`Yo+I0;ltz+w8pS@gs1#;o;!|JQ*&LA;9tJq{2~DOCD`I)h^zaFW4}0r9yi-Uw*~_vs!n`zr&k*4F7KWdp}Ia zoG+8)|BRo@0~Ue^L;n8j#&D!iMud8v*4ITau>Wc3DggsR0ipcmjkpvr0cNbZt{T&& z1pmxXi3s?Z6#AFXLs&tx7+OsdlK=I;GASTN_7yUqkzRZX20RLTYv-B%D$(gmaTuE~ zxQckY2<6QS&2yZiI^{wTZ9=U2xdnrfQ5{DwH2#_fJe%Jc{nsl7N@i;l9>>DgWuO4~ z05b}>N~)%Ly`cFelt%Z_jeA>`(ptN{_N&|mwSey0`1W7;$+ADJnObDe2Z^}IU>ci; zGQyT{m9~|Bx~T5VslWrP8<7BO$=Y(cc-0qS!$OO&UoB6H!>CP$z;nrBLR085v^W#+ ziUzKhh^&W>IWF4g450nixNB}+!pT0OyA6m{MaXcta4FFk(3E8ZCo}_VmW2(D$kVot z+~Ll=^QE9agN#U=^;)Yjf2?WPVjd-NTMmX<*r2v1w$c?!77dw~PuG>j0W*Rb3CJK! zEh{s}lPM~2-6<+^_L6w8O5%99%fLnb$0UnV21)ootlj=BdE#8hs|UA6+k5R8Ui}I! zYo541gBYA%WgbN9jW)l^cBJ!iwH!-IYbWoe?gD{sB$~ zT3KeZ+plBi=uNI2mV1xgRGS0LR{beic2kjhvsJDz5jug51@9I~)Mr8sIvpZja^_pC zdMCWcB@ON?E*A}YSX%CVBJ7{r6uT#xzOU8aDM*(y2w9x&|)2U854~XPU10E6b3Np5vx4NtY2RBJEuDYU-4Lf-H6DQVNzA{ zg6Z&2EHhjmQ}#UNO8aPA)t7vx@KhEmH2Vx@f9OjQevo56NtSDJ9$#a(51HYXGFpF_ ztJBpdoARY-B6-0Yrw%%V4CI3%9n879_&n*?5AoNSQ+EUCh7bJW8r#;jUzCGEvbEpF z%*~IgFg%=03&31f4O$T^GgLv3w5bFh?@S%vGFfc&k3>hGGA7;6CSOOF_JSduKd-QW zpg*;xje~1J+N8Fy^Y_fRcL)9|{Drs6MPr)#v=zwapL|T5Y^A_I6Yjcyi^-sh%k_Vo z)LI+DAp3%SOjd$rjBiLfm7~*65<5X_=d+oPNeB9pPK-52h$R2ZG%}y=eRBZj7wQ0$ z@DC20LYg$0F9gU7<)l^9XTu!GY~?`leVh5~5gW80UffIef$Ns*druDBV<(n`C|9Qk z1`s-52kU1jZuuBVJD=N@u*_U9B)#TP{IpcePTfQfh0C|ov!k$6(xi0?)yN)F<!ME2oRlV#+LK07Na0rMj~UIH?{~L)gD=;QZ74T{OVWywS6OOz)E=J#F0(Hyd?~_! 
zyY>N<^pkqaF8!}|SY5S(tXzF!Lu9z%mS*!p(+(B;mp+f9mw^}YyhMlJ%F+nAal|rv zn^tC}CpS8U&eJhjGpg5mwh^Xk)4KDyR9UYsA=pebllD2}5Fwm~?);ag*V2Zh6W3`1 zRBt?GzWAB$JrnRy*9CZ>_B5N(4e}+d>!i2Ln!KvHxn)Z-QJg$TFJ3-_P}+acq0H|7 z#UCoTc&dEe@4CWBO|UIz`m*h=_Y-|G*Rlb~+TvEj!rwW17iC;>$tQ2&aTJte*tVn) zLx~69ny56tP`5llt&Wp4!5Ao>iX!^u_wH@)$)LB+UAXjFPVp&9`(19 zu`m(46`g`u7Sx8g#zXEUCQ(W@p~1~6$yEGEi=ywzBULw{8ZHldLNbJwDIITz zbZnh0&TUh-Jm7fmjOmQR#YS0gJzH7CbP({GZbf`uj;fS$!>%NUr% zI8Q1t*=PwDi>0~3BPu=yv5M8m5`whiJ%ki+sHOP~&;yoxsXR6K%p3~T?+V{NApH($lGw>s6)eL?)+SMIP$R1&u)|j_I=fmDhJ3mG zTz-{Qxup1@%vzPctUM}KMEII!<~y@-n$<|lRHO#UKrG^3dEqb2mo&AN!Jmt;%4nF% z>gb~w`JXyH4XYAjRX&qLlMLzHv>RN6bCJ&6+Q1*$%E#8ewY)DCe4VJP2$}3rL037J z&#orrn#M8uP4V&z{eqe2+h0@@g*D|#5#3YENmbN*w7g(_x*JQen4x%vO{~R`-F#m~;VbOGo6%9gH?tEi)77e- z4OK}}>UN5YpQ5frsAXy&4&42`&PQhW!;M}};xf*y;{?7@7B{m^6l*B!&hH`MZXePq zQW9=1z|vX*$2ETvsG`M*=wC(El_DlP)_ljCfdh+%ryZUF)-du)WY!jlV3=gnFiJ*C z@@B7!;|Xefstnr;RKm#b%iH`I9k8&~85_1YGlBa!n6@kqvL5HxI=km}9ccR&FH2|S zSZo|!>W$i0+8q6$MB{M)jY#fzwFNm&cemvYLwy5&1j zY`HRDy6IPf)cL$E<-dD>&>m*>WA^WUUDVmawY{L~4WINb0~GcRU^=w zp`1fC!Jm5TCVkC0Ab=yIS3DRqECF>lLf43ehA?!!ol#MA%d#uk$Wyv^X+186cD!{w znKD&4K1;&VcId6C3>DdBTns8g6fwF}k&4rp_Q_LHlvxO`Vg9x_Kd_|%H)reX!MFNd z0aKb7Bf36pC2k*hIyn{i3l{qU~Ye0@CIkVb6tg4&OJLbwOHw zLr<}Iw~^B9-<{k zG3OAM3eQ8mg?*4k7o>}gGkZVCGi=#PjEAyET2Py$ZjM$i9lVgohVv}XA$q^2&=1O6 zP@8lRx473faa0YU@~J=VSu%L-sjBsiB8Fv`1E5y$R|+~RYp=a_Rm2t;M8~D|>`KC( z#xi-uzZuY%V$|$mlhtM1S*B%(jF_;-LoTC_rCrCIJ!k_}gUhp3U;}{7|FKU%9O(9S ztaM4@xL!nfrZW4OkyESTd*lO@`mV&dm$bhg#tLLF%2LqhXEv={DD=-NVz*y1x0yRM zo3RvQq7ORzwt7G!H2i-&O}JjDBB)`)rF_S^1Uynl`zyw*&tj!9M0ZT${Gj1s!++gk zFJMD9s|XaimgpGKP45)}O>*on0c%HT@an4#6j(GFZ_wS`4O|IEgzbeSlYAyu({7nE zP_hni@h{h0QY&l{1WEW#gsSCvrZQb0)p?>ww=>MYBH*Z75mjivsVbbY*R?e+J=}VZ!Ln zSgtR@z$*2_5Iz)2{3{s~hWx9ysuRv;R}v5;^4`W%iV39t5=raY>|etHy(~-4$>^Fg zwJ};Hbbto9p~?2*!Z9P z)5o-`UVJX0ZE7lhRU7QjG*+vLW{pxlgW+v~^YPEBfprJ_1H)(xN_=DhGV(HP*70IQ zbw2Am83J1-^>Rw6ZFI~A`d?Upt3M$?R*2{en)`&P6{Jvi9l6|30SimE)nccPaPn_24=MHZUA?qx6ajvO}vF}V+gre 
zq~u2M9(#hT0=s$Ol;Vr{G5ksD!-zgBqd@!LxYi0Y3F8pF?s=mQz~iA=N+Ui%zs=|q zeD~mm4ee>oC3~eJl|H^qV$eukv76u5pNK1VCWkuUMy;@-5hswko-O1s12MpiDCOIb zIWX|jchbh{mkY+$=L=3`Vy6l4axxe73A6wMRW+?xG+iISox~dShjBAiwcHT zm{IcOt>nAK&kfup64`Te6Rs^i=u2aioYfc$#wME!6;`2j=| zW3et_kL=3&qLOErZ55+fYcFwNMH|%=U}82M_kSj$&EVWV$kT*PZ$w4%ltR}e<&W+5 zv#{l|d?tRV(JVY!-6{R}^qon#JC1(&PR8@B<0%H$&sxP~^s>g*wF&M~fKzUwT+UgQ z8al3g{DQEgIV4rbi^Ot8Bqrv4#O82u0{P+rKF?~Vc2i;2x2J6f&TU&H#K$WSz%>ayUcKJASzA$?pfzqb2Q4Dw%GLm4o94(}YU!cqWRZ2>9J>^hIGmlv>kfQBA{b zP_~2;R%lO3_$p2CxX{>@e-k-uc^cZE$|S#e5%^m@I(^?yUecuu3p%na$0D1q5SlDa z^Uh&M!S7m~=MI^~-13U(SpyO9Hl+eJDVV&|*e|WI;?ky&>`W3{yFH(7TN$_sisD^bA9R9tAr%HikkM#oxX2*k%E>~}hCxd{zsCT`Jr@R^Z=@E(N^am^Ed z8oHog42_xQ|Ckr!X#FK^_<{V3V~fm(Uy3dm_Nm>*6dJ6qEko{c!8PA2t>FPvio{$M z2iy0KG7FOZd>Ria+n#+nH6bg>*9yTqc|dbVP7+u-RP6-~{iSUuFkX&{bgY-{b z5(<7BlZ#PbNZMD#b=nF;xV7&OR=3!`(nvvtm4=3~R&8qO7BI~=+BRQ|I{Ie>uv=Yb z8abS`_}1{2MnK@jFIh^)s!D2H;V531ohPVn^jg71EdZ`+Ilxyn9ak?0_O!PIZIkN@7qomR6E|>xk8LD^(9AOr}Ro^5?=s zjtqgf#X1Qnd7n4_qXj64e`_=tB_Uh2#wV@d9XC0pSx{8SuQhBVTGje~j)c0^=jp~G zA$nv^TeaQ_fnq+|JWiprx^k~>N6z^^=^HXby=PLbqI9TlT!zF{=de}QiF}N1=9|v6 z&ssXWj)(6wd&Ts63pRM&?@W~UsYh5O#xJ^dVn>8gY<+4a!3J0=c+$mcQ>nu~BH#SD zS@r6(Mg;s@Td5^%7SUl%$4PTFmb3aVxA(-9l}tF_Fo&17;=4$4j(wu96#h&K-r_@aiRf<#TWH4cgetcUiMY=?BOb7pe@@8-o=+tgymyqEMHB;ol9Qj zXY0qaJHxvbdwoi2{qo$|gVi{4-O8v-)D;_t)}uz$l$XysG5%z_Gezl?{>pF+_oS;^ zc0>?s9+i`p^i`>gD;ZC%1wo;L@@?1M(u$zNatcT&eMXNsu~KmP0n5)x+Wl~HH=#re znoY@vs?*xst7V}Z%>-smB*u(N&4m}L^AnTL_Zns-BC|ch0akF!RM5^ny9pq1!tcRC z;FKp(icNL&Q(D`qjDW}wJT8F~ZD-_Foq?n+yIZAZup;`n!CF*Ms%r6$_y-3)65~qd zXV+M40z<*mk4nc9EEG_-uU$rU>Jb9`QN@)D%xFysujVx(&T@dmBwWUG)Re=f23Ge< z9LDO9_D4{IZ-MASkob?LCvi4Cnj@?EM%Q8%LZT7J>(>4ou><25(W#pzf&gGZuBq{^ zxpL(QqbU_EI0CZXv2uGRGB?%_5Ra9dDdQSZ?5j_-xRd9eVKh}N(1yg=d|EPSmW4R< z()Es6f7r&(gR<&%AX@Ji6Xwlcz&7M6qcG-Jp{FP=c+2?VJQP4p23UwDOL}5sV*6>U z2@9sg#I}$SS14R^`L!Hlr?$p3>I5)NBQT5u~jZx}zU<;ye_(-(C+RBW-jyUAs)jUv8yvUTIArV zHrE*z-Ey8bE4_6CUwfY&{@>mucgbq4TxiHp;6~rr9tT{95w)1(rNsP6qCmq4m=WIH3>% 
zF6~VxEpk((g-b(|@=pzO$c%U8FNb!gP1w%vPiyIZ^SxJY?|Eo5nqrfa%$RZ`FE^Yo zSqu^%DV#Y&)NKphUOtY<2|L*42{5ZVj!=&gZ>d;y6tf`HZ(Fyhc_{`mt=8QN=%SV% zy?TGjj}TyHdHxks>q5L>>OS$Iir5#kl%@?7lsL4t5Hjh zEe;pTZ85YO*%0YXsDRfR`TJMx-MCG<3$<4P^eMvoS5c(UWm_ZkCUoD4uLCvB`w-dw zIFCyp!mmiFXIDjdC-z(=@N(q-jRO!z{hz#@` z2C9&K2d}GVrFEDIU#=BfChHX3?O0|s$ruwv0*CUQ0;Vx-aCnnW7$^2sxA!?2^t(w& z(o0bo-w7jxiVrS5DH*+|g}uT!G};x#E-r=LtsFcNfAgvfZvsH$x|i1|6>MzZ26dz@ zl`woAc|f@l(@a{UcR@_EVm~I$0_bBn4LKO>{-iyO)8(_20lFi#Da*FvNXJ6nVMJ(3>7q`{b{+k^$5tWQ851__k@R~nfTy%i#j(EL6f zNCH`vEb*@(0sl1%8sti~D6!BK222T6R!1BA#n$V!)5O5#-es;^9|7I-Va+1YTmpeo z8v!h|f90wH>XI7~0%_%Cur4!p3P_H?JPtc3{EDSTF#khped?lGrnM}2@eC5&r;>bt zI;-lGD!R2*RVCx}OXZILTSq!E&$N~zxD23bI-v#P<{qRXGN|<~g1EF4g8(o}AV`dS zBV{Ag=uUs4`dgl17_HW^Y4FR(!{NH3KD#7)h_x+LHGirsUfg4;K-9l#Fnq@|z_H-X z50QX~Hxx?Q^eSs>8AL~X_@$cXTZ^B*VPILFwCAz;__{oZ=+X*FDVEnyG~EA|^1q9_ z8&DN>$}|ql8l7#q5BklO`C?Z+_S@=rJWGLhwUxwf_1RVR*vs5}a94#g;^MqQkg@^6 zFYgEwx^<+8KiJq%>TlLIZKI?hS4N<2j`oFajm36}i|_-i>ur{utxE{#3te(iHkw2! z=M??=XxJAFGOH3guWE~+_`97OcXI}o3WiST{aXozXuFD&Z~JJq3DfKOt>D+S;%0sO zvUwCk;w~$Vr>?gmU+y#E1-dvT%9606t3~~1V|BZsDK4i1-5!iIa(PLe& z9#y@xYG74wEH(yU#R+FyNo8)7Aqs%Jv2fsx48k-taa4xP*qWCXb>2R#Y|AH~$Cq(r zg5edSC(R-r21>s8v+FDNzZgBnLkJt=C}-EU-IB-JSF}4m3pozS!^JvKv#3t zJGYT45gR$G%>tClSZqYl%u`plpfr0nzIHP5>{DS?I2sSWbvzBdxfHNH?CZN#MDXEH zY#B+bFTwAE-0I)Omc1s+tNt=el)Z)@3-A2}`;eQ>rFiWnduN?euBz@VLJb>cWi-Jt zC3fZ(z|371a3T4`m4G7aDR=fpL$+A4-eBbqms5!p;Q6SxF=hOS zvqD{6TGZZv{_^VLAA@0vL&}08M?~W%Zf9bU4?yG7+8;*3?ofu=l_qg{S-`dpDOsJ^ z;9z`s2jcg*Ii@G$v#XzvN=|DemUVwQPNSy_vwCC3`km%&zE6AvM-6gG!UDaL%(isQ4#YDDxtr z=Px1CqYKcX*|+_f-ztL&njVDjq5P%$n9Jr{<2xh5BWOluOc#yOW8CI}%~Zr!F6heK8q`8osl7z=5T!1GO;97l^V4Oant-iXQWVOB+L7&v>iwac2&7u&dK z3YJE#iEx>OO!@7%0(zB-(h_OI$~i!)TELq za;<<)nhcmIjcWBA(+2uU_LdK(fe5oC{j(yP?J+BNN{jaczUH2?(50&tg(>RO!g%<( z4NE@m{(PeR?mMfRO8QgBhTb`!?UNzg4=Za666_hX0tp3u6J-AOIZw{qB3%7~5!3ya z**S-HeHk%9uK|{~1nH8zmJ+HV7!~*WM^G|}AjL^fKAwzFy%^-fHDvOqG!c;!0=&ex zs{1cWvmvpn<% 
zXk797%fhKz6g%}!ADXTjeHQ+!V*HU)$pgc#r2LAw+vqYe8%rZehVZ1$LSLEHt?H$q zhVvvMWo2nrF=*85_AKekrrEH1mYMYi*7f=08QXz;t7ef^)*{fW8o2}{rG()Jpx|jk9Z-_Gu)md zOQ%ryBI`GE>%6A>Vji@m(I*P&LS%@T@#h}-jQZs=ywj#vlBHBKKOp7rodiW%IQU|@ zze|5Fx~D+x8FQx*l|^pzTwq_XMD&SX`(1`FFUHj>q0Fy(RQQWZnX_5kBI1aQrnfY#IGa`%6PKO+L`i2{ZB47$ORJSj z9TkVRwxLaKFTG7Qn9Xy%id?}*!pX=C z=1u|IX)hTshma=+RmB0p+e!ZS-?L@_55W9x?u;$CIb59q!PDa*ug6RPqEx_o9QLF~9{ z++fda&$+*2Z@RAao`FCnV)nbI3?Q{a=Rz0w581=_Z6}{v_y5f#-L?N;O(K&FH&->O z{9i)bF|ZabZ3QLty|=$V5*)y;WJrZj3i$(}xj!N`CXBiqD8w`-3esaE)Qt+HPUcD? z_?WOzc)7RXFr~&=w3I>JAVj@LAm_4{_X4B6(0vK3=4Osm-aK6$HbD=r6otXjUU~t0 zBkZ-%^QHkvON?j;X0X)01Ymwvh6giREFd*3BLe;2!;IF&`ZHy7_!OZbs?&>quiGbs z@>zRca}oMZJaRTxhd?4-v|9OSl=`<38mm1pwC0S$Kih-?1COE_A0GHKHFCyg(Ib{G z4B=e_N`a`NHBWXIGH+fMqb9|O7?pt#@TwEOb`4lsks*~s>b6$t;`L)+l z{aVHPG$^4mDY&_o4z!A;wQt&dBIRuy{xaauX7Ea|BpdmrjIWN6`Um)eFa}& zq8+1~^Lztv30Yd3@0p#Bn<%|mlSdtvm*k$6r|t|S5^*{gP+?y*R{A++z!fxUzGqJD z@Zpr=v9szJkbAbK^cIJV;tfG-@}5y-;{Kv1qNg^-cm_w&#d?RdV@3=Pe+|-b$7IT9 z@1;-dn7`9g%EJ01**Qbdb7jQ8$m*2t2so40N4--t$SfhIcB&gWedae4=2<+j+u!r6 zX-W>p2`GAx`&gMI=U}D@*G>s_e5GtCdP>ocy>FV<&MG&^@1a4~$5c49lSRwD#LGCQ zcZeTsFK&LrRk#_th5qieA*ES|%1LjQ5SBuaR@0XGjRu?sk4KW&;DVirGS-_ki?zKj zbWeh%utg%W-$XHO^YR9Ywf15>w>am1k zhVQ*~L)1C+TSE??h4Owdig>g|2Crr9Hiz39zM56(rST~W@1u@@*I>@F;bv_&#e-8q zPXxbK5~qA|c<@TO0g~ThL5;OCU;2bA;QT3GA92uO>B24FGbV&T(l5*3KNT*;oObrL z#hFOl0Mnc$>V}TRgr7#5>S{O^7T+UlmW`=k_?^#WIAQATIvh9MnBlvxcBC-H0h6|d zQ4hI#nrIB{LFAf8Pg@qjkw>F3JVwqlH^R0Xj7Swe)Pq$l@>j%Yu4|&7MNo1_JCiL& z9)!fG+BkwSmvWUC*kGyi#W1zlQVXd>456D=JXtpUJ}(XhI5z{r50Xld2dm_r5&PX6 zFuTO}Y(~jh!Zc=etadW1Yw^${(_Y%1y`yti-EdHe4ikzOZ)T_WUwPfJg^6RP& z)=9;Zd+8mxSsEf;Nk!@X`=f&%MTSed$~f)Ms=Nla=!qM7?$JMN({xT{&D!3Cse#XQ zUSG!_=5cBoUn4CNvOV>+3k|{K)!E*+tNb~R&quM(p<<_&){VtYS+bdMYgREr9*svV zf&C7PL#qYOk&eKz@O-MvaYD{DO%T3nAdTKM{bj+g-c6-VwPeRK7lZ)*WAIV5;KSab z@v`dtAME|4O|}My>6!iaw_D1g0{-9>W}C6red>S)yfgNx+&07t_CS}wlkZb_T7US> zL!PiO#k)=PD|*bXdF+2}0yzpOyC~i4gt!-U&y~M*l-pfHS<_MBBG6HeG}aXzcZI5I 
z(I(FWYGDyIQ{{q_23!4#TSE%tvd49wGh#z!#=VRnazhU6^ zR>F;JXedOu6dI9r(?)xF{PYLZ9fVHPUHioy#UAWkUgrpSKG6*cgtdSviiS3{J4D%y+gGEKe2z_Jv1aft@kIX z&{iLX0n3R1#86)BPUmI3?=F%`Ev*4K|A6%M?DBqiWR=T7$9tz2M8-Gg25!(pS6tAD zuj$+Bcv$u3t_uVo8iY<`Ovc~2tf}&A>4ljd{dGlI041Qu>B1`K>m@(Avc#{|kBC*Y zIW(aDwdP8sTNe~rP*WVg8uaH(zkbb&p>c(s*RHSqsy*wWV~r)c;EIjLszBn8!|&Gv zZBKiw^6mJ}ZZpSLu?-ExEsB_We+p-4-N$dJID0HTSIq5sO?iE^ykdCO!Q}fpS;rkc zC93Ho3+iG*R{H||Lhab|58t7I^A|>j70$)ZuFadDe)W?3sa_zQ(-W@zQh4Z@t2V-6 zQiIuoij|6d6eDa`f8=K)z+ck#7E-nZJ`1XOU}5C(tdR)*R<+z z&hNTN))^E2UdM5n(S25#*Rf!4BDg0mHnDRCgpk``dRXw`gmBOZZ&ZkZlCHCbb-XYN z7{NXJ7a9$B5Lxw~u8u~z5s6irv#p{KbsSr44SBeA#ES67=@F3bXWO3gSrWA}{22un zFlb~SSL|^F2mrKj@gI}|VHQ+CNVaIE?S<`*4@%Y$V9))`LMFR^T zJ2>Za3{2^UGm1vt6GWC{($!fzH=ne-1i2?ZU+a5{) z_Lk0#`~FuLjArrE7I^3OW4?DTfF3t+|D^Jdg6I$Q+ZJwJ>;lC7a^X^&dO(}qrRrf0 z06#`;c8aX~8$WJJEZ8x}fYJS)8_W)$ji@g4A`wU^fC?6ZA(TW^dj~ckEilfVV2{dJ z_xFHX*ZOX<{X6MzwNtDY@;n!bjYCIxl@)2JF-}7D_~5_mTbi@myf`Ox98ZDrpZ9>+ zsef{(!hBme*((2dul8@PQ_RV3=|eW)d|o2KaO9v|zwj$QULGKW2V0Gzi{<$cyc`A>f;DFseT`^ z+fS}*qjvmWY-fY2A6c5vm{Nb4o*OX=k&C8pSRL+;;{h3Sr~*fq9!nzqYymc{!Ty#j z%pHy}<1w_T zKOW_p0{C_GV~p6{v~HWXkr$aqRK5QAWbv^;RJ`iu5Bktw4`)ic>o;Xvmtq9rN#M%S zA*V|x6W#x40mwEOaJ0*jXz zf5p6)zSO(ORK*XBQ$9Oh%WPyyqd@@d|Uw_`L1!6RcBE0yvsP_9UMwfB9Iq zh~X#5Y@ri=f|t>e2wQTZW$tTBkGC6)0&QldR{g=WRwBsqW@rlt$9vTBYmrM7Ujwg9)u zg@aTfA7m``7@)52LRNREPqJjyLAi!ZhZ!v_*6tW{p^k&=(@UcOctHX2P6 z+4aJ7>~#M+H0F-?AVKHM(PffAPkp!#6TordUKm}^`V2r-qD$3+Mp5Jc!)AO?3u+@k zf!cQKSp}5%1dW<~SO|hM-W)#TBA%4c zl66$2nsawr79<5xFV?!Zu+Cf;lEygIc4P60I$-O?%JRl;kf{Uplrn(yC4@)v-H7N% z{hTQ!#3!3bf9~?6{gF&J-oteNITNW56&xnW08j=Gs^Of1x1L=M9dar>{#*-TsxeO8 z3FQSqXF~PwuCDhej98iZ^fEf=o&r-lKm}HDMR2~;@=r%ykFtiuT`k6leM)7>kgc;v zuD15E`3Lr_fR*6xa_W-p+AHeoGlCb;UW7;6j0ObBYoY8g z;=!rwy)Ev?>jI=wK2L8tQX3aT?-JHuSBaqUlXGvQOQVNo=nHlG02M_DPni~^Y#gDM zJ!7ofRs@p18y7-1n&9PSR2uS!w9xxgbypt?H4_V<875}LkFb2Bhl_j9F2O>CrFLtr zf?LC&%(BQ4!-PHS&LJ}i1r>PUtW29bfu9*_OtNWqx&a?a<2V%3o{RuR zl4~I%yQ+5tFt0^rswy4um*ze$Thb(nl$%K!p&L3a`n+t|*DH!X`$qIit*o0IfY-e= 
z)NoZqeqAY-vyh%pYpl`yscjH1pq`e-&>B0y3Ei4IiqD^x|Jl9%2NR|`a@_?Oxe--K z0Nb^~{EAWjY!#D{GW1+qx({ECi^N!bp9NoQZDsup*Vn?~tqO~c_lU0e!cR*$}YvnLZ2uiLA`du@Rc=2zKF1~X`&OS$LM3O*Es5ntqAtJoPBBZ_4-9V z+V7_^4kk+(_(S)ijCBHv92n&w;a>W$b+ge#AdA8JUaCb2)yLQY{eNoaE-!7kC2+MA z??3?yB`pBEq}@4-qr!yANxf5gFeQi0?m$P~yEkeg0x{uz=Sofb@<&EeGK|%$l=t=c zp<4z(Wk>u75c)nGETjvS3hJ-lMo+pr%0<+VUrN3f6{{)1)xsAnJ@d~TQB~Y?cxNE! z*J`;J`Twx@mO*j7+q!R%;I6^l-3jh)!JWo~y9a{11b2cv1b3IJ%*Uv*b80DI8SX-A1}F12lHxcHfOR#F3;l&IpAp$cvpUeqq!E1xW^QBfE)Tjh2l zY|EI4k{qDsB@WN$EuDDPlVdR5AFo?Ihv?20mdg=-|O5qhaGKt{LO0Fhi-|b@mh=`W;=+wTB z6%G>zBwozs`SjMR=B`qGWO@-C11o=k{7b zy+D}1sp$JiQQZ&QTKie_WVO08{o_dApxlc8ACv!ofXTbo!5x~w0_t>9*Y8-lF07aj4;y2UYDm#J zl8%4Ab)`MEe+TgPWtV3O=<@+8r$evcdAHo1nkb+Cle*8}$&P2Zd(s{*kccyy# zxu|mUf1@k>npFQwx?dHwyaDJY5qKS0nO~iG7oBp4)+=)L{;Irr$ZsO=qUa)YyMz{| zdDPh)8Bmk2c)GeeHg)D*7pPBO;Sb1XFc`guO+H6FpfeQ7rA3+;HXq3KAw{Km%x)}+ z{?-y4UG|4@PG{}{KdLyFQ7(p4*Y?FVCK~!%X4vcJvp%>HQ`aU@`%iuAGv~ruw#Ch= zb4+7MZuqEeiGnNqoa1m^ogopvA&^+NS}SL~-cIsR>}*d^7E+^w>$e@gT}yamcm>Q? 
z0W%Mq5hd2q+jC}dsAz2|TimvklR~6JZ@_U8m&@;SExtQ#L5eZ+^X$&;B@K3qz4*@) zZd=+C(>Gx$od;2;JBgnIEbaAPci3Si?0(qmGI~8)e{){b-RHJF*|RWU=8>3H2h`e` zD%E(Z4tzpzC~Ijdoo5Du=d{FOau z(#;+E0n;BYciZtjWK^D%)GU9hDZrsrKh0|-bH-!)zr@e)y>E25wEp&_ZjWbh#dX&+ z#5C8e%Q=f}Hhq`+GB0%(z7f?S73^&aQ@E-8vD1W+&gI0x<~_JRnkGKQ;(=nywl=OU zMMbYHc=%;wYmCgHK9V>f#RH-{1pZ$szr2jHe5;KuT_(I19yk2k9zJlRTz_uk*F09N zV8*;C#sCu)T)!JTTD=wJnU7ly;6xWW%C}1xpfZOKtb4soaKA6S*XEX4lQn z%lVTUnuO=MA7?nb!Q+s`u4l+Lxo8rv2xI$cX>c;e`NCz&JH|BfYgrHg-uB5P)H~it zNUjd0)8m(vIsgL-kge+{YEHh}HR$F6n{M%uY}$T_vQ#<>Qa;j&=!_=;zObdmbY+EO zPxfA-XAn1Z>DA+%c$9jL1+C4?pU?_@B<%?r7r{sVLcm8+zahkRY4?+7Tu|Cp2-KmU zyi#D?Wk3$j`1~||2;7X9`gHeW%!oV_i$(082G=6ELeB%5{p|IcZke=b1V}2$YhJn> zboT<#$v?C==Vv;t{FRp-c;bgx3EWcVMAw30Qf>lNIWzBwdmR9Yh!R&8MUd7>*6oe+ z)l+$CYb4P$tI#5=0xmTz*nrPelLpdh%%;KWSNSS&ewh+FuIbk_LB^Q0QiG`>duGn+ zUkD5@aqZ}c5>Qhaap`qpzu{eV<%{Agir&BfF$|Rq$YzjgGnncK6&jqs#D?IrN6yoUCZ-qVgZJ8U<#ibD+NUy_h}!rW z!^8kE9#dgHo?wHq`7l3shi&|5q2Myd?@p5qHvKiIHmvY7=1ynbw9Mzi``*8VdJ~FN zla;ZdmAM;hH8$?X+jQG+s{BeQaL6_Y;t0K(I6^dGCn^0PO3dy@e#ikK#FpR^Z;AB> zwTphWF;r?rOjCq7WYOQW2gd>K@trP0NJ&n*4-kgEVdF#{3wYP?tx7kcARNb%L8dnk zAz)IG)%j?CYV0%fhu88wYR_D7TQto=1WE-iDm28G^;$%BZNiA=&t#fMi*l~~Kv`LU zN-tPI51B+mrb_e4Ly%g{_mn!BD)uT%;@zZZAs|%8civ*3c@*Jo9xb?Q6@D7tkU^L0 z1Ey^S&TLR_K^7mlrYJ5azb)jR*&Z7mI9e3%OI@x0O!mKTqejll5=N+MchYW`OvIg z1Ltrh!l&O`HHxhcey0YEY8s1InbMwOQZJw1s{^X|tJY?fCh+rXLM7d%iXLR-BI!(l zd;U(-=pVaKb*My?Z1+@6+eFUBX20kZ6sh*VM0ikFoI}le(A%d^1;smj^KcCeA*@Ui%`y^~QBMlu*ah4py++T@T6K_vF7O>)X26|s z_-fNB#t|Jqq<%XU_96*j7HSCZ2#enVgG8bC#lbA+P~K{ilbgY4;3*iIj`9HMolsqM z@b((5Ds!R^*;{see!J3zk-NOiD_)fvC>OM~mC|^M@`!MwlDY6(-;2{qss(l!Y}Ft? 
zD2D!+QI2vr)ZThvM){N5#e;w!TZD*OJb}Da^}bm7`mN9osRzD>)bMS)4%LXj%RDvn z@@WAb9Jt*G^zaw{5f-y${))FKn$es@w041jB#g>)Cm-={A-5l5eV7#BEP|i}KLeR( zQ(Jy^j3R*0E&h}5s?e7q1gTmF?w#6-(U7PeG~`^4Xe790(7Pv4#Wy#jW>5rrg?c(oUU_9|1ZB5-96>w7(z-9^7z}>6BSxixE1VP4~2M2VzRp^$@$iQ)L!P3l$z;ymj zhXPD{c#8S7D9B^(>eP<=Df`jJS`WS(>!`}vLm@kS1kqaWlDMk87wzLB0`SD?IDK=o zU^><|guH4Yq73fR3`P5*a7`5%Vbb}P*yvwnwDg>;G= z8&IE{8_FEVgcE5YPFJ@6v(5N*vQylQ1GE`z)FkqL`@)zLUF6{Z=32-hbTdY^9a}$8 zvm@qFa)SuLxUnuZJ@^X#(l!4=Nw%|kuE(HtbnT4FaA8U{gYu6qtN(@8RPLWN7_y*aL`B=3W;CnyJB$46tRB?MWvYd zM#KY5IyOX5p&>HX|8^EraOGoEVO;S}bL}+it|oc_iS^W5p=VQOGp%nmWW)Q%CZLx0 z8<@_D;Q(zNxM~Ub;3;*FcIkpXsNxMPoIw});q@Tkr6?8!EnT@J9FQ=`CvG)eLImROizzVDm*lRiR@7c4f?3NGa*N7eX;rgX znv2IgSftY@<9R?F0Nt3=K?Oa!^NnylVC8N2Tc6=N)DMerO2~}6Z`v_#x|8nA%ProJ)J$ywE4YJ1OzR^A* zC8u^|9rF=>R{7}Rjon?eHG^3-UfpfVaN<%MHzWZ*mdW)YLUoPOadR_GkE&gNJCX&! zXnNPct$^qs>WPnOi;KJPt?5q5WV!CS;Ic-pK1Yicl_(aSvFO^2Fy*AXb&vT>LXQF3 zAcl_Vg@pyFwU*YrLqa^RB~gBw>r7STlETWkC@R{vu4P;t_UDF)NC^1yo>P%6sW4BK zO=69_2kf6a3Yp04oj63ep&X@<@uxr*5Dw2$io3P~Lcx~b-VJCGX(H^-B`eOT}n>|Q98v;Wy{3gPiw?zaR#p7S&#q;OnX6Y|< zEbqeipW0#FJuPW!P@~A&V5^vJOL*~LBbO1?iv?$$2aV}u`pHi7y%C!}!rK%(4;wuV z0P;{!#kX+dsIyNbiDp;a-d-880pE&jyffnttCJsmWOn@}e-&GL)E3L#Xa4}yhb=P- z=$>3Ry#34MF|iFtTPW(IqRS$ObKy8ZO3SCIHSmaEG-}ps^4A;yM~z9Ud05iBE$as` z?mQ4u{Zjcy3{j4`l{Zov^DK#7xp8GI=XbVL7I+?v=saxB^b=eRUkkAN!L0z%G9}v) z2%s`vGK2%>8R%I^eS!t0P3UX+;lLPmjyd{k7}XLw|7&RxP;yG-bAoI;I#Il6*vv}5)7+G`LlF;f1t?}k z)1WnrHx@y)ikKHW_dRbU`Z5cfb3z}HMG2{#2&7*nCXo1+vwcEal;d6M_^LyYtrQNX zW?~=S-VT!AgHFjwh9(o&onmXDEc=(vUo-lR9gQJUPpZ&SxuL({_b$ymI(A67FHUL7 z58w9pSbDW&*OJ@DP9JgmjOy~7XCj@P*$K3hAm=8wV>!h??hcFtX-|qDDuU#IkgN&Q zzNb5#QW{x6)$OBHD*)n;a23d6{el2V(bHL4SrGU@5YaO(yV`T7--sx1z3;gy5F+NR z%;dKN7q@QJmWB~aA0hzECxwq@39OP^yCryN&VsgJ^;K-K!HnjlA>n;Lrp=s@A_T} zFA8E(O{76g>Z{Ecb3Z6+3;PJ0g(*490{x-`s+cV?R0?a-Tv!X{%*GV!EAfXyRvl+O z3#Wh)>_7%#NB0uq;E60aFy_R(u^{vEDb!J^|2WE*WymAb*0Qva7#OS-Cjhlp>3g15DZg~M@CBHpyYSw#cCxZ+Xt;PUgC zurwVZWl@xsBg2cV44tXssoMe(4sKi^({z&AA(2C0Jf-uYjaXg{!-!j=Ryk3z(t3 
z6As}+wPjE>3(`cF-dR!ltKr&AQIk_vHE?lp2DzWv`kZINSlrvoQ~6UM+@}p-vZs7Q zdv8R0?J9KabL?XQDT{`}yRCO^7ZsTit4bCH%JeD@sHl`M6cDHKuJ>UTw_P2!va@}g zd|{WOc>?C_8cqj9a#CdU-mvz74A;mfpxnW@=ple;$p2M#1LBuw$30HZnWVpFrV)>b zcn3HhIq6$(X!rrrZS%{|EzBLu>)71z%`%*~30}dd^&Jx>>!8d93~%CO~s1B?J9rwK#K z&0~JP7NwHD3M)=>;V|bnN)c0-2iDC>Kz(L-h{(x2_HWy&lb~3qWJ;$k%G)iNq^b*d z>4l$+@bvoiRx+A(XvvhfK$c=lpO(Q{Jm)r>pBg1s7SnN|(Y2Tss?H0u^7;x_;&Kj2 zm+IEahLQg7mdf>je}<~On+5op zU+Yz{{}N89Va8DN4Gr4;{#)~5hrX@1JYY*yN1>-EnkjyEJ z=K5~h5*Z4v`O|?Q4r1kXcbTP~jt9fOdCuIW>+f;-m0GHYMF*S{)IKW`6dl@+Kc6cU6)}M{EoPZ&;VgJ*^A6|{sELn`&JJ|8b1fo`xq+^muceSkDUw~< zKfwJ82w4x(R~`vlpPG&@l=AOu+61LnpOy@O_@T>8IGW5*fQ|p_%w)^RgWQaA-uevL zGTJ|{q~!;va6q@ycN)c$4mdP_$3^{V38{SV_x@5r6jnnN zTgze9{tsK^f!4%P0F0H=$$U-*Ud~&i@=lcn79&pUjT`z`^6e)5N)+%?Z6CE4c$E4@ zm{&6s|CQov&*d;y_e*U2a1y>j zc~y0RUBYM{{m;=wU2V9_Y^I5jSRu$L%zj+}n~z@d4H8BPXzKm|&D=laR4gie3L;F( z8*YXmBSrQ`=EMG*%tryhA}as}bFQ#J&M^dykoX?{q8G+=Te?Qke(&c3lT9k?^+u{A z^K$25fg;$BO#-z2X*rqw*-C+oa|GU;q(`B5KK4Y}9yh2}|0F(T%M^rqaPmVYnaiKO z1otAhtpA*=N3pI(l1<)+ENxopn&UtQAK{^ojURgpMM@R6)+4CupWCZe*%iX%)#{yd z0YZ=KJ(QXQfG2iGxoW!sMlb#|j2J~zzqTo43109wi7`=A324NRx0u!9> zk$TZVjQZz3vYTGIVgGMUXmD9*3@h7{O=zdLq#>6;6hH&ovFFjy{d^<*LoRU*TI*nc zaT95iWXtaG&KK5rHNF3Sv$f)XQTchcz3WrFLrvLrlF9EpAx8v$+mw>!17toeSxg*! 
zWrm+Gi5jy&H(ehe*G~&OZUsbwKFGeG zvu4$u$LNgDsj+16u3!Vu_oPH%h&p7BE%JFoF4s>iPW7>JoSZD5ge9+JaA|D11LPbT zT>yl`Ot?Ml;R00b^N-j+n439FafSw>uh_e`maM(lk7S^(q-#Zmc+k}i+`w*4<9L#* zxt_YcCuBNNXkdITescTVW{jdEK36*5B-^LCj|)T`igOdbv>6@iN}#FTxYIHIT4mP$ zL_O3$ab3gmVnBKdL?R6!W4_#xBWhKTFTyH>f+z%0wlIK<9@6w5zt&F$Xdw%p3~91} zi>4p*b2&h)HJ=+WE_CH+`X>++5zM(2@(L(($ulz0Z9CR7k5TsqTo7fH^6R)MBz03o z06x{xKcCR}KJsrMk9Xdy^7gQ7S*3< z9e<)3_n-CY1i8bg?Kw4KQa#>)yQTA*D@ac{DE~7+`8TN-(;D$lwo@@!*sK@s%pnss zCnYuJ^1LOq9j8IQu|zvSzKlG360r|^VNT=TI%0g~^0B$uwAU)_ zY`=U4#imKxbo)?NU#EgFfRO_x205+OxiVL-;yZ7DY1zOI(7>!S&!Xpp#DQa;%DYQj z{t=ae$aSUc|F{q~Zx_OILazvs!vPyFDB9=nprZPb|Rbq)YKCpyW@=Y9I) z00q8VQe(a#{m*-A7)R5iEeVkFy;5sd2QU)@p3TgIys8qi zLuvgBtlt^oY_G$izCB%$gXIwyPdFgd(+f;56CeMJIJfDIEbyxBDJAt7Lj`UwF}Zr^ zqsz76Tz<_5NM_OnIfIm}?@jXrk&^_cB@u|nb@F3|Lho-Rx_oJu5FtkGifd;6wJ5vc zvgxE76n^~Qgr>?@qSlk7yyLRRTBH z#uEVby0%A{B(2syn7T2mV`=mCb=m86o-?}M=IQ=BRv_XTnM1fLk_1Ig^q{}DrIEt% z7Wcgp9m=A8={KG|j@&C$hM$#>42t5q1A&yTQ%xxvBvh`jtVIiju@=>csBrMEKHpf% zeV&;gy*0{amk2A&+?b5xW-VR!vW_}#vaHNcwXCF1b@`srY1)c*L5rN`pTR0AAB4=~g90;f3NAI*u}=i@H+CQFTQ-cAdF&#ZPe!!VS}Q(4Q)|+~ z_><|}sGnU)QZS0Weq>mEGC^X;Jouh`rlAAAfI154f+-tF$@g^89ZTEbU~I}a#ZhSu zYTS+kc}+FYcKSl{!h##3YyR;JMX`?#AOQ#0bMXUe3~+J`5+EI8!%r3f!*#)fj9x_F zIZS*NILhb;$?jL(YK>%vkK@mM1Y%WqLin|9*}y{sY@oQ6W&;@f9L;%oAxi7eSA}K~ z{V=KBa@iyxQJ|wfZ!F*dsxl7Imoi|W^0U%L7Y-P}jeq%55rZm(e^q_S$3y)*O=(qcE=ko*|@XzfcJTND|48nhMsl4B*@%ep(i4p?6?~eE#zyLgPL)At; z15o!c&~q;WO7K)D?5_o2D-~0Ccm*ghu)c0$_sKE=Uj}`WlSfw}%=OlZG9)38M-cnc zIX+mQ@Hok>LjWiuCUd{yAUnL#{jrGyhMtHQxsaF4m-}}lat8-c`Fxn{s)hjSA_(rg z;m~)GNvMeU&~U(^g_>)}GQo6~5bW(sq1F7iM>uc$lEII>`tPx;r6 z26eQK#UYjC&Yxin?g@bWm@~qwDW9r-**fo4fW5Z5^|0X610qvCO_%D&5f|G`Qn)F7 z66rJn$};9|3gGEwvy~zl|Hf5*5bG(;lKq!kt0NU`3I4Tgz zqDw0D&oSVW5n$|DT6U0_m44$&$lG`EgTv6DR>OHnx4aF?b{WBd0e|$!hxu~boz&Hu zq7E&LsVxQ7LHPD(BPo5ztX2kIspJ)gbWtz^Alc?_$y*;6xzs(Z;m30BKb3DVO17-6 z!TLUP2L(7tRyI^gNaCF;82bNw3XVWUdkaHc)igl6Lvir7u|#_p?YQ0^tJUdF8}+0)wF)j9VJ69iyv6u}J`8{I;q#>kURRDdt9M;I^-_uMek 
z>RVWLt^3IPU>FI^0itc&@h_CPXls;!Y<;peBna_Ki1b!JTH%~t99KFIX)$UnkqU9V zn9TJhxlBC){qKhdO8Q&RK?P8I)^4j;(Ol_3uI5(_{Ri@H?~4@t$E$hb zs?W~dNGy^jzvI#+u)iG1NM;21AuOOF2zpMyEA>Ns3z&q|E3Klfay9p0WOIAfuF*-u zN%JWg?aN4j=khd0y*D}k9T@r51Su?GyJa13kYBJoV(Y=YdS`z*ywhMMN^kHi)acpd z>ow;OeL6KM?PgAn)#b5sr&ibZQoHvgtb((dZLtLM+Q~2x=(>(A`REa<+^WS(I%>&j zBV!hsw5NPy7c#p+dQ;fBQW8apr+tk3;k@HD$-8Lta6hE$`(dNM)6p@MdWL3XFz$>} zxb3^!=29o}>3$~+m4>0P!$|np&^xt)2wN`|nY_WE@VQn25^;Msy=F|;?Kn^O(>Hc# z3m%P>q(8?B(Q6v=tWihmex}7LrG2U?89B1Ht4xT9n`Q5a-!(vXg{?V*sh&gazpE2T zrlp6ckA7`)b|xh}+wstP7goPj6xK(DjAUu_yn3Tc#P!Fx{-ux^L)s*12!Cp_44)`A zf9hdXlT}hIk-Ac`Kh6NHx?Ui0k3kE*@`IV@Q78x>c_z|-Ak@7tONb5 zBX*Y8@k@Koajn98Kht388Ce>xG%9=?o4(Ld9?{B=9sBgU-R^R}uhuVPR3tb0-d-M2 zzpL2P*TDZL-b`Ta#R|o4ee_~|Dfm%^^Ukk1GOk5nb17AsL z>hsxS)2NDCUmHz@026vUwQ9_abM>swVR?#n4pOWXV{+ej)n3@{_;*&+G@HZ++DBrs zCA`5rYR2WeECI6^sUS$EmiBBsxPztOr4*5Os)oY^^mwAlv>j-gc5o2Gs!0M!C9EPJ z^SoE?S*0l}ea0yg%WX)5s9N9%wkaqTwKdlm5x1>BwpGx8Igjd%2^eUoUFr= z2;5oJ@bOq|j6m|8>Om9ylhHf9QfF<@bC&F(lhjmh9N!P)IDw@Pc4^(EoxESXG_5k| z!sXk^J649Mn0Da5Qw_0_;@phR4@2aXV77uYm{kpmq}w1RwW}5G)hVEsHkM zobusrVcg;F9!~kvrcRDJcH<>>Qx4Ja0I+X@PqQ#54incnsK0Q|rJKq)-2$a!%0tBs zAy>X;S|MVBqR~Wjy5Ud7I*_^bbSgZ445SqYES(`v7RVI6pBow}fgJKDEs>(CUoEH> zGqJwUq&Ai0U__PqsEMw;rflO?`6KXaoYr+MsB;^T|L^rdxha zCJ~#8V%eVhF^4i+oCy!jowbvkVWJ#O4HxDV_Y@fCw7nSnT;(nJHhk*Jy|jEB zX@o{&Q837uy^)am_;M7|V#Mpo!Rtv=c>JwuCLrcSvP+(t&oeCyXXe$frb289<FGB5q?mT?HGy=daYCR-R?si*<= zPP>Y3xyE{*@d}>v#rY+DBF6>(~n&6602mDwI0 zMEG8*)Haq@kI%6V!Fquvij+(&Lb);I^S1vi|clO z?fg+#f_jCNeoU&mma?1K#p98*S&8~waj%Nu)*(mhg&n%Lw}XgiLUS3HlH<@Qnd{WdPYkf}p|_Ei9+`x|9$8 zGdt|S8u0`V2BE+RIky73=8+qEwUr4|sHRtIhBT0(mXFxkQtt}6oQW6e?ac6M{OjBI zs#k9!#W-PPqR5}8l@U9_R=*prOiT?i1Zw<*tV%;e${Nvh#Uc<)qQ8G%q>Rbtt^FeA zWBY^Ad-QzTBTuA;IK@<4993 zr4lFX}*QSBdk{E0EvWnblt(1w78j zO)J0n4x`ve$ZUoocB{_IMkDi-k zD;Q>iH^KGj;^1}Q+EEg{aREJmu#=96tTXe_- ziS>H~XL#h_slkitlG4aWx>EfZvq{4WNP{h()miCcxF0jo zJ$C%ngiF1y%KM!P-7$FmXmPEtKC+E{@rnb40OnPiy^-AW%Q`k>R4<;H)qUqym({^d 
zkMOYW(?l?6i9)QV3|v+<35G0evj5_ArE6iF(*OQ_*Pn$0xx@!UmP@ynM?D5?^>V>+ z$K*O3&t8v2+5tQLJw~dU@#~V)J@k9rAgN!KY3THG1cOk11zX4b;yOyZ3Bkf3i{k;| zebmpwr(FF{R}gV>l!n#P~4Ktp4s?g)g z%{*v2>nKxbhCgZs&->!2qwpYS-<}r-Fz{6G>aoxvrrz|f8M4Gnnatq=RzNrV_cX|) zwWImkHkUb-R!Hb_;tes(gX?heOpSGG6RDwZP^v0@#D^hIS8M79Un>w=j)hGZgx!K63*0U8-M zxV<-7h3F?#KG^IKXAme&5$}-1f_=HUW3*I>G4h)#6YG(M0*Z5~0aexs zz<6z#fW`8+PoVm=?zna53bQZ6X3H#Mw~iIuIc*n9!I)Sz!k~ku`Uo^m{cpG~*)p}V z_`d*V=|m9Pe1p#1@r>T=(&|E%tFHwuPL3&u`{h0@BjDPV;k%ydt9o3W=~PfxDZ@Im z+y)*bR%+g1Qe0a7^2=?;xgiF48QF}@aMJS`E~jnD(8$H%;2~1%>bo!>#zLUV^ zRn-e0-N`5>S5ds^t(uReYBv}J40}PiOKUYdmy3sMvXYb+G)EXh)JM&M5#!b#yNp6t83ce616P*4mlb``BbLiRCA+(kLdF*?9 z@0f+YHfh}{sp=ks{%+l~5p=}7(a%(mZ714h3uL!>{W!r6^6&wx)RYHFzbgun19|+V zbz~)ekva4sO0tWhj5>LVjkqEh?ppEd1jB`R*RWDHowO1jFl$&dl^FaZw@$q0I;jRR zzHv%heqJ<^Dt4(Zi2|{&DK4bjAbE;Sqk)k>>GvG7u`z}uo~CwSJ>oe+IUr zkbSH|)Evu}>B3*?IGrefg8-L(iEteLjx?daTl6xSg7wuFXIjJWt%F{6L(etC0_teZ zKmJVC@6^6*Qes8s19dcHk_C1!Z58f?a=}TBpE4$WRX8dh(6YrUbVqUm+f3BKE^Mg5 zmqLyg*&L$kl`=C`jWT_)LN6>%LWTT4K5PxMHmq+VhMcAB#kSS{=(O~qk#jkasq};x z=*L&Pqq8l5J!y6L-hKQa?@QbLCl9(EDC_s$Cs7}^!nDTcIZ<4g2|ht#0KGSq;{VY7 zXJToBVZwD6f*w63lUGh^(K%{;-!1g`V}CJ(DBgnSss@q2DGY`JktA>D8MKVaS%(Zv zCV4MFm76J1fRJnq+MaH_C5)8lidEvn~I zAbw5VKBoV*J?J|~O^FTb;n5q~7|0}FBH|5jKj?@EMwuKK6k=fD>rG)3!b}#*NntXt z+TbMKfC3~bF6x}f$Tz3332+J>U@^jlfizk_@plf)Rh2fP16@g+zPO{b!ik`Vdc=1B zE=!2w;y6S=CR_XyAXpBx1DjM(<&bJ0?EMAhe7i0}zJqpgP=xULAU@}`Lt4XD94Rh4 zDDBpT1~u?Os(;oadWt4Pf?6NA@`P!a=w;pD@S_EDV{Shc22J*&aj*ox+afkbGF-y1 z3bMGZGQltIiQ`IX0PYZr+peZdVqb1|!mA!vh6OavnD5P<*~6(h@XJ9cnueF|p$`$< zA4tu=x0ZoMnn@63-)GBc@_C>7h=4NjL{z(I0qL&XGg!Q}3k$2gCtp4rF5cE~k{YHw zNx#IJe|@2>fj$^~l*E^NFhlkOCyce>!bj^=sM4xk#m6bseaR&*SuVJ#g zvcp9ePsHt+CY_8RyQB~o z{Shh2{VeHlyLOoQLQS(Y%NECUy69YU@jd9g!=1gSfT(2M^4jeh;cI&i%8!<7ds0ja zd1vuQN{y3JAiw=Ji*%zeNnO9}*akECVq1ezf{m_aa(3!zP<-o1ik;)!UY2xWjVT_x z6(%P5t5QIW4_Ne*pD(%^q@k4s-#(b4z`t1lObU|xBfIDL&SJN_cnAJc`@;$20Tg&h zuU96)C&Ri3*IMO$HL95?^C{`f-CzWw&q>+Y1ocxYIeWxRKYjVH%!3H?vFL 
z#k31!md{-gvFa`1TBK86nw&;N`*2YUWAv-)l1gtTu_~5C^W9j8ms-xncdz;F$t$vp z)x#5a=bbfmaTMC!pmc}R@3$CGr5u~D3M3m{z@q5j#OUK^4DJtjHl??8Ee&4Vf61ag zf+NvXv=v2ZFuPNINhCgA#Y-}tEPLAM3;&g-;6iwk1qB9O3e#z}J-O-i8hiTZ`_%bh zwqpoW0bM%VPB{H9-vW-d6^VI*NpVprcrKPYXp9TAf$ftzkG=b@|-X64o!j z-+>#VY0f4(?~sPs`YSaHj;`V>N+#qu8<$df>cr^>@B7XWc=#AnF!1x2ugqi#U4?`p zxg*H8NOaL4!Ij1H_g?DA=$W{x5aM!OmJButNXe`pt;6|)$ZF`|^YnUeXC>7Nh7%nU zt^J7;I+_M!o1-f3bFzjEW{?(C3w(lJ`6aVI>a2K<^`Q%EHkT-b?>mGa z!<@!jW1v>aCNgMy<{EqX-4cr2g_V9Hp@Ch{(154lRvU*_cm2GA=8u2n%t8&M+Fl7+ zqLra}CE-D*AM3QB zH^+mVRrOCN?a*4gXT6eh#2aC_`-Gh^z2M38SXlxMY9~LpNv3Va=;MB+rl}f^KW{RG zqGPmjsDNWVnEn1s3jS+sv2Ze-Wfn%=v@)Iw|6FMukHf}CaK^>04GuMNn!@A`8J7b# zO}Op^iXN#icOl9&>t9z(NBGY;aryYc8B0Xw&eF9}{M&+(k1tw4i7@?0fp|!H_klU*s?8C$%9+G6O zy*Ag<+4bP26!aAVxrArz+svEd)9(%m74Lwat69DG*M9o71l}SCVV9vhc!1YNjAJv< z@uc5l3=|aR&;Ar7D%@VV{w`hq8)W-kw+FY^=Q75C_3{_L4 zp8abO`Kbdw_UkL5_PoErt_m6apVvi#xn{amCGiCv>4EJ7$?%Yom_n+r3&pBI!$j~0 z-1D#XV$kNnd@@eDPY+naLW-`Z>hjU0M{GIRC}bN2jipvG^d9 z4D7JD9ICEPKu^yOUOA#@*P$2cgzU(WIogYNq_YJDr$b#Reu;DHn6tA&!V5Z?ZDNHz zEuO(eud{ZezZM@UyjYe5$tu(zF;}`4SK}BM+Rd;u6BFh5e4KLVO}FV+BGb+2%JAa9 zJZye1xF;c9$xEix7_<25C(5NnU@47hXvg72vh~+uzd*PRl`KW z`z%?SCaj~(+=S;5xwjHm*go9M@50xP2c6t+7u=cEEwM#lDeshsewRs4Zhc05fU8h& z1Ik?1Q6i^B2nl$ICS^ojWPq3K4#{+JC8y7a?>_mzIHPuUXNCl#Lox>|nsuaOMjm2c{_TBVtaas=emJdKg24{xod1b1!|> z@a@ac^^;YQwMzZ*ht&+6Us9znL+)RWHwhuJWZCVFBhb}K3;66lX_zFhw|_15N_wSN z(2q-oNoO`GYr05ptD zSP@p&QesuU@K=Z9B;9m8neO;Zume@7eN`L<6Y=DT+o;{=h4%!0Z_{Gm#c7%OiO+=={g7g!x4e0%qfLTY-XG_jI~KyM$?U%%D=PVLTU5p* zUTZoyr$JMxxVrwdAD&^>C9bk9dq^$LN#!_s`#5c2xBeCwG6pn%Eq_`r>{`8}z_Xx1i zA+7>`i{wQHYL$IqChK{B2E)s{)JH%;!fC0}gYm=Uj0q|33=JT0ylHNQYskRssZxDc z8rVO*e@X>8*GwYzzMK@OU`ow3>)@>oC+XytAWNAKu&|;418tJ7Dhv)duqp^angS#% zh41}h>rcBsz)a2|1Pbb#G(4u<0)1%rnG*|e)dG7<8P z0IXGCKau&j%_dG!fkrb%ZBTaVzZwa*K{D};R^$fAwf;IDn9>Om)<4%kCkcrqdO`YU z#b!~sWyf|AAMWlgrEC85a>`xl9~1YjU-?%<^Y|bTa?Sv#^V%YT;H+;UUH|hbd_9gC-U6*QI+*<2lAVi(6@B;{)u@T+Q{&-3pA@ zU*C^^tnC@2Mo0AB9c)d8^`r(AX$eR${`RpWzTKLD8#J^ym8ePol8OK9po2Q6t)}aC 
z@$-1kP*iQm$|)UDZE}~yvQ`xM#*EG~Gr|{aEOS5kE&dA=jFjc&rWPWRlT|TNjo?jJ zm*vwElT)gyUz2xbJW|CmFa2#O+@9>VkGnN2R_#*95(3j+3kVU)x|>GrdAA%r7`3RP ze_Hrj>B6t$9XBO@pf%xsDApK6YXGYqPNgk6p&tR4ZM6%Vx=__voWuq{s$1a^_0ZVy z;U9_bB8R1^Em~V6I(*qVuckizlDhL?=4NT6N4@)?YTD+*h1&Y&$uZ8YF`A>r<`k6v zWT|GHOA_2fLm!V;=-cijjHED1t=HDW;3`6|#(Q%W+tu|tFeAKg;;xR=6n-4;%h9{K zt@3^No^#f;QwLLLHnLeO=fBPFcYS)UNgnGVGxKM1T8@;p;pvx@@8Q${zMvkdw7;VMbuH~lX?E-5U z)K3G3r3GMp{&>`oeKBDn!YwPbl5=fk*}J2Zirn zl_rC4b}|#c-C!w$&45Bp4I={QL|WA67dQU|l0cURE_lU|w{q5ztV!q4c@+4O39lPT z>Z4wH#N+y{Lu%0>{~z|=Yi~E=aOm3~6h#ysVk;V>pSLTw#sJYuRzm(@`&5qU)}oqG zJ;J~&dB>MJ{JI{23U%W|TcEYLzvE}=OksDtE9pcyLr8xZ1W4I*-Oa2QowFx zd|dfma#FB-pIh2Ig#T0QlWAcAZEY71&A2zas=IMH+<-O7hEU?%?ZsL1PS8s9F04!p z=M9O7X*AzRmX;cmL3{uC`f$(Pl26qka?h@Psg9UdB?(P~S!GlAP zpuyc8g1ftGf&}*raGHEsYkh0g-c{$t0n!$p%q2KKURaSR6S^U6zrpHd?zjRY;ElOpz z$RjDGaEDu*U%V537nNf9$gd)q9sJNHM_n)PoQg4sJlQwlxQuotVEyST)Y0U@jxp(t zEIwit&@1p&9zW*sX%M(lFsp3-a7?(T)YjHCv)nV$;()giPG6pVTGXjb0R0{!y@nuGChi}Cdd_@;-YxszQHq41`WNAL67)5a?q=XVNVY{(s)O>2&_y?te`AKe)Dqo^!2DawX6ycM9!EBzG zueLr#5+4?UgrXi6YNi?7nhIVKrB2GrAM?adSG?|ZJaeP2C8?2S2Zwcf+7*c0KUDXbPOQsy#Umwz8t&In6<3x0Yh7aXIVr z$L5s>C$&g;zlyP-(rv!qvzLf9v5G0T87Dg*e$6ZAzFzGVshuJS+*4)KU-n>`567(P zlc2S(o!rT_{b<1*l&?n>?poc+u3CsuO+`)Ik;oxLgAqo@l;34R6TSQ~fWNqrE2{=- zI6tv?B^KY32zW3I4ouv%oXfQn$V9<#7OC?4Fbwos+53<6?DX1C*MxZSSxVYYDq%w> z!LhA|t&G>42W6ZYxE_kvIfPh`h$SMg-Ld0OZzXV^m7}zwTvF6%O@Ukax{PVQBsg`9 zcoI^EsnzvlEBHv_2Y$6#d{Xe$ogNfRw<9>Hca|w2VyuRB@pNm@Qd4GJx=W{_yBM!A zS^RwT;f=9;e-N!>DD`J$Nvw?HU7irgLk0LMX6>9P-;Wbflvv8ix;;zh$W(CF=4@#O zDjE>jfodaGbf2>GL?D+EgaJy? 
z_7+jSSI6aTlxfg)-g|mYz*lUUDs35|ANfym0lt;=y`~tFR%}W3d+NSC=U$_~Y|xAj(mZh&SEH1*qV4AOBFx?vT8 z={{B6ie{o#bGam^F~@T0;7QO}(>EgxP#W>YqCkLiJLzKo>YOSf)P+HIsE8GOThhMNs_PyO>{0-n4V~WZPV50Qk*ZXjpw4KHsn*fuqt<1e6K0hV4L49yT6*B zY0`d~H+4JNSS=>xbBFJHJJ8}g#{Tg`1z_dN1*Dxe$pErt4isZkf+2DU1Edh3+H3ob zuUlGkl|~AH-wL^=-gcjpT0E2fY=xjbi7gz4% z!j}Ergo8cNPmutU`yu2hUF85$uJ@uZkvr!StQ6dq%nv2U`kF3IM9&E46 zD|`A@Ezi|l4>u*~2{w7lOUx+^R;*dUv*6PZ+H5IKuJ&5fyXH&Da0e;AmIki^;icJD z`r+Q&AJ|X|Hbv^!`ekE^s5eh#!X1k%Sylk72POm*RM)XilEfU)-1w!oE=YQCg2jtG zC2vEYpV`LjKGvZ;(bD_aMpU)}qR}esx%EkXDo*fCoK5EiML5}7x)^})OzTLu#M4CI zo1d5#g&bgMCLjpL5(vycO=J%Z-de(wX|hwDr-n}7-jJpCIvetMjBruJ98c$F;wu8U1dP!ZvQo|qA*wm zt=JpYzK^fSPu!Bo4G9viKH>afvVC*Eg67xk=ER(AP98;b5(a=?)?L|ZfZfLncN_jm zx}(hyqV#v+Xw+*fW{q#ZSOVpzaeYg|Q>FR&`kr&2B0#-K ztI!=QuZdz>Iu^OI7L}WA?>U&*yz2icO$rTl`1bQJ){Ix-3_&Y%-DjX7lkOdsA>E#} z3$R0sKnbhB(-4YlNvF09Lw1rt4a)jk<{16Yx_9zL9VOCbvT08rB~;vZ`jJ|w9iq9R z%D1v+Yl_ilqXtoTHZ=7Iqt>T6r*mDzZ>I-H;h{Fg?_D)Ky!k<$$1k(n1A-xkE zypdTc$CCE?HI7ts=n3X~8{fpOK4N+URg%|w`TbNzyVUMyh-zVNLk5wB9A{RD-Zviz z@e2JuJPC@D5MGF?*gQGaRoXR9stv7??yNmty zzTFjXPK*g(X?8yp;_yip3YyJFP0jbA&(tcy`&=l1A46zX zxz)fFZI>`^MTPfLcNLWf9uXc7ugPj}T$J=)mvwtXbo|{-Lfft7Qq(r~_s{*ub zE$+(O#R6s885Bs*Jz2f5ylOV_AEssvlGIl{>hh z@vmlphQQ@pS&b($@wNoFB~;03!czZA7<5bG`6+v*OcG~(i@*(B4t z$YTSyX(C3Gf&fxJcgJL1cRND;e0unPm2Cbr9pr-c_CB#&s(e(w8d`X}_Z{l*LfR!X z=^jp8tCw;{-=~qX_EAkwyhCB%Aqc|aPzyxWCr1+qoc$DJs{7H?M4nB~w@%VH=gu4~ z6NFgWU5R5Q&&U(3A*!@wJ7BewHjdB38G-p-v{Q|!)+>oq(=#D@Nx?*Y2r(*51&+Z) z{h)w-uo)SHB{WF6SkcPT4OJfMyUT82&;_MyU<7a@39`p6nZz0rp9B%IqS)?6wLEx9fjIP3`nzFCRB6 zN=cjRn2n!%LVlNbnFySi(F~ftYakbK)0-*nvu!@8(e11*tP*n=QQGmRZoIS+KFtn! 
zo%pRUk%Y;K8c(y>9V%Fvk!h0rcL{zH#yP}Sk0~eLVNbN&knrWYvy=av$&vQpLZg>t zA!8)oSK1LM>v!~$P8gYy@z^HS5jp0kIZ>9+`0d{9rJNGq41VCgp6bFLL1X#E!WtZ`8d*J zsarE6`}>K3?lwNeWp6fO@tcQR|M;^Xr)^K4ihh=ngyt?`Y?53&pbM%@PwdLx-GQ%e zT2$-WG`wnMjYGiiqCivRS!CH9UMkDcvUw@mVP1TXC(ux2n3^Wtq0JGMCFYvG`LXp8 z6W4-LJuIPY%1-e(Ryr>kaqZt81aEgwe0@G!8_&mZqbcWmdwF?3#UEoUHW1f<&E*y_}ybGHx5^W92>y#e)@*XHdZJyU-*y5aM%)R3H_6 zVpPTK>R^3^VhON;n)sL^9CKlgi?nwOHUg4W+%KR}+g+l+L-v*7esd%5k-RPO?9F^* zT)rJ{@&FJ+OFoc%;w$U;ax!nDi|Oq5jVquY7GwxBA^Kx#5#50VZhQ^49O)$>)hG!p zUi$^m+kW7sMFzq*z41S!UjXnL`O}7k3IJ=rPHZ4@01*{@R3EEw2xxT`i_?(x=U`6@ zTw5)P%^8NRK?6+jUm=jdU8*IEVHLnw5DZ!V9u>$W!j>@-Bcy(KWVlJg(k_<9}cFLOqE76~yWnc`?JgY5U407~r^>Ma0lyfar!QP{;pH zhCFX51_e-aLqomg(C9!WTL`WcwfFR0m)3+{FoCnMN^hwMfr_en6V6`~vi`PP5lIuF zm~{i%KLtoMqY8HoaXh;auoGh52n!h6^Iz(>vxY3u5p6vUUX}vJ$1Mz$2n0L??u>Z(3 z@8xxQ+Hyg5JWM%}{?GRKA%6@LV2{JDbc_N>85#$m+|PEGzrB;@F)RSMk!5LBa3z&s zIR7kp03k=0owT#}M;w2rNVYW|SJ%G>a}WW02v{)j@5GXyuEOa+ofvDsf>yI2!@u?V zqZXli9M>O_>Nz#opuqS%=iYxup9;6AmfGzi{$>{W`hMxYO#)U`Ok`q^_>f94a{u)q z40J)ktxyDXZV{J!0%l|ay3b!K&+#Ye@J@FU8-&&fOII}2>=2WULqO!&gPD7*Ig+lj z16+`*HNu)SsJAa#oCOLVdxVIF1XnfM^hX?s+{rrLJ?e<@8&LB_50~O6^rU@*z)kZq z;(gaHQHBO1AejZz1TZ5)nM97wX%;7+NY>n6ZGkHh;=N%2*`AWrEp@;2TimDQ z-C%$wT6Ag@8to?!My!f)I{ctsJY%b6Bwd+{+q7mEeW(!4T)cCJEemg2kb3kSruo#D zcDxg{C@)56DfjjR;l970T|=M(29=w+NAxh3WK}zV3F_OSx+%{pY$nk%1&R$kwWEh) zhlnbsN(FrdD&yRU-EGV&AcbncqLUMdq* z{?_oPPpTYs{0dD>4F69Bxb@&uzzN07Ug2p`!91+{*(Ah#w_A^p;~vvb9yX$qQv#rW zjM%DOcVoSkF@ufU)SvI(3aq8>QVfC>YT^spJ>|-R-T>ucXVUgA8*{7{_L~*Wj5nMs z`}L)0KjG^E(_8pPyOcx-FKSf}xA*>VF?(6E2iZS%J&?XHx;7_D)1V{G!XrS#%0qA% zQ9$^FkH@ZEW;d%SI z&R0yCFI2DQr;+AwRs-3H%S;x$biy%pPaUIfNI%8M{7fPAX z+Zzcf6v=>~5wD*@)HkbLZ3f;`Vn}Hx+?rT@_GD7;Vlwb6pKj-d%pz`3(C%kV|4gnz zW{i!G{kDqlA^gMvgXOy8gu}v7Mj;hCNf7b~;VUD$6bA6AT5SqsTRxk$pD1bw^WuS7 zeFShKnFClLjaANv`m6rufTcm2L>kX&HK#dQfPXtuOiRQHY#PwDQo%Rmm`=1ALdDOH zHRpv7x3e-=_S_HpIWmi5STpiz_Te+KJI-cMA*h*zvDQc>On1gi*H0tF?CK=2c1+GUX;%yqO{YpBCEy2H>4z3 
z$u}sUYw~ARyDZHru~-%Boqbc)vUTyuUN{^%*=3mAZqAdya~6aP!H)h6Z&^PDj+FnT z=U&FvJoKcUeq2DtcO_#tu~q?;JiLBG#cn;-Y!s9rtUvswgzB8C%mU#xTL7SjYl4pt z-kHOwBVw*I>w}`PE(Ox!&Civy)C^OdDtA)p_nS679qFm9={(IMfbGNUz@)PoklMrV z5-FPGH}`M9Th9?OP8T+1fPmo8uXX%t+W{}O!LDk-K_qy*dOdRB7T+oeB`RHamI0vv zACq+W`U^hAYej+axJ1&oE9nZwgx1>7Y{|Z&RrTZ>BG}?B!4U6;xpt8}u}HMzuFK{n z9Up2^Ts5MI*fPzTw!@72z3Lo#m9lB;v#7+-UcfizhA+2dS_Z25tKfxG_9gA;1p|R5 zHc#smV0(hK@^O?@cl04ppuIoQMsv$-uf4os?5gksK zI^3Rvch1rk5r_UVY?IXytVv|?*K>NoPUV@Vm|D}m^QT;Ow8Ood8K;VQu?L$03dOTX z?ecQup=N5bkYAXdvNyC^Mc4@h6*nI7F9X@2m?y8&R=9}PacY$XN{*^Nt-`(=L%Jq) zlg8OhV%0#&z2+T9jTVLh@lYA3z~c;Y;Bhl(9qu(a90feH!usK++COX+=-${;;a>fa zwA&Co&n!K@2|dS98?ZfXX1&k8%!$^t-!s2Tw5+G$pLa}dU3dozkk5gAonoad=@w+x;efQovsAW*WQP^@NN)80<-QlLt1|e+J7X14+6^2?Y|5`8+vjjL0K=E zC|?+$>J@uXRuVHZZ{hxW10*C5P{lTjBAEVLtQ-jnkqDjx!Mg(3W*I*n<|(A56$t{dF5 z(fi{zJaXJj$dnqsKYvhTw(J9Dkwx%!V|iw*k7EVosK!M%ZqCk>g_V!x@VV=LM1nHv zow5Cuf`4s^r&?0P*(6|`Ehv{AwP|O3AwGW6@%O7~5}j3k#OPmDU62@3tH+_f-+ueB z^G|D7M&<;N}I7?-iP3* zHUP@VnlE$HGgMmna9;-|`t2ZBTI65A^Y(Wo%3W?Ho`G@qiM1rT z7IYrnDy?0z$>DqX&wdQFm4%b44k5mq{L!I4YWnQzDI3^)I`JPaAQznOz;E|YR75wW zM&J@H1iNHJ6p%d9!OymR;Mq=B?a1at;SU9>BH6n~i}lmX`S^z=R!ej0w^QhPAcR@v z%#-F6E!v-IGg~?wKJ-trXxGQc(lxi3`_G?Y~XQDC}1vfmG_C!>oG@qXKp7! 
zqAxUETjN`l<8%k#=WPUD`m)I;3^~z?)li9C;wl*5{UP*7X zZ*+4&lOcLJM&t(!PHHXQ*jm#9)jGry2c&1KrdlQi;8q$a@6p&~GA*4@);a*tpjT83 zLh>O+yU0$?Z-V2_UKDMJq3O)~vl*a~mtuZlu;lW336=I0^pVkZI0TXb&Zj5q3?4}$wejU&ahCSBvCdmOFd08>ea%tc_bJIyW}!uR zrBkHN5`2OHg{EnuM$1*#thQF78Zx`=cy(q>^$XJdp&~;qPUcLKIT0;d}sfpp)VShAWKb?r;@5fB^ zyd=O$P4S)=$q&ii%SgTg58Qotr~8yreT>=0Nhe%R2MUh8Jw?q|ZhyM>t$|QE?0#Z= zX1(Ee-Z4{-eqSjqMq-IOIRi~UB6;~g2`*xemluOR+sE1Gr@^dxPj4!(~g_{D9md=gVZ1pmG$Z0G=5i%Sh7Mkt-o=C*#BtP9~^7L_lI;*RW{tXmp*hq2p zPEKB(n>t-qcP821q~J5Ga2c#8^v(eICxx|{&jEvLId@|%ywo`Gp~Hks@Y5j9&n1y7 zDPDq*%5R-W;dQMPGt2j*kB1Hy#+f?vxYl5AA3L?m;^cgLAKud+^+L2zoxA;wtDa>F zB}Z+jI19Q%7mjZVC%#mIiw?2YtS zg6XD7KC4I)iYniY4#`rNQ>PH7s4$HV?;lLg?5MuB8sS;c8P)%ItwS^5N-MjL(zi~M zoci?(l{X#1HbypyR$!N+G&+1-rxgOFQAbTDL5waXU18ssXIXtA)JUR-ryqcq*@&=L zxtUCV!OJF;Kk(8SmaG20f;sN}y@@XN{{7RwH02BN7%%^k8|R>#N7c06r&p4lFgi%x zd0jJ!k;leYv~A=-{Gjo%Vs>AyrgJGpHb40q(Be&CQcW0cG;w{AuC_mMLF+0>0Dqt+#~v=CI#@t0E}^A>rWMM z?s?*?kYn}97-LR^vGg-OJ&X#k4SFV-N>QmlZ&0*(ae)?zVQ%L=6(QHI$nPvfVQoZZ zvqo$D)+3$mS&Aq>kSf4p)>vuJSkKEV)FweKqq?{D*-kc z$unYzU_#89sehYmy!scNgdGAT<*E&!2PUBNrea zSXEqFB=)=%C2i8Bc9kryi~;5GdP)SFG~8xUkdReILX`{>#?<~EMdW5Ldhqf4gazpN z+b!rmmY$M7=PC@YDQc(>h^ZWUSJpoZx}MhKYb?`2Qy{B>Ur_}3#x^4En9J7=!pmv5IPXS z705un3Crs6BN!lh!6@w6B*0=H>s6_Eg~T4V=!W7B`Q|JDcNcC3{Al8C^?n<=?|~ z&|zI^>__Z~>7jYOVhKGIg}iO;E|iXQnHDdP|0uo6YDc*%&DyhFJk)z;b3L$rPT+Jg zy*BPq8=`bVNSgX|fd6$cg2(FuZ(uKJ$-;jl%iz%C8(1e6ET|a+T!P%FF4GT((*VHY z_q=6yFVe|b30if&88d|VHt&hoDSUvSJX;iF{Z$Y-kn`C2+N z#_c#q!#E2=5+f9lGf$pqIX~b1_H~sU-cJ+CzI8vhfFj886AJU<^y_jHp#S0$7rB@A zO(}T1jePUu_69Iq2inNN)GtyfJJMgvB&bV~_zJqm#M`od#fBQ&VfG_9Z2O#%Hb{xg zTrcR^D$qy8Uh-a3m=BmU;_yX)`s{FZyXp=3MZ)R94@+uvd>ucUxwKF67u)x$3xOA8oGX(${(|pMF%)ye9yN-EIambm(Jc7#s$VKxi zLLO(RmC8 z$qDekMhBPxYtl#4IV;zy=1aDI#pHpw;g+9ZQ@f4+;v%Pab-Z?{PztJiz7cXSOL3ca z-fW8O%XuDGpF=oO0|px{deRKX>oU2>-ajLo2D@$C&$Tz}h@en-zL(RDWAVIf@$ljm z{4$1*q8q!cE}o`%Ja*QMV=(NHY(5zIz9>>QlG;RtzuEv#W>3G6B3M0<^5)L|de+zq z61yt*ONHyYhfkiLjXKiz@#ij7p?=bX+LU?wwx^V>|4@ILW_R>pSi 
z%D~q*#zz~Q=TV_0M-*~wg!>sPeXktdzb5~x)x>Q`w)-BTmM^A)R)ZH1V=E3|Rr4&8 zc&OoztDD=C$JyTqzcpPTGP=^g7a($|8aYqXe#oom%h<8|gdg2-^C7`9X6f-pqiH93 zNzc4B#9>7W8n_VL&8dpS!sFCP9GYQ8$vbNv8kFXB_@%-3g~ey1BHYW~mftV(loK_X zbaydiTtofKP3DxB?+~l#RZvjfxrJ{QLp57QZ%oY!($(vW6c>KUr z7FhN#vY_r*JmH5am!P!VD7$e5p(6Q!MK`S&)?TCn#M0)?LHY8Zx1+-=yAs!)@)axf zV(CCHm7%!NE&(RcRJV(p1vm3~TuYsY`KV)+9&@d-K}4mMKw=h|BGS_C%O;r#q6aN1 zmlxZjE~sb@l6EB#`MlPGS~?vWpTcHWd0L7T&Ph53=*^$fzAUTyGOaV2RdcyrTQC80 zG{R+3)NQ}}+1ZuJ&Ls}e*UKoxUga{tcv4kW5vON|JohIXgvr7@WkrzT_xRgH4U2kB z;%GT*1TFDS5R%K8O7^{K?<2g%B1J&Ov_ZrEsMtHL`db#!PrBM=n3<*+D+|?Rx|+FB zM_$zj-)jra!&(1f39}EE%hkqD%z7`E9_XSTm`fqxDzB%FyzgX>ppTNG+5OfZ)>9z) zb8-{Rp{ZS-LpMW@o&+sF9aD<~G3FU2GR*rXvMS1MFa94qbwFi*3~Ak0Esh@$mRG?m zCzMJUYI$Rkajyvn%=WJ)BU;+30#9b?e1{tjRjN`1WTX~AA5C8N6XZLJU_;{CupyMz zj~f=XE#IY&3ka^X`7@z!HsXEJ;fWlz!bc1_ttx|W%nEQ`|JpI~MZw1c?#b9rSC=qP_;{HX4&s_`9Y$?qt3|JOh%8WlQ+?r!mhe=9EOHY}Y z#ZIIWlMXb^yta?ISR#;sQfgvw;)$^H+T(Z3XLek}f^E#E64fSarZa89{H~6u(U>hs z*Z(crMv7JD*qNt-O72rdaWD8MoHl)ljA2_NVU0=Cd(cl&pxTy$DzVc>UKyjxCn|k7 z@nFu~Gn`Gb=iD*$wHoNU7feoYt~^qpPrnoFU-xpO#Hm4`>-ObHPPp>*7GcG}?jJ>S z$O`9!i#YNi?e1ym&s_;f|17pAqz7MmHEt|lBdaZe$DT%&e+Urg0g1+RftPz89|K;f zeXbhb6i9*b_VH-Am0D7$P8uF5(lHmjEDtLAcACiG)PSS8wdeA_ z<^I*;Jq=J^@5sL*Uu4F@zt6P6Wj|J^qgviPUIIBa(asH;k=Hbn`_S%UR898!R!iK@ z2Ti8bOVI9f=W%;G4FiIQmZ6p#dLMhQyYXssgi@x`Jc8jOD_$BcMnPIb(GX2Oe=Ydo zjk>lpvl(XUtaud=Pei~^@6LUpMyU}k(*oa{tHCG4UMn#*V%;02X}oo|{8`hzM&kA> zE81J95NyyjsP`40^cwfKLKV`Jq2B#KJ1P>@r3v~9(A;&hT@S`I)px&5ode)Gv5RC| z1*BgOog`}F(PzLE2%gp`IOZOLVQ#28(gsZ5K2K2}CBgSL7ycyQF;1>2`E?+er4}8; zO|cQaqK*r)t4)Ky7q~*NJ+Cy6#S@ zASa-y<}l`D%MoA>5Bb>E1EW*BY|xZ^g`3?5=(GmGQfon1*_Upm&(PS@yig&ac{9VR zD(2#VfA$i$3}4|hIcyUAVs;XRniapW{$b*Jw3!d8x@}8VTY+p#*Y2-*3|ooV){$Kf zOPVOJp!lmFF?nsL*XW^2Aw5Z&#vAIQREe6n;Y~4lH#eJtn)GqRH3PBqUkO*DjDVRH zub?K8Icc{ApJ!6GJy=YF0H*q%oo`nw`GfqL?l~Mirw_<&l~=en2x%UiiB!9J(X9P`I*S0Hn7{NzC5^YYhxa`}#VTELojRWaZtzk=s{! 
z%f!Bx&*ZiT+S~?+sM#2G4NCok-A?{Es?5j+9am=K)<6DxB=F5MA^ro9jS!KI4q5(v zGQ#tF^nPlECrl*#1xj9ty4sfe^gPFJXSDtrM%7YxGL{_z0#Z>%LR6KZRkS$$Z{6@9 zm*rfRC}52fm9i1Z1h^J|4LtqR)+Yc4;&8YOzT&?8HfK;tWkW3N>@S+gA&y&*`0OG7 zqbYBbO=j}|qw{U=jN=AZm5;;4sO1y0ngGUsk?qSWq-#Ud#2S@U`lR!U zv_&yR$$6@Tl5<^AKJ>k398Moj3LUGv1(v(JoPeMxu{DyeJMU281AI~6$Fe3KU?1Y+ z{u&MQ{dwGv-S*2Kx%L&#-ER{1ISnY9PS4`^J>W+^zg@e26Z!7Lr+@tOL*H-L`3?Wz z`apu{fu~>Uy-2vOA^LaU-RlA&-iG=SWdGUr_itP+QjfhE5p@6bkEhdU&@a!})&u@I zvY+kA9$y!@7jyT2j9|G6<5=Mxf<`219)j-qUp4=4y`VpO2zu?SpU6sJeH}qJSx(oI zePulnw13wS`C785AglGXj}wjPe=PF%sZe0w^+y)EQVPHWZr*qA`m;V~k--b%bVJb1 zQ0*VG+uw2#c{`G;)Uqu9!=CSGhr;s`@U7V;#yS{DFe2qGg3;ibe-qPNEXT>pQ3 z_^S1kiUjoNAK(6*7M{RUs)4)R8pm^qsF+tM+~rioZh4Xj+(V=i?;qXtBth@*3iH;L zzPf>H*WWPy8-eDZTgco(xmxU>(g3YWMn)j+hkxqzvRKn&%0C;BzgzK{EI^s3 zTz`-_?p%0fC$(97)Q_E=?)*ivs8m1{BYz*4k6mxo=;HOJQAXzqc+o)#HIB*B?PQj}Y(QrU?!b}+05 z-!rhs9@m^&g2%DXf`kwAKgro%Xy9^#|ESv+nQ?j3?VlOvFLjWbPU~6<>s!mesB~t@ zM)%J?Q+~aLWZmEE!N6YsIXv=*Iz6O*2->au@h6jzo8cJy{g({qbvB(z1v0xv!L}2I zj=ck+)dlAkk=_$w#Kf6g7UN%f@oF8G1D6^&?8GF}<+3xd;k&zs4+sv4YUB8-UNwhh zD;SA*^}{Rg(*U8m zj`=YtZGXUmh~NiDi@-f)6}~IQmlwBEUe4}a>soE=@E9dN6qY}69ypU``f{;A2%hP7 z*~7<+onPV4?5_H>T8c4>w25{2c0fdG9m5t~Iyo=1FJZYk+;?2t+=WFp59Hs~LTIoR zYX^sR=q2c_apwuL3<~P!j|CU=>{nJ2XR5oD)T#YY5`EdHRPzhef2svAwii^PCz>by z$?Ge}xt*|3*SX%{D)aSZ(y(GT}R(#H20cwh2Qa(DjpwuXn!cHcc8!C zPlzAM$N<{NPB&)TJmYXF-}6aYbvM_#zr|5OU5_R_IGDo*qb7Vi;WOMj{a7jCvJE&> zLQ-C6H!(P=egW7FP$L=ZL+skKx~cltruWDq#Cc0?PQBT4K#XcI>T))PixDqPE-N~u zy4*UC=GE!^!E^RJ^sKW=%ii-_cPjT}?5#>4&t{cm4-F7@FMaXQ zL~^H)PLGTfGF<9;_A1JJOg_12?zGuV{v>kMvydcWgBml4d8H>rt;BYu0gAk@?{TiD7}!hMF>l(+>d>e&&x5vUB2=S20|Lpikh zG?CqW(LKgwWn6=-=P%Rq2Ow7RvKyP-Wl*?|Uc#N%888PdhI-vBG%~kw| zt!E+y%oVZl4`U_|%3|J7zhC7a*h>Dpmo-13__ijtFh&2nnXpc~cxvMJ!2%&~)<>HY zOTWt6)x{U$3q^^i%e6o5o(uB8_jme8*(0u4_u9nd06f~PUz?@`TT|OfX34$SlPZW% zHdqVVw<}1RDxz~J9|~LN%l+8iA-f(|SP9zARN&$NuonsVm$(r^bhHP?9SsMGE4r@z znz2e7Rd>mF`%jyqj&l7o&OHs-0R`W$3w^wff<#*Yr;h&Zoza9VMMvTF7AWDMlRy?R zoG;zt|SgTMyq 
z(@hoc@z4mDs#A90LdhRzC3N#uxRTAl+>5PdqiSPJN>$S5Gn*Oj2c9US3T2$*`2}{$ zWD+Z_(O+(rX04;R-U1nim|T_gP&dQdWI?hUnxp&-5J8orxMiCPT$ep`vg6!-W6F+l z9@VPa(}56e_}QMUp4#@C@pTc}gPrRVgZoYcyUj~FrD)UQ%pX;5L>%oSjc>%W+8x@J zDHx3#C-p3{2`HW^?sbznLagI|Sh=q0cGbUW>dP<&!KbyBfMJ!>h=tMvD}Dxz)&{7@$v76ODb8aPF|1 za)lHutWGae@#n-|`&>X!ubrzLg+41YHebof_o^V|B-#lY)W3iVV{$$X=dFP4dauS* zR?6jQO+zx)mo0|*mbnqY5Dmzb;t2c%OuO_M9R1z?xHeti-z zH@sFm>V@VD8$99%_tujJ&Pc(P56mh}%;^~49?q$#hR%s>#LKyHzTR3**aRPEqHV{g zviCwM(ioj4#cFms9etC2Mk-ygf5?8$l=;*4egfV4aQpnubnV3wcj0W2%oa8CPz`T1 zr0)QlI06+Nw-{}mGoOum^}C;Jnu*%e!YoB3Ve2G3pI8W1&pVZ>^pD5`tFp7{9xvpI zY-G4NkMOFsUHdbs&`>N>zGlivtzXdAm&vUK%}bOg)kWeD*G|`v5}-KmC5(JF7_Wx7 zrgJ>3hILZKy>=zN|CMg`F*n;tjQFi5cwhZZ^=A&uj8%H3JPd7^hf9o;E}&KIuJ zVrT`6V`Dj44JYn<1+G_1*We7j#PgWOok5Rvn%87VIm;+Z5+mcXTv6cgdHCX_Xzz1u zflWLYI|b1Vx(9SYJ|ut>#SykDAP)LH!&;sdb^ku*>2qrP;iY1dMS&aSkO;TW0V&-&3eEh7&v+*LVR(Src{Du$KvcY&qc2lsD z>Dj`HCbrfe%|t}T>)GBUFvtvM9VwmD=0d(RWWC;o%o(@iT9hY=9se}u(PFx~w9OLu zL-`=FH>3ZQb?Apd=Yjy5(2B!u40eX;kB49U_8X*m3K>`}ty}S_#ub;dDx2oXts45# zwiz@_Uyp{cMD1+{7JikS;)RC)*o4|gKxF9$;qN6Xp~d1wG4pG|l5cnGpNhPl<*on@Mp-M^j{EM1ebS}f^^*5*i5;Tnh~7?jZy zoxAU7Md^n$p3I`|5!dQ7T0EZ5pQpN;v*{z9R?yly@56_o%sgR1!sbE7)~4P}W)$+2 zE+hQ@S9)F+rP)xNx0)}P8uC?au_wzjSi^h|eZ(i^QH6A1)~q90a8PG&)t!7~jWCa% z;bV{(o1IQ&n!iLkd2era3u@ARXc5>w`suk zjO3UdpYhccLLd_dbLnI2`860fMJUX+>~E0YT>em*I-fVyuMvHY8D5jSrkmFDGk1u# z29m1Y)IYNkH)_<+!Z}SZ8WueU-s}{QQ{hvol2hYl(mwbStr7fcs|1J5WNle2=6ryncAlEf^->ThvQ*I zFZ$K}_*W~f!cJJUBF#aREl-PIJb@5^-6$(wul@Flb)=rBGFFu%n_RI`(a~$QBSdbk zU`AwRF+62=Z~*6R{>K!(@q86~c}xt+6isXzrMFK+bmR?EnA2tqgJ{%YT=KEhn3H1Z zduIxkDaq9?nz+)?*XQ0At%sq3a(k2_ZzHtm6jh17KEWimPEzB4t92*jWlfM*D?5Yp z$+EX0cuF5&6dl^QlTXH(ccRVvxDNyDzdg z+H2jM&xQluv@cP1u&UoRN?O%X^o=6jl*YzEX?-C zk5up`a+lj5vScT>HDR-q)_u3lo_&wgx{%yZT5xh)Q+$PYF+KGB>d8cCa_-2t_~|q` z1J>j8uD=G{P_RU=FL>`9Nu$b^VoE-ZO)$jY<0CfT(-t;0#%VgT8L@>M580;&8gTN^n5qwlNv^AVf*T9PRieJVNlvXW7MkKX%LZzt02ZV5tpWsJ8+8SD*hJdp7;|} zF4b3=o?}{fq;|9KSJH~~q%{|o5>SkV_2K>HL;gIPJCFDQ&}8droe}Md 
zbad9F=FWsSvn)_L+pt_5T!hUfK5LgvJ4|4b^{>Yy zmOQYEZeJ?SfNSBh?yky$mG^zB*)<<@^#{Aj zdH6~^gk@rN#+2n8sOw8V0k%&BXvjkDz4KVm)aKwHs0yc>VOqt(aYD|y!lZM>nL*u$ z`f5UmIGvT~owpRYagJGmBUtq?!s^!%UuD=qemyc|cd^jzl<;v1Ys)ArKr4rY zam^Fm(fBjtw6D=l8%FJ{DQT}3Ac2auK;Y1D61*48eI(*E(OUMk{Y}j(cKFzJO8;=V zY`w=H%u4vCYDn8m_2eJ`)39$n~*L8GI9bB z^{OR9uJXb5Z*kpv;_VTu7{gR|PBbAM!K&2$f|X41s4z!gUZJ8w=)=+t_9Sqx2wKx9 z|144739%i%(S_8*qs0$)^)+nqxYe$X=O5DX%iz1B`v+y>L`Kut(-eome`V|rv5Nu$ z%mKVLd-r@Uc-D|ey3B+}vyJ{y$*%U9?bf~#pAqO~$puNbz=?y2ya3KOy*PCspxLdyu@=BrB*kGQ=n zb6jh-qjvVWfv_N7t&~0C1R@fCmdkC)a*e~%TG{lykE2AcW9e5wn8i7#s~4N~YE?_m zb^KL^Fgk7XWE5%+@*cjhGx#e~e-A|J22y`T>NFVV(r4|t>-8xyH|BQC4&JjS`J$uq z>rsWeSinTXAUfw5}9GAqhI%Z~bu zHtt>&g^VmtMSU*#q)eRP{H|8@6~+olWAJRv-8k=OE*Vps zK9;2rivG^%=T5RX7pB-5EQr^wO!B&@+R3zI>9hFIENbqA;U%~H4JjLL;JnS5@WHGKy4ZhfBu_H7l_@ch2mc2w!1m+@jh zaR+MvokM{?kfcrQ-Fw1(u}A6IA^hPCi6J6-+4xpr}|P12KOe8Zx(VT>`Z z)$D9>&u+h|o4osR|Bhv$yW$7_X<4#3soYEuC~r+f=2kriaZ*!AYc^X3;ER$!d93mF zB1PTak~_H=YVpu~_4@g2%w{1c`Yt8iv{jK!(+XxSDx`a}yoO>zhR?NjYT*)xv|Q?L z(M@jaogKsKzJrqgF_)}|TETWzz~n_LG+SQXjRuuoXYRR9nYeSVGs#ZJG-;Kq?8U(L z?l}bx4w{Vf*KF;H=i?W&eyY#CD4|e`Ar-9SuWNO}AA%}nKFjTWmA-iMalNnQs|(q1 ziH@?hS*-Fnw`8+&pFIBTcwvUG5}yB}oLg|`%v}Ke;IjGxzOiC2h`Yt%xcOXlH#V|< zf6nRUT}3>9p6C%L%|m6wE$kv_0&0$bb5!baC*`4K(5&VeejG?Xa>YmG>ccl@Q8i&IOE(Q{@kwzM$U3HP*upSRh%9*K;bWy0R z^scUhZj4w?W|SQksSdpbI;OBp8&>QLE(MU<=O#ZDrhVT|1EEojR&NO+vaL853jix0 zd2)J|5u3x(^|KaIls$yB(h-n;AHg=$<15#W&i zu}`0B3tZ+>D=c(<$Nd>o1ld@)Rc4>ru<)v4E(+c>T(_3mE~7AaTv(9m{GJ-j-Px)1 zBNcQh+#JU`9}#{2Q|FMohygBZZx9ZBU5Nsx&y)K(@O_1+RLnB?`L+SP(50h?P%70L zx|8+Gh#?*79b1k&dWP0R4Xd+}sj|)4m=*u|-c1UC+B%_<`8n42wm=?UI*9W|J~VlR zJu1>0Z*De8!!C;!$ahcZ3+MK^BfVC_`3%*LGxH4xW4};iL5l}}217TIhk^j2%j_aM zmE>n1Zyr7bWqSzcHq(S}uFe2D5i13O)~uTy4$M^S%;ih*Ll`D*)vc{9@k8FgWAdv@ z_c7GSe3Z;NrKl7Tq3-VP78Z7NYg_-!6aTqcw2y9?9UdOe)~W-qRZ|+zsX`qzraN5_8Nf|sy?l+z-Dx{Wy>;ftwJJa? 
z65Kt(!GV`klWw9#`RADaoRuM~n>iO(zGkgsPQmuf7wwk@A*!gih!$QE!zDu+y-7%$ zngT~^hkgbYzb*#gIzKISMmbegpPS7P5Zu52fQX0~Kc~JO@NAt12&Eij!!@1IKDDt) zxVnw&-!(F!s_#D}eLIWnAJ*qjThRCM`r6UamSl!96?0Cf6N(Vt(XEX6tDlwSzI?VT zg0^-RVxG}vB8BDft{YXzD6HHGan*#oc6%v-(f}^}p!}aZ<%eBmp|~8sWC2Slq|9z%fKo1dNF$9A zp)xZ8>E6BMwi1Dm%7jp6YeJZ%RI?TMY7d-6cxj}cQ!&$DqrBR~esTujmo6XKwBn@` z^>t;w;`bj-+Zqj&@*(Rx<7M_qIjn9ApVu4sNDGqQ%2a0ALUUw}OjWDtDmt+OqVk|i zlOh)r_bBWsn_MK^*O%+ePsw8EaA}Lg>SF|(Df$)=_I3M^bf1IiKHuJ@B8ei;bt&NN5+Zs6 zTH`5vGtM7A^?qlRTwNMu9pWb#GSUOsH#xyeQbi5dG_6{2Y zNa_9eJcatBYXPSuZ&-;Po0Kc)?SvYBldUcW!Iov2yoIytidE?LwG)!@$$S5%nfPJ`M{HXJ@X z3KgeU+3n0b=i_*jPd_0=R`sSC{|ynEExu|cqgp@hAQmAQrX>LsGRTs>AY~M~S|J}Y zodaNn+8rUjah#3)!lhJ!lNX!y!8rbU6L)Gsn>+WGNoDA?YG2J#<~2{HJDy<%a5b(( z@=)~{`Xz9x27VYGvP97E&O4yt*gHb*5AJ7c$K)DyLw%D)Gbmk*R(f&5$sRPfxT&Qg;i{1Nc|(@A3hRan_;vSjhpMW+_c&AE+ANzzn%{p-(ccF)=u zHPU*PfnA_puVXo$8v+@h)#5202u#V+g(VwGgnl$BMCy;pId;M3PNWkyDP>ssXX}4T z!|eeO+YVV#4eZUt>bQnXxuq_&3s82}3EdH0NEQ zap*H^GxfusAZub1EldI!aXqccCoSq6!qJ$nd-=-|V)$Yw>1&6c1L9akDPZCS5 z;H&+rwi!@b_=M2^z{a^~Gv=W=75oAtXO@;h0Q(hozZPLak_pPuYetCCg(wr8w5k(} zvBv2G$2r-w^Ww11Nm1JUT0KYR{+aD&GqRMNqpoIc&W22=4IbJDtZJEbv}4;K(+xsN=DFJcxU~Y6wJ{jUZ40Xp{2o5P_QNavvS?5L z_yh%bA!H#P7S3gePvjCJl{UKcP5;J?s?n@+{N4Ee)h#Q496bLo<*S#K!1sVMToLp5 zczLO-b?iM*BJ!`=D2PPmLJ*q-ev3{1MOcAPxuPI{wN=Xc=Gy;78#(FjL03EIrwc%B z;eSCjE~{Yg0fo&r2QBTK!$0dNe)T*bzNPXALyEuJFv@~!PIvSJAN{71LX1NeE)7iFBNsD_UUm1?JRWD99&+eWnxI|HKsGbxeQ^n!Y`T8rc*=;=P#W$C}A zbdmcj>Wmr{2Zy7xHZ~g7j)zL%+1iE%ph2M72QhdryLV3A|v#m0)ImEc8!_{xBSlxyL%K@zXnb}OucpQ8mc3fNc zmsR=IV7Wa2t!~ziPfptHF9S(o{cI&m-z<36Pc)A%>F{A(fO_L8Nt2);u*TKw92^`> zOntN4|4n!eZf=0sfyeQQ@IEYXl6aA&P#l4%LF8RREy5zL!XhFf!ouD@E(o)?Vj3nc zX1V(@#6YIksaV{2Kc!3{M1UcbkeIOx?*TFXKH_CBd-_tZt_~|?LXD61w$VIBqy7#e z5#5cpbUTT5p61W5j3FVPXPK#DKy(rU+4DMi)7~L$bOKdqVsgTDHvkOX@!^*|6-3l{ zSJ5X7$Vn487Ytb&NdLWE)QAf7+T0k+r_1Wt*5PXV_)wN+r-}GtXJ8Z@92hiV$jfcO z^6Ij$@G_73tL-7MJ&PH41|%5K|AH7;go+jxGW@z+3v7`Pb@c3gcJ}M{^$mD!GJYUG`#79P)j)PRSb`gLXOQ`R!B-Q{oe)#^@zn)Axirn%0Bx7Y#2bb(E%(2 
z7+qkHerxNB2_%rSIz%>2po#lAg0;hwdI;>{!0o!r9Svv^7GO?-))#bjQ%Onx2U5UJ zNI06WRXaRtXKwNFV{u!&)R*XDeBO%~5-)TiK;?`x=OPdtiD8Ys5@^6K@gxZx$r(n3 zPbl0K%^aML>FS=Dal3F$ZZn}zdX*vSmA``M{7@3iFdAUKp#v1IxiA6}?IJV8hM2Si+d7Tr{>Mbh)oJWAFD91UF=$6+`Oe2uzQy`P{ z1>{g}fe5)DO1caiz7YLm-Ef4NqAYJ~c+lxRX3D9bii@2hCuZ_gb$D$|gIVx%-j zDgNmR50`hxlkHF3a16+;VesWu}dHij;K<&Xj zojd)G2Zz|tjveM;ksZ}h^|rai(Wdd^u&~B)G%aXT%olCY>jORJP0Cqh#`|bO6KZIF zqH|o}4Yg45betMzGS`px0aL?V{7&3ezOrP74-v@WOCcZzBOZ_{Buo5A*7vCzMn2uMmdoI+feg`ITVNVAD zK&Qew)gfd1DLw&SB=@{UuGVH*kME$v3?Cr{F{I6O@!mOBrv#r%wY_r#{#?zwg@UCc z*vWJ|uoykTT8xQU0P4U=MULUW&!c0I=)zv-;Hcp5)tfuLH!?#?*%M!VUy`*QcT6&* ztqlS7mGuB9;e;X!r9?kk<>KZ?qX==v@g&I}T23`>&f}9dlM;F$W)c;mGV~(U%M(J6 zA=I&5x07j30H?X<=_#|_uEIxaNQPkH-bY?ivjqO0-0pfkQh@2DiEvy}ae8Sbb~YAi z+F+!Hi-A;Km?;MUQ_z`SiVeU1n8H9o)LxaHvt2{^Fi9Ep{qfXuySg&|u{BL9=0~~2 zKMQt#iIP6)Vt&IXiuP)kwYl0rN1%4Q_YatMIRHn04N}HU~^8n%aB~niwg4do5`a0-=qbb zqL>pUm}iLk&i^dc`_;(|z55Qk4Y@}Jm)r~!a+exyicX^p;95kh(L!Ul zj9V>UW(dB|kn?2sR_eP{lhfQ)R6>K}JaD);y&CMOohs9h{s_B;G6t}@X7v{y^DUIT zyKidUlg<$2k~j6mzGI7ksEz{dXYR^8SK}We$k)VPCq_%*9b<_pu9jd9QJ^W+UTDzP z5@p6+gC1;R&v{WG3(4iAzNwv8UZq=RPav+fUS^oQ(TsZw>E=+k=xuZ+=BUHS_2Ls_u*|L}X2A>2$T1cqohUElK4)(0%hG zUHC=S05N?(^%`c2`Bwq&uaCGne&Z{@CKKn}xEeXQ8x0`1K+$MP7ze`RxgOs(?ut9Wc1>=ZGmb$q=57Jp_s&?)eW1@+%>V zSeddfhsyk#(h}i;XQXcd4bfR4*RM+7AFiy&)@tnDiXqpF64UBL_8u`@B-no%-&42pGQe< zi}xm3AIXa|Pk8QtAk+co2IcSNPck##?$L8}azZ*g7xDIKZEXRv1&ku=5Kz~+rYnf$4FyO`T7l6qBET{g5B-|Y6c0Oat@W6|cTt99iCavmUk0bY>K99%oHMV$S2p`{H1k6mP>k^=9P zj~2{+UB_SDCMLqe1Tv*PRWDN^VG&{Pt=a8*r(*y&0Wzs-+f6k9o&l1Mgm!?%-9?-~ zzkmAjW?g3W`9!JlB_5E(VTfz%&&q=#8vFUTqcpo}kw0z>li{)2VCDfwpzY{_@ii3nHx zaS4ONfJGNV8^86h?mu`#yp{d%=NkO_;kz@snvwyY{PPk9p4S}92uRGY-@G}J*-O**;ldAA1AZUPM=wD=`ET9!5jug&lg#UeRu2mdtn2A06X11pM|?R&GnXXStxhTr>o(%r&iHL z3SBe+M)FvG@zVv28iH`gsp6b05W_jxxVZS(lte|uQ-G>~TpyB^o`kq13WSK;)Av+6 zOXI~r4a!Sa%-%~R0N-D)wY9wFqAC$Vk_;TB3j9=GfA$r?qX8?l2NcdQuP3*~J~3X- zh?(|6w7B*crz_i2b-Wx*vTc_D-JBjUKsxX5#XXOceU%aJ92Pgl93>aXLQBGA`8N8U 
zpxYR@N4S8^5(oh*;sAd#TC>v_Us{5X08VxL9rt8S2I5$nnOR<54&Ry%1!`w5Uj>|m z7I1^h$-2zp)ggoSom&^DN$|>?;{9s#`daJrBfx9uyO2w#9(JEIy2I1YR8YMr#z8+r6&`F7+6GnD+4jtjSaitiork zXRksv{{)eMmnFYc!Vl{QHa7tv9w;e86C$Fx2ao~McG7{edOc_52$veTi(~KIGv64J zm&AJrk`m-Dv~R(mmW_fJqC0Pbzuo@BV??}%s4$m_@mtjG(EkEE6oJO^e7KC5_p)5t z5Y@DPs`-{fEDMP!iXxEUPS&N-ZA(3R)QhM(U>~?Y2S_t;2^Ak9kmRRS)Ju>pc)}+G z22IFxWR8K3W+ya+jY9gAc<;pv5iodN%ml0Bqjb*YZ zId*FX6%)H<^VHNUe?{BBYR&&2R*#S;xQni^DcV$FrrsJ^rr$V;qOtxgBv6Ze@xswIx=$ue=1+^F2vdR&9_2D7tO zR?l)>MhQ&lwgze5tcBHi0Cx}((Qb-N^A&3&EywNT>>Ic|R1nL?OH6gv^e z;7knMSbaYP%&%F|ml9>0U}CmTN6@SHGiennoJ^A|zUxm1U0ZgcjGY{8o?hWU9O29*LNoF zV4^$mGMgG7ttQV6)X+AE$NSIL@^SZZRZa%;MCz!_G6o!WyF$PPbkr6!5wWPRBWyX3 z;jmeq7^k|iQZljK#&i1MBEQbYBs1QFs7QOka`q%H1+|NE8C)kd0Cc0c?KD82d2Ff` zLN@@SRUc4!Av6(WDy?NPXU@NN+pYiwOo*mrHgnuw8Q>;K0pC0Q5-1ShiAz-}Vd30= zR(>bRbj_D0(Tv+pXM5SeBkBAqFyW~w5FIUR0!ZqD+p=~Rd)sfQr1fho=cpcsm3-v@ zj@q1526;DD>D(#OU}0hM-g$1K*1*FzokK~hLVuF7J^=m1+haR{p1zQ|gb+e**NM$7 z(|o6;bHqwdkb{C3fI=J5k<^>+XK)~E)i|vqF}CV@(~wSMH-!Rh4xFY9qcOlPe&Td6 zYS#D;na4sxQXSzd3;<~w!7C9moID+$q-EdWS2milJj+|XE>D>^!LUt?rncC%mGpVm zKV|C@GdjV@*t4JWZhE$eW4M$&5?~INGy%bfj6VOxP1hqkA4@fFRVgb)V&ToV=MgTm%R3jZLcf=(!jY30ugE z`?1>`=RTzpRtpmWT4RJX+K?*8HMZ^S&@)Ayju^PTTdVb1n}Li?xw~g&V4FUqdj(IZ;qeIiQdlnY4IR+dP#| zY6MEF6}S?Sh1B>b&KlJ?Ufj) z-Z^%OxBbK0JvHPDOdNED2st9*LdFKocf4gcMc7-<#AQ!{WZVzZ1f+3h%J&(oQ_{bF zpciIGNg=)pTi>wen#&Jpy~g*>EDoFE_F4&IoeJaL`g+e8e$!i%~$R#|E@ML7h?!2yvhtr#`6 zOW;T^!I@_-t!A4nHxun0Xo!QoNWg@*S2}We`UoTm);LZlKcb}c!!5%3vAvOEBu)H2 zL^QAlNXpPfeK5pibl9n2UVFZYDTRun+~+^iBQ8mdvbB467T0Sq^n)i!;YFml5So@y zvS@moQof0o@LLKm&udyk!yY|J)lFsE&+;@#ZOh_$s~{`J#!<~hwIA;<+4((j$;d3; zoXYe$Hg$3ka;*|Op}R6m%(A@12~mZ#c6h4C``p@a)-HOhbUNZShVa^fU0-s(QL;#__;Er_r+Ha2wK{6-$=tdX#%c;JIXa7k zc$-FB>VT(>(PW7oH`(L%j&Dhf)YVzf&V@ljeS8uo^aEkC5V`ZWaVI3aRxFIM9~Ajpn~9 zS{teZYNPqQNU(o5YGT0YmN4Yr>)bJ{m{Ky+;Oh|8K|=*L{wjnCiG*`GuKhp(xHLpv z1Y}ljxL?PzsmR8Y)GbREp|)dXa+)35NLhA~|GUbX{pZlJ>^iv=d57?NtAMnW-$-S< zcNOb46vR@EDGWY=@F-&E-+}39HMnUXJCSZMhw-a?+_P&{^GJBzaV7~=20n0A=y)e5 
zpJ$TZBA(t@LONTXnoktB6jm_q9*C4u+9QrSd%8AB>;6@_rqh4cRj$+r@`kBS#>wKr zl^jJDz#d+TI;DRzyBj2yE;n~w3Baz2da^{{?zIBrygw+~xf7S@+CT(HgUnDOMX@gw4QpO*y{swL8EWr!eD z%x?xzCdiR+HW{n|qM*@_baz}}Shdr&Xs1+nv+y;}X7}k~$DNy#Rtl?gpbbkNw9; zUSn^qUCC~j0x)ijOtR>~`c6#t26gDR_MDoSDg86;cPg?}5o(vz*83gsv3ET+xX@0C z*ct4Um2Mw$!Zs8k5@|pQLP4uph2#UY(#0UIhxn_MY?c=en7YR=rSF25*9(0}1|R0l zFx-j&9BVQ0P3~GseKaroF#fUA*S|UKjxAfT2Z@`iIb4_8y0gS0l<{Ib>)wKxIvv~? zzPi({!-iv)DwnPm5LJ6JF}dx6M^5lkOJ$Rx-wTIMb=O?|m@u%Ln_2=jHH`NvQ13n4 zcS|PL%!a*f$X8_g#*EVAl5)8l*ws=V(Q{ll)-&TZYh*q{JUjC!LR<(-mN{l5xUB9S z{+iZ7W0`JV^Q?QYt4rP47bno8&#vr{XEv_R(5ivo&K7n1hP9aHs>EUzk?l(y75AO^ zY55%Hs^>htCD-S=C2$rG`A}_*#5PmY`JWOX!xx+ zbn3)(j;CaH(ZSQaW9Mlb4E^g_i{vPfdXc8E}H3YK8j>229K9MWg~- z1%yc^0k&`k+e-54RI>M!VPbcXR(e(MqT?u6=vXiN!n30 zylojzb=zwN`Ad3~I9A^@>u*``@{j7mL6wecJhr%3mKelxuB8^<**kMaR`y}w?Y#6O83XVq$xy5HSY6+?{|;+E}{27(bZjIApqX( zt#hfGsGBA9+sl*#0yw3*u;&?v#VpM)VZb>fIeE$i9>8{_Fp-9>ovzP_tD8PY=j~5@ za^5S{jc2^BQv;g)K=#!>47l*?up_b+`{nBe>qJaW4SQju(;gXGn?Ia!#w;CI_I5st z+o^th+@&C1R3e#i=n;kMEpv2Ls_Le8c@&~rmTWWv2<;qK2bYhu<#&4mx-)?Ur%3K$ z=(c@--QX)U)Ml>b`NL`%)^eXU0g=f{tm+Y;IN?Q^c>5if=17nJ(c;x3*kdYH%xi1G5o{gUks{oZ6E06I}X!Yi{m|IR5eDui}29 zdMj6udTw|)z&dowrg=MsrwnI(Y8-8pC96CY4~#rc_CMU1v2NxhFl_}pWYOAV{w?|Z z>FeImf1|eEAeo#hT|At?Prd8!TxcwGO1|ts z6-q-52SVfytE4?~?yC~#r#HTh?aA1t7YWM<6(f-$Lr>Y736Q#t24D}mhsr*Bj7!O> znnq#ZpXPcjX1TK86Esp!FtxYwcCGSCPx9tlsqm0$loH`pxi^3U%Bp^2*``6{Bs(*e25PVUCYl&N10C)ARqM1jMVb3DD zZMCfpfPK|nT)^?1#V(U&f%xooH7}dI!T+TaH0{%RLA~n+9GZkiPajoxQMF(WskS}* z3Y6Lcq!&O(MfS$pGB8ieEIYDFF7?pC3);_w*bcT@Htyo_?kGf3TY| z_c$PR>FG-V_vqy0q-?NaH|y%Oa2exHl*m zHzfopj0b)~=3XMEW4;xQH*t_nfqCPlcKh~;8)=6oJOF}n4_<7?C}Bll;GT6TT^e%0 z$|9oc;l(<=e=`sLEqKa z9@)R*hGO$8Vzl5LG-gv1XaY8v&st#v}_LpwD7j?i5`YAetkBt%Z( zrua3n5KZ=$;3lo3*87&Mg4C}J?fG%@@W!%@oP^%O}IAn`hMKk}NX~Icf{0`V z$qYkAa?UgG_PDouKj$1c-{0?D>;2=Jg?IPeU0q#uT~*b6`)sdN78=#(adFo5tpyqI zr)3}ev&#Fa&FtiA_tHETsG~UJNK*}#v9QnnW=qIcr2~6+4?Fw)OWiwbR$Q_X)gnxr zzu;>%rrp=i8ZfKBL&NbQc!dM9R=~^Xk(!y7Y(z&0UicgGr93x-)0_Pw_1Iq@?#lwh 
z75?2u6w@XoWNYjA{Cvk*UyW$oz^)e)@;;mnf7}xy1MOsHaz5V8hn<@70sDX~v2oBC zvV$HTF3nOlZt!wF-d!oKH+ZD>NNwi1?R?<%6km{UuY$lkEE0BjS zFDB0TRph?Zn|&(=dG6$UyxWx+pUjwW#sLm4!u<}*RLDUKDKa&=&f~ZiWf3tzhkD{m zaiVcjaScxq8f|=Ub8ctpR??k>Gwc}GjVkxG@p%HO6cJw(YPgiI(AYRBm1_10k!(b~ zk54S@TPIe)+^I7#QNjXEAs$|b`#&-)l~d)QwnzNm5>`Z*ri3KUu!N0O*sH#qZOwgs zk0WX0B&qs!qHtujWub3vKJ+j}WwV`bRvdZ!;i*2(hZCPO*6O!lG6}s^$2PJ=vS-~i z!Eqnh&!u@WFZp-Gw+6)<^t!(ro!&DCBY1K=`s)s<98~74Pnq~T;mSY_#7=(bzl43O zx^$TIAb#XDh5_uU_KkPM3yM|-erd4avlS-p1?=0-7F8X+4AhqI`^iovWAe%Is?d8}(mT_{@PubuVxzSy2vNCi73Q@HfrGuZ-uySf9w8-OM*$v+h{h;mu1A&V0<5)(61wDg=C;U$3cE5<>AZvBvg8u=r}R0 zNZ-Q&zr!p)d4|F$L#3&CCv*og8MR1tlFgcXFYACcFFgr)9G+R`5xqVn=Q^X}TPp9A zr_y8J0T;*lB|xnxap4X3>{M1X+pW}y@Izb}#lf_#NHaxqf zq9-g@As-{Rz^175@-Kh+Z%E~@Ep_caHB)aQ&`1f{h-5k`D7j?3!!xzp{jzHF@)DHk zZI0ZnY*asG8nR&RFU=N0`?f;{`V8cYof4k5?dC+$CTB3h%a)R2zW?N!vtT{F_Q)R% zeD5Cxbi2UkJ(AIeKOh^Sw3>$c8u1PoUaUp5kzq7#%sxC9Pj5T zEq>fdrj4Y9I6V$K{RWfAMDK{JeUCFLv1KFP`;+l9jw_l3I9!oU&Aqot$hfgp(loMp zw@A`ed*sb?ZGjS5BWm)F92(dg7 zO3iq5`EbD1t?=h7iV15xGYa8dg@>o~zTZl&DM;poA^l=derKakH)$8i4 zOHs59hpm1&EKwvR;cBc3k+fM^sX@!~t|&87V6tD!cb%p~CW`=QM-Sqd9KmJwt|J{!MS^lp)-ybK&YH*&81Nh(n zYZm@@Gyi9J1{csKR??SG3vvNm_y30XKji$|N&c3exSip|IzJVq{WfL|0osWwil&LnUW&vwfzfVItQbMdVaBn+Nxj4 zv&f|;zmwsfc0X^Cb{afmFSVIMcEEG!u&=LweieSQodtJa6BQPLurfJ& zR*y!vfwA9^OMGD(sEgKPa_K zAs&8WWR@lyk$RsU;9O+A=hoE0)>=<@J!&eX!Dn~4)$gz`Ib8Ixl0Xc#PT;qGR)~mP zX96jTTJL^L1wmzrp=V2bFg<{djD-C#*bv}Y3WiI8q_fkdfuf%t_+|ra3$V3uy~oBN zl`jTb0)&adcd25>`{Cf!_`$EcXWb3g=zsCpV;+>IAMb-Y=PUwBpQA25KlEf!9J^6c z1H?O;o%mp3(;20XZoVNr8`}X0~1d@2%6TH8lmj+7tXfTTr4y5UDJm=th&|#dif95jFPQ*3rw>bEsmk3VQgGhT) z8^ENymIg`Z=Ucs`VLlI<-Jw^zLKLJHqr^^h1!rU!tD4X7i29%_)GSagudPGZgU`C~ z!=X>PF^Hb z+39V55$(6I`%Vf;RmizsMS4`rMr`U2cn2m*ow0!?K4Ja69vOavHQ_x8J?>BgBLj?{ z{Zw69+bjmJl&NWPM`Pq5fcr zK&;khLqg4LKg-l^%()%7ouMG>Ydb(SI}<6xBfe{8(aKJ?KL@OE6DYA}>U1mI0; zX3v?=I=8*7G68lKP)Cp5{f#Zlp7B~=h?B2xwDR{z;9rL)KWUv6p7}*o<-NBG2vch~ 
zrHJ>ThMHLr!DrU(=~7{)UYAJS(~>0sr%n@A9knqMC@WLRLJh7#G)zsV0AnV+rvr+hrD<*Cq+&KcTT0b;3kWU zbW#;vKp;kHGkTUCMX+zk+LPA4p38JnkvTF@Ix?eEH>1fDfT1o#k&oIlFgfRQ$%4i{ zZl_}8bN-?LMA?;6T&RpAUbtg#6G!sqUElgz_*z}6pO4nYf%a_9Tyr7RQgo{6L>EFm zAcpFxn%Um!6Kgmz&YjK&CTIPFEbA9b>--gRmQxpu<1%=IzigNtUlj51>Nd`;2vL!M^cYip#jt^;ieIm5Tfttots#^W9yhK1SWA8Q)&QknE8&GKVR|OeJ@%ibpjD zD^B0#9J2p0&^e!b*U2{S7B%@w1!gEf>8e9>zR6D5kIms*^DZTYBOc_4?9d_f)> z9Ul25!am!a5SCm5v?UiL9c^$tD>0aK`fJrJ7eE3+m%on~>! z|GVPDb3VFT$+Rn)jEstx=oJIZ!e1n57?;(rPM7NK*!a=Q+LRlgx~SqN7?}RIij^SR zi0W}*UOKQtjoihMfewP|oZdT(lUs#TcPXsY=xW2@0bgV{s&!`LD)%5hTE|jLh@Ty9 z{gphYJ81>+f45o%CAy0gcCt~ox5TbT&;m0)CgP%fBDL}J+roVRUID3R*#1SjlPxNV zl#H<|b|z=FIqj+4EZt)LLBtuy3pvjJyUg*8zqHS|d`>4C0QZXFF#l0`DC?qdug>ZI z=lu)tSR-9)MWQTJ!DKu4jPLHU%KbZ~o34~rtpCf*|F#|s9EX~JzAyLRLTESX0hh9? zFrCrd%d+IL*B36HPWqSAB(9<&w{B} zuOA@7i~P%VdpZ@{VND#4o*eb~Y8}IURqrxA0mMfPg*-U|7wcpdw9FRdr*RK6$2 zM}D;~^Zx~62s4#5*M5_Tq7^eVGD=EqfNagRY|Ue!ItUp19W0k(*pCnuWZMCN-bt$C zi8H}^QDXySe-cAj_Lcn(|4YajwUWpOP^yr^)vN67t(N!@6?2qY94YC%1G3&{W2zAF z8-UuAL_Hh=BZ0rzDz-S1Vw7rE{_eH2Ion6&hXU9W10**)*v0(z*8v3qz^v~w0qWEx0yyk4 z;l25fm`6L<8&H6c{he0;`3P1qp$@P%E5Ggx#wB8?)#m0@dUuGh_u)21YJgGU%*Y#&^-3TNy#(;xm5@5SBg53A(qq;5tK3#?MRoN@0BHOHm%AZHMfhGj}tc;+<#1+2hY zbJKH(WwQ|67CPO~ode(0>vYILlI9aAZYem`=5U=cw5k%|mcA1=pI`&?p@ckt4+g<085+)5E z=>WgypMitZ{rHKy3UKy$M1=8!6FT55xW6f`0t>%9)K-Zm`mU9dTHo!AR>3F=%#~M{ zZxSXETwQepyM&CEnx8eVf?5Sqv zl3=N4`h4PzB^P*^f}yx$iWfnS7`U*Em@vt5IRCcPi$>VRM{~07sqHtYIkf$)E?UY) zbTI`u!@6ayLCb;&2{?Gc_v?g7`qaL_%{G!@W6yJLzr@_{KutmzuZ~{`9301W&~jct zaVPEq;EaiV!eI1w1&TE>R~H_E(eJV4VNTH*JQR1V$4W2+Jr-%7kMJz4y#h|OHTGj? 
zk|fbf@n#XWJ8>{(TZR6}O%B=xe7A81ysXVN{>lyNBJA^@J#p*uLcuiT#m!Q*{Ic23 zW8o3K*K$nh)9@2${4H^=e9oG8jcM^fQiMsN7`1(?BJN1}B4d6Dus*tzIt!Z(PFn0gYuZ zw~{gsJ0>T=`dFm^8ObK?GJ`KJfv7W1_aL7J=1LB?`_%kvNgDoO`c;`PzWFGM!@-#l z6Dq8}E8UT_iRw7B_u`{|mVNyt+O1{|Hor*#*2^s%A*mQJMQZ*#7cdHz_~M&z1*Iw# z5CDiB3FJ=*XJsEd@Bh&y4IL1FZ}Xk2+;NlK#b5aLW4PbRQdE?2gI|U7 zE4vMMqlpTVB9Zf0Tf}5!Nj=90e2kM~l^OOG;59`K{{9i3$bKDtUpE`C`L&P*sE|Hl z&-duqK}pGXt0#Pr(mX}mNRdI@Bppi~r(1C-n%IhUfS^i1R?+5d!D8H00d2A(V3>=S zI=%wkQZ}Ld?5X*Em9=O5p4-5+D#0K=Xxq$mIUYIkUDcHkE$~Jg|A(K4Lw^+vU&N?X z9uXOFd^mYriGE)lgOr4pZ^zq?2&YS9i@7ua`x3vGaI~N5!7MXNCZJp6hOPvhK%>)1 zhAF;6?ReFPFi?G(=)IJqsL2G+MS;)onT;fSYYCI8#&;9864q5&Y#3oe{AnT%4j)$= z;SNW)7)8-)GmY8N0rl9W=6F3Zrl2bW<-WPZ?(vrb{$T&zd7xVX0<)1X`RiiOU$|r> z8zC~*5;plr(VQusnfgubHvOw0=afsl%8NPw>P?6l zekSW2*cwKgAbhjEnv?kAvnjVhONa7fC9zM7n;BC}LQ{UjmIhkt8f{^ICqI*l=^bX|}%*uyZ`Ch9}- zPv!!}5hKOX7 z8fjG)y0^1OK;6S3e7SLAEukH2J-G#V$$@3#x9Q5!h-61A8J76GP~)rwBhe|Id(rVm ziBsm!`hMugY_NcpsvsewOsZt_qyoH80X$u9RX97s{iY^ zKesO0#(O$V@+ta7F^1NSUb%Xun`VG#Elkv>p`AX9lOynpSLq^!2FYH#$(?cDor}A6 zysH6I?}CC`LoY+)B2}D|QQdoXhKh?J-9Zf@A7`CJD>%O%bi2|J4=s-}i-FTjU)swC zyjlS$@PA|?-T{+&a5RweVhQ&Jr_pR~q_wTLVlmjK*cPlDbPwDa@Z0tKoSV{a z*ft|fTutK<+Cgmr&n2d3=wzda`95*JsI=EAl?)i2;2-=!HQKjmIN&#?PU^ec(~!O9 z-7Lj8-Z~J!pE=apaN-Kd%e&1*c|AcImLbYeTT1;4Uw`G`S@vu$ zdyOUJ=N`*d)>Fp43H8{pcQ7#A&&~C9yX*WBCX*Bxf*Z3~rTe-WBB^uz_~`ycD3 z(<1mF$rZUfg0@<%ySwJJY;+JqlE{lE-52l8CUgb&5(2|b%R$XwdF+;@0R18~J~du) zBN}>7!@kHuljYK33|Jv-X8+INE~1uxQ_tCCYd6dHXk3e-BAbA>F=mHMp}a`N<8X<|gMR8oY*(M3J>iK)EUM^x0S)4gsG}R8zNc$6t1(fX#Y{A8Bz&i{pm2&VE_HGSzOBZ#fzRU$JBNGyNiT1ExHv z$X2hzp9r9yDAsuI?0W8WSywn6U4q{j>Vt3OR4mll?A+2#GipJ&dZ{B0j$`B43@L9G?2nFihsrpKbg%L* zhPPaOEL*W=P~D7>M#~xuQ%Dy()Icgzx<LyRzE?E~0H@H_V^K&h@c^ z+LxBFbN-rGiLbVbU^2dacbYwJC+vE@(d`%Pz&I}{|ML+7)m*meHaa?%a?m?XPXp7c z2=Y$C7^`2eetX_-i*(7n11p4khIo(!yV@ZATeFgKj}~|9mTF)!@K3w-E0XRBR+ucU zfhySI^-tT5f;BLB9sea>n1fYSis=o2XL{i;}Y}im~ZLsYI z!cb^+=h6_11GzivV$^G-(ueNjPi%&X^-2=kpRaK%8K~m&On*%LP zwh8HP8k2=OcUPWyl_uDY)!%Vw&+{rhx-9raRx@gM0oPi< 
zHn|+Ge!7Qh-t#-LBZ9ZjDw5-ImL8X*d07KKd)dX@2~%~-4eiLHl>9%E8EkZ-av&i|IGUbuve@kW9VTc0?|Ig_f7_9MIpbWwLANU#?Y0!x5#CAer|MEZ?=4g77uAO5yA*8@;{a;lln$(1mNIBG@?O z5+*v1qK_0Kf7NU{VsvSANj4%T*^nSO&6L|HFeE%(fgQE5sdb+wH88h+w<2X`(OV4w z-LEWOOZQp~B}x|8#ID5c&LJ#HEAsTcel3U5ynxwEBOhp|-Z}E@(GR1Dg)E?EzZfth68k^)L2-l@38*o#_Q>a zIqoqFEg$QD(S@>|gt6CA#yIGjYQbHq${O*tS(Kqwy+qVSYGC>85t$Vk_MYG-gEH=D zl1Zm7=E_W3*#3Tlw@c(6&5MGV&ztagRcFze*c= zsX({pB-qyn(ycCb_pPs|8e1Xbu2RszMYv*sd&j!?^?K6lhoBYM41jPMMPMjY1YU6G z-8Jg|yW%GC80v$OdTSye6UtNJ=5NConAA}H(rE3rHtz}ZK#qI{@d+I)Q{3(*YRtBw z1C*%ven=RGsA~cO#(WE7S{xt!ON}|YWUX0Ew{>JA9FQl9kyuQr5<46VM6WU>Z&0gT zc}vgFsP11Ah;iE3mZq36x9JVS_|}2D06m}c2Q?E}f6>QlV!+iXZM`*rOUc3%>9?9i z(0#oMmyqmDqt46Hzltj{W05I{%mG;+e?60$2mzmJ88kg(f5`r8pF~C%>+_i83x~vC zLrc?6{!LTD*6FnAF}qe0(5s-F2bj?-p8tH1@WKs4wz~n{PmW@K2gWr>`Zs$Xb_X>b zNptZ7WOc^^*_g|}6eK{X;1ORopbheXvBeEckI27h+y`wp_98$HeUbRT@*wQ_9V+mD z_$UgOP$%=nL~W<ZSk0k)(2c(GM*y}#trqRp4t@;CJNPH#{lnWO&3^NCAu%Zu49Il40%-FxX*sR9 z$s~|k)8!X)fO<;+>K%}rj`^zSpFqLxpFja~CJ14e(13>}1ogN8$Bun5A)J4#rVp?O zHT47bw7Bw!bN+4rMr9-&f4?yb?>w^tcw;UuPPev8Dds%NE;AT-jP@jWP1Q@lyDxIX zF?}QdOx1hfPf+AY)HDWsbIv~kgn;fY;Phj-1ZbRf+XzF{uY#G9c|F8j1p58@KMY#P zZ*&%7=8+4AIhfLaAKdB5M$NBWbEJ`S&R-V@xO7=76ik2d&s!gd{Sj~$CDG4R zu;3R!P{Lp3x1ps0KP9b&Zi3MV;skd}M%!Yv^TNOKz3@y)ZLMK1<*X>C2_x2JrDlWq z-!_fnpLmzZ#_u@$TLlny(GB`^4fKEI{O4hQ45Apqel%CS6O1Vs|4?gJuM|7gcm%4d zLBPxK0V5M1p#%wdS{j5x!i&^3-hlDGEak+kdNFQrr|A;W4u}qbfTMquAfowqIHDCl zKm`*BAOdTDcbQldV2l?NwpTHtw*`_M@_Gcp1U8s@yVS@%-hhhbU~ypJ{bjFsxbwBy zt6O1V&jA(fsS7FxD!Kq((znFL161@pR{b+{qwc$bNjfPIb|tjn28tknY1L}f&j94k z4&+{s#{>Er2CYh$)bN4auYrV4K5L7=Uks+CP?G$|B}`mr0)fC1G6WlgtO2asv8b9P zP%j3cUiq!%abPSULq*Py*DII=hWCvQ>$MyR%Y0y)Kp5(OSp%z0<_}uER8qMJ_M{mW zt;^R*Qt%6t|A)nVZGV4#u{|SO~U@weZpFopYcWf53|g9~|>mjMRC7D0uZM zFtIy-kioi^bkT}J2~)f;o>kL~kyRzuIAUc$-3qiDdIPrI4e8gIKL>$!*EoMiS`0zD zt*nIj^06jpXQn-0;tb^KsB(U7^;-Li31*lKAS~%pf?2<7j36x8JraSjOgiFfGyll( zQfeMA25`T}u7%%Gcld+{9%crrq4M8UW1=mJ@joO<6pZOf7QBmL*p6B8MdKLI-0Hoe 
zT_S1@5wN|zAejJFv4U*wzpwiL(UJ{@cBYQkI5|5{BaU>_rjKTKg%}wb`5YYbhA#7k ztcVU(As+IP=^ZR-SV!YGD80H_w?@?6endk!HeyN5-$?V7$C^$`PZxUOgc;NUJtznU z*B}6S-*ZH`yI&wM#e{VUhopg}m+rt+9PkG;&V3`#>`?YPhC40wF)F)pqrFZLcP4RI zHg8VedsT9qr897jWMJk}jv$Qi;RpVw?+fw=Yoi#9?Mba-Q4RLJ^cw8QPpB1X&JN+S zO_mYSXB!EDg+pR)x`r!yrY+Z~#)NUZ+O!!(MQl1Mw1fEdR@v4L?e}Kc1e+Yv*dtvm zA6uFaM3Sei?!12_EdF)scF;ZM)O|DjT)(#Nsj|4fv=o9Sv+Y>g;cDYJpB9h+x!nD- z>>4AOn{$i$pgm>^1&wp=?w~w617i{+fPJQg8)kqFRNlxNA)s)(!U7E@bvu*7VXW%M zNwjoQ^ca7Bb)?rbqbG{?Ca&@!i#)7PQp)0y1P#14+5<&;L}8Ii-|R;wpDp3_X2#p( z65ZBRxLsM%BP#^?8EN1hT$g{fyghJs#l6IM&uxb7%IbH!;x!UpMEX*p%3+v$_SQ|C zjK>0xj=K9E>}zD1i9YR!UO(;z$8)rR0c*eL8rothMl+3EtR1G&{=vq^P9Ml{e4}s86^+4B#pKzIRdl>)0joJmB<6F%qteN zf2BspCgC{QAwJAKm-*`X>CrEA5l3d+%l=UPJ?%X!x9{q5~Axp?fhCD>5D{azKZgQh68_SRtfZEs=hA& z8vU0(Mth?6H+Yhe#V@jX;>okt$#TPmx&p=Po0c`qwFMViGC99bwTIZ5Rt&^ehh7oW zC|?NX$>26FpQ5v-%GrU#zP!OqzI7 zB}U~Ay+HcnA=mONE`41v%WC(7lai19#-(vwt&riDDiT;~T@M(K==0?}l*4*+#Cyx! z>a)^=6+`-Ft=eHJ#|2|v=)QR5vLoWQmb_eVjqZZ#;T{t-G-|H@=I5LO~<2RpWC(B3I^OXUE1Yh zEqCF2rFIEU@3mQnKMoaArYpC1_Kk3f2#KGtN75e2dt7DG1r8T>(piQ%a(I^%V_m)_ zrh`lp;rnm^3{vZVf&+zfv_*<$*ZShxz8F9Go}6qu$1T*CoZOS1O%er!SjeBM-^mtR z7=@p+FsC&~tuPnJ)L|b}BG6u5ex)>NC9*?rmqdTC!H0->IN@-u4OzC^?u-A}N|QQq z(|)k1k4bxzJ*%l>S}XsV>vOG+0rs?O=o`abu@J5WO>M307oUwiFJw zpSvU_*X6L3rhi!#hZ4Y}pX+aHti4n6sxZ_znhdlJDC=pr7r|@XEVEu48n0|vfIJvh z9cgisc2$?yD_gWBXNjPE3g>($6|Q4|=K9GiYZY+i=dG|>w#llxEa_hvUWF^l={15~ z;8ZeAb_m+;E~Yp(Dbxt?1QQ9Y(OP(-N~s1WVA=<=j#&slYvmV>-rXt1-U%6)#!CD2e4xa`&|aO`s0)&Hhx;JB=Jp1`R)`h4Ubf%N!o@B>p*>3~ zXb=%=kr-WC*7no7+---|y22Y}E;$P?oljO`N>EA9x3dy9+2L~V;88U22~$J%apXfn zjQRLV@3}}?De6nOTUv$N%BnD5)?o4fsb2noWRTLgxS@R((l^%jeu1T2teG@Hg_nyl zpYpj3!Ov}tN_|t`j}4zF=%m^gj#89{Il3h+_nw>guCrdg^-D0LM!>!|#+5XGA62kS zxN~`rBsHXRVXLp?*N=rx`^S9pf;?;Z?B)@}g!zOLiM&^jlwMC^$wr{*g#)8mfv~^@ zyO@$!4knB%LGe#x~j#L_|Z^5cRaG=)i%i;0^{x?%c%X~oFQnH<4(^iovqfv>au5+ zK)K{e9vUjS$0Phc3)xcg6#3LC>vpnTqUU;dPSuCSy{W-;(Ss}ko)u^!!4cM5ucdtB z?RdJ^0SuM%@Ebw9t8FEx&j_m9#u(4XIT6sFX2rH|)2Tm&c9_QmF 
zCdNNk(NaUpYmGM1$Uk^pJ{(zOnswmnB;@f!2oD)Os{XO;Xats=lUea(+BQdojTu4_ zmO3~6-0x2<0R9^rI;k2(9CqqV45MnCSGyHrq*e&j+M5^pG} z9bhD-W~*|y(B8`J=kodJ(KYVAR<>QUtWmyFdqry<-fCU_mFUh_txrYkTuYCom)zi+ zT5;29J`)TG7V_LL1ahv;(=-J~T<7L`^2U6#C_N0vXYMwQu<4DeKbbD4h{MsoQ(41g z<;|{Plc(`q)}*z1Y#xG(NqQ{tm>kCOgQF{f*S?>9{?iDybKmmqrBZ&*iFT%Y#7 zBQZ76bZ`%Hq7;veoW8u#au}f5b?%!N>KM+KAU&H;=5{?l$D!On%}i$Tso$s!z>T*2 zQXu*SYRxZV{A2gcPZ$Da0SItUkvh+tbN+AefPZX8nJh(pC|MXZA!4|kSl#3dtv#%H zVK&a|zSOCAvlY4(*^-%zpW}(z7r_SBT(-w~rHf$m)^2*O^ZhD=)aB6vAGHpCWAbnv z)YUG95IHR^6%m_0H~Pys?9Hmz>r@7F8Ktx;%Umb&CasbaC=ztJ=NzVV+~T z6D{(Rf>xai+(qzbKMp!3Ip&F`mfpRbY-_bm0NP3+OBNRFhcyp`V3kLQeOu=_4-wIjQO|;nzM?eQ37Wj448ZLdK5T#9~Mho4dFsN zW~qdRrb@dCXPQ%hdXRqEAP!r`o{fQWKv8(|qn*SCuU`NhJ$x%^J ziIwhNS`PP?&mr%-zL2zi2I~h88iIjG@8f(H7D1&MBO6?&XJ)?U9|;MqcH@m1lwNo~ zeEwQ-b#jGH3Z{_eyi(K!@~55|3L&_JLrNxk5uWf03g;#BF0N(NuE+dE`_-ch1}_O5 zgS%GBr0zSuP#W&1p_2-P-QTSYg?$T)qMiK0pM5+!>!aukHAYvC@+m9X*N%4HU0GC6 zvWcd7^mfV;*ZGCent< zP$BPbO{BUi+|A8<=Xz)9#F%-!v}A{#(UFLz_f-RUBW0b6#qGVN!8Kw(Ck|eUuxMxO@IylTrHQ-iUu=j=#I}gGH|5t3>(} z0qUDy^ObLu%=o(Q*jfGxu`(u#Wbl@K(Rc-9&OBREF5D#D$hspvvZr_9%Iy;d$+=sR zL;6*)C!Q$$PHT^q(dk{XDmQ=KopxgeuNY;AfPrz`tY)UM$})WE=MBlc)**M^yMn@#*!EUFV&P|Ha1{2ZSzC?WV#ud((L+CY!=jPf?TvQ*<~>58 zW%3~I#ZjgGPwj%uE@`yQ*NCvnqqV?Rw~{^7PGb5o!c@1#A_&g-=JAdT66?ZC7c!c< zwUs@+>b9gey1iv<#7i8n-^Y$y&w1x#6U;j!Z7t?;Zi4M=1tfB?xT4>U-i^ZNdpBFB zepqng%Sj)NxNC^;^zyc!7;$~Y;1WaS)sP%onUY;wjBqHd9UgL3o}X3BL~dEPBL72W zB56g4n8P+y(ZY@*&6B&kS$^)hLBG@=PT=Ss_3}w918R0w_l#~XSs6K z(|yoSTA7g>KMVJUJ`2MK0`h|(?ZaUGo16l;KaL989?=C49rx=E~tbYyV7SYAAywy71SIw_dZoEb8seEt~ zgwZ^IPu0$#8|}s`i@1bWDQS9?G2}*1aOCZ@;M7ppR=bB~xEzf%skF~;^=MdGwO2RZ z%a0=-Hf}PNX^W(V%R;x2m&>GmJc88yJfyh{jr; zemBrg!f@12=Z9_e++hMGEoHH-C;UOOHR)`nUb3xvp4}m>t||q6z8wvcyx>!BPIX%U z!~qdX8pPE(W)Oxaw~}9HlJO=MMI}|wZfNCQQLk}T$yZ+2F;N?|-Q>fES(RmDX;&rY zWwhpmrV4vT=0R1<3M8GEH?K!g-qXM3Co5-D=Z&knH@9N;s$cul{$STD-c<=6(!52x zM&QkkHsD8r^yUt9p^8WMx(OKc16LB~=!8T))m$HGnr#+GZ7|11N3D>__N7X(^x{1# 
zE_lhtm+K|Q#ARR;c~~jPH(a9b44GLb?AOnJ#yt~$^C-u#R*HoZziqKkP({~&)xx!< zDWnYE!Sm)IN8ZjT0Qr>()QSYvuP)76vuZN>l3U2iSRrBgy8ZnNcY23^QMASniMW@@ zxs7Pr8cTJMSz^h%km@a~y`Ptda;O=KmQc?rEEDQu&emL@-Ri%i`{fhs+ko^EapF~d zt?|K_AR{85UVh}4Vj!Uy} z+y4#s70p7J4+C3Rw$D)O$F3eQ#g8mXWfpNP4Oi$HFO=W^bh~(R*=GC^T}B;+_&{C` z9v}wiwH~up;8V z3nL3Rx*3ZSYgNz+h`Z&Ro76v;Q=}b=TFBSmI!t-R8;QhSRxS z#}SZB_l)@Ag9Dzg?}~J9oa?J=Iv8J$M8_-N3@H7es#KoU%G^h(yIz>lck{d_Wa0{4 zzGDI@WrEh_uirEqB0BWS(6f2=s;UuXIUC|=bY$OaLGD{ig717CxfWf=rOFZvPCh>r zc(f_4d61 zTTuo&rOZsFo&phE#o}a+P|cMqI4x+$ks5`}4jtC)p6@8f?k^U#9#Y_P9gOxRCI&_5 zC{ZLsFD|VuM|U2|;L zn3U}^Z~-w5!opSmsT%CsaYP5&4RT^IHRwLgWQ(pLv`2>BM631a⪙~lKwWjjh{Hj zUM6ucd@m=Q%m@~Qli0sTSm@f2Y&g(y*TS?s=aKK1T8p-F(So5R&2fZbn=lM%8ib*R ze@Ks81Tk)Lwx3)5?M0CvH2BGruGi~3Oc0My+am@>)Jy#OaW@gqehDURSD;-_2o=iBsZ3Z2*7zx4V>=MMFR!-c zrMh`c^XP|8+xsDpZbTQ!?<<9m0+>3r#P0`380^%1gs9_et~) zYCfb1d>r7W+#c)&Ag{^ihFmaft$EoHSbDn)M$?dIGZ6`KB&F7;|2U#>A7AMR`k z+Gw_!TG?TB$~zvFVutqQ8->GbwDs8k_3{tFog!&jIy#2C3G&{azR%~1B9GoGz5P}& zhA8(Ww?zn=S%ohWa8HHGtHxMJXOx;MoTGE+&5wdZ7>Tt(P~>S;Y~acbXoZ^Y!S*!< z%CyeaXnZCIgUF*dUKz)vD_3CpuiJmt-MlzeCqEwBov)*=qWjT4y1t#)}2xBDs!N~K%vL_2m4|*8{*4%QV+P60f zma@0?;wiZx$sPkCrK&&mQ@8U$3{R~;C$ESO!B2Ttt#{P)RK#P1*pJe^ceTEkWBZUR>7P+-SR8>4j(eJWW6rjYX6mF zpor@&eB*M;(*sw=4wvY2v%1FJN@FN${FsF$VWues10D}nMh>Un2!?I>jUMNV*6P>x zoeEg06hr!&_ay8YFBH>Wq?)CIHTq=HGyCsx?H z;??zkMQG7&ifH8yWOnG=<@&KzFdgS+JsZhaV(yD^2#w#ZZyVEqG!BG(DK+A1%Jng< zT&I1%jc_m)V{%?UTH!c1hM1+O-QfF&j+KK;j~2OoODSt{H+~`JM(b8t9dUe#QP)uTK%A+-t8-LayUUO?4=7Q zMn{X(o57zx z)rdX3f2?O`t)BL7V#;8f$TOnYT)aolC0VL@dWn|{9@N4TV+un;OAob07k+S+6m6sx zsCfHD-Q*f;Ybj99AjYXvxi~nQ?+#&Gd2Z+6&t&S2OcA5&tf4*{eQlvQV7~G|@N%@ttmdL_(iX5oy9%?q5E5eyM{uaw6OUml!|I3P zI3(LaSau~=Xi;i@z9$v^Wj^0QVl8EZ^6rz99IdpS!Y)q@_V$bc{_ZaHY&29Q-Ud!z-=dVs|7ggo`XcZLUYL=amDJ>4R{JqUM zrE9Qnpky;&$IE@NVF2y6bFJ9YjC|Es=xR8sRt6dVWilq|qLb=Qsl&NePIA3i((z>q zx4coK(8clBU80jQaQFw_ZHU%K#(zSKz=y~(s_Q*TLzVB1=(6TDecvgtUPukxG44-T zC{iNpx`xQM6y5%*bf-sG|iMUz4*DJ3KgF|UwG+;Ag?*`H-qMBsO9Re9DZwH^^kdOwZMm6xCohh430bhO^<|R 
zp))toR(aV*dyj%_4Er}_Yur>fp6ZedW5X?0^LA{OZfFa}Ezb0g+ezp|4#E)IjY#8iFL6!dYnPPG>H5wdqm@gWns`c%VqIHi zIY#^0v2ir8UX3GoDl0d@4wHJi_p5vthD8BPugmo89$IRy<(a_%%A;4 zSkJ?t?v7^DqFWojlt(`s+gMM_i-9ohw(gG&Vt5$uFCXcCFxReg&ZrPS(J^$5*6fJ7 zlE-gf@;c7+CK+Lpcazd^diRGFD#It!Jh@uy#FCW|s`nI5(?d#fWE_#X)@DI6ITa-V zq5U#ftc)03ZEk`u)ajD+=bD;UWN~p$ujBd4;sLKe_{`WC)h1~WETQ{JmK}RxsqD5$ zhdxd2emSI}Zdb`F@!V+W_!HSBHgUtrl#zj@90j+n%nv$I6(1q2d!s8UdgExfLvBqn zS+I=w1>>)mj`_21MYja6RQEXW4(;~0tA7xw?WlA(SRf>*SPi4Mk2+Zn5gR5NHO+Pj zh6QM*9MFUyOC;%XgW^Hgj2(Zt!JLPFt8pzec+9X0ty=6>pd)v##tfFV{hZjw*9 zyF(jindY5V<6N;!8H-<=TmprfZ(;rL)=hC&)wKW9-gkvXl`Y|##*7FeIf+P6k|LmF zktPaAk_-xnk_E|8L?nk65y?W6bC4Vr5y?sx1Oz1KoZ;4X&diy6ru!Tp?%N&a`{tqD zyY>pTs@AIY*I&&jqYWF&-9jB3as`th>I)GEF5Mxn5qmqdmi`QK3JJ_BgM$)lWlvdb zMHy_J8%)u1TU1a|VP?KAs7Tg;pJo(urxL>2_Gh9qHkUh_Hl@3&bLt6mO~WQt{NER*Ffb*tN_k)01ZSVpgdkc(sqMfa>G0-Xf3Lk9!yKiBMis`0TOhG#<8_ zqJ2L8`-}6Pch{pd_TxpuSI6z zoefCyN$lHvV_lwh+&q(gCQVMxjmtbbqb^GqRdZ^GA;UqZ@B26Q=kkz^xFWSRyPIJf zKDN`wcBwFZckf+OVn@gKz^)=%VzhLNkus@}MP7W-*@nlH4qNzU+=0*Os~P-~ft0$` z;W9VGIPBMZM<@M^#S92b7fD}uMtV45kD# z%0i?@6$Fcv#j;n{k{5(|de(X~3g>IS=*^qgZpbRPrQIU4oHFus?%J_%Hr|SJX^QPC zOrS~*c8tx{^TE@VDd>SQ%1ttwoYhiuaBJ>*)s&UxJzLliP{&Yf>sYB3MbbfK`9R`j zZHpP9Zpdc;;B)Is5X^GvatJvUo&ILkYv#Gva+xtI&`SA^(RM<&VDxtjHJcB&?=Fd9 zPKvKc1eo^@Yqjmp&gzaV#{7A4tPh!-9NEN^pVYNwada|&; zDvJ7oNzk1vCQj{XUy+^*N-7JvGa1i$lqK-x`w5j*1@gF?v8!GFW zrYX-9MRxK}?I`6ZGLdAO+)ZnBxsWa4HaIsclI@dZFjpKF)iASFY_+zWek=5WTiRs6 zqR;1Jd=vS!0%pWTx`CNJ*G{ZdAFw1oxgzAKClJVFoW6l(iFC(Y!KbTB?$)-Py9pZ2N$th-kegOcf?(9fRf$6ryi z&!gs=JiK;aG_|;S)11V?TlcrA7%pjf8+A9!H3P}yd&+M5cJ03@NA+rT6tjAl#xp84 zQ4kkQ3{krTC?N?za%1UdzuL=ZNj8tzWNYQ6_LsX{j^fM-Zyt}9nR8x$zuWbri1jixAQ#{*6o{h8n66->lj@AUVgIIFSThRHrP%zUB*17zgUZbw5L=hK}o3ddi;_4xs%n}by|wo%pBf; z*DB25*5Bodf1c!HyeSXjOwEYdJ9h*vuGK z6*=GuKb8b_2N(t$&ZB}?Gnt_^T>ByWW}llg+|+K}R%SqcK2Li^N4%v1u3)-@pCC6Dqi;W0 z_>0*bv}g=$krHyv-W6>S@Hkw97^Kyvlng+d zg8+SM!MH5|x!o6r59{98FR2Ky;Q#<)L{%s#0@{-vvUrp8xI*CO4luQ6CYU-vfEwUJ 
zA^EnAu$Ig6U{3n$c%a?8B?`AX65=V)$OcHQ4(%9#&sGruK9+g;1uwv>5thy+A z6_)yl_hIP$i^Fgu^&42bMchZFb z;~$50z0r~p0Glg^_HD8$+(1&QpnW-A`2dl(0EqnXEKNSZzha?%wNdjxYCihXkHX92 zFk=&7V7~$Gu4qeq1`;BZ0TcQ;pvwfXxh%9-0*f9i`R{WZIsWqkQ&yU?>l5Aw%i&;1 zE#uII_I-%L7(x3~pnYZo#@7M;ewLInY;K^$f^ZLkB>K-3J1;>hFWk=5JzpCz^(or8 zkxS>iVR)^Xudwa@PaL%2Uz*hTox&?2stn-gkRlJDwc2)DXvxuZNbpvvQbmB+m}4h> z8_%65>uNziOXbvKa2Ws;6-LL%VPMr!Qj94niZ3|D;K1GV~rM z$18Ost(k}?1pQEjX)KGZ=u`lBFHECcG6ExU^Ei6ar;+9x%OXWn+qoJ=T4!eehj)JP zn|&)n)1yR)>79A$`MFx_(w`kX%O~X{Dh<#I0KorSOE||5>xLy|-GR&U?{+Yv3)55x z!SN-4u!dZP^kCud5=gSiLXf!AEqWzR$4Bvs3%WD@wsQ|0^V=@UMFXSJi--1$KP1t{ z#Tz%6_@#ZG$qii;D!HW5e?9j%1P0{$Z?%5@O}KHysr?j;4$wC}ioT_i!UXVr!c$N8 zdH#U~09|YE?U8y?bro@TX0L`MSJhPQZ%&B|^Gc$FmnG4Vc1*89SZGeg=6Z3&UBjOG zf>6N#vLZ?TqLBtwcpo%n#b*%Yt`%I|O)Q6mgVNH%^xufeNI~=)cH(CDnSGvjgaz7M z3T711eS!H#Pt@vkMsj6dJc>c`cq4vSUDVsaafGI?5opfQF!Lv7>=4cT0FlGT(FsRQB`rw}dvlX`7vm++lEv1T?UU)jLv2NZ zOfG3K`8)wXJWr%?yJR`4ANxsBaPU7maI#rc2I;^D=zzEN>q+RqN$3FCPI?G*KoUC8 zoJSOEfqIXzpT?M&JLbtM&_6JYX0B6UR-UEbnGKrHmXJt_*IA7Y9pynfA^m@M!dd}A z2_8xWxSL#TXgfg2Yv69wcghkEI{Gi5+^0Rl24}qCrDy!Sf?CgR#LT zOPiq-bNzu4p*oG)=_x`MX=)=su>p*<%$DDdxgwb(btjA6UiW#Y+yFHoErmtCO057? 
zFvQHT$a8zSAahXu#7XbQbLMElgf9*Sm`XPR?_h&!t_yEtt{K`m@pN>K~!#yJp)iOGKfNFijcp}6CmvWEettyX~9ZVs$?a3iiZ|kMc~KONZ9au2U$Lyy(m> zlKS$(;$Y4?$8XS{%b;j_(`T&#(7yl+b_iAu*ge3s2WCnud4X8D=)V}gXA=KA|6m@V z_KTm$_QRab?)%jw2MZdv^@&9VSQVnQ$n`iqVEZHV3sbHF&IU*(#`>#!#Gh0xgTt6u z2_px3jD7oo?eSX~|t2Pd@ukuTB)EljW_fd=2y_sKNA|BE>SwjQ-GBT1yYEt-F9*qY`VW_V3FHv$?PvD0fKk|Fe_<7 z`Og8<2@}dWA<+bLE(WgCk5^IgCtC4O?+aFnvHv09H8{cZJ!lUL7kL6pQ^KW$q zZPsZ2LJh434Q`2};DVdgV(8%GDfyXm$PF0X((=~r6VSfczZh+B?a$rBgW=Z-SOPnO z?Faq?lmo7PPPpSISne;b{c=k;Mj0%JCbZA)NU9*hKmjxLHA@{d@FiqUF|S3JfCg&( z#SVdMH>nPM4tKEk!SvLZC(S|!?|=Z)E+bN)(Ex%K#$M}-kW?J`Z`ZyIv@#pw*!j~S zhw9Ncvbl_a-QRazY2_cwau`t`qhF9Mjmslu3Y^Y=zWO=`djFHh$ACYb%O8{++hbP0 zjq-HN0l|=A^z6fITuh*ikn^Gc@c2C!1y1Tt$iT-gtmMx!Dsr}0RdEy>0Ac`ytjGh% zipcPdWpHs&X}N94tiVR;VR#ZBISz(-P&Q&fpoELOj%oMv%ufW1gso+6nMkb|0r+TB z8xE-c&rg6db;g-HEkcI*9*#_Vs{LC6M1H+LvJ7 z;i^Z%p$A$T%lvFZvdK=>7;9;FXvO$Xo~U5Dn{E7kU+y9hENt!nN4@?vEWfMQ+HSmr z%;;}81`A1IH>=Q(1HLa8?wW<9W~@UB8^^l`&d9h~rLltR!?k{nrk@!%AYmxc*O5ef zTPAbk{GxFKl|CdLHr9I2+3% z0+xfIuZ)4!0NLtbmEeW!c5xM|q0K%D{XkhQi6}UQ0~QUdkWNUr!%309|9~&>PpXOX zWJ&s}foI1mffzjCFLWb_Csm!2bWD5^k`SU}I;)Ne6jol|Fm4*k>byT;*rEtUw@l^!3CS5)xR! 
zMGk+uJFSWYpGI(VwHLVSJIO*fE{L=e`!g{wZgyOn2r55i<^hLiY)cWWDZGf#aBk1^ z`YtU2?c!Bmj=Zk(Qa(WPU1}zWb6;_ETH|{!l!F9b&0Ze!(I^r!Y1pCgRv#%c{EZSRGSh(~L;8kei+6|rVuB=*n@IcOW+oIndokNdm z9;gT*!dAiL=Qjf`{NJSlqAH#F8rVsI_fep8-V#RU{|N-0V=d9;_B)$H+fFX{P*!YxodvrQ2j8^}%%uoL(24=bI){*G#TRiB)oJ=EfoP|(k!nY+N8+kNI%HNdQ~Ox9ig13-)VWBHxC z_<-{mNEcuLQq_d`Pj+Hu;aJ~zQl$&qu+`TmQep%l@o)NA;vjaL$3cx8E(mloATbc@ zm=K@)c^36@6O87M$Mvxs@Ri7a*v;ziz7)La<b%Xoiq?J-?*Kj>a(ALA-cnx zc8ftqIGk=RkTR>?QTWj;v@;pMJqIPV#;jTW4;l#E#(7#hIX2I@9Czt5^$3BhTUk)Y2>Exx_`4Y6x$+%Fl%yW85G-e#oPzL{jb zW;dAj3O~Ze)0lemN7_>5shTcL&LoL@JMJ~CbB4Uj8xs|=)^7BipO@fm=Te`DZyn3` zq-(=2Rfk!p{q}CYV|EpjpfzX2+hemj`PnWc)uLD+(jb0E!RdU*E0qnqjq^3mTyv%@ zCTW9Fqk1u}y^lF7xaP2{{uAM|`{m;cFcPk7IRf{@R99@P(V4q;Go&rcF*`*s+3sY7 zO3m=DYTa*ACEA$W=J9NZy|DSAqM#q$t###5rnSy;UVX%KcgC$DXqkVebp-OX#kCa2 z`*ABq3mA@gOwZz^JiW8?&OMS-854PPmy(L5J_g|`RCk*QM((;#ykkIUZcXXT*x@TR z3uM(KJ>m8Cx~^k;V*Ahc^%AgUe()Xd+iliqFuoHfv0U9@9$r~|hwXUSZYGmax^@zW z;P9N#N%2Y%6*;rqxgqghhlWckzBC>*${O0NKKi>ItNkVJ$4IV5m5{sH;aZoS$GzzC zDN%7skFNC#y;peQFxMVpSyj~M%yRKyZA+g73ER?q%K$aA;N8^E96juzoqpnI7zc;V zHjfb=z4Ic1lhiDS{Te+w7q7keSRExq67w!Za$r4$Ynn#>g?GKo#g3xNVG-AJ{psqQ zAw30Xx}7MRmo0hB{7nnJD!f7$X$-3ue8ZSJa5nJHiJuM3jC9~Jp-OpPPLppFUF@`D za9JtFr4-ci zp%d6s)P6J`lSVClmQ`p`%TPPGf$E;a_11i&#NZ0{ILtsIv@fX|V>0C(HEQ>^Y(exI zeS)RYh}m29MiFf51=w}oJYiC3kf$H#JA10pYe?c^NKannNWWi~=75)LWz+v}y%LBh;yFFoA;MK7N8T+?L>?aIKhV?@&lu(7`cd zw^8uv5hg;vK+Rb@SNG%DSs8^dC8hMvU)gye6hNJpfAQK?Dj^=O8HFQVy_etGttXM)!^<|Yx((mnC+G(%qP2` z*_mYCrR_|;feZ(`u3n0ZRB(BQO@*d|U@i8_(=!SBOg4HWt|v6@U1nk>mFZjwrFFo1 z)MfBU_2G`;;eiF#TI8zvA}ZImJ`J|H=P=5c5=npLJSd=3KQ;HzvK zwUJP|^KgQe;`o`c$l_PW*gV^kYU3mZ7j#G}clG!Bb;-TCyD2hDl!=&MmJg=&&4@=t zNWDw6Fkkvokf*{Eg(ql}Xn}{rE78h>x;2Rt=#1mo%ENu~Ia(htI_2opSIHdX`q-Tt z?YRSSUQBd2%b1ED^`eWDpfg*3CMuhRja+e7Db*B#pO1Gj_#KiC6fH$Um;1ld>*?939lzG z6kc@L6a@A=v;@dlz3`$CIEQXC#2$yJV)(`JlzIP#274evSmY24g(%JwjrVC7E41jh z@nhbRvN)<4asTh}w9k#fu2}n!#o^A^91@G zX62x7ca7rEnKNhF6cv3>oM_nCu3uc7yOf0?Dk~`8?lSIe6XR1?)7EYtBOxKyhO!zF 
zn)pI+6NA;29I_Fi<>R(lYmS2Y?wZxG)zU^nP0iP^um{E8+@WH9LYwW}H?lHo zD6^iLjpAEtoj9^KC)6-v@W9{|F-hpshAA1jyds7D zQeexb^Zonx?XOq9y~@M41;;?fK$;_ZdKe7mCKT|Xrlc&hZuoB>>2-I92Ll2E4(sX3 zQ46_1g@td^CN734y9`7d&i50Z$9;N_)ZK2+)13WM?fI^|yWGkFx~UI;cHOuoySSmz zU%q^CoUr2HUL7B-hW>+)_uFMmx$c227iBKxzAd^Y#KQx{D?oDHa4nSgB5g6qyAg!< z^sY%GZoy)1tvvI?U#H!V;79YjY_o($IBZzDm}M?cvz$k1E0TmohdV$g@-*3a08k*m zy|g^>4B>gh>Q1x$51cs@eCAAXT^%&gcdR8TDRHvYtG#|W&qas%$E$~BNbkQv*}cZb z#^vSZ_LsOt-8{hA)|SV~`8Y4{#L|*AzgWByosq6Ax;0i*i@x&hOC5D}bxlo8)p^{S zsC3uRB4@d2+fq;`RIbVAil`|Htw5rtrp@M0uGTEM)@r+NABB#j=nbW@a;*IU9e0Z& zW^(4#89>DsP{*A02w#sFzk+k;deo#;c$;sd#8v0t}Pp#5*%`R>BaxA3EG`YXi~?7jX6m=u2YE1n z8wnTAWMq{Ups$>fi^;4c#C4qmdC08uKfYi*HjrhuIZKy+MM3|*{Z|fjR7H|0LYs$e z-jzr`8Nfu3j*U*1`nHP_!2pA;NYgnS?EAOl<{!h%$3Fj4)p9FJB{D?O+pRBfI-;Xx z%x>ZkZ)zAxOiCmk9b1x?zSPi=W$NPJRd8OO-cGSFbO*at-{;mtaP;FxM6Y2bx5iJ! zeUg@QClzZbs@+ zV3W`uP&Gb2ZpR%F5g8d4t*xo5+B`O8p6$gnv;Z0S*oQ=z7G1!20?VeNDMX zSDBR#C#=V{d^oPWB+{RNSz)Cocc$~9UnObu#=ch(p82_hNU_G%CpyF<(i8AF%ved_ z>WI12ulfJo#8eE9Y5rF`a4OKBI~n7oo4h!>Y^R8902K7(k>CWX_iKdfYVwtv&RQ`p zeH2^k2?a_UavJ8RHD(2{PdWW9q!&iXXHIHgvwtXeF@*QqN>bdX`Hr@j@_`_!gdX-9 zSt%mP?!ESYrF#WfH`hiwhzQ-C?@ca_E=$Yk#h?Ho5)@mZ=S>k&bkCkn;CZYCz^bN5 zkbQ+iAf0)^ayK9L=C%9%e$HwXZV6@eT%Eay_pG~o?sh}ySSG%*H(A)OZ&VD8sY}xQ z(G;cO{!x65iVBWwhWwT1!^{Czczy$A>nn^~Yut$na2h|NQR*TYdWX8~(W0t_E#BN! 
znysOsy9^as4~4}v%ng#~ZfO`cfs}(WP91rpIVuzaB}OxgZkg4COo8149_LwhQeWCE z@AeOQCn%(7$ zYGPI4v2dwcrL;GYJG7=>$YC;R_g2&Pm8{~=yEyi^#)m}F;em8JeYMBiOwzaV#>T(y zcCT&SD4A>D%_d7%w_mFr(56XACtGnJ4GiEWv6PsW8y?Kuja}(ATu*k)UhHrsIwojp zZtmk;v|f-?JgiWhPGpwv|MWx8NQ4;rmTQ-|atljGyGwC9eO$1*HGhOSdY4A9x6o*1 zQHVUEx5eXf;-k(l8{CE47@tRq;v1x$+eRA*QU}YLX%GMXFymZ|nIu_Dn41$?Uv;>q zq&r_)-BEhOqRFl!xgS4{H#^HvYg{5hVJ^Ca{C)@%p=gZSPGZhTZSP<`@P&&t@c7ap zK@!=S@)lPnKef8}tUXTNuZz>?JLHUlnR2h2q^4kar1&(V%u`2d^WCN2x23SXkkz!2XjEUO!8;(HzZHxnc25XrAa|Ngr6NDIGC|*pth*PKaz6g`InRnSanO zD7oD?Bkogvb7e@NRTHz{-sX(kqFmg&%OOpfCC4X@3b>r?22XkubCIOlBBLnFso`yYcQ7`zbaPWEfP)~+0us=vfUc;9`#d$)u5 zc}PKP#qNA0T@9;Jz&UOv`b_~ze9cO$%&CTghuX`n6}meZ1$NyNY5qiH*E}EhiTvgsRx>2*4^c%>lvDxw)cPo;3CaYut8qcM zO;eX9FR9=Y!m_ZQ(A_BtT9KBjy<=^^qIdM8C!6hV`SnJ@dp5`R0#*X0%Dq_AiBa4= z@`K%-F^h*I`~}2JI%bE~`*C<5)?+`v7>B0I7>l#7OcO;{C=NbO(>9EkD0`zccWCOw z4X0!^j3()y6z;B{x^#Hnq4}>nd+E*HJ`_ei?G3|pfk>e$gOgXgH zkGrgI%}ej9ge(5Y5<|C7VtksI7vZ>;#~?`sA77=s@TGlI(e)uhcYf&fs`U3KN@;YZ z&iw3Rzdc#ER=;Otw3BjeX=A1#>C~NOuGPCa^<~Q=a-08%NrXR= z$gIfJk8URPX(M~z49yJdbe6@On_R_Yij9&xNrlmMRZYhHw2A6hStmS+&U)Ap9s78M z&1I){e?G%m(Ubk4CynX*rT`A*M+z(F68I!FqiMlt9?2=sjb;^2R}omiLi zKDY3n!5Niu0u);1Y#!hhu?cH`hQyi5CjFUw%2Hpi;Mi?5Ae-KJ2D1;W4=QtzN@Raq z0PxGY?=QxiDoOF3Be9<|CWU`(=d68Xe;YG)m9Nr)wFu4qp8>qR8oTjftUaNHs#q|< zzYvP#J^EVK4SQF1<9;oIx5NJNCJl}$%J90|NtcG$kR1@Kngi$bgErIJ2Fp<3?6jL6X1C z+uX7;IC8SFl&!b8PJXYjIF3W&_Zh-Z0sejEk00pk(^DJ@j{77gpr`RlTwdS6!C`2k zt#C@r;w{d1&m!+`tyRvQJcY!msHmt~3?2suLl8yGXZriTun{L0LkkNVgHbiMwePz- zjuXp$e71+p`^eX43J1*kmvM(tB<@0V4W2A<9BVGO+I+XO)nZu;F*pbjfKZL+V(sl> z{ZLBWMMn*e{>|*HO-;=ecDvgUg@X7N9ECskl@=a3a>TrE@xOfJ`|9`a@E}Cm;HBW& zKplQpK_0#c9re`8V<-@v63dj!0q_~F6_!xP<@tLY+&1h{)X+^b_@hQjBx2XC0-d}1 z`c_uU+PqYL{sIo`^J;870$lkVLhLXs!+BC|J}j(k%`Htb{>?JpwjKA@_jm3PC4_KdqIN}OOvEJtz6J?&S33%HLYv=pmnF18Mpjmq zXpi5ScMunn?x+c->cfX0A4JV87h!@RTrrW7vdKo5zqp=0pmMzU0y#UGw7uVUq6#En4)F?qtr%~`Bu`Su&N>qM3ZkA6%PqrZnh+R? 
z8`W7=bxjs$&dSJ^t`Wt&xRdf4^6~AcJFh~eF=DTl?2(#QZi~E zpkf(z8A?v#@V78sy==;t=|ziUI=T#~+tDVh_E8Bx`VrOmw*3 z7lud#T2yniB+yY7srG^GITL2&+6CLcARYZf0_*VWv)Ft7M7R&mN^v+5naE8BJwa&k z>~QoM?4BJd9QF$m>>I8bATTg x!$0cqm$m(mI{c#!|ER;ChWr18v-rNZ?_IJVUXWL=`#$)SM9GR~irjtre*n3g-NXO@ diff --git a/profiler/advisor/img/overall_0.png b/profiler/advisor/img/overall_0.png new file mode 100644 index 0000000000000000000000000000000000000000..f74cf2dcf131f36df9901e20ea327d509c6fee67 GIT binary patch literal 56377 zcmeFZc{tSVA3yp$tx8nZvR9T$sD!e&ptA2Wwk#3GIt&K$Xdwxe5JE_{5Ms>O%9?#& z$5^wDnXwPXa&GGR{+?=n-{)M{Ip_La=bT^H{4tr&=llNllc1O|boH-4 z{@k^P<_M4cPhX-KR!}OPidMhdnB`{Mk%Krn9lc$g$D0nJGfIq$BqdGy zU02Fg^xjNV%OaAJMyqhfrY4QI!wPdS_a+Bzws#c!jnagpw?ygtq)&5N{#=IqSnqwJW7fE_M!70s#(cQ1s ztW;g=t5e7vTu~)b*UhKHtn0`2U0rBG4)}>Tacn{_4@8?1(Ne*fJ9Xm4a36z|f-Fa3 zS28@$FnrcpmR2crTX(%zuXAr zjh9`S8P4fb!ZrQnW^cNlSsrH0I2_Q5)vxR+3RTFoC9#z)2pK9eVoaZ;cdXx5=TFz! zQ?}NOlT$`!Gw6!a7@AI%B1~t`Rm?62KgSLuYXas7sg@OK>F2*Il7{I|Gm{8)c*tXe zUT|edk?rMpd{y1uxS^WqtG$){l3YA)(<{ygasBeB*mOikSOfv5k6e4)QX(ULKl6B( zKEfS(T5SdDfF#eTi)2(E=P&CiNkFg2T&0*22iA#|%%~ECb9@}3uekIKO>|dTj|_UH zlezbz#3#z|$N1@W+yr9n3#?6c!YTM!b}&56O%`fB)ea#a-qB^WP=y9Og~pZ z^ZB7)G{pyK>a=vMjR;I+u^`X*F2@XHO&BJ!KER%5bf21ZcXL-7d78nDyXUCV#N{hK zy%;eFqf^&ivs&)=vmPCOobU`p7zU*xq9+Fsj!R)&dL+GK&o%FL=w zsd47<2ZMB+{hb3|NGlhIg@;hARFO#2xizHK8>+gja6}b{^2R8}O!x63-@5h6&MQd690V#c#uXvj^M-(h2TyN=bTl+?=U}X`w zlXgv@Y^JaDLh)_`S362p(o#g=X?>Am86E9~=03QHGo>&phuD`h5S6ROm2pK=->8O9OhXa#3)Qi!K4oEgjiWQ-TkbvWUy`OcPg z^P`O=OQX~X1-)0b5sxh(i3BHp{9ryc_xMO$+9G+m63!f$$QrIC-5FZ4Cqkx&MUZam z%=GM1(SEH{Th$#m$uE-P!@1HZ;Z;JaJ||#dUE!GAHfz0=lD$LJL9}9d=Qup)oJ*1VH|8!ONsaCMj`!rJvJWow70uY9-Sme;!yu`7Ae>Q{Cg zw;3}HZ(@LDzsBm8@f+yg&Z}+TSVd4>M)dhsh$U#_NabB0Z1tr6hl-Jif!v#XhFMC zZuC|}e&mf~eVhuOQV(Wa#g2lac-j7g>U0g-8Li(M`YJ=p|Yd#r5g|D z(rLw=<2o3U1A|V=O3$715qg8uPYHN1^db`RQgBxd>$MTftIrP|XH-a0KEvd&-lJ@` z`Y^LDWqy=ZM5*23VZ^dbjncgg5tPF2GY4$PM)!XFyA`QS&Ay6h*7 zKJmh0r}tjPFo%aYG$3QsrGGe8J|2wmVsA$Z$4+!=`MN6*i4_?2Z`&e`dyPJK*dPvD 
ziZ&C17Fu|0Fi4-v0-u;9m=|2fd7(wY&wZUz{Qwrq(;j{?XHFBpieK9^nDItR%!7Q=esg zM7#irmDQC`ZtEFb=6Hb!=VZ+-o7JWF;~l9q4;lj!(T_%P2q{Su`_OAc<27+Y(*#1? zlMEe+1<0`pkuM+e$n-F*T4PHBHoc0H z!?S0L4VQF8th`jGo5EyflrkD>yIK_>F12Y*pfwBGn(0IcZANaf?|09cLmZ}rGA70* zF?MTrRxu8Cfu+wqJk%GvE;)OvgFPj*Li?j7q#-lyGPqIA_%6O-C9BYQ&K978@&b>u5dc13r=>-Dcb0hhrgHIgT^C!IdPwlv8vAqm;Z=dCeDv@-V zMQM7!P}cy>$L$|8+a@nax~=mrW);-CT-lP)bHhY}YY{zz~}Bmv7ILpD{LLTA}sJr@{}a z_@brMR@)Cu1(!3V>D>*~XJo#~1oJ?=3f$L7AnDyZE?`m9L1TInlX(x_S0aO9;u?_g zZoiW9xbo@ep!tzXW_^ouojyw`M!V){OwB(&PLh}zV=C|O8UDsOojBK5oG|K?$*er0 zENf+w?N&P$4)I02&7HhGy-PiILeac4WGDyc6OLcB9gH8rOKga{*g4*DmSHr#!9 ziUs%u{}ACRnkNpC>qb}_<4KF-JEnW{VC7bE=*a6Tb;AO<0Foqi;eu%FzUo%X>193V zkai9}uUbi_cmakA*tpiZ9I2}8Mxcrpm)(e>?UbATXfzXYf0<%eM9+!i8vGf~yqod9 z*UlIv!qc{loEqK4QwYcpg0!)ev6btp$&^?0B>C z?MrB=L;F$aVAgqU`gEeJ?&0)vrsU4c7*%E_(%@QIVDj0KVOH3iClnaE6?(BSdr4}l z^Awsl>e%_t)fom6+|7+R-|5fsumikG9wlE`=>A$$=MOjxD^SySvr_0v3kkf&baug)$MGJ!WT3t3}Y$r?;kWt za^HVBajUKpc3{CP&mrUBQ+BOn)_}~k28@;-PJeXn`PtN_P3NgJU8_h|9Dn2)n_gdO z!`Ty=`aUtayoj@^kdtqO;5O#(lIfqttZR$-N#1tedLL8ZLFu^Jyon=(R%*Wq%EZmz znihxUc&69jKM6bMzt0-^Cf(lQX&$^N-HvG`WR_a_v@Axy7 z4MJ>l79q9@ned0x5q$yQ_3UdYsQmFc*f28^Ic$+g}WhM}dEKzkSh zbdD}vjMRF{BydU&A?3FF(>3#bHfOCrg!xoP>BMqQ>m+4qndjGavC3YN@Xh~p@@fg} zgIdB8Ljxn{U9(Tm?x04E3Bv>U_(A<5ZMA3p-d`S-tBmZ^NXv%_ZiMsPRy|;3P8@zg zxZ9atZ6K36&!FWGu?+x%LK^2bX(vilX@Eg;4d z$yFVwCx^U5$By)QiK(2n>Cg;bb4F+0>^>pc&-fioc>86~4HRA1UD-!RcZ@c9FT>W1 z`r~uToZ&YQWJKr+afyj)``E`;p2&xkG$g-Ds0PFo-*O3%>`YBz z%5IH%`g}Us!tG!tE3ONIcP~b$&j~NjFF zz>=VkC}yn{+T1i5iOs#$hDmkej;fx?G{j%I4@n7S*gd{{ z1BD`i-FjuOGw*2*?H@|pou+&AAlRwLG+K-eK--FL%S5b5Vy@fa2Kpdf4c}H)ZcJUc zQ+mpL4hk(6DBa(;hTENnJlb*mxdeMP$}SVzWTQIs&8%ol@7=)jFpJNwKlUX7>`U5V zTiKBv#KljM71(0hbE{f+zH!VxAp#}fUIk)C#dp;mQJ=_n760vW+R16l_u)RJQ?|4z z;^J~>$HF%$4aOVxN^i1C+QQb6u#)GO`cj(c-O@GH&xF`VqmV2Ewb1#S>yYI%X}n%H zMwK|2yOBq#3y(j(Ie0ql7GHu)_6@eHctITF-RVn+;jj>;=r*xH4qMqaGMsN8r+_)JhbE2)=dybkXV&-4G0^M`pcJS7$*r!!jS2(VM zxRH?6G&LQJrE4u|z2`5b>>dHDc zRp^Vq+=eKdzC<%)Q8^FG 
zg8gA@xHHbDvWIeJ`R}{xQ$}F<>Di7`LV_j?yi1)5Z?yFf2UdPXu9>aK9tjCs8v+3c z4|fm9R67(WMlc@aRlF(vQk%`Kx0pW^(KTN7nE17q@*LmRL{ye+TtZy zi5y&UG#_tEiJ?9q&kM`aM5DN~qhH7@JG|KlZ;nw}oW5t`d=<`*QH8;+>;vrfz1s4{ z2$~QsUFf|+6U}X8%B%jYfv0El&PtV{)p-H)MwghdtFDQYyb8mL!|ukr+;l7KGBFTG zozRStYdy8j2KH4J6fMCh6`y%~h6Hr-G=?Uffq+iHIj2L!v(F|>HfQ=l3pne@ zKgX0mG_l_=x!{_SnJV>1Ic-1v54l_iX%(x_o2C2yJKp+IuKY;+hVJlJ)y+tWUQ~)u z-Nmz>3S-$3tQhU9wm>dRtto2v*hr!hZA&pW2Tjt8-v?8glEYrDow(nTDtX2>*0w`8 zn*CNo_946c_i1P9GPS-q$6muLp{j00Bhwm`8);lY*6ptPxjp; z7;!YaZmFn*3UlQYPZl_hE*pWZ9kqkbr5=;&o6Ad8&=jl?2rcf{mg&5Ne&<*SIr2$1 zS9bkGZ)oUc*wpH*8JUBe`MPO zoUkM7(pYl_8gr1!=3+grdWtH-dED1s;?b&!Xq<$8F|05v{cs)`GE8q1`%9FHK9I ziL-4U4<1gd8`>Xmwyc*EmEq#iKOJ8t;|;vm?B{W%K6p3r{fK4T@ZswvBbSD82eL3x z`nhi_Et|K)X0~KghHKmniRAWAvnkVpR?FoAMe+7R>IG0t8I-a)P@Ctdv>=X^yV;+c z{cPEo9Otz47R~2L$*#Pa@pQQEtK0?%4WCjaGHP_Ertu!RVjKZgXjF27uH)$@BkAIz zjTd#F@O8(YP!jd+f)l(KT&X?1Hh6&jrM^xRoY8 zVl_gy59zWZp987*idp?q(CElep-(vB7`zD6hOots`vWsds+-_rp+5BfT5zBlN5Qp4jUv3Hs`L00OhLB!{o zL@5rnnNXG$)yLuCnk6<{?17yYc1kEsfBb>(AcMSFIH*7)ms%GbHbrmU`EGlE_?-jB zm)AlYY#5M%!*bhuE}Kcqc)NR;B0lNNS0MZ|Hl1~iGTxz~j*im%r9Ig{i8XTx;~?3Z zV(DynrSDpvs@^UKFv@U&K;A3ObH3O`tvivm2DMx)3N_7T~zzZdq;3v+pRFw8iHUpSiY z$-FDT=^ET_u`u|!QTyG@>ZgtW=cqK6YVxocwD6tYbEZ>2g#GjHSLI=Xm~BY>r$2A6 zSAHDiF;V!J=Xlqqsr~-B4rr1BirT10zP|ZuQry)zH}_6y+uxh_&kMgk_-j%RrcC9j zfA}>Vc;X=1&3EVKVyEBlv--m?ocRdusROl9d7fVZmE<_a(kjn8d8d;6Wl_)f{`qg* zRPOW1jNQN7wzGOc!G9A_ZQZ@q+X7U!iih@J^}_N*tNwm!dZDK`1Tt5P%^w>0&rJj} z`rOk)IOy%D4nq{#j!OUX{Bu{-|Ia04@PZnuA`aBpM#?4ObuXyUV zcFWH?&hwu8y?ey9rJ^?j1BA(fj~~tIp!`a2Z|_uuzgUsR^|xth_ibI~1E!|7ukYNx z{^NN$)1)PFYS+v2WbBF*;Lq~i+AvmgB)P++kxPF^iJvS&p3V6y$eD<(LlaKjzDpq>d>d|qBJ>3|q775o27p7*@;rAuDJ>f@?$m&J2DY$ROgcN%uP{ z?o>ta8OH=*C{?HxRbiUl0YS{_LT@0|y@hMb!w?er;osTNf5ucWbw4W*5hANB1*orC zb$iVndMrOr^@oN>Y?CT1+NWj~`{ulTKP_I~v`sFBVD{BP6e(oK`;Wv^Pf@$i3XGY4 z{6HdBw>_L~hc^A$xAGjpWkp{FLyH-wo%s2u9nsxDWmq42;-If?YD-O|c@FxG$F_&a z>NrAWQTK4dSN3(ulrEGsBa~divB=;>GRO3~F&0yI0dFFshdyB7JG5av!YH#iL3Bcp 
z5QO1nH+DG8*E?NK-}8)w)3nA!NKJ+7-m6DqGm%@ap(IHd=94vPQ#F}UdbY!k#7#KP z@sz%KDRyNxGlh7#nQM>Vq`2SkncugJMgUeg26iqi#)qf>{axP&a(V`sv;#zZ_(?N|7b#vQ-G)R5>Ow;L0ZCxSS_g=p7_bYrZ(>$4_~5LcLa~e zX+!oC4c-!^-HgHx&y5b8SY2P}eri?OO33Nfwqikg3`I%}Di6Fr=j>6Kn>p$JBASkh zHL&T)!yp-WhE@Wl`JMZdB0I;y8A{*C6lU1=z?TRz1!)U|kp1r`|Dg|ynLZ)+ zlXL$*a|33e3x>GYAcGgji=*#dBTHrb8s3={H(|fqPARzyL#&OC%;yJjZy6hgB@BGP zdZ_xZe+_IB+A=9k>m9jL5QH)~pP)CF59p{hqLK#z!} zZrEjV6>3*KFdapgVa)#3b(`i=Q@&6{;eutctM4VFeG>i2Vi@?H3&S${l30z)%o6cZ z^KB2SWJaHe1CC?a$62+B?v1q9jAgU@#WS+26Kxx9yYd{aJ0hth*f!oE|HjFFK)8M! z2$kZ>l$=hmye!p|pFBPG)(T5WVXSpLIl*~l&Csgxkyk79BN3f7V5tXL@}7MNO&+;{ z>YVna;ssg?f#8$Z9B`vjgOp;4Xh*5GLD$p;cE_}}R%-<)yXdaqCBmW$2k zubDA&_&CvnC>uN`9It`wg3d9K9laJtwTG`^PNm5C;L`$3wxk;#AeQr)$wzVgA7!YN zKP^Mxnfplcoz(JqGci#4=v!plGXCy5{@g_;3@Y?ldL17so_&vA5{RH;%jr~;lK=hC z9p~}CYJ2+yTcO(8?Y1+rP-+Q|8S}vp#zo5}fesRm1H}iWG{SxgRX?1HLGZ4!^)Zov zefveeEsPHON{Ls$Kqqf?SbVNxivD9Ktqu6QT+Mf$ieFQ#ep4`fwe_@GL~ zb?U`@?#s86TboQm@L@{C#g?jS!^$?7vb(d4i~78@5LBnYyvwdTYom%$okQB`ZCM%8 z!^UQ@{BhvioSn9IhEo?I_}bmOzP=!BpR(&NE%Nr9!Dly*yX>}ZT3lY<9E>#e^<4Y3 zpPFj#$vriTH1+MpVx^qM8+wf#z@24afVIASG`^d4d?E}_*Pk!}2X2Y|F?h&bkcNY7 zUr;~G+jUB7bA5FXa-?ubaIi_C(~0=9=gkw%@#J3$ zr|%L2NXVj0NaJ7%?RNGi!AQ)k*ViD$<|48>TzM%NtwO0dbs%j=^m$9PyNP+4a zkji&G{5>Lrzw{n`eT;jHI6nqAynjLpVWfa-$aY710%T>d?X*Dn9T8h_H&ACOnA&Zq2)KR@`piGFzU zuh^3y!?55hD$?;&0Wti)LtZb_Y)q-PR2v-qSL{iTw_N}9Piy%hpD572`rjchUhUS> zI%?AVc=BJdrw=zDWou9~mW!Zx`WH;{eWFh|H8VC-00R0SVDL|i`WuscL1UUgO+c2n z{}q!|QJ|dt8{_?_f_#1owC2AHDEH*i(reUe->=kqrPAuR$O3Uh2RckB}4i5eo2=p@yi_UpJ2qPt|d?J#Qrp>4PGw{d%i9wP)zi z_QMB#fP0*n7-t!u08>wrR@l`nz0fez(9pkxr;8jA zH`zv#&X2jb(tM(Z`T=BKRaI3xEDZP(2_rA8St}H{Y7n6m5BJ=l`T)ZpK#WgCGAfuA z*VNqDIgbjaVjzF+65F%sHa7x1U|MZ$Z&Va;HHvo@^B% zml47U^-mMpBG6mq>+4`S8TVRjXnxY3A4(249I_XdSYGNLG%l6)AsBjAVXfZ35G;b6 zYQzYMzz`s?s*#fys3fU8A8q&e2ly+B7y(CHCNAdoJv^W2QrWJ^kET+q=V>~Q9Ao1s ze%>64q!2p3y_;MPIrrME3mWWfO*IEp1_>hRrcsQM+BI0wH=0GNfQ;ClePwHBk(HQ1F(5OT#pVC`aYTBE6V%t``j&{)6Q3P^0UOTgM-|%Z+!Ar=NymFEXbASksfTy3apuc3HJNAGG=jFd!v 
z|5{GSKW%qMoi6P-X9Rn|v|Y`@YHQo*N=A6Q!xY^WHtSW(Y6E}V-%5>|*r4rnE`)JPFE%^H8LH1x{0 z*MVSHLQgJ>n~N`}5iMctpGw!JhS2MiXur{|h4|pccgw#TuHV|WgN1IiI@f1|(lp9G zpUX)p3ETPfz433C0zvA*VmJ=V&4FhK?8=I(P(Wp*h~)oTE*hKwbyQap7N7~gsaVd*3K?LezJBtEh*g%AE`jaVdSI{NA{pZFvh64WO(RO zr--21zJ(f-P`e}94XC81at7$RzMSrQ-E`OuhuO-_5Tzg`N3YRQ?cK~_FWPGc1Y<>|Woe#{ zbH_=?vR`2(56!{hiZTZMdE%6m_&9ofm?~}W_$D1~UGN*C z?Ckja{R{|?+rRoZ8y>LL?~|}y`k{}n=)Beq536kFj&P~qF%U+0)6&{mAJYAqHd=cTx-&Z9@O6*aAmqDHgzrnQ)d zAB!2yla%0`$5VAXy@}uACBpG+@$S8?1hXyqW7bLnB%iI|4WDo)@xIq2{6U40Nq4jJ z7%{z1NYZ9^k(b>YQE5pJkBNjny4ujVfi0s{E+y3cWgL=lKt1m37&lS+xSrYUXHo?0 zP|*+z#ew&BL&XEdh1~(}k2;M_l1Av$zcopP-Z(YA&|U7jS4#F^*@3U{vL&Yr2a&{V zH$_8IP51@wJ#(`mVD)lbCjDhqvYc01?;O8W|0VbEuNK843JT&wN~mUD?W+E-8p6>C zh{~cGEP|muU_kQdy^yUZ3^|wiX@3RbocSVy60w)_>RFSpH{_-J27T5ib4HblDE{Rx zt~Dki{thU+E&@hwQ|8%%OG&^0j?xuF38~DvukOj<{N|9b6hxs+ zRg3J%imJ;PAu;ht_l(m*lGevm`Oik-DK$BX*ucvshbHz4+{5YR$mNi93i)Pg-VR9P zF^*?Y1MvI=wHcx2K;wH!Be?Q26(XD=n5Fi}vz@^cH$o#?`^XBn)`sMk%NE85;PjQZ zh+l_e#~ki(;P~$?jjz!Y5ctLt(uT}?*(H6rXoi%0@vktyCBV31dMCwS)B<$JW#CIFXu#myc@EjCmmY0uQc)3;S)DL<8?kf(-B_f6=KNRq zs6)A~HG@3{!ta-Yi7i6#)MAFKCou$y8DvbK#2q|Q5>jp4zP2V;ZsRgJx`sc-`(Vn6 z@2N)@R8}t99C@0)UmX_Vb;8->SfHL;eXg%jE=iG5!#OFjRX6Fc8=2+6+?YB=xgt(D z=CUR0GIf<4r61os98XL8)Oy_iV^9)yLYC%X-wN%sDzD+=8>+wA*B)SBdP&$y%i2)ZMJD7;xdNd znNs*1--cw_pC5$cz1VfS9J2P|DhDp)&1C;P>8@9d>{+#ak({{dJfkpxE}cBYkY^-F zD_hwLHLrcZ6DKIS@}(fv1Uk32uXYTc)Geq!60O9u)XQkc64}l8WI{VdNx|)m1jW-? zy$bSD!#N__B};Z=L{VsfVb5HPkjT=JpfLdWrNuK=Epd5G$>D{x*@LmIPG25f#x%N> z+iJ&jXVcy@^H&00+WspJ#i z@vl|2W@oij9;F94Xh*2k1>T6()aY_4`bVx7LD10s(_aVSy5(H~ zaL&Wl5A?gBmvKJt>>O)E!r&FkM!W99*x%`~)`pI|Je0xhAHdxaSt~pKy8E}BL=f~! 
zKtUJ4Q8pGO6%L$#uPLa4n@bBkFb=PXRhkxW*!drHk5tDfpxhi^>q)8~>kxh_tGxgr zJKiDwhSRGQq@LE`GKjq*R>OBZY4)>N{;tWXaClRv0Vy*)hyr314kX>VtO~4GUD}E?1S4& zR7qt2qdilh0OfZKu0ck2S*AvwsGevRV7c%l5w%VTaZv8QS&wutMsIx$9O!85oyU!Z zbBtKd3MishEyCTrf~iVl|VDYilK3WYkv!qu+%$ zBYUq4(srza4MVoTLy==irCsN_+#K`1= zQUeBhABr%yn48;5+{-}R>Qv??KhD#uXLG)|5|)JRPTD7wwWSK2!iQ!BJKD-XPb=LK zt*g$BpvQh+;EN;6*P0RPmEonec@R<&=q4KzPgfMph~{>;F+Y}#nA{4-Yz78rhO^i0 z#c%r5O9_m$QnaRcL7bsL>Mh0tx1Q{4++Ei1d3von(b^QN`nveHvT3J3C8HI3GMKZG z6R|Nj536vC86X^3DC)?L5+GXZ#&y0f&eZ!zJUpz^@9q0-p}Vi#R2?~#))aPqwKNMF zPPdeZ^+0^0xjy?D9K|~J-Vx>6MIa@m^E-yK3>JMLImSGV)HF)=yDoLM6FF2S`;1}W zJn|W95DkhcG~{8G&V147XPdWLp+>Oc2s|d(apTmXlaa@9`|u<0au4|`U$s1x-(RYH zjqgpp9Zndfnd8*aFkfVK2;Vvz%r^3r9@wHguac(nUq_{G6HQKY%;FmxMLL6>Ml8xf0zBhyg9-{fjdcU6;RP`aLfm(hBNz z&^2NCRO))tNc2QJH=qR|>&j#3Qz9}3(swu(_=vYaPDv>(mFJmQ+{uwYlHMAdl`*D8+_$vxJm}XgdJmO+>GkHVk z4G+hMAbr0l@F(pA4IcV~6+%_f)n7(lAvTx)?gij{=J0T+wL3(_;VYWvBV}p6p$#*0 zmo*#f#w)At()iRVAbGQN3@8LxO62ykB{6iqOAgWzFO_e-VryNisnAs6z8W5Ph!Y{h zu>gWHkT1FP$;0^$1DTR7w9P(}mWIjqBWM3GcuQpvqrfU?9(*q}R`iTdZO8;jB3Uy% zo^z-MiZN&^;E*i=fLGt^9`ZeAwF;9BoQfsD!kC&YfYg@P)B*P*6 z0@^Yu;)w%eSfOjcS;o*XRfFzj?}>yszZd66xu!^0Hi=E9k241MT|KfiZ{w0V%p+60 zfPT>qqE)|^n9WN*PqkmOl&`rUQsG;n5lYRWsSplKm9|w_m|(^|%H|UWy|EK>ucn=# zPj8E|x4G8Tgo_+bu7+Ze!hrzAK@ z@1T)>gxam3*v`!OReswz!cOC$0xF9A)mdsN)^_QQKl>Zt0N^AZ&|e8E`0;(ozpn#q z>-KCg2Y=gNU`N|g1l05Bt7+H%VO9S@VYio_V2Cbmt zJ=Ss3zfe{8h$l6uJ^47JL6JK_iQ9)4Q!;wSjR|KG6TLR_t=4pFtsC%BjXh=>oYW?) 
zXJSU^%P;d734FMpevwQ*e3%YUF6S@E`a*5%K47JoN5QO`jmu;KqXAm4LO}->V4eW3 zIxsM>MVYS`QIB7yCf2{??+k5~uIHjqaBy)Ry1%E}6bWCQU(B}y^oB@7^I&8C9$(m< z;7?}d!PgSDThS!Q6^o1Eg8HRs*cs&J`fN+f=P1#QjSU|kpAzao?^_fqJ3AY2Y~T@q z+*)5<7&OAb#_Gbk-=_c+m(4r@3z)F$irdr;KwJUW1mIC~z=VPMfZ@URgAV|OJ^!OM z|M=o0Li;MI91NR>HUhW&v%yUfhDc#iIV}R0f^Vz=o=3lIYZEYG5|P5j9PI2RPQ%w^ zktgDtz)=4OStV}dfc2o|??27NIBY{C07M!L*$%kMnCR%}H;ex>DfM-90HFXh zy1A+0QkOT@>}{?U(B$^`Em_)ro}Z;h&y!|$P%fWx$)M6I z1YA@EXvr5EAU9BFh*I_PiZ(Y8(gXjlH<+|7<|{a@p5wmtEG?I(|Vx~%*CXwg3` zY+Jz0NB;}C=7*5P2%S6U@$)_3KvtPRHU9O`LtCh@T)$wU-Y8}4N~ z23iM7obH-?uLEzi)Y}WV?@r9U2b1D4r!;=NMd-WKfy(TBNj-7?(q#`&AZ;yU(`rE} zw_9X~5o(1}VTAnr!J!3lCqM{AU4vmicE5c}g_r^({@tGoT>*V?5DOTVN+$F6E~rrG zxced|2A(>wFSF@5Q;Di#+w?1zt)iT2qp%(TD+Mvey+>22o<-R{C8K~6^M6dOBL#F& zY};B9qb$;bkBCfZR`pHvMhur%8ouX+oMTZ{xo^KF zs%*LG=I^Q8P{>aHx!SrJDizn$vs_s{VbTYGCnx8XMc#nW=^z_;a4Su$tF0oESO@o+ zh%CR>%?-%eA|Y~WOl&8fO15)RXgi4;XZ&Jw)6;>*q98oZ#v=1|g%n|cG zVdLXGI5%MA%<1onE-fo7NGs+q&!|ycz>&6Ad$viWiR``Gt&a8T4|qHX98f2``I6PWWs-H*VRoVEwpgN} z7nMlGQ?Z@I5)vofadrX=tEFYovmw}d-hMv|$Bpjlf~{%ksQS>j$#aj>50|)|l#9mu zPLzok$Sjp31YD9<0Zdk}G()PN*dI#tAqED=nDimfMsi=l;1hUgp0+}LVQ(koCk9a9 zt#$1u?9U|1bR#A^p-^>u1b*_f4OmHY=2a&oo{?QjmhH62)GYT_sj!dEY)0AgvvJoH zjJt!E`KYTS7kk451yg4y^GbBkXfUBW4q>icQ-nLU>OEMr6*VlV*-6D8yxY-ag;wZ& zdi(oN?OM8eMWabV*11(Y&#`M+paS-C5q4jfhgJ>szG?v%Sqs4>dRC6Y+CImAdtynACUsddGq zd%3=CrVmP?_Qpo8{(JA+=uDi%JLJ6y9ipe{BMkGGqH!`GninsAXn`6 zKFw|21+8i}{oVZOcFcKPi6Tclt(K1Km>b_5-B@kiA@NA0rP-`eV4I(FP@8rZSVpXg zT#M*S(wTvLdmNA#Zvm?{j{a3xzZ>?cw&~y2v56S01;Fz{x8cJWB~%hRZg;kWV1$-5 zWb<2m(EhzwH4Mq_L!Xh3>z$*All8yS`RW=mNWjzWCf*B(%aX@)zIgl2%CBZwX}&T- zA5lOt(;$BjXy2Le7=uRfNIb=RP&qUJ4fQ-Cw^mLO*rzyOg;VqjEg_rpY0|yKj^;ZA z)qE-~-1;V;RVYqG`)LQ5V0B~%8@n(vtEldfMZFNoRbs*ZMCHlh*nqX5|1axjK&H@Iq#D@IWTI-CTz`$hp~R50njFy4r|xNnA9Mh(t|`mWZS(*NT~ z()`-Y%=he6@oxCHs9OJZ$b3L2`8VJ|OZxfH>tZP=h3B>7EE4WJ3s zB<}SJ=u`zqw?Ln6#r5mgA3g*ha29ZkiWUAPFTS_#!~j4SdX0nuit*3n`xFK`vzM{1 z9@k`Cwh6F~PEuK!^o2P90kGCogN~@e=gp>HC%|J6R0|$eKbI#G;22v~PR@3IE`7my 
zi9$lYjuXy~FzX^r-`YWujL?ALs%IP>1>Mhhs`Fdk%U@js`i58Q!UX{+LcZpJP(Qcb z;@S|Kgx*F<{TVJ~Ci2gz05>HF$b9Z|0Q#jYZy zxIsvs1L=iYrwTP4U^51%Vu}N!sCQa#6OQ57-+0LFr)}ev+vmamKh=Br-|P2L;i-Y6 z9jkc>3r5366aG9s1IACtWmSiK-z`jBJ#wjRULja?amJqhf!XzmuR?Ko8Xp!yh{>Vp zkcCmK-6!&dW4vJg+{g>?LM|pd<`4HBaP#hioge#@xpcE?+g?gE`Q%cnP>4jvO^>#Z zx95&4(vzkq*GHz?c*IO%kK+d@i)*ZQx4L|%Y)vQHGx_4)qTO7rNtQ+dYa)%oJ_Q*i zO`3rP=kg(*Onj8gOZ;~3w~yG%#a)IPU1z6~`pAPfbHLkXI_CoC=0`v0U2R!YbX*zj z>_uZiQGDAlMb^_@^};hq;ADYYbEIa5vU+I@TJy9D*Wdudr%xn;tc0v@&IDl@oX*O% zOMJ7ztXScZCOr9 zisuaEDyi~~LbHW*-gb;bWXl*V%nYYV`OUofJ>!CdenN;g`q*>by=iU}S;Fv(7S5%X z0LmMYhc36Ye~Dhgck4&KezIVzms>a|(2NmMJ8h!V0$xtM_lu)k6kUwohr1t#aXD>q zRg(qdnnBIlg_@}+(5Z#d`v;PPaZ(T7FM(HzaN`#8rgcg-mHFZ)LKW#&XI6PU><6n( z>Tg6=FGFb_SRZB$n`^TepN*JIy&@B}dM7$>Dn8 zZtjJ4uAZ6x<6Vv1yeuyY`>^Qb{Ql)GA>#_sAY~PJKOBdMJGZ3Q(ovivtxOIu|a=&UBqOrw)O7LE-p+#NN2eYW<~HA#hoM$Cc~x3%l@gS z+UNqCHBR>*8_1Kj!LPkH(z`!I*mx)S4m&&28YHS;1kcy4##iI!Mwmsf?~fM=&H^u_ zeb>X9)-J4>NM%39iaaVT3+8m%-?=e+@!jni&tyJ>_CDv! zYmI9Wgqr#jeJW&Hp)b+k&2z{p&>2FTS>?u3{`R9mMV;oU{YWgs2hTO9`!c-{@FF`^ z{$&(1JA})-c4%q5;zDQE(_8b0swS?sSL1NjHP<^MBKFs390X@7b7X0{NXLactsUMi z zWMX$$IeEJe5$FOh`vyjqGe%?>KAmxty&zr!X|8X)>WXhx_JldpjYO|5H4G2(3#dj; zm+|KpUsqOE#{VDez4upBUAqNpM@7VfAR?fGAVhlaf&x;c_fV87y@p-{M5U_`dO%9( zp+g`LkS?LOAcWA78hYsEZr<;FcieO2jPv~g_ZP=-jO}t~?WfK;pS4Iy_Cp$pgzZ1? 
z=lcACQ$a;V#2r(Sd4eBD8V`}4uQPB{ntvr1f86OgC6)<_=RhLS7kb_^nM}%Xy~@j{ zQArY~ZmtZH;+oelMOS-H*~XpoU3YS(kfprd=ru$_zDljc{(S9?Hu#o}IEAwN^Gx*Y zj7v?P7_o0KP&ufVuBy{B0itr zcb1q3zCMRN7R_+4$Rd|PJ_qGqNhA--yB>`tdB$;7Wc{7jt!_rP(P!MBn1j=QzjAC= z6m?7~m_qleMH%)_n*0`7&q?!F-Gs;28o)hzt-yH8v(@X&y6uRPb_`n+L^b2 z^WM}?ymy)-0%u1>M}@*E1^$u%Oq^J(uK3}8t%#1FsFFNxpy%gX`*H4K?A?pT7tITb zDIb(FiGC|!&tC|-#=selGnve-N)FcwtTpZ_3cTI=LHg0BxO*A8@8)@5nUbwH(2#x; zM1;At4o|tqIB{!4vmbk|ixFLNci-;iBvymJZ-#EM~uKugyC^nfrHCb76&cO+BeY_ zHXTZ~NEU^l_B7(%u}|I<^(z=D{3w+S+ZUCe-s>fu5xHuCf?WLp92XnzemhPKN|f$< zHt<%F+a{=&X{wU)EZ4xiOT@Ejix-#<`+lqkGrT>J(i-G%daWn+eh;k@-N<;@8sl> zq7|JJCel=tkP;m<^Ost%9XH3?Mc3zR(kA+B1RP~vrsI@|R^!7g(1V8T3(sM}0z<&GlML1j8u%9WY33c8&1oh3rAiS`48pw z+r-Q`d3lyY%69c`LiXpe1lU8Hj=9npl`NjUAlcr&)fWt7ZCAhU-2NZG_4Kb}CFgnx zFM8ZBWN+q}o_ipTyc2G=VpC4rdMG9@{;K`;xg+q1h|C+cbZK3Ph$atJh&bzhDCp^Y z7~8M#LKG}lJd~Jvq>oi%@}g_8HUIdamKuFO8s}@>ivU>&ml|J3(q`EUA?okLzIYvW z6prfg4RO=Z+|bvf`3P7PDA7@|qX!cmMuuFD785x9~R^>As$0y4dp!1%Xo%n&Da+RzLcE zv3_+`@#hvr`U_{6p$P~N@B_A5Lj;(XgbEvG_Pra%fQOrQ2fEvB?duMMprF<5y&A@~ zH#YW3LqA<--)FJL{h)_x{pQ4;(24OVVI|xLM%QJyqFC%cUUN6XP&1Yqe(vm*OOhIw zWhMsbYW*f{RhMQt;G9a&-v211dB=9aZ(o}AiVf2qkzBiB7S&ol>|TE<=k_}v)S#ag zuzHb~8#j+`L1=-{kHl&qZ}+k;lm8?_~WR+lvfZY=<}g0n&8`kJ2mwGUbMiCC|jK_W z;J_qS53aras`m7v3J(f`AFKCVFM)1beKcgTeYi!>Pj_=MAtPR7tP%4aIKz{`+pg~0 z*6SSA%$J_F)K=euNP?=dn$6lF#;fUMkOft-B~C%Jw)P(8C3E%E3z|ru%cjJO`KAzy zlStqB;VJiL6YoEk1(P}aCk zG)fXiy}7FVs@x7ozG3x3#qK6=;s5H);48EgMtzt5-kElHl;(ve4g!zSwi0e!4 zKkp>)FaLY&{V%ikw}0_rK4zZs3@<-@{=UfhG~5vF_b%7j{S2tqhKX5LJ5H9l(CgWb z0l|H=5}agYiS}sKQxNh=yLdb&gPud#BT{`suvQCLwe-!9SIn?KPVY8q;2H&^A*&+G z>77zj%TBZC`Sz%HUYBX@pd{BXBT^pPjBZ#c?2no{5*`VcC}t!<`!?S{sawmw>qGG{ z$?bopjnh}}`f;w;R&+?jOb44oWdmmlfwmaphOM7dm9m(PXVb%+cHEHhGm$}_4u{); zHS)80UDhxYhg(&wN+{Wger#PQy#)}SuAJDop~QxKtjXfTJqzh$ZGfOxhha%thH|i& zDaXVh^_1p*D;9kAV;r{5bg;MeVc10=X9l(w$R*zXo}{AfW&#FKb6*17b{FlpP zY;wA1qf6!)12+&_O~JEFtdXY2f$Z!Pzl4Q-)4K}GmemwI{)4uuF6 zE(A-l3BFy;P*2&hn^U$hkICJv`n?|5<)t0q(XlqdITmdtwZsYV(CXfG 
zMEldyeaWj&=fsP&J|r2axc-Dh(y`5meJkVhus-V1S6w2iM|Rn;>2O-R`ZmcHfW#lL ziL#tD|3ZJKA?c(YGKWH{C*73O!0CqpbSxYwg6l{zr2Y2sSSF=ouSlH7eGys@Rr_c!(=0jz&UJ zDkHtB6JO`CV!kV7d8NL57Y$l}zSe;7#;ajx)qDir?@!BvQ#6*GtAhMoJq~(=-bcf8 z1j6hzF%o*t!xGKQo%e)hThzwX7DpC~?RMUmzCBPO2^SH%2ljigme5P+l9I_8r6sHO z5Q$Z*NO8JCbFiettkCW&1?y6TCQ1@@bf<)87>086RTrhO?d2vy8`zH_%|Yk2;LHlD@t4~x2#3b#r<=K7 zRo~t1OG0T_EsGHF`$-g`SJh3XcZ=yZ#LozrKP!PyR=a;SJi?9$+xR24N|=>{Z`?C* z&?*^F%%RS|H~6^FvF`9L`j48nB(Y)uss(! z1mVR8*v36B%R%|(W>l;=NoQ57T9p*@92xABQMo3okp~klayNU9PNP5%E8C12(Mm5a zaTvVmJoC+qEMC+|US^qZ(QUaY=eWl!1Ljgt^rx_ETOvI9LM)ru%^^|+7ZC$8Ckt&> z&TMK4;z|{MBuq`)Wj8tQ>&9DT(0(G7Ak(7s83wki1?1Sz8TJe^PrpS!tr#`Hu!1Tw z`*LOi2Q(XGOMm2kQ;-j_cqmi_S6ZnVtS+R-GbLiL<=d6*K69G47T%wa?zOGNRLz5LcN<&79{03%L)`+!z2s1a=R~H`IrqrEV{PRy>1lTZ z$>T=bS7nf{Y5zx*RsnTy!kD4{@zF-YQm4#g`@3dxVXDl zJS>daNBA6B0;xFnShR0~aj}M=5hY;=6w3G z->w$YhGuX1OXT-RQl5?V+rVie zxhE{vpE;g?y!<4C+$ZZ%gW>BVDT&Mu!3eI(!aMjYvVy)9231UaPjV>~uPPo2HA?(l z@A&V$0g^+AoihD8uXuU1K|YN@7QI~XU`Im)&dU-3=DGLgqi2h!q1;Z`ono&NmPE#%}{6|bdmGQu@OGFmg$AuKCe4(4Q9>BjCQYxc?$i18u9}oR4qEu~v5An^* zRlv@STVMz+n?we}_w+01XVG7IYK+#xprE(uJ%pTil5U_b`D?@8HW?miAA}ALMoFMou&~R`1Wh4d3Ov6Z7{e+mD(KBLY!)3A@w~KW|4CM~Saprh z{PG~xrdsRIWmE4fiLE!HB&R#osT3Uy3Ju5?i>lO;RJOCg@3@!fM_(f$;wz2&OhXN= zuF{P|aiyB90?4W%AaAsrU^1;}Px>e67g}>*DTw*=2zdnhIK-*&f}uIIRl9%aZM5ElOgVGFg4>N{VWHDDokMs+CDS#|?cw z@zRq1SzC@ipV|3s<9}5IRXVv6``W^pGV(a#_dI-V<7* zf>-e}z5`;}`0JQvB_NU2qwu#$qMgvLTdz@3S#VO!zKiwG%k~dNfw$BV`Etj~NKUNwT4rhP#Nj%Xi1@yUoDH#dGreRuKi(&()|5Q1BIXTJ?VcKe2qNiUi&s2e)NieKC=L+9O!Hcww2I%>6HIM&`L*wqCX~ z|F5f$I9-7#Y$CvDUeaa#_b)$n7s15;vq`Dl|5_H~B!7>8TEvbr8UqEd3*dJaQ=AN6 zDo;8fqEm{Z4>J($%_F`$V*3Ttm5xZkx;|kKg~nbzHoTsd%UDD2qCnaFhm+om>)Q_& zY4Yu+LydD#Lp#S`qDDYQiY{d;I~MM479Olioi{<+?rU3u3409NeYOzt#ZdKlFfn#3 z!%HQx+oBr|y-owgr(4FCl<+8YHvPCUy!6yo8b+U5e@0aPDFT3QK9H$WDtrY-2)=wc z<>@>wWK+PQ4`h4oF&_vpPE27f@heZGcCp6w^U`IWd_VmXsDbCw!VM zChNg8VIl=KeHXa{ygVrgwx|g&C@3>}_v+HSjjsI`z1?a8!B;;~6O(K(CJqFM_R8e7 zz0_pGd67>Z+!UAkJB+o+12c<*fr+J*c=0?%Oo*{b(4W(09$tX>^ms(4bHS}A-reQk 
zy5rn}MJ(^_m%hx1q*@6daIFrEe{(6@OKCPlO=g=_A-M4>WZuy3=##v~27F|{LADrw z@JX5eq)%-;Z6VMr);MZycQRV7%8SIX`!iwC&9FCk?e^cc#c2o_O`$j)N3b2_%8rN6 zv{siq3$&y}K#M#bOcd#yl~NklOBNT<)ff2^kQ+#xG6NyD*e4O=ooX^bv(%u3AmgSaqz~_Y6GI0%d?~l^sR8{tNKsg^Nw~AbPokQSOLO>|_A8WZC){#a3C^QF zYi0Si{t|k|HFx{Ta$wDSwmBXuc`}aeMH?VkIhCMd8a}1Ks>HOJlgSsKuGaL$FKle9 z?pQYCbXFV+FL{jSAQh)bzr?yP`cmdjQp4Igczy1!rcbQ+?qut-=x??BOsFZR3-3C` zF66|nuzu`2_!M+u3%?&cb^oQ3-z&c~$w;S0$zH_pS32xs`ge(`_#CHF=Nk3+?OYw5 z;e`X43OZLghE*36lPvHMoLz zpZiej%iFol`TLc#f^XgRvVx#!uOtr->l!Xu*Y)0BVb2m^r0o8{o~*2CNn<$_yU7d$o~u2OQ?(+Z#V z9B^(p^F9URbksA~R8(zmv}?G`PVSHmG$GgHsU+CVnZ?W~?=|(MRo_j{J#MC?=V>*S z1KeZRUJ83SmhdqPlu`kJmzQ6j_n?+*>PqaxL5hX8zCS>RpLg0@+u-UN(V8IBXv}l* zL0J)EjNIWXm%P6CVfoD;XmCn(Ml_^4^`$e1G%+2AhwICm*%y3r8^#GNdG0x6@iXik zY0ti@0A1h%`4dBxaNn%1lxk)X@CbXR+(u<10pUsst{3!Lm%iuOb$+UOht|(s6|Ns1 z-rhS5s6gOvDM(C741Yzp5C$BR)rkO7SGJXv4e9$M;_xfE?yv`TF~HjniRc7DNAPkq&#a1ccI=cm||Sg;tLFE zR)vCV4Ky;4uc9xi4O2Q%lLW09o^Qh@&B}fZuX0Z>X>j{lg6Zu=%$U8STz*;=Q}U+< zQZL12KsLlF-*3ePlsM4rfx6P;0Wj10-Cl&_)SU|)V{)pNoRsJqVs9jd>q)Fw>^1xQ(+wvtxbRtj);QvfEVLOUAr zKrDFdhD<%g>^P4z7&~rtLor4quRrJg`sx?7sXjN{+ySw`*HAx1SAg?U!(1AVXtJmk z{f*fDd64;dFkF_C#SKRDy-<<8CEWe-tW8|B2Z_Uyh4_Q2pv?Zg5Rp>Kl9MfPu&f^ z-%I$?FYsWHTgLCotWTRI1MWUq^G1*9&fy4!;Ra}v(1&katJ&9X#>kq#&HXpIGiAng zh1gF&o1RM%!-l-0vftly;|6eRRIg!gqPg;6U?Kstpc53%Sqkjf{Yc6FYIo9b4m{b= zr^w$5EzSDUc28R0p*4|Fo1o64`Qi8G{EB{)o z!tq<|`iJk-Ne{8@pmf^+^x-*BRcPI#A!VVbx>1**-|P1V?|4Qkxb~puPclQ)?mnVR zqHTN7QA;1Zoz~Nz^6q7c^g7x|u*Gxs=nqaJf{$EpD38C_XumN@rbjq-EAaUHaqhRZ+cE6_-#BS$o}Mz@vp zXI}|W>a_c$YA?F3*c3Fbf|ncVND@uZOS`HDiU?X(hb*#zBVzBDW%YzhFh-n{9~el|vw$0i6I zyCu15r$`xhrjNe0yN7KE02EnO(`>sD#Vo>Hol8$p)D5bfEV6DZm4u-+Y>o@bn5W6#XFo5PDV8a@OS zJ55b(H$vAOEl_bEv<+WU?es)RrYHcwN^UBbci}nb*IynMRf^>-S`VZRmU1O&1SDa| zo=I_ga}58>>9p`G_ya+JyQ)>;%C!FNVff#X{(oZCstQm8LL4IbNh{Qb6QL~y{bTst zQA{S(;P=*r64NWp!OAKVn#^y8joG}-eTs%k<;DNaY5{kD@)q9`EAJS5S4{o3%5Et% zNzJvk1~M=Bxf43|u|xtdyaN2he|W8p2N1nPQV$UwhG7(l^je{2!ol%@FR|}^P9WM#^8<|?AplC3co9Pf;DOX9DMo3t=pK3 
z*!-ou&j%4UmY;kYl}`&VeeN_RC7R(Do7aZSUF^%rx}5a{bT(DvZO84!P!ADkBprB& z3ctN|vG%YExr#u+5J7TwzZ+X@Pw6n?!}kCgcFNUx9cI5jP+$L1C29@IE20*au-!F0 zFXg1QHEL(Vl~R|^Tv}F;{?4nSPjrnVe8S&%pIcFRd_0tmYdiJ|IbNYVb?lgWQ;0iM5bIY{^+8yTA^QPTH)6^@9^#6&g*g}2@4 z3bXUF%hCk^cTc2ZZJD{oBQYHgLoA7+?r=QZd{>_d29xXj_}g~6kI+~?H6v6=8h?1W zpQ_#e=-YbRifTFrb1w$jSRMsb$O~Z)AH(8VwG}SXt7kIguDoM=4>B_=jJ7(E0O`kpNQRqQCnZuKqNy_YJ9y)c@6-#SV56+Ad{Ccp7QW@_RRofxY} za@x8({A&o^d#op?Syr?W$xlU%&ndoXz4kVPh%jy5Cm57YHg97a_02b;oMdy!^5EFd zSQJ!fwOTg{8L*c*XOXA(Yk}+AH7!2M_th(3&|3o`JFhIt)e3gxfjxMRb4k%EoXd|+ z&T4@_*48>vUgK*kLMzeZTR4)Qm)jBDgVeuJ{gtTKx`2j#T!_nqo!^tNtqP%q3M+WqreTRbXBuurwa4N_U(an*{&cuuNX%EG z3<^7KFS_290$~ASORTKx#p3nlhlf}@kKUGopr+%(dHpPwVgO26s_h3ptK)eD+@o_9 zpc$UWKIGH@PS%Kts101>9ft3d!P(i%uWT_^%?k9ypMPURsUaMdVuasy)MDX5y>Tr> zi{>oj$l5Aln0s*ZwEXSU9$1}E4sKHS*=Q!3)#hXIuAXGhDEgvvVpj{K9ijWsG?-i26yeeupVF!~e|+uN!eBzI z`dEi?m5eE8TgCn<67Y_STp(Of*|4Ep`aF1_9gk7X{DJvhYu0ShNv9b#`p`$hjVbLc*5<{8!Kp@u?8)c;C5QgJlHpvY8WFBiCP}#5B zmIYkBfxbJ;t%ITwj_XBKRSXf(zfo>^I{0fqQn*iz#|S;hT3!WZW%~6==v&Gv1e(Z- z`|4w^bS+LvwYw+iE^Q_O{Ngv->0ZRrO`L*oJPTi7#nxtL*yy1gOL~6QCznTE?8^2a z@wK~aZzqh8X5O5M29Bkj+HHot@MxGsrlN#!2UMr$nLKEC9A5o-vSgGh&sPkKRa43V z6gdkp5o+hwrgN=4uk1N^G~Q&ym~XSv$W}AwkPF_Xc~NBxG@gm zk`c32v%74P`nknxG#f{Tr8mBILDELt8tRm^^qqUotzSsqJbcG5Y4Z^B6#_u%* zB{K}cWvX{ZU%B9N49OyIqks80_IT-+qzeRS=NN1B@`?AUZa+Yu2=@}c3ZWCn#HQtDHy>A^U_&;2Lm1lU0$KNtYTGrE7rSHsghD zt!7s%9t^6ME(2XW@49Ok%(Z9@C`kX+HYm>8HyT$=3e0R{P z|9`6|&IKP!-VvBa#xg@Se{&{ZblpNOiV1-FlCet0ZfD!mc}%D&(U}_tJ1^FM126c2!55WIE@$$6T(5X9?TR`48O%8vO9B z&Z`39PXY1K-O;g7&)!o~2nRV0L0tA(8)NG2dO*xC*_=hZWSK<$i?M0Eo_q5;Sto_2 zq1UV~k@2-__ir?~&N5E}(0!4DI{vHzI#-ir-;FYt`uP)mC9+&?niX+D)SwW=oD1^; zO%U@Rp0#CDi7(gOAIa4|ny4)PTBl}J)AO>w(iT*Qwe@l<7fg+&6}!rABfJ+^GU@en zig-KE4t~zj^l6^mL3hLaPisX+*E2bSx7mQtsw7P2B;B?HBB23tXqdg*6cIhzW^eFZ zBcV5B=(KLED)au@H8VFv4usvNUxtmgh8!r!KZWOp)etL4TW9R1Rr4p=h!oiHN$^k( z#OqX~BF9k_5#>s5o%(^#19ay-fR#U*Ua4B?orYT$PS{{DUC;SaJ{#~EF>WlSdcnj| z;b5~I2o1gX-IO!aqP-E&zz`C@JDgoIox5QF?35e%UwDqI|0LDlWNkE`)|VsxhC5F? 
zCi)Nm2^m8Flj=g!g|lnNn$-Vf1^fFgr6Ol9;C=2tsXe{V8C`X_erE0PXzkN~vO>+f z``s)5%L3gGip9w7bljc9FUO&APt^QxCSCU8n$)_Y)3p$r|X8HU7 z8^gc8%zt1E+hlD3UEwWm&9Wy4jzVgYSNq^9$G1fggMA3N%v@)6v36~Cx698HibVhm zwI)M*Xphg{21I>O5YiBNZP?%*zmc1pw#;MgF|WqecZRhGiQAj^lpRWcL?=t97sR4$ zwnkk33|jpo4tzOc1$7PWd(2o%`F4osOQo6F^p1$XY;r9XFvph7 z=qU8YQ-FqR7}-vB?$HPT!pR%x0T)0E98i9eNOlloAI7kY`I4fAfs&Q>=)nDP-hd6? zZdSjLD}D@T{XU-|OB)#ef47tQ5Li41O$ZSkL377;`;oyfg;9UaL9`)Yi*>J5#?ixx-C+> zG`~Gz$L?zDYRw~WfbtUjp|GD$LzJXiE{fd7Pz474PP`0p%mIYUks%enT=Rm(b&NJ= zSbt5`xn2Zy<^1A#p0N$o+7TKvsQ9NV#@?sMu*RnC%>WBXq;%kh0_A*k?tV6}hmyFW zMJA@RYhB6AQ%c*gsHA@<5I0GT*};Yg9R(xdZVo=v6^?-(UzuC%=dm=uBoFo5aCsy8 z30MKQtnOK=1ek;Q!+3+-7#Zm0`}gro4^XE9z>Qov9jZ_o68!w`9Fu~P3}IU;yNplB zNa8__O;!JM64^wc4Vm|@sphi)E$yTH=Dbc-)J`d0o`5E_-c96``@5>_?Cu+DHScO7*BVqa@`jga~7Nd5|8_aCPqdL z;;YM5+yeYicFtql+}QWVyId<@Af1iki(rTskkTKkd*0z28oEw@M%uDh2W~hXh^DaA zXzR6rddm-M$38evI!)*A2Gn{S4fmaLYM08~mYib&UhAO74YXrOkdes&bTxLM3b^=d ztK3Y6w;W-c>FhZ?G!$=rdM!mj|B?p2xM?hdWQD~ec*z4Y?FDKk?6s|mMu2A6XRfH2 zaVLtzi|jQ_=`Nnt%r7kxo8K`aKU^WEn$J%@asZwM#Q<%!;k@&J5R3%!``N4-tMiU7zSns3y3g$*&5{JVE^D!a4uHsA9T zYk@*SUw1JnEWCD zKS@uUPbJYe-sH_#ZU>%C>Yi_{|C4gl@QieJTDD+$edJtly93uvUS0d-k~A~t`CaXe zW}|dvI4-8je64xWCGOlJ+tC+wEe82pe`cG7!)S^YwLYtOIvVftG7Ej7#w%$BG{D3x zM^0XcxL>KtdYGY$xkTR_$QJI9Fwd`UbtO*{x3l>!?3rv7=1xo(m^dX1`VwiN9h>@W zAdvPb69yR98+HK`mClg%UF%GcqD{I&33hOE=&lW>T5Sfh+x)_2XX2s3&irJw{A@bu zSU&;k z+#L-4Q|uOy!{j|K^FF0j$S7d-{)moM-m5Y-RuB;WMe_<7oQa;-Dx9sc9N3Be*sBa7 zX;RJ#wo6y+AFNMuEPKCG>-uz%uVPDz9VR|5a`kBXsVpQ7p~k8=_l%^i1#|1*-SFC< zPyD^e(8tpNoD68;SC%e!1u_T;N!{W-_y(MK+naUgQsSUCT_&?&i-Wb!8ms^r1J_n!92JiXpFB@bUZQw! 
zDZRz#=PTl85z+iYi?W03y|z0=l)JmkbAHa8&rq5R?RzzY+AdEX)b2J#kd+zwi612ElodK=VY=*V8ndXQ`$-`!vQd1g(gvCy7LQHtS#{$9EH@NO-wmJ!Q=#kMR6h)YZ|m!jdM3Ws z2wYaY`F^I9ffaT0OVanfbURRQPfi+G`v-uprcG7Awo4yHkvR+GP}tnV3act7i^v|c zqrNmjF{zAk_542Nb~J7!@k<-O3R64D?2Ris#?X;GQM1{Vwl!O}6NSJ5!p|hXvxgX) z$>;`=rFSg#@=QmkJM=t%Z#{q_%AZRdBo|f!f1YRrtU3ah55YY%;}Ld#IM)4x-PR$X zjV&EobM0%omCE=}g{UZ=rs`+H<Kb1o!2h$>o<7>;t1{~_pqQ3Ft zf0(c8q8t;IRbCQafwJJ++-i#ai_m;j=W`i3yRS5h4trBl{a~rYhbzxerxcqlFo08T zkVQC^wr+orwOo+?f$8GFg1aP3^*S=shhAJa)HQMUrf?}>vOtDx&Vf!pdz!FHExP(Jf} zzc!E3T)<8qW}QC60tRMoN9W3bS-0D`d2J4o6@JRiJ&_A=h6#ia7|szplZFFEEy~kd zqhU8rUT=$K>&I`<`?TbO{eE4ExNQFyX#Eoh$N~vG1iAscH+q%^&Hb1^bK2yc9qRxS zrt25?kFwB#Z#E~2!`@>WU2fdgxKR=r_UCze9u~}hbz9o*>eP#jC0oV%O@G8779t01ccQloB7WTA`$toF<)U9gRsL1Gp z5d+}MRcv)$!W*Q=!`G0?fEtf4-FUg@Pntm*lgkANK+?oCz>m29un#{j2QEyNVF7u~ z&B$5H2{WLeJR<{=~Zr$BI9n4|>>SAu5qsx{(vF_LhH?5?9g)G3u%t6iMJKt9< z^$%UXOE;h}W(Mt2FIoU3- z&G3{BYqwwkYiYG9!1sDaUl1w8jKgs)L_LLU5&=dcq1P7u zL!+RE%EXp!7T~QC64CaBvlLFV*a;0Bu%iDIjAW!0NNM z7nw`66FFk)mTUGSd}zOZ?j<|!y}L}{;~l%GQI|RpzHjY?S)q#6EA)vN?_oGka)fs$ zh}U_yqBojrCDhx{N~LecGx%;g4dhd>FG`RC=4zIDJorSV#>-O;?3wN_4Vt|Q5o2MvR-g|eWTqkBE zI{7cU4cJ$#{83}|axR}01ZJE@pTgOnFYo49qu+uK7S&9~|0~@i@42cSC!1d1!D0mh z#y#^+@_wMbVxtqDVO?Pr|BPFE7hR_Kw-x}Z`@5HrybLAE0<;sLdBk@B4Hn;)EWnQB zsd%kyCl^xYzdc|k-J@BOo%Xr*QSdL7{sbRDadaL|g-IiUDVAW(7G`Ays%TV&r?*z$ zR{@1erL3*Yz3Bx#kMPhFn%_b5+q92T%=?nfX*QLOmLULR5&u195M{RtP(wg|C<*^q ziY3St^=$+7x!PXH^lGg$^X7(TUpUUm#fLY1QzOE6E~my+$jnBe*1SPeoXR?&wZzThJBOQpK`~_g2##W{EUU9fcn_4iKl;-kiOF2 z!!M$4d09@kI~+v;d+nEgv^h8P!%3Qoe(feP&9Dp#5z{Fa?67_1eDua0f6K~;qbB*@ zPJyg*`A$vkM0f;y=cLh9)j6}=R}!d>hF_pATK>J4d&#w?xsme6eBa|)1x$AS98hI; z#9)+$s;_VLcpH*;Kk+gla&Aco{f_HiqRz|@2EYH)|)Wfv8H_~xBHNy1|3d+!I4WCZ9G4apy-yN%P1*aNcG`2>?iJ1FB&Q5;v07 zQ=t)tec-9uGC-VaG(fyg#jmrgA{*nAu=$voMj4ja>oFM%*<7%Az3c*eUbFV}H&t#z zr}I4k>pW9y;>VU)J3d^i^^N;#IJ^d7yl2j7jn;oo$CB{C&ZJ8zN#pCSg=jXjZgzWm zVSoevR%9#U<=kxFdp&j~*;fJPciCT~8BGSu9<;S}q30Lq-m8Q0of$PWiWNxBIpmWKTy49w~$n-_0*X&MUvQ{=*WO)cI+q 
z{2vND;GmTaSFsriOazo!1%;`_OQV^3!JG5SrRdzGUm9#-S&f-wpMM|rmNq1sln8T= z4*eEi$&A&xY18%lDfS}{F&>>j* zN;O+8-DWnZ9h=EuhpB-}i}P9q;#KGUkOfQrGobFRj%zEOvAAtFZPP0ULOZ1Qc9;q%y+ z&+?x-fhACbfsr(O<{KEx_Be^dzQ3& zo)c31hgaGUx!Y=$ z4Pax}LkD^Ib%;GNe)QGyxZ7wF(ggAA_Llh_b*26Ks1DJbSSz0glnE}Z^aot=qpRMj zWp@j#tz)>~)mkbJOMWwUz!u>8XDKr2Wpch|h+=KgkF+o3C~Dl*lI3rq#om!ZZ^X!X zFC_1L1c_a`Qg})Dcsep{p5GDVro7#!a5<%rk~_XiwmyfzZk7%SaC*`b6&)rXCU|uF zh|9gv+v;m+@7J4knR%(DNwDG~RoO#Nc488M21(x({>?4CnljO|H;qDcJ7ZV}+(3J4 zUIDe>2?pe3Cz-8OVLk7kuwtve%7|Ao$s9-(a=S8eL=B|G>CJ*sDvf(HS;}E=>ft=L z($p}&EAw#DPU2T<0n;_!bm&lV`(lU?dEn}e3@>hvt@7vTLhe-P&V_<#LQrV@q0`#A zf+)!Jw_vBk_oI#Tu5K(;NlMH4jkVH#4@Y6mifcnMOmRj(ehwA}a$LDX(!1nA! zslm9;vP9G~ie~GV=SPM>%Mmgr(g7Q5?HO-hq%&=K_sDj~(9wXr!`WuLUmMk~ z(!QHhViF9RK=HeV=}rD6TNrabp9bEnJM4VH<~|Ozq|`|>pM+6Ry|2Um$~z;KD;AaS z5*}48Hb2!R+c$>j?a6(uS!i|^j$LmT@pM+WS@05GB(&;-FE);Qjq)5!eyGc_wR!!O z4ETzIdiY*~DD5=E0zt1n-=6+bpIfy1@8dbmuis35Kk%D7m;?Nv z7g1ohKo-*LNr7Mdvbpa$FS-@P_2bxQwpYz4ot!~Q!u8|CpRM^v^&Qks7?g?3OW+&| zV9Xt>e)C@NLKK>6aQ+L&=yu`~&5^_LF98x%j3cXlwHuZoRObE4_n& zroy~E(Q9AQR_>`^&CJ!Yj^LDPH`mFmm&|^LMU&g&N{mdoz^vpIlMD`-!hl>MUP<~b zmS1jcxka~q8SbnsAw>C@UhqPU-W|7k(TQZbuyDO}O40t)@tsQ5qGtO3(3hQWTx5)Q zh_&yz#vVGLYre`7X!*6bE^I%ovufpOI|^)#W}E3&*w@oha4|gDGzGH`?x&9<>*aQS zebZbUOdR%)l-Z)BIM`ul>?$aaE)69Qfe~F1H@b74vuyX~ZVU!5qfwd!>*ujVy6f&rkom)Yae= z4KCH|Jc2>z@e0XG-gjtHtzIxvn9$%CMIB!&x%{INhth1L10Fs9<{*9d?Z5(!x>T!7 zaNs3<|G7tV*^gIU4}M70BUW01KUUMXN%hv!^Y}$mnz6Z_DBoZABH;>oP7a|N0}$0a z>I3MiZfPB1g%HhWhiNiX>$GPM~m-(5<>DEZ|nF#;@^JRoa(VTRqeoL35 z-*UJScgJ_`^Ge?i{=@C2B_ESkZ@N8q7^zk%W<83B(QK~p&nGr{g8k5;2o|TiMkm5V zU9cV=8kuze+wS;_oPJ|$udj=Ct)ErXa3BNYXAXCXv7R5)f>x5n>Q&!kpm-RfPdcX& z))mLsO0vT^_5}Ao3Ej0ZfPG7NS+0Hjt6eQzfOteS+<+=RX?XjZvXIOOLIWaOz}hcn zNiR@LSMq~)0u{dB5r-QDgmu6W71MEbviRD0tqVxXXSJmFy;`&~bzo3B0f*Q@TqQJp zDCr$H9o4wMG|v4$?R|GtQ&|_UA_|TL1QkcBqJjlb5KvmghEf%j&_P8|Ne!cxyfWa3Q(5XP>?I z*=L{M*(H9@?ETz?o>K4jwoPAlw1v5Vz~XJk*U5VlR2uXfCnn~S;l2l)_03)hYuK;G zz06F2X;mjs1A~`8VM<&W?QXac5>p~;q$Z*@bfr@HwN-FUKH}opqW0W~p|fIp-G{(J 
zlydumwvxNG$D-sqE`BL6JCDiA#x|tYyj+|FH>%>?r-j#Cc+U@qlILOZNqZ>9$}-sYvlqGwg2hv z28%6jKfOr@s!o&)DiZfJS!7*1j^}K-aD1OJ-d8E5Ow|G=e|p?nefh}^8f_W}vr6-J zZ@;Lv0^}WtyX0}@jclF)n}U=*SXi2 zU34dN-Ral^3&`DsL{-~DcT4@CIL;TUybi|oe0P!g%={S2nD$$-+?ZAc2xqRw1^ks@ zZT974^+)yg1id}Rw;!mSUauQIUGrGyVDD`^eEs>db9#gHkz_cc`@u5K;Z<4oq+g!1 z!}aFh=QX+d-6Sh+9An`k%u-M92f*N)-m2?o@q8nR_r0&Gmn}OongZSx@Ct0lwdRWN zTkex#;BN4pcg{=~-Wy}>3!uKfO1|?vqCF)b>&x)-BHJhJt69^=uA9bd7G!BA>n@x^ z!Luax^-E5!7;fRbx>(BId;FjwX1kx~PWK`1Ief@JA@VA&8i;wWSi`yMGpE$*hw~h4Ucm2G-osdeMWx0GP0r@>?*$H~ZLVL=wf82=O*gEsGtIUW zpnp;v2gg54YzU5@qrwi+&sinqv6QHx-q$bSX;!eHg5fcdXO0e+*4p_VoSikAt$Dsi zvS9GV9q)$lQ%*|AXl)_23v9%K+kvh8_bURlZe7;%;-6^9t#H3yE6D22Z7kk76SfZ6@_S); zr;n~6PPneIHe)$p`OxONT$u7j5BBDcoSv0nZ5@4%=M&tMnc~DJbM1o?tn=0(yBt5& zo#{wHrt<_T@k%ZYs(LL0`Nb{)neEqAR2VWMR0|mxAuWY1*YN(Sy^7l{yE&2Qe+Hb( z-vzH#reRm&+3=f5-X{06caHO13j@Xb$+}xljFqLih3wj$BXw2>%x;i9TAW?%m_GFj z_6e8}9Xqnms`W*?sVL*XT0oZv0*pO(Cy(c7z>WE;#}OOkcs|$zAx+f1VEX$eSDC`l zdmWaiva~{q^zmUftKx1K6~T>5(27U3EXMj=_vz$WiCDTdJZ)Azw+XA$UUI5lp{hPUIZHgMV?lydtv|EZdWN53Kwmk6UlqVoU}R#C$9Dg*n{O74En4 zLcsH?WZa{vt-f`k6Ajl^2>TmaD-Ud->W|$OROq4`YS)(&KWs^bafUb}m43*O62MjO z0{3Og!Tf|qruL__>BZt+4hAuXs95VWy#&4NPHZ`RAyc(8KciR*#&&JIeJ%l3p;Ms- zfBJ3&-XK)oay9SS&8x;U7xNLcgGdo+{9`|2ZZ~Zfej^Obo=yLNF?{AwXszaKS%(Sl zyYj09h{rMNGm{hNIG&U*;HjNkpHSpC!<<_I;*b@m6%Mxz1kVQ7GE4k z{+qx5S=q~F0vx&qaiX$>7JOlzBDDFNs2f3CNvhe;yrXF-NHq0Zpmx!bcK&xuxnNZ$`}(Kn`@kOk~O9=L-68b2t2Rw#Y9cwSb> z9U{|Fz=Qw$^%cNel+;Fs_<`?KXxIz76fp>B_AH4k1b0xUrh3|na%&eY5()zrk`S=`;4w|(|6ur`ZIdm;F<;*b}FjiBPe9} zLlz!Pp>FZ%gj`4pEp}5hAu6a5VxQqd`ozZXAzP)!5Sf>g#A>ajx z!DHbmRKVcYgXY7bVb+hyFjV>PrHuNV6A*QZK!qBSn#)~Sz;G!Hez4y(Qyv(JSYV1| z^rt6|bb@1YMN%Lu3WBysUt*3917>{jVbjsD)T^^B)1&w=UCzaWEPaB|=q^YitA(9p zWEaQ`3}$01SqvtaKW7}q_w7D016AcrFe@V=pcN6s?*eqder?Iu5dcW0oS6Xu;_gUb zUaE!pr0QzSWxTVDtZYy@M(^PKm7Dz%ln6~PiOKJbaVw$R_?bEmYM1@7Ucby zOP_EA6UMI*u;dv5%~I;p8WO*269rtL5sWN_bPlY*4`0R-`Po}Xm61D6>=Q*F~R=y-5pAB@dq}d*4l}g zvb8pA06yn;r{ETjz3Hze@du4)&MyWp`MGKqPv4K@^*Ww6f)rT)DuMMR+ZcxtMoRfT 
zzt=@rV@B5_`LLvpAXUIMWkl;5&r)0KKc?z`rgCxg`@1SBBb^mhHUXXCNFg0BR-vB7kXM&;AQ_(w4;C75!7-kn+$=J+vKPqWyWnXaHD8Zgv_0TKX3E|D=u#;}~3ck*)nRNh4m{`#LtF!{kY9)X$4 zC^Amt-UOy+oDte`5rOj-iNAPK_+$KrjHmty84IQcgv7IB;l2y`R37J~KX(`tV2B0) zPKZCZHod(NJ>6CzGoS=Kr|_o1kR)a*r^%T&d%5VuZ|f+0PA*)M?95LA4xNKE#pqt4uxdGM+_WGa=@&&*gZ?+r8p1?|B6Awa`voCOed z{rSAuM1`=0=y}F&O@GW$6xZeWuV4@@e*y;7ka)G2Re@raXGMEyXh_L63DUKC;t=wU z?undjrj8_zapya8ftKJR+Sn~a*u5RGM);WLXBh|72~2AW|B(%UUM*uTXM?B*v_J8u zyR~MlV@wz<2j{7s6MXpdur8)-Ys&U4GEt4O5m;aodrSJ|6v3O>h&B%%C0-Q+gcZ?) znw|N(aW@cKg@!U9x6ssci(1H_1aDF~5D$hp>4jC!`IMJ=^9_0u^0&)T42WDQ>ggV7 zv4~Gn9?%L1L<%R( zSrm;h)%f+zm@-G_`TTmzZLjhJxQRf^pgW99W3ki#&-nzH6()4SJ+y<)qVmFWb{II4 z`}rMhxQH356y{NThSS1$H1NTOOSS-_R4$4i#}d33&{G(zv1a~sE~t8aENeM_Ag<`nKGO=4K2Wp4Qk53>v+

1s4)RPwK<#{pTPb$MA zrj6=JUh`#R{naVdBz!SYwuQm-pgqC&1T_Qm?j_R=K|IYe%Lv29k?EJ5rI%ifQ}yQO z-9t!PU)cOxcKGZLNJ(y6bfyygw>B`pfmr^tG^zI&vuM6omQ0YzqtZg}HmgI9hpp~~ zTuv_Gg2UB!y|%>zPz-nkvW#oimY#(uHUXZ>kS0H2TmvU|(C|$C74&ZcO$3wKYw(jc zzGgUOP3Ui5o2I{*SB2j$@Sp!qY@$6rmA+(&^vjb+4bKRP@&G!8>S;gxs%mP%V#mkb zR$fBaT2^mfG4-hN*l`=@Kf>-*R=l|9cH609-+;<~%CQyWhYt&>sI7W7er)r@0P*|n zurs?8C1vN0Cf?jl9E*z)z(6qSaGLUN0pfzPuoD8do((7!YqNc&Xn!&m=k{%zkEQSrHTLSjQhcZ-XYmF>{Hmp000 zw6w=d?6RCNy$2NZi@AmoEekLaMnIMmuFE5_u~Es>6ZSdlBOK~oqDK`H%$3~SvdPOV z^T?y6XZ9F4+1kE$RUbtu$ZO&azna=#bF7*Xb!{ExQX_OVG&S}0(Q3)5&8|iH`8nrX zo|2_>2Rc1VJaQ~m6FdFN-d??0c4pT;4UK~b=b4o9AV!}rRLd^$%Brp)hPk=<4mY zdOFy;dp7>RV)%>JC6UjUKPl}ZgD?KFJ>j$tv8V$4ZdX0ct|au7HUZsxN#;JPysYr_ zp7nk`6V*96%4z8)IpCwUJw1aH6BF9h1CU&^di3iQ_>_O%nS16i2fMn9#cvOv^S^L} zfG*TFy@Uv|$DZk|r!ms?YA+V=uCF(_d2=S&bjJ<#)9Z8E)80gx9mAdYWb4nLQgE_U5Op`%&wFaNaCH=M|$M6-U?&d(U&k1K72`zR;Fl;duKTz3#zbP9?;)W zh;-}IT}@$$dq4Pq=%A?eYS5SeEw-^lIe^{42AQr!G?)p$PsU1=36SH+ox>en*~yQlsA%EfdryqQXiF zloi_B4Rg@iN1!<2{6l)3B`OV}SRrcsBPIzavM8&+lIh>9Mz0fC(to2DeR6IE6e|{m z`map-|3bZT=>IvPMxJT~?M4vPvN)t56Uo2d$Im9$y?Zy7Wd!Ewgs(pcWeOFIbPOhUaPNKhCYE$j9t^B^+N117ph_FN88A&rf$SDPtWEv?XahD?4o|w7D|CZ z=>G+MX;$w^hd0cDTtc$#>lb_Wz0Rjan`O)^$d-Sdp*2W}l@1N64z z7fA8_pl6Rj+4uB?M?QIjh3x(K(MkPaMy)FxZu0j0MgL-_e5OmY@M!0?m4C1MuYZ}_ z5;-{Gwd01hJWO@tFRgU3leN6BKmXgm8cDu_+T8^?qQZg59%N-q>*ahzRLX<&w_(e^ z?NuzUApQt^LGO27uoUzccdpz!S-u_09EI*tpcvyJ$r3d`L|B6`4wYf7F@j+Fp(_G+ z5~@e-5`nWw{`h2DRTTqLX+{b`dMV$fH;|M&m@pODNdoKlvQ!$V4})n&anl`draF<8 ze*FAPBu$452(5`61~-X}o1;qG;G==0tBw%QR4rY98^I2<%posq3`9flL*&#U0)=rh z5y7_aW1XCwaTDr_>so? 
zHI=A|!E!02Iar%4bRU++tZAk^W?GVcW8g66nlms%H($9h{kry3 z8Vt#&)+3!Q_T&0a{&B=8+8&KxkT#0LOoA=UPlN}uy671J#7#2XkM&KLi@V+#5h7kP zr{7PWaCi`J(bro^Y*oVM>MitDlLh!OZRumQJh*)mc%!_9v?a(bXErQQYKOwqx5 zeu#{WJkTEaE|xkk)=F=r#+M>5PUe8GZs}D2h&2r#v&=VUm`lOpGN2T8asU*#BOPJJ zdzz?PqgJD|4aWCQ)xdCvvEGtj02;Fs-`8;5#3)SBLwp@J)jMFE_Fskt{(p7ufN%$lyxGuGUj zPB+o8ZSD0_w#%^k?z*cSG5+VCdcL%vdc~^GHaQPx{h=|EPsycleq|Ggz_8WSZKN~9 zt5-VU>1n>rAqf zSd3>)s=oVS;=(K3fF&n|UEow%R6oV8{c;vx!)IG}J??Tx4jrtas8G^l7_KN@lp|xX z&7MrHDjlB*O@Pw;3`1}dYxX5Rc$nI!^8{ZR=fyo?=7?bgSBSC)<&}cGbjidnZ`6PV zfF8nP2l{do!WaSEg*B3oo3xf6(G7e+*{px(L9D-zJC58YAH;0S(4YjOC40M#f&w|) zB#YBj6a)FnmDM&477&vapPLeDr6U5nsB!R~bdJ?y^x@(1`Z(YdD||f#s2|7D@<2Tf z@6qvQO$YPJap>8MZ1OyS6w}!|J(L)@g$Oj?#L3HgA>%)EoQZ7 zH>IJqsr;8`BjmRgR34O3%kxI-+YQ&(^pX|&%Jy!|*grWwqit1KHS5mE*d!A&lMrZR zeHn_zZY6Lt`*K%zV{CsgkmOsTr}H24kUEFEFlC>`z!Waniy>0IGqm;|*sr4#y1%Iu z3|K-&6cKl^LbO#M#7iWZ*C?)Dy&u;V@aj`g0a9(Lc|hQrsE9>YY2m6~RhHMNKhE z+UUHrRZ0W0a=p3nGTyKbu^jTeNKC$auX`ybZ2axV7=T-szEbxvASb_N7{Z&JTM3G> zpP~}LZHt*IWIgQ6{8ZRBth$gz7(e!Q;EJ_E3^5RuD_mX8BjDV8_%ER~V zq=Gbg*gn1XX9ZD5YMk_ETBvr?iZ6g(A0R`X6!5s@df*7nN)lrdE{98?XOop5lF+n? z{F;YJo5L@2-&N|#Be9cFqk&vSP43&bSZAa`&z;OWlDYjrJ-uK};DyMH$$3xM9VsYZ z1GVY|o&(utUidf3+o0=n_e;?iO{k?GZS|uwnoJXEORxvTUN1~o{-cv@kS+Tm)LO0H z`6I7s?J!kb50YZakKCs0@@HXY5Dr(>C(wEM_d{j$3{o^5l48UUkKT@*L@0Opqt7oO zqNoDJ`@dvUJ+Ccn0Y&j4{J;8npkF$wI_@O-wK^oE?#(PVtnc*ni$G{E6ofgTIYKq3~cSg*15meha9b27{mq7yA3_hCR-NH1z)_l zC80LEE!z8uz8ciT)qX#;Bdrpk<29_T+S=OKcWdWOca#(Cc5M{<_>`O*dmJ3mdKb;| zvPzP@gUthz90?7LgoFfVDlv@RBnt^s3BO)9$N563t*w32(BQDEPtLIM5HMO-upSX5!%{X zH?AI5O+mkW0YZ8HdF@13^@F-8scC5oL(C= zA<<6-@wh{IHhYqc$7$0;?~bc=k+pO^EI_PqZBGFnWR^A7dUs;o&=cqhWUd4;eYbbd zx;%(IN|~~!xe16AX6b31_82o1JM>@G{>l$O^gyWbRu4#I#u~}qo|oKX+<7AOgQ7x| zrNwLenfA! 
z+DhspD9sbz{v#&+X3RxggV6oRO2M7YzgpPJZQ)T+q7w&&`Y#7*!NLFk3l$V9u)=Gi W%r8Mxf7cT5=j1W-qXmaue)}I{>dlJ) literal 0 HcmV?d00001 diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index b373d7badaa..83f304c2d3c 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -414,7 +414,17 @@ def format_excel_title(title: str) -> str: title = title.replace("(ns)", '') title = title.replace("(%)", '') title = title.replace(" ", "_") - return title + + # 将kernel_details中的列名转为与op_summary_x.csv中一致 + kernel_details_col_name_map = { + "name": "op_name", + "type": "op_type", + "accelerator_core": "task_type", + "start_time": "task_start_time", + "duration": "task_duration", + "wait_time": "wait_time" + } + return kernel_details_col_name_map.get(title, title) def format_float(num: float) -> float: diff --git a/profiler/affinity_cpu_bind/README.md b/profiler/affinity_cpu_bind/README.md deleted file mode 100644 index 8c3b47ed518..00000000000 --- a/profiler/affinity_cpu_bind/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# 昇腾亲和性CPU绑核工具 - -昇腾亲和性CPU绑核工具支持用户无需修改代码,直接运行工具即可实现按CPU亲和性策略绑核,提升推理或训练性能。 - -绑核工具用户arm服务器环境,对于训练或推理任务因为CPU资源调度等出现host_bound问题时使用,可改善该问题;对于非host_bound的场景无明显改善效果。 - -## 使用须知 - -使用绑核工具前手动执行npu-smi info -t topo,出现以下类似信息,说明环境支持绑核,否则请将环境HDK包升级到Ascend HDK 23.0.RC2及以上版本。 - - NPU0 NPU1 NPU2 NPU3 NPU4 NPU5 NPU6 NPU7 NPUx CPU Affinity - NPU0 X HCCS HCCS HCCS HCCS HCCS HCCS HCCS ... xx-xx - NPU1 HCCS X HCCS HCCS HCCS HCCS HCCS HCCS ... xx-xx - NPU2 HCCS HCCS X HCCS HCCS HCCS HCCS HCCS ... xx-xx - NPU3 HCCS HCCS HCCS X HCCS HCCS HCCS HCCS ... xx-xx - NPU4 HCCS HCCS HCCS HCCS X HCCS HCCS HCCS ... xx-xx - NPU5 HCCS HCCS HCCS HCCS HCCS X HCCS HCCS ... xx-xx - NPU6 HCCS HCCS HCCS HCCS HCCS HCCS X HCCS ... xx-xx - NPU7 HCCS HCCS HCCS HCCS HCCS HCCS HCCS X ... xx-xx - NPUx ... ... ... ... ... ... ... ... ... ... 
- -## 使用方式 - -1.执行以下命令实施绑核: - - - 直接执行绑核命令 -```bash -python3 bind_core.py -app/--application="inferenec/train cmd" -``` -该方式会自动拉起训练或推理任务,检测任务进程,并实施绑核。 - - - 手动拉起训练或推理任务后再执行绑核 -```bash -python3 bind_core.py -``` -该方式会循环查找(循环5次,每次10s,若找不到进程,则直接退出)使用到NPU的任务进程,并实施绑核。 - -2.绑核运行过程的日志会保存到当前路径的bind_core_时间戳.log。 - -3.如果推理或训练进程拉起后需要一定时间预处理,才会真正执行任务,可在执行绑核命令时设置-t/--time参数(单位秒),绑核工具会在延迟配置的时间后,再实施绑核动作。例如:python3 bind_core.py -app="cmd" -t=10,配置后工具会在10秒后执行绑核操作。 \ No newline at end of file diff --git a/profiler/affinity_cpu_bind/bind_core.py b/profiler/affinity_cpu_bind/bind_core.py deleted file mode 100644 index 7f27e924238..00000000000 --- a/profiler/affinity_cpu_bind/bind_core.py +++ /dev/null @@ -1,213 +0,0 @@ -import subprocess -import argparse -import os -import time -import logging -from datetime import datetime -from datetime import timezone - - -class PathManager: - DATA_FILE_AUTHORITY = 0o640 - - @classmethod - def create_file_safety(cls, path: str): - base_name = os.path.basename(path) - msg = f"Failed to create file: {base_name}" - if os.path.islink(path): - raise RuntimeError(msg) - if os.path.exists(path): - return - try: - os.close(os.open(path, os.O_WRONLY | os.O_CREAT, cls.DATA_FILE_AUTHORITY)) - except Exception as err: - raise RuntimeError(msg) from err - - -class BindCoreManager(): - DEFAULT_FIND_RUNNING_PID_TIMES = 5 - - def __init__(self): - self.npu_id_list = [] - self.running_pid_on_npu = {} - self.find_running_pid_times = self.DEFAULT_FIND_RUNNING_PID_TIMES - self.npu_affinity_cpu_dict = {} - self.log_file = '' - self._init_log_file() - - - def _init_log_file(self): - now_time = datetime.now(tz=timezone.utc) - time_stamp = str(now_time.year) + '_' + \ - str(now_time.month) + '_' + \ - str(now_time.day) + '_' + \ - str(now_time.hour) + '_' + \ - str(now_time.minute) + '_' + \ - str(now_time.second) - log_file_name = 'bind_core_' + time_stamp + '.log' - msg = f"Failed to create file: {log_file_name}" - try: - 
PathManager.create_file_safety(os.path.join(os.getcwd(), log_file_name)) - except RuntimeError as err: - raise RuntimeError(msg) from err - self.log_file = log_file_name - logging.basicConfig(filename=self.log_file, - level=logging.INFO, - filemode='w', - format='%(asctime)s-%(name)s-%(levelname)s-%(message)s') - - def _get_all_npu_id(self) -> None: - get_npu_info_cmd = 'npu-smi info -l' - get_npu_info_process = subprocess.run(get_npu_info_cmd.split(), shell=False, capture_output=True) - get_npu_id_cmd = 'grep ID' - get_npu_id_process = subprocess.run(get_npu_id_cmd.split(), shell=False, input=get_npu_info_process.stdout, capture_output=True) - res = get_npu_id_process.stdout.decode('utf-8').split() - for i in res: - if i.isdigit(): - self.npu_id_list.append(int(i)) - logging.info(f'NPU total id list: {self.npu_id_list}') - - def _get_npu_affinity(self) -> bool: - cpu_num = os.cpu_count() - cpu_num_for_each_npu = cpu_num // len(self.npu_id_list) - get_npu_topo_cmd = 'npu-smi info -t topo' - p = subprocess.run(get_npu_topo_cmd.split(), shell=False, capture_output=True) - res = p.stdout.decode('utf-8').split() - if not res: - print('[ERROR] Failed to run get npu affinity info, please check if driver version support cmd npu-smi info -t topo') - return False - - index = 0 - for v in res: - if '-' in v: - affinity_cpus = [] - cpu_lists = v.split(',') - for cpu_list in cpu_lists: - cpus = cpu_list.split('-') - if len(cpus) != 2: - continue - if int(cpus[1]) - int(cpus[0]) == cpu_num_for_each_npu - 1: - cpus[1] = str(int(cpus[1]) + cpu_num_for_each_npu) - affinity_cpus.append(cpus[0] + '-' + cpus[1]) - if index < len(self.npu_id_list): - self.npu_affinity_cpu_dict[self.npu_id_list[index]] = ','.join(affinity_cpu for affinity_cpu in affinity_cpus) - index += 1 - else: - print('[ERROR] Get affinity_cpu_list for {} npus, more than real npu num: {}'.format(index + 1, len(self.npu_id_list))) - return False - - for k in self.npu_affinity_cpu_dict.keys(): - 
logging.info(f'Affinity CPU list {self.npu_affinity_cpu_dict[k]} for NPU {k}') - return True - - def get_running_pid_on_npu(self) -> bool: - no_running_pids_on_npu_msg = '[INFO] Now there is no running process on all NPUs, stop bind cores' - logging.info('Begin to find running process on all NPUs') - # get running process on NPUs - for times in range(self.find_running_pid_times): - running_pid_on_npu = {} - for npu_id in self.npu_id_list: - get_npu_pids_cmd = 'npu-smi info -t proc-mem -i {} -c 0'.format(npu_id) - get_npu_pids_process = subprocess.run(get_npu_pids_cmd.split(), shell=False, capture_output=True) - res = get_npu_pids_process.stdout.decode('utf-8').split() - pid_list = [] - for value in res: - if value.startswith('id:'): - pid = value.split(':')[1] - pid_list.append(pid) - if pid_list: - running_pid_on_npu[npu_id] = list(set(pid_list)) - - if len(self.running_pid_on_npu.keys()) == len(running_pid_on_npu.keys()) and running_pid_on_npu: - self.running_pid_on_npu = running_pid_on_npu - break - - self.running_pid_on_npu = running_pid_on_npu - time.sleep(5) - - # delete repeat pid - for npu_id in self.npu_id_list: - if npu_id not in self.running_pid_on_npu: - continue - pids_on_npu = self.running_pid_on_npu[npu_id] - for npu_id_with_pids, pids in self.running_pid_on_npu.items(): - if npu_id == npu_id_with_pids: - continue - pids_on_npu = list(set(pids_on_npu) - set(pids)) - self.running_pid_on_npu[npu_id] = pids_on_npu - - if_running_process = False - for npu_id, pids in self.running_pid_on_npu.items(): - if not pids: - logging.info(f'There is no running process on NPU {npu_id}') - else: - logging.info(f'Succeed to find running process {pids} on NPU {npu_id}') - if_running_process = True - if not if_running_process: - print(no_running_pids_on_npu_msg) - return if_running_process - - def get_npu_info(self) -> bool: - try: - self._get_all_npu_id() - if not self._get_npu_affinity(): - return False - except subprocess.CalledProcessError: - return False - return 
True - - def run_bind_core(self): - if not self.running_pid_on_npu: - return - for npu, pid_list in self.running_pid_on_npu.items(): - if npu not in self.npu_affinity_cpu_dict.keys(): - logging.warning(f'Cannot find affinity cpu for npu: {npu}') - continue - affinity_cpu = self.npu_affinity_cpu_dict.get(npu) - for pid in pid_list: - try: - logging.info(f'Begin to bind cores for process {pid} on NPU {npu}') - set_affinity_cpu_cmd = 'taskset -pc {} {}'.format(affinity_cpu, pid) - p = subprocess.run(set_affinity_cpu_cmd.split(), shell=False, capture_output=True) - logging.info(p.stdout.decode('utf-8')) - except subprocess.CalledProcessError: - print('[ERROR] Failed to bind process {} on NPU {} with cpu cores list {}'.format(pid, npu, affinity_cpu)) - - logging.info(f'Succeed to bind process {pid} on NPU {npu} with cpu cores list {affinity_cpu}') - - def args_parse(self): - parser = argparse.ArgumentParser(description='This is a affinity cpu core bind script.') - parser.add_argument('-t', '--time', type=int, metavar='', help='Wait time before bind cores that you want to set. 
The unit is \'s\'.') - parser.add_argument('-app', '--application', metavar='', nargs='+', help='Training or inference command that you want to run.') - args = parser.parse_args() - if args.application: - application_cmd = ' '.join(args.application) - self.launch_process(application_cmd) - time.sleep(2) - # if time is set, wait for setting time before bind cores - if args.time: - time.sleep(args.time) - - def launch_process(self, cmd: list): - logging.info(f'Start to execute cmd: {cmd}') - try: - subprocess.Popen(cmd.split(), shell=False) - except subprocess.CalledProcessError as e: - raise RuntimeError(f'Failed to run cmd: {cmd}') from e - - -if __name__ == '__main__': - print('[INFO] Begin to run bind-cores script...') - bind_core_manager = BindCoreManager() - bind_core_manager.args_parse() - - if not bind_core_manager.get_npu_info(): - print('[ERROR] Failed to get current npus info') - exit() - - if not bind_core_manager.get_running_pid_on_npu(): - exit() - bind_core_manager.run_bind_core() - print('[INFO] End to run bind-cores script, the log is saved in {}'.format(bind_core_manager.log_file)) - - diff --git a/profiler/cli/cluster_cli.py b/profiler/cli/cluster_cli.py index 93a4a638f27..c1563898d70 100644 --- a/profiler/cli/cluster_cli.py +++ b/profiler/cli/cluster_cli.py @@ -21,7 +21,7 @@ sys.path.append(os.path.dirname(os.path.dirname(__file__))) from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup from profiler.advisor.utils.utils import debug_option from profiler.prof_common.constant import Constant -from profiler.cluster_analyse.cluster_analysis import ALL_FEATURE_LIST +from profiler.cluster_analyse.cluster_analysis import COMM_FEATURE_LIST from profiler.cluster_analyse.cluster_analysis import cluster_analysis_main @@ -33,7 +33,7 @@ context_settings['ignore_unknown_options'] = True short_help='Analyze cluster data to locate slow nodes and slow links.') @click.option('--profiling_path', '-d', type=click.Path(), required=True, 
help='path of the profiling data') -@click.option('--mode', '-m', type=click.Choice(ALL_FEATURE_LIST), default='all') +@click.option('--mode', '-m', type=click.Choice(COMM_FEATURE_LIST), default='all') @click.argument('args', nargs=-1) def cluster_cli(profiling_path, mode, args) -> None: required_args = ('-d', profiling_path, '-m', mode) diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index e794578da8c..f9add948ea9 100644 --- a/profiler/cli/compare_cli.py +++ b/profiler/cli/compare_cli.py @@ -32,6 +32,8 @@ from profiler.compare_tools.compare_backend.comparison_generator import Comparis @click.option('--enable_operator_compare', is_flag=True) @click.option('--enable_memory_compare', is_flag=True) @click.option('--enable_communication_compare', is_flag=True) +@click.option('--enable_api_compare', is_flag=True) +@click.option('--enable_kernel_compare', is_flag=True) @click.option('--disable_details', is_flag=True) @click.option('--output_path', '-o', 'output_path', type=click.Path()) @click.option('--max_kernel_num', 'max_kernel_num', type=int, help="The number of kernels per torch op is limited.") diff --git a/profiler/cluster_analyse/README.md b/profiler/cluster_analyse/README.md index fdd43ca965f..4a394e09a48 100644 --- a/profiler/cluster_analyse/README.md +++ b/profiler/cluster_analyse/README.md @@ -54,10 +54,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( | --------------------- | ------------------------------------------------------------ | -------- | | --collection_path或-d | 性能数据汇集目录,运行分析脚本之后会在该目录下自动创建cluster_analysis_output文件夹,保存分析数据。 | 是 | | --mode或-m | 数据解析模式,取值详见“**--mode参数说明**”表。 | 否 | - | --parallel_mode | 设置收集多卡、多节点db数据时的并发方式。取值为concurrent(使用concurrent.feature进程池实现并发)。
**只有-m配置cann_api_sum、compute_op_sum、hccl_sum、mstx_sum时可配置此参数。** | 否 | - | --export_type | 设置导出的数据形式。取值为db(.db格式文件)和notebook(Jupyter Notebook文件),默认值为db。
**只有-m配置cann_api_sum、compute_op_sum、hccl_sum、mstx_sum时可配置此参数。** | 否 | - | --rank_list | 对特定Rank上的数据进行统计,默认值为all(表示对所有Rank进行统计),须根据实际卡的Rank ID配置。应配置为大于等于0的整数,若所配置的值大于实际训练所运行的卡的Rank ID,则仅解析合法的RankID的数据,比如当前环境Rank ID为0到7,实际训练运行0到3卡,此时若配置Rank ID为0, 3, 4或不存在的10等其他值,则仅解析0和3。配置示例:--rank_list 0, 1, 2。
**只有-m配置cann_api_sum、compute_op_sum、hccl_sum、mstx_sum时可配置此参数。** | 否 | - | --top_num | 设置TopN耗时的通信算子的数量,默认值为15,配置示例:--top_num 20。
**只有-m配置hccl_sum时可配置此参数。** | 否 | + --mode参数说明: @@ -66,22 +63,7 @@ experimental_config = torch_npu.profiler._ExperimentalConfig( | communication_matrix | 解析通信矩阵数据。 | 否 | | communication_time | 解析通信耗时数据。 | 否 | | all | 同时解析通信矩阵communication_matrix和通信耗时数据communication_time,--mode参数默认值为all。 | 否 | - | cann_api_sum | 集群API性能数据汇总分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/CannApiSum目录下输出交付件stats.ipynb。 | 否 | - | compute_op_sum | 集群场景性能数据的device运行算子信息汇总分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/ComputeOpSum目录下输出交付件stats.ipynb。 | 否 | - | hccl_sum | 集合通信算子耗时分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/HcclSum目录下输出交付件stats.ipynb。 | 否 | - | mstx_sum | 集群场景mstx打点信息汇总分析,输入性能数据需要基于ascend_pytorch_profiler_{rank_id}.db文件。--export_type为db时,输出交付件cluster_analysis.db;--export_type为notebook时,在cluster_analysis_output/MstxSum目录下输出交付件stats.ipynb。 | 否 | - - --parallel_mode参数示例如下: - - ```bash - msprof-analyze cluster -d {cluster profiling data path} -m cann_api_sum --parallel_mode concurrent - ``` - - 或 - - ```bash - python3 cluster_analysis.py -d {cluster profiling data path} -m cann_api_sum --parallel_mode concurrent - ``` + ### 交付件 @@ -158,23 +140,6 @@ L列:Preparing,指迭代开始到首个计算或通信算子运行的时间 解析analysis.db或ascend_pytorch_profiler_{rank_id}.db生成的交付件,根据数据解析模式不同而解析不同的数据,可以使用MindStudio Insight工具展示。 -#### stats.ipynb - -- 数据解析模式为cann_api_sum时生成,保存在cluster_analysis_output/CannApiSum目录下。 - - 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群API耗时信息。 - -- 数据解析模式为compute_op_sum时生成,保存在cluster_analysis_output/ComputeOpSum目录下。 - - 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群计算算子耗时分析(将集群所有计算算子进行汇总并以图表展示),集群Rank计算算子耗时分析(将每个Rank的计算算子进行各自汇总)。 - -- 数据解析模式为hccl_sum时生成,保存在cluster_analysis_output/HcclSum目录下。 - - 
可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群通信算子耗时分析(将集群所有通信算子进行汇总并以图表展示),集群Rank通信算子耗时分析(将每个Rank的通信算子进行各自汇总)、Top通信算子信息展示。 - -- 数据解析模式为mstx_sum时生成,保存在cluster_analysis_output/MstxSum目录下。 - - 可使用jupyter notebook工具或MindStudio Insight工具打开,主要展示集群场景mstx打点信息,分为框架侧、CANN侧和Device侧三部分的打点信息。 diff --git a/profiler/cluster_analyse/analysis/analysis_facade.py b/profiler/cluster_analyse/analysis/analysis_facade.py index 435d77b21bf..c438fbcf322 100644 --- a/profiler/cluster_analyse/analysis/analysis_facade.py +++ b/profiler/cluster_analyse/analysis/analysis_facade.py @@ -19,8 +19,6 @@ from analysis.communication_analysis import CommunicationAnalysis from analysis.comm_matrix_analysis import CommMatrixAnalysis from analysis.step_trace_time_analysis import StepTraceTimeAnalysis from analysis.host_info_analysis import HostInfoAnalysis -from common_func.context import Context -from common_func.constant import Constant class AnalysisFacade: default_module = {CommunicationAnalysis, StepTraceTimeAnalysis, CommMatrixAnalysis, HostInfoAnalysis} @@ -38,13 +36,3 @@ class AnalysisFacade: for process in process_list: process.join() - - def recipe_analyze(self): - HostInfoAnalysis(self.params).run() - print("[INFO] Recipe analysis launched.") - try: - with Context.create_context(self.params.get(Constant.PARALLEL_MODE)) as context: - with self.params.get(Constant.RECIPE_CLASS)(self.params) as recipe: - recipe.run(context) - except Exception as e: - print("[ERROR] Recipe analysis launched failed, %s." 
% str(e)) diff --git a/profiler/cluster_analyse/analysis/base_analysis.py b/profiler/cluster_analyse/analysis/base_analysis.py index 7209e9b56f0..d7be4fc9cf6 100644 --- a/profiler/cluster_analyse/analysis/base_analysis.py +++ b/profiler/cluster_analyse/analysis/base_analysis.py @@ -22,8 +22,6 @@ from abc import abstractmethod from common_func.constant import Constant from common_func.file_manager import FileManager -from common_func.db_manager import DBManager -from common_func.utils import convert_unit from cluster_utils.data_transfer_adapter import DataTransferAdapter @@ -102,154 +100,3 @@ class BaseAnalysis: for rank_tup, group_dict in self.comm_ops_struct.items(): for step_id, communication_ops in group_dict.items(): self.compute_total_info(communication_ops) - - -class BaseRecipeAnalysis: - - UNIT = "Us" - DB_UNIT = "Ns" - - RANK_LIST = "rank_list" - - def __init__(self, params): - self._params = params - self._collection_dir = params.get(Constant.COLLECTION_PATH, "") - self._data_map = params.get(Constant.DATA_MAP, {}) - self._recipe_name = params.get(Constant.RECIPE_NAME, "") - self._mode = params.get(Constant.PARALLEL_MODE, "") - self._export_type = params.get(Constant.EXPORT_TYPE, "") - self._output_dir = None - self._rank_list = params.get(self.RANK_LIST, 'all') - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self._params is not None and exc_type is not None: - print(f"[ERROR] Failed to exit analysis: {exc_val}") - traceback.print_exc(file=sys.stdout) - - def run(self, context): - pass - - @property - def base_dir(self): - return os.path.basename(os.path.dirname(__file__)) - - def _get_rank_db(self): - invalid_rank_id = [] - if self._rank_list == 'all': - rank_ids = list(self._data_map.keys()) - else: - rank_ids = [] - for rank_id in self._rank_list: - if rank_id in self._data_map.keys(): - rank_ids.append(rank_id) - else: - invalid_rank_id.append(str(rank_id)) - db_paths = [] - for rank_id in rank_ids: - 
rank_path = self._data_map[rank_id] - db_path = os.path.join(rank_path, Constant.SINGLE_OUTPUT, f"ascend_pytorch_profiler_{rank_id}.db") - if os.path.exists(db_path): - db_paths.append((rank_id, db_path)) - else: - print(f"[WARNING] DB file not found, rank id: {rank_id}, db path: {db_path}.") - if invalid_rank_id: - print(f"[WARNING] Invalid Rank id : [{','.join(invalid_rank_id)}].") - return db_paths - - def get_mode(self): - return self._mode - - def get_recipe_name(self): - return self._recipe_name - - def dump_data(self, data, file_name, table_name=None, index=True): - output_path = os.path.join(self._collection_dir, Constant.CLUSTER_ANALYSIS_OUTPUT) - if table_name: - result_db = os.path.join(output_path, file_name) - conn, cursor = DBManager.create_connect_db(result_db) - if isinstance(data, pd.DataFrame): - data.to_sql(table_name, conn, if_exists='replace', index=True) - else: - print(f"[ERROR] Unknown dump data type: {type(data)}") - DBManager.destroy_db_connect(conn, cursor) - else: - result_csv = os.path.join(output_path, file_name) - if isinstance(data, pd.DataFrame): - data = convert_unit(data, self.DB_UNIT, self.UNIT) - data.to_csv(result_csv, index=index) - else: - print(f"[ERROR] Unknown dump data type: {type(data)}") - - def _create_output_dir_name(self, name): - i = 1 - while os.path.exists(f"{name}-{i}"): - i += 1 - return f"{name}-{i}" - - def _create_unique_output_dir(self): - output_dir = os.path.join(self._collection_dir, Constant.CLUSTER_ANALYSIS_OUTPUT, self._recipe_name) - - if os.path.exists(output_dir): - return self._create_output_dir_name(output_dir) - return output_dir - - def _get_output_dir(self): - if self._output_dir is None: - self._output_dir = self._create_unique_output_dir() - os.makedirs(self._output_dir) - return self._output_dir - - def create_notebook(self, filename, notebook_template_dir=None, replace_dict=None): - if notebook_template_dir is None: - template_path = os.path.dirname(__file__) - else: - template_path = 
notebook_template_dir - output_path = os.path.join(self._get_output_dir(), filename) - template_file = os.path.join(template_path, self.base_dir, filename) - if replace_dict is None: - shutil.copy(template_file, output_path) - else: - with open(template_file, 'r') as f: - template_content = f.read() - for key, value in replace_dict.items(): - template_content = template_content.replace(str(key), str(value)) - with open(output_path, 'w') as f: - f.write(template_content) - print(f"[INFO] Notebook export path is: {self._get_output_dir()}") - - def add_helper_file(self, helper_file): - helper_output_path = os.path.join(self._get_output_dir(), helper_file) - helper_file_path = os.path.join(os.path.dirname(__file__), helper_file) - - if helper_file_path is not None: - shutil.copy(helper_file_path, helper_output_path) - - @staticmethod - def _filter_data(mapper_data): - return [(rank, data) for rank, data in mapper_data if data is not None and len(data) != 0] - - @classmethod - def add_parser_argument(cls, parser): - parser.add_argument("--rank_list", type=str, help="Rank id list", default='all') - - @classmethod - def parse_argument(cls, args_parsed) -> dict: - if args_parsed.rank_list == 'all': - return { - cls.RANK_LIST: 'all' - } - else: - rank_str_list = args_parsed.rank_list.split(",") - rank_list = [int(rank) for rank in rank_str_list if rank.isdigit()] - return { - cls.RANK_LIST: rank_list - } - - @classmethod - def get_extra_argument(cls, params) -> dict: - return { - cls.RANK_LIST: params.get(cls.RANK_LIST, "all") - } diff --git a/profiler/cluster_analyse/analysis/cann_api_sum/__init__.py b/profiler/cluster_analyse/analysis/cann_api_sum/__init__.py deleted file mode 100644 index 7101187a2c2..00000000000 --- a/profiler/cluster_analyse/analysis/cann_api_sum/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/cluster_analyse/analysis/cann_api_sum/cann_api_sum.py b/profiler/cluster_analyse/analysis/cann_api_sum/cann_api_sum.py deleted file mode 100644 index db37b004b15..00000000000 --- a/profiler/cluster_analyse/analysis/cann_api_sum/cann_api_sum.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import pandas as pd - -from analysis.base_analysis import BaseRecipeAnalysis -from common_func.constant import Constant -from common_func.utils import stdev -from cluster_statistics_export.cann_api_sum_export import CannApiSumExport - - -class CannApiSum(BaseRecipeAnalysis): - - def __init__(self, params): - super().__init__(params) - print("[INFO] CannApiSum init.") - - @property - def base_dir(self): - return os.path.basename(os.path.dirname(__file__)) - - @staticmethod - def _mapper_func(data_map, analysis_class): - df = CannApiSumExport(data_map[1], analysis_class).read_export_db() - - if df is None or df.empty: - print(f"[WARNING] There is no stats data in {data_map[1]}.") - return None - return data_map[0], df - - def mapper_func(self, context): - return context.wait( - context.map( - self._mapper_func, - self._get_rank_db(), - analysis_class=self._recipe_name - ) - ) - - def reducer_func(self, mapper_res): - stats_rank_data = self._filter_data(mapper_res) - if not stats_rank_data: - print("[ERROR] Mapper data is None.") - return - stats_rank_data = [df.assign(rank=rank) for rank, df in stats_rank_data] - stats_rank_data = pd.concat(stats_rank_data) - stats_data = self._aggregate_stats(stats_rank_data) - if self._export_type == "db": - self.dump_data(stats_rank_data, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, "CannApiSumRank") - self.dump_data(stats_data, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, "CannApiSum") - elif self._export_type == "notebook": - self.dump_data(stats_rank_data, os.path.join(self._get_output_dir(), "rank_stats.csv"), index=False) - self.dump_data(stats_data, os.path.join(self._get_output_dir(), "all_stats.csv")) - self.save_notebook() - else: - print("[ERROR] Unknown export type.") - - def run(self, context): - mapper_res = self.mapper_func(context) - self.reducer_func(mapper_res) - - @staticmethod - def _aggregate_stats(stats_res): - grouped = stats_res.groupby("name") - res = {} - total_time = 
grouped["totalTimeNs"].sum() - res["timeRatio"] = total_time / total_time.sum() * 100.0 - res["totalTimeNs"] = total_time - res["totalCount"] = grouped["totalCount"].sum() - res["averageNs"] = res["totalTimeNs"] / res["totalCount"] - res["Q1Ns"] = grouped["Q1Ns"].min() - res["medNs"] = grouped["medNs"].median() - res["Q3Ns"] = grouped["Q3Ns"].max() - res["minNs"] = grouped["minNs"].min() - res["maxNs"] = grouped["maxNs"].max() - res["stdev"] = grouped.apply(lambda x: stdev(x, res)) - min_value = grouped["minNs"].min() - res["minRank"] = grouped.apply( - lambda x: ", ".join( - x.loc[x["minNs"] == min_value.loc[x.name], "rank"].astype(str) - ) - ) - max_value = grouped["maxNs"].max() - res["maxRank"] = grouped.apply( - lambda x: ", ".join( - x.loc[x["maxNs"] == max_value.loc[x.name], "rank"].astype(str) - ) - ) - res = pd.concat(res.values(), axis=1, keys=res.keys()).round(1) - res.sort_values(by="totalTimeNs", ascending=False, inplace=True) - return res - - def save_notebook(self): - self.create_notebook("stats.ipynb") - self.add_helper_file("cluster_display.py") diff --git a/profiler/cluster_analyse/analysis/cann_api_sum/stats.ipynb b/profiler/cluster_analyse/analysis/cann_api_sum/stats.ipynb deleted file mode 100644 index c97f039c5a0..00000000000 --- a/profiler/cluster_analyse/analysis/cann_api_sum/stats.ipynb +++ /dev/null @@ -1,86 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CANN_API_SUM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import plotly.offline as pyo\n", - "\n", - "from IPython.display import display, HTML\n", - "\n", - "import cluster_display\n", - "\n", - "display(HTML(\"\"))\n", - "pd.set_option('display.max_columns', None)\n", - "pd.set_option('display.max_rows', None)\n", - "pyo.init_notebook_mode()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, 
- "source": [ - "\n", - "\n", - "## 集群场景CANN层API统计分析\n", - "该分析脚本展示了集群场景的统计数据分析结果。需要注意以下几点:\n", - "1. 所有的时间信息单位是微秒(us);\n", - "2. Q1表示单个API耗时的25%分位数,最终结果取自所有卡的Q1值中最小值;\n", - "3. Q3表示单个API耗时的75%分位数,最终结果取自所有卡的Q3值中最大值;\n", - "4. 'minRank'展示了API最小耗时所在卡;\n", - "5. 'maxRank'展示了API最大耗时所在卡。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(\"all_stats.csv\")\n", - "display(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cluster_display.display_box(df, xaxis_title=\"name\", yaxis_title=\"duration (ns)\")\n", - "cluster_display.display_stats_scatter(df, xaxis_title=\"name\", yaxis_title=\"duration (ns)\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "per_rank_df = pd.read_csv(\"rank_stats.csv\")\n", - "cluster_display.display_stats_per_operation(per_rank_df, xaxis_title='rank', yaxis_title='duration (ns)')" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/profiler/cluster_analyse/analysis/cluster_display.py b/profiler/cluster_analyse/analysis/cluster_display.py deleted file mode 100644 index 8fc6040ccaf..00000000000 --- a/profiler/cluster_analyse/analysis/cluster_display.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import pandas as pd -import plotly.graph_objects as go -from IPython.display import display, HTML -from ipywidgets import Dropdown, fixed, interact - - -def get_stats_cols(df): - cols = df.columns.tolist() - q1 = "Q1(Us)" if "Q1(Us)" in cols else "Q1~" - q3 = "Q3(Us)" if "Q3(Us)" in cols else "Q3~" - med = "med(Us)" if "med(Us)" in cols else "med~" - std = "stdev" if "stdev" in cols else "stdev~" - return q1, q3, med, std - - -def display_box(df, x=None, **layout_args): - if x is None: - x = df.columns[0] - q1, q3, med, std = get_stats_cols(df) - fig = go.Figure() - fig.add_trace( - go.Box( - x=df[x], - q1=df[q1], - median=df[med], - q3=df[q3], - sd=df[std], - lowerfence=df["minRank"], - upperfence=df["maxRank"] - ) - ) - fig.update_layout(**layout_args) - fig.show() - - -def display_stats_scatter(df, x=None, **layout_args): - if x is None: - x = df.columns[0] - q1, q3, med, _ = get_stats_cols(df) - fig = go.Figure() - col_names = [q1, med, q3, "minRank", "maxRank"] - for name in col_names: - fig.add_trace( - go.Scatter( - x=df[x], - y=df[name], - name=name - ) - ) - fig.update_layout(**layout_args) - fig.show() - - -def display_table_per_rank(df): - if df.empty: - display(df) - return - - rank_groups = df.groupby("rank") - def display_table(name): - rank_df = rank_groups.get_group(name) - rank_df = rank_df.drop(columns=["rank"]) - display(rank_df) - - dropdown = Dropdown( - options=rank_groups.groups.keys(), - description="rank:", - disabled=False, - ) - interact( - display_table, - name=dropdown - ) - - -def display_stats_per_operation(df, x=None, box=True, scatter=True, table=True, **layout_args): - if df.empty: - display(df) - return - - if x is None: - x = df.columns[0] - - op_groups = df.groupby(x) - - def display_graphs(name): - op_df = op_groups.get_group(name) - if table: - display(op_df.reset_index(drop=True).set_index("rank")) - 
if box: - display_box(op_df, x=op_df["rank"], **layout_args) - if scatter: - display_stats_scatter(op_df, x=op_df["rank"], **layout_args) - - operations = list(op_groups.groups.keys()) - - if len(operations) > 1: - dropdown = Dropdown( - options=operations, - description="Operation:", - disabled=False, - value=operations[1] - ) - interact( - display_graphs, - name=dropdown - ) - dropdown.value = operations[0] - else: - display_graphs(operations[0]) - - -def display_duration_boxplots(figs, stats_df: pd.DataFrame, orientation="v", title=None, - x_title="Names", y_title="Time", legend_title="Legend"): - mean_ds = stats_df.get("Mean(Us)", None) - min_ds = stats_df.get("Min(Us)", None) - max_ds = stats_df.get("Max(Us)", None) - q1_ds = stats_df.get("Q1(Us)", None) - median_ds = stats_df.get('Median(Us)', None) - q3_ds = stats_df.get('Q3(Us)', None) - return display_boxplot(figs, stats_df.index, min_ds, q1_ds, median_ds, q3_ds, max_ds, mean_ds, - orientation=orientation, title=title, x_title=x_title, y_title=y_title, - legend_title=legend_title) - - -def display_boxplot(figs, x_axis, min_ds, q1_ds, median_ds, q3_ds, max_ds, mean_ds, orientation="v", - title=None, x_title=None, y_title="Time", legend_title="Legend"): - fig = go.Figure() - fig.add_trace( - go.Box( - x=x_axis, - lowerfence=min_ds, - q1=q1_ds, - median=median_ds, - q3=q3_ds, - upperfence=max_ds, - mean=mean_ds - ) - ) - fig.update_traces(orientation=orientation) - fig.update_layout( - xaxis_title=x_title, yaxis_title=y_title, legend_title=legend_title, - title=title, height=1024 - ) - fig.show() - if isinstance(figs, list): - figs.append(fig) - return fig - - -def display_graph(figs, x_axis, y_axes, title=None, - x_title=None, y_title=None, legend_title="Legend"): - data = None - if isinstance(y_axes, pd.DataFrame): - data = y_axes.set_index(x_axis) - elif isinstance(y_axes, dict): - data = pd.DataFrame(y_axes, index=x_axis) - elif isinstance(y_axes, pd.Series): - data = pd.DataFrame({"": y_axes}, 
index=x_axis) - elif isinstance(y_axes, np.ndarray): - data = pd.DataFrame({"": pd.Series(y_axes)}, index=x_axis) - else: - return - - fig = data.plot.line() - fig.update_layout( - title=title, xaxis_title=x_title, yaxis_title=y_title, legend_title=legend_title - ) - fig.show() - if isinstance(figs, list): - figs.append(fig) - return fig - - -def display_stats_per_rank_groups_combobox(rank_stats_gdf): - names = list(rank_stats_gdf.groups.keys()) - if len(names) > 1: - dropdown = Dropdown( - options=names, layout={"width": "max-content"}, value=names[1] - ) - interact( - __display_stats_per_rank_group, - selected=dropdown, - rank_stats_gdf=fixed(rank_stats_gdf) - ) - dropdown.value = names[0] - elif len(names) == 1: - __display_stats_per_rank_group(names[0], rank_stats_gdf) - else: - print("cluster_display func:input rank_stats_gdf groups is null so no need to display") - - -def __display_stats_per_rank_group(selected, rank_stats_gdf): - df = rank_stats_gdf.get_group(selected) - df = df.reset_index(drop=True) - df = df.set_index(df["Rank"]) - display(df) - - figs = [] - display_duration_boxplots(figs, df, x_title="Ranks") - display_graph( - figs, - df.index, - df[["Q1(Us)", "Median(Us)", "Q3(Us)"]], - title="50% of Distribution", - x_title="Ranks" - ) - - -def display_stats_optional_combobox(options, display_func, args, description="Option:"): - if len(options) > 1: - dropdown = Dropdown( - options=options, layout={"width": "max-content"}, value=options[1], - description=description - ) - interact( - display_func, - selected=dropdown, - args=fixed(args) - ) - dropdown.value = options[0] - elif len(options) == 1: - display_func(options[0], args) diff --git a/profiler/cluster_analyse/analysis/compute_op_sum/__init__.py b/profiler/cluster_analyse/analysis/compute_op_sum/__init__.py deleted file mode 100644 index 7101187a2c2..00000000000 --- a/profiler/cluster_analyse/analysis/compute_op_sum/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2024, Huawei 
Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/cluster_analyse/analysis/compute_op_sum/compute_op_sum.py b/profiler/cluster_analyse/analysis/compute_op_sum/compute_op_sum.py deleted file mode 100644 index e71cf868ac9..00000000000 --- a/profiler/cluster_analyse/analysis/compute_op_sum/compute_op_sum.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import pandas as pd -from analysis.base_analysis import BaseRecipeAnalysis -from common_func.constant import Constant -from common_func.utils import describe_duration -from cluster_statistics_export.compute_op_sum_export import ComputeOpSumExport - - -class ComputeOpSum(BaseRecipeAnalysis): - - TABLE_ALL_RANK_STATS = "ComputeOpAllRankStats" - TABLE_PER_RANK_STATS_BY_OPTYPE = "ComputeOpPerRankStatsByOpType" - TABLE_PER_RANK_STATS_BY_OPNAME = "ComputeOpPerRankStatsByOpName" - - def __init__(self, params): - super().__init__(params) - print("[INFO] ComputeOpSum init.") - self.all_rank_stats = None - self.per_rank_stats_by_optype = None - self.per_rank_stats_by_opname = None - - @property - def base_dir(self): - return os.path.basename(os.path.dirname(__file__)) - - @staticmethod - def _mapper_func(data_map, analysis_class): - df = ComputeOpSumExport(data_map[1], analysis_class).read_export_db() - - if df is None or df.empty: - print(f"[WARNING] There is no stats data in {data_map[1]}.") - return None - - df["Rank"] = data_map[0] - return df - - def mapper_func(self, context): - return context.wait( - context.map( - self._mapper_func, - self._get_rank_db(), - analysis_class=self._recipe_name - ) - ) - - def reducer_func(self, mapper_res): - mapper_res = list(filter(lambda df: df is not None, mapper_res)) - if not mapper_res: - print("[ERROR] Mapper data is None.") - return - # get per rank stats by optype - self.per_rank_stats_by_optype = pd.concat( - describe_duration(df.groupby(["OpType", "TaskType"])["Duration"]).assign(Rank=df["Rank"][0]) for df in mapper_res) - self.per_rank_stats_by_optype.sort_values(by=["SumNs"], inplace=True, ascending=False) - - # get all rank stats by optype - all_op_data = pd.concat(mapper_res) - self.all_rank_stats = describe_duration(all_op_data.groupby(["OpType", "TaskType"])["Duration"]) - self.all_rank_stats.sort_values(by=["SumNs"], inplace=True, ascending=False) - - # get per rank stats by opname - 
self.per_rank_stats_by_opname = pd.concat( - describe_duration(df.groupby(["OpName", "OpType", "TaskType", "InputShapes"])["Duration"]).assign(Rank=df["Rank"][0]) for df in mapper_res) - self.per_rank_stats_by_opname.sort_values(by=["SumNs"], inplace=True, ascending=False) - - def run(self, context): - super().run(context) - mapper_res = self.mapper_func(context) - self.reducer_func(mapper_res) - - if self._export_type == "db": - self.save_db() - elif self._export_type == "notebook": - self.save_notebook() - else: - print("[ERROR] Unknown export type.") - - def save_notebook(self): - self.dump_data(self.all_rank_stats, os.path.join(self._get_output_dir(), "all_stats.csv")) - self.dump_data(self.per_rank_stats_by_optype, os.path.join(self._get_output_dir(), "rank_stats_by_optype.csv")) - self.dump_data(self.per_rank_stats_by_opname, os.path.join(self._get_output_dir(), "rank_stats_by_opname.csv")) - self.create_notebook("stats.ipynb") - self.add_helper_file("cluster_display.py") - - def save_db(self): - self.dump_data(self.all_rank_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_ALL_RANK_STATS) - self.dump_data(self.per_rank_stats_by_optype, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_PER_RANK_STATS_BY_OPTYPE) - self.dump_data(self.per_rank_stats_by_opname, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_PER_RANK_STATS_BY_OPNAME) diff --git a/profiler/cluster_analyse/analysis/compute_op_sum/stats.ipynb b/profiler/cluster_analyse/analysis/compute_op_sum/stats.ipynb deleted file mode 100644 index c88d2684c1f..00000000000 --- a/profiler/cluster_analyse/analysis/compute_op_sum/stats.ipynb +++ /dev/null @@ -1,164 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compute Op Summary\n", - "\n", - "集群场景计算类算子数据分析\n", - "\n", - "主要包含以下3个统计内容:\n", - "1. 按算子类型和任务类型分组的,整个集群通信算子耗时的统计情况\n", - "2. 按算子类型和任务类型分组的,每个Rank上计算类算子的耗时情况\n", - "3. 
按算子名称、任务类型、输入shape分组的,每个Rank上的计算类算子的耗时情况" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 数据准备" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import display, HTML\n", - "display(HTML(\"\"))\n", - "\n", - "import plotly.offline as pyo\n", - "\n", - "def is_lab_notebook():\n", - " import re\n", - " import psutil\n", - " return any(re.search('jupyter--lab-script', x) for x in psutil.Process().parent().cmdline())\n", - "\n", - "if is_lab_notebook():\n", - " pyo.init_notebook_mode()\n", - "\n", - "import pandas as pd\n", - "pd.options.plotting.backend = \"plotly\"\n", - "pd.set_option(\"display.max_rows\", 100)\n", - "pd.set_option(\"display.width\", 1000)\n", - "\n", - "import cluster_display\n", - "\n", - "all_stats_df = pd.read_csv(\"all_stats.csv\", index_col=\"OpType\")\n", - "rank_stats_by_optype_df = pd.read_csv(\"rank_stats_by_optype.csv\", index_col=\"OpType\")\n", - "rank_stats_by_opname_df = pd.read_csv(\"rank_stats_by_opname.csv\", index_col=\"OpName\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 计算类算子耗时分析\n", - "\n", - "将整个集群所有Rank的计算类算子进行汇总,按算子类型和任务类型分类,统计分析耗时情况,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Count:算子数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(all_stats_df)\n", - "fig_all_rank = cluster_display.display_duration_boxplots(None, all_stats_df, x_title=\"OpType\")\n", - "fig_per_rank = cluster_display.display_graph(None, all_stats_df.index, all_stats_df[[\"Q1(Us)\", \"Median(Us)\", \"Q3(Us)\"]], title=\"50% of Distribution\", x_title=\"OpType\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 单个Rank的计算类算子基于算子类型的耗时分析\n", - 
"将集群内每个Rank的计算类算子进行汇总,按算子类型和任务类型分类,统计分析耗时情况,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Count:算子数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rank_stats_gdf = rank_stats_by_optype_df.groupby(rank_stats_by_optype_df.index)\n", - "cluster_display.display_stats_per_rank_groups_combobox(rank_stats_gdf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 单个Rank的计算类算子基于算子名的耗时分析\n", - "\n", - "将集群内每个Rank的计算类算子进行汇总,按算子名称、任务类型、输入shape分类,统计分析耗时情况,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Count:算子数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rank_stats_gdf = rank_stats_by_opname_df.groupby(rank_stats_by_opname_df.index)\n", - "cluster_display.display_stats_per_rank_groups_combobox(rank_stats_gdf)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/profiler/cluster_analyse/analysis/hccl_sum/__init__.py b/profiler/cluster_analyse/analysis/hccl_sum/__init__.py deleted file mode 100644 index 7101187a2c2..00000000000 --- a/profiler/cluster_analyse/analysis/hccl_sum/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/cluster_analyse/analysis/hccl_sum/hccl_sum.py b/profiler/cluster_analyse/analysis/hccl_sum/hccl_sum.py deleted file mode 100644 index da0c575e468..00000000000 --- a/profiler/cluster_analyse/analysis/hccl_sum/hccl_sum.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import pandas as pd -from analysis.base_analysis import BaseRecipeAnalysis -from common_func.constant import Constant -from common_func.utils import describe_duration -from cluster_statistics_export.hccl_sum_export import HcclSumExport - - -class HcclSum(BaseRecipeAnalysis): - - TABLE_ALL_RANK_STATS = "HcclAllRankStats" - TABLE_PER_RANK_STATS = "HcclPerRankStats" - TABLE_TOP_OP_STATS = "HcclTopOpStats" - - TOP_NUM = "top_num" - DEFAULT_TOP_NUM = 15 - - def __init__(self, params): - super().__init__(params) - print("[INFO] HcclSum init.") - self.per_rank_stats = None - self.all_rank_stats = None - self.top_op_stats = None - self.top_num = params.get(self.TOP_NUM, self.DEFAULT_TOP_NUM) - - @property - def base_dir(self): - return os.path.basename(os.path.dirname(__file__)) - - @staticmethod - def _mapper_func(data_map, analysis_class): - df = HcclSumExport(data_map[1], analysis_class).read_export_db() - - if df is None or df.empty: - print(f"[WARNING] There is no stats data in {data_map[1]}.") - return None - - df["Rank"] = data_map[0] - return df - - def mapper_func(self, context): - return context.wait( - context.map( - self._mapper_func, - self._get_rank_db(), - analysis_class=self._recipe_name - ) - ) - - def reducer_func(self, mapper_res): - mapper_res = list(filter(lambda df: df is not None, mapper_res)) - if not mapper_res: - print("[ERROR] Mapper data is None.") - return - self.per_rank_stats = pd.concat( - describe_duration(df.groupby("OpType")["Duration"]).assign(Rank=df["Rank"][0]) for df in mapper_res) - self.per_rank_stats.sort_values(by=["Rank"], inplace=True) - all_op_data = pd.concat(mapper_res) - self.all_rank_stats = describe_duration(all_op_data.groupby("OpType")["Duration"]) - grouped_op_stats = all_op_data.groupby("OpName") - self.top_op_stats = describe_duration(grouped_op_stats["Duration"]).nlargest(self.top_num, "MeanNs") - min_rank = [] - max_rank = [] - for op_name in self.top_op_stats.index: - df = 
grouped_op_stats.get_group(op_name) - min_rank.append(df[df["Duration"] == df["Duration"].min()]["Rank"].values[0]) - max_rank.append(df[df["Duration"] == df["Duration"].max()]["Rank"].values[0]) - self.top_op_stats["MinRank"] = min_rank - self.top_op_stats["MaxRank"] = max_rank - - def run(self, context): - super().run(context) - if self.top_num <= 0: - print(f"[WARNING] HcclSum: top_num is set to a invalid value, " - f"it will be reset to default value({self.DEFAULT_TOP_NUM}).") - self.top_num = self.DEFAULT_TOP_NUM - mapper_res = self.mapper_func(context) - self.reducer_func(mapper_res) - - if self._export_type == "db": - self.save_db() - elif self._export_type == "notebook": - self.save_notebook() - else: - print("[ERROR] Unknown export type.") - - def save_notebook(self): - self.dump_data(self.all_rank_stats, os.path.join(self._get_output_dir(), "all_stats.csv")) - self.dump_data(self.per_rank_stats, os.path.join(self._get_output_dir(), "rank_stats.csv")) - self.dump_data(self.top_op_stats, os.path.join(self._get_output_dir(), "top_op_stats.csv")) - self.create_notebook("stats.ipynb") - self.add_helper_file("cluster_display.py") - - def save_db(self): - self.dump_data(self.all_rank_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_ALL_RANK_STATS) - self.dump_data(self.per_rank_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_PER_RANK_STATS) - self.dump_data(self.top_op_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_TOP_OP_STATS) - - @classmethod - def add_parser_argument(cls, parser): - BaseRecipeAnalysis.add_parser_argument(parser) - parser.add_argument("--top_num", type=int, help="Duration cost top count", default=cls.DEFAULT_TOP_NUM) - - @classmethod - def parse_argument(cls, args_parsed) -> dict: - argument_dict = BaseRecipeAnalysis.parse_argument(args_parsed) - argument_dict.update({ - cls.TOP_NUM: args_parsed.top_num - }) - return argument_dict - - @classmethod - def get_extra_argument(cls, params) -> dict: - 
argument_dict = BaseRecipeAnalysis.get_extra_argument(params) - argument_dict.update({ - cls.TOP_NUM: params.get(cls.TOP_NUM, cls.DEFAULT_TOP_NUM) - }) - return argument_dict diff --git a/profiler/cluster_analyse/analysis/hccl_sum/stats.ipynb b/profiler/cluster_analyse/analysis/hccl_sum/stats.ipynb deleted file mode 100644 index 87f8c6d7362..00000000000 --- a/profiler/cluster_analyse/analysis/hccl_sum/stats.ipynb +++ /dev/null @@ -1,162 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HCCL Summary\n", - "\n", - "集群场景Hccl算子数据分析\n", - "\n", - "主要包含以下3个统计内容:\n", - "1. 按算子类型分组的,整个集群通信算子耗时的统计情况\n", - "2. 按算子类型分组的,每个Rank上通信算子的耗时情况\n", - "3. 整个集群平均耗时最久的TOP通信算子" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 数据准备" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import display, HTML\n", - "display(HTML(\"\"))\n", - "\n", - "import plotly.offline as pyo\n", - "\n", - "def is_lab_notebook():\n", - " import re\n", - " import psutil\n", - " return any(re.search('jupyter--lab-script', x) for x in psutil.Process().parent().cmdline())\n", - "\n", - "if is_lab_notebook():\n", - " pyo.init_notebook_mode()\n", - "\n", - "import pandas as pd\n", - "pd.options.plotting.backend = \"plotly\"\n", - "pd.set_option(\"display.max_rows\", 100)\n", - "pd.set_option(\"display.width\", 1000)\n", - "\n", - "import cluster_display\n", - "\n", - "all_stats_df = pd.read_csv(\"all_stats.csv\", index_col=\"OpType\")\n", - "rank_stats_df = pd.read_csv(\"rank_stats.csv\", index_col=\"OpType\")\n", - "top_op_stats_df = pd.read_csv(\"top_op_stats.csv\", index_col=\"OpName\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 集群通信算子耗时分析\n", - "\n", - "将整个集群所有Rank的通信算子进行汇总,按算子类型分类,统计分析耗时情况,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Count:算子数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- 
Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(all_stats_df)\n", - "fig_all_rank = cluster_display.display_duration_boxplots(None, all_stats_df, x_title=\"Hccl OpType\")\n", - "fig_per_rank = cluster_display.display_graph(None, all_stats_df.index, all_stats_df[[\"Q1(Us)\", \"Median(Us)\", \"Q3(Us)\"]], title=\"50% of Distribution\", x_title=\"Hccl OpType\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 集群Rank通信算子耗时分析\n", - "\n", - "将集群内每个Rank的通信算子进行汇总,按算子类型分类,统计分析耗时情况,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Count:算子数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "rank_stats_gdf = rank_stats_df.groupby(rank_stats_df.index)\n", - "cluster_display.display_stats_per_rank_groups_combobox(rank_stats_gdf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 集群TOP-N通信算子耗时分析\n", - "\n", - "统计集群内耗时最多的TOP-N通信算子,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Count:算子数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时\n", - "- MinRank:耗时最少算子所在的Rank\n", - "- MaxRank:耗时最长算子所在的Rank" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(top_op_stats_df)\n", - "fig_top_op = cluster_display.display_duration_boxplots(None, top_op_stats_df, x_title=\"Hccl OpName\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.1" - } - }, - "nbformat": 4, - 
"nbformat_minor": 2 -} diff --git a/profiler/cluster_analyse/analysis/mstx_sum/__init__.py b/profiler/cluster_analyse/analysis/mstx_sum/__init__.py deleted file mode 100644 index 7101187a2c2..00000000000 --- a/profiler/cluster_analyse/analysis/mstx_sum/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/cluster_analyse/analysis/mstx_sum/mstx_sum.py b/profiler/cluster_analyse/analysis/mstx_sum/mstx_sum.py deleted file mode 100644 index 46a0e18abee..00000000000 --- a/profiler/cluster_analyse/analysis/mstx_sum/mstx_sum.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import pandas as pd -from collections import namedtuple -from analysis.base_analysis import BaseRecipeAnalysis -from common_func.constant import Constant -from common_func.utils import describe_duration -from cluster_statistics_export.mstx_mark_export import MstxMarkExport -from cluster_statistics_export.mstx_step_export import MstxStepExport - - -MarkInfo = namedtuple("MarkInfo", ["name", "framework_duration", "cann_duration", "device_duration", - "tid", "start_ns"]) - - -def format_mark_info(df: pd.DataFrame, start_idx, stop_idx, name) -> MarkInfo: - start_series = df.iloc[start_idx] - stop_series = df.iloc[stop_idx] - return MarkInfo( - name=name, - framework_duration=float(stop_series["framework_ts"]-start_series["framework_ts"]), - cann_duration=float(stop_series["cann_ts"]-start_series["cann_ts"]), - device_duration=float(stop_series["device_ts"]-start_series["device_ts"]), - tid=start_series["tid"], - start_ns=start_series["cann_ts"] - ) - - -def rename_mark_msg_name(mark_stats_df: pd.DataFrame): - msg_idx_counter = {} - for idx, mark_info in enumerate(mark_stats_df.itertuples(index=False)): - msg_idx_counter.setdefault(mark_info.step_id, {}).setdefault(mark_info.name, []).append(idx) - for msg_dict in msg_idx_counter.values(): - for msg, idx_list in msg_dict.items(): - if len(idx_list) <= 1: - continue - for i, idx in enumerate(idx_list): - mark_stats_df.loc[idx, 'name'] = f"{msg}_{i}" - - -def compute_step_id(mark_stat, step_stats_df: pd.DataFrame): - for step_info in step_stats_df.itertuples(index=False): - if step_info.start_ns <= mark_stat.start_ns <= step_info.end_ns: - return step_info.step_id - print(f"[WARNING] {mark_stat.name} is not in any step.") - return 0 - - -def format_columns(df: pd.DataFrame): - formatted_df = df.rename( - { - "framework_duration": "FrameworkDurationNs", - "cann_duration": "CannDurationNs", - "device_duration": "DeviceDurationNs", - "duration": "DurationNs", - "step_id": "StepId", - "tid": "Tid", - "name": 
"Name" - }, - axis="columns" - ) - cols = [col for col in formatted_df.columns if not col.endswith("_ns") and col not in {"Tid"}] - return formatted_df[cols] - - -class MstxSum(BaseRecipeAnalysis): - - TABLE_FRAMEWORK_STATS = "MSTXAllFrameworkStats" - TABLE_CANN_STATS = "MSTXAllCannStats" - TABLE_DEVICE_STATS = "MSTXAllDeviceStats" - TABLE_MARK_STATS = "MSTXMarkStats" - - START_SUFFIX = "_start" - STOP_SUFFIX = "_stop" - - def __init__(self, params): - super().__init__(params) - print("[INFO] MstxSum init.") - self.mark_stats = None - self.all_fwk_stats = None - self.all_cann_stats = None - self.all_device_stats = None - - @property - def base_dir(self): - return os.path.basename(os.path.dirname(__file__)) - - @staticmethod - def _mapper_func(data_map, analysis_class): - step_df = MstxStepExport(data_map[1], analysis_class).read_export_db() - if step_df is None or step_df.empty: - step_df = pd.DataFrame({"start_ns": [0], "end_ns": [float("inf")], "step_id": [0]}) - mark_df = MstxMarkExport(data_map[1], analysis_class).read_export_db() - if mark_df is None or mark_df.empty: - print(f"[WARNING] There is no mark data in {data_map[1]}.") - return None - mark_df["framework_ts"] = mark_df["framework_ts"].astype("int64") - - mark_info = {} - mark_res = [] - mismatch_msg = [] - for idx, row in enumerate(mark_df.itertuples(index=False)): - if row.msg.endswith(MstxSum.START_SUFFIX): - msg = row.msg[:-len(MstxSum.START_SUFFIX)] - mark_info.setdefault(row.tid, {}).setdefault(msg, []).append(idx) - elif row.msg.endswith(MstxSum.STOP_SUFFIX): - msg = row.msg[:-len(MstxSum.STOP_SUFFIX)] - idx_list = mark_info.get(row.tid, {}).get(msg, []) - if not idx_list: - mismatch_msg.append((row.msg, idx)) - continue - start_idx = idx_list.pop() - mark_res.append(format_mark_info(mark_df, start_idx, idx, msg)) - - # 统计未匹配上的mark信息 - for msg_info in mark_info.values(): - for msg, idx_list in msg_info.items(): - if not idx_list: - continue - mismatch_msg.extend((msg + MstxSum.START_SUFFIX, idx) 
for idx in idx_list) - if mismatch_msg: - mismatch_msg.sort(key=lambda msg: msg[1]) - print(f"[WARNING] The following mark messages do not match anyone in " - f"rank {data_map[0]}: {','.join(msg[0] for msg in mismatch_msg)}.") - - mark_stats_df = pd.DataFrame(mark_res).assign(Rank=data_map[0]) - mark_stats_df["step_id"] = mark_stats_df.apply(compute_step_id, axis=1, step_stats_df=step_df) - rename_mark_msg_name(mark_stats_df) - mark_stats_df = format_columns(mark_stats_df).set_index("Name", drop=True) - return mark_stats_df - - def mapper_func(self, context): - return context.wait( - context.map( - self._mapper_func, - self._get_rank_db(), - analysis_class=self._recipe_name - ) - ) - - def reducer_func(self, mapper_res): - mapper_res = list(filter(lambda df: df is not None, mapper_res)) - if not mapper_res: - print("[ERROR] Mapper data is None.") - return - self.mark_stats = pd.concat(mapper_res) - all_fwk_stats = [] - all_cann_stats = [] - all_device_stats = [] - mark_step_df = self.mark_stats.groupby("StepId") - for step_id, df in mark_step_df: - name_gdf = df.groupby("Name") - fwk_stats = describe_duration(name_gdf["FrameworkDurationNs"]).assign(StepId=step_id) - fwk_stats.sort_values(by=["SumNs"], inplace=True, ascending=False) - all_fwk_stats.append(fwk_stats) - cann_stats = describe_duration(name_gdf["CannDurationNs"]).assign(StepId=step_id) - cann_stats.sort_values(by=["SumNs"], inplace=True, ascending=False) - all_cann_stats.append(cann_stats) - device_stats = describe_duration(name_gdf["DeviceDurationNs"]).assign(StepId=step_id) - device_stats.sort_values(by=["SumNs"], inplace=True, ascending=False) - all_device_stats.append(device_stats) - self.all_fwk_stats = pd.concat(all_fwk_stats) - self.all_cann_stats = pd.concat(all_cann_stats) - self.all_device_stats = pd.concat(all_device_stats) - - def run(self, context): - super().run(context) - mapper_res = self.mapper_func(context) - self.reducer_func(mapper_res) - - if self._export_type == "db": - 
self.save_db() - elif self._export_type == "notebook": - self.save_notebook() - else: - print("[ERROR] Unknown export type.") - - def save_notebook(self): - self.dump_data(self.mark_stats, os.path.join(self._get_output_dir(), "mark_stats.csv")) - self.dump_data(self.all_fwk_stats, os.path.join(self._get_output_dir(), "all_fwk_stats.csv")) - self.dump_data(self.all_cann_stats, os.path.join(self._get_output_dir(), "all_cann_stats.csv")) - self.dump_data(self.all_device_stats, os.path.join(self._get_output_dir(), "all_device_stats.csv")) - self.create_notebook("stats.ipynb") - self.add_helper_file("cluster_display.py") - - def save_db(self): - self.dump_data(self.mark_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_MARK_STATS) - self.dump_data(self.all_fwk_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_FRAMEWORK_STATS) - self.dump_data(self.all_cann_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_CANN_STATS) - self.dump_data(self.all_device_stats, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, self.TABLE_DEVICE_STATS) diff --git a/profiler/cluster_analyse/analysis/mstx_sum/stats.ipynb b/profiler/cluster_analyse/analysis/mstx_sum/stats.ipynb deleted file mode 100644 index 84672bc72b9..00000000000 --- a/profiler/cluster_analyse/analysis/mstx_sum/stats.ipynb +++ /dev/null @@ -1,180 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# MSTX Summary\n", - "\n", - "集群场景MSTX打点数据分析\n", - "\n", - "主要包含以下2个统计内容:\n", - "1. 按Step分组的,整个集群MSTX打点数据的统计情况\n", - "2. 
按Name分组的,每个Rank上MSTX打点数据的统计情况" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 数据准备" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import display, HTML\n", - "display(HTML(\"\"))\n", - "\n", - "import plotly.offline as pyo\n", - "\n", - "def is_lab_notebook():\n", - " import re\n", - " import psutil\n", - " return any(re.search('jupyter--lab-script', x) for x in psutil.Process().parent().cmdline())\n", - "\n", - "if is_lab_notebook():\n", - " pyo.init_notebook_mode()\n", - "\n", - "import pandas as pd\n", - "pd.options.plotting.backend = \"plotly\"\n", - "pd.set_option(\"display.max_rows\", 100)\n", - "pd.set_option(\"display.width\", 1000)\n", - "\n", - "import cluster_display\n", - "\n", - "all_fwk_stats_gdf = pd.read_csv(\"all_fwk_stats.csv\", index_col=\"Name\").groupby(\"StepId\")\n", - "all_cann_stats_gdf = pd.read_csv(\"all_cann_stats.csv\", index_col=\"Name\").groupby(\"StepId\")\n", - "all_device_stats_gdf = pd.read_csv(\"all_device_stats.csv\", index_col=\"Name\").groupby(\"StepId\")\n", - "mark_stats_df = pd.read_csv(\"mark_stats.csv\", index_col=\"Name\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 集群MSTX数据分析\n", - "\n", - "将整个集群所有Rank的MSTX数据进行汇总,按Step划分,统计分析耗时情况,时间单位为微秒(us)\n", - "打点数据分为三种:\n", - "1. 框架侧耗时:Framework Time\n", - "2. Cann侧耗时:Cann Time\n", - "3. 
Device侧耗时:Devcie Time\n", - "\n", - "3种数据都包含以下统计项:\n", - "- Count:数量\n", - "- Mean:平均耗时\n", - "- Std:标准差\n", - "- Min:最小值\n", - "- Q1:四分之一分位数\n", - "- Median:中位数\n", - "- Q3:四分之三分位数\n", - "- Max:最大值\n", - "- Sum:总耗时" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def display_stats_mstx_step_combobox(selected, args):\n", - " step = selected\n", - " fwk_stats_gdf, cann_stats_gdf, device_stats_gdf = args\n", - " fwk_df = fwk_stats_gdf.get_group(step)\n", - " cann_df = cann_stats_gdf.get_group(step)\n", - " device_df = device_stats_gdf.get_group(step)\n", - " figs = []\n", - " display(HTML(\"

Framework Time Stats

\"))\n", - " display(fwk_df)\n", - " cluster_display.display_duration_boxplots(figs, fwk_df, title=\"Framework Time\", x_title=\"Name\", y_title=\"Time\")\n", - " display(HTML(\"

Cann Time Stats

\"))\n", - " display(cann_df)\n", - " cluster_display.display_duration_boxplots(figs, cann_df, title=\"Cann Time\", x_title=\"Name\", y_title=\"Time\")\n", - " display(HTML(\"

Device Time Stats

\"))\n", - " display(device_df)\n", - " cluster_display.display_duration_boxplots(figs, device_df, title=\"Device Time\", x_title=\"Name\", y_title=\"Time\")\n", - "\n", - "steps = list(all_fwk_stats_gdf.groups.keys())\n", - "if steps:\n", - " cluster_display.display_stats_optional_combobox(steps, display_stats_mstx_step_combobox, \n", - " [all_fwk_stats_gdf, all_cann_stats_gdf, all_device_stats_gdf], \"Step:\")\n", - "else:\n", - " print(\"There is no step in stats, so no need to display\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 集群Rank MSTX数据分析\n", - "\n", - "将集群内每个Rank的MSTX数据进行汇总,按打点Name分类,统计分析耗时情况,时间单位为微秒(us)\n", - "\n", - "包含以下统计项:\n", - "- Name:打点名称\n", - "- FrameworkDuration(Us):框架侧耗时\n", - "- CannDuration(Us):Cann侧耗时\n", - "- DeviceDuration(Us):Device侧耗时\n", - "- Rank:Rank序号\n", - "- StepId:Step序号" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def display_mstx_duration_by_rank(selected, args):\n", - " mark_stats_gdf = args\n", - " df = mark_stats_gdf.get_group(selected).sort_values(\"Rank\")\n", - " display(df)\n", - " fwk_duration = []\n", - " cann_duration = []\n", - " device_duration = []\n", - " step_ids = []\n", - " for step_id, step_df in df.groupby(\"StepId\"):\n", - " fwk_duration.append((step_id, step_df[\"FrameworkDuration(Us)\"].values))\n", - " cann_duration.append((step_id, step_df[\"CannDuration(Us)\"].values))\n", - " device_duration.append((step_id, step_df[\"DeviceDuration(Us)\"].values))\n", - " step_ids.append(step_id)\n", - " fwk_df = pd.concat([pd.Series(dur, name=step_id) for step_id, dur in fwk_duration], axis=1)\n", - " cann_df = pd.concat([pd.Series(dur, name=step_id) for step_id, dur in cann_duration], axis=1)\n", - " device_df = pd.concat([pd.Series(dur, name=step_id) for step_id, dur in device_duration], axis=1)\n", - " figs = []\n", - " ranks = df[\"Rank\"].drop_duplicates()\n", - " cluster_display.display_graph(figs, 
ranks, fwk_df[step_ids],\n", - " title=\"Framework Time\", x_title=\"Rank\", y_title=\"Time\", legend_title=\"Step\")\n", - " cluster_display.display_graph(figs, ranks, cann_df[step_ids],\n", - " title=\"Cann Time\", x_title=\"Rank\", y_title=\"Time\", legend_title=\"Step\")\n", - " cluster_display.display_graph(figs, ranks, device_df[step_ids],\n", - " title=\"Device Time\", x_title=\"Rank\", y_title=\"Time\", legend_title=\"Step\")\n", - "\n", - "mark_stats_gdf = mark_stats_df.groupby(mark_stats_df.index)\n", - "names = list(mark_stats_gdf.groups.keys())\n", - "if steps:\n", - " cluster_display.display_stats_optional_combobox(names, display_mstx_duration_by_rank, mark_stats_gdf, \"Name:\")\n", - "else:\n", - " print(\"There is no mark name in stats, so no need to display\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index a8d01dcfe34..171417c8879 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -22,36 +22,9 @@ from communication_group.communication_group_generator import CommunicationGroup from common_func.constant import Constant from common_func.file_manager import FileManager from common_func.path_manager import PathManager -from common_func import analysis_loader from analysis.analysis_facade import AnalysisFacade COMM_FEATURE_LIST = ['all', 'communication_time', 'communication_matrix'] -ALL_FEATURE_LIST = ['all', 'communication_time', 'communication_matrix', 'cann_api_sum', 'hccl_sum', 'compute_op_sum', - 'mstx_sum'] - - -def get_analysis_args(analysis_class, analysis_args): - parser = argparse.ArgumentParser(description="custom analysis args") - parser.add_argument("--parallel_mode", type=str, 
help="context mode", default="concurrent") - parser.add_argument("--export_type", type=str, help="export type", default="db") - analysis_class[1].add_parser_argument(parser) - return parser.parse_args(analysis_args) - -def parse_specific_params(analysis_name, analysis_args): - analysis_class = analysis_loader.get_class_from_name(analysis_name) - if not analysis_class: - print("[ERROR] undefined analysis.") - return None - - args_parsed = get_analysis_args(analysis_class, analysis_args) - specific_params = { - Constant.RECIPE_NAME: analysis_class[0], - Constant.RECIPE_CLASS: analysis_class[1], - Constant.PARALLEL_MODE: args_parsed.parallel_mode, - Constant.EXPORT_TYPE: args_parsed.export_type - } - specific_params.update(analysis_class[1].parse_argument(args_parsed)) - return specific_params class Interface: ASCEND_PT = "ascend_pt" @@ -96,51 +69,29 @@ class Interface: if data_type == Constant.INVALID: print("[ERROR] The current folder contains both DB and other files. Please check.") return - if self.analysis_mode not in COMM_FEATURE_LIST: - if data_type != Constant.DB: - print("[ERROR] The current analysis node only supports DB as input data. 
Please check.") - return - FileManager.create_output_dir(self.collection_path, is_overwrite=True) - params = { - Constant.COLLECTION_PATH: self.collection_path, - Constant.DATA_MAP: data_map, - Constant.DATA_TYPE: data_type, - Constant.RECIPE_NAME: self.origin_params.get(Constant.RECIPE_NAME, ""), - Constant.RECIPE_CLASS: self.origin_params.get(Constant.RECIPE_CLASS), - Constant.PARALLEL_MODE: self.origin_params.get(Constant.PARALLEL_MODE, ""), - Constant.EXPORT_TYPE: self.origin_params.get(Constant.EXPORT_TYPE, "") - } - params.update(params[Constant.RECIPE_CLASS].get_extra_argument(self.origin_params)) - AnalysisFacade(params).recipe_analyze() - else: - FileManager.create_output_dir(self.collection_path) - params = { - Constant.COLLECTION_PATH: self.collection_path, - Constant.DATA_MAP: data_map, - Constant.ANALYSIS_MODE: self.analysis_mode, - Constant.DATA_TYPE: data_type - } - comm_data_dict = CommunicationGroupGenerator(params).generate() - params[Constant.COMM_DATA_DICT] = comm_data_dict - AnalysisFacade(params).cluster_analyze() + FileManager.create_output_dir(self.collection_path) + params = { + Constant.COLLECTION_PATH: self.collection_path, + Constant.DATA_MAP: data_map, + Constant.ANALYSIS_MODE: self.analysis_mode, + Constant.DATA_TYPE: data_type + } + comm_data_dict = CommunicationGroupGenerator(params).generate() + params[Constant.COMM_DATA_DICT] = comm_data_dict + AnalysisFacade(params).cluster_analyze() def cluster_analysis_main(args=None): parser = argparse.ArgumentParser(description="cluster analysis module") parser.add_argument('-d', '--collection_path', type=str, required=True, help="profiling data path") - parser.add_argument('-m', '--mode', choices=ALL_FEATURE_LIST, + parser.add_argument('-m', '--mode', choices=COMM_FEATURE_LIST, default='all', help="different analysis mode") - args_parsed, args_remained = parser.parse_known_args(args=args) + args_parsed, _ = parser.parse_known_args(args=args) parameter = { Constant.COLLECTION_PATH: 
args_parsed.collection_path, Constant.ANALYSIS_MODE: args_parsed.mode } - if args_parsed.mode in COMM_FEATURE_LIST: - if args_remained: - print(f"[ERROR] The specific argument {args_remained} is not supported for communication analysis.") - return - else: - parameter.update(parse_specific_params(args_parsed.mode, args_remained)) + Interface(parameter).run() diff --git a/profiler/cluster_analyse/cluster_statistics_export/__init__.py b/profiler/cluster_analyse/cluster_statistics_export/__init__.py deleted file mode 100644 index 7101187a2c2..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/cluster_analyse/cluster_statistics_export/cann_api_sum_export.py b/profiler/cluster_analyse/cluster_statistics_export/cann_api_sum_export.py deleted file mode 100644 index 578ee937be5..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/cann_api_sum_export.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cluster_statistics_export.stats_export import StatsExport - -QUERY = """ -WITH - summary as ( - SELECT - name, - sum(endNs - startNs) AS duration, - count (*) AS num, - avg(endNs - startNs) AS avg_duration, - min(endNs - startNs) AS min_duration, - median(endNs - startNs) AS med_duration, - max(endNs - startNs) AS max_duration, - stdev(endNs - startNs) AS stdev_duration, - lower_quartile(endNs - startNs) AS lower_quartile_duration, - upper_quartile(endNs - startNs) AS upper_quartile_duration - FROM - CANN_API - GROUP BY name - ), - totals AS ( - SELECT sum(duration) AS total - FROM summary - ) -SELECT - ids.value AS "name", - round(summary.duration * 100.0 / (SELECT total FROM totals), 2) AS "durationRatio", - summary.duration AS "totalTimeNs", - summary.num AS "totalCount", - round(summary.avg_duration, 1) AS "averageNs", - round(summary.min_duration, 1) AS "minNs", - round(summary.lower_quartile_duration, 1) AS "Q1Ns", - round(summary.med_duration, 1) AS "medNs", - round(summary.upper_quartile_duration, 1) AS "Q3Ns", - round(summary.max_duration, 1) AS "maxNs", - round(summary.stdev_duration, 1) AS "stdev" -FROM - summary -LEFT JOIN - STRING_IDS AS ids - ON ids.id == summary.name -ORDER BY 2 DESC; - """ - - -class CannApiSumExport(StatsExport): - - def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) - self._query = QUERY diff --git a/profiler/cluster_analyse/cluster_statistics_export/compute_op_sum_export.py b/profiler/cluster_analyse/cluster_statistics_export/compute_op_sum_export.py deleted file mode 100644 index 
d70c696100b..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/compute_op_sum_export.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cluster_statistics_export.stats_export import StatsExport - - -QUERY = """ -SELECT - NAME_IDS.value AS "OpName", - OPTYPE_IDS.value AS "OpType", - TASKTYPE_IDS.value AS "TaskType", - INPUTSHAPES_IDS.value AS "InputShapes", - round(TASK.endNs - TASK.startNs) AS "Duration" -FROM - COMPUTE_TASK_INFO -LEFT JOIN TASK - ON TASK.globalTaskId == COMPUTE_TASK_INFO.globalTaskId -LEFT JOIN - STRING_IDS AS NAME_IDS - ON NAME_IDS.id == COMPUTE_TASK_INFO.name -LEFT JOIN - STRING_IDS AS OPTYPE_IDS - ON OPTYPE_IDS.id == COMPUTE_TASK_INFO.opType -LEFT JOIN - STRING_IDS AS TASKTYPE_IDS - ON TASKTYPE_IDS.id == COMPUTE_TASK_INFO.taskType -LEFT JOIN - STRING_IDS AS INPUTSHAPES_IDS - ON INPUTSHAPES_IDS.id == COMPUTE_TASK_INFO.inputShapes - """ - - -class ComputeOpSumExport(StatsExport): - - def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) - self._query = QUERY diff --git a/profiler/cluster_analyse/cluster_statistics_export/hccl_sum_export.py b/profiler/cluster_analyse/cluster_statistics_export/hccl_sum_export.py deleted file mode 100644 index f695949de1a..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/hccl_sum_export.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 
2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cluster_statistics_export.stats_export import StatsExport - - -QUERY = """ -SELECT - NAME_IDS.value AS "OpName", - TYPE_IDS.value AS "OpType", - round(endNs - startNs) AS "Duration" -FROM - COMMUNICATION_OP -LEFT JOIN - STRING_IDS AS TYPE_IDS - ON TYPE_IDS.id == COMMUNICATION_OP.opType -LEFT JOIN - STRING_IDS AS NAME_IDS - ON NAME_IDS.id == COMMUNICATION_OP.opName - """ - - -class HcclSumExport(StatsExport): - - def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) - self._query = QUERY diff --git a/profiler/cluster_analyse/cluster_statistics_export/mstx_mark_export.py b/profiler/cluster_analyse/cluster_statistics_export/mstx_mark_export.py deleted file mode 100644 index ac5355c0200..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/mstx_mark_export.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from cluster_statistics_export.stats_export import StatsExport - - -QUERY = """ -WITH - FRAMEWORK_API AS ( - SELECT - PYTORCH_API.startNs, - CONNECTION_IDS.connectionId - FROM - PYTORCH_API - LEFT JOIN - CONNECTION_IDS - ON PYTORCH_API.connectionId == CONNECTION_IDS.id - ) -SELECT - MSG_IDS.value AS "msg", - MSTX_EVENTS.startNs AS "cann_ts", - TASK.startNs AS "device_ts", - FRAMEWORK_API.startNs AS "framework_ts", - MSTX_EVENTS.globalTid AS "tid" -FROM - MSTX_EVENTS -LEFT JOIN - TASK - ON MSTX_EVENTS.connectionId == TASK.connectionId -LEFT JOIN - FRAMEWORK_API - ON MSTX_EVENTS.connectionId == FRAMEWORK_API.connectionId -LEFT JOIN - STRING_IDS AS MSG_IDS - ON MSTX_EVENTS.message == MSG_IDS.id -ORDER BY - MSTX_EVENTS.startNs - """ - - -class MstxMarkExport(StatsExport): - - def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) - self._query = QUERY diff --git a/profiler/cluster_analyse/cluster_statistics_export/mstx_step_export.py b/profiler/cluster_analyse/cluster_statistics_export/mstx_step_export.py deleted file mode 100644 index c257ce675fe..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/mstx_step_export.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from cluster_statistics_export.stats_export import StatsExport - - -QUERY = """ -SELECT - id AS "step_id", - startNs AS "start_ns", - endNs AS "end_ns" -FROM - STEP_TIME -ORDER BY - startNs - """ - - -class MstxStepExport(StatsExport): - - def __init__(self, db_path, recipe_name): - super().__init__(db_path, recipe_name) - self._query = QUERY diff --git a/profiler/cluster_analyse/cluster_statistics_export/stats_export.py b/profiler/cluster_analyse/cluster_statistics_export/stats_export.py deleted file mode 100644 index e6d98f48ef8..00000000000 --- a/profiler/cluster_analyse/cluster_statistics_export/stats_export.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pandas as pd - -from common_func.db_manager import DBManager -from common_func.constant import Constant - - -class StatsExport: - - def __init__(self, db_path, analysis_class): - self._db_path = db_path - self._analysis_class = analysis_class - self._query = None - - def get_query(self): - return self._query - - def read_export_db(self): - query = self.get_query() - if query is None: - print(f"[ERROR] query is None.") - return - conn, cursor = DBManager.create_connect_db(self._db_path, Constant.ANALYSIS) - data = pd.read_sql(query, conn) - DBManager.destroy_db_connect(conn, cursor) - return data diff --git a/profiler/cluster_analyse/common_func/analysis_loader.py b/profiler/cluster_analyse/common_func/analysis_loader.py deleted file mode 100644 index 55e7dbc6ea9..00000000000 --- a/profiler/cluster_analyse/common_func/analysis_loader.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import importlib -import inspect -import sys - -from common_func.constant import Constant -from analysis.base_analysis import BaseRecipeAnalysis - -def is_analysis_class(obj): - return inspect.isclass(obj) and issubclass(obj, BaseRecipeAnalysis) and obj != BaseRecipeAnalysis - -def get_class_from_name(analysis_name : str): - sys.path.append(Constant.ANALYSIS_PATH) - analysis_path = f"analysis.{analysis_name}.{analysis_name}" - module = None - try: - module = importlib.import_module(analysis_path) - except Exception as e: - print(f"[ERROR] {analysis_path} not find:{e}") - - specific_analysis = inspect.getmembers(module, is_analysis_class) - if not specific_analysis: - print(f"[ERROR] {analysis_name} not found.") - return specific_analysis[0] diff --git a/profiler/cluster_analyse/common_func/constant.py b/profiler/cluster_analyse/common_func/constant.py index 80f0374c1d1..2922d6a900f 100644 --- a/profiler/cluster_analyse/common_func/constant.py +++ b/profiler/cluster_analyse/common_func/constant.py @@ -106,13 +106,3 @@ class Constant(object): CONFIG = "config" EXPER_CONFIG = "experimental_config" EXPORT_TYPE = "_export_type" - - # recipe config - ANALYSIS = "analysis" - RECIPE_NAME = "recipe_name" - RECIPE_CLASS = "recipe_class" - PARALLEL_MODE = "parallel_mode" - CLUSTER_CUSTOM_ANALYSE_PATH = os.path.abspath(os.path.dirname(__file__)) - ANALYSIS_PATH = os.path.join(CLUSTER_CUSTOM_ANALYSE_PATH, 'analysis') - - CONCURRENT_MODE = "concurrent" \ No newline at end of file diff --git a/profiler/cluster_analyse/common_func/context.py b/profiler/cluster_analyse/common_func/context.py deleted file mode 100644 index 4e3d544d376..00000000000 --- a/profiler/cluster_analyse/common_func/context.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from functools import partial -from concurrent import futures -from common_func.constant import Constant - - -class Context(object): - """abstract base class""" - - ctx_map = None - - @classmethod - def create_context(cls, mode=Constant.CONCURRENT_MODE): - if cls.ctx_map is None: - keys = [Constant.CONCURRENT_MODE] - values = [ConcurrentContext] - cls.ctx_map = dict(zip(keys, values)) - - if mode not in cls.ctx_map: - raise NotImplementedError("mode must be in {}".format(keys)) - - return cls.ctx_map[mode]() - - def __init__(self): - print("[INFO] context {} initialized.".format(self._mode)) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - if exc_type is not None: - print(f"[ERROR] Failed to exit context: {exc_val}") - - def launch(self, func, *args, **kwargs): - raise NotImplementedError - - def map(self, func, *iterables, **kwargs): - raise NotImplementedError - - def wait(self, waitable): - raise NotImplementedError - -class ConcurrentContext(Context): - - def __init__(self, executor=None): - self._mode = Constant.CONCURRENT_MODE - super().__init__() - self._custom = executor is None - self._executor = executor or futures.ProcessPoolExecutor(max_workers=os.cpu_count()) - - def __enter__(self): - if self._executor is None: - raise RuntimeError("executor is None") - return self - - def close(self): - if self._custom: - self._executor.shutdown(wait=True) - self._executor = None - - def launch(self, func, *args, **kwargs): - return self._executor.submit(func, *args, **kwargs).result() - 
- def map(self, func, *iterables, **kwargs): - partial_func = partial(func, **kwargs) - return list(self._executor.map(partial_func, *iterables)) - - def wait(self, waitable): - return waitable \ No newline at end of file diff --git a/profiler/cluster_analyse/common_func/db_manager.py b/profiler/cluster_analyse/common_func/db_manager.py index c0d6ad89be8..1aa7ed8740e 100644 --- a/profiler/cluster_analyse/common_func/db_manager.py +++ b/profiler/cluster_analyse/common_func/db_manager.py @@ -20,7 +20,6 @@ from common_func.constant import Constant from common_func.empty_class import EmptyClass from common_func.file_manager import check_db_path_valid from common_func.tables_config import TablesConfig -from common_func.sql_extention_func import SqlExtentionAggregateFunc class DBManager: """ @@ -42,12 +41,6 @@ class DBManager: print(f"[ERROR] {err}") return EmptyClass("empty conn"), EmptyClass("empty curs") try: - if mode == Constant.ANALYSIS: - try: - for func_name, params_count, class_name in SqlExtentionAggregateFunc: - conn.create_aggregate(func_name, params_count, class_name) - except sqlite3.Error as err: - print(f"[ERROR] {err}") if isinstance(conn, sqlite3.Connection): curs = conn.cursor() os.chmod(db_path, Constant.FILE_AUTHORITY) diff --git a/profiler/cluster_analyse/common_func/sql_extention_func.py b/profiler/cluster_analyse/common_func/sql_extention_func.py deleted file mode 100644 index 987a0d43653..00000000000 --- a/profiler/cluster_analyse/common_func/sql_extention_func.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np - - -class Median: - - def __init__(self) -> None: - self.data = [] - - def step(self, value) -> None: - self.data.append(value) - - def finalize(self): - return np.median(self.data) - - -class LowerQuartile: - - def __init__(self) -> None: - self.data = [] - - def step(self, value) -> None: - self.data.append(value) - - def finalize(self): - return np.quantile(self.data, 0.25) - - -class UpperQuartile: - - def __init__(self) -> None: - self.data = [] - - def step(self, value) -> None: - self.data.append(value) - - def finalize(self): - return np.quantile(self.data, 0.75) - - -class StandardDeviation: - - def __init__(self) -> None: - self.data = [] - - def step(self, value) -> None: - self.data.append(value) - - def finalize(self): - return np.std(self.data) - - -# func_name, params_count, class -SqlExtentionAggregateFunc = [ - ('median', 1, Median), - ('lower_quartile', 1, LowerQuartile), - ('upper_quartile', 1, UpperQuartile), - ('stdev', 1, StandardDeviation) -] diff --git a/profiler/cluster_analyse/common_func/utils.py b/profiler/cluster_analyse/common_func/utils.py deleted file mode 100644 index 0a20a5c237f..00000000000 --- a/profiler/cluster_analyse/common_func/utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import pandas as pd - - -def format_columns(df: pd.DataFrame): - formatted_df = df.rename( - { - "25%": "Q1Ns", - "50%": "MedianNs", - "75%": "Q3Ns", - 0.25: "Q1Ns", - 0.5: "MedianNs", - 0.75: "Q3Ns", - "Q1": "Q1Ns", - "Q3": "Q3Ns", - "min": "MinNs", - "max": "MaxNs", - "median": "MedianNs", - "sum": "SumNs", - "std": "StdNs", - "mean": "MeanNs", - "count": "Count" - }, - axis="columns" - ) - - stats_cols = ["Count", "MeanNs", "StdNs", "MinNs", "Q1Ns", "MedianNs", "Q3Ns", "MaxNs", "SumNs"] - other_cols = [col for col in formatted_df.columns if col not in stats_cols] - return formatted_df[stats_cols + other_cols] - - -def describe_duration(series_groupby): - agg_df = series_groupby.agg(["min", "max", "count", "std", "mean", "sum"]) - quantile_df = series_groupby.quantile([0.25, 0.5, 0.75]) - - quantile_df = quantile_df.unstack() - quantile_df.columns = ["25%", "50%", "75%"] - - stats_df = pd.merge(agg_df, quantile_df, left_index=True, right_index=True) - formated_df = format_columns(stats_df) - formated_df.index.name = stats_df.index.name - return formated_df - - -def stdev(df, aggregated): - if len(df) <= 1: - return df["stdevNs"].iloc[0] - instance = aggregated["totalCount"].loc[df.name] - var_sum = np.dot(df["totalCount"] - 1, df["stdev"] ** 2) - deviation = df["averageNs"] - aggregated["averageNs"].loc[df.name] - dev_sum = np.dot(df["totalCount"], deviation ** 2) - return np.sqrt((var_sum + dev_sum) / (instance - 1)) - - -def convert_unit(df: pd.DataFrame, src_unit, dst_unit): - df.loc[:, df.columns.str.endswith(src_unit)] = 
df.loc[:, df.columns.str.endswith(src_unit)].apply(lambda x: x / 1000.0) - df = df.rename(columns=lambda x: x.replace(src_unit, "".join(["(", dst_unit, ")"]))) - return df diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md index 78ea5d89717..b40f19e92fa 100644 --- a/profiler/compare_tools/README.md +++ b/profiler/compare_tools/README.md @@ -145,6 +145,8 @@ python performance_compare.py [基准性能数据文件所在路径] [比对性 | --enable_operator_compare | 开启算子性能比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | | --enable_communication_compare | 开启通信性能比对。 | 否 | | --enable_memory_compare | 开启算子内存比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | +| --enable_kernel_compare | 开启kernel性能比对。仅针对NPU与NPU比对的场景。需要使用性能数据中的kernel_details.csv文件。 | 否 | +| --enable_api_compare | 开启API性能比对。需要使用性能数据中的trace_view.csv文件。 | 否 | | --disable_details | 隐藏明细比对,只进行统计级比对。 | 否 | 说明:以上开关均不设置的情况下,**工具默认开启所有的性能比对**,当用户设置了以上开关,则按照用户设置的开关进行性能比对,示例如下: @@ -174,9 +176,13 @@ python performance_compare.py [基准性能数据文件] [比对性能数据文 MindSpore场景仅支持**总体性能**和**通信性能**的对比。 +比对结果分为打屏和performance_comparison_result_{timestamp}.csv两种形式输出,其中打屏输出为概要信息,csv文件保存详细结果。 + ### 总体性能 -总体性能比对结果以打屏的形式呈现。 +#### 打屏结果 + +总体性能比对结果以打屏的形式呈现时,字段如下: | 字段 | 说明 | | --------------------------------------- | ------------------------------------------------------------ | @@ -196,6 +202,54 @@ MindSpore场景仅支持**总体性能**和**通信性能**的对比。 | E2E Time(Not minimal profiling) | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | | Other Time | AI CPU、DSA、TensorMove等其他算子耗时。 | +#### csv文件结果 + +总体性能比对结果在performance_comparison_result_*.xlsx中OverallMetrics的sheet页呈现时,示例如下: + +![OverallMetrics](./img/OverallMetrics.png) + +表头字段说明: + +| 字段 | 说明 | +| -------------- | --------------------------- | +| Index | 指标。 | +| Duration(ms) | 执行耗时,单位ms。 | +| Duration Ratio | 执行耗时占E2E总耗时的比例。 | +| Number | 计算算子的数量。 | + +Index列字段说明: + +| 字段 | | | 说明 | +| ---------------------------- | ------------------ | ----------------------------------- | 
------------------------------------------------------------ | +| Computing Time | | | 计算流耗时,计算流所有event耗时总和。如果有多条并发计算,计算流耗时对重叠部分只会计算一次。 | +| | Flash Attention | | Flash Attention算子。 | +| | | Flash Attention (Forward) (Cube) | Flash Attention前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Forward) (Vector) | Flash Attention前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Flash Attention (Backward) (Cube) | Flash Attention反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Flash Attention (Backward) (Vector) | Flash Attention反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Conv | | Conv算子。 | +| | | Conv (Forward) (Cube) | Conv前向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Forward) (Vector) | Conv前向Vector算子。Conv前向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | | Conv (Backward) (Cube) | Conv反向算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Conv (Backward) (Vector) | Conv反向算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Matmul | | Matmul算子。 | +| | | Matmul (Cube) | Matmul算子下发的所有Cube类Kernel的总耗时,一般为执行该算子核心计算的算子。 | +| | | Matmul (Vector) | Matmul算子下发的所有Vector类Kernel的总耗时,一般为插入的转换类算子,如TransData。 | +| | Paged Attention | | Paged Attention算子。 | +| | Vector | | Vector算子。 | +| | | Vector (Trans) | 转换类Vector算子,主要包含Cast、TransPose、TransData算子。(仅针对NPU数据) | +| | | Vector ( No Trans) | 非转换类Vector算子。 | +| | Cube | | 未识别出Flash Attention、Conv和Matmul的Cube算子。 | +| | SDMA (Tensor Move) | | 拷贝类任务。 | +| | Other | | AI CPU、DSA等其他算子。 | +| Uncovered Communication Time | | | 通信未掩盖耗时,包含卡间等待时间。 | +| | Wait | | 卡间同步等待耗时。(仅针对NPU数据) | +| | Transmit | | 通信传输耗时。 | +| Free Time | | | 调度耗时 = E2E耗时 - 算子耗时 - 通信不可掩盖耗时。Free的定义为Device侧既不在通信又不在计算的时间,因此包含拷贝时间(SDMA Time)。 | +| | SDMA | | NPU为除Tensor Move外的拷贝类任务,GPU为所有拷贝类任务。 | +| | Free | | 排除SDMA的空闲耗时。 | +| E2E Time | | | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | + 可以采取最简性能数据采集的方式来减少E2E耗时的性能膨胀,示例代码如下: ```python @@ -300,3 +354,29 @@ MindSpore场景暂不支持。 
步骤1:查看MemoryCompareStatistic页,找出内存占用差距TOP的算子。 步骤2:查看MemoryCompare页,搜索内存占用差距TOP的算子,查看具体占用的子算子。 + +### kernel性能 + +仅针对NPU与NPU比对的场景。 + +kernel比对结果在performance_comparison_result_*.xlsx中KernelCompare页呈现。 + +按照Kernel(Kernel类型)和Input Shapes(输入Shape)分组统计,统计信息包括: + +- Total Duration(us):总耗时,单位us。 +- Avg Duration(us):平均耗时,单位us。 +- Max Duration(us):最大耗时,单位us。 +- Min Duration(us):最小耗时,单位us。 +- Calls:调用次数。 + +### API性能 + +API比对结果在performance_comparison_result_*.xlsx中ApiCompare页呈现。 + +按照api name(API名称)组统计,统计信息包括: + +- Total Duration(ms):总耗时,单位ms。 +- Self Time(ms):Self耗时(排除掉子event),单位ms。 +- Avg Duration(ms):平均耗时,单位ms。 +- Calls:调用次数。 + diff --git a/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py new file mode 100644 index 00000000000..bc5810068b0 --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/api_compare_comparator.py @@ -0,0 +1,32 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class ApiCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_api_by_name(cls, ops: list): + ops_dict = {} + for op in ops: + ops_dict.setdefault(op.name, []).append(op) + return ops_dict + + def _compare(self): + if not self._origin_data: + return + base_ops = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_ops = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_ops or not comparison_ops: + return + base_aggregated_ops = self._aggregated_api_by_name(base_ops) + comparison_aggregated_ops = self._aggregated_api_by_name(comparison_ops) + for op_name, base_data in base_aggregated_ops.items(): + comparsion_data = comparison_aggregated_ops.pop(op_name, []) + self._rows.append(self._bean(op_name, base_data, 
comparsion_data).row) + if comparison_aggregated_ops: + for op_name, comparison_data in comparison_aggregated_ops.items(): + self._rows.append(self._bean(op_name, [], comparison_data).row) + update_order_id(self._rows) diff --git a/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py new file mode 100644 index 00000000000..13c0f776af6 --- /dev/null +++ b/profiler/compare_tools/compare_backend/comparator/kernel_compare_comparator.py @@ -0,0 +1,35 @@ +from compare_backend.comparator.base_comparator import BaseComparator +from compare_backend.utils.constant import Constant +from compare_backend.utils.common_func import update_order_id + + +class KernelCompareComparator(BaseComparator): + def __init__(self, origin_data: list, bean: any): + super().__init__(origin_data, bean) + + @classmethod + def _aggregated_kernel_by_type_and_shape(cls, kernels: dict): + result_dict = {} + for type_shape, shape_values in kernels.items(): + for shape, kernel_data in shape_values.items(): + kernel = [single[1] for single in kernel_data] + result_list = [type_shape, shape, sum(kernel), len(kernel), max(kernel), min(kernel)] + result_dict.setdefault(f"{type_shape}{shape}", []).extend(result_list) + return result_dict + + def _compare(self): + if not self._origin_data: + return + base_kernels = self._origin_data.get(Constant.BASE_DATA, {}) + comparison_kernels = self._origin_data.get(Constant.COMPARISON_DATA, {}) + if not base_kernels or not comparison_kernels: + return + base_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(base_kernels) + comparison_aggregated_kernels = self._aggregated_kernel_by_type_and_shape(comparison_kernels) + for type_shape, base_data in base_aggregated_kernels.items(): + comparsion_data = comparison_aggregated_kernels.pop(type_shape, []) + self._rows.append(self._bean(base_data, comparsion_data).row) + if comparison_aggregated_kernels: + for 
_, comparison_data in comparison_aggregated_kernels.items(): + self._rows.append(self._bean([], comparison_data).row) + update_order_id(self._rows) \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py new file mode 100644 index 00000000000..55e08a86be8 --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/api_compare_bean.py @@ -0,0 +1,47 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class ApiInfo: + def __init__(self, op_name: str, data_list: list): + self._data_list = data_list + self.name = op_name + self.total_dur = 0.0 + self.self_time = 0.0 + self.avg_dur = 0.0 + self.number = len(data_list) + self._get_info() + + def _get_info(self): + for data in self._data_list: + self.total_dur += data.api_dur + self.self_time += data.api_self_time + self.total_dur /= 1000.0 + self.self_time /= 1000.0 + self.avg_dur = self.total_dur / self.number if self.number else 0.0 + + +class ApiCompareBean: + TABLE_NAME = Constant.API_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, op_name: str, base_api: list, comparison_api: list): + self._name = op_name + self._base_api = ApiInfo(op_name, base_api) + self._comparison_api = ApiInfo(op_name, comparison_api) + + @property + def row(self): + row = [None, self._name, + self._base_api.total_dur, self._base_api.self_time, self._base_api.avg_dur, self._base_api.number, + self._comparison_api.total_dur, self._comparison_api.self_time, + self._comparison_api.avg_dur, self._comparison_api.number] + diff_fields = [calculate_diff_ratio(self._base_api.total_dur, self._comparison_api.total_dur)[1], + calculate_diff_ratio(self._base_api.self_time, 
self._comparison_api.self_time)[1], + calculate_diff_ratio(self._base_api.avg_dur, self._comparison_api.avg_dur)[1], + calculate_diff_ratio(self._base_api.number, self._comparison_api.number)[1]] + row.extend(diff_fields) + return row + diff --git a/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py new file mode 100644 index 00000000000..df96addc4fe --- /dev/null +++ b/profiler/compare_tools/compare_backend/compare_bean/kernel_compare_bean.py @@ -0,0 +1,75 @@ +from compare_backend.utils.common_func import calculate_diff_ratio +from compare_backend.utils.constant import Constant +from compare_backend.utils.excel_config import ExcelConfig + + +class KernelCompareInfo: + def __init__(self, data_list: list): + self._kernel_type = None + self._input_shapes = None + self._total_dur = None + self._number = None + self._max_dur = None + self._min_dur = None + if not data_list: + return + self._kernel_type = data_list[0] + self._input_shapes = data_list[1] + self._total_dur = data_list[2] + self._number = data_list[3] + self._max_dur = data_list[4] + self._min_dur = data_list[5] + + @property + def kernel_type(self): + return self._kernel_type + + @property + def input_shapes(self): + return self._input_shapes + + @property + def total_dur(self): + return self._total_dur if self._total_dur else 0.0 + + @property + def number(self): + return self._number + + @property + def max_dur(self): + return self._max_dur + + @property + def min_dur(self): + return self._min_dur + + @property + def avg_dur(self): + return self._total_dur / self._number if self._total_dur and self._number else 0.0 + + +class KernelCompareBean: + TABLE_NAME = Constant.KERNEL_TABLE + HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) + OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) + + def __init__(self, base_kernel: list, comparison_kernel: list): + self._base_kernel = KernelCompareInfo(base_kernel) + 
self._comparison_kernel = KernelCompareInfo(comparison_kernel) + self._kernel_type = self._base_kernel.kernel_type \ + if self._base_kernel.kernel_type else self._comparison_kernel.kernel_type + self._input_shapes = self._base_kernel.input_shapes \ + if self._base_kernel.input_shapes else self._comparison_kernel.input_shapes + + @property + def row(self): + row = [None, self._kernel_type, self._input_shapes, + self._base_kernel.total_dur, self._base_kernel.avg_dur, + self._base_kernel.max_dur, self._base_kernel.min_dur, self._base_kernel.number, + self._comparison_kernel.total_dur, self._comparison_kernel.avg_dur, + self._comparison_kernel.max_dur, self._comparison_kernel.min_dur, self._comparison_kernel.number] + diff_fields = [calculate_diff_ratio(self._base_kernel.total_dur, self._comparison_kernel.total_dur)[1], + calculate_diff_ratio(self._base_kernel.avg_dur, self._comparison_kernel.avg_dur)[1]] + row.extend(diff_fields) + return row \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py index 9c4825c0e8e..c15396e9c59 100644 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py @@ -12,6 +12,7 @@ class KernelDetailsBean: self._data = data self._op_type = "" self._name = "" + self._input_shapes = "" self._aiv_vec_time = 0.0 self._aicore_time = 0.0 self._mac_time = 0.0 @@ -27,6 +28,10 @@ class KernelDetailsBean: def name(self) -> str: return self._name + @property + def input_shapes(self) -> str: + return self._input_shapes + @property def aiv_vec_time(self) -> float: if self._aiv_vec_time == "" or self._aiv_vec_time == "N/A": @@ -109,6 +114,7 @@ class KernelDetailsBean: def init(self): self._op_type = self._data.get('Type', "") self._name = 
self._data.get('Name', "") + self._input_shapes = self._data.get('Input Shapes', "") self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") self._aicore_time = self._data.get("aicore_time(us)", "") self._mac_time = self._data.get('mac_time(us)', "") diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py index fdce23c6ab4..3106527c419 100644 --- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py +++ b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py @@ -17,3 +17,20 @@ class OperatorDataPrepare: else: result_data.append(level1_node) return result_data + + def get_all_layer_ops(self) -> any: + root_node = TreeBuilder.build_tree(self.profiling_data.torch_op_data, [], []) + level1_child_nodes = root_node.child_nodes + node_queue = [] + result_data = [] + for level1_node in level1_child_nodes: + if level1_node.is_step_profiler(): + node_queue.extend(level1_node.child_nodes) + else: + node_queue.append(level1_node) + while len(node_queue) > 0: + node = node_queue.pop(0) + result_data.append(node) + if node.child_nodes: + node_queue.extend(node.child_nodes) + return result_data \ No newline at end of file diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py index 292e3128154..6fe693fb067 100644 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py @@ -8,6 +8,8 @@ from compare_backend.comparator.module_comparetor import ModuleComparator from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator from compare_backend.comparator.operator_comparator import OperatorComparator from 
compare_backend.comparator.operator_statistic_comparator import OperatorStatisticComparator +from compare_backend.comparator.api_compare_comparator import ApiCompareComparator +from compare_backend.comparator.kernel_compare_comparator import KernelCompareComparator from compare_backend.comparator.overall_metrics_comparator import OverallMetricsComparator from compare_backend.compare_bean.communication_bean import CommunicationBean from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean @@ -16,6 +18,8 @@ from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean +from compare_backend.compare_bean.api_compare_bean import ApiCompareBean +from compare_backend.compare_bean.kernel_compare_bean import KernelCompareBean from compare_backend.compare_bean.overall_metrics_bean import OverallMetricsBean from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare @@ -39,8 +43,10 @@ class DetailPerformanceGenerator(BaseGenerator): return op_compare_result def compare(self): - if self._args.enable_operator_compare or self._args.enable_memory_compare or \ - self._args.enable_communication_compare: + enable_compare = [self._args.enable_operator_compare, self._args.enable_memory_compare, + self._args.enable_communication_compare, self._args.enable_api_compare, + self._args.enable_kernel_compare] + if any(enable_compare): print("[INFO] Start to compare performance detail data, please wait.") comparator_list = self._create_comparator() else: @@ -97,6 +103,18 @@ class DetailPerformanceGenerator(BaseGenerator): comparator_list.append(OperatorStatisticComparator(op_compare_result, 
MemoryStatisticBean)) if not self._args.disable_details: comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) + if self._args.enable_api_compare: + api_compare_result = { + Constant.BASE_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.BASE_DATA)).get_all_layer_ops(), + Constant.COMPARISON_DATA: OperatorDataPrepare( + self._profiling_data_dict.get(Constant.COMPARISON_DATA)).get_all_layer_ops()} + comparator_list.append(ApiCompareComparator(api_compare_result, ApiCompareBean)) + if self._args.enable_kernel_compare: + kernel_compare_result = { + Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).kernel_details, + Constant.COMPARISON_DATA: self._profiling_data_dict.get(Constant.COMPARISON_DATA).kernel_details} + comparator_list.append(KernelCompareComparator(kernel_compare_result, KernelCompareBean)) return comparator_list def match_torch_op(self) -> list: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py index 6ee07a65696..9daaa55ef16 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py @@ -20,6 +20,7 @@ class ProfilingResult: self.overall_metrics = ProfilingInfo(profiling_type) self.python_function_data = [] self.fwdbwd_dict = {} + self.kernel_details = {} def update_torch_op_data(self, event: TraceEventBean): event.is_torch_op = True @@ -43,6 +44,9 @@ class ProfilingResult: def update_comm_task_data(self, comm_name: str, task_event: TraceEventBean): self.communication_dict.setdefault(comm_name, {}).setdefault("comm_task", {}).setdefault( task_event.name, []).append(task_event.dur) + + def update_kernel_details(self, kernels: dict): + self.kernel_details = kernels class BaseProfilingParser(ABC): @@ -57,6 +61,8 @@ class BaseProfilingParser(ABC): 
self._enable_operator_compare = args.enable_operator_compare self._enable_memory_compare = args.enable_memory_compare self._enable_communication_compare = args.enable_communication_compare + self._enable_api_compare = args.enable_api_compare + self._enable_kernel_compare = args.enable_kernel_compare self._dispatch_func = self._get_dispatch_func() self._result_data = ProfilingResult(self._profiling_type) self._memory_events = [] @@ -80,6 +86,10 @@ class BaseProfilingParser(ABC): self._cpu_cube_op = cpu_cube_op return self._cpu_cube_op + @abstractmethod + def _update_kernel_details(self): + raise NotImplementedError("Function _update_kernel_details need to be implemented.") + @abstractmethod def _update_memory_list(self): raise NotImplementedError("Function _update_memory_list need to be implemented.") @@ -112,6 +122,8 @@ class BaseProfilingParser(ABC): self._update_memory_list() if self._enable_profiling_compare: self._update_overall_metrics() + if self._enable_kernel_compare: + self._update_kernel_details() self._check_result_data() return self._result_data @@ -291,7 +303,7 @@ class BaseProfilingParser(ABC): task_index += 1 def _check_result_data(self): - if self._enable_operator_compare or self._enable_memory_compare: + if self._enable_operator_compare or self._enable_memory_compare or self._enable_api_compare: if not self._result_data.torch_op_data: print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") if self._enable_operator_compare and not self._result_data.kernel_dict: @@ -300,6 +312,11 @@ class BaseProfilingParser(ABC): print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") if self._enable_communication_compare and not self._result_data.communication_dict: print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") + if self._enable_kernel_compare and not self._result_data.kernel_details: + if self._profiling_type == Constant.GPU: + print(f"[WARNING] kernel compare between GPU 
data and NPU data is not supported.") + else: + print(f"[WARNING] Can't find any kernel details in the file: {self._profiling_path}") def _read_trace_event(self): try: diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py index 7b1ae1a5a12..0aeeba83efb 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py @@ -33,6 +33,9 @@ class GPUProfilingParser(BaseProfilingParser): def __is_sdma_time(cls, name: str): return any(mask in name.lower() for mask in cls.SDMA_MARK_LIST) + def _update_kernel_details(self): + pass + def _update_memory_list(self): if not self._enable_memory_compare: return @@ -171,6 +174,8 @@ class GPUProfilingParser(BaseProfilingParser): func_set.add(self._picking_memory_event) if self._enable_profiling_compare: func_set.add(self._picking_flow_event) + if self._enable_api_compare: + func_set.add(self._picking_torch_op_event) return list(func_set) def _infer_compute_stream_id(self): diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py index 457a3b6be5e..cb25c252c6c 100644 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py @@ -53,8 +53,32 @@ class NPUProfilingParser(BaseProfilingParser): func_list.add(self._picking_kernel_event) func_list.add(self._picking_hccl_event) func_list.add(self._picking_flow_event) + if self._enable_api_compare: + func_list.add(self._picking_torch_op_event) return list(func_list) + def _update_kernel_details(self): + try: + kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) + except FileNotFoundError: + 
print("[WARNING] The file kernel_details.csv does not exist.") + except Exception: + print("[ERROR] Failed to read kernel_details.csv.") + return + if not kernel_details: + return + kernels_dict = {} + for kernel in kernel_details: + if kernel.is_invalid(): + continue + input_shapes = kernel.input_shapes if kernel.input_shapes else 'N/A' + kernels_dict.setdefault(kernel.op_type, {}).setdefault(input_shapes, []).append( + [kernel.name, kernel.duration]) + if len(kernels_dict) == 1: + print("[ERROR] Failed to enable enable_kernel_compare, type of kernel_details.csv is null.") + return + self._result_data.update_kernel_details(kernels_dict) + def _update_memory_list(self): try: memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py index 4b5947fa7bc..ab9fb43a968 100644 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ b/profiler/compare_tools/compare_backend/utils/args_manager.py @@ -69,6 +69,14 @@ class ArgsManager: def enable_communication_compare(self): return self._args.enable_communication_compare + @property + def enable_api_compare(self): + return self._args.enable_api_compare + + @property + def enable_kernel_compare(self): + return self._args.enable_kernel_compare + @classmethod def check_profiling_path(cls, file_path: str): PathManager.input_path_common_check(file_path) @@ -119,11 +127,14 @@ class ArgsManager: raise RuntimeError(msg) if not any([self._args.enable_profiling_compare, self._args.enable_operator_compare, - self._args.enable_memory_compare, self._args.enable_communication_compare]): + self._args.enable_memory_compare, self._args.enable_communication_compare, + self._args.enable_api_compare, self._args.enable_kernel_compare]): self._args.enable_profiling_compare = True self._args.enable_operator_compare = True self._args.enable_memory_compare = True 
self._args.enable_communication_compare = True + self._args.enable_api_compare = True + self._args.enable_kernel_compare = True base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) self.check_profiling_path(base_profiling_path) diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py index ab9bc364f44..9e6291e89e0 100644 --- a/profiler/compare_tools/compare_backend/utils/compare_args.py +++ b/profiler/compare_tools/compare_backend/utils/compare_args.py @@ -6,6 +6,8 @@ class Args: enable_operator_compare: bool = False, enable_memory_compare: bool = False, enable_communication_compare: bool = False, + enable_api_compare: bool = False, + enable_kernel_compare: bool = False, output_path: str = "", max_kernel_num: int = None, op_name_map: dict = {}, @@ -17,6 +19,8 @@ class Args: self.enable_operator_compare = enable_operator_compare self.enable_memory_compare = enable_memory_compare self.enable_communication_compare = enable_communication_compare + self.enable_api_compare = enable_api_compare + self.enable_kernel_compare = enable_kernel_compare self.output_path = output_path self.max_kernel_num = max_kernel_num self.op_name_map = op_name_map diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py index e2002588024..252aa536e1c 100644 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ b/profiler/compare_tools/compare_backend/utils/constant.py @@ -39,13 +39,16 @@ class Constant(object): # compare type OPERATOR_COMPARE = "OperatorCompare" MEMORY_COMPARE = "MemoryCompare" - + API_COMPARE = "ApiCompare" + KERNEL_COMPARE = "KernelCompare" # sheet name OPERATOR_SHEET = "OperatorCompare" MEMORY_SHEET = "MemoryCompare" OPERATOR_TOP_SHEET = "OperatorCompareStatistic" MEMORY_TOP_SHEET = "MemoryCompareStatistic" COMMUNICATION_SHEET = "CommunicationCompare" + API_SHEET = "ApiCompare" + 
KERNEL_SHEET = "KernelCompare" # table name OPERATOR_TABLE = "OperatorCompare" @@ -57,6 +60,8 @@ class Constant(object): MODULE_TABLE = "ModuleCompare" MODULE_TOP_TABLE = "ModuleCompareStatistic" OVERALL_METRICS_TABLE = "OverallMetrics" + API_TABLE = "ApiCompare" + KERNEL_TABLE = "KernelCompare" # memory SIZE = "Size(KB)" diff --git a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py index ae808863e77..b6be0ae2ebc 100644 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ b/profiler/compare_tools/compare_backend/utils/excel_config.py @@ -57,7 +57,7 @@ class ExcelConfig(object): DEVICE_SELF_TIME = "Device Self Time(ms)" DEVICE_TOTAL_TIME = "Device Total Time(ms)" DIFF_SELF_TIME = "Device Self Time Diff(ms)" - DIFF_TOTAL_RATIO = "Total Diff Ratio" + DIFF_TOTAL_RATIO = "Diff Total Ratio" DIFF_TOTAL_TIME = "Device Total Time Diff(ms)" DEVICE_SELF_TIME_US = "Device Self Time(us)" DEVICE_TOTAL_TIME_US = "Device Total Time(us)" @@ -71,6 +71,14 @@ class ExcelConfig(object): DURATION = "Duration(ms)" DURATION_RATIO = "Duration Ratio" DIFF_DUR_MS = "Diff Duration(ms)" + API_NAME = "api name" + TOTAL_DURATION_MS = "Total Duration(ms)" + AVG_DURATION_MS = "Avg Duration(ms)" + SELF_TIME_MS = "Self Time(ms)" + DIFF_SELF_RATIO = "Diff Self Ratio" + DIFF_AVG_RATIO = "Diff Avg Ratio" + DIFF_CALLS_RATIO = "Diff Calls Ratio" + KERNEL = "Kernel" HEADERS = { Constant.OPERATOR_TABLE: [ @@ -193,7 +201,39 @@ class ExcelConfig(object): {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, {"name": DIFF_DUR_MS, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 10}, - + ], + Constant.API_TABLE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": API_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": 
SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": SELF_TIME_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION_MS, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_SELF_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_CALLS_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + ], + Constant.KERNEL_COMPARE: [ + {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, + {"name": KERNEL, "type": CellFormatType.BOLD_STR, "width": 30}, + {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": CALLS, "type": CellFormatType.DEFAULT,"width": 20}, + {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, + {"name": DIFF_AVG_RATIO, "type": CellFormatType.DEFAULT_FLOAT,"width": 20}, ] } @@ -201,7 +241,9 @@ class 
ExcelConfig(object): Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], Constant.MODULE_TABLE: ["E1:H1", "I1:L1"], - Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"]} + Constant.OVERALL_METRICS_TABLE: ["B1:D1", "E1:G1"], + Constant.API_TABLE: ["C1:F1", "G1:J1"], + Constant.KERNEL_TABLE: ["D1:H1", "I1:M1"]} # overall metrics index # computing time diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py index 690c46cd51c..69ee92d1232 100644 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ b/profiler/compare_tools/compare_backend/utils/torch_op_node.py @@ -64,6 +64,14 @@ class TorchOpNode: def device_dur(self): return sum([kernel.device_dur for kernel in self._kernel_list]) + @property + def api_dur(self): + return self._event.dur + + @property + def api_self_time(self): + return self.api_dur - sum(child.api_dur for child in self._child_nodes) + def add_child_node(self, child_node): self._child_nodes.append(child_node) diff --git a/profiler/compare_tools/compare_backend/utils/tree_builder.py b/profiler/compare_tools/compare_backend/utils/tree_builder.py index 34c1fe1a1f4..d5aa787ac2c 100644 --- a/profiler/compare_tools/compare_backend/utils/tree_builder.py +++ b/profiler/compare_tools/compare_backend/utils/tree_builder.py @@ -23,7 +23,8 @@ class TreeBuilder: tree_node = TorchOpNode(event, last_node) last_node.add_child_node(tree_node) last_node = tree_node - tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) + if kernel_dict: + tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) else: event.set_name(last_node.name) last_node.set_memory_allocated(event) diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py 
index dffb7549fcd..58bad621b03 100644 --- a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py @@ -12,7 +12,7 @@ class WorkSheetCreator: self._work_sheet = None self._row_id = 1 self._field_format = {} - self._diff_ratio_index = None + self._diff_ratio_index = [] self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" def create_sheet(self): @@ -47,8 +47,10 @@ class WorkSheetCreator: self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) self._field_format[index] = header.get("type") - if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): - self._diff_ratio_index = index + ratio_white_list = [ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO, + ExcelConfig.DIFF_AVG_RATIO, ExcelConfig.DIFF_CALLS_RATIO, ExcelConfig.DIFF_SELF_RATIO] + if header.get("name") in ratio_white_list: + self._diff_ratio_index.append(index) self._row_id += 1 def _write_data(self): @@ -56,7 +58,7 @@ class WorkSheetCreator: for data in self._data.get("rows"): for index, cell_data in enumerate(data): cell_format = self._work_book.add_format(self._field_format.get(index)) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) @@ -76,7 +78,7 @@ class WorkSheetCreator: if index == 0: # 0 for Index field cell_style["indent"] = cell_data.count("\t") cell_format = self._work_book.add_format(cell_style) - if index == self._diff_ratio_index and cell_data and cell_data > 1: + if index in self._diff_ratio_index and cell_data and cell_data > 1: cell_format = red_ratio_format cell_data = "INF" if cell_data == float('inf') else cell_data 
self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) diff --git a/profiler/compare_tools/img/OverallMetrics.png b/profiler/compare_tools/img/OverallMetrics.png new file mode 100644 index 0000000000000000000000000000000000000000..b130d3607344c983a9304440e38a45fe96a4bb56 GIT binary patch literal 66941 zcmdqIXH=72w>GK|3i^maK`9z~Rhk6pT~NT#M5P8I9So6<5C}-ofb`x$q$tv)geFxW z^j-p?_YyjU651D@x9s=X-`?Yl^W*$EW3aeqa>KgUTC=QcUTfZgS{lkX|Gf9-rAwD? zs;VeyU%GUq;?kwd``5@x-#GP%tdlO6owSvoTq@`Wu8=-lu~JZ1xOAx~?8b@7Rnq6{ z_A2^Lmo5PsFWTi6yYJ?gE}dtoDk;2jGhVBa30+X>=e#GCbCt5!^fDuiCttMp32U@w zzKmza<6AuNaJL6FH=S>!*D?l2TXPchn|QWQ#3xjooOb0fr>vVYC~*hXbD{T%NeNN3 zV#3B}J?BSqY18Ms=O=R8Q5>o})aGieR;o!CZ6!*Q?eF5sz(8Ne!RC})9rAepsO}7V z&ZZj4@$a9u0f$p^I)Q7xECL_n;^H)!q}{fr9hTUeW^YB1{-{mLF|hfHP$;vUSEle+ za!qA5``cF>?mlX0_M@O;q7aXtCiPY=l`zd_*>FXd`LDaej~ZMe=h$z-2(bk)iq*ai ztEyN<;@nH-LZ4xBB}_h50yxO}rIZ#op#l|K@P)axgy=@4RQbO3uwT>pPko)bs%|Fj zZ}^%cmHXaFv624a|qXoBi4P?G&)N)okBgNVW?W8jjnvmfID#}7M#9BR+Ap<>t*aGog0?+ zKo)Uu7K?!%bEZ5qGCW zGOfV56GNE2nd#XTAu_HC;v3%}mVl(o_m2?%s8Ba}AS%v~F^)-2#x37*D-iX}-M(}w zPF6Z)Hl1o=zuqsslU0TTa2vkx6YJ=9z~iLoA%O}tw?%$YJGkrqr6x^>vHQj1&?uQ1 z;y_!9g{q15fThfAN$`NzqF=|hi3w2)^0wh6eakLcj~+&8VPf_{P$NO7CUHlB(>>{R zY56|CR_uDs(Y2?t0Ei*7-N!K}N)Z=8nViwd03@mr1Z?mO(9IK zaPaQdM%Hh(hP#+>bFk34bDcJMO*)+a_8r2Xqi@elHJ|J$AlGQ-F`^Y3ONpR@(zqi_ zf=%c)Q~wc8v+HHIFqcfg$y#ml(KU4Fum@lXB-a-dHyD)^(&!r|)Sau5ZdN2}j`*+O?W$!1S2 zORCbo_4yBgJ!yx07Y=2_BO^K39i)m>=ZkKk&}9hocM69Qk0%}x4_rWU9)(pJ2T=ri zR!odU=q)(m1b!Zy7g3Z4$L%}9$J$sA3a|=$*G^R!0u-Ge-Z|P2uPsBunTX*MffGZR zaK#>703?GMFbHN`yn;}+OEzRCGCd2s(8Z%zueL}!Mb&F6e`FKBo~xA#Ht+oYr7K6| zl*t9xYI*e4(l=IPIi5ZY61~;F29}PzRh81qb>*u@bf|x1ll&8V^+)b%#PI#zb(~o6 z94A9@WrD}&^`|ZA8inaKx<+vXFka|MFs~EP)H-*GxkoW!StZRdG}r<@Q05KBzP_(P z8u{gI*6s((%G@^T%6&On^M)Qzq&la{n0K-sa~DZfa!X;Ji$J&0opCSHRzZfKwb0$G zPXrV?KU^=6C53})vzGPGYMEvl*o@~`RRQktP22#&rPrlsmwpNg^NvOuS%-DT?=5~o zKv#`^<}{X7Sh0(g))aUhf@uV1*1Zq=9)$oJe3D%|aab+iv>%T7H#NdBj&H%z+f%Q?ClN$ 
zr5#xxayJ9CFA)uEY)M(P=6FG#Djc!Z0bdy0V+TiovR=MoeEOkFFw2;AdY&)48@>> z2Sxik<+RJuOT1Ru*~wCmc5%Ij;v)Hi@X8nj=xvd^Tr}a--wg69_xUf@AKg zDzO82LYHvsUGJaKN3HZ3f$ZCi$NXsD=ie%ZVI{=kM5qba2luVm+^FA>z~l=Nn^dXm zHC}ZH)%1{z$nUM z>DyR`rxI$CXzi{OTN6uW!9tw1CPX>0Vtz~eS842-ECtFE4K{A1A~{m zBoYl`1>!0LJ@?QD*@0(l(8WI(;;CdU-honqlhQC$svJPdN}WNsVBwaL$K2|%VhvE6 zt4Z2^`sIf5BG+jqKOlnF1w_9Bxd)D%xQH-AZQ!{>^(|cOtFqwZ8YvTZ;Iwam~b}y@#pzocyh?N_ZWQGvP;IsGV&>hODh&tAE}w>zp6kOHA46)7tFPqJ`pAL@I(0JQx@%pNuCn8}aB{ zUkGX$|UU(nd9c1hjuoG;Rmv_XIaNaOl%LMuOA~jViUk72%Ow>V7sDi1 zU@hUtB|Fu}jaG>i#+Yil#~o{&H!WtIkVjz0^&6!eH>&BX(fP4{c=>9j!YQwCuXG5v zBpX3oRU?`amv=s;^~Py~C_RYYd(#4i=0!mar+~UR)N!!j0MYpEQb;AP2!Us?nZJHR zh8r^vUN*zNJWx(a;a&92^chF(L628FD6yaeaq@X7m;GAvcc6k!C=Tqc&JSKJExVD1 z_G0%x4fN~;Q;ThWh~JIgCgXg-VMccv#)OWta(2bQK#|U-SEbo7%u;HFs9whdyqf?&UrwH-X8h*0DKA>JP ze835ENm>7fY}g+lvIa0bY8axRfm~4-r*sgGW4ArozDIS%vGr3JD5r&tvVFni>VkN` zU(@qJkA){Pp+I9PlIuJb07URR)u)e519zKQ4}5!Yl|w$`>?Cu5lmmDRb+{-njnaEQ zM;#UYx*Mth4`y-mjb$^u`G$5O6|#re>qO)Jr8G_wx$How%%@>sS29EfXz=BG5l*tG z3Sg%Sty`3xo`u>kf~}E9 zU&cdOcf(~TWv1&T+O^$1+y3M;Zl|mvirkJ|G=<{kv!w8_RggrwY^@sV_^GL)@jZ)9 zwrx}Mwi^YHxT>DGJ8jsgZ9aXCvqVx(9IIfv92TU zNUu31$P*YU{%-ZxWYnX*wzKP&JRQTsgKgtj1ZQrU?CAJ!t7K&s+e=HXll|`)b<>h0 z8Jq&r;i8|Gn|fEplDt=G3VBa;8H{E2(iZIqxtpB1;Dzm%h0w?}q#rM#<(`teWlaeI zyfiBLDH!Q!tplxWw*l+bIgsv-pDRe7>YtH`3+-jCIF+BjLoE zsyOtk?%=4`^y)!&8pQ8EQ8eqjMMr;%4{jk8e;~h5Qc?O{sBSm=p#1m3@ASay zhFQ&q0ETuJV;=Pqi@u6X##{PQ<);Lh(8)U4JZYm(+iO0qRr~;(G^R(`<04comBc(Q zH|jW|=bnYM-8*rmvgN1D2#6Fa!pk^?gcoR>VqA)lgp;S=Ky(sHbi|)W4eXE<}Gz2%ZMZEEX`yd1cM979;T?I`%OAF$AfTbX&LwKA94_Ft( zS(F1D8R*s>LKHV{z-?33x#u#*_oHB*v%N?|NBe2WMnf54@kh(0eCY{gC^xe0M!eSw zq~lIdT-jD-?c?m){Va8J3nbi-vn4bgdd~q)qk@*0TktxS`|+EkdQu!16%q;Bz6nP< zL2Q^8-9xVE;$?O2xmR_r-f5!OH++O{K`Nzmoa(I}t5{T=&oaH7K*+6>v_DRj5_{BU zjIGacT6)ymlm9XppUbd#?_4!)fTZdEoMBX{!j!GL`dGzAuWDk(BV0gm&SKrFev0yE z#`_ZbDNKT-C{Gi-yGP_U?N*6ju`^ydQt<_^KD9ESas=pCK^Ey4h)?g|B~REkm_Y46 zZEY}EuKc3D_;RJzyvv}LXRf6zJ8hWiKXGC}QYFx2E)46&60Ruf8J$J(QI89*kc@W~ 
zuP!SCEbPlmFloF+<2RK_qBMfZ!TuccVHOG0u-1<}jmOnjcn zSq9J-D4Utjg^BP$6C2aZi-fs~#~3q3fd$Yb#C>LbzfmHP|?=T8DfZkTPL4Vkpn{a;8u&B1?DWP zNa$EPKWHdsnd8^NperL)llY2p{u$fr6jF43ucl)vYt6g!+bZ~aI%JKP9e0&sRJz7J z6I3ZTA-nou#bijoN?BiQ!g#Dwpaq%d3Mcv@@`GTVR7=@uOt4GGbb3sUZxDn#ZJ8z^_#45Jiw*pC? zPRH0fzQDAgLZ$p7yI=scH?#7prw@;UAzN!qq!|YG8!l`hp?o+*_fz9!iEJi>=xbd_ zWn_gVTXK{u17O*15GtrDxq;sm?*n<_oFsZRdaf436IvrBb9obfk#Il)s#86#L$3;u zh5+|-*ZO*md_n8*+_K2t{ItAb!nf9^HKaiAjz$34Ue2pZ z04n!aS!fot!(hO#R1Dw(8Kdd%Z4In&1z6XvC{D3t@u&p49c~9s`ftJ4e(jQkJ3Ju(Hh^G>M|Y4ZC{63WTwaO zcn9P3uBuqaRK1aB;fzb^?K%_Lq_yf(b=}NfmkfJco(I7^ciA~lT9Gh6NIy>3#OqZS zCoUb||F#iGV*u`l4T`j(d*Sb66q$zQgJD$ey<~l~s7}rDIFw{>_iJ;1RLxp=ksb1l zm!qH~v!kXXGikSvz9D1ePO+hngE8uV)hLf~EV?{KxA0YWB}f9sICsy*QH7ZwI)%a- z&(DQ>At)cvNc1ZMTAM@yBt29-_OOz4M%SL!M#J>k6r+?+uMqZ{d{uFBRIkPeX|i(*RHI;8kcYOkTjTYD!{7< z2U9qegu#+3UH|(OtB2U)VfTlVxa`M5vVU_UDjI1F72zieh2hJozhLD&!Va}v%(CMQ zH=AQl?zxy=^7yC@h~RKn6DW#j!Tc9Z*MzeB2irHrUi&J7Lf zdqmPGj#B6bPK|l+HtF&oq8f8*=wF1v!#7%7mI_I%q_*NG$@X_~rE%c)1x$oorEG8F zvlhN+NG;|6CL-M=O)ogO+z$!~@mJxWNG+RcHQ?`JHr2oROCFk%FI~4tZBK#m_TNQP zKnb{@jtc%Dw=ktGWgxW#+3)Os7ZYTq8HFyA+t>A9)H;1d6-jM?!bR!t;sEL!`(NqK zpY+jz`n{F`q(=M_dU5#=k+|jDbcIggmhNRg(BD_+|3`~e!sY=XLZQY_M+diRlnboIPcYLMkMJijNZfX zFB&=w52S3Dwxq&QhK>4(4#>?mGOLD0i7IwSiVkoe@G=chz_Y!ZMxsc3AgO!E(i^SC zi+Y^2 z*2^{NYmu((ut!y12eQ8**toneDBVsY;blB~bRk@zHBCX+PC;{9a%G+06X#mu`71m* z#^xQf@dOQZm0lrplOpt`3(74&C(WV1Cf@_u59Pp(z$6fkf~3G0ACzn+ov^F>kI}l8DfU#Bm+72Cix{+q z(eK|>!$#_tZ(~YGEH){8U?rUS!l!fmIDL1}vsdTYn2dJ9LuUF>Lmk4*26bU?gh`w= z+i{jx>vu+M^4KU9oQlY*o(gTUQ{IzCv#)CYa@V)fMYatWCSP`~(T zRQOJC;ptKTZZo83=c^}NHnA(ud1HaSc{ZY{5ykyipKmk#&Ac`f4Mbi`WKBvKl1*bZzQ(JrhriQ;51ul>Q_ zEjO6#JY`w{ij(&!>`_}bsqwm$q(AyA&K|j4DRbRn3h&at`y*E?ma`&q1CkUWiH?1p zrUTCsFdV}qy17>F|Dv9gp1(HN3|oF0>MXbYGOa`A+B#bH*}`*V|0!Y`(t(mLje%(q zS6#)ibJOxr8$V45Sfc$=*&zPtIU^t=q8JR@9(_RXy5)u~{BYP-(I`aC%j2?au6S8I?s5Jd*$Vh% zX$&2TM5 zd5=+gg+Q^Ynot>0ffe^riB*Y_LyILCo*>Rf=dZ?K2pBs^pjuV{{9Z zzUH%E{*!@p7NXwSP;kWgmka0ukmtSZ-(5JWXik?2#V!Ut;s%a;6;+G?UW%nuBjep` 
zW(9)!CdT_$nIX{e0na4CkJfPo9}iudiRyv!?nQ;1S3Y0)kQ$eeaqxs!^{N!qFfAl% z9<#5osL5bxZ6PO=jw0alP+N~hGZJuZ@ew9~P0KDp$#Ra}zQAK>;m~P|xfzRd)hf?Q zw~^q>Z-ijH8n!I|Ub!9(`!~xtEC&V}_NdS;#C?^$t7Agy3s_eY}=b6}fPOTwZV zfv!55^at9a_HFE~r4_cm-~vKrR_%ib&gU}?el2srEx1JWm+TIXMU|RRVR8F{Isd(w zOML5`v?O$xstSp870=91$w>9v^P>d1)*?v1fR(hX9!15eM<1Bxf8tF#@+S8XGxe3j zC3`h7UCWJSjg{}-hg8aR_#DS5;`R49sLZ}cf$VB*Q-*~B`7lEi88)1Io$skYw_S~_ zNhj&(p|j3yd)Q)SGP5=*PX(E7#c@6J)cJ{gxro!CQ@Gboq(Sxc4iTOG6&7n|e=d+;akB!!*Xv5&4AVJ&vix&yta(=c^agktSGlp+pCGCCExN;@g23^_n56mo-5qo;2r{s=0OH))$NbhG8wbU3PqF7S_7$jH_PJ-i+-D6@ToSo ztZXurono~}!L4|n_)m+jqO78>amRloTRJFZK#sVwK{3BDJM*?b1yrkVe`epyCP zSb1~dJu+Qn7fmppWsOB4jy{FPppE}!#R5T9&k<62UDk> z_yBx&nauy18@Uk$TQB zmVsH=bpb=vu0cwTs*t43(u3rSZBmAVcKYY`r50K+&@ zn3i5x8W-1Y@I)S?V6^Q`n?QQ>K}_6??Zywg^^b=M7cp69$>&=!gZcmpdW75~d}!G<|B589a=(?<3gW^91r@F)U z_bnc`s~{pIJDX2$SlNW`n7)m=YaD5^Y(bArhfLL-fjx#YTWP2fdW8dsfNnNTOJQF1 zU#*Wnb#Bi(c~H;RVqwZXk`V#;A6*Og$v7V&FRi{sU1E}(JN%@O0lJJ?hn#<7K4Sd3RC?$ZzCN_7gT5z~aw*JL)fW=NluHFn8l!Kob5u#f5eDsRp?IL++^$pe4h!pu}Cz+uaY1Ut<)`du(bX~5MJR_7; zQNgCVBdOH8-L6{v?4P3>yiFS9KL$z?JKhM-P>jAAj3Ax-a;O@ku1Ok8s1B?Ak9A25 zwp=GH_k$%t*4!0mONMKuJKoiC@QvnkvCaj~&Yo271i41!50A0~@4h0H9iQuIZSAFY zHRRBK7p9lT=Q-_P&jW)$Gw*xu)%_mHrDW$0a^KrcqCv#%nx;@UGd;7tY@2`Zyk*!6 zyk3Iru~D9k(_=@bj6Q|uOX5e0^KT#3&^yCSQ}su!`QDYnh(jMvz(->4j88U+#9Tb% zQ4yhhlZL;}luwqT=ozsJovnCW{FUxy66ju;tEQ=}apfH`FB5L9X93ooQ*I$ML$ol~Lo+Yi_bN7h&_x1IgKb`D+COa+!-T3Y^7#C| zNlv%Ok6~$3=JXA%eRdMwbvH26m@lI1{Y3ZxSr8;+vR#kn)y=x0-QLnOz){^tA(#6* zRiz{}G)ig9ninR$0ai749Z-s4C?++-sm9?g`Wvjkk6me+p?eVRE(&lPWQKQs6JC=Y=0q$RdHQ`vCa=s zYv)V0HpW}~IXx)sI*$xCqklyW!937O{BnDY-`A-{=~wT0U6T!2(Sv-A#MW-NLk3aW{aIJS6cp2b@tNvyi3ApaJD`SOIwY;r)SqQe5-N6hu-65 z@ElE<;$$e!a>?C@e{ckRV#HS?e%*%~96WoVB9*|}GnJ>!ZPg;S%qH(QTEBWj05+O-cp&PS6f z=^{0W23QZrYV*7Eh`>VQlvd?PFf}B9Kh7LDR}n`%)n`T5 zXX5il)iR5saENl^EBKAl9hxMeWx$~1y^!WE*PC_sVhStA>e_N|!^(}JZ)&n840N^` z4Zj@=(K09Lc^67+7rt*yee2I?E`(+^Yaf26);YjiPO5x)bl*GaWcFV8fr(zyR%4dB 
z{fxo+uKaO6f0qGcJKAWOLoZhYgYldP!ZXhjgXic6X&@6_WZfoyA>yIZO3zbjT$62G z_=bj^+$xzQ1Pj)a*Q+V>^S;{d%feG4)FZ=`4$;ZVbuYX^@oSfrZc`Ns-KO;JFKTG| zv$VtWVc+gpp2mTL>4(mtD0SmFoTUzKB&VZD!$-t$HUIt4@E;2ep!Qg|d`vV{C;WhO z_&0By$Pj+eU^AGkY@v;=%(PdAl)-H0(*+r>n;JEfe53Hh*E9{Fani5jiUpIYq_(L_7AEQ6B0NnQm zT$vieqX;zwe1#k|R6@#EAB9<$_{5W8V(kLc^F${(>0QjHxhqQ9hv-ujCCnCq!!Op* z08x$U221j9WoFCx2&Y1UUxlHHhWbxO*O2dm>V67|s-~v=bdV7AEZnJ&K>NvEWhia{ z*BthEUu8%=M|h2WZA$uD08-vT42rVzG3k)a-OEJ>PGJ}!PBnF zqI33$nimD0zlp5@P#cEe=9Ua}Lb`}L1Y7buBK`-w*aCK7-c|TnB9!>rRNea@;T})t zRZ)6ffFr{8cZV@H^0ml@s(qWZbIn;9HjBZknyxVYXBovJL$43=nMm%5cvaN3ymidm4! z?K~Q{YiKg_o}L}ezuM_GPq2Y!2jm1gguHeSUg%CNAC}5Mi##1kXj&&ElQjD_DbV$W zeoJr&;oa!)_Y)n;DvATltii8T=*YGECM~e^Ci>IhKDcLHN;$_P+?>jwz^bQq*CKbDgJqqH2KN!{v$h|7oqRy z0_r);PUw^ri^E2P& zV#X$wgsMK3e!kXxiSebu_9s$+d$OsY5R(3R^ehwCnRl_%%eS_#sg3@WmX0EM`IGJv zv<@HBRu{rBDTlNYp1hlrO~8`kK0SyHh3M(qXSM4|dWK{>qW+TvLi4Z`dceV@sgNb8 z=y0UTpw-<@9k%}}D8=~byZUhI-A#qg7T))pzh-r(1J3pfF1z!6B3!>%2)Y2J$h=&R zjIGLi!ZTGj8;P=fjpyi9|8UMZUl6mZvyor8U7v>igunf?l!Oj{cqGS>3)`~W0+V+J zbEJ>3sAA-FmhEDM*)2062{6XgF*djv*KL6j7XwsuaB4|?pTYWc4h3fa+1*`7fvyb$?O8$TC?HNVMILhB-d0t<&<(7(TY{O3c35YLnL+ z;T52uE){@@ty6Mj>|ej5VLP%kvOD$ww}G^8eVs_#?osDQD8%O2^_`tigm6;LUPt)~oSCEG;i(SW8=kczPrWM90V6Rp8lz)Xs?pVj>r`GH;Fz`B{i)KD z!bvY+5A$mI+=WatH~c%D2X_MV)SH(KTq5%6)z2kw;E&7A1b=74MTS$04q3&Sf(&8D zBly(g*StE3RkoUM39v^|MS036`&g*Eggf7F%im$r2^j(YksK+HYm{O%tVaL%jgRP$*XGJGw)P9+dhH)%mifEo>%ZAI^SxeUqiwdbMrKxN* z1GMuVQ;w6VrvBww%V%6|`5k6Y^6qa1Y1c|Wo+;TkeKwht)kT?i-_**IrrnYp&mmC0 z7AehT=#cG}tpyBIShC8_h}CSu$M%FA&+T_e^4~X8J$izfH3O9}G-umAhUXUT2RBnS zBzExuOFvSC-L6O@RNdt@tQ`z4a?wHlxijeT(4EgTq%rIvX*Q>>cf}K@iwD;YK$#k~ z4GUc}1|;M*zIi@P%?*>J1sbe6+dnP^Jp|2+>;KV~q&SQX%~>3{SMorU(6?i_qq5`g zpGLRPS8$sQ2rU?H(9|Ap74<~jWk}i-Gwy8tHGu2vt-4!B+3Vm@#JEDoycG?fws>2@ z8`zUXVg;_c>s2{$ITj_D-hAKu#Yq_ZVvO5BR%|{@6$N7~XqGce8RnmtjxAvRSPNYl zSuHX#O!>?4P-YGtv`o^^%q2e`7zD=TFD2^8h2! 
z|Ct1Ngzj4}sU2IadO;E29?!lMA^Kf+K4Sj*b?@zVx*uItujx+Z(T&-1^K{QJRY!sj z^++N1&F)UiXIOq1#l5l5qNPon(6f{G1QUJZ$?~|v*pNu&bY`oct~GJ9!$jF(9382W zPN`!_U?J?2+oYMaER#{)y`#9Re%rKp`7&nP5|wha<#YAy%>io4`FfS*@A!B1Z0L}& zn@P%?q#(%?(DZ`e#$Q(QHm8!u;0#S*j*f3fR2bgC7fb`s%y0dUuDL>jVSq!;JFi(_ zgn|3{@Fjv`!e~79OU{@TQYfwcFkc9@kn?6BQaO^_yevWri1=dTCG|}`qUeOy{rZQ* z8VZ4({idqcqr<9%3S4jsI=(i>&NavD@{3aLuqPu*I<(k2Cd0=l)9{z?MQJ75D@MIK zmaOCQxBCZsGCT57)Rns3Jx z{@!Z)Lv=Cw9XO?F=#Z`UOpd0Kao*+ri-Kdpr8%mQ`P2AOwP3otQlt0%hKqMW2f0FN z^xKsNz%JvjKe@h+J?RN=TuNpy4sO=VP1SsC3$rIh%bt$YBE8YyeiyNFNRMIj*FI!m z^_zHAVKdqkD0^XvE$upvH}W_mdzIU_yB_lDjzor?MM~$k^-ket#VTGIInO>GrQ3}| zO!vZ4VyzbF=7#>r6hEd`p-mt{aINtVA3bh)1^QfR?T$X{7Ew(N{^@{BIVu}9H*|4` z+FuP0du)*h@f<)dvu$eRrRW;x8S)n3C}iMrlZq_ncd2~99_|$!VPLqhJLL$EYl#$j zN^-@wyu2lvhPTuRZeh9_*>3MXXe&OM~?I@c;F~(qLjp%w0E+ae7zuTGp%st(_`qAcXm~IPuQR{UADQWXKhWj zm+A7|s`IQRr|a$juiCnYr>~tocur0{y^%+rer%7Hum@V&EeLPlu<{cosZ$Zz$^Hsq z`uMqw%Ltd5QTsBc;o2E9cKqPm@{taIRZ0K=2fEAblKz$m(!{P@~`>PuzmAuQ)H}~ z1%1zOu+g#!kpLlnjpXSc;&0#YGS&wN-;$o3oZOiY-0;R9tzDk)P;5rGRMGEHFkd-( zORX^lt#0}hK4s=vanEyPGG_Wu>+bE#Ggp)0C9jEMqI6iR*Y=g&hAGc}8I@ValR@(U zfhI{p^!utc`ws=aoUbNX3v&dE|KXUVAIYr`)I(03fF5E}mQ-BBj$eblmqD{um8nh) z8QaMfVgl4bF4ZA%caKf@yskwTRnSu0E8Cj>>4olq8uWq^dxo3zZ^1D$)Iq!RG~KW^ zjuh)ij@SaaGV^O$b?;E-_uKoJm$Z%VFI});(q6v2#)4wZbc*i{#9+Pq3bQ}@?XEHn z->O$+f%^6De5h!<``OdT4nql=3=&d5o}IK>h-vk;W}W&fV%)2LqTU1(F9!_;Dc$~% zB4N2>4is_yF}88t<*a&F{g3tPxO;?YxXYV@jOy|(SJ6@Vr*gLm60&Sx$Zjt4pM6?w zT*k5ul=meVCEd+;?a4Y2HcRVL0cym(t_P?ApLYjQ)GcJaeMReV$6S=G4tiiJpI>QU zr%)>q;A2@qRn)vSbIhIJ%_m_c3Gdtd7gH zt2N6Die#TLX$7wj;_+7(&we1Zeo8*V(zh5TU6N_BMbex|$Z}PGpfMr9v<{z^8DlK3 z+w9jeIHxP0F$0X9iI3Lik8n@ePu>vy~@216q@UJNrPNv3JgwCE>jsCpMhgM>ox3 z)gQkt|HuNs337}|DRG`}+b(h1Tt(~%F-s_W=hSh{iIa!zdfNvMl!;E!KgtxKs%6Lv z{;YN-h;L@Lixm%Rd9`a3G8+Aj*o5b(WGAHhZWy!Y8Jk{Ia%`^mXwJrh>>MmOXhX*)T(!d+<_(C>M(J7j(Yl}e^oqv zNTAIKONrGtoaKoxV_+G!N!l_EwiDZ$pr89H6Hqtn^uU`IDkDttjbRN>8QETsZ8lu% 
zZr)}ERAf(V-paQNRa_x`uP5-A*|_B$QzMJQ^${ww?V}esHBNw9b7aet>}3Uo*zQ+S)P8sWOY)73;Bym>(!au555)3IAcPuOL4wQQZ_@<#83ZeKMbPS`@Z(j zQ1JD)zPDlz{VJsP$J7A#?|&AbmiKhpeSvFZ&>B%Ijp012t@RccysD==DAV&>^LXdVW#R4G-a*3!r{4Lz7#h5x6 zHg!qN-F#E9efy8=9SSlF5?Z@02+|Q!JU|?`*S-fwxmSzqG{6$(m}m?geY+(Fp z-GuEc?p?!{+oe^%KGI8!l-S(Kafxh#!~9({nEqw2Y0(3^pPd=eHKU5PZ6o_d`IP31 z`a_z&axE?XboOj(I9fzM5lS79aI^AH+wzXR(Cx0OTP^iJyR7f*lNt8Hj|G)~(!r%V z+>cG7#K|3#98BNS=B9C|qe2Mz$mOf}UQ;6gA?vy&uUll|~m|8n|>HjOT! zt8}*Bn$KNcua5Ymj!!;ij`<@cv5(<4Jo`yf<`ctHetwSVOeRvuk2H@d9k6ZdRjTAD zd`u`W+mpb78io{Mej+a0mmf}4pLTd$;s`fiiz2%2`@{(=68H1Oa(mxO!`U0CriTB3d zwycnh|7FHcQXpK${r3CX+sEYXfb7Q3l&l^_d_dp0Ajjf33j9UWOGgb-oBACQAb-*U zxVg#4}n;fJk101Bo54~Q!u;3YSdoLcXF zT^)rATet6hiT|Z!Hpp7oJSr9X&m!HBaoYEPkYt@rua(fU|<{PYvD zzZIuSgS<};BO<0Q-fVs-Ur%~*?Qg}q|DV5HUWwQ6kPL0-#f@)JoghO`*s4tf!#QF;ydMRUPVsV_lgf8?u3?G12(!#4YEn@0*`2E zO5X5sSvKg6Qe`RExm?51GVa0oRvZmf*Ssa+ti392T07BCV%fARgWhc}vxZE2ZFtA&wWKi~Sn&fG`Q3!Zl%gDuw>|KEdUX z@`>*Kn^|sY?~Kfl!O~yo$8xaP8=a>gM=b#;hrxT6PR1Y8!y@?);LajH?)%71wJ6Zq zN2k)oqv=XWv(uA`k%K1K2IS4m4uLNv1UL4o?_q8UEu^36>s zI;sZrYC^`e;#)?`kz9pRU{OKrmj+W6BMXPiycujx;K3^_=`(*vD=-UN5%+sMSn~U0 zubri&Cf}~5cl}!a7Sjh`R>VGy&wMFg#`K4W_vNYy*MbesuOe~&dE?!hZMl2zmyYE> ztmpM!ue+AJ>96za1w)e!$p9Ok0nH$`ev*d|WaD;}_j{BZ1d`lA|J^t`Eg!G_THsup zzX94{Xp~`;X+Cd2Ka|lPYA(|*pv7Ha2x`86Q#McR)aJ@CK#4G|wv1Tmo3Ni~*ZNMk z8C&6vt;%8W5&3&$p0t(mGFILGE`Z^ZevumYm?X^1;u1om`NW_~f9uutf+win@@3=n zp?9yy9Sp_?ESs3|s&=yhBNoD%!FG@c(ED!FMI z{v)o&a?utyZr--xn~=o^9lgTXg-GyeGH8KQ$=eCYLEKXk<8X_x&D2g1D<8IVyu1w6 zPkWNecz=hS8nS=J)+^T9zMAKGb?N`G_ntvfb?eqB8IdR`IVn*xG&C6$5hMx*az>J5 zu#qMQB?vS@B-lhz0f|k{Ip-XjoDpb{oZ+ta4QKCj&)MfxeYXmi@D268&&m)BToZZu*M`91Wov0N|CPLQnA@?{7O0 znEA&Jpqs6F#!c#c+)G;MHD%6GfiBCp?dm5%iSu$Uava+Cn6ckSh?^kBzqYL>#K*;( zj+jspMdGK)P$!oJ_c_&W^d_52-+vLFXMt-6`)az)e>Xq%bTfX`lTfz!)#skx9%p4y z%quS@d}xF2n}o>#&;2~N455(XMI&&a%Z#PC;?}&%=pdrsGKKJu<$`{~<3=6To_pmq z?-ytG$ldlJG@Rr1ar>KXPEQp{ub|-(nFb(Vo`;%@P)V zP5r7D6OIv3J}CdSg(XA`(qzHx56L?B^n15>h>+9?d6*k)y1(Wq9C%bW*ri=VcJ~gO;$p-p< 
zoOPZ+M4o}{uZ#~V#XiJSj-5=O9!}rUNhMN)Ph!k{0uzqXioR-iY7Vt@<#Hek#vki{ zY;gR!Ky<8Pawlm(g3H^&G@DRSF~b>HNYT|JGfS0z)R%;SIP zYHIdc7EdGv|0&7zM}_@Ki}!2z<`T5N;Jr3B7zWS>iPkLmsmO)x`@Kl7)`xNoXb{ol zNq8yKAk&GwNZ;jk9sbRGnCCd$1_?D;UL769aVWgG2F|0T%*I$>MYVX&y8<}__LC_L zDsKAA=yMR~1-gf--5r1Ag+VuM_m-^!>$9sA&7Pk*d1Ip5r_F;RF8&>=&1n(q--D)G z0)Ncdb|j|hS{w8|l&ncZIU6nV{bICRq-_gk^qn(UtTqQF`&hltVZq#<3=RGUJ8CtX z;@cnXnrOK+wYRjMG4q=7;>f>5W+VDRl(IO}Vj%M8AkWv*q$x~l^*@+5n8R9^Bz|;&h{m%6lq_P|&M2~YJG~+ssRd+5+mLt2Ew_(okL4jSR47Nw33kwd=?AIOlNJ4uo zu?p*)cTNQa^~jd|XXxBtBfJdfW9(<;JI)SmqT)0S@ebav@wM^(Vt@U8$u z7>fy&M>3t;9!3X5*knmC#ckH&hs5iSNS9LsFFbj;4cSh6%&(^MZx5#MgjM3|dR`~;HF=3bo*0_fyoua&$9yl7zXlt{?>(2i3ktK0wN0XT*()c|!o=UzkNsWzPby6|& zuRE!u`RC?~T&w?POvb@jKNkm{X%pVS)KYlNjwKhTy1N>6SZe;`w4}|h>$&$LBxWuq z)8B<*#7r_nzmWEX_PEfDV5s2kgMu@xkqIV8)7hObj|esppB~yu<&d8H!T4VH;%MP} z@Y)ud!+(<}qqL7F#I;x5Sz2nfwAG~+3kCllmj=Bp2m6UV4rj~^fRqf%Y42Jp|m_e1RN&uK6` zEq7go`FPAa>~#!3GP+kRvFai6B8R=^I# zpi|Ejeb=Y_G_~l_V&N})_J*)Q+ob!)C3ni^up34-cVli33=LY-+n07PV;>+g02*ax z^E*iVN&=mw z%TlbGFE=x7EW$_bHog~Jrm)C^`g}?;mTr{QBvKxIm_GItUvH{!GgycsVZR|7`425F z=5h4r;?4BQ4g7FvqXeu#)Yn~Ox5wV)O?pY$%8Nq7F3^&$cPzi4+r^Cz8YEAwlw?`u z>;djNbV5ANY0X{kHYUdgr*R~%0*79FE4GHcJ=W_zt5;-iTPu1?qEF|d!h7+`>-4AZ z!3cL6+2z`!-A`$!^s3Guawwz{S3D)0sH{Ey4pUgNE;bYcbOX4B+jK_-QTgL0{x2BeD8z#e$k3mg`rg z3mfO>cSA@m*`x&gJ3Zpm-=_m{4xcFmF!qn(-aZ>B!Wx@TXJ$OPhx}KfmY+u-X0R;Lsd@5XS;Zb2`NnN0ji7WCd z&Q<1DGG=10bxUc%Y2{>&@aN0E&}WL4Om3#=%~TRqbV+RAd!y=}sa{ub+=bm4|Uu^LWylIfddaAgg|Cumsn1iNJ|rh1oH5v4wt{ zN{{%&ZP#f%N}w6J=HQv%UN5{?sY&yjlBo_d#p-5Q2hIZKtttW`qxECS`23^x`HVsbh8 zeba{bKU`lt>xAu%)qly_=Ko@b4N&kY#1*m+%Yr`S&rIO2*)e!YZgdrNGLzuuMy6GM zZ#&*En>sr}@nIBwh%fq%3VK6*1XL-=< z#^ny;;NnTc*>YCylnIZ~z{DCRfIzV~mzyH8^QcD27kIzj(g0i3lsN6z+@3KUVAQ^YweR0Nc{iK5FFgEZvM^3#4SfXKnsPAF!oxRYY}FX+T<_rpExZj$G9`blGM2M*716oqTC{(WXqVD z24Pg9i=}9qpzi}f6YN3ldU6WhC#Y9TkRDC;xc+CVU>W?;Z-TnZxahcBWujA^<}Lbk z%P8A;r{KAkpuq6{Yv#5mG2x<}3P>OB!igu-Km$`Lfb)rNF`VZAh3*=t$E)OIn2+zr zTR;Dpv&Q26=DPKvkx`(}rwkqd-s#W{+Su2)M~2aB*Sm%5Snb$9Ob>)eV@h9tsAJix 
z^|a@{%sF1{KPGCBb`UrwA0MeM|4kz3IzNYN((!{gEFOUoFfpnBdKiIDp~o#`bh2CM zldxM?g)33H1Qw%IPE_ zyf{t4)@{&~FfbW=Ie>0Ti19kS2K`XK5IVL2xTOD%RWPcrMo)d02IM#d{o>aEO%7X| zL~`yx!#}VzO*qWD|2<1XGZT=gX)C;K+8QI^;r+6c;X%aZF6HVapM)^MccMHXMNxI= zKC?c^fApcBLwPdyr2^5krV0Vf6H)FrLi|k|LVuAVNXx=oj=RK=Uk|R8$Uoti^9Dm`JssCVXLeqaUHV)N7n#&uvy+KP&1bIq$1o(c*A_tNtB|1%~pqCnPyu$Kd5pT|O z`B$IQ)QhYZN?APMMe|_Cnl|NZA7~}=B5r+Exn~7B!qwdNTrcNzuqpTXNSHv=zhSPF zo9B(ab#g1pe9Yx1-75=-R-GJbw%BEgY5dcnveTL;`7&nvw5OVJ?;oUz54gB=$}Cy= z0Yz}@f6bKM{!m)?F4;hSOkXocKoc^GN6iFMQ4S{D;uY(ve!+9Cq> zy<7Za2U!ajCn?Ylkezd7CKU_wDT25;=Kkiyw5Gc)HS(u;*0{EB{r69K+y?9#;}&TK z<+^#Q)g1%s=OMFgoeraiwI%ICnuB)82#QJ+3JqAWhL!@^AAP&1;<5JbY0 zim+aw?L*rP299Q9x6B2jHIS-urYsF4l2AWtlE2EPX;dUZ@=HFa2Moe%o>=aHW<>8_ z2aDWW-0F8~455xmJl`h2FY1@4mvS%kr<4wm=jF zYiCXro%lZ*y#B%KrSg|>7MQQKgU8LvA^<@F@|GmBTHQ5>ofiaaN0hAtO`KKD~ks}o7{>=e2V?cr=cRGvRYT+;HZ5TG6)dtQATf(C(~5^ zz$SS2YmEq4=9}swVBgQ0ga_V$Jz|V@ zNFp#u5g+MBKu-#$hL@bBC?n4dnhQP)CP@(%P=s;2HR@cD(MN*1CBk^6C|ntcWrkr} zhBH>Sd$}!DK}_7yPd3=XDfK%(>a4R|vTA;#(3DG#ig9~3PD|GbDp90MQmIV77NKqz zsL%}PmLkgQ5q&g?AvoBhG`xBYzX;+GIMReKviH*kvv%rJ`1~k{pBuotzTlqA1yRc6 znFZ^&_g+LNOoi(j??;J_^g$o$G3CBwV5E+Z=i0t&l@S4o7Sy`fJ+(dZGSw-~Y+rKc zVLYgeGk!dyZC3I6DI=UJvX!^m1klgG+7s6dr&Ja5pSt^4SYJ4Qz+Wj#H7IY31J%4Uc1iTvnTdcP(4fVa0zbfpj(;x;m6wc%U({Xb)2(8 z3gIu^r%uwtdZg`(_l<`RE`MZopG7IIwwmvnFqrQ`#jkpTOO1_`-;Wy0OHl*tk&p11 zs{UbwT>ml)8K70qmB&&(SKp+2=?NIR-0~2B$T!xtB>K!&2xjh7jidcGBOqf6BMU5h zP8OHHupV2vtfg`{Z#-A-OQ2NiTGbs$7 zC@9G($2R07xhq=a{Emrn|Y zQj8Td7EIgdtjaXk7;B8*_iCyIb7jdrVb$_VqsPuN0`;uGIPP{Zl9x0(FP>E?+>Rio z%Y8Xrr#w+Pz!un;cbs?_4=ZzLcXK$$Ll>%uq1Hxhl%z;pK-4NuZ$*EvQ8#ilvbm|C z#E?{W7<>`g-$JSgY%-eP0H;l*nv)XEveFl2j+{4ey@2i_F!(Ht-s?*)+gc9bAr%|A zZhQ|$3ch68ESZET+bzs7TG4R>-^Q{wqZ{}~Md9WY?s6jVB~McPa2vLvD)1!jhlisc z!s~^_u}gb-=5FdTx(SEjs4hOQdtpNN}8&=IE`ww%+?`xJ_f&kdUrkz zS?OFW6Y{#fT-`Q*1t1H_RAQDF<^H-B`p-3`%Pxg?@`&fatVyLF{>p*+j3a zRJVl$t@n3^nd%Sj>M~epDQ#TO;9Q~HP>0Hc$(GyXYB45qzTDa&;JrNVDTh~fmwCM2 zzYogv#13)uAHw^pRau2xb;#7Y54)7|hO5S>FP`i@dB^0rV(2^O+|(Ar6Ev{K6!EP$ 
zOZwqdKxv|K^$9uK%7@7?aX?%esUis-ycemG`%&A7wF1l4(>>)xb5wId(PVC|%4hV$ z!t(u)((dj$$}kpBV2AU1ZI)c)!rv!A$uX8<6VVrRu(F;gC5u=Ewg)8%0$U}P9k_N? zs`?9Pr$E6>$3)b(^AMgtg``uzg`|)EevEg-2`eMPx&8Oie52rr7jdFnFfn?d)#YT~Vn^gb6e~eH^61FR98!7G`Rw=% zTA|3es4`&>VnNZ*y#jOuz;Dj)BR^dFuyJ~0?bsr&l{e0!0p+B|8iR>!l|yM}Lu$5a z*17jrp~yTpkyC55^#v(z^qG2iv4kMMH9eWwh4X>ets7%PeKuHc8E=Q-#0AyWyPt{} z?kGFq3a>q(XM{G~N9}wwza2IsURKPmUl}S(do-zM9ye~!6UY9ZYjn!RT(<7}`D9{zCaqu{!KVRwN>iZtm@9|gQ!rwT6~ zl2Q?1c6@Gxw=b@>%G4`|2u{jS4L@aM00xQ@AyR;d6OBCq)=JX!wa(1 zPlx4TMJXINU=C+Yl3VWb=j+byFXuxA%>N zfM0y4_wpHFZAlVs96dAuq zo9?t_eP|Wm7gb84nYobsT2IY!uu`cZx3@gHeefg+OSj=Z`8PL|sGV&e&UmqDd~XR7 z@`{LWjOAqcZHR=-0CB=%)?(1@0 zwV+S~X#VPYT|@Y0Lg90)J)HPDDgnoCoN=iCp@HUvrUgqsGBLuS8A-oOc2d=g@na?{ z93xajzbCt#4HgJIr(JkL;d{kqhx(xvcQBc^41zBe7d=u4@;#sKF(=# zbq??zXX%Tu29fMu*?m-~$Y)E_$oyy+m2+L0UO1Z1W(;^$cxCkM`5*EhN&P~Xc8Rnf zGT;fz@%seteL*R>DL8adxw%9Xt~Wv+{*;bwr+c!mQ{Rc3RzGd*7pK`#Y##q`P>pf& zJj_L+s>~d|DK$|xmi10jaq)HsHcWT3$%x9NcCde_bQp#rTf-jnXin%Ynd64|t58gDF8Z%Eb`oI(Kz7xoZ3=|4T;M1b(qREkoT|LA1&m#Z@tg+U~4 zF61QomJ-V{ww-v_o1vSiWm*@7nCIwK^LdD*MZAkS5NNgVkfh>ciIw^=Hy_es95jC> zzPG9u5XvN_AbN(|?257I_@w+uC{jeDa$g-@$5KG8-+l8}U|MCpUtRv7mFLM{)ccQA z9A8)qO%#3-8zOPi?X4+9Xry7EY4HG%ALY533;jNVLk(D9p@35N12uvJPa4nd^pj5= z#v(|*?yaK(CGvAETuhXYDeMmI`7x#Mp}6@jjR0d^H`-n>Z93HOnt@41t?Sd~H=yH{~b4(X5#|A#t?jm65qTjD>~wm1|wx z>Fj1U&n)$%CPc80=jkyd^Py=tW*E1jX9YGD`#M;rgJ7)vy)_j3CyA=DeGhNn{#T07 zO=HaxkI}Q;jSF$|cKk8nU_AiQzGM=jt5g`@2?3ea_W~luk_{|oZtgi`cOZwe0a};V z76Rjy;#6iJK6_cVQ-0*owg`NJ@a)oICvT7+;$(!=o;|&6_Gt*Z0HuZ%A2+pCb33cc z!Cb!0{OaePNKj7E?`4!cUq$sn?Zj)s z2edlRd_11S;_Ut}&iQ|vJ|vwtmb)KdZEhbQV<@#);r~y1h(Gg7|0hA?KLlz1&R+cw ze*6#n3QPU(SW-Vf;JSr>%|!gWB2F`0IVxmk7uE7=7K~@~ue+G9kf|w;7I&|j7W?~; z$K(6UNt0k#cUmL!BHL!`ov0y)M>egH#`-LGXa8Yz|^_G-A5L9on2xlK4(5+`Z@oF;_qyUe`rk8zr+Z*5I*PAj|SkTSghzykrxnH*eS%JY_Jvkcs()OJJ= zP*AwQC&|VoMx5_&O6Uo!Tdf*)m2JvAqR5o91of6Gy1Zb^G1LE8qugP~I#o|6L_(hl zXk=^xx_KHtNQv>FewnP+Pt*kC>TNt@Ds?AhE}-WCg;$*9;jfI*0~*XGuV`uSKfna? 
z#4I_nJhtEur${$sslU3S{)al(F=hJZz^twQPvcrD-?|n-f5?o<7(WtGo>JFOU;-aZ zk$HU?infQ8OuYnjja}|fbikWsm$%vtZ6mseY z4Wbc4v4)_)l=CWptZ|5=au;v&Mm5RhuraGE1(WE0*7a410Pu@o+Qn2+xq(6|S&s|UvQmMp!JRChGq zvzeR_Uo6SY*k`Y(y4m_=9bc&U$o9h~e+*-i{z467YQy;07FIkO=`ZRTVANuKP+bgvB9-)|oKaDAh<8FQ}U%`ZL zZ(GQ7^Y~uXNi7G^#8>kk*d-hLg6n;&8gUsR_@H%b2uBzb`uTqp_}8`Eciz>Rjs{Lm zJ`*}M(#4=gwnB+60Xn$3L6C127@>M>Q~QYA%Fh-wi?q`5>WhG9?`%X?^lhnYbcwxA zI(qY_4@Vm!czY3R17B!mxlG--posfnBVL*r{rg1mcg4LLO>27mD|A}eVX6?oQ^9|* z$zJgRgAPOYy*fB>Hfy_ApGi8?%!hY|$0T6Wz)vlg(+=xs=_DT8w*1x|y1RW5=qboI z1(6$C7S5&ORfu><}0JLF$`#=u>pEy8`4! zU>h%_TqFSCIVB!={e0H&#(jx=-c)I$2l$Y!kY0jqR~I>eO_o7|&BsKMs*bgR$))55 z8A2qtGo=}lg&Wx#L2tb{b+5s<#%%>g>upLNHXEpz9AWPoV9=&u`+11u_ppc>NV=gl zmG(q#AZ21#UxI}0sVV*MA2(_rv%FokUlscX#{gEYW;A|ME4$Ft5hbXY1>;g#KWp-a zv%^+V;-DKb{2?Hb5*LNbI6qkiXAFF}^^{Wf==4)S4u40wuM`(h^xI$A?P{ljC{r;< zb&|&Fs4P=3Aq<#-K3Y;7+}G6J=q1ntWPvap{}hz%p3NNO_q^Wj7Sl^Qy4V1YWH)%; zRAhge{DQ4fHo}cxPHMXi@s7UPY%Rk{+J{-V)OOxI)#AsocVNu!yoSK({_@+cFr~&? zDKU5qe#WsM?ac9pV(M(44Mvb_OHw+E)b91L0vqi=8dnG@HZSOXF=aDihqt1esupKA z-{K7Ti{!W!q;xMPcC-l`#9zHw1UY+bl_zTPIE>;xpfumL(L{#(5%4`UR16T5wUy=| zl?HZH%rVW3vO4Igf~;cNR_<$UI?$Z7NmAPc*+A1bGqfe`boMa@=pnuerFIi%w9p6=?3o5mnyl(+$m-> z53?{<(qLe2e9sMU7#ffWi8t1dG$hFPTcr=JzDFwd&A#>pWsL=)C+eLTKDx$NA1-LL za^7h+(YB?3s82g-GcP?+u_=8kkde3&7UegQ!vBSF+S^0v#j7*DsTH9w{N6tkgT-ip zWk%=L4zTysk*)nZ1;;V2NYvW zt#=NGW5FIgyx8(ZD*4VZ8ET5PAv~ew3+`|iBK6PnLZsTCo5#FtG$9~JM!~Q#BmO&Ew>Xk&3w^hIRC+QLUc&}z9spWp0mPvG#Dw1ql3~q4e z@iE%^&`POxozX8v3!Esr%G0BynyTy6Ui`G})si*Xe5a*C5^`jXt(++OG&^1*+`mq& zUFwb87I*j_G5QSKqrp;PSx*onBPiw$v3$fEKq^o;XmzRD(1|p!F~Jy`Obo6IwqG*t zL!=G8&9M!q>K_FpkFY-^k0!6Ab5nXC(>ERmJ&&I$PmGdPT%7H|A{i4djz%hk@3H9+ zl$Y9%-ipM?{rN!?o@irbuCc)(*cQg$f`|r6mw7GKh}po>-EmEyxxrOXEvxF!P3W6b z9-t05OeAl#>v$OvwJ&nC=Oe3mYLSbx>^`Qp|DvDDE_xQ&I# zlWgeAz^j;f4{ze2w03QC-jFM!7DBPd(=(*X3*c6#*TMd!%amP%RBL%SR2yFzLJ%c(2agiNv!}N2zS05Q zU$`7hH*qA=kW>~ge{&P@R@MGgOBmhQscI7)i}}|*4h-FLjMQ_ob44DtKZF0F$6v=5 zM_JLy@)UpDoAWQ5XYkD{?)@@VCk6_goICF$!X4Yj*hTapEvJRumX^pw_sqM_ufjN% 
zz{=C4zejr|M z`B@n81}(WtLb(h!mv0|gol<`KKK``4IRT7`_xiLHN85{Cz~XZZ>u_fgQcCe>YYP6A zY{h^A{>|#aA5~itY-PV3+iFex%c^R~E9?U-y$Qxq+}`8j?;idsI{eZw;&91MB>+kp zjUJoBlJvn@4wu@ph63}N%)xaawMDN|;mW!VlbTO}@Yo@MG|&|Mb34mI!{Jph4P4FM zIYErC1|J?`R(sr@R?IT2xqA<%HhiKoQ?_kRu@&fX70%cT0SS-q(fnqGD$iIQ0%P2Z z)Z-7cYN5LP!lm!i07CPFvvfuj51SHhn-vxWb?cCtF1Cz1j36I}RmAQ)9Z7!R=leHOMRzZ7r=o^P`(0g!%RyX+*VA7ISH?;!hC8=VD4p8nxT~=^8AVz43Slz=9S5EvR+%_v=0h&)36o8u z@xaG%8lm>1^yG)LOrp?i*Xw8L2OkC13WxTKU0t!vAX1*Gx4N;FydBCeu|%^+bV;QP zPWt&jZE-qYa(~b|PJ!VZijyDXj&TR;DcaMu@qx}oHfMgY^pWP;!UW;jWy4NO#u#12 zGH@QCVFC*qvLv5AWGn<0VtR&MftK^ghHClqv#zCIZHtM1V!VYZ;(TObwr(7NN>-#$ zoo`y_B8l^jFm>2ef88$%8B$rUFTF29E&|o$^L9lGfG2wPLGv`|1yn4ZD#~xN5PWm7{ z11rNPZeE6v-=}!}l5r=dg$=;S=H80l4r{YMe={3A59&BIcUdqOa{Mp>+~X8#VNAd! z5GTn|;dF;W=aUjAgFEW8xBjmK@sJsFa_xtVPRdgr+47H0oIx7-{?OE9vUWgcL}R@+ z8UDevuu{Y=&3+gol%)~fMZqzmAE`TA@jjE9SkboRFGE#1e-;x#Lmmjae==rix-~16?M2G_;)!tZ$a%J@{r9@MQ;_=T+R-)&A2ys~3>=Lt7++{FH=5eu zuN@oP%{F84Lb4c(8{3(;>7}o60`kC>6eFWKnYn}7$BD0r?3YV- z7cY5Tz}{KAC~8FC>{Ngp{>$2zW6Vvn)#GJD^&b3JUZGRiwq=dEY9*SEd0wd$4clMx zyhhF_tl9u(raT83j*LY#^1~rhS0PJx%M%fYl&{VwumP%r<`rt4BNFx()4^O!lk0e7 zY8S`PLMh?bQqAgl^}`7fci}p9|DSmsVY)>z6p})bL~CCCE2*x`gPG|JU4A*M%262 z+5T%be{0{`Sz=hT2})sL_}(n~pgB*^ouB!uEl-?!M{egGf4%vT2~p9Yjvt!Y&`D^n z4!J@>0aqn<7D9HmskSDJQ8nQHBfwY;~z1)|^fL;OAaf&N%*jk53Cx)#GI zT>9js@WQfavhkPk+IH2JvMt$&Zp-g>+~qWOL;5vw?t%T^k`T)$9d-&CG%Rto+qz?K zm4*+k^(3 zac93nBh6eSibSYFZ)m{4U`9*zn*#9|EB$;zL_e zZVrEr=MPTggATp0_ElDM9m<9AHI;9B+q6qQ!xZiyx(RgOnqBONQR- z(%JSbrL~t&eoq(nHIspL2Gtm?Kf#Sni1L^DIT0&`Sd)nEF69X#tR=3cfjA%#FS5>z zTg8cuZh)ummEBQUtLb6|;48aVa|p9gmx4MOh){BrY=rDD>>=EZ@PNPwjfmD;mY1ow zpG(k&_CAI)XT)LLRS&&KIX?EbUlfC4_1LXLMjcHrd?nyJ5w6?*EGZM4OGPzqTl9#( zhWN&+N3`KN@lILREeDKe3M()+JQH$dTYY=hHWM=iH#oefG&VY|y%+*u{*qk36jNIh zK1WB##MvLV8r5$>?F%BRzt&Wyu>l+Em}IfOW4aI>=0JhL8uE-{t&I;(V?SOHi7c=)(Ez%aGji;rm#I3s7%cIUVb_c}-6 zZ+!!hwM%+Hns0N++3!6q?or;iqnE{TiIBP3te*=?&d@wH^c=iW4_&En1&;v_LX zp#5&$_9>!O<_O4iSUvIJVf--MV&f0#bq>{;4jl#(y%bDAXAHgiI~)M$_+44zxM_@- 
z7GH1z=es&xBY9MJ{PYwlb*5lhYbAxju^gfzl7JdIszaI;xat)OZ~pHQ1NsIw^nGin zmS&FUf9M@QvNH7J{5x({%ZT5ZtM~OeKjLMt81Va#76_1VAKCMNTUseD#mVKI?y0O# z`&V>cDVHnFIcW)eLF82oYh}?y2OA0N)mi)xm5crU`hvsHwo|G>yK`E}s|`}rOxurg z6}67WnKmWv*dl+ZY)wAbt?$%5%4|TVmG-|l0s>iOQ|mAD%s|VAAtGH}cw1O@d6#W2 zwe4pNVF^}RZb}Ry<`(V7T>I;%yijksIbB`bIr?c&_|Zrdz+#e^MJ#)D1?wLTXGwbU zY;R;OxDFpPT1=TJdCOOHw@6*X5N5BufU&4J#nH{pNpanAAJFGGW(fQb!v95 zb}-a^g}S|OX{aTEQdHIH@^U|Z?r`;Br{DORvP%IgW;4Nf`S(!CF|DdeuJT0(Zc|jz zaT)oI1tSdRP?*dG!}JO5XG5(gPPUd{bF()F&rjKua`IbyQsSsGH?ZHn4hGj> zoTh%Gzf^3y>(~aQdgbQVnr#&<-V__!v99?HlRyj&&vnr-fqhhrQWJZHk9!Ae%PpQp z)FvZNgnR7U3OAa0&uu0i&;K+>SO7_x{c5wGZ0c4*)aIlrKyIC;lkPAe{FjeWH@G{= zHV(+-)Yff3YNx3`NjHj|`_Zh!*(@d&?=rKh2jqE83p5_KuNBm<^_^|*_`V_)Q>nK2 zGQa4q@=L#KfRbj7dF`q5M9D%PNiMiS*nv;ha7rSEm0$T}(S!I}m^9;(7&$p#OiX>) zl^P~7sy&g2-?!vgbY{_?d-5u=jq1eV09$0X=XBi#EDtPWWOk07lZE4Jv z-5omX*j&`N+l-MC`mMb@{eC+}?TF66NMGs6!t?UH!LniVl8sk(td*>d@5Pk^ zP@lui>86Dn4@N7WB=Q6KL@S(P!j&DFFfEfMWrP&c{nI>6O$$l)bJ^`-u}(ncer2V1z`viDZeuSk@8O|uvpx3En**(n2Fp6zgWqol%_QJ2H5;xYRSgJpD~nU|s={H?t#wsA zb1HFjULTUvWw%U=>&q@<$`?y|a}N(o6)$wg-iF@V?0%FvrwJuySV8US=#HkguS(I^ zvpLF*Ti%NonOU5hpPeere9=0o-5 ztmOqv3J;i1AlfwMHIS&3qQO5drYNUuy!q-A+0tl?PM$;$%b5)`GL6LIL_RU48}0Zi zS95A4Sh{Y$lSC8Bx9Q}+sX&>yQ3P~aP^C9-Yt_}cwXrMhxx6EdU+HdEKK8$D++PN_ zKhRy-w0?eaOfKnjx<8jpTn{2`3$7k0r!n2UTlkV{ee&a@E!o-bikd=4{_W)54!CoD z@3O%-V_iM6YtL+9@?D`NNO7?h^0nJOH)&)}!HO{uSga|}&yPoDs|EMPYmeLIAMkQfGQZvvrNA`j`=-8#c^*xt@hihU5dTEn@v-C)ded^2SBE80|hl zKG9E9%tN7b^_`Y*sFDQ422y=_`aX+KFSg_aZdBzm0GE&AN zlkZcrE2By$Ek)hR&`-r@!zFvp(nFCoQV2`k$zNA96Q0E5hm5mRG+~iFKpNbdx6Wk$ z2*{X57S?f}K}`lEI(4VN7h?VRh9;yadii!&sj9R>j{0+? 
zznCITo5woNc#Qv3^{?G|`>nQ_GpYgIVq1IjFKHrKJHv#gIx^}bWV~K5>OfQ*AmMC_ z&9DN`>(s_|bmeFQnmDB8&q?dOXcd9y>(0n`xm)R^MR7;ljs~4@3fuGo zMbww-qv-D9U>K?SCG2s}OJm~~ioH5E2cm5C2+}BRjxqVZeF(B5f3sm$ze~_|3OV(# z1Gw%*7(ofuZ4I{X$(oGJ0~G}=4mXBWu3FT=JY=+MkM*S7x<#4x$h(#WL(mcM_n@o< zd`e=v8(%5=zD={Y9h5rYS3aA~!TztqPN$b-eb!c$`zDE$0>VMw&lHz=Xu>*mh58PZ z>hD?oryx`MxsL1o)2Y-xSsUh57;wvvRj1f62RghRk&wi;q?&rb?GpEww*qK@#jqC0 zIV>;p*nrUge*f`*_z{?(-#7C#b|3G|$<}+HQ&JdMKlrDdE5+4FCRrg%bk0>k#ceXD z8)kU_bx-V%^p5|fTIGKkzXkC8XQx-EALu;JpWqV8%sPE^Ooe6#PWYzH5J?F6*46ve z)m^J3fSvIvR$=(@K8x9alheZ02LRtXM|!yC}W2jRo?J@;jK-rHIxPWu|S9sg&+uJ;us?_Vc&Jt%1` z4lpnd1az(~1o`9rrnZ^@RGP z+p}PoXw26%>%p!ijg<~8YvX40&GWzj%lpgAy_&r}J-e}*h8!xLot&3jv*U$IS_Ac> z_W2q`?9*%uU()>`5SuK`d)|Sq!x$@?h2J(NhN)>(p`AF#PFHxG=`Z06!*RB0z|$5Z z%@~dFe%z~#@m!RPsV>%ju~bmcoMIdh(ojZF>ldi(&KR~UBo}c=o$z`-P@#0@-zueP z2|j`EkWPI^JgwTz7&>||fzqbcm2{n}>KoPUceI1Mt{+At9IqX6A;WKhfKyCByO^gl zRFb)LZ+!3_gVouTYeAc+eia+Zdm!cX33EEpT=-CDmQgZm+A?h=)+KQ+Z7Kc1ppb|~ z3N^O36Uixyx!0#kV_q>xz$S~GS5f1YmoKPs(AhG(8w(dpRilrclTXzLbx3GEnkV3Y z0}i2lL0{5F5CZq_aN1zKYX!RUyVj5+iw~x90w0Jz@AtvYtHQl=`YYaFt(HHv0#6TI z+|O88*S*Ry{aIuqHu;I_H#K?+opRcsv7RZ%W@LSPJfB8;u#}PfY3=0&#z_$jtH>=+ zOoG~6*D34|g8QKc;!suR44m(G0 zW|^&3GQ2m@a!sJNvRci8MAQZ_(>exX;}jj?@ZD19<2zDP2M_d!7XI7;q$-(}!S& z3y4n{@Yj zmy&zsS;YoJ_qv)$6etXwYlkaYQ}qCq`gRv+3#ZgX_#+Koo_7ZZMD-Dk%g)72d#@(c~$iKH9VR* zA1f+b`*(LkHUX_9tNXeA>b0_hUC#1dE%)=Q#$Of;kEx|c-ryXV`nSAPIDFgJReh3G3)=t z01&pwwRqVC`@{?sK|Wiv*zZoh_9XD|f$ZT>U4X@w&!>t8v-esTUZxE|bgQ{JDl}wn zhX^=)cC=0MlfwAnqge*R$3+h?{f5{etQ0kdNmnpwQ|cgh6*OdpUdivvm25_su8w?4tU}4FHD)bhn!k-cd;rpmc;R7;$)KU#sT4QK7#HwDaUuQfP7JO2WpMd@{B>f)nL1f-HUb{A=*Z3jEj!-q+QFzYV+a;s(#(dyRY z;H2^DhyttDo(=1>bMb@KSEzCfC9?6ylulogVMwD*|I%loJafBwxfD!sJ3yq69%*3) zY58NxT*nXbc@nsnD*mV%Ax39cAZ~}aZ#EhAF^eh@krxxZt2zg`l7SjD&plPFz^Ld7 zwFn>0YMyA<~V_vT8>J-QFz#_FG zH;|xt&-zl0ugg}6C%F(yFvx*$!_oeyEAQG+&jr`Q1wlR%T0=wvcer4~i6}pWt2?>J zX5>M9JW)9Qtow(T(j%wlwiQa>sKbY+7cchaUodbm|Be|YHb^MwI7CZzTfSk^-F{V< 
zMSc0@*Z)J^dxk~1WbMBa1Q8Sg1(7IGi2~XL2_gz2K~W?J$wGG%1<42~L6U$Zn;@c~ z7l zx`tZ5M0q+383-kUeh$xpRtof%Vx({I$eq$|kem8xU{DaSPY6vGvZCW1T!SYB#iYWP zonTY1tMzUPjNXn5g)wF|^xef~olNK4>tCl!m|%m6P~Q0$*s{^v!41=-n7iX_$l}fN zttO<{bFm|}8ffez0p9g$aU^ZA!(3^p7^|Ceb^ zfczk~yJXAGZ7%yi5biU;t6HHWu#>adH5}b>u2VUGFx%gETkR4g23a6Gh>r}n^f2@alJ1;E})vG=3 z3;u+()|avB1IklCVOv)-Y!pY(2Td!GOwH7;XS7r(6wKXpTJWz&zIFuH^I8(=_&i6c zr*SJ%kLQ3=2;(8MV!;EoGlvzdo0^<=3UxbEf@hglfXn4<$PLL46@GcqDdgpM)}9rZ zeine^&hX?+S$6U=)Y2`PY{wD{oe>;?A>h_U&c*to8U=QSnJZ&o4Wgeg=9=>{h>Fgw zgYwZqvRHOT**E^O7fp}t=(mc{a(yfz(>I%7=!HN>1lc!q+-r>?vT!^#U2UeJ&stJJ zduG^Oh<)iDV4a#{`E~dGfY3sQSPwxq(-1^)4iRW?WhXxWN)Paf=YNrI+J8TMU(EZt zT}%(g^+@=t_7@t>2V%{)aI>b=dk^yzxj#PEV0bEa8`5!=ZqZHv?m8YU+#)1q=QF>! zzQ{k!2T-lXyK|Mup~4G6hU_;U(BsCu1)5x9PDMhnw?@7|tBHdaaG2|+lx{C#PKV$0 z^dqMLxAWUcGaIiRsF-VD(cHvaC-CK9afm#)kVm$8C<$nV-ugLW(F2IibBE>k; z?YKS_CW77c9>a^-7k!3oB*qIbBo%IN#|4TJrb9%EtvzIn`8;oafQW1>Yh=4Hdl`Gs zCeg+>YoHI)9hXg9k>6%#mG|86?LN8D0n}_YU#jlET{koAh*bL4ef;wAc7~l3?C@N6 zxJ_XDkM#^t(O;oYpso?o*Az7%exn|Piaft;+;hK_ zXWuLRS!kJ93gEzR;(7UtLIxTBEPAg*=O?`t2bRq{O(>t%uL1$D>Q9Bf%d<6o_Z3JC zQg2NClsCR69b=Df9Ip!zxt_Yy92LzJ($}{O>O7H0!WT^x#$8~V(lHkr@ATqXPSXmL zN9{nctc8ek8eJLhF`GG*PLH2kh zESvNDTOonCkvx+W{es>*!9=^)Xc#c?Z9Gh1kW7HMedpNrH0k|`mxT4Dmh!0}#Y33m zWbKv}Kiq=FcJxs-5Sg69p@pTQ&uZ}Y78{&`hUS=RPN9kE>{P1#^tIgh7Rri71u7%@ zt+|K_huvb?LIc{&1+3}r==;^z@e6FB*+XAj#RuAAehfr{+c!-ew(Cn(FuG4IN$&PC zm_%Z~Z5v7z8T*9}&}kC?ZF!ie(-ifJvtsFm34Ga9$gM9&XIuI+9bG6(g0fd<0Jt}T z!3m*13hQA(TCVhp1sYQ?CY)HnK%aBJGe5D)v7Q3Dw{W8zG0aGvM4>J~too*+dUS+3wqtZ_@eRYSu- z12!$63dsCH1?y&Q_3LXTZ^TH|i9X?PQ&z%NK;HbQVXj*Gg zK$CPSv>J?;@eIf{<%(JO%)_oh7Wau{!zd|0_;H%i=Y{t_-ZG5QK-r=!Jgs=Aip~6Q zHaXUB5-)=*~*CPMwMf4Y2B4AWR#-#m+U*RWUx%hV{#eev- zoBO3HusOn39qsNaU1p^DaWL6=yt9~7>*aN{x3=9meDt+ttQ=nrlp1bTY6V_}G}552 zfHd;>crhu-%iQrJ9+uoBvW9`I1Ln!vvHQ_hQ4A=l;c(%Nlb#3{<|S;GbQN1?r|tj+ zxD1sHn*56HTn>0gW{ z@qU#h?Hp_nzqJ^?Y5cF@@$8JHxy2?vyqY~fxy+6Uv1<KB?UK3@HrsDZ63bu+lkSi{}V{|BfQ~|S`Vpg&G_)? 
z!yAi1uuoPQ^J~f}Y!lzxj1$$B#&4qe@;=Ch_q%ggLI7y}>31*%BIGT(>uuQ~oxfi~ zG@`eq(eQY0nrzPUq_X=s7=5}1%e5z!xLaR;l$*flz&;{zwn*^SW zw;Rkiuti={K%Ses;I>-wA!nb&MQbT9j+^~29-;W>XoJajPmv24aT~t3>%`NDULA11 zoI@V;(b2j&%i`eDRaJO^vS z(&Z5%-0%&o>O!(df_E8gs`;PauLz>69yf4Mqq#V7x6o<+@_8n0Mo=PT$T{e)(MsC~ z@&;~tb-t#E2SFyYcEG>_=k~q%MN1t@IQAoOUK3jX1atpnHT}ng;z3rr+y1bet6?qw zdQDxU#e?!oGJ|y+Y-gIrE|Ur9vgC-~zay(eazk&N*FH-kvhD7ctx7yu*ZE^*wJ_+^ zjXPrk8SetkpA};lNF$!q_o{WROO@KGX+p_bP@RHnFNip zLGCpGOg8_!dlnlq3RaNOQJ}H9dhB!41Es(@27NW5?@QZWB&VUA-%IbhXi?bcscqzG zwaRD%dL%l0K>5LS`UWZJABJonayQGBcVDaCD{^n($Q*E-=y#z*cxPt1Lj@5xUG&Zm zw9zgy) z{>A;kwz=r@8|?l&2E~XNc3}R^pvkuQ&aNoow~L05aksP=F7k8gc8V zd6V`}^Sv*j`2YOn&%HOHe6b^JifcTkkGcMho0}qHmPR7)5BQX9+&`GW{uNl2`UtEH z60+>c99E#6xPFY4k@4CckdZm=F=M}16(b{kJlYF<<1W+6?&ioC_t*MSiVne$w1~p9Kh;86c0}kurZ{lqpn(DGxQ;0UyfV2EW7T8+nc}-I{}+c1!)>08 zG!@2oCG8aT-kYQ4TYrpdvLKX=d&$CrFZ3mLytvezb_x>Nx0z7gM(d8=q_8*jK!72o zjRv=?C)G)IpaU*D6c#GHW62KXEAzy>Lw*x+)jqV7&MrN2emo@2lhxjnWcf_H+Y_A> zBr+gj%ZEPH8JtSG#`BfW%r_PU zq1?;Nblv8cTIreKj1?>ePd$bBs!+_0y_5Mr&|`MPv-J-$&8Rx@TGh1C;`ZR2;rqt* z-edmz!6{-B7Fm}WJes7j0omN+>C<5b2;Z44wK#yllB-)4)Bg_b$9S6vfulX;*thlC z9dXu++}hQWG{Ms5k#a2K%B9CRl4+!V-F3x?avoiV-5DE?Jg4i{2ed@aXUBR!rHeZ1 z3LMg5jO%9@n_H0u^!>NnJU?x{|Eh0Rfq+DA2hL}0hrJYVqgWh`<%7%T8_}Ui%cJ3o zS-P!Dbwd(Yijyi0DQ0T28@(NO`7&|A`n(O5{OV&a8LIt+CaXwsYmt>{J1=kjaG6Vt18lbiz2pw4m zKI94YhVXrq4pYwy0%9B+#hrZ3%G^ril=|Yk*pX%};}+%j_p1dU%S~Xh#Tc85^t&Sb zGq38NfuVxj-pu*>SG6QwrF0vRV}Xxf+Zb2oJ`Xs?k}MlA^G7Fx!Kf{JzN6!64|P{5 zV(a?NGrgn0_TI)BlkVA38GsJWEDaqad~ zz7Jhmr*Z^%iw>jgb%Ov=syFVmng0;!$NDV&m1T{`q0%y3PxEhu;YXBQNDup1TsR1M za%nhYwbDqWuBv?yjawkt@ArV%3v%Gc4bqs^4^mj##pD9F3B{b}XyZv;% z(%oMyBOv+CnkDMTp%W9vcA^AjDZ@lBSBUEWNo9=xit325V>hdgjufUi9=EwBRM^I5 zBKe}V3#^W(@GB2)F6(kL?uTUz{L9K}+b}7l|E}l5@-oLL@d!3>>AvG?;t;@C+ZefOE%3k(Vu=L!PzQJdR)CwD) zThrDGX~d?NUw!Xd-Nf(=cfML3*=>qykbiqs9Jo5%e8yL_3Vy8`R5KVHK6eW z(Z_va6z#4}A_~dT7`VT8l(^l0*>~g3!B*b{iVPe*4RRV^@=jOO&>^ex6kZE`=XC6F zmD5 
zeoj2KLz_RKF8|OjY5!LT`p%r00OYQCv(_L8foe|RkfY9zk2u`dZn>It#CEdPQx5pf+1wA-90SCP>lQ>zBRyIz%+5#i76C4rV zOO;kHUF8FZ3-*=3K{{K)9-&x$#c$2kn6oJE1^K*VPDfuPO=IYZZ4zKkRZK+bVR3zX z3MNaMl<4$yYlz`{@gL#l6M3IpScQN?Rb6i+%Jg}OgkU_ zh(qr7nY*a28iocZiu~|rCMY{-LGfE+fjai!LKB>%_1&w;TtAjeZUj9za?JL3jiCpO zw^{M5;Cl$QjR;M4_{+nTE;RGVmt)cxJDp1$7Y-(hWQ67`xaij~(Qz9G{KQQtdn!smI2-C>MDO>}A%N?pO z)aBCr;#48x7T7HVr58+8^;zMk!Ysc{3uzIvYzyX+uyZtJDo-Zq7ij1KvM z-*D|EOA%_pqGH8;i4zC3tKC$mq#CA4%5RKOjyoUybou`Ahe)E0o_M_x@;2?fy(Gi@ zv^MBqmZ!2H?h?%sRBOVOe3ZU0X1+p&~}b4 zN-rEdc}m_T3lZJ2MS{o2Yj4DO@Z9nmx1E99S&B}I7f@|!g*Vdf-(6ok9}=b5d$z8t z<14HVx~v_p0>>-^#?Z~E%U$WM&ll!DeVe;xTt3~Xa`3HG`e3Wp;l*oN2uo*BS3I zG6QyrOci17+fDjtdc#P06y(NXX$|K25`u>Zf?SpNM)TwodL+m9jYTAmd4%60-RvTEjLp-)Ztx8J?x zbAn#)6QLfLD9i3%7#P0YX6$HwGqO!MGZsp~kt?u8KbG@p$IU|>v<|T_wCf~Lk3LQr> zwZbdp-_Vvo`+|&f4Q?0unVk!)Q@VlP&o2pe#jT*>o750Jh}9(%EN^aNG5uP4@I^Bk z=v_TxjQJc?tQ11DXyu8(G4a!acBKwLx&Huas@H?%vPtd_pR!R!CN$l2I4j{sxz0EDx;Iw#HeFl3h>XvT*Zj{$`}6%X z_3O?joSL^Gl+bv21B}?%7S4sv=Ins~C7hT3j1T9hs$df|g>r)waqTb1PIZj`I__Fq z{5|SS!*jDWo0OeL(8)U)0#`##4iS}3Ho#^Exz9s_b1AIWa=)_hPUH^1MyXI!{Iqi9@0|uKekvB7=%wBaI-9a**zE*NtXR7bxg%|+u z%DTmyoj}4F<^e7=N2No(YJQtgMfFJ zI5nL=Dvp0C*1u3w5X<$X{bQaw+kI8X>0XU4vz4mxT-oo85P?{teS0uhw1oJtbXF`9_W3XqmF2)} zc-k}GvcFqOtvCF>ip7;@htFQp4p7$jE*W5SX~rU&V5wz(AZzra16h-T9#d8zOH;AE z-=J`MzTp{pp>S{3;<*h%+Mw^Ix5+DNNr<|g&K0DXxU8Ua+AMEXH?=q~p}OK~3$-uQ zUw*FSSgpyhRe2ZX8)7cK0FnjO!CAG22mVQ5yfHv?+V9FPU1;!`2ij{zXOcMQE4Ebs z^v>8YEvY4gf;WqWgdJ;kMHNOL(f{<7g}RBI66Kvt!$fj}VVM?-4`cTqBtIiH-4i}q z{KT9V2nC<4{9vCbswIn09-f?MKQJueC|7u%2wZ@>DyParnL$cX zfCw%UAii4DX*=*hVNAQu{GFJ^f@K2)V@`QJl@1=LO9Tlw;y7QhC4>4!r9A*LrT3HB zYkC;l4^LOKSaqHTYR<)B&dWhIZQ z8N|nh@D~wJ3B74PH0d2)acC!$$+q^SO`aCM4uyr+qz@)rkJ9FfE+C8*^^gUFosA4U|?69NB(*^44RoN~3q^Szho44Hwd*Dz&^A zOo&)@!}8<1A)!8P=JMlrV4B&Rzdu{SbkE)%IqUJs!x2xY^pEuQ+!^aj!=T+#<%Wq! 
z?`7x>Oh=zj?Qhzq8Zb#q>C{0c@d$3sgo`~bN^|>k6l^lG)Bi5|NJoMpG$W{Eup+aJ ztNv?vzcTck0@ge|p{h^;4Aw9~qUIUj6XJ_}^!wqZ4dF!%z3`q|2S*)A07lQ`2wG(<5Szx&9sqf+bR`aMHGH#B3r;t>ul&m*#bTd- zcM-4FbKM@5`og38gTa%Ej=>IMz3^l@Qxp)rBb40w2Lpj-@9GOQ!8s^MLo{Pp`gC8( zJeL-5b^UQP`U}c0uQ7L4iu~cRIbWrqADruMzej7L)uOO7Quqq9*-YZ-59xu_p|=4u5a z`8yIuB&gq=_F}A#+c_oM1~3)3+{?vfKR8RY^-;*4iph&p2$lVdi|pzpd_sX6STL`= ztS82vUVR_*~?s{Og)^;Ue($#C&-{97O&b}2Sg*gtqDDPsc{0riaOzm}ark?MG}v(wN3)UXva zJpWQ`=BwERt%8&^O>}HLDc>hTKuLX~B;a+v{)t|9Rwl zI2j{zod02*PN*QG+ZOLue9{$jk{()$1l=xX)*^xPbbc=lCaInBo}v>>#9kWNr4Z8z zdwLxoEE*Q-U2ipKYj*^_rw#UFUl``lEfYV)%yQs39zw^k znRnIX(b^Wz+BeWs*?Avd$i`QZLG}q>+>IM(i=cFoRh<+p!^Gr9CI~Mv)+MtC0Rsx# z@d_I&Z8OAt@h7&r?~yR0L^y}ynn9b7usbA(;LHcrkrOAc#NTyew=>&e>vni!p?@BY zxyVN?H-zYJdcs0f^4t>D$v@9o>?%q&`I1@>NJ<7!`W$+^;=nuNt@dMqh{32496mOb zBp0#o>&NB`I`ziTIpEi}Tixo1E4QJz%`YZC%$cIV$cl_@2GwO@n_9bKy6I5rs+FCy(Sq%&Ny!@(P6e@Xp`R}GdVXb)6 zp!0asAm#EC)1Z&|aFY>l8Wg96Q#LkYANkWX2y|YCbL`%{SH0Szm?w{;0QFhrq#mk& zEYsp^v!_A3qN2fDik5%Z(gm8l{%gTXx%=_a;rc+4jZn?MWd=t~JPUX|9u;XC@<03d z{MWr^fT@We0m%6NV;%T!CE;50TFSf{kdt8GDn2^gg?_BB1%=zIj>mh`(}LPVwT8z} z4!^c9AOC70JYKzxnT{2G z`{!X~8ppvk!KXr>EDisrq{**;tb3GHENHq<2}Bh|annZMQ|fnMYA$}Zb1Q=vH%s%@ z4@R3~rsMAI^$P`}np;T@d31;2i%J99*XBEcg3uXjz`fQx^T#C?Ev8(sk6ULKy)DTi;`X9X6z130~>+xqodu{W!H6b z_z`UjY`6++pqtw=(q6mkGP-eGrv2w|33ak!4$E|Gn6z}62u4=I4yOc}Z}P{YWCNb~ z&s5t5nA`22GRVd_(``}WdR>Y^1}3N^l-g)IdC)^By{G2Uy(0Zm;T63!&)wKWxK33t z$8w#AXOv7)qZ7*X@`79y5fJqycH5y-3WHLm5=5ev=f6Dxv_i>x+*=mjzb9iswh&IO z*JYPE&NFR4|Fq=SDLI6pS0|G&S6*IaUqBaElOd6js^|^%>EdewB%_2aYE@cxIKjE<1|kWANC}~%7Kx_D?^>WS(&TGQ`4!8Qd6tmXqjT09=LEz|V()N{s(YPzR-=p~ zPP<477Lm-uv}XieuQZxoqV1<%AFUz2EQo3#vv&ErU2RGZG7UdP$89PJ4~+;rFIU27 z&V+al?kjCpQHCqAaKI3FM(q){*ZO;1Qhfo+ zh}xxfStU;n?DV(6Nlo9ycVZcPfW{M3HWqB$DiEEgQ7)#y*Z(!;^ZD%-tpmRLJ>cyG z(qZ3@zq8TkCGSAUvhQ)||GZ}`ug&;<&Y4G?9lkL_n3@QHutTm{w)ngE-2N-V7E;#5?n_N+9RH?{tPs;d7( z0t7|Ww5kt=88h@;B^d+k;e*@Kulm~}g}TG1nj93IvK%cvU8aznGFxe3Oi}v>R)Glv zg2uhb5Zjt{m>J?OaG>3%lxL+0v@&kvwWk-~=CxQ$iZ>?ie}>)DPW 
zZp+P~+nFso+cBmEWkvMqLi}ywQL7s+xW0^ksjIT1S|74*+V=7b znTKQ2?fBCArK0i-;amatWC`I{`_J^&C*$mQGm^*FvFYRZ?CUh#0nssCZ_=zxIM&0?n$UD~ zLluFNL=b`G2#I$gyh~Gv9H)_rE6^2I*Wdi5o5zWK>-AGBK}nITxGCqxa8pRdv&9O0 zI^9L%SEqTQ(?DXf*VQ}BQFw7%?EC}d*Cc|WMc41|%o02&hFs{Ygb#s-&*Jn$^jN;V zX1wCU!0DO((Gp`a{0Rpvl8|{(<*tuReRwYpPci77d58&o91Y>ax~Q-WktnjK2~rEc zO`&bRMZ@~G1GxX{Di&Vy$bX~ZBE|Q#yCr8Pi|6)*Qg4Epb6s~^u?Vy zYq8pezVS}VW_;3uuNvZ-KI-3GnEir$gYQ~~aUE#&D;XqHc*KsBD*BM$`=rB#e0(~8 z;)U)|ZQE*rL+SJ_QbAF9QM(kg>>h{0^=I(*TN5_hT@l(b;cD344tCx=Y@rj;_rYNu z%{WTM^B0_=Vy?S&BOF>oReqxM6%rHvlJsSc!Joo>jvusk)XD@ZpTD!#?YwvQY2x;9 zQ%vSK)qF(EIrP|$l2)2NhiRXM%6)wQH@^}>u>kUo`0 zUn^$C#7wZ}tyg-0w6g6GUyUPcZP)I|0EZ6?%eAJQf2}UHfK9PfX-0@ss_tDnnJifd zsJpGilV#uGvb|zT_`dv9N%;RwZOT{_hgW<~I~;Y}{louPaMI|l^nakydyV1bbOr@o zaYf7n#8n{3nvIlT`>QVNpE}YfKT-jy0c&|4CW; zkb9L}h`fL&-!vL7xhZ-D4sRy1EG+~3SnM5_v% z|679kTZ4R#H)k|gwfRuwJ<>)=(*`?jTaW8MPMHC19pWo+aMX>KFy`YLsQ(ByRYM|~ zo?TrD8&vb6$#uD*6nrOvIa&71>$r6R)k7e(($0<>VQcnh`+er3oma}XEk6GtBUFKt z>?`9USL{(s_q`5f%Ot2A5M=7}AERW1ws$WjcJ;Jcx(f_skBYGQx=a367@mYLC(C}8 zGiRa{gT}~Q`F$twagZR(nE7%|lOCp*wq`gfD&bwS{&7p^*YMq{3pZkGxSIE=k4p3r zmvi?jd&vLx1oFV%r^eppdo2;S*SI^^6ojTJX3PiFEW5_H*zU`?y!;oxE03E2h1GH| zyougOyfv+%Sq{pi(P3SevDfg}FKvha@l2yKMDbTrzx?^(HsezeCyS@ncu(R#BbkSX zqTeRefOd%03!!fxV|b}&(N>_!RPRB84DG9aAU}R8k{@>mC>_cU(mz@IJAhI&7v|=H z78vuL!$+6mM$R^fD%n1Rq~AJK+m%nO$)5oyI+e`};)QYsDqGAvTeE(D)5#FiCbPgx zrbO!jTFE}Cbj;dj4Bv!NTt9^uViEkXnVu(o$)21$=BLNEZH3>G*fD8*d_^u$zW??30zI2SOS?LMn~xyGdf=J{6&R`iJ>#YdO4#Ab_SP|n@2%;T6>dB zNdHkF#o{~#Y=DAnX)PnOGU@q9(Jkhnzl4SOZ$Se?t@prsw6u3hjULp_-yi#C*cs?) 
zntB(qC`P<8DGb-G%G$H~<3^Lz4qGhrmpkIxplE}o?zI+7Pf1rrlpxFRz21SmEJEv-BcW>+I2TsLbJ|P7PaD5V5t1t{p&o zg_v(v_RbcaNVT`~=s(OuTE(}nRjM;3;LLZPFFV8-q71&h^U@)TbKi31_~$k8w=Eb1 zThyVkT!d2{rDCN=yzfxNF&vGw;#S=z!2Od_ZpXqP^+3a>iT}6*vSh&I;`FP*{7?54 z7d>l;uf>^4Vzi6DT79Up8h>MGkN z6a2N=Qjs}M#`B;l*28yx@^~L`#mXtDUY6TOWQPAsp}+7yxS(RM`uq<9SQ4;;nn=Lc zf1-hvxcB4VM}V!*|G;&4CynfR4aD6Sdb+1lOw;rE4yPIU7KXOd59ea^>SEkm( zZ85YXCp%`zs|)pL4=aHw^}KxB&E8n5Vr^;9b7E@*wQ5?Q*OS9pb#mcgy1Q)P@~VQ! zf6N80k{~r&)We-$)A398SE5!*CIxik%7R>slz3L?5$}w}CRy$jU5J(>TiyCV@Sb*c zX8-$Ppm^9=igFXOy-B-_q6ekt3(yZyLbP;zU={Q`u+xUsw`lMQ90;pT36~qb*EIU! zvNO6SwbK5ff}}+UxV(V|%D1t6+>w4Q&|t;VEknVzgKr z<`a?ovza}<71g7ZM@IH7{2mRpRzwdq{S*JA2U?&Rv*lJt7qbUwzqDGf;uyb7vQMK& zUHp7U(Rt}q-^LeJ8>iC2CY}A-xUXT|o{L1>J(E#|h2Va0J_zEPeV0Zref*2dvF5c% zR~Q;ke7KEcOv@uohtlDDJJ%Rcy4%%=NrN$V7YKtsmGfu3DX= zM;7gx#)s+&sIZR|$lIVU{~<(lfKxn6VhzHGqTi|V2#_1l$Rx;;Qj8m-DR7s(fxaAR ztcH|%|LQA>z7vv|5Em-H*jHcG_DVu{tA3AGs){p5XJazma<*Q9w41Y1XcDfqhUOnUps_ z1FH?*eR3(QB)fH?p)buk%`A8;e=;{5Wtb6KPPW~rYznV97Zm$?$H((}#{>7CEe@f# zs}r#W5Z}!_xBYyPZYp<9J*exR6}pFvzCtT(^PTXOBcOjj_T#4ENP}3lEi$C z7Bp!UEq3(~Y!(Zxp`+0=r%CV4dXq9^Kl#O$Cm}XqI{nv{Ao@i?dq>SfR$1ZHGD{t8 zS3NTICl)!c3KO~_osWJ=RJORGe< z*98d>S>U5NZw`5?J!~<}Ty@1oV5#9@XUrD}`AyIuJ!k~USymB|r+RgHAWy2=5-j-}bVW4_w-d0yf3M4lw z-Gd973jef?c#A8)rMvxDtg-@AWAZtR>8W8DX0+VVX6+P}_Uv+kLVC$S>F2Cu&7`RI znF1psHbgm7vR6*I8O^tO_{7f+6ry11_|l%4)8~P2_L5=&@fu7n45|alYrCU#jAoDC zm*B39J$yf2S#Y{VZf5U>4pM1ts|OO9b-g5SO0zb3Iytl*YwCUr^J=~+BX>>{r4@MV z?4(F@sy2cEuqKqi$B}l(vqFu`o7Q|D{vhMWI({YK5BYna&yaD%vp0XC(R&gmy#wP! 
zZpaA2sNk6tA5hd95pTMi#U6Eh1>wO6cd^cSHndH`eM3zqGJD4oX3`F(j@9Kinxy%Q zmx6?@z=KM4Si9D{dKRhWv(6q`0P)uB(BM7uINTg+ct zP2c|&mCKt`Zb@T)4hna3dY1ndV^b^dsU&X@BLbmmEf@QhEEzv~n(UB2i0-DD(!6g% zh}^_bz^zD!og2rqPZYD*!(_rq(tXdg0SrSf^FtDV)7D z@UYM$>WBAz&YDYSh&p^|AmNXr;V*9aE5REGUM4uP?%%!Mky+u^;D9^K;C7$B5O2n)qD5^C^25 z%R@N&BZIL3+i?!FR6}ycEyUAasvB~FKm~mu=(wEacv+u@|8{rC4Bv-aX5ZH^?vk#X z+WlV=t+pS41D`0T9LjEWj(k{YV?lmDYsq53{0I*F0y;#KfR@-A`?!%$r9R7i0QWIf z%VL|jiHDgvlCOd#Vw)YqmN-!1tyeO44LL<~Uk_eCMpc!!rn8p6R(%zm7xFVhyhX^h zpUC1|s%LmwqcQ)OxQSgCS^Ce}~n$KYazze7iO-Up#+ z_eodYpLeI<*GB&{5(X|cNlB|RZ^8GPHkRA|-jofV`UsyS#1qsC;!fg+|0kETzp>N` zIf|dK|5btpVuZXS`>%Rp9V3B*&p_++fI{ghV9RIv zE-$mXrgOU!8&lNT_b`cu*iP&N!Swh#|LM;$*zL}w?*+egvUuU~a~qykFJKP*@i0(~ z?;)aQ53n&r#4>?$E-5Kukvi#xs{o*?>&( zPS|7UZIKH#vEAhf0&cIzAq++PJtw6vFVPjB@Hzc0qZyjhdMrJ>r?jLP+BZAmF=nOslrJa zcQV&{dt>-@Ky%>*iv|@qJ|-WvcKI&sMrG$b0imgm86%7fWjurl%AVWzUp5uIgD%qO zX(7_1-zZ`?9yGWABn1Iddqf@xik7}cU$HH7&}EL`8wYr%T)$T8CH`kt%W9KDJSC)xPAAStbBd8qAST^YU~b@xNE z0ot~rP=8NJ-(ARrLWaW+o=UP!UttXN@eHp&Ut6qmx7(!NO%OSqS|vkv;aJk9=4|efUO98F*i}_x3B}(+U^T8l>Rz zTgh;l&mIW`aKzQu$_l*Chv(j=`z+9f<5=EHZY9>ZO4Cj9bvorNv$&Hwts1i#&7paOB}FEGF!2k};0 zzgv|M|B{`*c*aNU;@WG_?R}*xZj;6HkCpmI8|*LF%N(JEaFGFWq2%!0XY+5tSn2{eg$%e29c&ws9!*?>DV!@WzzUQ7D=)ec#42hzgsInx+7K($~ zO({L9F?bBrxN$zcB+UwEUtKDDY?*82lMju#<-cY`Dj$eD@_Q9@zR)2t*)j6oq`C=D z)!HY{DL0oPM2|fsnlE^bEcR1TV5;H)e6OTST1o$qT;Rhypo-M|do($Cm3fMtIbb3G zA7mx{CnWiV=!u}@|7EhK^?N|p1e^9DLvwJ>a`M*m9l+V#Tie<->Ll}tC3GLWkIey~ zOIM-VNUBzj%9|5K_9(Af>e+6d#ypsd~6|bBmV=qcv zSqaqOTSCu{is9Bq8?dzWdY!sNu9Px*m88Z)u((Wdz!v`%Bl9G#Q3vqFm7mV7UN?K; zFZ=^B{xwjD-+KR?z%u7#lh}Ye+1Q;G!j8iJP^m$5`+#&!sc>|=`24i=ihX`?+lShk8Y+usgB-6eS>^B;v~ zM`1f6LBadV4chI|y{P9+uddkTY12K98eTJcsxZh1?CaPbeU4OP?U-bE79H_A0$P9? zULn+7T$$Y@FiV`_CCk>>SNgf0VZu>dTff%+MEu)QB+k65Mw{imJ$IYP4<#qp3RU|x zsFa_B>V^7?=?pHwpk8$CrwNGDMJ9z*O@+Q_C=3E8D6Y?izr@!(2;S3uH4E!23~oTTo&oz?l0{Cp_}|nZx5`C~LG|w@Ul1N2Tf0@(^q? 
zj(c!;i@s4v?14|l_E^Lmd|;sz^}PihClCZ3Z$qt~@ww~=cu+c?QJh)c?+odLe+Uo{ zs9z9Wvy#-pp-&}*0J2m76d1eKJ2OMny^wVEM_SlHuIJnXqJ)rrf)(4!eu4$Uaht6! z#;{^~dj!($HUXgO^m~x?mG{n$ksdk^thE!+Ujpke%m_h0lz$prUC6=JCDZW!!SP~3 zyW@B$oF~Y{EVSGFS-cGvYq{P$Fk;mk zxwUa?O@gTv9o=)O+3~0R9(RbnK*cmW$D*!kSq)0%g&MRZYo0j``jSmI1=R?N6xO1)I35lqYv^j^gY5C$E>Jjw-0-_)WqzMQ?V?aU=3Q8|ZCqOs?0qG@xgeoGS^iF6ZT?j>b3n0=7M5&=k4TL7W zNKxL#?|$ET-@W(UH^v*|<&Qlw##(Fdx!0a+{^tCxwdPtF=WLKbi6S8Oe~oT$-R$>@ zKIxV?LKX`^N;GPkdAqXs#7rDkJiqgnSyY1%dn32d-+ig>skw1GF2s{(A4I%B4^+h7 zWxe- zi4!_CksadED3g+M?F(`gP4D2}`Y4lKWO9Vm@ic(N4`R^++b~NkjATQ=Nn5jeN$rkJ z@hur)Zkr}=q2(&DuozjJCH3rK>coV&pEJsi?L0Bn*)Fan|cz)R-{2e6unIngA9 z+xHf0_a17NPeq%w{QW)Pu3n$a2pUtzD#%zT|F=Ci!TmP=GVhYOX7%e4Ngix`#L4Lq zOmk}&@yZL|&TPj&7fC`}#8(kX!!6C4xykzBHKreJa~`!gDx%AW+MOmpKMm{XnNAI7 z@$=f4g&QK(4~%<5164*Xh(j*oAl1$NLUr-v>mt{>f}$t?Dg$WWmx14+D#vUPznWDNT>WRZcyQ>ycf&nejzAvEW*j`4JN$F@( zQTWpAZrdy4F+t`xsz%n(i~GR(zGmV|)W}h%*=*y+Q2ROkHb*O$^5s$gQhZ=ml<)o~ zzD(F^u1z*X#&To!h8M>j9kf|{C;BZlrM z0LkZ<>MA|A5Q!L&o~O2Y#X*JS3w!0GgmDL_GoK=xMN)69e^E=m$09;?7CLYN5@gs3 z#s*@S0$5svpq5Xxk;u~K^Hjn2;L(1*4fuQEEowbfLJT548)n!*{42vZNpKHn&lPiO z(fRpnAY&@0i$d;yF`H3aa#BduK1Z=K!C@D1yIuvsqT@M3QW zb_u)_9Y}Zg^;hWUxW0cY`MY{jqa26f$h6!=b{;iWpv~VU|LI0;Q`3j*sHG>ksdP+$ z{?q5%928`x-{+$@>+_?vm$_VRlrQIzn$ou>G!C`F`VuGn_&uUgoRbH2;#O|`n z6hDuqv^g|hw=QAZGmAyT9F5@N#4J^FJ^`8AOQprVEgqZuQQ@rknz%%LMAokl+g9#H zadDLy#P9|2y6`eAblWyHku*0WF=nTvyK-J8L^Bv(*5l~ZX8OeUSsInCORaLBpvcx* zN?Pp0OXoJF*z#aG0oAXKBdb^V*TNaVd@8%q7lSl zSR3kMnSR;FqSh`zRnq81uB*eCL79|)kuZ3@aa3Zd^DEMZry+6Gz(f}9`1-4xZc1mU zF>Ogsae~!wrp-;UF$UA9E?JGq%&+YYz1I(7HkC^TYV7BQ+tW(R(GR$Wf*fRfuk|oR z^0SL@vl5d1j&(-iDmL?~5SCXpSYy0EaSp$+c(u}(z5x&Ea6I^ti-a5iy9o+BMClTKkHpkw%6Rjm`%9gI7 z;yc#!I9`6tcCPYJvVEl@IU-4ScA!rTWhW2voQ#Lr{#N&Hv!Dt0_}%EfC|S=*{6g0T zqRUo3KXGocs-V4RxpWv5OB!xBXnY$lb8g72yY^QhoPEu{b^~Nwr}onzy zOB}VR?(lrTxBgpiD66%f4ZsDyTX|@F)?ko_5~-(8z`~sGu*u`a*Y$f24d=_nf*%!a zKeXLar?(tGcf)xL&@64TWJnn%UWbs#m0nC%w9`&YteQwQ*4_1V)$eUr6C9sa7^c|R 
zg-+Q003(mFpF@xOed1s=k18FT4g3l*=pI64(Ig_DvR=fdSe+P=q(1vrcz8ClN+^Fe zg$F5JMlBwz8Ql2M*g620aIr{Oanh$8opCM?R>SvO+oiP=y6UVuaR_>uEc!=A4b$gk zL=-7eQqlZ$9-eNOm)gQ?yh1@zP4Bf;I>Lmyi5qSQ<@pKv$i_Fn+VesKojV*Af3khk zD~IyLTwt#y3d$VQww*^Fe~c;BDA_bKt&3E!`03o4qI(&$T-n#{?CLd&Wkn1WyHNC* zI3_(_%&l6$*$-P>0_lo5zcIQy4+xfTjS-2X#PW{*nPrcKQynS5cG@=<3%Y187fG1L zC9!GRRC0~ciYrN+=mQH)^uI2V0+^FhW2*r0FRhI7%vA^puqCGgYgBhsq5o+>%FuM% zOaX=SHbp&$k8$BWG=h^sYY@OxvvEX_oCla**hkp3zm7q&b&9rS{#BI7=j4j)5^p29 z%e;iy`&f_{`p(1c$NMl+%aldqn9WV%c&WR&(yfE*W1Tl!B~KN2WDKy^#WpKhA$2Pw zDovM5;&0gB(na6+_E^%=({`*+0^w!5P@7-K$YZs$Mu4le?@55Rvez0V(~xQ~a| z%}|q$NNA69TYSti!6M*vh^eOGXeOmdj&rC>BSg?zQ&!M2%m7emA)USk#Ld_)ihKV zfB&g|ulsvCagNDb*E)n#A+nuH;-TW#t_vGF220+x&@ePrO|x%|k{tT1Ec;JcE(6%+(5CYq&gq$v!Up{Emyi=31l+itlX7q^$iSAne*TPogl!td zYS;RKIEz?WnJsEMJHpO4x`t|>TZAo*z}V&I*utGYZDMBBOcVEzLR)bruT?{fz_0vF zXbOye)Y{rFsV~$~DNgg@k`dnz6u0KKqBI@-gVK*P88w{ir@h0sRWL>^Ehq+H_z$C( zEc*h7N<0$HlB~2I&K!^uUn&{ zth?(UwrkR|$#%k(os<>oJwo&=r>IlomuE{P75|8%K(H8|xQ5p7GPiOe-TUZ3HF7I) z?wNTIrs>wosRYzp1>@AxLVyIiK`gR&f#;4ZN??$s8q{YXYjg8M_9vbARoWo2QYE@K zt{;vzVq$cV#ct%L(ySai89h?>10ku1GjJKI8c@#6o43UW>uO_3U3w$i^)ToC@t51P z>RQpuY>W))j_Si1y>~Ro21D_eydxEBHP%WymSBmWth>w=idg^R`7O+EWeJ}Dhq3@4 z1|KQChyAk68%+tnutHvc%q-$j8k(qI{cXCx^+IH&JdPexFiURJcf2t(H@kX7{ z+PNj{Ah9J>Lr#}s^6N5*aHvS$t!#Ur??v@Vy^A&v`$RQ1&e()@P?6T%i1zl9t*8B` z+UQN!{9Z=Jx4gOuLv$#4ri00LxK-!uhBuQ0*#fF~1hqAg)LfZ1F!BKZPFYki^ffSU3UyJTPu8ewj6P z23;uz*^;;UD(@P_?Nh zHMRINMR|!|D$i1Ri>b669D8C^4o-IB^G8a@WxLkU(I9+UbfPc>#QnJ?6 zd`)WgU`1k0JXcbcJCPky(M>fi6qjpEAgL1W=f2xxLv_<=ky4IlCM}?I5h*M*;iV-= zrYEswYO@53JF!xwn@U${KsQ5Z>^Qn+@B7vr6{FkwH@k$sT5@C{8cy=cy0=;6|)X=n`o37=zvOEt~RFK$n!-Ru|cML0sVV*PNm8js=1^K(~! z!a*JJ{-us>32eETK#BRyPIF6EVDDSUUSAOT0m^=u5a@{kJLU+@&vLgfry5@op;_`* z{Do_0f&AJV0s2n008`mVOpg;Dwn8INH;hVL9BaVeu}{rMwnlCYe=%})2acybT=ew| z0K2{UKEXXh7LTt6I!(~G$S){XV)f@Or(!I7g9fE2&Y?ygBXx<+7ti#--FkLJ? 
zhqy9;~ih7X!t8Bn9MD< z`I|~Sg$6^;m`B$0kqqdP%7}Pr)=>+m<=r*97d!-9_~i9J&ou(aZyp>x0$FWYmqcpR zHR~GDR-X17If%)n+9o+M!J3SRT6>dcI?ZyomHRbpSLH`7Ku71Qg|_f@PJAKQXS&E~ zNYuG&>2Yj}vbUCupVCskp9L9HEMS&@V7^AL6jlXu0Ima%d4oSoy@ij(b4n!YORQ24 z;S8i`Fr{8>;tFeU*s?*SXJO_B8d5CXB%7$o;&W9Ta0dGgjkJ>5v~aXk!~oIM)rSgO zljL1qloWwqqYE@^(O?pn;*F%X;tov`7yHnqbv&7-h%lFBmURhl^u@@06n|NtWzGk( z9ob;^i^iJ6@&rQ%`@(U0lg##=4T84vHDEM!uJlJ4Q4#;cD}ZG`e?iy3H@#gFwm^R1 z7deb}VOdjT7DISOVt zxart8EcBbGSn`1ZVf;ZQo%l6j_+9tHZ`ZHjgDvnsq{=&Jk;Ylc`sX!%=?s_F=BO5#;DOapO*7m> z0Y3cS81%Pae(2%TO3gyWb~cw5@Xh~)LI3XZ`!+N*_%t*&w$t6sfB7$Z?)BHC-ObHt zfg@^C$%zjaz<1orol8^vz-LB%O2>u5J{B85nsl$X@v1U`Wh!a1 zOe6&#-=BvaIF=pF=R8&0h08pzZ>q!)4#l^F6dXu+UX`Pu-K*OC^&0sHaD_3&VCBPa zQtQME>nq~tyHcUXxo*!uC!Q23=irFwFnM>4cK?Z9ot?gdVM&}WpUuIdli}AW&2i5k zJ2NqjyTp2Df0isx*5Fk(q;cn7UwV|PK$VfN)GP)j<`?W~G_C^76d%LDwhfA|Z{(VS zIf~!Cn(VF3PJC~LcHmGvsr$ld!mm7L9UsV2+Tg0O+$|Bk4==t`o0n7CJoKkolAX@0 zw!;}ejzC}J;MXYKH78F436+_jT86K)G^C9FTuM-q-)*8?uchqKDSpZ7T->Ea#- zikBQ7JWAT#S5E`pZSzJ1EB<;>84xQa_WNA*QJ>cmm3`&;jq-l@8Ne(h@EsGr?*!Z0VEnLK|4P=?^=CD90fZE3 zt*zbSU9w4RS}d?lZD`#I|BKt1d;E&0%w(}Cld#QJ1Lx~vp01`H#Fj+LRbIuk2|JAH z_ZvkX=}#0PZ$e#;;5&~ZOeb_DEguQ*3B&z9V^QYj?Sd%~1PZ}Ym0_;K?B%>>Ncm9l z;8AsKIO`XA-@2gf0__Cy;Uc11cX;PTPE*>2uTwM-g>rzjDf7t|P3F+!dB;Bff&7@@ zI4`41>RIIb6UyXkr!o~6v3yQ_(Kbsz?d@`_%9NLY+TDRaTc_J7jHDa>HOOvh!%bpjWb_^S0Bl*z?vH5ao$f01!^El#Lcei~G+K5Pt4xFTS zQBQ8i%E*Ysou(J?6#7?WrVcy1xb^DOH}V@$Ks;}k)Z$xL|5g7^6B-(!&nyyiL^L>Y z4PO{~ZWtG`FQo5@QpRb%K(`IH&znXgMD!)&_E6t|b`A~& z)KN$Yn&S1h`*`-iZ1QOpZdiQNz=-_&JBWSC_j%f2oF_km`sWq4bo9te`e@J`*nK%ri*5jhzF+SJZzBMZ~X zJm775UeS~$AFR^eBW2pd=n0H5oZ3paz1<003sH0N$r6?^eI}hh{ss}(T$Yf&kB3*K zxa4HmG<>}Y_rY(N-3Q}qN)Sk1`%>JsL+93Qn_4dWz2VQBi1kT5K90W4Kj*Ia*7Spn z%aX2ke7$Xd&oIYPf2bQl8tRV8Hp$Oh=Snf6OikPH%B>?bCq+jkq?AX^V^<}ULfe?- z7WuA3DwUYSacuGU9q-KfJKQn{swQI9Z5%fC*Gr>`L!Z%#yq_<=Vp zqDZ(1T){#RP?-r#{Lk6QBEwDoDPqmm()Nm8#`iD?G!0h zX`QjH9D}06raMQsnJ@7Gqy5&;wY2?(i!ype&V#7OQY;nGx9m|A-Jd(}1A*c5!dvEJ z)Jgu>VP|JAP9;gE_yB_|tampaZ|atQ`Aen0dJ-dMd3U2PGmU0UDW+;;_DJ(* 
zvtNP4_zx6ikXv-*1wed%#}V~eK~_}+8W8mcyR?Gvd98WE7OnpqqeXmPPjNr}uP9C9 zGT)rCq yhx@@o^{J8};0)IP|J}p+zmS^RdwnF*$(aZJv9lZL_bF7sPx-ONqhh(gUi~k list: - """ - check result path exist JOB dir - path : result path - """ - path_dir_filter = filter(partial(_path_dir_filter_func, root_dir=path), os.listdir(path)) - sub_dirs = list(path_dir_filter) - return sub_dirs - - -def _path_dir_filter_func(sub_path, root_dir): - return sub_path not in FILTER_DIRS and os.path.isdir(os.path.realpath(os.path.join(root_dir, sub_path))) - - -def natural_sort(files): - convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] - return sorted(files, key=alphanum_key) - - -def get_timeline_info(args, prof_dirs): - timeline_info = {} - - for prof in prof_dirs: - pro_path = os.path.join(args.input, prof) - - # 从info.json读取rank_id - rank_id = get_rank_id_from_info_json(pro_path) - if rank_id is None: - print(f"WARN, There is not rank id info in {pro_path}") - continue - - timeline_path = get_timeline_path(pro_path, args.type) - - if os.path.exists(timeline_path): - timeline_info[rank_id] = timeline_path - else: - print(f"WARN, The file \"{timeline_path}\" does not exist.") - return timeline_info - - -def get_timeline_path(pro_path, type): - for root, dirs, files in os.walk(pro_path): - for dir_ in dirs: - if 'ASCEND_PROFILER_OUTPUT' == dir_ and type == 'pytorch': - timeline_path = os.path.realpath(os.path.join(root, dir_, 'trace_view.json')) - return timeline_path - - for file_ in sorted(files, reverse=True): - if 'msprof' in file_: - timeline_path = os.path.join(root, file_) - return timeline_path - return - -def get_rank_id_from_info_json(pro_path): - info_json = "" - rank_id = None - for root, dirs, files in os.walk(pro_path): - for file in files: - if "info.json." 
in file and ".done" not in file: - info_json = os.path.join(root, file) - break - - if info_json: - if os.path.islink(info_json): - print(f"The file: \"{info_json}\" is link. Please check the path.") - return - try: - with open(info_json, "r+") as f: - info = json.load(f) - rank_id = info.get("rank_id") - except Exception as err: - print("[ERROR] %s" % err) - return - return rank_id - - -def merge_timeline_general(args): - """合并e2e profiling生成的msprof*.json""" - if not os.path.isdir(args.input): - print(f"No such file or directory: \"{args.input}\". Please check the path.") - return - prof_dir = get_path_dir(args.input) - if not prof_dir: - message = f"The path \"{args.input}\" does not have PROF dir. Please check the path." - print(message) - return - timeline_info = get_timeline_info(args, prof_dir) - timeline_files_dict = {} - - # 合并部分profiling items - process_list = args.items.split(",") if args.items else None - - # 合并部分rank - if args.rank: - rank_ids = [int(rank_id) for rank_id in args.rank.split(",")] - else: - rank_ids = list(timeline_info.keys()) - - for rank_id in rank_ids: - if not timeline_info.get(rank_id): - print(f"main.py: error rank_id '{rank_id}' ") - return - timeline_files_dict[rank_id] = timeline_info.get(rank_id) - merge_timeline_events(timeline_files_dict, process_list) - - -def merge_timeline_custom(args): - """合并指定目录里所有timeline文件""" - timeline_files = natural_sort(os.listdir(args.input)) - timeline_files_dict = {} - for idx, timeline_file in enumerate(timeline_files): - timeline_files_dict[idx] = os.path.join(args.input, timeline_file) - # 合并部分profiling items - process_list = args.items.split(",") if args.items else None - merge_timeline_events(timeline_files_dict, process_list) - - -def merge_timeline_events(timeline_file_dict, process_list): - """ - 输入需要合并的timeline文件路径及对应的rank_id/id、需要合并的process_list - 输出合并timeline - """ - new_events = [] - for rank_id, timeline_path in timeline_file_dict.items(): - node = rank_id // 8 - print("rank id: ", 
rank_id, "timeline file: ", timeline_path) - if os.path.islink(timeline_path): - print(f"The file: \"{timeline_path}\" is link. Please check the path.") - return - try: - with open(timeline_path, 'r+') as f: - cur_events = json.load(f) - except Exception as err: - print("[ERROR] %s" % err) - return - - proc_pid_dict = {} - for event in cur_events: - if event.get("name") == "process_name" and event.get("ph") == "M": - if event.get("args"): - proc_pid_dict[event["args"].get("name")] = event.get("pid") - process_list_tmp = process_list if process_list else list(proc_pid_dict.keys()) - # 提取待合并的items的pid - merged_pids = set() - for pro in process_list_tmp: - if pro not in proc_pid_dict.keys(): - print(f"main.py: error argument --items: invalid choice: '{pro}' (choose from {list(proc_pid_dict.keys())})") - return - merged_pids.add(proc_pid_dict.get(pro)) - - for event in cur_events: - - # 只合并特定数据项 - if merged_pids and event.get('pid') not in merged_pids: - continue - - # convert tid to int - if not isinstance(event['tid'], int): - print(f"[WARNNING] {event['tid']} is not int type") - - # 进程名加上rank_id区分不同rank - if event.get("name") == "process_name" and event.get("ph") == "M": - if event.get("args") is not None and event["args"].get("name") is not None: - event["args"]["name"] = event["args"]["name"] + f"_{rank_id}" - - #modify connect id - if event.get('id') and (event.get('ph') == 's' or event.get('ph') == 'f'): - event['id'] = float(event.get('id')) * RANK_ID_POS + rank_id - - new_events.append(event) - out_path = f"{args.output}.json" - if os.path.islink(out_path): - print(f"The file: \"{out_path}\" is link. 
Please check the path.") - return - if os.path.exists(out_path): - print(f"File {out_path} existed before and is now overwritten.") - os.remove(out_path) - try: - # 设置文件权限为640,安全考虑 - with os.fdopen(os.open(out_path, os.O_WRONLY | os.O_CREAT, 0o640), 'w') as f: - json.dump(new_events, f) - except FileNotFoundError: - print(f"Param -o (output path) is not exists, please check it.") - return - print(f"timeline merged output path: {out_path}") - - -def parse_args(): - parser = ArgumentParser(description="Merge timeline for multi card") - parser.add_argument("-i", "--input", default=None, help="root dir of PROF_* data") - parser.add_argument("-o", "--output", default="./merged", help="save path of merged.json ") - parser.add_argument("--rank", default=None, help="List of ranks to be merged. By default, all ranks are merged") - parser.add_argument("--items", default=None, help="Specify the data items (python,CANN,Ascend Hardware,HCCL,..)to be merged. in the timeline.") - parser.add_argument("--type", choices=('pytorch', 'e2e', 'custom'), help="Customize the timeline file to be merged.") - arg = parser.parse_args() - return arg - - -if __name__ == "__main__": - args = parse_args() - print("========================== start merge timeline ====================") - if args.type == "custom": - merge_timeline_custom(args) - else: - merge_timeline_general(args) \ No newline at end of file diff --git "a/profiler/merge_profiling_timeline/perfetto\344\275\277\347\224\250\346\214\207\345\257\274\346\210\252\345\233\2761.png" "b/profiler/merge_profiling_timeline/perfetto\344\275\277\347\224\250\346\214\207\345\257\274\346\210\252\345\233\2761.png" deleted file mode 100644 index beef396ce2996c25ecd74298285ccab5011ddea1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53047 zcmd?QWmr^S`1h-l(%m5-(jbxw!w4uKt+WV3hjce1T>_$%bce*yNDfj%NOw0#4nxD# z;rBn!bI$8?UY_$}Uwf~0t-AKT_gd?-zV{uYt*J~(#6bkIMs$kg1lOGRRfCkwwE43Zz3wl%#dwX4?W%71FVr6J{l@9bY&vRs4pv*p 
zXElhmBqnn*%$L?S%>TML9rZ*#%&s-8sJ??4pAYA|QM%O;6p<*|-0lvVdpS3#^W);- zcxvEUC+EwTT5~$1w>gKeN*NJ<#l}!kV|z$BLXL0>h0TFzN-m77MPDo}Y(6up*Y-Y- zki+fI-gyVR1cm?2MwAV1vS^=SmQn}0&f2A((qBJSQ+xhaXyk6UwhkOH`~ii!gx1E$B%N={ zIleVU>K#fE=#%+5wif^%z*YEt_HgJdt_Gr$6KK})7&?fQSLs@>b5xx5fb{h7 zD9J@@5DeAcn5KxsGdKE5HH<~!pjG+An<5dN;7%NfFp@(lZa=?zZ~4&onJu$s%zpHD zwzqBwBEjJGfF7PM=4T3z@^1wx_+!`zFF*McaK{au%8=*Yam0d_$hMMQ&XA(6JwoKw zS6jZ=MCU8CE&WSiR>Dgi3j#7J7D5v&OKwAZ;*>%AzuQzM9k%oc`^8LMxs8x?&J%>-gO9z1j zw71;zZn5mI2c@{>(E#;{H9oB(48`@+A(yBh8z}@L)r(h!+iOkvM{E3Mb%-zj=#WEU zQ}IjNL8%?F%_32k(^Lxhj)W^*Mcn{hobX}kBWpF`^=f|82>r{SQp4*wK^U3l! zT_`~DE{UL=)wAv_{xo}ksS2R^rgsS_pxg-!PT_J$%RRdwYm(fAd8Ac<<;bN(%o05Fx}3?4iCMGDzbvnJgFdEvGY?oWpz#dh5sQup!o&sMqFW zF_V6cIL{(t*(0cW5A4Dj%-x#ahpL2h5feym#2`fd3qG;uS-DY?AyzAs$SsER-M4Ws zKV5ouQU)wpVnVm#k>;;X@ik4p&ux^TD_M+F$GQQ3R{jC*|2ipf9BZ4=DtdS|#%Rp3 zy97+s(Oj@MNic8FMob$Irpm49^61|h_z7fxS>Gv=W{t~n|2YB@){X@F1>?olCRaMy z#{{W%hR#nHJ#J3&C2ow+xy83Whv2UhSTq6O>x?=_0#lQbgZPx&r2wD&11T6HrXksJ z2xZ}Dbng4F!@T)PBA^<$>Eno{g<#~F+`pqAyv)q$X@wbnsq$1i3z%G;PmGP7n_%;N zlzCOJQ7OzuS``=E)U%#1yo|Jzbw!g)_eF(Wj%}V^l|JV_3QqsFGqVO;VWa!~&Lh3Z z$nf%<_Bp?w2l}D4+N=fp8Wal1emirfH@~qi5XkPatJM#xg0DABF1^~7r*T|)u!S=e z%g5m&*=WGe zLXOTBE&UJKedZqsGAob?b1%N%?!V| zw%Y)5_=(9Vevi4Kq~03dxWLz`Cx*v8_N9mP>GALgv*_=q)`ZwSqW4}Xk+73VErxTz zauu#H;3!t=s05+qeC24;gIUJJu8Wq#Auml*&%$xjZE6jZVUwsnDjP!gT|8 z=hx{Oe(+++J;U=enWova#nLh1x|M>gd4?t0TvcDTkv6gBP|ItB@d)HU?YSM9`Bl_h zG-Kx9on7WVi9Wo0pa-4rn-nUAEFti7?q{=v^(WCDondvw4h2VXcoiQgdV`1;*)_kf zBE}MPTOXwHLZKK0PS%IT?yp9^d64zK#O#K(>i+LwGSblBCtGc&f4e`VAKm}(_{Ko< zOSvbN*_o1%x9_y?+xdkn&#{fF;^ zJxo-ir2yJZwt5sUkUbw6A~Fh%VucI=_}Zg3W7=tX4g1fy{Mr0Jnp!{PAkVr9aiqsw z>g^PPdDzmSxJurAr6g@FtMd^-lHS_Kx5TT}L6lL+uQ#jNE%cdW9&^XoqwtWXemJEB zmt}%#u%9|NuZ;|lfItJ*!|f|<*w%;%Z#^%;CEQYTm8vp>UBxHSn)%o2I!~PO515vd zOZQVRkmvzBr!6D+g+1K~C411>lEK2@hR4Mx*_a@x-od6!dP*BE-q>GQWm~Uu>2;UY zZN2A*HooI;crinpwWq`pdKI}uOy5z&WE~B_i{Ms6s^Dmz;Qk@R1U!yBU4X~!KuhqO zS;$^F4kVV1`Sp3%mV2U3M%__vEgWcgT8{gRGQP{F1cf_9cwJg5*Y&bfmRNAf3A;mH 
zZ8L&;ht0*et9fbT^Efg{~1erVw>?h?cXB7{KTpJ^jQZjEC%Z}7iQSFZ^U35 zBmTV5RyJZ297Lv7^mP44iAkAsb%RICF&*mYy?Tu9VEf)bU^1%wd3)gj|xPY_W<9%S6E0)c8*QFO*xtMrt zBoE8suu#FUa*QM{{E-uKRk&o&{hn(Sdf)6D?40n(bOFg7~onsKMn^6IS|Wn1&n!o+Sv46&opSZH`I zq$f2O$eW5~DV4#5o`@_RuF zkh(t+bfFms>Q~DGIavq^wLgQo27Z~MN4mMdQkvgif>Sp4#s(p_`V694jQv(6H8^l! zps_k(M$6&p0X)MD3t8JbEx_U(X0k`W1a|#0kwF|@3Q{cePNAifDVw)6*w?AAs|L2# z#?OCFfnVf&M+(L+JT{xTBb~nx+Zi6LjvZ^+K%hv$dj7Z<*yzwuTEPcKQD6bGqVopy zBiU7EyQp{Pi0&Nxc{!^~=sDl=l!4i4SN+YU$XxB0Bcpch4%UifvwxEW(wNfmobZ88 z0?Qq`_L_v0t=Y8G%um+yTP`=-u>=59sa9olcq7!v*8)R?LZ0^tQh+$Km+1v~Kro|* z>sH{q2qPe**BIVO0E5Nzj=k&2(iJ^i25;!{5DNm^zUau>?NG7xvY;R2JU>FLLsYq# zCE`+q1VD6M&K4m9g0)E}Vz!8V`ZFX&^FL_>v~&2;(Cs;*H<2e+%kWD{ZdhMSVx8GS zm<~^}H=A8s#jI0z^>JlRAIsY@dchBojYn&;#ZK6VrI>&HI|;yb zr()aV)zZ_0QFj_(&(L`qv^zC#OPB-a63XsN7H^DUn~9stVuwM{Op0^L5IeZ=NWGrU+xC!-{I=J@CO?|_lWvMBD8#JWdU+qncid2H^iP&L( zNZZx7pI6A;2Ih6I92Ir!+!#uBdQ@bIr&J5<6OBU!Px5B=E|ISHP!W7(iT zu|Z`}bjRz&h$<$nowWRd6H=7C1Uo^nh={E~F5XDvpJv6827=RdU?6!Rl9AHxSs=S* zig-}K3!9O_G@RzL1%u@$CGr=4LYXh{F z*xK6q=62V_PTzm&=F}F#Q8?&p*01H7sI;vIpsZ)Z~s^&ChFR5vjy7?y9?P`FPFnBHuEq2D1{bbEXBn9!rtT`r!Y{?V-z@QwkJ^ z*|ax7Cj`DFu_Ajh!jut0A@=IhjiEl&d0Gvv?Cna%qU}HbXIE#3|9y5eJb~y{O|aSs z+1ZdsbM6)-Cnq@TY2*VraYW{aqMgdVJ8(sRf&mD|;ho|$1b&g`w?OOO{DP$qTV{Uh zn!HPp{zD?yw;cqdPELzi-z1{btB|2K}dj`AtRzR6<F*8DaUs4xs1H6)`b2oVOJNO@>CNTC|R%8;7 z7)w@TaEjQ%cFsnOh2}g(p7_j6z9r(2(&?VIMSPyYMFq^N2DY3?`D#ji6G6?n4B#uM zsPrZ$zizI7wnMYqQ5eQ>8A^}5w3(&tZ9`>tF9UksX;3o||6 z=I#=GR@c!KQ!MiDT?K>gW5Jcpps~gp&vWHzAT``b`yw0K_STQ(67biknRQli3DG@< za}9d;>X!5m_gQ>17rZyQ(t^cVauEFN$xY&&$(~f;USUy^i9LA@>y?)(OLR_N5O?6= zvj&6%xF2p<p{R9WvxI8kK`z}#p}Xf@)xRK{VD2$Wxtq< z%8NP4nM#j;L5oxqx7x;>Z|a)xrM6StvgL-b^5{wx2m6{))@jWKTW(0*8K5nosBLaF z0;qB{IhIx=a|hnRk`6aptR!3uQD%R8N8Z1QB_RrIh1ACN+O1V#g7*j?PfhhdLWaiO zV8uH_p-WdJrj8wqY4**d=T&1JZGmdysQ)LlE&YAaCowo;U;yU z1a3fdHI@_`bb#ZgSK6HgUJ0d-H89g#4U{!cXHby=O2<#GJ7Z^RsdMqs9Z75$7^&Lr zxHjWZZ~z?^=F=DwWk&3u=PFKm$}s*i zqSz30i7)r_RNBT%QBaTl?ls>^qJ7*IM6`i!K8byX1H960GFuf81923z&(3DHhG 
zhn_ZF*gj3)=Jxe-5RJ|CBkI4ce=hV7e|HNi$Kt@RxB{akb6exg`O{ zMJ7Mm9*Y3SgoA^xXz0e!q9-Xp8<-%%g$A`h5|E7l{4+lty(7^b*{OLmE$?c><4g>K z{j>v0j$6=9ik4VxDld&DQP4V!GyHuZcxDR5dnx8#gKo4%0J)9k&oEe_R0jV@mnY%} z$n?uH_dGQ5RmWYfFLHTp{P<2R0lOD`q|+K!8UyoO__Nx z|3HRW6UWk~(;tIxL>gqU&j6T?Ld}-PAIx}u64>zaYy-`dM-C@R%U9AcUlgH!j3rXO zX*V)A+y0NJN@c%9C3BSO%iSrh0T5g_t;Xn{5)6B=v_a}B;n>rJMd8SKp-GQZ(eo2eH8%@` zLyds#gW%BHN5q8KfljV8O!OKWCY+8+G`eoiAGi+UlM)*?`~p1WxjFg5)imbT`mWk# z%E3hqD3X+SD_4R#B?uy~CH*Ju#)Hx9Nub^=)H2pw$APV_A6Pk6U+LYt5JtSy#yhxx zfW6l}k-3O{3DT0~(uPu?<(iA%lJ$}s3oL=&`*x}2tHe-V4*eWE_6xA)B4~)+58@}W zskL+ypt{jtx@VZ@buxDPH7C|+2(3AnP*}97*FG}28g%Rt=>8cvD>B;C4qix{5~l5x z*^k1Q53|iC+^og9w_rkbur0*2lVc7PFQl>8<@8BPq8!m_|B+?yQ* znx8n=6{OrfQWO>hdYH7b6dqfD$AHuGEWv;5gFRwUx*oL~xvyAp<-0KJ$1y)d9U|S| zVtCSXhQmS<2VDYqE?nvZb(B_P0xp1vTq~ecWtYx*-E(+u1zM5zlN9@!?Xc$X$I!k_ z)n=rxL~tsA{*Gt1pzL7w=8v*GlNUwQbKO8>xT zI#1We9&sZGo`{~o($BXZr(diW`YPa#3S*S7f`?L(6Fun$zht?6cx1}8YxjL!dbXZ0sg3Irs*{Yb1Ss19VBSIQg3or0o+0QuD(@6VE!j?E z5_-jxE+)ona$uD11={=)P_4uR7^oKP>uo{%+N}0e3I`LR3!J2dsl0s_)lrJQXRL;3 zyBi!5Rr1MVshZ&TbR-LvBOPq0>v9@?6uj+GM^bL5X-KZnkmAoq0_=02PH6Tu%Ua)} z@X~1ahohQ^G*&FfF8kKQV(rE2tv%xU)6;?vx<;X)REf%L-J91E_9^*)#*COv2YLUk6#kXs;7d9SOc346F&ZWf>& zqT&e=R)%-=Eri5i1fHsCMQ@=hMUe`MPt|noD|8J(Gka!C6q#<$LMWQS-eai$m1U=k zR$Yb|Z>c%&HRIU^&qk?%nI^l`#g#kPO3zTxqls|GOcVLis;=R_#5-){chL9`mK9dp z9q6$8To~^>l_!pUjlI@})KYRTj~P!WEVriu%VgfhvZlH7YC9Yov1V-Lc@XI6CyI)B zz3`79r<-_Y3=i|>kI+D&O+OK;6~(6>=Q%c<467}_q96F_&O_A?j3pJR_2T6AoF+FH z1V7H)4F<`TlNYh#A65i~Gt-*eIW1B--ty4BUi)fJA~8Hy;?8in;UvEUo{Y_vUcyd~ zG1pU5A-XEN-~_7mQ*!yRhJl&S*T>fM%^7zQ0wWI>`>xz;<$Oi!`ZGg+|JVpec_Hdf!qI*X_O` zyi+QbivWXv2Jv(^bKT127-ESXRgi2{^R=#{2O7Lrd(_`4A968?GfmY5IOqwv%{94s&)%X@BhWd{@7vZ9%#a+0*^3EZ68@+XWx@~(ULTv zD3^2RCJijtoGcJ*cI#IEW6IztyjY_@43&f;*bU8CdTfiZHz23#w{sgKf40SjN$*H! 
z-_+kmFfHx!HEDE$G2)aWd)tJ5mY26NXsB!Jk4AdhHYRr1q(7Y8)u(7RD!4&$->2>d z{;Kv`bg6;D&{TLBZYmSEB{?f;81$8GcxVGA4_@S`Jbrh)ZEX5`-qyfRMAI-}+v~L4 zShzI&O)A|E;+iehmHlM|8&utb6)dzff)%+law(w7NY*7lX&8;wQ^9BIu9X(O3eTV}`Ub3f>G zf@LW!A+=k$V=gz>d52@ttCNvhkQnPrU@y)4)D^)mFTEncX$$Qx4x&zrR!?iL{+`br zBFC_T{os0cA4EPdwJ90C0he|D)X`6@$ddV{ZA-DYoz?T)`_JMZ@mVqy_=MvK$$4ym zwOpnSrsPc_U=Q63sbcb3Ln^9U(zAk75?b0jf`F$6TaQ~b-rU8+bFkj4w~-te+$X?? zpdDbq2Yzgy%gENvWsX<62X*8rA%|5>3k126Qb#{S~1b0gw zii8{>uK$GHX8i*ynmtU-7>Aw$;q5c+CvON_%BjS45WRIUL@(<8U=rosuGjI1{7rjE z8ihiqAI*Kw&cc`1h+eTiU-PMY!<5VWQ6p--6$lxb{{N>T|G$*VBMqK-&U`KS*p~ie z`)b`;IJ8_to#Mszy!oP8q_KKL{anU8RX7Pqk)NV&?jUdKZ7{8bGLnhx0eKOpqj7K$ z4ro_W$Bt=|KF>3el}!}sz5TL@F*9++knFhe?Q;+6zZQi0drs?`H5D77G81+%n|)HT1PCky6`VB>jEkv&AdfnwjdpP|s9h z3_y0>j8E?(Y1h!Ygiv_vNdiD{A6(-JvdwVV?Y^`S zatzG>22Ytx9j^6nV%u2z=wb}`CCqqQp%~Z_#LKW2{%bu?eWS4uQ#|~<-BovSwX^Y^ z-Qp|*hn>a#qEVFC<8nhu7bU}tFkh##6@Run(7=+6ZaUE+@ueW_g^vdw6E(5f=Uw}? zn3ih(>DVrA?+nY7pPawHDsqd6a=65%KM4=yid+LIfLhGfbofCn*7SJQsx89ar!)qY zKONL-ykf?XaV85HWJ_kN-OU^zFTX|RnwN`uLbw9G_r zCE#lU=5i(XtIt-SPLj!PPHd=|fZ~?t-Jbv6KwjVIIhp0EPha3d(h~5DnI`-+{{Ehg z>910uax7qnZT4pEJ5Nj%gvH2ugd3P(Gu$qyq; zzuzU|beM=2vvWNINE#|1BnfIf<#LImWYnQfdj8?=$+{zTCH7lX z17g!E$F6HxSuRS(qQ%!7JQY)39{_6iY|4Qx|>GcJ_xg zI=^2k2c{LrkgctzaCer(0VZ|F`qurDvJf-0Qn#NhRJ)r%XI4MK%$+B6puT>b&V;e; zclMdgeXjtlj_A*zVcOctiavu5XAw0#<73|l5%8RH=LuxK*I=bH(R&B^%gLg}?CKRL z|K(G>zt(F>L(lkDggs*5F?$OE)-z*<{`z%6v3ma1H9^&tr;Vo*tFJbccr%6O-iNIMvr>UBC4E&3t;F8C} zTtiYLjwtX>&i~$_LIKn`!<`Za7f6HbSX;BBoz%|Ul}BTOz_S}Nb)rDt?1^*wug#Y) zE2~GN?^;kcF_4<~NAb*eaiBPJo{XDI=;QCH00AEJD0v$QUf>#)3%s`&fisA+q|^HwyN; z2cpayz<5>?n@=m$#;@Vo)3?@aJdxBv#<&@)Cn1MZQ3_I0hG)`PoXMr`tl>}N)1>Th z8hrv(^dE}$9QInvxsAr2*Duf>&UewCon7MFwL8zR&Y8`)1Ex_#ZbeLY+)CbFYkVT~ z!(QYpQ4VHoX}Est-ElniVtKS}uFW6w{Jg!^e|ZZo?VZV(H`Y6}m+ilw6}t;~INd7C zFoB=LdA!0jueD?bU60&*rRIR~-qzS9w$HnqpO`%OGPPyxH>R+IkJH9RcN?JJqM*;= zcIVD8qkn4U%3;kgMI!jTBnDNUAAc@!+$QE1-S0(QFL%c`p_u@|O 
zmHDxHPxpR|AOQ&+&+sEj4}-qh{TE=-QW?J&{kd-(`zp?tGH?DcOK280JK!1`HwM;l!YN%WEs0I(T?qL7_)h|zL@EnzIt;4H78ZE5-fYD$ zb8+tbWFU?D-$Px3q8IR4YrL(@v!SJG7t%iSD8Vx6 zPLX^Zb7ZyA@yy2rIrSolRz1f#_~@vQDr?tIY5G$}Tznj7wrTp|R$hl%JH0OTGx_s~ zz|iPaHa4>vK~mL3=U4j2CrVPr8i0DjzE>hgNP!~f+pbfM7iX$xd2SN<)vltGVUZji zABFng!D;x_TMcl==q>8j`sE8rCc%p>iHvQDOZ zC;RQl#W+@bmmxR&-CVMc)dea7*|u&@+sLF^&qgv+HsOkpq#O;vcPb9hS(TrQmQk1u-}IFFFZ$>M=3G(y>K{* zVvh(#Y$WTG_!*uZW(hQeb>LuZ##ua#>Af50t>98vwIn+UpiTqhl zbE>mk^sxg;ffR3OQd*e*E34JGy^U5yIr%qgo{XO#b zNv3$?ak$)?Bd=E@Rny6&3hflQ{toE~eikD6i8ufI{7J^O)t?sV+g1^U9hwe{VACw% zUg#Z}sNat_8U}gO!lk` zExgg$&Nq{AtG0>5&HKrKH)@r%BY#XT>Io-dtB=H2U~GHNRvBygc>(U3diTJ&Yvl4? z&*DY%vnw(bXY%om+T(Nv?>aWZne9)>?CWdIg_GBnd&}7>IlQ|O&HELaSz1gyc$8>O z4_qAnE#9@}bc?1xDo)EWqjA4h~kxn6TwF|-6d;_#l4xN{Y%H0Vo9 zCKqdbRj|r1FTy6(&O|yrIa5V{(sVuU{xJh*xuU0N7bgUbFpW9$_w=RC&pmj(V&3H9 z-PrF3Su~Gx-`aTn^k8!RS2Y6u!v{V$8%(CiFEg&BK@Nh;Nqb9m=Pw8Dd-bdixsY6S zwKfvQr}e(7{QJoSpp7tXuD>*Fo1z;~q|fwGenh-$h+%U#S8k+rwwKTs=5a7TW9a(f z=FzJ{|D)@maA{3uD+EbQDsHhHnRXd4D>jLo|DKmQ9*)Cf#Cra&i5!{pVp?Wi7raDe zEWC3nDV{^?(aK`CY^@Y4?EmDsnNL~%?O(3a_XQR_Tn2XV=|7Qq;`7?jDI62XkZbqk z(1GUT-FC_LIRNeTWkOnknf&Cvd2PB_Iq|>#uQT# zQ)lvC`3Lh)xsQzf5@bE=1|P+Xl9wM16z3RyX@9T5JF?Nk|C3kT%c8*@J{r$p`T9@f zJ511S^3rpA#fK!QUTQADq`w-~c*8qc%DPUn>_Qkqk)Pz&@AN2nSUpXL=q z>ca(e%Z$6HzQ>@lz3o2OuB*8z#ArrL7x$^y3n?bdN9_m)V;MziFiqS|%d?++O(lkg ztCoq!P(!@eFG#pZvejd?gu|RS9`!W(34O~%=EO4 z;my0l@1)M&b>~2F7nh!Y4Fes?lqQX4%0_@+shVkv->|e7QVA?BM&B-`R^{`ME0bMo z37g0KKmRkYkkU+X3-16;xx2CUM;pqAwlCasEst;gT1JFdS7TZZ>)zb4J{Ka9y<;=c z#=P8zv(`4X=d)$&#MO!d#;i2A@DrB8e-F(cDpcX|4X1R4E(hxN zhm*V&FsP=1Tgm0Z(Mx5@(G*k~XPn%Go6{#^6lZ7qZ)lA;@Xk$z#1hvx1w0bS&>TIhaqQ_3Nu zV7xQ10OE{%!R6|su7ppw!0uXVyQFw~X6&&`V}YN2-Hq#d=YH2#$uV?fIuU+FA(}PG z^i<-v^o*@HN_8fk)#;3saukoaaQUR|XA!9d(%e znz{Fjv7|>by;<;e>txuZwQ|to+*uBf_{UAc_U~f$w)|;srz|7$MlqmhSLDl>0Pp6Y6ZrCnyu|nE7gg znX!<}C|FWtBsq2DlMY_R5AYDA-|P8FL!Kv_vGP$^n%=-_sK1CsswH$-kH4ME zui4X!63dx5eU-r-Su*itMx(Zrb3-qM9Ii`3;i{10@HwMxwn#fC;SVnZL~yU5W@?t( 
za9Ni*mzTZQ(WN{$3{>r;Eq_PoxBY^OQ%XxWtO~PoPSNE53@66C=_mDXYX1z){)qbI zwO{duQ5hltoYhP=bz9Y}q<`c`Sgr_c5|M=bhuB7oXn@uG#9{TXTbvw<-LbR+34rsz zVx8-A+Uqaq?+%VE6x@mj4Ww=3T_7FCIi1Vf0!H9oAEOM{+YE1sTC7+?M`yRe*i9?+ zyUk6jmL78U=i*7o)+z^%Z!ERd^#WHs zM0uTZ8*ljaXK&^1)H8Mtz5nB!_g8NH2F+>Xu$ zj=+BBSq|jrmaCJD8@y+T3BJu~fSzp8I<_Qkx8D3(x}K;G1T|``s`Q@0ZwH#4qIiFr zcm&=)s}<&QI61B?mgG&9Jct;aVp88yC!<*y7dqoTy)hSs;#_neD=5cb!N)^VPJ&K zgga--*6U#|bvPQ^+uU#lNP;^{vNntiQLR0UnvYIaz5=|TB~`T*5ncQQaVNtjAgn9>w>7|9uDKD5n3>&)9TmsidKYZEZ(NEl>?a3(E>& z5xD#8le8EsE$LVv?YH8XkrFY0OHoyxN#!`t`+OkUNq3R4!oVBl-qFN5tc_;GJD zQ?S8L8SHK@39UsGS9K>+DkRLD3z&ud_{|)t#E+|L8hyHt5eh-r&yFC^bAt5tL~9Pq zcGVoa<^ryOZ1Bhb#iImmW@qzN+Cn2?!C%FR4+f0}OrAj5zu6lNO!BSFlNrj?)i~$S zO|t2Iw5oi*UKe((T@^BGsjOCkqnQexxe?TLer}e>y3TRi#zVg}$MB8!+04;_CU}(Z zQoqFr({F9dVKle~Fyb}+*1Okr)rnt`dYHefD9<%BE+Sf2q1^&2rH3%jnmqF@J@slq ze!EOJG+3Ce7bKl*Ac~nv>nUsM9u@vdkcKZIPCc)8#mCH|N#4%YII#E;7e}P5%t<9V z!2A2B9LIpcg^Hw!KTUs17iiHx78u84-D8FHS(&tTQxC@yKew}qwQGJ#i($>SwD)0jB~iELLvG zq^PNzk8w9?PJa)H-sD`C3Nl;Tpu!;1hElddK!s z`_%MD+c?@jtCQ%NRx_Bl#KwNSJtCsUj!65t*xApyiW!S6UlASi$qpvpPBRyRpOM>d znUhN0^2?hDL(DU;Y|g?cg7bE@{Uk5ntyQ&SIvqA+~w zeA4n~>Y=-F!D+9*;a-7~B`LQIDeG7I>;%R?7c~W2_RDiU8hjY%{V5^EKp`@EgP!ox zz=HopDowuo^?Pxb@q*fx>w1G4Lu<%4>mzAN15#I9Of|cEv*{9SG|Bm6x=> zL3*J$-yaCBg#6$@I`kztR>>MyMR3$VVD@2qbDkq8ecc=_u>S|qG+Qt#M!yq~+1IIV zD70(7y|Zd?!wLO`V0IH1jIVOII7<v&7IzB4wjH`nvkA2`$Ib3qP!lO?`gx@ z;FHBWZ@qo#PhtJ`uZjk8FV<*ql-O;STA3QE^rJkftJ-t@u^OQ{T1*qJ+QDPhT1oTH zk#Dmc+tdEapIf|(+*l%bA27)}uxG@~63A|QSnFL%%H}Q6@7E@KWR^oA(e;m9D~-AE z-iI&4C48sq+>rR=w$ah=D%fpU{8=X>oST>*R99ohL3vkBHqC0YlUKC!i&fGM=b)>` z39#Lr@L6Da^1$@B)@=8-=#vx^B649;vS~>9TM&o!e$Qr6TbNQ5=X5D zMvvHF3a#tDGDUKvde2Y*4K{hF@fgsMwF|d?d~|-2SMgb+0B6@HMQJEK13*pGlln)<&MN5^}fKQ+YKTg64~SW5V=l zMgjw`x&B4l*qGbX#mqp@RrQ(xxr)D7axUb}@{_7qqgPtL*U@KUQI-7;WPmOX_5WG{ z?sz47raGYQW{j_J@U!CEoL*QT>M_2_GgYn6?@z756(<>Rh$PiRk0bir(>z8$iu=0P z2KgO435~Scc1jN!Jw9uiq}S{6HxcqNI7iF+@Gu#i30tJ*NJ*q8303vliC*XObk}8! 
zslhh0S5#vBcG$b3L~XK`p6!VRJ>=J{X+N(%9&F)NsVfkV+RJsBidd$_fhzMFZgLHq z)eM`Q@5iZ5{L98=mFZGtJ+~CoM(L7YsoriOJvh05icE4J#spulmOV>KRj0{#CF+0F zrIxm}#NRp>Sr-v#{6F|tAi)U=Q&qf&r%c?#XMQ()=zXT&yO8fEF)=fd-~TrNJsrIV zpgKvNmjeOg1+yE;vVzP;QGM+uHhYD;4fy4euOV75kjt$JBh*@t?57MYH){i1f(_+b z;X#IXvfuwCEB`)4E5kbJ|HDd7xB$46HDj9b<^!&IdS%o}??*|<^$$bQrBBirR^#tq zo+VxVz4ZFW0&q54v)u93&f#+6DqW6z$4q3V9T{g)l(NQ^EV}ce2{5v>Mr|1IF){w- z#NwL^%eP0~^u<*d`B<+R5Q^bsL2*ST{*zXLbY-azx4+aJ!SGnH&7rHx1@P9|>-bEHo z)-Z51$M37BmETZm|o23{*f(P8*D{K5Ktrll%|{GVx*P-%xHH*=aPasEz8{Ug6#oFAjh`; zeFNGDXI&yR=b$n&jP==&C#K>(sCnjj4ElU7{{vvur7EdX>6mtE7|Ku6r0NRi6siW}_o@$c|<$IJiGvo6%8 zO}wYm1iuC2KKF>{J9StJ(vFufyZt~r`e|LqJN>gz&CFR4uK|i}{yG_dvL42EAu!A) zP4*^t50qFub74N7MQ#*KKLmW*npec;&0RJ3YU20Iye(#uKD7X|)pg~cVqQ~WlfLR6 zEsHp&%inO!BlvrdA0ujgi65)YALjo6^IvB%`h3(L0*C9OYPhh++@{a6qxdI$-`%O0 z8MKOge6w1(Cu6vcCekGSHH*88cRi1|ROL!x6(}Fw8WEB_vVH)=8J=aD52YTOe9zys-(i;Ufvi0{@(TRsI6oE^9kRx}&lFia7F7X=& z>%h(HqwR?o^tB;6(+#fe&XxaUTaA*(_;s_jnWUDKs_Ni|!=joFd2FgoW-p(DuL(KO zy z^@z^T6)(ni9Q^$5aU)F2^X95r5FufbynEv>EP1Os%Z@NfEeUN2)adi-6thsB@c5hw zxf2Kb^czS;9gocl1&w+g4PU3U2e0x3bx%$q{9{DGpeXUNOU<81d5|I(lPrORC+=^bhxA2{(i?tC2X&Ch4 zZoQZroDv+Njh8PzoIhNeHRufMTXfNif>i%roO6Fm^X>nX4h3}9?IqsK9m@Ii z8k`MZ7u6lgnQjcPaSy$2$o!ny9{*a-qJg?RYY26YuP+$}$z?Ncgxn?1^K88^7&r{- zzk+^G37a<`qtz!95C|0d;BP3iLc18-bEry*M)qFoZ3b-KK3lW4zMhmj3}olqak!@< zC@QJl`J3S}=5KWkJgsBYz%Es6qJpJ+xP9y{W-?&`s|cN|42ywwMQU4-ks=w(z(s?-hnw3zP!1NkR(-Qocqw||#6x5eQ z{I7FrC%IALem)zY7PruHLg>#JZA47c=YR0I!k$4${{K>QhT7jSq|)^~k2HyC{67V) zt>Ty3@nc;F`)L!D!3*>u|E+^<9lu=)Zp=Q?)p>pv-T2bTMSNKIpK6Mnxv^MMoBbDx2^=|MLd-kN+DkxK;iyxJXPZmLN`HCYiiy^l>l)I)$M_ znR5;W;kQdkQa?v1ZiicyiyYKn)ahI0g*hqGeT>{tvi*;PYOD1V8BKYF&#yn@#16S9 z$tq>^Oj3!9&^3K>ZzCPLtjIPQJSct&iu3xS8^nNSf@sLS>#JP=j ztF9h(SPmalsMTHnUkcoN{Pq<=(ML|%lkUMm4b*T;Ba9yoe)1-zaV>Q z<9PimpZ}5JBuFeS3=dj91@ly14FDV`-eBe2Ol5y&Kdc(4YuDnsP`y1kGke2~+{KHV zIYCagu_z3+C*H*@CRlDbT{c8Cs{ceQ(tA$zN-FeX$Rgr?()BzN%oOUxIne#*$?B?f zFcQwOJS2bysTH(&t7bxLfMQu|okn1BOQbj4||K;(=@MS%5)wrj@ 
z-R)%QYB#PihEW|anj8L5*BXOP6bw>_`D_TVdqjq>xox7NYqV;DG*8c7o_=m*-~sd2 z{OEoY>}ut)#-Gx9jkX_J{VGWg5@7V>_8b45p>6P+VPAgb;Na(5-dR8MH4q1Bc9s7G zKLN8K_(y~JwNAWNcacM2K+x3k|H0i`MpgB<-Tn#^l1d5)2uMhGcZZa8cc=6Q1nDkG z>6Gpg*mQTdfOL0BpS8i?ec#XXoH5QB|5s=H-^w12&0_7nu6fNlKi{0!hpwmYnumn< z%L_+2hru`uW>HR_WFF{2iqr_n-s3p&@hT$)4#QU4|^`pS3dXIp{RJsskLjKQuXWc_g2EKg}?-}QS)rp z*<90p$MPP!&-5TtUkd-v@7DaRobl_K`pQeyevN?LsuSX$X*vWSJNjX3dqf(meD2zb zzQ;>npGGkZi;qXZbgeuaUBHXtQfoTdp&8>%*#hUS*r(fFG!dUm!dbC8Z=)n5&7pek z_e@Fl^Zz%w@#iv%odp_%N2$m8D>Uw99V)ya zA=h8k%h8Ncd48s+6S1|Qdh_X)3m*EK;4)1;H-3(CPW^cO8^a1B;cwYjD862jOk}r#SsNKCvC>G*@?Ijz_9x;)O-->QaJes9MU3n}e7$ltpNKeID9YhU zb=k(${He)vOY~!JdqtF^>cD0EU-t`2<*awlewUM#Zto@OcZAD3cvJbHjJHJQkOzAy zC1%(8hqBns9wCw>>XA36k$r_AOW5GFoz}%zyBk&Tcu?pqlG)ZLp2}yStVH2DN{S;?@6nIgJ^X;v8jpzY#IX%>s3AKY}okCD~`iE`EA#ZmnCXxaqwr2k*$wHxG(~>{@q?(26O2|zcPjp=e=MbmU>o%(%fW^;Dpx9 z1jUzt?5JUa?Ve14bsc6ssoC8dC}QY!k6MiSWJ(w8HGiaK6f4RtPfnCL#w%2*joSif zd$v)XY%;@lrlz5fDNHZ_E{Gfw^oE330XEW4k^iu!fL8=*K(edwMDMNT|Gt1_4VnnZ z08Sb2rq6eHk_Z1nIM^UgnCs|>Gzc*B;V9Wz)Yf=$s>C4RUoC*&T#_h{ahz)oWMsAAXH?B|DT$M=f&K?X^qoW;HXZF;VD=`W8;I zod1^5b(_F#WHJ%Xspdia^w%I+!n_rI@$2Tk#*F9T7VaogX|(td<*+r3+K^a;X=Z%I z(zyP$*olU5)?Lf#HSUn>)m#5!Jf~QEyByq@r2!VQnT|sgk&ctXBo%~y84@WST|6D_ zmvjDngPKQ8k111ifdMOsKCl1Vm~LiP}bsUX{_ZG2&-fqHl<>&(h*P5^<@_`9BD!9y=xKxYa@d5nX13S!%0-;;t5w1hFg2p#=1DNn|3vmv{VH( zoO+RS5==x zVoXmSHdtB)8diB$jX@c5*{jpKr?1bX;d2mRlSZz3luv4xfifEmy*=ttUg1+-?%NCO_I7V8Dxxz8dUGem`)Iqo-`_7j zw-CdcG9vF?m?4=IAJPsBMM)CxH(kR$jpP(f{Hn|by(ksG9>g0ur0oyRCtgtJF?LDW zvX_op3Ozn4H7e{f(LF!w-`e8xMu@6@Pcg`sf;gxY&;8o+f4D;!7P=zHgD+6`a#d&9 zd5*#-h%GE$BQ4)OJSiyZi&A|@JeI=TryO(@t z4SH@mK1#zCzf}u_Fwt`90_3gyQquh3lCky5cFSLF_U<7?$;8zwBQg);W?+MqcOmkJ zb6r9J@eTuzTJqJf&)ve_M3c5uHP}PB*|rItiQKG`l*yrG>IV?_q;!Ncizo#Pz?=h_jq&5^9NC< z>>`gG&8($rTI#gd5EyLagp;&N65Z262w+Dh^*W1TT5d2n0m-#cU#_DsoM1adH=C~iiGUC`^hUYXdMyu zK+vE6o{rpbEFY5nQkyqml%GV#8ak&=iewS_S%0Y-Zr8Hb;>lxp5PI+8o@-l}G*9gr z$)f+8cBnW(d~hymroJ@F*l8BB9?W~Xxj^nPMCob~Rc<SI$r3(){<8cO$;A_In>Kx?hh1Xy@ZU0!W#C?z@?7 
z$~PfPf;~KkW%+D4>s)^oHN)v{0k?P;y1Yl%1-?4nX#Ta0(6O7AY6F%6^y^9#Z;?NX z1yvHnrmxyn$0E__s7TDSNv~gJm4K($h|E2&WOZPobGh{A!64Z05%5dPy$i{FR5>~L9C4m2HTPgyS;QJ`B5)f zrvTo}vZ@CV1k73XZGjjzAy;e~|IOnmO=V@N8n2WqGlN56JrtUXg{|_{fztV_km3St zfXj46qQ8i|r4{f`gpcDl7+!vQS7-iP=%aXq1Xtms*PSQuJfEw3=~LBj(#E(+IgJry-e0rsV0;aCS?dk&FLKL!j`F2dVc=ySJjj?cmnb{I;Y zUrrsN%j-AbV3E$o516u3G;dzvKZ>uFk3Hk#ePd+OW#|&w=xg}qro5-(6L~9R>8CdgHr#yYJr--CC}GZZ90=AC%$b9{igiw1-B^@a*x!pH%*t z^P+M;wa7^24mP_&m)Ka_1&Sw1*0FpF@&_Q@Iq)%XCV6{b@wt5w7oa1uoQ*f=Ei8D` z&2mPy#bcx({NQ07Rp!~^{`JggIH;caXMx4-e!JUb@EjnQe*2BssArRD$e+gbTspE9 z!Qr_sk#zV~70)=nTG7TQVUg8?T+y?pF{P#z;dXA9Y_w^eX1uM3l;7`u(p&7@l^8HMH(aQO9-4KniS-Kl z9zii-05)~iaZj7&tjR(y6SpT0{7n7kk8^zF9|kZqk(2re-dB?8^Pj_>{^FvGPjNO` z%EvsLe(%5)xTGaqD9XkucmNeRBnony7~29m91Au8w}Pu zRXJ}GGyNMatO=#%B4o*B5obY4cP2$RAArJGun?LROkI2%Q!OJnitD4u%5ci4hPn_` zy?44O8`h~_+H57o2*@MSm!@b&(NEGx1j#|M9;YP<{>6=+V*NUW99!Q$Ljy)w5K|=S zhQ!S*{mYt@w`P1C>#QH=!~?6H>E$5);~U3GY{3tm^nmFcx9; z8pD&5iA^ni`EstDnw+-+3AW+(OzE09j_>gOmWymyRVqOp2kiv4{EiUJf~8pq!)|ac zm5n00r2QJm?}tpN@_{S@D+g-qDUYctc`nu6Hsj2}Lf{EJD%TmQF>j{aLRyF>c0aU+ z7$oyZfj`y(Ud$v!)Q|N8l-5M8l+DW^DH7!()z6my8O=lfl4=k;Q%Kul?xggwt?Ikn z)hrao;GW|gQM*JuOzh1Bg$od`l@=zTqD0M;??{trP#T)bs<5XGhJF1C;J&gM{mUfc z1H1VG$s>aC6+HxR;-j|Dep*DIUNH}Ve!KKT?PoD~rz09CgnwvPCYV8)fR4@)duxMx z7=UbQioPoAK2y4gV1))F0AOZqW?%&moTW1Q^Szw~toaTGgTyG`^XY$8{urg#Vt4Woz?FG9Fbaj zz9)hC0*7(0nr!OZOTb>k)%S`7ntGgD4qhrizb=8!rSq2EF$6kPuXqq+Oj}TUiGO6k zr#h0^uvA3Jo_wVm#_ezLPXwMwX!zCMYjtD=3o7-_UAcUM&06KwIZ|VEG|Pg~C1v#` zN-_1O5A=;J&_Vj;rQsvSXL+nnF_UI8~gq9pF`pJc|GyTYr=B#b25#cn{M zEf{9|gnK+7N=CaepmGmVLsVLvZaK+#X;BG^;3u1!nwwF^)MiI{QKRG77DU(imR3Iz z6wwX-r0Rq_yXkf3^4KI)&~}m{;f%e#0_o1GLu=TobSN=Nip)Xp23*#k$MT_57u($j zCmNjBx{S^aoMgqcOKprWbJ9dzdM>=|m$F!Al+^h;1|5#KE3F4&#jj}v`_-a@;K|MB zb-DMEDb9xr0~VHkc%QV7RbQF%xq%de=!@tDCl^0RU(hzstTX4GIn3(1%G_&bKWb!2 zM8n;;#l8$HP5Rj+f;j%yXeAN5BAo4W+!oD!wq(N%=Yk}xA-6@YqX@a&RK|@BNz$gb zBh;ncb3c5(h~d!%eMg%zX8XQ;P@#KjALeC+CMeE_w3?KIpSLvfO!}UexsvKo$WNWQ 
z;wwKVyvNwrU{)U1eJf*?^+9?g&+vljwf5T89uybGnpKKsbv{KDArnm8A<4?$F2(T3 ziIX$a1Ep&r8DH$}XP(#c;p%kdbKlw49SwJB?53)R3rE$zbeL?UHrz)#M}8ew@wtalowB>;4k$F% z!c#ezJ-)g3)2$k0hM)pDPoGPelqCb$xFaWhVe`-$uR>~eu9&a6I?J0sq*S^lB`0bws4x)F)jb&c&E&TKYRD_MQLr=1#Eo-Hp9<&vS9~p;8QT3_ioQd> zhyoJu#7!O@SvFFq$@8K?=Nrc*8Ur8EX1#a!`L zk9I)lF{Rn3fAVh4Jx!3LS{ZZ}H=?G zPU(-s($D5dZkU7`tQ6hq?in}NA_z70`L+2fU2+ndJf_f|4KcWCq!Qe>Tk0>+!SJbc z9#Ym4;{^FN08U&Gw{YjhIS`XeHM)w5Q}|oK5^rEP>Aadn)O$n4nACI8y!Z^6D}wyV zud~BHI%&{}K-uKqeHR0W7f-GrrJ*MiE3znN_iyyB=SqzrkLwPNgzI@gyvqx1wxWiQ zM>5Ov%ijHTP8`UzJm#ikF1mb1KJC5g3*s~tHCU}JV!2&p0^^Ga8gAsj2*a zdcF2V3L$?pw_b!QP|d?y?ul>`DjN`LGaHdik|$xwvo@G+z?L;O2|Q0Yc72~#Bksjy z@-PCkz>Mob;)^u&3dz@8*N~ii-FQ4%-)YRFZe&!UIMtb_C; zfP}GBIpp(7a&{8Ds>%&cXx zMaKRow#R-~P4%MSj_7;i9_(LrGys{M|b_bKuHRB7-grpI`4Eqq;3Lcn2)Wz820^t?KbV-j=#`_I8U?M zR8EFsTc!_^xATOH1P&4MmE*yAA3=c2Ai|0g=5&z{2OWxr)E@-b53)=b(?tv_f0d#!;X|+6oZhh< zxvK1**{0~5*;sVOGo6336^AGCVQ@cuov7ck)m~kujTzMmbR~;X^0^kr*SoeDaPzLB z622^giePBQ1PN4Yu`-T7>Ef%Xpein{p!d?d-@G%It)JCX+u*S9$(sJcg!{fGN$)i| z)*&eqE=u2_5!{GvLe46uTYrtsd?j@1jl!&#x?c<-NN>&{db7OBfxuchU zPd`8C$J*-`5Nx8t_D#C=diTXs+n(NjbWE4YSK&P@z^}o(uS6;DH?!RaGCtDMzgwSc z|0sEpPd@e(3G}+yzkbU@eScWlbd52wq1NEER~q2V3Aw_7z!lc}Y3$c~O?oz-T@8#K za{kAIW4%4|^w25u*!{BI)CFJ9>(x2`kUh@g+yk*qc^F}f5+h+NK04-aPELQ%ekjfA z|KR87{@gCd&3BE+4`;JRuiV8hXXORg;>C{LwEh8~tgjIZp}NFlYz-tl@Xb(9R38}G zXIKw(LS2gly^5=r5a@5#grkerKycrGqG z=3nqR#UeuNU`jee-MDOxGkI;%Kj?V2zn|%|&px;Bvr4EE!136l^1N$5Rd*;wHbPX> zcKjP(gS}eIll1%iCD`-f`;%2vASpa&2fa5IX~KyW!_toeMVV~r zqYRMaVfi0N8sOtl+y~OfiMdv6B()Q=9VBScBIBXiH5|Py?9g_6uDlDW0*&$gyBnnU zi$wUk(@G$V3E2$eMbvx{#OhJ!{wVN@Xbo2WodcfzzkQvzEAI@q?3*Bh<59_ z?_L5g8X0->SR;Z$2B0N?d-EdW{};zl)LTIlxmmp)Q0eTLZduTLp^%jD&S%Zns2?hO zcTCS5oAAs29{uE8ryKA)*EX;pzP4)zw|2;Uk$0Gt*D)6UAp@s%4HYw`J_67i@B#~s zG5nG>Z)OgW?ZC#~gH&X{y>e=z{NAvo5BdUqqlBN1U_#awZbz;}jU>HfV&2d^=c_LZ zUN)XDc9y56*Z@zgH7lds6v%F{GV|X}IK8#964}+bDRY&CqpVe*F|&d{ozwMu?i!oW zWZ3HGHkOVVE%ZINKPV=9IdaScz7!64mqKGBh`z~Kip9OYGPy#Jf+esd3`oTZx{`XA 
zR9u09%1@47Y?A%@K5q^7f)pE(MTPpkZZ^W)cAIdyi~w6>J<@)5D_Ct~G1MvLOj8)%N_@n=y-xMqR9P9L|W+=|352+NDz{ zZZuK~K`<;*v-h!gM+6)>+|voV(I@!Y9-jL9{BR^x&0`=-t(;97Q?1l#fPArz(}&pY z^@&~O^M>y{get+^YKnT6Rhzq_lT?#VY z4xx170{AbwW=2+FR_i=+%kG&oFqVD8br7a8K(2PukIZ+_sCh2|%b10)%dWz|vEwll&g*M8n zQm;OyHa(ISjw`rZ$5eh&xzH`&@T?XlDXeg$M$=L));5vRMGG~rduCyr*mC6$v6BT#)eBp8^_N4L;T<$l$` zd*bNXN!GM;@uiAtNyQ4?LbR8u`?T?F9V@Dd;KP!XPON$8x=+p*BLGn$Pqt9QsdL|N z=c>7ZwuC;+c124SWbmg3s5g1%elZGamT{x4;M6W%D|T-5o2msLMA=J%oFG}h%YDb5 z=+?H=1t3dk6P{PNWn?$&oXuF2{BYNp&iS%5pzO@QRzP0RhX6`MMUCR~2HE?k^|3$P+-WVU>2%E8R2ENTc|1GPHV*8?+)0~b`qK?` zY`GftEap_Vxa^|nFtTX4F0K)Dc+h7q^UaqMzu0kotGNUUMjY~DSNhu7PJoX+>f6o5 z4U4PJBe+?_&gVviWYFm%nEAYa8ZSNQ%Xg|{-Z;O|V>bJ@`e@8SD^`3)G`&JQZ>}0I za_uwfcD|8c8wgmZUz$B}5awnbOVy!^yTjv@;gSjH6jv+d>RO-WM38roB64;OAEEky zG?si+asxCeWoBgYj?TD?xr-%zz4nA3P)Hy_w01tLF*Jjw_65f!8T-3{ylbOuj*iKQpP{9!%60B*!V=%jlqF z%^@`XoVV`$xkv4pIC*K9;Ua+Mw)q$;yxsp85{e0{=~NaRG8xzXFO6cEdVN1R(-e4_ z7qk!fX8jV5RY2^}4sW*-|Fxhvp}+DwhZH};xsffQ9BDN2*`%+i3dcmx?{_Nk?zlDY zIJRN0xVC+X@+LQ_YD(u(X@;Kd5i=);y~W|wpd(rL$hifx zpB|gO`4KGfb0%LJoke=l;$$SO+5Iu7HdwFH5L z#AsZ1nQqy?HHf>BOJp&dK*$K%2(g*Bse$h+J0IXmbwCB34wY@S@17IRG=n{S0-Ewg zt4{FJt}H+Wf9NYduRg!2TO<#Z5rg{_4eRs`4+#Q0ICy1ZT8-sD0A=|L^lH$p;`gtm zHt$4R6xI5(1fr6W(Mj)`apZoSFj^!*bVcg={B>eDqB*iF|}i(aah=9(m$`1S3Y#h>3^t*3bJ$ zmLsdYo~dm+8GN)XnJdC*aY}@kH)f)M1Ed{#*fE$_8R>*23h*30{ljy(a9T5BFYQUY z)>_TiHv&7r$@u43h}CQn0yh=9LR5J+d~8ERCddaXY><1zaK2#+$>LDPK^wyg$gWtA zj3D?PK!RY;RZD}J(3_X5!36m3b{~ns#)xk-v6q0v73_M!6_)s|8Lj9cE#Tr9&9oq1 z_TnneG0_9?yu9WEIib_<%jdT<222OInc;(#kNu*uFLL)uwe>kpPt1%_F2MgDcCw(7s_fc9ED2hJHL(wSU_7X z*TaP?$MKqFI$x27LeZrgc|YvENsajC%8p^KVpEtj zI6SzfH&-hyODx!7=He4>xpXhN4IDtjk%^iI2GU=78JO_@;vh`qq}Dxw5N^fu$4J<^ z{SryR4LpGPp)@iWr_#Wn*N(VH%EQ1EoMU7`%YD;{&I&j%Q=BhF@W$Hew{HYe5ZZev zxrumK$i94?_)e*Kl{TbTVZv`f8+%z@MbJaXk9(+;kR0Ouo=Vc*Wp1q8B^R-ds^bgh z{yF_YRDj7)RajR$=la>M?|uP_nO-Bgx)Ukyr6!Es4DT4WRr~q}oD~A1xXkY`J$RGv zFQS74705-YE^|Ik2|4T2+-Q2V_1;;3U?o(5>cBAkSQly~d#YDNUUpGMfM?(F!94Ui 
z%oDKTxau)v<_Gg~6hLmZBRN>>Q1e|?=Rr;FnsY3@?xXuoW1!VNMvDxpa0D$hn#W9Q z)nTVz#rB{-FM`+_A$&7Er&{UOzEOOimKCI5!gVd5rw7U{ zwf|8(pzDS> zb4Ep_5q)9%hHcdmYCm75%Dt}i+1sixqNXUd4gsXaa>6-wsS)B$zU~wQ&O@t9ZM4H5 zWYRAs$BrGl69T@MRz;7veu(|YId1q@t}}WCFrih0CO zPIpU0(WuOFV2f<~5bmr-(9QqAiDFQ8e>L2;yNuWNfZ_EyH{nPFS)mZ6m~CtbP__vg zb#^%}QKtxgp4DOR}@>1~s z7c=fL97Kj$vm*FfCqVdt zwcS0S6)3Wy+l>c72H;0_4S23zk>lP4EGuH1Kn3XSt;HOKXGe0+Xa|Xa1%p%u=^Kp# z&PlJ}K8<6?Re%VvC zq{e5{j&M6_wrg_VJO)t|)JWENQ=8azIoMubGjO-8G~*Zve0V6AV4&^^*rqNf`=B$1 z>Z0x3)^@T#-2Xh_0UI0(8bh@-h8QIybj_za40LMm?kLPQaM3OM=sLt&Jr#-|Q@+&t zw?`!22M_6#VGCekKa9S8rcEza+Wp4ytty=Z;Ug%oD-d+--qR-}p(4SvCjbgcMY+GR z#+oq)UCEW=0T-XGniaji>Jbl@;1VGl5zHgpXgNtGW0&EWZ&~(N{ zKzuydK7EV`K((h97`W4<)N{I>uLjYU##GiB?Y~R8%W^NRH59$qpcGP$paoTIA{2JV zVm4uE|0v&K2k%)qHr>YaeH&XUFCD49tjb&ZeZ6PA8^adla14G60mX@w%&TtevM93! z#KuZnMgzX36&?5DGQ-Ho&TGzp7StZ}%%fk|fyfO7_VH|gB%2aV^E56!+bD88H_YHS zqn>)P@44Nf*|R-bF$m}4tURBP8<%+2byyp3;I~`rA8P7`tmi+V&rQK0Iac5* zbwY5N8rYJn+=Jzw*O2E&{AuU=r-gas)bA@WurMc=6(BE4F;ivN`W_G8KBoVstE|lu zQjW)G?CQUf|2y&$k#Z+uD5C&e!os}tlv-T1eY@|+4t$U*hj`87mcLN%Rr6*6ZvW0_ zpb9?6!?3HWJ^Y5k4YKw(;LuZYt%bc9XPu$@xm-7N^1WH>K83j&Ry_irrWl2tM>{Sa z(x4Z2#ACw$RYh&MEt~{=V$tFuvnqZb@|gm;mq{NH0Y73ioAlyepHpOQY9~x-|k8y_Q2?BdZ56UMU`P$VH51-pmco zkZ_qb8;hI_8LIjm&Ih#`SP8A{88S=~YR@=LXK-b`y-5^#R|7OMus@^~QG*0kIq(W) zgZX7Z`&%ZZv6%sPkX$X`@Cv;LoD6ZAs^GlXqe4=ERLmrzv+6F0Et03-A2kbSPvmcw8UCeb0z)}^WhtM+`mW|Lhe}lD7JLCjO8AxG=#H1IOf8mUnr~XO(Cy#Z%kvsa8VygK8SXN#eLwzEQG7*Uj3@z>&SuV2JTeF@b zwn^oF|05+nprie~EC7z#8d0*J+U}WMFDx6^=K$19(n`Cs5`*o~Ki%)qqiTLVYhWIT z4TmubJ0jaoZyJItQWV!x4~>~(TszYHk^D*ZVC7Qvxb36pqCd{ouO?MjbLubhFh$do zCp-%BIqwYmj4a&}4|iV{@f3G=B~vu2fRWffo=h9a)FDT+Vzjgz-U0Wqdm|86nR}ml zqOKD#G`gxPGeN($*;=U}LpK}(Sz2H52u9n>M;eNlp%6+!n$@&xC6R}(ga2Ni*24q> z2K^2peRbe$+0sDgwcV8`Hk0R@ExgtT*Gq0J`XC9Bxj18ttk*SWqK2WEglNE(Z6Ek| z@ZR(szO#HyWXbkb2P9V9bH{N~$jEcHVGO)(&FMdYP1!nV<7TRW0_oWg`2PZIV)MfP zfxJ;h=+fv2#2ISDApd_Cas+=~Kh(H52i)FpzCRlyhx)UJ54ag6{SjYXS2(UBxev0?pn)(5!{tH9b8!TGNt$bl8c< z6ta!Nw4OqR&)65PB$< 
zavzF&Vj(|FoJGJ7#6o zNvf;FHz6pIuMvQ?9jA=F!yn3aWk`}}y|2s(je1>%S<@90yHCyhPN^7tAh{R$+v&}x zS;9j6^WOR7tZT;$17PWvsHNYw`LBm9H5Ny+XPC|W=Xlw~+rK!Q6s^Q{`$mNgD!0b= z={wA{or@ID#ts#|?YNDno-7DhU~X2Fni*sq_K27%3j^R;o9%uY-gvP0;Yku{Q%`+&(zk*HT;IvXfbiKdeMqiVC}GYDD23UEQU}ye=q~Ow?+Mi=Pez zow)Wnxzbkb1ILjz43MkVVKljluKBHRC|27H@-{tK>-5vjHkAf`sRY0MPy(=IwLGU* z3RTtiEnas{KOt-eZ$75-R$7G9EuBSqCfxxI-^=KnB5IK<$4axBw1}9!fJq#>v^Fi1 z4q-y04>@&1mr>#dwX1=phBEl`B*3GMk$(9YCBnLras@~Sn|2aQ zAl7E-8WU&t{?k8oWBc{*V6;g=RE$!iBDy_ot6rHe&gGWe3Ajp(!47kn2-hG%J>F;iZKME|!eSSn|_z-{o%i z!~8!F{MKj|)|ZJF$}@9Z%NH&gO`a?@bdttQ6C)pwT0DYtI=8)?v$51PLUeT-oc?w_ zSGw1f{sxryB_l?^^tgVc>ofUkyh&_RL%|0)R7`vU^Gj}%@mogQkMj)qIlLW!+Obpv z3Ru8^p5^cKZVmEOhRqJ{^{AVASqacP0}|t=0&y(u0m^64B_kG+^jfO~>XC9jbIkkV zcAeRz(G9({OAV--;r`l*c}LxE%EE-f#}Ww+VRVDqz*O*^OEM(8oSIU1_j?Xe4u3tQ z^VuE{*7ZhC@!xD256q$!kCXojb`i%R07Di?*&?@(Xi^bf7ivbf0lXMyjMiV)qEX+- z*3B5OxJkwP`i6M<6vS?D@}hJd?VgqN_ls*$$ZJFy6MkbTc_ z&g^a`)842(9hjR{Zx^Rf);~MH+7ZYE_e&8mVB$yx%hlabB(j-~&J>-A<^MPL*zZvH zamyn`@f6|J@e(Cwtf9Oq74C{bQl#9TO?-{pv4G)Kp$Hi|)OEaYAvYxOb0ZJRWL3YJdciE#XZ zQsTZu?JxT_m!Y}efgDxiUj9Bn8WwY=X0bqE6@zE~(+dS8@y8}&=PSi{=&*(X@zLX) z=tsI`UsLI6QcXXk8EM%F>}2#BDL-{?aHy+Y&uR<@Nn+qT!jA0%cI_vO4gR~t#1`pE zZDlgR%$-4VG_zHj{!_czXf9&4ze%4ROw^!33LbjgGR^){k5!)h+>8H&gI7-%xgTPrfSY zyE3F+9Qb`sMX5jQ%=>Jtt5p9htu-JTAdUEUt^`#Cql4K56EQ^)7=LJX)#P3^rMAqC zyIKweO}WK}f{W{pz~(V&YwAxsh@^qd{K)#MHy?_PdX|NxIdaN=RU?5EKos3me?8`1 z8i>V=%WWw?tmy|X!R2VI{}_`BoQfyW^kBHg%l2|g`XpL!TXwpI#WU6+;uWC`)yz|> zRQvCP{uNUin}AEov0I?1^76~$3Dtp$H8YO`8Rn9hG+ZhObupr+67rzKWBKoBk0y@r z>5p4O41#oy`|b*R^xL8Ezl`3mC5>=5VLm<0)1m}cBZd{!fQNeBq;iSohs9oZUf>`I zCsS(BC>#EZ&-wJ%hl4L&X$VL5=OK?yoJ{`<1z^((JbY_k!3g@qB*+MHup!2h0&l<>fTc!NaoM_`Pl63 znO}}6`z|IEY@g4N0hy(C;!7=6Eda_aw%z6s=MVH*&AzTCCXB{gq|i54VSVSmdz?aA zFERsDADWOjd4M;w0(%aBuEBWXv|0Y(*_bTGmL*V$dy}X!I_CN; zcC`B*(Fq}aH&&c&fR2^;SPeaK)^3o+e%ZAgtLAyPpf;9!tbTV+zN+#UjA9L@NQ#Z$ zpGa7nOS&rNjN`(M6H-qyH%Kky_|e0#YZhW{W$3!)Qv-mL5?t4`_t-y_;!r&>AV0mM 
zUI6>5Wy-%UpPLZLaiQ2c*0}_8PH9-`w=J~p&=X)u4Pa3g1Bnw#DgnGDR|8OBRw zwER{;>Ur6+O5(}i{clQs3MaN{R;RBIF=zK7!u=d>VI{&=yUbhP7KTpM-V)WQI-tpg zQL&AK8D6x&mmL;EEU3%s+hqtCp6D4Nhk^;H^5o=}N zF@J{rhR^!i))r+(Gk+vP?@*fx;f(Svh5v=LQycg_7MIH_E54P+*Refs`r~*ipi@Kl z?G8IK?A~w|0;o&s-A(>Gf7raR%z*X31-$!pl=Rd~(Y^J?*8yeb4m<;6wz0ToNi`%= zR5Tc`NcMMvBrItlEAaL(+dB%87qA&`4>>c2;g{I$7eYR-b_c!uuFizASt|Uqxoh|9 zb`Hvx6l!@@?aCJ28T-R0ALjR4w>IRTM61u-eo8Pc%eXYXLJy$Xa!-E)2g2V|L!RU% z6*p=f?C9Y409&h~s;5gtosO#9b1ia1pDQ$f+bIMkJrBoTuM%C6jrd!*6U*Ty02+7W zL-0Zsz`bhiRy#V9fOTz%#ZU2ECJ9p}B{fC(31mK2m_9~q;P*Tpf2E3z22oAY=0#Sc z2K0>?-X8X^BTW^dv}SxzbmvEr22A|fUEtm@`1a`G759B`fyH_=o*wIzC%SXz>cDQ0 zVw{zur56x%DYw9f$C9d9l!%kCI&I{29*(p{A|fvDLb70C{|`IElHzIK zYMBheQKDYo{adBU6I+Fq7{i%eywUu#pUGCj3nl5+aeJDE(pcNdHFy=K{`oPPgA^;6Y#{O=hoQW3^Cc+`p{N=pNHI!nqq&LwQ<_Tx<5U3@z2ZRFo ztmSKB3Tz-T)O-48nRhLNAodUzE`0R#!I=cTgG;TALfIRPdo7OI9HowfyV|IcbJ&-L z4tx&igs5Lhehr49T_y^?RIhcJ4q$N2zK=i-w=mK5A-I$vw{*(X1cs;_bhF>pBwVf| zk^dDUQ5%ATfW@DTdbnH%)UL7wW&5Df81@NfWM_P=kCX&>9BI!=m@*o%wXm4KuweU} zwoZ#<(&`}WAKG#O8`@z-gLKWfT+yMVX(|{3?w+le`N;&zY%4TbDQaK$17@8PQ8V2o zppu6SA9y&IT~f>set&mpb0yx|Fo~PW7Gq53SZKYN9lnLz*4vg3G7n5ll?uyak9xm! 
zM6=$AvmVj23;C=+{*L~o-1beM0;rG;)gXC_&B#}{%IfdLH0U1VIQ$= zK@_5lkVN|ps->lmSf98m!P$(CR>e6>c?iD~FmqIwd$>{DYWHV!%js^f$sOgVSvcB% zf=_H5z62K4NknYSagQblY>^fPsv7m8d3&}_j1(K=Wq|pdVoL!62M!s~4}>J?7(DfZ zP@|J|^~5?B(ODje-I~;>IqNZPoK4T_mM`It#kOlV+D;4PJ?v zjXy`*+BA-rKu8b2rfJ7Y`h~X{kP+f2z0k&uL=0-5Gr6vJKOYxdKP1SHsv$$wjut=M zAu~3Nyhl|<;gkV`64xswG@cOz7AYzGs$G@*RA zD9~+6zrDuZ6fv=nJ+rur+7$a;ok7MkHXL@Q1jo%oRx`nt-S0hU6qiou-B=B2L*R?o zEh?f+&?$NR@B-2Q3apuAUwa=VUI~ZwK|$ZdH1v`4qDB z+O0eo;d@VsEE7REN_1*IG8ct8biLC8FQ0yDEF29ok>J632 zasJT{J8mz3fk}ej*hfS5*gpOogLy%*K=7|}jCdXWo zuhzu5O*t5FQR$Ms)|~J%X>#E6ecrfkDx4`ER~YUaoQ|fSG*7)e4Jo``pR_G{PA1O^ zoY*v=l2cm|xt!G!iKSC=EX2fmN`QC#{n8r;n2Xj^i)g~U>J8kS-crQQQ`#U>KRbIE zuyEtSQk;h}Q_!?a=i)+iJ@%6z(Y!Be##drlNHn!@P1WjNgmMl$Aog}^GyBV=2{O6AbpX^-=f z>B*%;BT4|m?8~H-vTIHps-Z&$XsJK-0BPJRKa9*wW3ZGP`FvL-FaCn5hdJ4*CAYu_*G3@9gu@{YKB4T;3= zgq)*tQ@$~6LZ22ditz)&*?c7Tw3z-^b8i(L$F^(>Zdqh8*kWd8D2thyS+Z4Pw8boo znOU-!nJs2!i%QJQ%=F6J`<#1U_v<^(82!<$zpSiEWv;a{R>X{mIm=3PpAVTQ)@Z}M zPTpIQX4tj%`_heBJh;1MVJ`vzxfij#D6l9wClWiK7PUjboGNVKIl)D$72N-wcM5XL z<4vgh$^&mgIZPG(FJ!CIfai}+&cB(}2vU~*=~cNj4>B;W%<#e?h^eT-nOG2?VYiIQ zODJrMGJcp-`hLvt2ZVJuq@7%W6OBs|xk0tPxT3!dQ1UM_yA6@l z`{ekw{X;f@T+)R=6zhiq!0ODh%AzTG_XHS%W_D!s0eSwoY;@0jr}>LH2mPQJyte@H zABrI7p(`vhN{yKjE{}qX2Mb}RFGq!f%+~AOE#sWj-Fvw;2bf0; z^L>orWwV!mINU(rn?h_Fw~N2aIMt1NaQ`wHz^ytj&O%R14{Ke>;himJ9(~5Qq4{p# zxfm1R_*w({$MHq@?f5oRBSnC94`IM9eegX&fcR_{r<`BEb6j9Lw8dW^`l+2r2}Y3n z<{<1tJg$l_hnJ^dqFDVeY>L@ii6>-&zL`?(+JxF=LM?&Ft>?|nJ zFEwB2XQrs6`>IHQC`>eZ&4;185T_= zZNJ+r7^d)v;hN#(K7E}U6GjQtD_**b&tB~+D8Od^qpPD8G6v=^^Ua9H{p)9We9zJ^ z7xl80e_Hoj&kVns|I8A*8H@oxYx(ObiJc?1>Q){-~YSu$jz%6@lOQEW8o z*&8_wravZ~)c4w|7z$}(>rrsh|M77CPCW<~;mk&&TPM+Lt#DQ#_B@^)Y_u8P zs`E1!2oFcKGM`k3)Vcg}ZG?c$3&>HSqU?T@&(h6z<8y-u+2H#IsC~_yEFgU_LK;mH zHC$H5pI^&tDT)1Ze=9UYy9Nm+V#A~+0~^3Vq%A?}Kj5K=?0#sbLL52X5EBKNEG_=c`{kr~am`c&4RVj)d> zq&wrB>pVwsFSuG0gRC zvLe)3hPt7oJJzo!Qz4j3Pb3+JLujd3SJ41|t&;N+fl0s=Q^}Hyvx`6EKKvr@3BuXS zD2+a(^ql0>&4}ITkdSorX0+2QLl$k*k~oN_-0kbVRH`C}kjsf=otw+Hzy3s{xMxnI 
z2xnNZ$JX+IJYb`Scvo|*!TtgRq>KY~kRne08dqjwr{njN6Mk6CcpN`eg2QyTV zk;|V8M10jo*_yk#E3tu$oTHt5kVNA<5qpor`Cpi1&y$I{@ZenU-)vq6_qY12z%w|n zs}5c{a6S?Pj66nrDGI$!f2)cg@~&gq(;)D3i;Y4R4#fRawIr#V^{0#}n)p8e{{DC7 zuK$xm4R+K2i`wh|qc3dk{kPtZ7%+j)EYSKVa`x<;bm{g3O9 z1xv!7X)W5-g62LuB@6%K;X8*c7^T3V) z*JWi>8UNE(BDeX_NSKc>o!@F)QS94CW8(j{rzQm2#@et33&pD5vYI$OK>^%DLin|l zZuffAvFBSEnwqirjKt!qBftulwmPw}nDf)!f8#PEQSvx^=zaHLNxguv&Z-q_v`9OS zW{2*#MC_ui+EcXW$`iQvlIibgVH`QH{Ib+l23P$ISyrQfE@)2YIL6M2RP}ZVae89L zY+ey1C|M3h{2$TQVx4tLP}1*?67uro$3Q)c-rYG4&9(-u;U#eNaLmM>r827CxVD0z zA)T0fIJTne)8G3pIaTYSwf=*>o?4N0#7Fq*?-r&6Vm{;sWe4 z3U-P&?Z^8-8U9j2v&YtK*$zn`RFckmCqdFi;c6!HiY>EnA`4112ESd<<28t*@=&?< z(-*K1Qu-GT_WqWBhH}bc1pPyj=2QQjd}@srLw(9KnnC?Aa7wtd^*021kNUo2in9wd z7gmUm3!!v6eSceH>+jH_`olW|jqCSN)?o(LAa8sJf3VC<>+jSt8Ij4aKLls`A8LOK z&tMrFuuM&sE;L8AU8BHh^ll)yvF*2#s_6)2aFnl${7f#{iw3o%LZEh@RQF-Iq^amwOTM!H&h3t1 zPnYON+c7`=q3>(tqYS{zD6}|W>_wFjbkdYQua#agR!9THZ8{g4?vX+F&KU*pq!1J+ zSPWCo;6y*~q54p9Q!OC;UhBWQTyWuacZCho^<>}wQ|1Ql8lh0g{8Q*QGS@@(r-8(v zZ2F%Z_AoD4OmqiamRtx`Ih%ci_Lh$Mr`bB+t>iYntw z`m4%PtE0gt8`sM}X>mgVKUqIuW##!S+bgeAX31^&!d%zdgSOYut*6*=%ibT|7_mji zGKXu%hR=LIEx_V4LnupO#yXki4*#y8D+8@;mQtJU6p){6{>@6Q8H(ec-a*(BrSbf> z)Pi?TUE=P&z7}jlTa(O5V}VK`370STD^-<9xC!0-Lmh=7$ybUTsOThpl1HrbVHvv5 z2=kXY6yKw>P+MCu!rNlELm_Bn66C_@L*c}HNp-TN`pSH@`d%i^Mn_F*7OXgZap%v1 zOo^PFoJ=OT>{<5!dp}k#BHp%a3?if@+A902X8@}nnY2qZ?e&agC^qL{Q1GmnDZ*(*UqAmw>EbY z{bk$vCFxpTi_Ptd#}$)DT;eh3urL%p?8*IvfyU<2`8?{{giZUxUG4EfJCa(W@6RHE z>ZJyU)D+Y5NT%pJ+@_gBmm%2`Z0sG{8<)pK>0H9O2inZE?KHiL4vpFJ1aShuI5f!3 z+Tz(&^rQK^ErJjq#;T#IwiriK+T)Y!eE%;p(Q{0ghpDyc1jP+`Ia3T8d9z1ChIMnScF@3H7zC-(ybN`p_$|=;+sV^c{nJk0#JnO8CM?A=c5kJU*rT41WH_hL4 z4dBymTxQkd$xbzzURVEemqxbx)`_hTKPZx=4vG`k<|U-Wb1VF7+*2GdAq7iYAs>Km z9mi!`@n(loGr$>N+;R*(W9fJygS71$F@cZo@dcJL--@liB+~A{`*Sx{8WoJvQ{qYvj)0BKwiqnd5v;3lN}0Ys~)Bon?SPFl+td~Lb1kzR(zS| zy~?1o1Ps#>g*GmZ2&teWQiGjonKb=cYH#Fz#NnR>+>iXM8QV5AFY zS<&5)5|aiQB7Q`3_`H(4jj1^5lQU|bg*MA$ogH)A~B$DQ(?ma1v? 
zLxkgGr~B#mxH^pvRFM33lg!=z*_}Wl2Rki9oGpN1R7Bf8X=SlJvWSp63^@B~N}p2e zhX}4=Tnv{TEoPY)nsHQrZsX{&R25mf0_LR&Lr+1C;~;l6TFVZka6ztdNJD>@)`pL} z0S3A)E9;UnF66>i3@0@TTavBtLMnaZ+?makmoDVfPXA^S+M!GRQD}-z z`9YEdp>o76R~~40twe2r46T?&{*yw}M%h#b-qRiaSN6x-GzLl>NZaDBSKxQcCB$uo zxen-j!bRxs@b|t@>~h1;d3X}&++;shC^3^CQj-=weYWj}ahD}H94ivOU?=)0ZV8Q# zdOe$lM`Yjm1upm(Kg2=Gx1vwyd9T9IV zG#EdlGbKbFgkXgiLmGdQZ5rNuAdy$g^_a7%hmjNw7kI~S72ncs)Uv{Bkz6@ zs^m#KI+tg?1$#WiQZ+;;LQ>f`$&E^{Q)uKRB$6l=6TKwk$q^@f2i)msi2TjQfJrm$u`9Bl3u>{b7d z^Ap1fMMy)r1=*&3R){flyCiIiUJ0WDF>wZ@c7!KkhZJPjw%5ySM3J>lCq|FPW>2X} zNE7x=kL~^C0eh#=gE3s3VEcaS^xmWXWT>l*Rs{D-L4mON-F(c7Mt^z&~wvAc-jn)=gb{9!|@OeiWXJS^_X zt+ww?G#q+nq~zA-{5>qKoLtX|>En~xw7pmqj z8Cpe8_#%VqJc?~2>Mva!q{K|;zB#lNXOmk=H6=T#U|_D4qD&yAszo+R0lW9dN`yD6 z0y9yn?j}UW58iD{do}}kr8h5`Df4fH1apB&EbksT7LvJ&nkuIHABfmTeC{P8>YRsr z>ox?d^9ftn3UTw8a|X_`q)x=U>+(umE?rT-d2GzTvaGW1`vv?nT&Q zl?{&t45AKb7Jfd_SQTT)y>FyrjoG#WwKgt8_d)~5h zYUbW9D;DfKc6b{j;X8}Z{p!RBlP_yMljRnbXm~-sa*)yA`zk1+IT}(6*-i{CoT1^PZ9oIgbd3q@8!+kU~PwNe5H>t}4e5 zV=>M1bNu)QN|)+g?{g#i)g4ZC|E`nsy>Gwr`o3y%-n1(D<0va^uR2Tva!Q(73SrIW z<=0GI>0)!Qax5Gx-r9}0PwJ7x7jOpVz1ik#ZQ^yJI@Hg~C5G*1LZ&#h)4g*NAiRQ` zC$r48Mdt7T(uM4+g3*kDF(yUJ=+Vf3|rv(4<5uIk!n#-)i zvc}~-`C87Dw1HmRMeLfFL028RK}0qAp&oR*)HLYvmJr3o{zZRn>Kk->8xigZk@_R& z=#R6dmFRGj69cf1s%$y?bo=_PwX)OPqjS%(eOPYV^NOGb-$D=d=E>7??qu%g)NK6t zU3obxX4yDN)EJjOZy(?#f2-eg?#V)5Z#%8-HCX%df!5QKi0(Y0(ZXZEVCq%iOY3rY zs`3?|+iMbcIGKj`^4;(m<-^s?c{}kTohNiu<;@A;Qq%b!dhJ?5|CdF`ZX1?|ib?xA zkKuO1cs&Sk;N0TLU}9jcvB_2{`x3I!E{0-#p3;OU@uFktDKl4ogAva;)1R8O_q5w# z6~D5_(Y_LETKGyL0gAGv);PI8)8iTjys44pFSZv{(qEhiWY2n@cw4^MNYy&_$V(57py7kg`8vt757X4%K*~gh=5&NquaIbHA}b5JOcwKAPKYxMU}Z){6zs-dh=ovkx5oP9cM? 
zq*y6Z)xkD}`evhR_PKZC$9JZc=&-z8jSW#Ta5}tG63C*CVm+vmD;w(v7p>~y3a~rG z3xl~Yfj-#t#h4t6SBrjXlH)_dzDi}cdHCUx-#U%GPHC`03s7dy68Vc!0%x*T)`!LIDrVV)9sDZp2IV5UVSm2G)-6m3MJ9h2HVE{lBnqHh* z3xws+=)-Z|L1Cz>;Iym3Z{f)!(<(>2NP&fqu^T{)XPQtmu5gPMo>>uZW&GF@7ri>9 z2|6^h_yLKGT#qhR$BFsAsb0JsuNj(81?Dp65=`;J`{0(99cY5TNXVYfTAh`H`T=%S zOiK(J)5y&94VdutF1+6+w_0W8Bq)43p8*pt9hZN`eaZ)kXY*HW7`^MdW;-T!I~0?} zQ^`Ct5?{lfPT*{)l74!q`9xB7jv)c%VrZ31&}y@r6GOi*&^)G+L7FrdP zFSXm>uITd!^~(~XejdhSR*l+|S>J|fzYZ%VD}_Oqrk7+_JIk4w{WamHSYMCL0k}n6 z{l%dikh4Is&t+V@xYsG=z-n=~aKGQAFp5mRj4pc9``jc#CotbQY>DzUk$wqRCt@Nv-87rvZ&H3_s#VkK#M zx4Y7=Q@wgOAmdK+NPtZPdlEvJGpFIJkLsv4oud+ux}QM*<{n;PSXw!rG@Ch+mMyz! zohQDgT7`rt2j49=gg}01-41c>~f~$>ka5G-#R*~TG_ZjgU zwrUMnRdraTv|wuCx@y59n1LnBh>YrwssBW5H-fo_Yz2-?T|ba!`yc-*w5Yz=A=~HG zvOOMFH`>?=Sj1?}XJFp@c%7mdnGt7Kw<|p#!)eS|?!fu-uM zaN|4v6R@!s=3D{_RHK!!bz zkU!RUkRR&Wr%4OvT}60GqB?3}@K$`dpNBvZ23p4Br@} zFV(;7ZGQh5FqDj7C*!^CUc-`Yfb9vXt*D*Of(8!E<7Jpesy6s$?X#$2kHs`w2HsPV zqw2QHA^_y{>M!L*%3==gA-E(3_GePHBF2!@I_`!bO}}t8BhTU8&g+MK#lXm2{IrVx z>th+an-(&+BG-L(;I+e0Z!iCV(gNz_{a)R*!Ku)#CYfec;g`c>D{@?8;8AR7yFMWD zD9ob9ORN$C*T5w(LKl$}4#NsTvNw{c4pX?;(v#3mj)RRk`q!Oi4LD z8`owoNM1QMRMUR!h?`+Y4LgI;DZj=+SIV={@%beMh{RRsVH-vFpGWl$0ySn7)#%0m z68Sx_r$*_`6bh&drg)ifLA@#9k^MeLU&)B1Ss^PbO#YYVx6N zvauw|(P_n3bq*^$-l&^SM+k@_%;=KR%UZ(isG4n(d=$#E3Xyby4?}|qFKWkK%V|F} zTNham8o0C-56y`@%p@+OxoJ_pF-6P;6%%S9DA;)~pL%zwuO{ItB$%CKYS?|wh87w@ zP~@Sm{DPe)gLw&~3cQeHK*14VdE+RDnAm^+w6oa!8wBuVt$V<}?uBrn=ffKUl1E|8 zv(e#<7s4wU8!1yNr-=Yr@i~Akug!>T*1XZaKXiOKU%b$vcFrU__hUlDqlx+Op}!X5 z4HO#^5QfqTd1oE5#ma$axhy?~kaF85{W1Cyw+KMbGXvZ%26?=+jwnRIQ;!=lw zcB~hg<-|gN0-Rh}-Lm1qDLFIn@<@O8>VmA!wE=spF=f`!!s(m!M=xI&K1UUcc;>3l zO&48QkZbcmXV@(3g8qb=5uJ-v#C&YX7(vEuczp;XG0Y}S{OkpM!x@ruYgpk8gpMpu zj@|W+DFsf|HCC_X@YPB49*4Kk2r9Ax2>?%3EvYD8F1 z45ky>>)Z~|Q2!l0A6l$E4o+9B@_jw2@k*28A(VoQ(XL%vBbzEq)|`io6eBzN5cA@{ zYJah*;wo7x5Q@x~iicY=_qsq18yHADh;Po)cQ?|5J|we1IZA+o)WD;Z$(9E_1qKM9 zw!h)j)CDftcS0m_5s3rCr20E`EUFCLlp`^KuZ#@AS|unELtkf>As(MB-jx0O5bSUC 
z30#+1ARlPwE$#y|aiHYz6Rfx*6Tafhh&L}l7VpUUhWYnYm_T~OH5g*R=LM-EMM$~9 zvrKAYmDu>pS`ND9CQPuLboXep_`TQGkSiQkx_l7K(+VUKnVS_PIjqRU3_g(&SC7_L zLnE?G{~#tBQxJg^A(orFR6a3g7DF)P*-EW@z%1XZ{l40oLANc|52gG@Qp`?^KNJNg zqKk%#lWz?|YHE!T9FcxuDH(;CI1+!5k%%q|l;$ns`w;XANZsQ;6`+CtG;6;CW3{Xe z9rd`_2&;A!wthl}q(iS9SE9IXP-uI^OB)gqXzDH`qD_9XB=e1&MTp~ItdeaA!zucM zdyAVYV|vsSP@FDnC)Z&_DUZ^E0-ZGqPO_;23h$PEROVZGN<!~!0 zy+KGs+9GcfP)^0?){%HMY@5koRf?AcvMj_2b>87Tg^)w@FUdlav z>Zv_dr#@kb1;sI2yA>E4e$z0bBpk?bzp)h``$j=_#+XaOR|WSW5$nq#v6Mnj&s*B3 z%Lp>AK!pUd$J3O-gl8IUx4(f9@uro(VGy;54I|G<`1Ym5RT} zw(Vh}BwpJ;kr#TF++u4p3@1$pO{yg7W8XxtlD=TOfzmq2*x%LCHGi?d-ofSh%s!?2 z2%um)P4O|b1>~6trLNv#=Gge7*Q}cD#IeGtw0wGY^tk+q9C0Xk<)~&q?6cX-eqDbH z6N1TKTaD_dG5M}K9Zu4g64jolLc~ttw`HZ5ZWkfKz;H~fE(2%1yIpRPKB@Zm4b(=@ zWl~X*KGLdXn{9kE;#-!rMMKkb=#RiN<(+=;Xiv8;laQPy!!tj#@-1y@;ujU&^JzK8 zm=9uoPa5iA&4lWURD5?G^fa;ORmnn{lZA|t%e%Xp7z6{3r{-WFN#vfOjF8#RIK)LA zGHwQi#~F?hNOOMT{)Q{iJdyqr*unzn6YXUg5U7J}^m+eU_!9$wsFWeiLaUMwMxlR` zwdqa`@@!I>-pi$^IYF@MM$UCZH`RxWXY`3D<=u3guTUIjuIdVt?4C9UM!wKlD8afF z!w)>nePtOJJOfnc{*P)u7UCCjK$?0PgT-Ds;^#c3l)br6GGMFy)H zl^%PoJ{i_r^Y}%@EZLx=p-lqH$50{`+>O2%1|p1$^dtCKRZHTfAuM%S)N9t);txg1 zi>5X=6^rX+a+{EDHb5iTIS>Xx5=_@l0;=K1f&F1)RgTg<$x58Z0v4^G!I1#&{Z@hunc*G*$746zb&1&f#fwZ zWRO>>FKrk+;GR8;w?+qg0oYD*yqoWQBgNzL8!!!bt3(-EQL7~-D@2Xs9(ZP{wazSX z-@B%}80*pO7Lm}sOnMKCf3#V$cD3CanqsQR2stJ!PuU0h@f9H7M7X8!p8w#?_d$ZPU_u~SH&T;Oz*_=BC2YkW!a8Aeq{|D2K;=kC?v{bo`;r+T zY~sAM(rS0!!TCbW@<3R30`g1=x$DO2Ps|Gaq1G1u-|M98L?Q7s#;=uehngGJgnEXC zpqB6iX0H|P(Al5$G~MUY@{+lJ=vNACOM`cK*{jb+41u{bRgc!>Y{dUSo3N&tHZwQ(=aWI=uBf@A8v&z~KX573 z0TR3i^S{0<0BRCO^al?LHEA+L^i6~R|It?Y7Zp+8pgtR$gKsL7`ed=bhayRLiuzx7 zwNW#|s1hZ^)TRn8M$60!`fc^}W~?~V;QdG6cDe+A4?3wY$NSi~+}vC=4yWUJ0wpD- z2Ane`ur$UOUa}n9x8_^;Z6}1jQTuT$V2S4_HYYmL>)C%uC{|gtIl1}vvmnXIXcx-a6hH>B5 zZ+t*R6RSel=Ea!w`PKdcyZ*f7<Z&de0hDC@HU*x5kvcS1VLj;V|xM8vCVF6)#ZER zR#R8+K5)R5DX%r3raWG4Thg@1nBge4w|?pG%bt4Zf+hMqk-_)L{kH4^J|N6TF}cPo z!zbzI#|;qCnpJB~+??~vn);2mPE3NQ0O5UQ0RRbUv_~d?DTebiYGS2!ZekC|?kgJY 
z{GppC))ofwi@;P>twR|6W?3RbMv&MQ5g@Us+=B5wdQPKUd&d?Lu5mZj^8A+wO14N4 zazEX*8rQc?s7QP+U!s?NiGI3rHiv!1>yxE9O>O`vtBRrd@vu}a{dsBa)udH;1N@?v z-#YD(=sGRr-z%Mwga6+F?Am~Fl746gUo&2%Ahh(HQ#(vh6h4PO2`T&pmVt-QoW46! z!?GR1V!fqlt8})<+tb=WHE`hT;S9|d20Sce_SJDUP2Bpjnr{ifX)9vx=hj_EdH`L8 zStV%2Ye_-zvHPlM`3|e~VA;o5X7_0hx>@z9$DqT63;ESE$*oN9hP~POs00L!cN&|` zUJIa{chk~mb}m>Pu%Dn;8ku4v=M$Y$F}lT+a-&#@6_>K?Xm7xEH(PfZgNxex;8E!g z*uuyfxNF`RYoMzyjg_aRMo0gsm$M`Cu8^p3*AvN_ zlb;BgkT+O@&=txL7NtLyLgpO&wiG-1Y>P*ViHTVd`8>gqgX`0V_)RA=BD|lsi276e zOG-;GI^LGuO#}OH)_Yr zO)BMa>>shPNeEcCKcQ*lMb#5Q==9wN09YvQvR zhjLT;t(Sflb2pic;i~<-Y~B{)mym2Xl2njP8F@11aW&A=O5wHv=_^`hWVoMlC3tHI zcs@GtUX8Mqlx@$$UR8R($Php-sNa%2#5+LISxVI&8%&WNBswG;6UgI0%M>;q*NlAa zK?2+#aRvnW{Bn(dn103n^+x65v?5NvI*)~b$=C&DBTE4j_4`xBe^=GM1g{1q;{i`KO5ba=^=lYce-5@-V&W%xB1g z>z6aI!>{*jC5zwo-gE0>!8CJ2AZXdR%)0Dx?`ZQ`j(IC~ix5so3!awupL2@T=<$qYY=T=KHFqs4 zF26~dP+N=W(d)o2T2(MeC*X0)0?+w;*ys&TOO8+3E_BDAb>YWYYu>=TXsitBrw} zl1ESRB9p|}7m7irnngG9d&!q_(+K&sO8Bk{y5F7lb-caUV*DE9TzdpHe7!&a0xTz# z)KF6^_8e?CYTE`Ikw}-#5UTS|NR*;2_X)l`Y#sLK>q>(TLV>r32{l*A7q^$2xrkk^(-0o+j@C}+N;6Ji^k34tcD}Jmdz%x98Ig< zjo?=O_^R-zx5MImdo%E|xQsh|{6eGjZdyaaSFuSrsb9*M*!(Zs_6L*MeHk)Zq>@pU z=o|F!gHqb^$Li=FgFns9%`FB+GDZ??%Q5GrMT{vbhuU}8w%cmXARu9iUSMM9Ha9od zomysVpk$^>iIM!+3HjWv6(PbFMu?x-D}U_p#S%8|D<4F%F zMI|Dj?4=Ac(W0cHDqYvTohw$MQd{-mi<#n~>w~RI8HSkevcrOh$$28fijhq?W;!E}u`-NGUGbytu?~YGQ?@YN3<*jy2X#v)huE`o zb9j}CGT86aC3afyU0N=Qk^%1Ax9!bH&?A`Ls~hmM_s)+tKMr^9TejY2&ZBrK!<(4) z;q+Vto|cKQgLBUaR1#M4ogQf=vu;iM)LoWz)}*6E~Hc(z&IfKq(7mi!D^MyCruceF!$#wtr?Oa`_npRUaLUe%U<2}(D& zu1?@4OmqnF2G|bhn$u5eh~W|*q@=JKhvF7Q3zBhjYwMjzGj+Y(tyR8*a7D!bq^LIp zqn{#8MxoI;^;h!tnIJC0K~r@sjI8{z36LT0n^`2g=_oI+pTn%0MzQ!k*k|b_dOe4o9U(?a<`^UW(JGm1w$gB1$OUq!Q!R)stk7%W zq(wPuH+IK)9U2)?Su!y-E!{GPAFYiKXf=s_fswJ4Ie=Gl+DI&o7UVqFgatt_aGvAS zLUfy-=$aTKt4Vt@L+>(CQi@w!!mCZZ8>)`}-Y4ygcTkM(W6a@zBm)A$P0NlrF{?k?- ziSm|TgS^>M0?UiQY|qe*UY3msdOP>U)VQaMHg9 zT>rxmv%z8xWlSx-t0sx!D}{?;xwuWzhTA0xByFSlZSpQg3|^IQ@G&qkAMs`qe^i!U 
zBo4EyE(|gkAYy=DD-CO^rl8)RHbPb~aU&J%h=0BF z-rYRsnLS)mBg%M`WKj_>|nNKPM)3KP6+Q{v-l6?%8x&frVB{0nYDu zGFK-rU8f@iM1znuKEFYZT8(*fM^?p8iv+B>3(N{m4ZaJEr zj@}VBASyQKs>sWai7!=rSUcW{cKyIw7oC;&@FhhXBWpZK#M@BZqhe(- zhpTyc;;^b)x>1Ev6|dAIMfp_L@OeV?9S6Wx)D2_1)_1HNZsuQxgq->PolAW$B5U{1 zS`tayC)RH((urf)8gw}C;_WA6t`Ba}h1s<1EeIM)#w%XAaEMB6!1T+kYl87aDA*F~pOhN27NVIUl zUT-Z1z{i=P!!{rEX8j;&d()!3q34_yub7Zwks!6Rp_a3@%D~iZ9kUuQ=~XCjst_G+ zm+?)Vy<*b0;Y#SzbEQ7LK5m2t#wY2AA<+Y5 zjeQYd_HaZyA-dpT)s*-N?b?qeQlS_Rd&%YlV;3p;an|aqF~FKCl?|MH9zYUTyJ;%% zf&B6W%lt2+wquF*4KgYxI&*gNEW1zmhj|P~`FFw<(G;J-hC=YR&328yUO_5O_8n&s zn(qOqJPo%X<|U;ocoh0uldi?P(ub7G6HYJ5GlE#u@*x?^ zdewrdCyKal#?-%bsU9$=o=66+Pa4*4lhR~OkTKq0pRVX6lq22^aZUy6P;I1YOKTL> zAT;O(#2c(X*D%TW!l&?%?~pl4M9!|gz22F0_+W=aF@bE8we-ASZVr6~cS%xqNaCLT zpnK4tC2{lUNAHG3y=vtf3c`J3fDD=6DtuhXzYu?OgTf$Qr_m$mRqmoK!%s;|eDm@je4cRD_psm4{uIlYqV_y@{MYfQLzM!x0IQY}*nZ`acb zrCelF^-LlZpe8ioTQ;kC=GovVTn_26sOW6+1&;drS3*G9r*}4;AV{-z1DJs#=wZL0|8%0-C ze1pf8eER{lF|OpWfy_#LKr3ker zx>QF$P>UJ_LK$0kPe)lst?f?`&AfV_&pp~l=+dC4bsBwCKIPq2i;;}0ScgD$scUDo#gUMqO+rh$QRsU; z7<1H-l2a#c&^-K{>JxNh&%+XTgEXc-(j8*f6cYeE!dY!Ll%Z)aG^>7w2}$nr6krh-HzG;Qdl`xV>cX ziC8=r^m-5&wQ&K8ffEs8-CVmlDE@?lV_jf;s2^QvB(GhvtqFCIFSfgljss1}aDgd7|6oQSkO2 zZ_Lr~pHKS!K!lj!%Yp$$%|C$T|8GxXu%p^F>4MQU@WUV&*jLNV%vS@Q!IA%p3H{TG z*V9Sk^W{hc#owo=Q`k(;!9=gvGDyGSW|P04FefYpO8Cd$!1I4r#9H)itH1npGmNPj kcjWKVR}yg5?8+O84`YOGDY53=U*JC}F?rDn;m^MR3qr^fJpcdz diff --git "a/profiler/merge_profiling_timeline/perfetto\344\275\277\347\224\250\346\214\207\345\257\274\346\210\252\345\233\2762.png" "b/profiler/merge_profiling_timeline/perfetto\344\275\277\347\224\250\346\214\207\345\257\274\346\210\252\345\233\2762.png" deleted file mode 100644 index 48793f136e48f21f618ff3cb13bdcc3388f76930..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 64432 zcmd?RRa9GD6z|)X;$9pI6!?l13GP;)Knul-6!%gbic5++l%mC@NQ(s5K!9Myi(3gU zMH4g-Ieg!_XPj}zeYy|#<=lsioxS$jd+m{|J@@?o)|@j=M@x;Ah>_^Qg9oIq)m8K! 
zJa`m&|IsJFyKhNN#9zGMJoMI6Q+iN4@$}$+=dt6rx*J zRNor^m>h0PlZa`&2=&wDx~NTcID!3a`HPp?s=q&(5{jTjN%Zhs9H3f*1^p4?DIxRT z=Fa8;u3yW+>TFn0;TyyGmCF`G%wfm;n`nO1mLu5n*S9PLY>^a7>eCG3E{m2i{N^1O z+(Gcq{wWkn_Xqx=eEoF|?Y=4YZy$o!dejt8{%dZpW83lf3s zg~I`7P#0d+9HYFQX798AuI#lg#F~O`tNZ*V{N=wVf7#whLGQCD{rCMJwTEo~_4-=+ z`OCjU{_{&$QT^Xr`nt{i?4SQe_4;3Lbh#MA(w;cGPM{|rmF{q{_p|19SMl99I&_EmMnPrPl z*5)0VJ#>j7<>JGJX@ljTJtx&q6cK#Qv1_hYAmEE|$1=%U#z}1IZODAn7gD}<{T}NV zEo*qOLDdL zrps{J&NrQM__SpQWCV65TMxNg^qFRHm3|n|8=Ltfup2ynD?h|lb6SFCcaGucPT&za z88ZJUAdAQeTLK})gWvBQtl~47WscBSw;BE=R{(@f&g$C_%YryL&eSes zs-Sb%bU72WkPksm|J2KUb~HPEP^i?#&SI(1Oy4#A_j4k7==0EK3z#aH@%J|;eK$ucKKjQ z!%kx0emvq~kXSdOq|1qS1na~5g4eRo^pDLQ^xe%wu)yv&x-N4o!NF253^6)Rx`@MV1Zjq zG8Mo*g@we6&O&Vt0@3zSAAY+#Pe5=Nd}fAXl5aor^T+FGjp0A^!jKBcqn^^;^?-pq zWG521*w8x?2i!o%lR<>#F~K+3XG;a)ddpKQ_et2s-vbE9;uHpVVn47C48lZf=esP0 zY1L&ZIF6!S*LO-7gm9ew;=y1r4&-h}Pd0~bcLT7fi=~w=DQiCUZWF*TcZJw2Yo|Bn zoiL>iS!4?AS?;)A4VwB2y2juFSm-RpQ?Hsm%V8%+TAIzrHn&T68xT~HHESg?32E*~e$5P*$u z4K)grQV69Ol)@FnMAkGGJ$eVha_^FE@^C;BI)%GaUSl9*r)+aT_ZbhdlRQBoNx)W6 z+d0D2_-d>0*;dSv`$x*qmbM2S_+Cqx7jS&c!=Y)9>S7{=pbM@k^gnduc+)}Ez`!7E z^3_6%EraD%*dtlV82+GB7rrWcqczHd4n6Ly!I0g-HWhpkCqA6M4>Jb?%pAcin3W*tNU3g1X=eFX|C(P<3; zi;3o5ugsZoM79x9TKZ9!VxP~jE$C3&-{QSZxV6SvgB*j^+YZ^xo@tCnDY>Z z*ZnrE;^0`$xKWahfe%EE5S8n>YW+x_EEVu&Ls<>_LLBs9hZZ;mH_FbOa5ylH3gP=x zDB6J3H@UM5_;pu9s@8&gD1Pc!BHz9S*{HFsLl2*`^iSquetthCO8%ij70YRnIwx?-T7whis#<-DcRZz8n=`F&BdznaLr#{nHD-Z z@)a!YOg)OVW%Vzq|J~Q@?w4+BpT89LeAIw90z*idDY*)+vR()|?g-?a6-q7S|`^;755nL}Ud+Y1$6^80z8P*s`L(u^CrS}H?3#0VnpRe2LG&i7JHK5|ZOM0_U zHJ-J_hT-@7B~w#%$zQ0-nw|eRF10Fx#xQjOd*x}H&?vu+oiQJ<;Q#{q_ix~C{hqd9 zhJYGUgSD3fuysvmh#jr3RK4gXEkCa7IqZjIah%fe<)bk56kW$4t8tM@HP&6Zb4smG zaSIU$iK9DG;`ajazbcLE&gXsk`2R#HG{unb1SKE$HM}xC`}>XCDSi*~Cwk0D()D9N0fGM>z3oB%a9P}0kK~*vhC|7XnU0cd_tes`!tPh*%HX?9(8=-exp1` zd$hhIg>d6D>wE}7&T*n{Eem?6r3ma?5tCfuL!_N;Prby%O!$5 zX4kwjcm?Qn$8s{76ULw)E>Y8pJ$e4r*X)2nLdY^tE}R*FoVg2;ecwj?T)UoJ(R2F9 zHH7d^>x`Vx*QWWX4+7pM4|N*~NdYM7o}@fd4y;}9vu0HgIxooW*n|JXDsLxu3PtQ3 
zcYhKZk4T`iR14b1cUCf%)+*?^!}v7A2K;8>ZhBa39~_T&Ptkq$TCmE4GE;#?(+_0N z`_TfMV8*q;ky5z4_W2j9dFAOKxZ-9eQ^X_7|1b>(iBzZy)B+#Zk^9OJO9RhS5%LDv z7sFKCNQNp2Y#J*aO!fI)U6Rb$X!y=cxi2%9j2|tMsbbG8rhq`^5l0Y_zJHFPY?vDQ zciy3EHd+4QSrE(6ZgaxfK0RDsWQVn|H`-EuLcafQq3l+G_6XVf8`F)L1#JDb>8&A% z)_1IHaOA3S_R7DYy?kxRsF_Z%Hwd~?jvv^XmEExp)VUN zutv0{jL)0apZrVx4N?Hm#_AXE=TCkfmy)L&gfTk?6*nl!qWWV}k%0?$JdR3kk6@!e z1~Hav(c{D;XIBC6^1{26Mbu3oY7b#l$A2BPInK1VOG?lM90DS>HKTzluYc+BS=KXR zJFX~i)cBW=_tuY*&ckwV{7UVvwZIoBPE>6&+H@cE;418z%P&1`b8h{bKF2ELG{pnl zN@(&^e)Zn!j0N6UQ@h^cii51VOhbjSNVE-d^7k52!oS;NtAE8ESN_P+bukX+Y8>AT zLD7)OU)M>dVo8Tw$Rriew3hu&Kuf~ z7#I->WwPVGG|fJSbOcU4)U;v!q;Qj&O(EK7v{j}c3dOJj`+IS*%<~d!bl%-5JDt9p z63&bGD|iJbR{2kzLs<*kKx!-|&(0X`>Ylj2ec_MiY*Z&~(MZM=x(nKa6rmHD)>KxK zPftGp4}CtxUZ0pkm&X8wy@)wy_;mMNHk>3{kgQi!j)d~9HaUz34m46Ib4@USU5c+DOkna1(cTef$Z^?gme(y^>@Y|B< zcJ|h6KN|YY{wRdh*;0Ry=V8VZSu9Nqe?|C9Pxlhn-N~Dj>ovpf`p+BM+7N>-#^=uB z>lt#@6DoFVo|_!OUI#DEH@c=TLF=m?%MquVwm#RxA-&cnZ}A*t`@;8QPF2%ySoWoo zS}|YNg&2NoNZJp8&T_q4jv#f2yE2HDac4)*gzjcDD|t_Ay4#S@@6uqSjgQrW)2&Y^ zHm?;^X(H$SB8-Y!j{Qs8p1ivR zZE?(PEn?MtOVBqdb*?!Nn-8hAZi13L$lT}LdnYwarW#KIjtExiNrM9m@|$cght1*$ z75Ye^*Ts}hvzn%nqu*g_oZB_@sUdR9ZJ26+GlthUHjZ zF8_`SjuqIQ@2;r)fjp2Iv6z7M9BS){F;hc($?A5{$Y2)k_Cu^1pJdmI!#jr{_&(vT zYTiq6zPa?vwgq@VH8>QS(5*}NJBD)z*RN(lepmQ}7uo9L$Vrfu0uP;MggJ{ ztF#q9+Q_{3K(a-D&%e9bWxhA{+t&=CE9(0-99_TxR*Thmg;?Gp8SjNn32m4*%>0dd zBsIEm5n{{E$O2>pwRFe7BW~GZ6_+SRg<;t;+u-+3iq5Fki8Cu93I5eGEc!s=-3?NK zW}_EBUT5ioF5pAy!XX(6Go05|YCpgfDMLxX{TUmwB(qO-Qgne~=DK&9s{`&`7t2=L zAq~*R-0qpEN4Et)7GwP$volo;ZOS>dH9XleHa;U@$-gpJXYo7xn4h1meP)%sH;xD$ z4tZHPs!e@s`?FU)|3WZRp%rI*PTT5D6H6Jz+T`jD04}{_3RC^~A>~y(BMc6_sq@0* z#*`gMZ|l1HKZ4wRgTxTu^dlHQVw#_f($e*vsXV`G#+nPnM6OKO&-6Y=0NO8eV9z2mx?)6e^t)TJ~Q2UvF#@xj*xRe-BMX=wF! 
z&bin0Y2c!`Nr8X<D0M;GrYr-7DYuBEBYBO@Aw&;~&yXN8>HWTE7Y;)Ml zd^|}SA(m~6fNZbHVNKX&n~ST80VvJsL-}z5`hJ9;Yeg3?&c71J$cV-CmtMfSX~|tiMJ<@F(a=5Oa?bVUax&uD>Q~2_>dvND_7R`4 zdfoBa>0b?NSN*}2GC-49z3k-;$Mj07H2cnSQz0b`TK#hlS5J46w6wkRZhjO2-%jToJ%N@r~^Px|BmR|vaBH+$7R zEfaF2&Gs^r++7SCgYwXLQ!<2WsYSEuyd9xaZ+_gqF|we2u_|aBA;k#|ALw20{VqwO z7kY-{`_$qfnxOsivcQH8@q7=Cbqb_}t#*)1wvuAnx(lLJ6tlgUIybMc6$7-dnL-bJ zur!4?g&o^nI2^%g!Yjq$jY~hhiG_zEC`uo1sq7l6Lsz%(Wj1*?+egp%rC{bW)EJvksLGdiGi+ z71pVJ@Tf5S7C9QHtsLzYh|E$kI%Au8V}QYkVpjZ`dLKOVrhOC2Ft*@+LoplgSQjw! zp0DuAwSMX{t%QLNtX+Gqy-)DEbtr)G7!#v37)@5{>L4QnLxfM?6od&He>WGjp56z*evqr``&qPggB0Sa;V)J)KWK&-C z7$^G@r;PcR<iEtpp@dRlCLKUN9d6Cs#hn%4Yy1_%!pz6!$1I- zmwW-eAvPbHjHY1;1s~lRF82uQ(Rv{!Z#W+WP$I@HxPH(o5jpU-bcl?p8 zZ?DfT$RiH_LCNdi82UzsSi+47hU*Rd5|11A7nb8fI|p_?|3nuHZ4iQ~e~TR7q;R56 zSkXRLTwtF>^To1%79GB)4%_V!?EErj?NnckGD4oFo=ZEg4hRSvfBy9slbzdKBLfm( zH8Lf87+1g~*^X}j#*D>j`xh+?TrmWuln7SZBHqLVKzg6dgb=wiOAREKTJ^jpAiRZf z6{xu(ft%m5S~pVbH^=MAOK*3)*4vTXBKW6L-KqB`HXGqO=8q^mtHjQCbTs^)*;Upy6M|_vseaO^hV{IO0Ti9lF(6N3U%@F#OKwRGSXCIZfm(X5e%hueN z)1T*OR6n1R@bxME7TJ+%VOAK;C54t&=`8UDI&;bB%14?nn9+O6zZ za+-eT3Wax|>^=lRUpD34F>1{o;RLDnPCk;;V1|HHEPPcu#K>)t;Lg;a9b^HY$~Q21 zCcTP9KMC+KuX?TH-BunkX2PS8hnBnZb=NiG4VHbR~an&AwV@5;$ajlPQnR~Se3oNV(83$ z3tuGMl5Q40FVqzD#9gG6G^KQ#(lhK}yOoGqWp?Qg_oiinSMy$U6%Rh7H;+kjO<($b zKwNiLlhWwEx7@iOrv1Gt7<1X2f}ej zENcg$7^^A3F&8(`l>=ivL}fhSuHVsnKMRJxf1m8>u&fyXV+udV6Q)=}&nGv36OQb7 zj%3HwpG3|5&SLG=y2-WZiC%)GDa`8mXs^Tm1+tMo*qadYlv*V2ygezx`3dSebKr?@ zxcx(yc|DZxF_aB<@H~-(Oe|1Zdy9mX2)q@V?qZRohE}#KE|(31xn5#LX^4$k0%!qf zjr3!$V41n6Hk{=OdR)1l3YI+)y<`^ArMp0bofPWys7MFyqrsLKtYrC z;}d5EABffQ&eh9i<`1EfC2!^ha9~07N-{XlSHw&IS^k1jJmz@F2*pNx?ODirT&9^F zMT%`{0tRVW2T%b#H?;THwf7OkiAyQA^F@yg=PiJu9B0kaRzthhV`s7!2SNjdV;%DSP`Jf~JE;wslef?8~ zxXZguY}UicZWKFbwKn`6B%LPCk0|u|IyR9jp2DUuzYEXeRua}g2I|O6 zT743QK6A_+9e&@{GqkgO#saHSN9f#K%5Q!xTQ~s)dyN31uw2x?s2e zrz+)CyWbpj=+uuAyoWkjOXAvft_bvbMMQvK2!M_EcDT@V}+a z?ZKk|KQD;>f4<5~xyo-ZnE!-I;^94g+10SIQE!AAIMRVYUAZ-D!#aJ-yV4@7Ht}EV 
zME`lD6qL^r7;*t=$OPR{_&~t|vZUTd^F6^6T)VLN#b1J*j~ptpNv!O3vld@ZTNu&O zrb}K?7Z)pQn(@-*)O~!3^V9}W?ltlWXW6T5Sh|BfYmB=y`Q;y<8NeTuq(prtkVZ?Q zXO>nLAZa1qyIXj45MEREaBk8q@_EA;KxWFgAjQSk?N@iwlt7+>=i&%QRWopaDQ^43!MMxKmAnt}D~axdCG zh)Q(eZmMsBpTD_rBd}YG?4Oy2DxAHiSj*8}n&F&j?9+Xjxz9Pos;Iy3x>x1X6P;AY zH@MY6lgZI^9oDEz>wus;!?Mxjv)Mc+N++^jgh}fT9|=UK<@)0f zvUy8aKkutoxB~YC^3{}+EA0mN zAY>GB&0jlmK)?@uf$sr+#ZDT(KO?u~$Vpl8TNgki96uawk*kd#{6&1#_m4mH4~EK- zzDfD`TZ{O`Zk=eHMRt$iZo8_1TCk4P)oc&@9+xlX&b!A;HDxIPZ#^PygVh-Rch!mT zaGc=yfb{B9o$%UfWbLSpYPPXc;xrkUOw=%XcHHwh%xiLT^S$v%K*(*-bDLHM3*Wb_ z+4ZRlz@|0H3bE`HglZlMZ_9^+BnvfoGQPzEP3rM!I^nGp4W9?M%_)8mlWkThVDaiz zKrqj)O8k)z(L5Evk+*})o!Y)xR;TuNaDdQ1+J}rA>rMw*ZY^&!=i~i8>h@>qC*xnS z@UoW4D~~20)LU&eQoc;j!u<>-4O{OE>LbcnQy6%ZGX84#TV8bf#wI^xX1oqlL}6m9 zqLb#(e-tAhBeUEdi|hS%;@2?a^K#`+&G`dI!5q`u$5p8X*Z+KC_BEWLoM_r`xeBI# zrTL?qmrh&Dw})lPIZo8BhEKV7_EV*Pzv;sxn|d!A4dS2!$*4&>S%$@ekD9c^ft^+| z;}xrt$X0%I|ETUzoo3gc-+^-izJK9$}c%W{{Dp}}fZP&XP6uvz5t zmju->YuzlS=5i+2M zbas4>WX>#@SQshW49Q2xF05qe?KtPNAF~|r-R&SxwHm72bGc5j^pcN<85D(Jdu zdh;$f=XuswgUoqly#wzW#Ffl-#+#s(f$o4czsSAQDF&g_My-RXTN@z|kRjSO;Qi>; zke-*9QQ22K2Hpg7R7iA%6FT(Axkx~?Ug+U~?Cy50-VbC+ww2j#pwJ(F=AUHzj+qlc z8Fs`I4y^9Qatr>*@pkQtgIIO^T&m8AAQ_YH7L6LZS1&u>mmODKH&6=})}v(20U_Ki zTlI4q0}!;%n(7Cp#Qecw-}Th@RikBSG9iAz=yEPZ^XO9V$CBlHo6h>>#CEg5i>z_I zR(0RDSl{5(odY1=sJz9u=mp)pt&L5}ABe1&M?S|=-s&%aT0cY%Y#I$Zc(?as^^*&1 z-|4{xe($QVI7ZbTa=Ay$rE#-JX=@w3u0e=rc&&Y;3_#R5wLrK{Qx9Eft6*vD{D0ij zHOk(W?7S-Pgn3R}Lz7v7=#Z;o1FQnC^0J9!N}2rkn!xWIohA7i{yC_ju(NldZQXb? 
ztAk2fS7D_a`M&&B8WTJhnZv_7_n79+;d35$%S#|F)o&FOuFvCD(!1{GN_W2X!NW-Q z&l!ie9EhN}GO1;8YN64HIoAwH-|h497Ox-E+c2t?HF4mdNVnXskG^UTtg+z9FcdYG>jnz4ZEY(%@OeK~1s zRGF~QC;8Q!-`Du}w^l}}%@H#(PJG^S-_l0db01*0hT-h-)kcBmW)e^P@Z9J^Pa@9z zgjqG28BJAcYU?AZd~xv@b0LTXV|xGt`!VXLKR;py>0H0|wC1Z}2CbAv(!Ouq;~!#E zfyWzj5<0`qm(y05fIkB-F`C)+oUZ36&n(S30)(ZiM}=IrJiB40X1@$+4n z;?IN=wQ&E0o3;6=G3eoR-BR71bVpO{6z> zAgrufp=VNwC1XA_&2^7!*S2Pn47?K)dE{H%WfKAJV_(HoKP>Y3<>KZz##>P!bq|Sk zBlTUJi}BQLjb(J8#vO#XR0E=>^HvwV0lMEv=s@Eh^0Jv(I5q0N*~Iz#BMzgtD{yX( zU4>L02#a7>@#a%t3))Mb&({dOiVo6#Wgo-T z*o`|+XaHNb#8+`y5g9@Ztq?Gs;qUJ^?e)HnX~L8VDaU}!$eE|QyTz&pUw;27%WD)^ zT*t6EFNFB+7$ba3^wYi>{~6;`v{si9(MtL4tuf61cQ<(C#*1zY!)7ZFZfr9gv#Q9f ze;@hBQ$#7@6AMKhRgnox1Jz+2q&-)=m&qqvhvCf`K*F8>t|zc15|_coH=Fy;uA=aw zULL8kH}g^GeSp5z%MD3PF?6@vgpJ%4cRIb5vH~1jLqeTNr?mA^HRM3aZZ8AZP1Z;o z0Da#@er5D?n{8)jGJ&bl1T*Li)ejRopa*_)cc)O@*O8n|%E>~9*?qa*ithO$#L80b zuQo>i*|&qm9Jsx*b^RbD?%OqKI+j4LMl$}JVC>{aP0?qK=E{ zMA+V|g83KDt4V5`8 zEC}af4cbsn1Ows8C$9$;(NZfNsR8c&^%BomNai;`-0E}Z%|gC_)pElQ@SaP)rZ$zF z`0Py3Vzo*Pl{CQP3owk>r2!w0y*l8vSUsPL&>2{%f1xGt+bkor50f^3I<;O*MZcUi9UJ>E!tci6II)3 z2_s7ox^XW2VwaOmW`BU40?J*jpe^&Avvro=uLc%=#|K0VX6 zJx!H#3H7m79(_dAXu31p|E%rYxx+WYyIhI`98N0Qkhf_p;ouwjij(N^0%JM0mw0pYa+d0d%da<)Hs?=WlY5d-Mglm4W1KE>!z}|b);GROzt-}8xraI!{k$%C zRSIO$$T-jcI9r#N3tnp_E1D=Y91P0AeH$FR`3cAHxGBzO@x7eI)kbz7;xNibgCR%Y z@%j4SgQ&kuM`ELvyS+6Zq>mw#1tFD3!W<$VrFZLgeEvi~<(fZSUKa(LNQ*I6s+|$C zu2v)$HJcZ3Xjq>Hm0Y!Webf}W_1NCgS06`Q2loF0(sQ{0HHVV7C|?eZHd=?Q49+L% z?@?XQ!x|Kf_rUn>`Z&@{b~zXpNnZ)v>pgGi8ZAOFp6|D)mqi<7N|9p_CnW1g(ngL4 zk_oRB2B~TGJB#4Zr^Q{+wp2q%+iDiE@m@0+#RJl+Qpq9ZoQcnk_~G_jomMX|i8reh z-Sm~uze&`g7k4U^cF98okKwJh0OD}QdP@s#vmANi+T-T1rs38Pvms=q0>49hAFF)N zjUv%W`4p8x@$>_URH(~`#e*n$VN!XfWPi?t1V-^C-^T^Nu4BdX2{3cKd>~o+yr&`j zr*Mb$)atgb5{5xdQHV{H%y$ zy(8qwmVpP=?)ZK7F>5t>YA-sYnd8`Q$2a2MuJe?Pm5yYSJazxr-TLe#+oPR{@T-EBI6T( zq9{b11Wwdpd9m|~Qh+*t&`X-?kiU^JC7ZvDb&?*PT2RBeB=@MTL9Gwwh|^Q9nvRM`r!-N9|l;JMJ0}!2lW_WYGQcL5dOYHJt@zOP3?b*{%Cru0d=? 
z^nxuL%-urhv)e@K?%v#|AwsP>6TUT5#4x4LYfz*32#C3d<*ITEp4rVF#Bygr{G5}@hthK+S?y}H0xSwH zf|h6<)OPo09*|;D-LX2R>RP;O@=D}iJy6l|)LM_2mjvUb+!hKa>nHy`c+RaW!4oi6 zr8q;t%(A@_+uvI67F<-Y>)K$A>M(BoU{*Z*sWI;?QF=J=!Y?ykuTUyyrQ

L(jehNidt}CYuhO zpw|6@5Z)@$q!|b)8@7)EW2(Q*ep4N_6e)erTJY{|JfgoT(^DXB^|8MCYv!q)k5i6} z3kGokM|?|f93DgiD>&Tg4jHG!0{G`0@q>!yL^O2#+gVZEQmilTGY~-e zj3&ilzlQmU@Ialh!d#JN692axgU1~YqZ94}@aI+k-(@GneARHs#zW1cOewHC5%drb zFR@jAPIwiyoqGE!?s=$lzq=G{d(}|1n*li!6M6qKrB`}bOm5CEW-L~SHOplA4=V|b z9Aw~CwxMegC$1F$jQ*z6?4#Kl;1z-xe%wT=$mYy_lag%W6rR5-uSg zg)Kz~QAym(RNs^^osc>1ku6tm0mCkY$b4f^C}eeSY@3>!H|B^q@&I*I0o$i)$-+tt_DDvBo+oL(ATo0`cY~k8I&Ga_ z&h9-A|IPV@8W}(3k zha{O%w7_upM`$+jF??InjexDSfb9u|w3XCPr6W$O^<1pY zt|%(yB&QiUNp=WJ;_w=tw>ltuKY#nY;k+L1;juqt;TT>v{?BAu&_{uPp-(CT=O%zn z?lDJ2uRl^9yT%E^Ny=awWhF-&d-dJ+y$GY=zctXv%0+o<;=ecBye^H|u_2hi1mBvmaVrICE`KAf#o%>RWMxYQaW2L`5Q>tw&GK?b` zpT^9c!uco2chA>&(f<)9KsF=>;A?sr+OzCbG*tc2H(-VD*rj7L|2=hm$knMU$?2<< zZc+hXCVI_3%PB!tqCW?yJJr9Lgf}2nzo%$Yo<8>Z5A*nH(N9#&s!s+z=^*ROGxUqW z>F6{2p78bu7v1KMRKrD+=0I)HjnVd~sU+(2lbocb(p3+hG&KFLRzxW*>MOZ;0=~$( z@W3bPpQ7}RYY`LYm}!0JnX+$U^i%TtN41fj@!>|9z#^9&ySXM_Vs27JIkzJ!@fCiZ z5=bTC1zI$?@!0N6S)8(hslJp8e`R|u8oK!D)MWOlp8R9*kh4xmZ|vLPwOr0^v!ANw z&nlNva4I-Q&wn<~tV=fGtmU~%@?Dp^hS!h6l#9Z9Z{98*75_l=b$A2LB8@|_@9i4M z9k4pzw|0^~6x7jzM!HC=ggT&!TK2Tww9PT*!+*wtq~6z!e^49v`u@}S>US4sKeq1G z8R}+<&7;~t$rGAU*U9j%UUvDj(lR0G<_Uty0|8{SnLPN~P@vPO{w0uhu0%o=jl*nK zf%@T5GE)|Hm##qPwg}j_qA%4P&@zf;p`CS=B8CFe*Yf|vzOOE*gK8GxL^^fv#(2k7 z&b5GoTZ+o|BElM$8KcgypTGBmp@n1?3hwD%d)PGQhC^JgNTDw1w)Vw*s@MM0OrN_ zjc;u`IN#&D-Gu^Vc%oNImRN)XkRJHnni>9#q=ZZ4MIqI0#Gm15+))F$FlKT`n>lW_ zND_19yW%IlxCKH2i@6lvBRnH%YCHptZW%Jj-l{RVFq?Y>h3LRKAFa@yg$YZzoPMke zj(hlVjY=rbdxz}aUUX{;zbTkBiYg-@+ePIse7+~< zpHm^hOgE)ol~nwUOk6v8NdSV7R@P_7YTzk9@yrcal>VHU;wWWm{l`Er3k5%M_>3cM zkw=5S?e{QT;r#s*?2m+5a7Fm2(~{RwM3w2c_^*TX)1veM2*nOG$bCmxht2ix6Y3us zYxIfJY^9wVgdhJE`Wqj&FP1VC=T_ph-a4$u9Bh?qc(j@n1vua)Zi*O+jl@3JaEr+} z)pS)cv41G6GMVyGb68y9QweVnuGe5}0$$4+iVIct!=X2v?utrWGSA{B+Bvnej0BVX zvnfafpXH?RYl2yqF`HYLZ|`zO6jFaCJ&<|guMhk^y;{SEQfT~ytf4$cG}-!6p_wUP z_pe}wr0ctyuz6=S=Ug1E`R;eNUbhJzVHM$NY#DgqrX-GH(1fVG1+J(lT2x?->ShXc1)X& z2EAT_&9;9)Z|Dybk-l$!|3P4o$b^v1Zl7`aVeusCQ2Y98X@kMBPoF}+oN@6G7a0rc 
z{)_>kAN(|&Q1bkwKfWRZcht$3FCjdR!io6z=|+yjvbF9Jx!DzS}xqgCR&wErdIm++ZRDP{H)|8B3xd_;XB3@I({QU=o+*bXV5N9i%4 zUOvL!V~_cJ>=BxqAwoQ*)-N!gh)fOG3Oi_G&3qtuAz@_!I4F?61iFY9rd( z#3X`1V~TY7c-*k++Ogt?xOcgoNNR9||7R{=({V-X05;KDhuJIv72iDTl|>tOj!-7` zIyaMCX)X&3A;Uy3{zj|j?P7shJ{$Wl`6)lC4g?)4ltOaB={@1S`IjD({DkDoC4^l* zJ&w>~GPwqoN#Dw;Ac7Tne{$`RQWhpyoFsX^x9S1&^T;p7t{)ypmQEMTK*NcI7(8kj zg3#s^se|-_%WI4TgL>*fKPIb>1^BRXAog74>@e$SR%5d9deHj3_0 z!WuH(IBFX>rFrYL)iRbx@N+Y*Job*s?)^}a7>$lpc&8;Q=l=5cYkt^qtIHMad+p1d z4x^Lfn~taoA!)x(+?=?PXSpK`ODkK@C6U^m827<#gS?~r5VY3`L8=WqT=qTJj$BVG zK7a8S6oPiX(l-5aiZW?)vu*Y)CVV$E1E2M^AI>Isb_-PJ6BQ=?I|_fJPkL-XtKza4 zhQojNC8(aVCQRFv=HxdpZa(r?3w;_@W5tkkF244C%-)tbo6;7+w_uAK#0^tS_t*>0Zc*`ev6BB5=T zzC&)lBue9gj^z20%_5fe8RQTnZy~+0!T@3xrI9(H#0Ke3yDd*C&h{lC?2 zb)|H)Xb7#@inDds_w@SHuQWbBh0tNYT_0%0Z$*CC!oalWUxN25I|IfA!l`R|#ElTQ zRl_6TfMpJuXmvHIe9y*t!>#tqH|om;fMR?g;_u zDiwn_6Jb+WNxlonmpu4JY9ZMN>Q$s%ze%fy0!t0|V7S!3W^&NR6}?Bl#3cZY;_U!;;6v zq};hy$~$QzRLR7={8P1j5xT2@-9`P#=r0A5TJmA6bT}bY#JcDVTU&>BP+J=4N z)^g%;x%+c7S*3Lqw8V`_ofR_!aK)=%wr{R80s{$&)I>{d3WPzkvrz`}{R=6KgPThJ zegf%*Pv$18KKp1A5}~dsMrN}eE@HVzMReK>q5|cs2U<+QGh|6_jNR0%ZNc>3SKnTY zZ5^1`(U>2YMErf(?p5rxtrX3+SEB^w`od^_r zGo9$jIp+FH=P*=d^nY;?y;`F+RX)G7yF>pVo5Pyi=vvn|cfTxjaGeXKLmA$RDguLP zR#Fh@o?|W2^$+eD2V7nz)VD*YZq`z*TK-8Zc(w>xF{!KLeOhkZ zr-h$=_74`y!QH>oxq#O`|34w5r5^vW4!p{7L8j+e=AdUf)a<-W*3 zZTw1j(#mI=3=>Oj#bLnaU&b#DMcijQv#mrh9x6rwx{`g1j(+yYRWpA@$)-YK&U*=; z{AXzj;G`6YX#(-kZyN~mWfShPx^=;X!d364*a??73)GeR_LD*@Q*kxcP4W5zhCdQ^ZohD zV6N2LHylrG-VcUnbkzn^YZ>y)kh>~rm@+LZVKeu)O_D=0>fZVj$yn~3wpziWm-a(` z{9V4DqWrh;!)kc+@E1=!qJpy`9>MB^h~v-9jdTpp`g$c-_vO>Vob;P@NR8HXlg1QJ zrk6ff0YRmOodeV?g3*_m$SI$;_ZN~A13&4;_B;3-dWr7}h_)RY0|Kh7Zl8q}4F9V$ z&r@p_ID$fP8Rf}Lh7J{N?J0K=EZQwyIa^vn5#Ij^XckKle3At&KQz6l7x7LuX$iUL z- z!kUYbOo^T~4u^itT)NDWcOQtx+5eqUZf$JdTkP$MFdpXbf<_&H`0wD(k0!U(y>YD4(aZc?(XjHmM)QQIBTQ7 z`@Wy&Ip>V?{9m4T4#hDxd#}CLHRtu2^IKy2?K}w&80{{wNvz+4@Fj)q>QOkItV<<+ zS#bqR0G9d?arR>bk}j)>f|e$HVvRdZrG?qY2^g>f0`Rmk^{(%IcWGJOc=c|wBszVK 
zId$8|Zd+qDz?^N+nQQKXaU~!0;zwvKYX-PorLlizp#e9Ob8tuApv}1vb_y$awsYbt z=M{VG51E-9Z}sD8_&BxB=>_jX9kc=657xpI7O6|Ru$YIs#x$P8rd^?m-#m-_HVNkx z#hAt#Ck6#9w<2IZl;o`TVX1CtD9e4v4dJAr{&YN^BRuVE-aB;yf^>A~D${k^hN|s^Tbr@Ih@F%?5Vp(gj@nkXO^?CU z%)p&DQI(qW!zQkMs{%MD<@pV1xon|fS(17JE5g~G(hMex57tMrR1?-m`iLA0j^FxD z)SgzcT-DrOV{qF-JZm2f6%iF)Ttvg_xE(#^Nyd1eOU;Lkl*13CmeNe4uX2JUm4n47M(O7sby z=nNQr4t1D9>AP|D`0q>LnxUrgsE>L;9>8LiS?xG9Zj2Rop0m@G-&K(V-fBHN1oGAk zj*Y&4882Ujn@Oaq!_kIVMw6D$Y4K}@h5_-y;ZB0WB|Nd#?zEMy6PHetZjdS7KWEF0qiYrLmH z=qa3yTftK*xoSb$vd#G5IuwV=k!mw%LZmSdMSriExkmZ8;E}a9ix0w%gaDJT$W(9h*cE7%ZGh zHSl~YS=@|)S_jrlzqlPNmyPl8J^Uy#;`BZ~?U87FOO@vn?Bl=ZZ@Z>FEp{>$1V~p% zIYX7C`b(f_7$=7ET;;MdM?m`?!d7%zxCoI1dAz;jbSHx|RmtmO-Kuo$<|alwr`^Bm zPRm_Pn5Dy3(Rga52>4AKxcs^XV3W8<(R&+?JYlBjlvYC%Ss=GH1R+)_zS$!}fs2|@ z-qmp5!Vd!szt)}5N92}M8|qPtKM3hF_p5jfO@S~rPN!)RPuZp#p&vH zYZ5X(`3z;_>qd{Oqi%ELhBsZ8A8y4lwvNv+&4d)4FuN%#lyJqQ*)ni2tW&l#fI)0h zMpO&OqKV#wAh^Tyw^8qXoc4Q+pZRO&qBxrvnbqph$;V>)%X#>Pt{GU)sR}8K8nqJv zNk8!w@Q%m3QyG_EiZwntKv!!rXok4WXFZ6uO8R4OPn{K&VD^T2*Mw}UVo!EH$k*L;kJDGxpxVwJb#*hh>!n|x_}%e z;JkmuFvdxqyAx1O-zpAh7z#WRb*(3rnhk%d=E2KBJf04*>U$&)t+2V9 zzS8P`8NZ0&E15i}E&AIr)b;CI@9-Uff86<+zk^lE4iKHyLu5JVp|J(gD~pF^TDsXV z8k>j2J(v6>gON4q*+noyk)4T}e(zpAJG5hPa|S>2QURDaeVUn@^_74vS3>k(#)7K) zF?S6kuL0No>WJslV*?=;({WiV2{mZ0m(JoF_h&aRj>hE99~F3J{;ANfh*?vKQIK;a z7}IWp<;1NJXVv1b?&Ndhwvu)@GKFr)FRb-2r!9ItRpJ>o9G#PT#b*L31n$(R&+x*I z?{UB4T345T%apig!Z6gHNNn59=+h}HnGj5zg$dyW(a_(EhjiP44tm8|%JvRm@qTXe z0am@C5duzF-^cn6Lj2IW(N`l;0)qz?aJ=6d)k{t7q}`&~j?*I_Ty{+IWqiM#*_7?U zFxArrDF>gDC$jRYo&_?uqcDy*)T3aWa5bl343~5UBVTZEmk-e7sSnnaAS~3XiW}Xu z28Xz)fB(9|&Sau6tUs{B;Ghp0+`el{=dw^>yHI??n`lmAfngHLxdu8&%r>pS8U8&i zSNBPbH}`o=vXEJPIALgsTItcTMFB!cuAss}S!Q|?lKClfoL0~n^v2=KIR_Cblo3Ux zFjXTYQfG4xBQIq_j_u9XQ+*Erf8Bx|-v)hNpp4`Bed$21KWgWg$w=p1rv?i|6Ec!9 zc-y zn6{_*?C0RXkaMylzDp*5K~rPV}XPCB#-dKE#8&53Sy z1KyR{2iF)zS8Q1>H%X<9trN84lrsUy1t8&~Pw73d6BT2Z7>~7qX714bs)ia$)syX* z)@;X{RCnf7lHfDE;gC7J!+i(BE7?Jr6SPIqx7#^J`p8GvG23>f$dp9>3X9mvEk?Y+r>hH?>TQBaff7HNC 
zEY+1LQMzX)Zu;*Y#w5(diuw(ozS5s7!eo%5_0m#KUHpREyn>v3E1KPOs6(U)&O&l1 zy$fPNZ%+ASQbZej!P!}U@}La9+FsbtmaWKxes8bR(Kpa!uV5@?_Ly7}E!K6*u?eOUK;wTti_&Tn}&S@V17C68svCJ9(+(%jcm+ zkFx^RY#IU%lJQgJTw5d8GV)7}{0Vu=8bk1IiJI)b#l0PB1E{X?@JIUzMibZ71<5cTiusTfkK*Bt^vQ3VXd|(T z=RK@qGif6d_XBU18K_fhrhi3C1|HhC$k0ZA`b#&;G ze=%cQk8-AUMZ}X2g4f2T*pe6&;%`22()xqN%@#KdJs)DC(n4o__XJ7z9hUY7Xt}9) zlgf38SB0u48|qx3DeJgmR9@=0oqiSxljZ_TrCXL`D~;naa1Md6+?TY443|wdf@M&% zaU;DjMl~#)2bY!TpsMTL{?2DEyh9)}O%_tDI;Y1su}>IcN~S)$j=ahS z^@ofFVpOfc9yz8it~vycHSsjXPtLip7xs7u+l#+1^0mn!grSZB_J`o?L}_}NEWg8* zzla|zp9*xRHF&pQdfD;=p?RSYJsWDtQV&t@X6{|yF=kk4qqp~?BgYQt!eI3QKn1>e7s zH{4t0=#q@%CbLp7{dQN!bV3;HFu$tzXlI{Ia*vxd;Hp9+95nM9qg!gU^%lmR#y4^^ z9gF*xN}#9_;g;{f>aCsk)w4#G|6(&^yW@V(aB=j#@x4;~eSRLO2>nu33RETBcmO?k z5>7snd&25)O5sT2=be%D#WnV&WASUT$HabVwKpIzo)r+z$(~Xh&@Uo`Xhu8pmzT=`kT8}Jd5gM-8k}H+_@Mb+S#30uivtiWJx1--X!tO80ZxZEob!=5W7OVVdxe|!bd^@h*~pY@ z!y#542>$94t~?=F{I*WH&a)iZ=UHcxS{*|DoKVH|su${X7x*D6p#pUmxhSPGCYVB?knMzhPqS=+q^fw%^V zt%+Oqq41svX!7nfb0nxZv&d*Oh~WnUv`YitbJiwjUSolEsLgY&?A?mhc@6k|(#?0j z8uWzJm$vCoKB|Q*Opdy2ec`!-W9;*v^s@6{{(Z+WkFRq@MJ{33ItMLb>F;t&t0aBz zdE#oe_nn8ig=$)Re*uy8tpbs%I}W|u{Y4nZi zkq09SaAbphMK=8WI1+mf-+w**oB!S2IAXCe+g(?|Bc7CoJ?~mpj8( zXR;dS;SW2ex%E2lcSdFXkQOPCvwP&B>rhI&_C&0ec1P=&Q=G0UFehUc<@R{ws2k|* zfr}@eAijrkU45Muxd2$W#K1Y&^{Gr~5S6JHQtH+0crkiieer@7#z-f1mIFRtBnbiR zW0(Bm$unkS#JCj{sLYX9rylqH`}CB052J7sgo)!ZO7XS?W*;X>OMWie+V#*AZzwQ& zKNoN#N`GBMawmlfugmy-zK_SGCt$ZAD@DVz{1uD*@V21*sq`ERp*~f}$#1=RL8&$) zMc@8%AaC2pO6Y|7u@{<~B!nD%Vb9E6#MP&GEd1IQ=@>`xn}z9;<1EWiq+j13k_i>N z#NJW3$jizRL@2RC(T{*$3~^B5h1?3X4&^S1atNZms-(_zN|J!Ox+hR7lGuhtAcH(#T-P(ZolULqqxyq`7Ywc&5q1~>xIs0_6KL$qANz2=BTBIJc;U^j zyF2%!;Y3xPo=~+Q=Lh*w8jYaAYSBS7JDobOm6`ra+yYQ{LC=%rtnl6MfXsBw$)(N( zX)P_3ss;vQX2bceGL9u3jjShNK~3+t->c|*)ARLEOsZ3*8*Vg0d4rFu+dok4BFAG+ zX0QE|@iJ+_7~44o+`g}fYeN4gNktrBb&$`uJt%Q+QS-(-Jh2nPOcBU(18u9;a#pIN z8-oIvFkZkvJQc87!{9emq-*NJvJ8ito^wEq+F&3wI*@fMlNB4=#bu8&taiQ;~r{kkj)=@bD zi0Zn;Qoa@S*rt}1kSfz|%YTLR!Kuq6NO8QmI8WmJs*`yv;PK`XqalxfjzS2<+p_B) 
z>AzMlVcB4lhgVE_MPqKxcDKJtzesGL1~(i7>xg+MsJif8FMDZcJ;NQRXC1QtKxFvt zPYBPV(KXTo4*S~KovHY^ZX?(|;{i2<7yA$+BcEuz zDp6B*0)z;4yV?WE0k}ZgW|4x9Y~QB+r2WTgV+`$8 zC-vFv7&7)RrnYI4<*+t@1RH?sce_UFD)~_X<3ta@Xw0r!-QCx79=12b@iHVx$H#{= ztuC~shONc{>~ysnEGMk2>HFnzR(0?assblC5mRnl_bTTa#>Ub) zc95Q+vaK;bmcMbCusWn;iaf9yv^5Y!ya_-Wp{ucfVEtt*LZOysQujd-M z`bQlTES3<_sJx!yUgL7pC$ZS(&_CQc&}Ayf3j)mFFFc_1@$ssT>f$##mPI`)V^H@pI#bte=SHC7+`d}4cd#MxG6&5 zW|`zUqMLa!zr719kB>;f+O468GRCye*AWxGW?h0YMU{xGG+-z=1Yg}+&28vuu6=zv zy=t8yxnbWkE?2gaUf6Mxktzo@F#NV!Qm>>GD~6`3)aQK?!3;*mfljK|P0iE}_pn#5 zYSKmbLe9Bbsrl{ckl*!r&|I0)@QPGpPdxI;2i1Dp-7)=vn9p}d!W?x4@_^!c9~aqR z@VP4oiF8EKrJtlqAy({`fDoKAxj^%wgbr)qQZzY7*0^D+OM^kPE^B(FIO>Aq3Ziz5 zfyeWI33p|z-_p1j$;}-p67F`W2W5)}m21ZSlW8hv|9n&U<%g5gf5@%k2o>ISv}D

zhAmE+L-l2M7N~$5{?Y*lSkp==v$_ zRj4;xsQbn2UZIaP7KkX7e~k_7;@W*XaH<>a!RX&trby|!n?-I>J%qN99eukoS?&@m zTA?&A6?pa}Fjy$E$&Z|4^T_HN1G5iV^flA%aFttLX&ciecb!;ZZH^JBxYOo?&6{jx z-HQCDeRZbv_3Ak_sbUQdUoWYQ_Nk}evQtdm#;)(4wuH*%raLOrZrPEJ>_iV(AB%~y z+uWa8l0dEw1MuUqPJaJ*k=aB?j`HoJwBERWvEl_mZjmoM#Q@tO9Xef$Q$t+o44ejB z|850R_Ti(w2*<5^)Fql}>e6`nw!rDg`1#^@w_WbKAcBR53?3P}=#mw}LcExmL$bSt z`?AW~8-F$_$5~s;R6|W=3UltQ5;Rh7k2~+`+?P$&haQJEv^LSQvB$&w>6=s^tI1kA zPcTcGj?GJ}uPxTD5sM0BD@5KA>-Dacts)aLHpA8Ne&FLyYEk%;Bf9UfSHTX3U_Ak} zN)6@o?k2b!-y0!$Lk;+K5qs{1e}qr>mAtNXsg4ON2Xt)v*x)CA4d3#WH4|)18owLp zh8^XTwX#NhqptAuzgkONF6T>Tuu}Pkz0#rCE|Wr1GCNVXjLsS}i$w?uZVa`4(_3s| z%8JPw0rWNWfmdlwa?s8VpbML?j<>T9K{e@?Z#6}ri)KQnS>Td1hz7)Hf6U-0_tk1U zD|&5?V1Iuf8~xf?E;dcX97YOu`_}kt8iEe)zsRSG!-6Aq4|90NPVqMm06G1(X+Cw( zcPk>O3H4Uv?cQhFlDDhqBYhttF413;1v$+;sl4Xx&dwiVL>#;d=|(d2OE_Q{vt25z z63Gtzj5n!M(P{*gM%#saU;RLcV2!t5C#{Hq{Lz|tQMp7FV)t%p2f^GeRz*=a1K+Kf z|A6oOAtIMjQqUR7UC74;GOC)-EJ~6=eRAfOO9Rs=k#6ykU)Zi+xo>v#$x(9E*)k75 zD6{wC=W^8=Mm%B<2(-0RTct<~vM%|LEcILOp4O!6)dLlIPP1+t$O#_IRMl^gM@r!v)@$t`60^yVIh{EP|#1gptcx;Tf!2M7zU?% z4wVOADppmHm+|=D@R8Yv^E1Z6saq}gY*9jMctlqHw9-aoTt%imF|4XYGgLLVOFqPEAcW-PncERxDu?L-N)hv}| zEipsNI)t~597lS&MbbQ1SJ&aLju?Qb(%`v*h^nY;)Svr4hFL0g8DY$goEv*eYfo98 z*>STKLz0qE<&2Gqv)@)q?)1zOcO@@i%OWfL*vZutK@)A5Q6c7@bB`N2D)8%zsD&%w zXEe^%YcAPehHLtFKh~6cuL7(PZJq^MWPs=dv7rpk4+kRQ0S=l371&=jvLVVud6BIv zqJn29fy{N}wZo_cwql+O@ zDyM3T)9zuGyfCO{bngYytZKNCf0Hiqe8Sb*DQg=u{G@s$j(yYPMJ+{5Z*7+`&{nMQ z{?t-DXb=U_R%6dQGMoGEgbtU;tYLqxZ_eA5<}D2*+66sO8x}@bid@u$hQo`}lndO} z78Uz>p+(6pj_2`+Hw;+yp0}ghe>n=66Ks?8)GRG9J9f-HiXeExbND#UbQ8-Sxob{{ zN4I@kr69Q?lL(Z2xP7grcRYl%T%QsJ5gIzni9+D!7+nuYRhO#`;TkXZlwOW${>{rA z-qI?|C|C%ascd1wN0ul)u>#%m>pk0@pg9lgf%%<*16`qN|EMi- zoQ5J2JeH$H$1ZH)ys_MP*Ok+3a&!KZU^!dYyj^om-08wGxS0yFZag6DLsTNSz4>1fz_Z=_euokYS9gf-F zm}cv;eP&rQT%;HK{nrLG3GV}zQ^>y_SdFy=qoD#fAbb5oy63!-mF(oBEa^Xd9Ow{p zTz_cC?RnTaP&Mn>;cyojkB01ad(?ax4T6tBXnI{8D=BaeU34O>rs24hKbjQ)KbS(R 
z6k5}7qgRukeoftN^n#TEewP4lbVJ0C|HYBj2>+_2u+03Q#Y*>QELEV4~eI^#OcrmI?Sh!Np!@XeAa+j`dQU$R?x}C z6UYg&~Rq`rXJFC}L{mj^Wd5qzkH@9(OtG>{+EU8r{;#x2DsEo+|F z$wDFB@9wBJPT_K4w1@<)&qTNO`bo%hpcnNa3?}{!%y22YGzn1{Ex$=YP?HNQo*cVd z$9iIcD0er(WuX-OP%VCzI#8nHMWXd*f0I7_uF8JvKU?fxE6k_@Wz-HOIpmDebBa(xZ z#ez!*I?m^MOUyj!)RF8`Yqh_vM6v@(o(t#xS*ajnG!854B#$Q4pUGacvK5cU_|#NU z_dw={e@pO0MFx~q<9KNP%uMxWyz0hMd>|?een=OEYSvar2n& z+otC6oA>wSEH!Dr1<*>fUUD2S7x}K_DptygzbO2ce>gqB^oSdrt`K`ypll`=7|tZx z$nP6U2>;_UU*<&IvxaN&StvKgPEy_OJmL|WK*aJ>WClz~Sr;R`nKtjvE7PlKchTwq zwR9kGGy(sxR1ZZ@9B|xzoC7oO=l7NVWetqPFtwfxy~xWt#0VzH5|0w{rmw*ZVAA;pIQT5n`jDQiR=G+;I8jjVj~_fO%^{{Kbc{5>xv3-oe{<~=~; zyn_6?;qO?0R+;U#*OP7sXmb3au>)EhT`M&mSu=t}*NTRUm3) z<$_ejZ?!nJdJcqO{VnMcS?fpiwC3e$|#wJ>$Ye)iu z;go7kqKm+9A7N~8N6+Os=3)vtard{!$jtuLt|PoFe#J4kV3pM5un@n7Sx$j4P>$j0 z_o3=&?w033yNUA$*X@j~!+)VSN3lY$9Os%56@ya7>y*wt*)N=2C6zRkeD2Ih)iG`f zMC)}9NKBltds18`tGnn7A-@NL6=V-xZ#Ksm@VKNzm|x}8aJKgNJxjy6N&Z~8#xa$X zVyeVEMootjRnu*AzDd?JR&4KHw2}Q;Y@NKy-GV~jRDrldi#lnMOTJuu?JV_FqTuCL zM8eE;h5X0nUq>6o%CcpUu?2INTyT#bf4hPYqnjAQbmjQ?4{jP0XKE)0f(b416 zx`mBb-Xe-k>M2w1M91TTkn1H8Hw?yaRFKpJ+nt5*ljc`}b$$%%ZKJN+c1{dyDV5)Z z?OI3CCe|p`MNH2xn(|a4QdhaX*QLV^KPK#WPV&h67b2=&m7ZhNJt+;EONIKUnO&^& z%<=<5P~bN?bu$6bkP_Vi*(^`;KVZJ_2h{?|jz)++cw<3JY!A|Suxi@%fs!C0lx3Eq ze)Zxet=R#lnx+?-sUv%2#W!NtSQLp`YNiR+mgDx)(ONNy5b~icqWezEb!S?K^Nu4Z z+C$y=Fx>wlE)*UnX9&ZKM|>KPT%W;hIKLbiwNx1l+DmL@?G#gfeLLc(!C+T(MN8E& z;K66VXC2o_2zF%oua5i{QT5?zhP_oU$4$Oc^8)k;A4$c3pJ)|w+PoiAA_NS-lf9r~3FwK!b_#x8B5&bo9R)70W2n>JN_BY(bj7;Pyl0jV zQ4Z_S_MpC1-ECo&1szVRMg0e)&DGb@qkZ4zZ#OThC6c8yZriIY@4L6;H&!kgloMm* zngZ~XGaZo?uWnU3FC0pcPcv#0&LCWbUN4?C)LL4|DBt-n#JKsS-{%3BPD@w>d$jI|!m98@L~Fi9Jr|pjKJO)pUgU7Am*QJQZ}+~vKw;&EfEsnXOI%Re0Pi%n z0#s#D@ARuw;IH!(e>&#cyss<3TT7^e@Ex^G|K~j~Cl_#%+fjz{!Y5d;vU6y&LVs#2 z0lXjq(mw3NMls=>)O7p#mGw2D0MD6fIXTRb&hp}uw_5bV;xEq z6BKO=yh1L)cW?BFC{K=1Ke6eH?p%7JeTYLlU3Jf#@$l^e9X!5XfeWFRTK0{p#zdDu zwOD@pGiLomaYbSx;EcYXt}m%btQss~<_qmDx!e&pMW8p>g} zY#wYEv~_L~a>&Xl~{Vl7m^RgRn2W&;*JCN-6EJ>-t#JqIXTs2b%Xuua)dE%WOaG 
z%jovBubH3=;ls-;U@s^bB@&6umJRt_$aiy&bpA;F)|P6BN~;TetCwSF)tzkT%Joqn z8>}Qn&rK6aW8+?z&WL3jY%UGEW{)r1hux{%^pm*O>SkBYq>J1CqKfHX*@_H)^ELMU5jh?ndU(lavvt8xaW( zFsFZ8uuy!99v6u(+<5`uA+)rql$+pR7|PM5DDA#|{#Bgd16x zYfQoX48{A|=IUs%==YDZX36F{D~1bB{otkoM=Vjk1zA1TYURe@jr0xO$$IH@}N%mr#38_2uY z2ZH$z?h2xxHo(VwsU?ck=2iY+LY8~mD8_X=HFesu|8OYbgu));XHY7OhR)*$!QzkQ3?d z0ToqtQIDD^&Tn0^clX6LRN4JA9He23TU_JHF%OhM5zU>U1^)HNr7m||N~~lKLQxJz zDq!%{g4(+tjr_Ch$hYcd6=c<7RQH>>-!*v%dZl`;unLl4!_0}_5tNNES?!UvYYj66 zq%;7xKBu2$s}d7}dH>=3P12)V2(EqP>47peq*WLh6X+uQ@`s#2nNP$N!+ectz)w|@ z=liuDCAER1WOQmYSYPp&%*>Ll={IQRt%Dd&up*a0wvToMC}u+EL>^D=5_eW(v9zh6 z?Zhxx1aH_dFPz2hRp6w;EZl@Ky{KRCFWPLlpPry!M8(3Asp@q#W01(Ve9;v4$E$eOH1qgCn%&hYxr4Suk^2sZt0Fc=_46#UBjhpK_(lNl3`j!~NC_ z=xae54(fkL54sj2KJ=ba%#F0*J-&nSjxIo{m{_zto*_`M-72fc&6M{y5l2)IKNQaG z^*ze1^8iDiLM+(t8gE=5Sfa$GEF`C!;^lqN8%XFQ30+qe^O98YO;B_+x-6UGjrT9f# zH}t3tVwU8EdtTt0F&H~4Ru#ph3t!UaQ)L7;wE~=O6*5Cgn$QgZW1tmVFjqSG&v=>~ zI2twkYmEOu$+zq~h0Sv>5`Hxe+wKPqX-W!1x`qq!HOO+rF+jdPg*UgL);DoyxP&qQ zV>8EvLF%FQiMm^GV;i&}zMnZA;sTL@-LW7lIgh^4AS7wCCiyU%;8}=39i)$B1U3MeNXdWOu7X5DpLjzJ!xzB26VP(|Y5Q0wW0|XX%Wskl^*?BNBg0j3SH|X*4(50MBGRBpW#`d!CVUS}7H-l(UNs1{oq> zd0+tcBt-&NAFivt{y2KXd zE$n`rMqhsX`KP_dKi*ZN*2yV7qNJWl>uwrqvS}OfY#uCCp;fQ~S~)o5eEu7uzNgCr z=$UE5>vh#|RHqOLAk2Ly0_u=QDr`_a&VC@&uYGAu&$ZefdZ!*LnQHQGb``&WxFynK=m(x=7> zDe9k8$9@drN-tV(z^tFcofLj$Oe~F_F(pdWyS2gx*op~cN8ANEiBy=G3l$|giYoFM zcK`(Gk2geQ&e*N&<$}+_vv*35-6qv|ECa7SK#&b=mlb>bvXm&z9#-iGNDhy5I*9oX zEOIxnV4}P5CFdtR)u~8fQ@vdJXF00F!d&>6^@jw>4sJ*%67PNCi{vBu%&jo4PJ)kh zzSAIgHp3P=X^RAAxWOa;@VRa8>sGhT4-SdA2_uajhkhr7CBk#shL$VEBV+qvYPLpWE@KWA;E#$viet8;k@0LuS>y^T)Nz&O1E#npye> zti*7pz>zN%9un$urra^?FF4Qb@8ot}l9oC_GWe0MtG`)KA~%N_c`+(SKOepJ z;{XQ3=p@81#WkFC91f=jpA0vRw>dCd2K-z;f2N82rS#P96=bj+q9 z<2lpgEU7GBXJV6NcBPb}%0%x0yJ9F4_a*8daF}K5g4DS~8Xi`kf}**~bbFZA`1N6X zXyu<+`hRH;g}Iu=82;SOTpe6QJ z!|S+;B<=78Ie(LCy!sh*lDJ_ys>^@Q^FcI)!Jkk774Pu|JIyFSQXFENZKx@GT@BfBjvV#+^xZ*S-~o+576Br<$gc47nVIn|g4N`;xrZQi>^ 
zwQ&$F7y7?BY&X^$DxDz54nQnYo33kB`?WYo-}`#2W~ybD&741yhLlX{bK)wTX!hrS zBQ5-fMs+$`ZQXGi@y2;6`u;U=f^RsLAh-97Hy|`-PN0J=!XEV$&ssd5^B;|c%sMis zYqo(b4=4nnA5k9pYn45NsQMPF>_6ylni+pH9OJ31G8c$>oOuZsE`o>sded50ZS&X( zfcZyLv-crTKIz^OE)ZcH@_gP-OaWq7omYC6b{8L5QMx;{YCEP^ri8E;zhCCIwd!WZ z{s+V8q}ehUNgMzL6LHqwh-ZNo`|Td8aN~W>@ybzemy88o;$--W^F5XAcDX2tht*M; zYS{(iBu#kYf_uf|4SBZFn4-=pw}Oa%0StfBr$Mcm{R?i4-fdpo)SA~hNIGgQm$PHi zDRhro_lwc*XYckk89Qu8VNVTsq;@4N35(i(v^L|e)ed4+c68=YE$0iNEC?$HF@D?T zGz|T2@X*kRIlHHz{WXf{oG6%%&Ns_7nU{NzLar3+*cvOL8fZzO*p!|a_tDygsZ$ki zt2~?4a#EQ43h9S&!B@5yMKbuKt`1?H#JG0MwDqHUmIpLK6V}tC9iz|N9C^R;;jC1< zL5hETz_~54ezC@m?ovi!YHuvIab;ZDm?aux3t=&vk)A4OHqm1)q{~j)CbWcR)D-qCwO+Al zyL!|pW^*$WntOk)JQW!WLn$jkoG!lW`Do;y#GcsbF_$j?-&yO3kbR^6a%%xkgOiiu&Gpf4`NAeV-W{7eov+Qw5rwO zKiye_bHDshY0=!f6OTXOUZIXIv4~LvD02B0`-VxUGQ||Kx~DQ((d4`HE)v;IIwY9s z(uL)GPfIP`LJa0`S?clr6& z`Cdt1E5#N<$Ey}>phgm^Hpw^cXEC-_Pd8Slm@8ieZ*CjM80eG99tp*4|rAXs>Nj2hY`&$j>j(UYtjN zy0PpLH7*g1M*wK|AXmq%>20Ov`^};LU!y897vy~)DXf8+Ff6ezm@-?0EJOXPf@Sfu z=O~ntuD$vybgnxkrf>!O$`6c&x34FC-ZGoFHI_BX2Oaj!K0JifV8v|jSsS+q>d8tH ziTNq<+NxSDU=++0XDykXf8oRaiG{+D%@o)X6tWeO)V`-7k0buRdTeQ=jjz zCJy8*1pTi5=}yq3c6D2krK5nkB>HoX*)&rpxuV2-YmT()!M;r11T%zFK1sJzpEj0I z9dB7<;C{hhwPtO@Hi0LKOOs*(DBCq7(n+YXi1~HM67$SWa;~#qI4k?|jyV6UP9Zy( zBy7g9$9@C+P4_6;x2i_#HYialCFjK{&`h97MhK4>LMTv$q8|;@i=8k2<8PcVl})#6 zF8)*-pVV!KX2_q^G#bKj;{x$dZbtqogFs=7R+`3 zey1^{-`1XAJf*xnpS2XfK%08W+cV&8B(>`wDWf;~?3#XSJwNPl3+CSs7{MSxtMnZ4UfA?Q1r(urf85Z!U8ShkZ|HzRo_SE*XaUhfDMPr%bUH z{9#o}2#U-R*_wE!V<6Qnc~MqbP@#vVr>)<9Zd7!@Quns>L%zu~J)<~`54FVfdV_xE zl*=egkgViy&fPEbwwoJEklbU`<=v#iG19cg>d=SJFws{r*Yv%UTto}PSpI0 z_K7hU^mX*?9F5{y?1-fiD2khqin7h`4)xm#GV-cY{FQj(myu@Rk>rRY%H4!udd@Se znIADh$b*ip-tBo#K3O}ers(Tqvz1}iLb6A1!%NLg@sl+7^J(tR-hRiW4}3le>b-QR z1gCmfG4F}=09@j^*kGfS_e+<&d0xH22OtC0f2$4K>!*Omz3LxS#HkSU8?*RHAu*L+ zs7$u33-^nhe|h1xXk_#;4RyHdIf=w%^^|6#uD3bK-o;$m%6j9^`g3&!UpMG;20M0f z4J5J8u&)Q;)2%*aHro=Hp8|9phXNI0XBl_e)%B!{>2Dgqscc3RBze;~>iuJV*U{uw 
z`*`&uJPY@`t+F@kg^O}1?b~wCqt=%0ovmn3*{P|O8=;Q^{OIr8IZrD48Qshlgcerd zRdb5W;jDZ7uZdmE?eZ>%yK|aoblA}p{K5AhKgT|mnKayd+Y-%^80KHAH;F^6PBSw@ z8-(Uj{^4tHLMzI0(CQK6+S@eiKkZM6odjI2Q|7pi8}%W?-Q0%v!OVP@sVr=?soo&i zMH0n;f1~O5)n>yHSV*y2Jg`3Ir!IO@6=>?)U$ooo#OpE0DGZ_RehbG*lA>25MZnHi zE_-`U@Dq>_Pm)v}@3SCI#hrU_z~2=y`)jDs1lx#}{<{#x-&#Sq9A>T<4|8OU`NEqk?M!j3 zX0tj7o&Lpuf$}1K>-8CR&J&W3H(UI-WhC#m-Wjg36e!o5@X>lu<8G366^suaev(gq z>@I)g6Y4-t>CMT&=s!@^%07tUC@h&ujm;_R3}E1}xh9{4pauEPbagrI=gd(20@H#;6L0 zXun-(VGKgsJt8Jx)+11vtk_b5f7x_6rJ*FBG+G{qeV>)b@ny5alX?ozUd9r1><@Pn zt#T}V`@(77cIm9N&_W|L>2P(-v17ZE$O8bf+VIVhB^$97aZ2#Hc#eMc4^Er85!ckncPb;{M~Qm zs&xAg)m^7-mvs5DR~6yAN4wp`Xq9<77eZW9+Usc^w@=FM#z@>Snd&&SgBsEv?^hgm z=KLDRZ88AQ>h*^QGNMnoGUZ=g-VB|x%+b54zX~4njR0n6wVES(xhL@?N$P$B#wZ2g z1pY3uEtQ&VdS)^tMVa zFsX3R+#@q$490`~Dt}IkAN%X6sJ?GsUM5*>3Z3h9!fAZN{siV=D36g4u{F>VU7 zJ#KH{gJHXwGJkDrwFfko{nNc7>i}7!oZN`5fAjCJiasL}-@NUDPwZ-#=SYR63#D=MLJ=BzXg-i^A+cpUf^|F`y zvpu^>;Qz5sKVjpHNYTgwzVCrGgAo_$O{uG79A4pQjvPIkqdNVJ@+!LS&~i1j5g+W; zX-ggAtDli_Z*7&Xm>2rC>I99 z=4>=bo6|Y@RSXLn5G$6M{s{{?R0C7HIE5y%=Jki38|Hhopf4HsPBo3 zUIo}jF+pn|<)}xiq`+qrTEE5VUajy)1^-u>%h5lBxf@^{g`>Bp5VO#EXL7f zhJSW$;U*o!y!1i+#;*OrrG=uglx841WG^vs7DDv6D9o(c>C$sXI{@JVP;@bR*xL zMp*x*KILs)S~g4UC~xl}%Lmgkd1IN2ea2X^u!nNxJp~rB_SU!*KzRn+_Sj=B_# zZzsOU?i;#f$o&A$k5Na5Zx=mQ^n098gtClI*h*Qy*qm zC_NlzaMcU$kC1*SDT8g;Hga~M0_5VBkn7j8rA8K#y zt_rt%T!K7~+znPE+ox^E>-25vj%O7W{Aq3ue;1iwZdB*ejW^7(}$wM=azwi zIk{AiVjrW8UN$~^8Y|;vu9D=l|Cn2QE;J!Mh1UYOMospW)$FThH%c17U>EvG-;7u7 z@ImPz7{%CHIsdQQ|69+pIdu!R|4q)ntZvqY=9^`V+3GifgDGg3BAfR4oDBsG#J(?8Fl2x7mlI!lTd)}N)H>S6?9!b~K8S8R=W4|(GzhHQ|C ziEhr3a4K33tb^jl;De_GI`slkRI*O_3X$kzPl*-u3BWt{K>Se9R{${vP$EdrSz z0OvKu#Kk^Oa(LS~F#<;vCZdEa2b9HuuUCPxxKNoGQU4OGl98$@(brAX`X#;W_rIoF@pXjFelYAq z+o4{m8vAQ#e1DGxGozg0_4TtQo={{LQ3A5 zePD5`Pz|nV=d3!NWZghng1pr@w<|VOZ3?Ma7u?ZCmZkZhg|q(j)o&O=`s%~s;4|jJ zy1mTec`hPoEEX!Dm!0jXy?6c?!1Jvau7IUoaEmWYpz8EA@WEXUIkWvo+71mLICj?M z>+Hf+2eyV%O;q}>0DV(&ibkFz-Q0f#ORt_W{J(Ie^8MkE#uB=qs&(Dllmot|mPTw1 
z*6L9>(_%c#=@46=^NR}$!LmJBpFyG+yc{sY0vnF=kq4sZZbCK;YxXIKZ2Wl8c*P+^+@!e)SMGoqfl2= zy7h=SN$FdJZ9xRBD+}f?+jK387Qb*Lrr0zyPf+yfb>rVow*JPC`MGqTw54b;PjbI1 z<0wAW#I-rU)Lpx!#V76$DpEAr6ZrAi+_x_Zg5V%0opL+kk1Xg7wBUNNNeN&aBHjLy zX6%iK)Ng|g#NeW1{x9m@GAORLYZrWiCJ-P5cMI-Lqrro_ySuw5XmEE64#C~s-K}wV z8rJ~RkmvorIcKU)oijhCYJSm0``&%udtI`w^^Okr)oh{B&+u$GSfSUAzm7uRc9;EU zmp1YRq7N6HBSfsdWR#YoWde$Z(pS30rFi4^0PO@7gqO zsSaX&>f`6{DfK&y0hbk6m`Lw!G7A0{83RuI@ARPXt|?ZS3h7?YPmk>10e&&t|0g8s z|0_4hTi+e{Q;J1E2!Q{9jfDSSyi5z(F~jU)?-8|0Ikn$?OcEh@cYS}K@-h1R1j1z# zagy|Xk7HEpSWj0e;cKQx#;9nWHj>vWl_IFVn|fEc-?!GuxGv~@fh2{+!R_h%n1>?? z8-93<-_NUFO**m!5CZc3DR|h|5E(~r?udo3lMurZcMBZ+Ek}>y`+5X~c;TesVO}nR zVr_;T>ZmVud|t@d;J%IbZFTB~qQ$VU$6?u4urNqAJQfy5x1EM8r_qpqC!w05WGN^C zEaIK4`D-eX3^{X6@Wr(N3QfxTACk=((KND$p^fyb@=;6#;rcA^y4!(R1>gu%eF!E$x-bf#A9Q*;mGcr`&u!in?%2+e24De z4`~FXB$P$5xa$3vB?go0VF=BkaX@>lVwLX(2)6Yc>{(O#dE3#_>?}?k^)?X=pP#6W zv+8Ym^77#fXVwSaJ`5@j{K-NpfWY(ObU$|qF4^+Rei{cjWQ4+_&g;5Iv&8ki?Y-ZfoZ5g`Wp=Hohmr<_5 z-npRUC&GAj;M({xJOyoGzRTmwyuf20*k~iW8vq!9D0-aR{71)n9Io(Yw&ODvCMgAN zGqzQ;3ZaDqJP-)R7a-UZ(Qa@~du`&l58AK-j&8(zD!?@qUegnD7U+wmr3BU%Q?&aK z-^eLA-wVbuXZu4R?9!6-etu!rluS2!WF1V<)^vW>I?gr4l7Vm{mnt9d05EU{>%P~5 znVud|x3!U#v1N*JHQgUdX$(%$;md?SWB?nM1p)(!Ze@en9x20Yiu`DS6X&Y5ESo#E zx&I4CL{&ws+`klI9M{GLTojkwRZS7IY=vw%n@8AcPca+VIG`rWc{DSzV)3?Wh(gnnc8O9J00PL+voW?VQ(xJcV23+pookHpW){Sv1fDp2=~T)_b7h!jyn>E@D>8kJ%s3t|4YI>ZDS2n|JyvbMFU%~wC1T#NfTTat4= zBFsNbn9UO!q$M4b2SAgwa@W9bH6J6FqyA5+ys$p((5hgL>O5;ZsvXY_63O}E#KQVA zh+ikfr~PmLRxo5;#z(`4*jPrayQFxdAf!OCXZG|8Tuc8VS969tEm_D*YH^#)^6rlo zJr&&TK^0$ZL{)lJb_%bMh_5*atercijK3xp+Hg{CaJHN3opdX@ZujCpj^`SFoG%IPil$+j) zd|*C&r&ZIdPVVuC=|4KT<-7+FMzqEQ66sZW-k-kWoM>2s&5=H^n9c?0-oV!Ll8?aN zP?v9s>HSDy=QeHq{QCpvdbr4)cOm@MAATpKkEQ|&<%mD_#OM}u(<#2$7PMagVmDO50F`EuYE{ z0DHpj8wIesEd2r=?fE)^WwJQFQkN$`eK929&%wsAry`z-g%D0zy2W>(W)^o*OCi2$ z4X#LK8k+}nA0BN&edd!PE%$4qdJpsDKTqdMG_A-o6iCYIWFlOx6M?*9BjVH!`f!{I z)189)V|FtTTGB6BD`9~9yFab5XW>I#z)d?2C6$tGIX>_ZR69LmHzN3#hw0tz_2I1| 
zS6PBa;XpnkO|t_ZiGSWUv7i6XoU-aFa8b+E{;_XjAqc#Pi2QqM-lp=}B5e@fx^z_A|t^~DBsS2+KU2YKH}{6CBj0pA((zqnF2A3zF#T~U$+ z5AAK&3wr7%BKzfBNKUV5ojHH(M@qN*cA{&8nu;F~aKP{TMHd{ld72R7{Ugml2HAY6 z%O`o^+`jf==FxQGL?lum};NZKpf5YC=aAmUmUXYkjGS%?w zEmlF&CT>L|)A@aoG~*~>(n%!L@%gcvq`Jae5Pe@%Yyj;M8z|dHi~>jg)}dhd9tT^n z51i|8S^*m*NblIiocD?O@tB4M^(_Z%Ac`D@U31r(v2$6^&um9?|Jr>-YyN$|5o7Fm zI&)4RIG_d0O}LKdf+b#e)RUTD`VsgK6pz*O$*ST4X-ruq!rPl~(xo2VQa&#@>Px-p z(>~}m$BUa_@#0ys;`-+;r2fg0-`J~%(*R^dFFJZYK?wYt^hy25?DeS?G`uwVGd$GK ztv1lQ%Pd$P5w1M?Pyb$nqt6H7+Ki16{vI$+C&Z&CHRC0?tE zV%vESbaJG30$SWZP!wA8UVeBOikD4)#bA)?|3klbj=1?XMR~K%5*4dX+?Vmn(kbnr z3vEzv>C_xb>yW>r{O>g@p&3C^H&vS38zTH(T;EK%}C!;^f$Kxzx_g6H>&Em!)}z~^D=Vq-k3v)IMwR)E6F2u=6On$ zP`4Ck1vO1TK8~??)K3L@xtSmRfgZ)EIcV&s{d0)|XH6T>7E#3MpEu5b)tc9T6!) zN!#%=3})N7Z1l;-b{T<0XmRAX75fVwOU@*p#71qJqiFR^1GlS0+Tu(>3%?T{#f6)# z1ZasDngo}ig`Yw3z2nq*B>HNvi$;zc9T!YM=E^|e?%h$dACEcii@Y;WN6g@(22YC2 z{3lS1>i&8?FZ);Fr>5wkIAuPBHu#kbi?{iX${k5$G6Me&G??{vYNh?Z4P%f&5Iko? zl{RGNEZHB?2v1`a*ZxlrHj10Nq47I76LX^SqJP!73^v%3^hQ-4(UJdXh&>{+B5_LS zgo1H?f5%Y{klOWyR5S0ryeu^n+!g+2_W485w!p6z)H_CNw2IFaU-VeB;@4H)ml{ur%cf)<_7Ha`%zw=D z4=`3Yt*T0)X(`Z&(<=hwhkkc53|2d$agrE3K=& z%qoacbWjec2dW%J`<&0kgGwIOvpewvnVa0n&L(D(O(KZ2yWsnwlbeRR(Nb^x+J?9}? 
z)bbHUVWvj4LUVY}-JsTju741H!``IvkQn=PpQ4)}S82#lKpX&(PGMx2mh^koXRlp0 zz<1(gwHdXx?et7<%lp(}gj*ye#w1Lt*B`$1wwXSE>ADK{JI;2yVU4N9g;ilqV{x?2 zp^$>phg}{P6w+qy$)Gus9@@}}6S%-ReNlfcSP31U)4pqxalxNy!a7d z15gc=vH^osd4(KQa19?U%u;$-$ z@z_!MHu=&9P0Y=qUjReWDuJAkBwanw8g>*kqimL=;S`n3;e{MYA>?((vBN9|D}MGF z1M^}wLXp{P<$aI=UI0S#N|b>#Z7f{3QV|!m@h2`>W?Ce9;JhyW4rTo(r68$+ia~8@L2%2B zG*Vpm2rDhaq*o$p&2mC`eB|{n>)@NX^A;;@Lk#0k+o-_4&?vsD4@T8ODGA9GWmf$Z zPrNN_yK23$OqN~exMx)+7(i#GamJ#m14Jms{(1?si@l-D2_Q(!S)$tP693L zYy7T?t49j>k(w@3`ss|Y3?0Z346`B5+vB7aQ2UPoOamG>gdB2-Km?o{))d!+yu=wK z@UHN>NK{)~&zJmu2SqW0k7$^d7w#vf*Bhh&#P@HHh{1*R2R{(gKr8}M;Psx$Fa7U` z-kuw?_5JNxG=p|u!EX`9n6S-3X!!bI3H$&5yiAu+T6In?kd%C(qd~TLD1r{ax^Iie z4E_@OeqRW~kZEvBg~c4!i2V2BA!Z0-Q4+gL^i*f7|1K@^wGgnQOLOh__vg zEbk0yM`h@XS#X*ZOk!(CzmMJNaiT}K=ep?i!D{A=Il9vitK4RBgy@1lFkc#GI?G8EI z@qg;n)Qh8Thd9wNS8hUkTs=fidkYTx0*#)6I++}l=DX}P=`F<9-j$F01$JCm=<%V9 zm~2{2qIuKc-gbJ(9_O?X%u1Kyedyeh*xgeknH#E7eP|hoWRrMb^jNY;;n9nhAhPA* zY}IPoYKzfo8hsy3leupM!niN@Wo|gi#ufJP5ev&jXW+9{}c_I9XXW#~~O zk=hC0s%lz0hb2AFg*g2nV5lzvF)xU$x%Vmt`@Cp(yEs7n0LNZireNw})A#Fx-WVHY z@6vTGR7wV2-CoQR$W&fDbG6uU;xSS^9*2@30S)8}vv7jk- z@eZ#`D2p;kewiseW18r)$0t=8=9WKwm2NM6wOEnmcjJWXVNaXFgLR&@SkaWD`fxx> zM#h@#dEL?$x>J>~84vYsZne_lg<<+PHI8N-N*`w%-PThSkNBy*8|uRZm%s$OpsAWB z)kTjKGJzuw>E{`HDx?tw>0|i>8*3%bom};{A5Fm&IJI;_8MWaQJSM^!iHa1Z=Df9a za$9jb1eDr%{lk6~mz#-^6f2qLu`3{Jxdf1+1f}HhO>unhDvJH8sI?L3u|3)3E+OJ8ZisWAUG9NmV(_j%APa7dkTb?;pNm)-&aO{z$ z5l1_w>$^gWQ|q)D*FC6`NR6{LSx<7q{N=?Q=RMCUN(uP#f^$d#E`)jfcgK_0IgkC1 z6frz8!Yk0NMV`L^nB%P2G8)!CLbkFS|FpTWcAd3()UIqph~)dShb7;W?e?wUHh91b z;$9&P=uwTpEZ;Oix$0b7+i`UQp(<)o9hTsUx=sPC9c0MqnyS$My%vh-Zc~$$%Z+5l zgwVYZ<#zsaXDR{!$-`3Sr>#m${A7UG;E*YFqv|9h?7pb#Yy-O-U2o90bov2SdX*2h zZmEVPO250Sz>49%m$w-gGd2}Pb)16{mfEBs#08&Y8Q{t!+h=S@UzOF3Qn|re`*Kxsc^TG`>=we;w%z+W6{RFP*o9k?WvfwB5ANa99eCEgF0DpOT|#%`H= zIRS3El8qy>HfMM*=={`ZfxG_pyZ6Wq^)RtX_VVdI%QnqWV6#-agd+?bTu)ulvG#~s{-1TT|VSxvfAF6$7ptTND<(eZ8YCfx@~5J@{j^!Ga-tx 
z)1|uivbMVPoR{_QJH=0CxjS@%bxV{Wm+LK@#zp(qZzr}jCi-Kol84Lt-l}GM^bj}l*!u9h7P}8QXTn&# zVU1eReNfe5C13miO3e&9xR948Koia_Avh@a@9e`fStA{F4)dFZGM@Oq3GNkwPExLk z1l8;QQL>k&m%A{+WPg_*nWHGeQhCUsjV}`Vt7!q^Wn-mKl^0$u0J-~Ae&hxxFO886 z4TWyshz*W}*spMdWE)59IHSpt54c$vjcm3qa&Hr*vt3%x$T0YoI&GkngSqRrCv3*f zo7TooSj{C@Rpp84bHOo!GPkZ{huHaZvT_5NiTUkZA9WD?NyU+1ABzqD$XuN53RB<0 zwQdxYKS>Y!O@=lGBDg}mmyTs3#knu0l4~x&BHhB)1I@=qk|im)+ib-k5Jb*;04L_q zAEO*pV4AY4BOz4)qSrsRBdKP7B8X}3N&rm)ZtmdA~ z6=h{=bSj}n_S)b>x5MZ4Ii3BwfXV%c8Ho}L+yd)6g!eenG8ERgyFdP2)T(`Q=w6ab z`UY;{vn>F)`91hgM#ORBb@o%ukKHRGU{iK!FxjliV!?`mZ}liFNDpc1H0FfH7;v|- zEa!K6?6j~*!?%W9&1&}f@0I~0_B9E{A`nl>gZpf7_2UZw_IJHov>V1X&fWTgtgpz^ z?tLg&W&r&lDiJN9H#W=#$X@5Gz?;)KcUCsPvv$(?6x_PeD$j##VL%1& zggsq1H=xj>xdQ)royXroefk$H!Q5;&EtvvS2}dJW!}v2ZZJ}qDu;#?HZnF{lZ=_(y zTD3Dg7ntFO+=NYg@vWIG!GdohMA<6glQ!F5r~#K-T$gpo*PVdeI?f??ezDG_ z8XV(QB{k`|mpM624G}W!Ev%LcTKw6!We*znm}ppuaFlonBBPhkz-5rTJqTa;*3WQF zBcFa+?(t&bbR1s#;Mc2CW(@Cp5E@yQMl=B=Qmi+DdIPfVAI{5-#_}Km|n2ZlD zA2sKu_rQ}&9WgK$-ENs&8H|sdy)an>`^+4uqxUR2LI5mru>o#4X-Q`zoQ7*64&fDO zyWRCx8Em+K;*SxP!ULhsNP4)t$-+r?*$~x2*lG^!*#`E`S03-fu__SO{6+P7!m-V3 z5QHy9f0>1QR$U)p*F<9HXxV;d1rW%5Ebrt7)-MYv;;|8R-vp9gxrj(yBUp^e&5QL%S9zxbwX+QlExU;OAvB?YAE8 z8|rtLt@jT+7t#vu;Av*iz`Kx0ZN^rRvFzc#M>Xl?uY_$4#6%g%jJjAysc5Xx+b?M} zGQE=dE(}$nE@LCKD63VH@@e3*#3>u0l**U(@RP~r=1wPv73IaairJ$@a&VQPF{&2{ z6I94N_?%e&g%nS3kOJ%Jd7(`+qR#b0jX?t6iZQP1vj^pX?u;7)6TT3;0 z*or*WtqUFj2wpRMjNRtEz7fH5x{twXh?E1GrhXEwT14>G!#EoZ<`QTI1U#8(c zTZZ4E!w_71Mfzt z=Cbgyou!XD#@6P1G7)cBsxF&8lAt)JP7<8|PY& zzl7jPZX`xhtFCm`X}u_b!dZ{_+FqbmuAosGs|hXk z^_AGSTaM6~gd!cdwm!pO$w<=Gd=T0QPvv z5DV6^vu()N*NT?H<6E5QH*bOA#`f#q7j6@>+JsB%aF(2ylKka<9WA?2Ec&w=*-L0v zOV=TP#6+Tn+Y3egHn1i!HLm!6H{b|65HV(%~%GbD8VU8~e7x+_a)(>9hWsCDBh5WeSYRXLC?5Wtf&H`LmXu%uT`84F=wDHZSK zlrv$v&zOFzx`jW)uDjb^N|@0U$*m?{Ht~RA!9i8H(0fZ2&Z?*3A>W}h_t_)5O7|0G zb!v%tE&N4mch0!{*=)%ft1F6ApuSP4V7Z@k-gWPedTNM*&3PS8iqGwsK;Bh2BCVBE zbMMgVGz-y?Ra2ow!3awH_@P#aMna^z%!3`Vf1(&fKY!^64w!8q3fJ!>Vl?FLw2T4i z7LzN9b)@`gxUWCHmk88Wf8=Mx*M0*&n8HeDK 
zDw^7-I6^nzuXa+(LcC>7y{vsmgnZCy)io8Mj2RRU(f7*ltifz}A*xT_RCv1(nAqeo9b)DNF>LH*O{Q>!7% zWVn~)ftcyA%%^bOhh53-HIVORgDd?ojYkRFI0GBb@af6-WL40U0EhnIJ> z2qKlrV)jRr{;_hMIQZSMK&3He=^_4A%M-6Prf7EhTmSlrsA}(+AvCcf-LuSQI-2gn&w|FkV--%8A^)-p|-o z>3cf1=@Tz-(uwMM$BDER$7LEynVoX3nS(E#{68tg`JD3SEM`~e@zPo!%w4!_aA^+~Ov zI`|N;aCrKA7GN>TA2S;XSuT|!{usXMeXE^&vE3u65bpEt8JyU%@?G((vJoF+7w)gq zZ@U~NK%bcOvUa%DEk)a;8nn%RDiJWDmzPYT0|Zg0z~!yENiJ3qDpX%leNP?rly|ix z-W&o=Vl%p^SF8HZp<6vK-GQXQzTwpvX5wo%T!uc(=8TUa-x&<_QXeYbx5DKQAUV}1 zGj1etZF|!l_~MjJnth80hfX&4L(S_Efajz~-^^-n5Hh4eO#Cb>Wj^5mFmYps^u6-D zgHP{U5;STvQj`AJo)+@E+;ARSCLA5BlI;mklRTSz;@9&xy}x-BK75kmv*RINXOgA&N*{U(aQPuGf-AYpZ_wBgl!82l0Q4paR?H$OoV?*bkwO& zJL{ZheJ$3Uv-s8`68uUJal>&Cn*AP6y^8T^vEuhragXh)(grFB+u zfdu!YNQiwic_q<1ry?FYAZgpySYf6kzqY2Xe7M#nQ!n3?=<6p-3R90TOMZ$VIrR8h zPMcsSwcu1JfpAsmsrg!PlH}{pfv+A?ekTblg>%#Bp8e@(LLP?I{-cgO95oNHQ-w8 ziXnx4!M@#jYyacMaVLh;N#j2H7F$FwP1*lXv=l|EaJL<^oaR!5+(WBg7MYqJph18} zP^KfF&>;(f&S`-4zpE>P6r9N88bnLQTON1|Ei8%$k+{$*B1mYNg6>A^1Z5Emb=_=_ zlaF3`nGNTQ2m1@$GSrFo7)%L!gB^--i@%FZ=l9;~QAY6!>%K5hnCLJQE&XMk6Nwfm zDRSk5<=QA znw|9(u_0puV74IEc5AOdl;d(;*Pe**Josc%z7p1!;KK3hc)-^v3o_yUPcg=Pfuf=G zkZ2#w$-v0*OZ9M?%uQf_q$U{XQ7*)2b zzA!!*Onp2Q7Y$$g;m=S%tIJpfaQFESF_T0R#ZpYx1WXyKwMN$`01`vr0}Bg87ajTG zFg!!7w>`B5n{tb;s0WB(s8tAmhJHPB4Lj99MLcEzV7CZC0rp`JB=1uU!vkJM8APsi<}moj-}! 
zOZ$=~+TA|Kk0D$3_?o=XB4J?{8arW(Y)}Z#-z$nCPsJ4Un2*S{A;V(rD~|bpUHaY( zlACB1(x;a`zkeA4S9=5F!M7b`B}uV)&-T5?Q&Da#%hj(WuzF-0UZ~CBtb3GLHz8sT zNF#JnZHBrLH5Ji)7C==FnVMkH9>{N2*OJh2X4Atd+STOdY1=AKR?muKRNi1nAN4+v z-5;ur7nUlKoj3}m9VJ!U*4MjeR7nfIzeuDc{;}!GP1LrLp|Xxt^D28M0Ik!lncEzQ z&Bxum?cPW|b8#m$_5p`2+umy$(&QShRFwU-Ocs;}tbS)=jr_OH)W4Fv<2Ie}2-vxC zJincV4_bg#r?-&C!uFn~H7S$N?%0zi8Q z4?=>Ogzf~S12adZP?jRveJ9P6kGREg1%ZN{281mwTeC#9S?G&w;zG&SvoH_8hbJ!o zF|hoWqeG?E;#w(C-dQ9HsPbx*|6^ZlUsWyYjXq;Wubj`PR)Bl|E%nqjN0HL{c!KO- zX83BhDV>u3yn_->EI*B+(CP>elPvj%2eG_VGTsyAOXPpm+D58i68V)FRu_4AbY0@SKeRX6;z|edhPr`*awngp z11Oqr<41$*0Gm#LZ*0JXi2lI-E`ewf_y3J z6Zy+w6?OMGV_vkCIBuxiQu6vL<>6+<2;Pptk^LfFA+2c5OO#__dg9?@`L53o`Lfe? zXG!{B4=R^yIlE?^!$3RScEnl-jd13gn(W#im`+%?+~6JOH<&$#5bT37q*(@M3HG=w zrK3?KO+VsyrQP2TV3xMB`#8Z!?s3)z2fHn_EC-@4HkzWYeJ>?%`GL>=(hV5E$1xb_ zXTy5uX;H%Wj{?T+4G??#xN8(`o{VO3Rb_a zWuy9N6gjZvn5u`=jq7KyMg2!&Z z2KEk7`FlqCh{|Kqc>vosFdVT7efyn=H@XnPg&{T=riRw~fZ@&AD)QpvG<3fQnJRg3 z{TsZ&DtW0Sj=Xj5pe-gZ!NZnfc+@nty6iZ*3^j6)ZKS%mO@htnZ*zRp8{YW*8D$K* z7A!=b4a}NrIA14=LD6>+z7Ji?R$rgf_N~aBQ;)i$1r4HNQ}av7#R*wmNN1>gO_5^L zO##xNfE=rR)T4kzxt5w>g0+Fs1S<;d=Rt~XH2*XM8iftM*l%X$Bbm&dP75FAi|Fp? 
zLOfBeMR;pNc}Mv(+Fs&b?j`g*Wk8cM82u66*32D$CYZK=ZiPl{@gAL(;clxEOHfl- zYu#+U{$|u+gum|p&L=`XK|3;2-EzgTPbe2TDO&;Z`1f|gezjJl$BdbLAL}VW?&YK^ zg{Pc#1usjwcQt5v==4J_9RnjSZXNwI@!WYhE)hBL^*>u9QG3>duOxR0WVd1Q``n(4 zhsgE)Z`4zL>WsTCj&l0~j{%OkuuK3LGB=1w+UGh`)8R6tN;x;$2?+_yQ7$-=?cQpz z`y?kZ64Zi`uc5E=vuom0F|Q$wwpt#oJxp+IdHViE_}7vcLxFak`3KZYyCjQk|)2;5LhAE-Q1(oe`)nxZK&fz+iV(@^OVMC3QrsW8EBbf`TInp^#Y2y4g3qLZHnl*+aJmJX9Q za!>(Vwf-a1Oq2Iz&M>yE^B**7{vnqJw)Twhjk1v1TJ{>JDf`Bw%Xyb87w%;*)H_^t zGC<^vJ#J$}e49DlvBNDuMEP7#hr$Q0Isy~;kv|#JSPJ6sFc51)D{KeFyd>d8DA)_g zdnC}%)!f>xWwe)Kv$kQWs?5_&m-jLOY&zse7_?+%nuBrDAq+;GgzoG)7c7d-!8AHQ zyk!{i{*hzO3Q1Xq0!0f0i5J*a3m%K1z(nlZ+}5*{Krn{Kx<95Jls;O;>%5<5gxb6k zo&g1J;L2U*#b3erEY=g+x2NHY^f_4N@<9?nkB=fB#sd_tr0?buaK|N0X7^*SVjqo+ zCFj&@5tHtVc8>fo#zxQ*#Ph9rxWtR1tpe2&@MY=O^P6?&SUb#k!5Cfdx^BhPn-q3c zMT6wfa@{S=rKBi3F7)L}%yCtS=h|)#j~BoA7HZF;oLCisk|8p3)_JqG(e#mGZo+SW zQdxDJYol>rKx6!=;wA)kt6)W9zGEbt zIZsBbY;)pX7>83*RFaXM+Zbe6WvAH$5*CgmOpRr6z%&jw{C&hy?*~@MqWVE3{j+pX zSK%ZX<^eZF+jSj*scf{J@S0a|n zTR%YANvQW%$H>F^8apWClmnPcKUUfqI*94J%+Mjz=m~yoz%#9XQ*E)jUq8ZzRtRZX zPqPsn=5MX}`0XW|pthW&_PVF)!~?rRB7Ghywv^$x5Q$rM<57zQ+t~uteLrzs`GKr< zv#0ks4gL{NbmsOvSQjuG@teJs828g(6@#c2B`QX*cW@>AY@p?Cyyn+M06Ma3UdKbN zwB;+rO!-Nm74%!~sG5jGnO>#vzZrh_4)(=BIgj5P9OOrtL#o@6^3lBYv1XZUew$)qQjkXc!j%LM3tN}F4LdHE`g!nzv+I47tJy6&{mX|e zYi6Aq*d$L|OA7uR@8i%W5I4O*Jx-07LI~_l{WkA?YXu0A=eINTtxn0FrxCTMi^)JV zd_9M=?={G)8ZLbEF55xm<%ppQp@%AT*b!eh3_;xQ1;1;Fsj|wJJ3<*_Xrnt>_1v8N zl>X#VrYD#C4$88O>3VSNUrygBm($k$!^nV}J^6&!)IR4)lI@EtKxQOeBbZwU4-WtH!sLL{) zh2of()2P+`*V|QEPNbl2^ZcHrG+L&PSIh54&R|A&L{|-S)mc8DSX7JBJ5^#(J-Gj= zs?>N?FrFPpqT1JM6J}TuJb%Dl1!P-MUgHrQ`{Mu8V9MS$K$ZQ1(tyM=;Qq{N`f!!$ zuVNJ2Uk0$i>LZ|-0rqYw?YEI!6%KXI#F3TKF7qH=tA=DhUSms(-fSaUdZyigkV`l$ zdk@tv(k4NiVN)>ral*@&Usd3T?m*E@Y=-}{6-({K)mt<8238`n0CyKzZ0k1r*gq0Gjxf@RS_+FWo7DUhIrcEN!;1cqf~Ffi^sxgx!opXlkwP7vIL zDUHBnjYOdnc1q1iSAIrI#{@N);+5zY?E9` zyFWC?sTJ~&@r7g+H$v5l?m&SNKHImC58o%63qC3l3}x26-?(SgQuXi|qbs0WP!WD+ 
zJNo+S`J$5#wXIQpJW-t80nW{?|21`mzqGFA-Z2ksELK(n#dTeORJ!4S4sRX#WTG1}v*K(|0TD`&3WX4r=ma5&w2SvsJrvNF+uNoSuH|`F2Oz;+d(>PVp#2C#f z2`5oY1@mRB6&Zy+`F@_rcu8kfxDpkJ|MP{f^?9M#ic-V%|V>4>|V3JJ&}5fM5T5?&rA0nq%jA7u-mVY7|%#Lg@E8 z!(aACyK>M-NLH0Q`Irlz35&1;eOP54Ily0jbARq0cM=OC1>;Xwd`0gX_Td`gjQLae zo5BesAQxCkI!U9cT%n=yTK%CL!t3fRyrz=olGUd82;dp=0eiUM?OW_+_wXFDodHj2 zY0%6n!RwC@qjp)!mhH!-RH#44uQRw`D7n~*hK=7WYsJIn2P|zS^A}zKj~+!LCz_$= z8y>GUA^c*E^^$ydYPs9!q4K;^Wm*l|aUvtB>pNebs6Er8trm*Vg6u>CS&ZXZb(^$e zbs80*?{DZhZy_Y|>s=njzyvF3{R~N+yKT#gGT)ujV5PR;5M`uQNc|Thj}$HJHlDRg zm^kR&Sy=BJE^VCfVKB9x4S^X^TO9jT9QkgS+&Deh=|(A=@UTullYQp06QBI}(CQy# zC->j);*t9s_Tr^XoG3u0+L@`c`;M@q#G#Vqt_wk1&kU60vSa77?5< zPdpaQS-H|di~k`V2NS1jzu@T2kY;gq2$o>#DL^lg*;ftbaUtxS+j}9Kt7?UDZQk-U zM4rIX1|51OIG3hbxAYrqFuZhl^pEoAROi7H-9S0HGnu0>Tp4MvjI<#*+$Kjkl%!Tu4VrnMqjxcYVi zNe_>#s@}z%F|JoL*PY*!co65}e`#KSgd63bn=~|JMz|<%YO@36U8tQT#QpAH21<{$ zv?wld+HpBg@`gyLkK#xOiekn4aq`5AS`|`dIpg!=S@r32!DhG+Qr4kI`$cP$aQVB< zM>r!Tsl7$AWYD*4TxGW48RDN!G!eC>pUm+>IaOknhnwGwYeG%a!uK40q$LSDr8FA+ zv_}{k@zPFBq6dHRc)Zi+#!Sqi{z~(9eGXygF;1+A7Q<;KR4q4Gwp&Ed? z^F!>$j@B}ThGg1BIF-1^sS*;oGvzRQ=u_gfdCCK)$u0HH_^HzJ!;LhUF);E7pBE7n zUb)SLCub6l`MK@~XX7Ay$+28ZapKiTX;pw9mjI^^m z4&DOGY%*LV!&S532t>Br!fBY? 
z<>pV&BCwhdiH{Z6;SAxIo!4cieA)zmx<>~VWg#?Np|z{h?umAuX0aI3j}+JX>DPpW zXS#d1Ew$-xbfj@;(N$$!XSL)0gYj)_z!?8G!tZi6?4Wk|h?CkgnS94@o7apLe|n0l zm*n+0=V10KogMH0Fs!zIOx>IMuD`a2@RtE8Wehdo9);Y)0=Uh*D;M4_&ODj<13=LX zO1Div^aW)vWOj`XWPUL@eBdrKNT(LSvfW$h_~ByY$46dAIc8%PjEnOj*oF6t+@TNQ z36+q5k*H<$dCyZ7Zx~jE>hhU9{W=x~FrNp%P^a69ZN!a6%Zp%X7QoKnOp6fmJt}bJ zDs{?$_lKJXn{i}a4l_0VHhwg|ZW>^-jgncu|K3cxJq#Nl&^En7@q%!n6K7+JDM~<` zwc+@S{$pI7gd+}y zufh++Iw&K^E|KA{O&`H=zLdK+?v4Agf;kUI+B7SZ;isksyouopL14R_UP_1at6h%1 z68luk_+OlD&m;g`o*iI!$}Vbxy2MWjj?u~}6jV>#6uI7{!0HVUX=`QLp$^)g(GwQ0 z20QBz=^}pIJKt+FNzeG^-dV9|s%Gq==JYE1XPZry$Jreje7_G?Gs&@{psefgab>4U z7#C39Amf;k>z3fMNh(mAc$VPod$Q%^?R>hGMSPr>4xbjUnA!|FnuV*i&PKewantPQ zRqKqTWM*8RL@u@&l%=_vVb6I84=E;xGGtUY2sEoXTW~#5%r+kbzq^+=IpDnq^*;8UB~vr8{zQGJ<%E_m;smce2; zv|7U(8SX!ZN1%ZFuVXTZnz=}L98OG zpp!rYQEJ}GWvm^cM2I6)B=ak7__)AJMV&QP1@LMlpZugcj`SCC+KHrC3O;0jGrj2g z-N*hK;zH+87|!e&emZe&#ImOp{Cqk;HuV>6pIlpmR?#3v!ubg=4>l9VxinV1Lfoa6 zftKR(J$+%kwJMz1peBf=b36=;t&VP(^Pg4E>g09ScE2RF79-09XA7?vqmXNlI{Y4X zls3C4$)9-dI7M}ZK;n|iQX)yb7o@TxiSlig_d=f-%xgZFJD@qOdH+Hy1=1i>tAQJM zY(4(<1wx5KTIF~J>kvQ=%N%oF;*|&u^4e6aI6hWK63xAIiH1(gr`AR*3Ax18B!;URJ z1JQ2l6R(E0v7oHaHY$HRm2hsm+B_0(cqgku`_q2Z2xhlkvozr@T>sYaAecr2FA}za zkLTZI)Ao0jiN{rfe1m8zxD~&1>flq&KKcuBIJn?na27K}29g=acQQ`-F05&x#0UNn zhM5^DAc{<;Sl;!eG@#;*Cg;$vkc3|Sr^H2=PXn_6jmXCkWysjf40XQFV8+TTQ%$}~ zdDE_adA`K0@WR-@3>{jLSO(`}-J|iV2ZXg4o@b;o%xN)}D*!W^arC>?1Bx$r!${$S zkh0i6;D$H;i$t@bT3gMoXTGC3m}XWp*!ZEKWT*ulFKt8EO~{VUpx`jd@FJ_*Mfqw1 zs*x1uUWMc`YnQ)Bxl!s^jm2!h!yE$F{-p}PTwpIpg8qb;$NMA6lsDeAcT&HQUnEr1Qh8_Is{1QC@m;b zq=hP=ROuxkAVrWKL3;0<1OlY+1z+Et-I<-8o$ue~@5z~*bDo_0d9M2^&wV+P9pbiv z80RAt_XB8MnLkC@xBB1;6&s>a?R@W`vZ3!f5{NuVw>TrdC^AEnIk0i}g(Xp-ad5xT zVo}q^FK&zfRrY;AsX8|FI%S}W^%l75593SR-F^ zVbi!PE8iR|I8xcX+8J{Pw<#I3A1`$-M>M&z$ZvHHQKdTng4ZAagFouJoAR3Kwut}t z-H&8mcgBwJy;qD4h#ETwSkK#w4P5ruq|b)H#og84zD7WFV@b_lXG7%hA!?>`1^uNB z9^S~|vUQ?Bg)^RL;%p#F5ADlZyH|-SjjJizfZSIf--aBVe&fAQ9N9n?*1B`aVqK5; 
zNKF|q1FYYY1;Sz64|42n?;cp#tC{(II`wb}o&F^Pb@fneSlv}Oo0NamDVhS>{!Ao~ zs~5hETP~>Sc;yy4&p+om2mOir@%z^qgSFhQHt?8-j!md%4c5@7@k+OXK-7A>vyQgL zL}`PM`3^v4Qn)RdGUXd|vNpM2+_J51GtaWWY7VJR(4g-$Ibgqhk^k_DiGFzrsWn;+ zb$a&zC5}vXfNCodDb@`D+8+MpobrtrKhRq01-ZS|%R#}vS|!M*TetpPCJGC$3O+%} zFS(nh04IV3E3VMYL(3>jr|t-q=Us+;>c1z>0vIyFF{sD^q`uklT4CmeF}_Ds(J1OqNJq<*=a3C zTq^N-_F?;zO`vH7ej|SJ;zHH+ha+yE868*c01GW>zsOMq<|uH~Ez&~pmq(A)re&wy z^`|e{-Z`Y(TxM3I*XF*Z_Ai_2EuoU5Imd{=9!?ktL!NE8A{og->D+?q83vF7ab1X>O?#26%-ru>Ajt{ zIP1>e)mkk^Uw!vqhJ(h2-)(n195I#4{#P40J+bu1^`n~KpZl0-of63}gBv01;ohP< zV>*d#8AKI)${(y}_F+`l`Rl6Fm#@~^;lrnlkBNfCXR^W*x19YS7x*KEz3N@VS8g`w zVl}OI?d2FlQ^)SgGv8Uxq|3MWa+KbcrS4g=0%u!-m@LHVixn>Piz<3boy!HKBfcb)aZyKzzL!>(_R z#0Qvq-U53L`br)aq#I$LS`fA%`R2%4Gl|GQzOYl5MFRt+=IcEgJhntBsZw)(Z6c(m z-Qt7jG8hqkT{p__>Q|bVHTfbqk>!peC0~u- zmm;64*CQH-xFR_I0=gmKi@!AP)s3nDK%ASI6@Ne-f=1TQB4J*ZljWStG`52Wz3z-X zy7jKpqy!F2N<*16oGLXNut}*l$39#+9g=W`=dDBQHohKq_tU8(pJX`(DGq$u8w*J8L+_6Rz zfpfapsl!p{A7yKY)Z*bu#}Sc-?)mhI7sa-Ps*9eDW3w`zoui?UxO9&33Sj3LPjMmE zH&$9}3hAo3aSzZ(>GPLCeKB=8MJbLgOHM-9MsQKnTgo%{X1M{!rOHAzAbL1+<*M{7 zGJ^eZK&;-;4LK%SKv^$^Ub1IyO$msORx%Q!7>`)~2Qd0B>a$L~7>kUt&mev3uF32+ ztViT4PCn_%JWaNoQIj^vp;yGT-tP-JvKa1Yr7u`ynu5Z-=l3iIBN#}Cn=_vaSq(;H zbVXYqM)Q^$iz4TX3)>NHlULYD5}NERINpJ#yxEs~+i4GGF92`?E%R$L8iwVi?dF|F z)@;EBs~Y+eGe-qKz70zs4%}mF!Q03uts~})NF_FYv;W>HD z>}0s~N_A0FJWh|<%9k67OK=#x0V=3=omT-> z!DZ3S$~3z{nBoA56Fs>4>SM;h3%Q1tT;JaIq`VBueN3-%TR%(5WH@5w@Z*8>yIf+_ z!2?KXOU(*s#cVVZ*0rHHKWXPE8gl35-B5Lgnq0E!@y+r|Q}Gt7kLj!$Q^eP1HZ3&g zj#)*c-%a;^*{nP7=spiUdxaaK90(VD-4>-_4WhL&wzBx`{oT=T3zYU>0~fGTa%!x# zVnAKIT{8Yii!E9{ja4UApx^L~6us3hJcn*WL-NaJH3n^mGV4#&CMqrBR< zpcCCoX|hAJGjj5b3n5l%K`(GJ`PpDGE!fMe&t0LKf=k2Th?KR8Z{vNB@kTx0@Y{*< zg<7@Hv1q#O_6sfGSu0-WA9fMWHjK|)Cu3KP-RA$}T30KG)0e+~OA}T9w^e~@Vx9tjnUfnv@A7*?XUNvsK2g+y zl!*8vCK8Y;u5Obr%VeF2{Ca=fxAI#=j=dsISW=<*z3 zB&3IRZP%EtIAFSLhbpO;y|O3kSc0M8A~puQ=kHv3%R^QkzUPY1~b*gSXu>ORj(XP}`K%{}`g`0Jof7 zt{((=31!{n4aMIVvah;whsOB7&mhX!K_g*1yV} 
z#fGSIro_OKQGDXBD-C$ZQF*ro{hv|}LydEnHJq$gYDF!1E0`;n2bpM#bQLQYQA>t_ zSIISmuGR834=@s2a_7b~)l78k+ar&Es7%{`0OSJtUf#J9xb(>h($Y^5SH!|5(W8{y z?cDVUK6sZc6*{pU|@s2mDpZ_eF@Dgd@-Arg4+heA?|!C!IgMar{Ys;JW&ZKtrVD zjMQ)VqIeTA|7NUH68!bm@A4NGxo?v}U~PQ7l=c|dTCXGTTE%o!o?}h&RmHHFiVJ7{w08GI zB4==*-<#Z$1@5n3k3Z^ZS!w0_VZsJE%RE}h%8>H*dad03o6ok+U!;>O9xCapOFBhg z+AN?pLj;e+Z5sYkol-o1^~LvJoePeolD7UA>f-+abN*k>NL~c-F;zHP?k{w>((eTAMGi!bwe(PSmKx4pb;dOgbW;-X`PN2GQ2bw3 z`Tf-3IzNv5od&pw`NUNq;s+F4*{^>)9v@xPe+beI{fXUulib7|;(wle0#s9T>`swL z-2%5crx7hOe64?Re$3ex&7Tgy*Z+t=0kUEk2&cJc9Sv5{Rtdtfuu`)S;MEpKo&CMN z!xqFv>jXfk#PpR_?C8_F$UO<@2{i$?3dk)eAdhOJr>^K#GnbHj`JG}a0M&Q=(^g3k zfRL?=t!%ZF2 z$Bi3YG+$eTCito~a%%e=gChpn8iz}*6&7Xt$#S!KWg3+b6WB)iF|5n`ZTGd5p} zSRn1#*9mt@?Be_AO2zxgq3dHZZ2g0?ACB4y+b0O?S0xTChV>foye0+Qsy8r|!9Hb?kv$VSUYNO9?lHnHk+r{jS zN+ECuUDpcq!wCP584TYt6_Hc1fUi8<3-~>OJ?!lh(q|=5J#(3*T_rz2(W|E_WVQ)b ziE}d0v6OM20{fjOYl>JS_0f^WX=!Qut>nCgBc4Ou{rx+K4)*p2tJnagCfq@93IM9) z?%`no0@2k@26mhUZCX+aNnGc}1CvA72)PFnRrZEu-ns(42@nz2-5G>Q-Rta$q_Hv! z*erg>$KA!r^A>L9Zp%AE6?8o$orVmK-?H5&4*tSQnrjmLvc^t3vdB=%|Hw%$vF`ks zovUlIVXDR1>DciPJ;)Vr-DCLK3Gd<5!G0HSuio#(PU4Nz|28_6PR9;gnLuH<2ZyG` z=111(DyObb(i1nQ`&r)|brj8)m6a-&q!^cPv;V|SE*1uUR>tFk+gOM02gr{?&lHZ+ zYvireG#@^s+G#@RR#jL3(34s1&uHg8JJx#i@wP_Br`et2($a`&7kYnd8=KDvYR?=! 
z9UV1i=Mr!*{;0O41Y{1uPR@Sbl=d#cuj@hECS6;&F*;P$%g{3nwCnNR>9EhAMXJT1 zn`X05{C~vp0>qPF2A(vwU>nPW;(Ks_GZf$#k2cjMFVD;SkQsiAX*1lx z>%A zBGwO{hzi1HPQ~C`KA@EBS_yry@XOV3jnU)$REIKFd6QwEYo(Yo&zAT4TfkwEesCKy>3PN6DCM zN^-A(_m9v^Z3#E^J|n`H&L5H6+93aJ|6-Hd6B@U&f-MqK)|Kovrao zHUOmQG&7EJBQ`&BIf^)c2KEyi@ms^j9P)(R&YC-RLJ>2aMPkW(sbgpRNn+H@D?6d zGzS)gOkoB#E8ZVK2Bb|;CRB8;=E3C1%zpSB#s@+@Yf)UHGieUCWd(^AWDZ_3sw2?( zgm0k&XU$+CW&q`rMKGkxs%Y!_w&Qdbt?vUTZ1oXfae6n})BSGufJTrnj1h2VOia!D z2*lAueTu?M41ujZdZFQrFs1O_-pm=(w6n4aM>EFmXraae_{BaRyYcG@?*z5l9IDB5 zzo8#z4%Xt-2k!=)R$vSZbV+sR!gL8RrPIRnsJKxN`HbU!pXSg{pKjcF6L9#;7)xWP z8OxCmx__=uIWq6hP%!SN)Uu!To^-aev-`%{yhqBV6hKUq&e$M@)p&Gq`^=Tp&OAXq zR(sBT8Hq#|q6)hb#ARh*N3kId=L7mwqS(~b)ORuVE;2K=`9MX{@B{4E7vnG3TUZ17EP(c5vZ)xT#g80y3CqG*#eG zb|7&=YAGYjEzpBMOO7>m3F{%o*zFjiG^|wxSw3Wfl;{CO^51VYLpdn*=RH0el*mKQ zrn*hDFRy1uqF=*yZG}pbEAtFSckK|`!Nz+$wG$cx3)Pv}azZuBz1gV<-jVowTso zyNb~Vnj4h)UK)e9B9X$|A&mjY>m!_d6H-z|E5?^6Y)X=42n}!Lj-qW1hOV_}6ABqd z%ib#M9rf@y2CzgTG^fDikKf{(D#-yB7Eu}eettg3wo-xLtdj$7=w%jXHm$$H6R^9B zGyc1@jY7lW5xS#jXEW$Jg2FQ=^NwQ@hyIg6HAQ#k%h#{O>%$fR#L~9lN?374&aiEC zWK&-*fy(JK_>KX6itG(RMXU?KEf=V-p{89K*crbV7Sq}+fcx1$i zOfqnriQ0#_LMpuYxH#LYAz5q!;U%aUqra)Yo5)ev`>V4Q?Zy<{XJa@i=$z0jnc8O) zfF9CvcVS3m4GbPasjVgwx4!+X!hZAlv5)(&>2oM?w8`k#bV$eJe{N;_~vS_FGpgBJ2Wc?K+WVGu~QMuozJa{Z> zbSpJB#v{==6H3mClA#|!bfLO@efjqbFsK7xi~ zGihn{eH;}(v{!rOa>C4p;@TO3a5Ja})!jHk(<$U&;m{Ygouu3#TGGliFBHlNxgk?(EHx@?F_`=y~V;)IxIPxl%sjHL*#A`Nw%janw_WzJXzb7@ajGzE;C<9@Ma-@y=BZe3mz_4 z&f)D32pWSN-)fd~yul-ay-=&vKq0zZe9>=JF9wyqf zo8>N0F$kHX%~Xs9-e1anxducH4HAp%HSr#klYcQ%yG6-#qg(G3raR2e&c2~fdw6?T zf7Mb|F#>zEO-=1&&z1z17V33$vhFxd5G{|>!YFk3Hmn$D8N3j)d^$@p0nw%N zcgJm-v{)%7(ba94K=oaj4_}4lxUM5ble!rmuMKh|3T8;CYzi^K%|j!7YYpA&gshoI zJ$h9joeA;7pF=IkXcd+1_t$lhoFl>ya`{xDYoA(#_HudnICgS4*X$pph!alLE@TU_ zWp5j}mhfDTNYcym#eRR8o4p76W$6NtohV06gd}50=#l25-apI8l5n^ zfDyLn9PX&fq`1&h&$C%TN|LxvJ!XZi(Kvv7*G|DK%`T3j&aPj|qAy?Pmbd{$k>d8! 
zES~XBq@<&mK@e+2qV7ft5e4UA55^Lf{e`{`d*665+T$#g5&XjEEUbBPb zw7>emGp$qQvD*LbXiZfe8jw6QLVJMRR6fbGpAZL{3$NvXD8UWy!FSM2gO4{(Nrgd5 zYd?N9G&Cp=FyZBRCrWwD!%B7h@@{8)p|@pZWg}?WwZwT{;U-Qpo{y}xoprSom(W4| z#Pu|FwM!9AQGd}+%RypUp&G1W;`T2R#kH(6{Nv{<2+WM(?mp80L_9w>yaBm*gN3RS zEhBMtn~9cwBw>X4^2VC(>ml^?-lbxljxVRzqt7Is^$!hGVgzIn;voswt7cr9 znqFUltPbxcU`cx5f$S~p7(HK8ghUyO=m_HX3EoykaH8c5BK(juu+DbeW{Y*w=xnKH z9{!@B#{E%Qi7$s4RDlOov;Azu9I$B9gbICPqz|*ilR4q58BwG5sL`ph(7@gFwh|k; zNBCUZio*vFf8*M3h9DR|9DO=_&?@M-M7VW)AA*U77pk~fY-;8J<~=q6Z#`#bxbEm< z@zQ2{6DJZ@eK3u2oTqeB9G1Rmg*o^^1B+8r&4Sz(Zt+@i@yH4*FygD#{Msev6Mag= z+NB3KJyNx9LQXvxPw@cH;5-nUs@}~fSy$eQgV3??2(9%@;RcGqauML8wO4v~qenVo zR@Uu@<>kT3L4*EBH3PsW{Qo4M%~(edDR1xjGD{;ABhPp*+* z>h()ym>NwN(NU#!#oM>!sUUW1CS~NRhuWBZ0~oX7lJc@Pv_EWTC{I@TWwkX`ClwVH z@&pMfp>$KHUwv?R_2C>)q*`POGU_52QPpy-hgOI*Z0WSL*Cy zT_%sAy@XUa)?ImgJS-;F>;ar79`Cgvgp`(w3q=V4UThuYWHOxg%?t#rNFOBpY1lcH z9{y?A&EEb*Ee`axjjNwer5lpcETny67Q;ycBK1_wVH%0OJJYS^xb-#XNPT`%icJe~5aX8uaTq=2d4on^)QnOnI?!)?Nrr z5}BF(L}gkqVC{tm-t?T0_tMVU#5iWWS=OHNI&Hk)SUdyOYML3HK^dNVflB#_`EhHP z!(xDCf4bn_wEOUk7k@4xG@$z!QCrPwHCEZ!F|Q?YepMW7xWC=mt*C{6by-mFh(BXC z{(r6$Zf>HyhB{eNAl@2LfrS{cYAFxeI~=oE&H5*An^OO$Rs}D6&3R^7dju^uRlei= zo1O2Ya)ew_e~0S7$?N8 dict: - data = {} - input_dim = self._event.args.get("Input Dims") - if input_dim: - data["Input Dims"] = input_dim - input_type = self._event.args.get("Input type") - if input_type: - data["Input type"] = input_type - return data - - @property - def data(self): - return {"Input Data": self.input_data, - "Host Self Duration(us)": round(self.host_self_dur, 2), - "Host Total Duration(us)": round(self.host_total_dur, 2), - "Device Self Duration(us)": round(self.device_self_dur, 2), - "Device Total Duration(us)": round(self.device_total_dur, 2)} - - @property - def info(self): - return {"id": self.node_id, - "node_type": self.MODULE_TYPE, - "data": self.data, - "upnode": self.parent_node.node_id if self.parent_node else "None", - "subnodes": [node.node_id for node in 
iter(self.child_nodes)]} - - @property - def is_root_node(self): - return self.node_id == Constant.NPU_ROOT_ID - - def update_child_nodes(self, node): - self._child_nodes.append(node) - - def update_kernel_total_list(self, kernel_list: list): - self._kernel_total_list.extend(kernel_list) diff --git a/profiler/module_visualization/graph_build/__init__.py b/profiler/module_visualization/graph_build/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/profiler/module_visualization/graph_build/fwd_module_node.py b/profiler/module_visualization/graph_build/fwd_module_node.py deleted file mode 100644 index 34d7ab82964..00000000000 --- a/profiler/module_visualization/graph_build/fwd_module_node.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2024 Huawei Technologies Co., Ltd -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from profiler.prof_common.base_node import BaseNode -from profiler.prof_common.trace_event_bean import TraceEventBean - - -class FwdModuleNode(BaseNode): - def __init__(self, event: TraceEventBean, parent_node=None): - super().__init__(event, parent_node) - self._bwd_op_list = [] - - @property - def bwd_op_list(self): - return self._bwd_op_list - - def update_bwd_op(self, bwd_op_list: list): - self._bwd_op_list.extend(bwd_op_list) diff --git a/profiler/module_visualization/graph_build/prof_graph_builder.py b/profiler/module_visualization/graph_build/prof_graph_builder.py deleted file mode 100644 index 83331b62502..00000000000 --- a/profiler/module_visualization/graph_build/prof_graph_builder.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2024 Huawei Technologies Co., Ltd -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from profiler.module_visualization.graph.prof_node import ProfNode -from profiler.module_visualization.graph_build.fwd_module_node import FwdModuleNode -from profiler.prof_common.tree_builder import TreeBuilder -from profiler.prof_common.trace_event_bean import TraceEventBean -from profiler.prof_common.constant import Constant -from profiler.module_visualization.prof_parse.prof_data_pre_process import ProfDataPreProcess - - -class ProfGraphBuilder: - def __init__(self, prof_data_path: str): - self._prof_data_path = prof_data_path - self._prof_data = {} - - @classmethod - def _create_event_bean_from_ops(cls, op_list: list, name: str) -> TraceEventBean: - min_start = min((op.start_time for op in iter(op_list))) - max_end = max((op.end_time for op in iter(op_list))) - # 以反向算子的区间作为反向module的区间范围,为了module包含算子,做了+1 +2处理 - return TraceEventBean({"ts": min_start - 1, "dur": float(max_end - min_start) + 2, "name": name}) - - @classmethod - def _trans_flow_to_dict(cls, flow_events: dict, end_events: list) -> dict: - end_event_dict = {} - for event in end_events: - end_event_dict[event.start_time] = event - result_data = {} - for flow in flow_events.values(): - start_point = flow.get("start") - end_point = flow.get("end") - if not start_point or not end_point: - continue - end_event = end_event_dict.get(end_point.start_time) - if end_event: - result_data.setdefault(start_point.start_time, []).append(end_event) - return result_data - - def build_graph(self): - self._prof_data = ProfDataPreProcess(self._prof_data_path).run() - all_data = [*self._prof_data.get(Constant.MODULE_EVENT, []), - *self.find_bwd_module(), - *self._prof_data.get(Constant.CPU_OP_EVENT, [])] - all_data.sort(key=lambda x: x.start_time) - name_dict = {} - for event in all_data: - order_id = name_dict.get(event.name, 0) - event.set_id(f"{event.name}_{order_id}") - name_dict[event.name] = order_id + 1 - root_node = TreeBuilder.build_tree(all_data, ProfNode, TraceEventBean({}, Constant.NPU_ROOT_ID)) - 
kernel_flow_dict = self._trans_flow_to_dict(self._prof_data.get(Constant.TORCH_TO_NPU_FLOW, {}), - self._prof_data.get(Constant.KERNEL_EVENT, [])) - for start_time, kernels in kernel_flow_dict.items(): - matched_node = root_node.binary_search(start_time) - while matched_node != Constant.INVALID_RETURN: - matched_node.update_kernel_total_list(kernels) - matched_node = matched_node.binary_search(start_time) - all_data = root_node.find_all_child_nodes() - all_data.append(root_node) - return all_data - - def find_bwd_module(self) -> list: - bwd_module_list = [] - fwdbwd_flow = self._prof_data.get(Constant.FWD_BWD_FLOW, {}) - module_list = self._prof_data.get(Constant.MODULE_EVENT, []) - cpu_op_list = self._prof_data.get(Constant.CPU_OP_EVENT, []) - if not fwdbwd_flow or not module_list or not cpu_op_list: - return bwd_module_list - fwd_tid = module_list[0].tid - bwd_tid = fwd_tid - for end_point in (flow.get("end") for flow in fwdbwd_flow.values()): - if end_point: - bwd_tid = end_point.tid - break - if fwd_tid == bwd_tid: - return bwd_module_list - # 将每一个反向包成一个module,名字叫“nn.Module: BACKWARD_0” - cpu_op_list.sort(key=lambda x: x.start_time) - pre_status = Constant.FWD_OR_OPT - bwd_op_list = [] - for op in cpu_op_list: - if op.tid == bwd_tid: - bwd_op_list.append(op) - pre_status = Constant.BACKWARD - elif pre_status == Constant.BACKWARD: - bwd_module_list.append(self._create_event_bean_from_ops(bwd_op_list, "nn.Module: BACKWARD")) - bwd_op_list.clear() - pre_status = Constant.FWD_OR_OPT - - # 通过连线匹配正向module,构建出反向的整体module关系 - root_node = TreeBuilder.build_tree(module_list, FwdModuleNode, TraceEventBean({})) - fwdbwd_flow_dict = self._trans_flow_to_dict(fwdbwd_flow, cpu_op_list) - for start_time, end_events in fwdbwd_flow_dict.items(): - matched_node = root_node.binary_search(start_time) - while matched_node != Constant.INVALID_RETURN: - matched_node.update_bwd_op(end_events) - matched_node = matched_node.binary_search(start_time) - all_nodes = 
root_node.find_all_child_nodes() - for module_node in all_nodes: - if module_node.bwd_op_list: - bwd_module_list.append( - self._create_event_bean_from_ops(module_node.bwd_op_list, f"{module_node.name} [BACKWARD]")) - return bwd_module_list diff --git a/profiler/module_visualization/prof_graph_export.py b/profiler/module_visualization/prof_graph_export.py deleted file mode 100644 index d336e97f741..00000000000 --- a/profiler/module_visualization/prof_graph_export.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024 Huawei Technologies Co., Ltd -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import logging -from datetime import datetime - -from profiler.prof_common.constant import Constant -from profiler.prof_common.file_reader import FileReader -from profiler.prof_common.path_manager import PathManager -from profiler.module_visualization.graph_build.prof_graph_builder import ProfGraphBuilder - - -class ProfGraphExport: - @staticmethod - def export_to_json(prof_data_path: str, output_path: str): - logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") - try: - PathManager.input_path_common_check(prof_data_path) - PathManager.check_input_directory_path(output_path) - PathManager.make_dir_safety(output_path) - all_nodes = ProfGraphBuilder(prof_data_path).build_graph() - result_data = {"root": Constant.NPU_ROOT_ID, "node": {}} - for node in all_nodes: - result_data["node"][node.node_id] = node.info - file_name = "prof_graph_json_{}.vis".format(datetime.utcnow().strftime("%Y%m%d%H%M%S%f")[:-3]) - FileReader.write_json_file(output_path, result_data, file_name) - except RuntimeError as err: - logging.error(err) diff --git a/profiler/module_visualization/prof_parse/__init__.py b/profiler/module_visualization/prof_parse/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/profiler/module_visualization/prof_parse/prof_data_pre_process.py b/profiler/module_visualization/prof_parse/prof_data_pre_process.py deleted file mode 100644 index 9dc820e4ca5..00000000000 --- a/profiler/module_visualization/prof_parse/prof_data_pre_process.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2024 Huawei Technologies Co., Ltd -# All rights reserved. -# -# Licensed under the BSD 3-Clause License (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://opensource.org/licenses/BSD-3-Clause -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from profiler.prof_common.file_reader import FileReader -from profiler.prof_common.constant import Constant -from profiler.prof_common.trace_event_bean import TraceEventBean - - -class ProfDataPreProcess: - def __init__(self, prof_data_path: str): - self._prof_data_path = prof_data_path - self._trace_path = "" - self._kernel_pid = None - self._result_data = {Constant.CPU_OP_EVENT: [], Constant.MODULE_EVENT: [], Constant.KERNEL_EVENT: [], - Constant.TORCH_TO_NPU_FLOW: {}, Constant.FWD_BWD_FLOW: {}} - - def run(self) -> dict: - self._check_trace_path() - self._parse_trace_events() - self._check_result_data() - return self._result_data - - def _check_trace_path(self): - if os.path.isfile(self._prof_data_path): - (split_file_path, split_file_name) = os.path.split(self._prof_data_path) - (shot_name, extension) = os.path.splitext(split_file_name) - if extension != ".json": - msg = f"Invalid profiling path suffix: {self._prof_data_path}. " \ - f"You should input in a json file path, such as trace_view.json." - raise RuntimeError(msg) - self._trace_path = self._prof_data_path - return - ascend_output = os.path.join(self._prof_data_path, "ASCEND_PROFILER_OUTPUT") - profiler_output = ascend_output if os.path.isdir(ascend_output) else self._prof_data_path - json_path = os.path.join(profiler_output, "trace_view.json") - if not os.path.isfile(json_path): - msg = f"Invalid profiling path: {self._prof_data_path}. The data path should be the " \ - f"folder that ends with the ascend_pt collected by the Ascend PyTorch Profiler." 
- raise RuntimeError(msg) - self._trace_path = json_path - - def _parse_trace_events(self): - trace_data = FileReader.read_json_file(self._trace_path) - self._check_trace_data(trace_data) - iter_trace_data = iter(trace_data) - for event in iter_trace_data: - bean = TraceEventBean(event) - if bean.is_optimizer(): - self._result_data[Constant.MODULE_EVENT].append(bean) - elif bean.is_cpu_op(): - if not bean.is_step(): - self._result_data[Constant.CPU_OP_EVENT].append(bean) - elif bean.is_nn_module(): - self._result_data[Constant.MODULE_EVENT].append(bean) - elif bean.is_torch_to_npu(): - if bean.is_flow_start(): - self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(bean.id, {})["start"] = bean - else: - self._result_data[Constant.TORCH_TO_NPU_FLOW].setdefault(bean.id, {})["end"] = bean - elif bean.is_fwd_bwd_flow(): - if bean.is_flow_start(): - self._result_data[Constant.FWD_BWD_FLOW].setdefault(bean.id, {})["start"] = bean - else: - self._result_data[Constant.FWD_BWD_FLOW].setdefault(bean.id, {})["end"] = bean - elif bean.is_kernel_event(self._kernel_pid): - self._result_data[Constant.KERNEL_EVENT].append(bean) - - def _check_trace_data(self, trace_data): - if not isinstance(trace_data, list): - msg = f"Invalid profiling data path, this feature only supports performance data " \ - f"collected by Ascend PyTorch Profiler." - raise RuntimeError(msg) - iter_trace_data = iter(trace_data) - for event in iter_trace_data: - bean = TraceEventBean(event) - if bean.is_npu_process(): - self._kernel_pid = bean.pid - break - if self._kernel_pid is None: - msg = f"There is no operator on the NPU side for this data, please check whether the NPU switch is enabled." - raise RuntimeError(msg) - - def _check_result_data(self): - if not self._result_data.get(Constant.CPU_OP_EVENT): - msg = f"This data does not have any aten operator, please make sure to enable the CPU switch." 
- raise RuntimeError(msg) - if not self._result_data.get(Constant.MODULE_EVENT): - msg = f"This data does not collect any modules, please make sure to turn on the with_stack switch." - raise RuntimeError(msg) diff --git a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py index 44d97b248e6..80734635929 100644 --- a/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py +++ b/profiler/test/ut/compare_tools/profiling_parser/test_base_profiling_parser.py @@ -24,6 +24,11 @@ class ProfilingParser(BaseProfilingParser): self._enable_operator_compare = True self._enable_memory_compare = True self._enable_communication_compare = True + self._enable_kernel_compare = True + self._enable_api_compare = True + + def _update_kernel_details(self): + pass def _update_memory_list(self): pass -- Gitee From f557dfbbf0c96b57069f816ac2b70463145cc79f Mon Sep 17 00:00:00 2001 From: yangxinxian <947098055@qq.com> Date: Tue, 6 Aug 2024 15:06:44 +0800 Subject: [PATCH 67/67] Delete api_registry copy.py --- .../dump/hook_cell/api_registry copy.py | 198 ------------------ 1 file changed, 198 deletions(-) delete mode 100644 debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py b/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py deleted file mode 100644 index ad73bcd9119..00000000000 --- a/debug/accuracy_tools/msprobe/mindspore/dump/hook_cell/api_registry copy.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -import functools -import mindspore as ms -from mindspore import ops -from mindspore.common.tensor import Tensor -from msprobe.core.common.utils import Const -from msprobe.core.data_dump.data_processor.base import ModuleBackwardInputsOutputs, ModuleForwardInputsOutputs -from msprobe.mindspore.dump.hook_cell.wrap_functional import get_functional_ops, setup_hooks, \ - HOOKFunctionalOP, HOOKMintOP, HOOKMintNNFunctionalOP -from msprobe.mindspore.dump.hook_cell.wrap_tensor import get_tensor_ops, wrap_tensor_ops_and_bind, HOOKTensor -from msprobe.core.common.utils import Const - -PRIMITIVE_PREFIX = "Primitive" - -class ApiRegistry: - def __init__(self): - self.tensor_ori_attr = {} - self.functional_ori_attr = {} - self.mint_ops_ori_attr = {} - self.mint_func_ops_ori_attr = {} - self.norm_inner_ops_ori_attr = {} - - self.tensor_hook_attr = {} - self.functional_hook_attr = {} - self.mint_ops_hook_attr = {} - self.mint_func_ops_hook_attr = {} - self.norm_inner_ops_hook_attr = {} - - self.norm_inner_ops = ["norm", "square", "sqrt", "is_complex"] - self.primitive_counters = {} - - @staticmethod - def store_ori_attr(ori_api_group, api_list, api_ori_attr): - for api in api_list: - if Const.SEP in api: - sub_module_name, sub_op = api.rsplit(Const.SEP, 1) - sub_module = getattr(ori_api_group, sub_module_name) - api_ori_attr[api] = getattr(sub_module, sub_op) - else: - api_ori_attr[api] = getattr(ori_api_group, api) - - @staticmethod - def set_api_attr(api_group, attr_dict): - for 
api, api_attr in attr_dict.items(): - if Const.SEP in api: - sub_module_name, sub_op = api.rsplit(Const.SEP, 1) - sub_module = getattr(api_group, sub_module_name, None) - if sub_module is not None: - setattr(sub_module, sub_op, api_attr) - else: - setattr(api_group, api, api_attr) - - def norm_inner_op_set_hook_func(self): - self.set_api_attr(ms.ops, self.norm_inner_ops_hook_attr) - - def norm_inner_op_set_ori_func(self): - self.set_api_attr(ms.ops, self.norm_inner_ops_ori_attr) - - def api_set_hook_func(self): - self.set_api_attr(ms.Tensor, self.tensor_hook_attr) - self.set_api_attr(ms.ops, self.functional_hook_attr) - self.set_api_attr(ms.mint, self.mint_ops_hook_attr) - self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_hook_attr) - - def api_set_ori_func(self): - self.set_api_attr(ms.Tensor, self.tensor_ori_attr) - self.set_api_attr(ms.ops, self.functional_ori_attr) - self.set_api_attr(ms.mint, self.mint_ops_ori_attr) - self.set_api_attr(ms.mint.nn.functional, self.mint_func_ops_ori_attr) - - def initialize_hook(self, hook): - self.store_ori_attr(ms.Tensor, get_tensor_ops(), self.tensor_ori_attr) - wrap_tensor_ops_and_bind(hook) - for attr_name in dir(HOOKTensor): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.tensor_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKTensor, attr_name) - - functional_ops, mint_ops, mint_func_ops = get_functional_ops() - self.store_ori_attr(ms.ops, self.norm_inner_ops, self.norm_inner_ops_ori_attr) - self.store_ori_attr(ms.ops, functional_ops, self.functional_ori_attr) - self.store_ori_attr(ms.mint, mint_ops, self.mint_ops_ori_attr) - self.store_ori_attr(ms.mint.nn.functional, mint_func_ops, self.mint_func_ops_ori_attr) - setup_hooks(hook) - for attr_name in dir(HOOKFunctionalOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.functional_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKFunctionalOP, attr_name) - if attr_name[Const.ATTR_NAME_PREFIX_LEN:] in 
self.norm_inner_ops: - self.norm_inner_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKFunctionalOP, attr_name) - for attr_name in dir(HOOKMintOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.mint_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintOP, attr_name) - for attr_name in dir(HOOKMintNNFunctionalOP): - if attr_name.startswith(Const.ATTR_NAME_PREFIX): - self.mint_func_ops_hook_attr[attr_name[Const.ATTR_NAME_PREFIX_LEN:]] = getattr(HOOKMintNNFunctionalOP, attr_name) - - def wrap_primitive(self, origin_func, primitive_name, service_instance): - primitive_instance = self - def func(self, *args, **kwargs): - if primitive_name not in primitive_instance.primitive_counters: - primitive_instance.primitive_counters[primitive_name] = 0 - else: - primitive_instance.primitive_counters[primitive_name] += 1 - - current_count = primitive_instance.primitive_counters[primitive_name] - updated_primitive_name = f"{PRIMITIVE_PREFIX}.{primitive_name}.{current_count}" - captured_grads_input = [] - captured_grads_output = [] - - def input_backward_hook(grad): - print(f"Grad input length: {len(grad)}") - print("Captured input grad:", grad) - captured_grads_input.append(grad) - backward_primitive_name = updated_primitive_name + Const.BACKWARD - new_module_input_output = ModuleBackwardInputsOutputs( - grad_input=tuple(captured_grads_input), - grad_output=tuple(captured_grads_output) if captured_grads_output else None - ) - service_instance.data_collector.backward_data_collect( - backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output - ) -#1未考虑多输出场景 -# 如果时多grad呢 -# 3 输出的序号问题 - def output_backward_hook(grad): - captured_grads_output.append(grad) - backward_primitive_name = f"{updated_primitive_name}.{Const.BACKWARD}" - new_module_input_output = ModuleBackwardInputsOutputs( - grad_input=None, - grad_output=tuple(captured_grads_output) - ) - service_instance.data_collector.backward_data_collect( - 
backward_primitive_name + Const.BACKWARD, self, os.getpid(), new_module_input_output - ) - - if not service_instance.switch: - return origin_func(*args, **kwargs) - - print(f"Entering {updated_primitive_name} hook, number of args: {len(args)}, name: {self.name}") - hooked_inputs = [] - - # for idx, arg in enumerate(args): - # if isinstance(arg, Tensor): - # arg_hooked = ops.HookBackward(input_backward_hook)(arg) - # hooked_inputs.append(arg_hooked) - # else: - # hooked_inputs.append(arg) - - out = origin_func(*args, **kwargs) - forward_primitive_name = f"{updated_primitive_name}.{Const.FORWARD}" - - if service_instance.data_collector: - module_input_output = ModuleForwardInputsOutputs(args=args, kwargs=kwargs, output=out) - service_instance.data_collector.forward_data_collect(forward_primitive_name, self, os.getpid(), module_input_output) - if service_instance.data_collector.if_return_forward_new_output(): - out = service_instance.data_collector.get_forward_new_output() - - if isinstance(out, Tensor): - out = ops.HookBackward(output_backward_hook)(out) - elif isinstance(out, tuple): - hooked_outputs = [] - for tensor in out: - if isinstance(tensor, Tensor): - hooked_outputs.append(ops.HookBackward(output_backward_hook)(tensor)) - else: - hooked_outputs.append(tensor) - out = tuple(hooked_outputs) - - return out - - return func - - def register_hooks(self, service_instance): - primitive_set = set() - for name, cell in service_instance.model.cells_and_names(): - for pname, primitive in cell._primitives.items(): - primitive_set.add((pname, primitive)) - - for pname, primitive in primitive_set: - print("primitive name is", pname) - NewPrimitive = type('NewPrimitive', (primitive.__class__,), {'__call__': self.wrap_primitive(primitive.__call__, pname, service_instance)}) - primitive.__class__ = NewPrimitive - -api_register = ApiRegistry() -- Gitee